| library(tidyverse) |
|
|
| |
#' Read comma-separated caption files from a directory into a long tibble
#'
#' Each `.txt` file found directly in `directory_path` is split on commas.
#' Every resulting caption becomes one row, paired with the file's path with
#' its `.txt` extension replaced by `.png`.
#'
#' @param directory_path Directory to scan for `.txt` caption files.
#' @return `NULL` when the directory holds no `.txt` files; otherwise a tibble
#'   with the columns `image_path`, `caption_order` (1-based position of the
#'   caption within its file) and `caption` (whitespace-trimmed). Files that
#'   yield no captions contribute no rows.
read_captions_from_directory <- function(directory_path) {
  txt_files <- list.files(
    directory_path,
    pattern = "\\.txt$",
    full.names = TRUE
  )

  if (length(txt_files) == 0) {
    # Kept as NULL so existing callers that test for it keep working.
    return(NULL)
  }

  per_file <- lapply(txt_files, function(file) {
    lines <- readLines(file, warn = FALSE)

    # Split every line rather than only the first one, so captions spread
    # over several lines are not silently dropped.
    pieces <- unlist(strsplit(lines, ",", fixed = TRUE), use.names = FALSE)
    if (length(pieces) == 0) {
      # Empty file, or a file that only holds blank lines.
      return(NULL)
    }
    captions <- trimws(pieces)

    tibble(
      image_path = sub("\\.txt$", ".png", file),
      # seq_along() is safe for any length, unlike 1:length(x).
      caption_order = seq_along(captions),
      caption = captions
    )
  })

  bind_rows(per_file)
}
|
|
#' Count how often each caption occurs
#'
#' @param data A data frame with a `caption` column.
#' @return One row per distinct caption with its number of rows in
#'   `frequency`, sorted from the most to the least frequent caption.
get_caption_frequency <- function(data) {
  by_caption <- group_by(data, caption)
  counts <- summarise(by_caption, frequency = n())
  arrange(counts, desc(frequency))
}
|
|
#' Find the rows that carry a given caption
#'
#' @param data A data frame with `image_path` and `caption` columns.
#' @param target_caption Caption to match exactly.
#' @return The distinct rows whose `caption` equals `target_caption`. The
#'   result is grouped by `image_path`.
search_by_caption <- function(data, target_caption) {
  hits <- filter(data, caption == target_caption)
  distinct(group_by(hits, image_path))
}
|
|
#' Remove a caption from one image and close the gap in its ordering
#'
#' All rows of `target_image_path` whose caption equals `target_caption` are
#' dropped. Each remaining caption of that image has its `caption_order`
#' lowered by the number of removed captions that preceded it. Rows of other
#' images are left untouched.
#'
#' @param data A data frame with `image_path`, `caption_order` and `caption`.
#' @param target_image_path Image whose caption should be removed.
#' @param target_caption Caption to remove.
#' @return `data` without the matching rows. If nothing matches, a notice is
#'   printed and `data` is returned unchanged.
remove_caption_and_adjust_order <- function(data, target_image_path, target_caption) {
  # which() skips NA comparisons, so rows with a missing path or caption never
  # count as a match and never make the condition below NA.
  hit_rows <- which(
    data$image_path == target_image_path & data$caption == target_caption
  )

  if (length(hit_rows) == 0) {
    cat(sprintf("The caption '%s' does not exist for image '%s'.\n", target_caption, target_image_path))
    return(data)
  }

  removed_orders <- data$caption_order[hit_rows]
  data <- data[-hit_rows, , drop = FALSE]
  if (!inherits(data, "tbl_df")) {
    # Plain data frames keep their old row names when subset; renumber them.
    rownames(data) <- NULL
  }

  sibling_rows <- which(data$image_path == target_image_path)
  if (length(sibling_rows) > 0) {
    sibling_orders <- data$caption_order[sibling_rows]
    # The same caption may have been listed several times for the image, so
    # shift each survivor by the count of removed positions before it instead
    # of assuming exactly one removal.
    shift <- vapply(
      sibling_orders,
      function(order) sum(removed_orders < order, na.rm = TRUE),
      integer(1)
    )
    data$caption_order[sibling_rows] <- sibling_orders - shift
  }

  data
}
|
|
#' Remove every caption that occurs at most `threshold` times
#'
#' Frequencies are computed once over the whole of `data` with
#' `get_caption_frequency()`. Each rare caption is then removed from every
#' image that carries it via `remove_caption_and_adjust_order()`.
#'
#' @param data A data frame with `image_path`, `caption_order` and `caption`.
#' @param threshold Captions with a frequency less than or equal to this value
#'   are removed.
#' @return `data` without the low-frequency captions.
remove_low_frequency_captions <- function(data, threshold) {
  freq_table <- get_caption_frequency(data)
  # which() drops NA comparisons, mirroring how filter() treats them.
  rare_captions <- freq_table[["caption"]][
    which(freq_table[["frequency"]] <= threshold)
  ]

  for (rare_caption in rare_captions) {
    holders <- unique(data$image_path[data$caption == rare_caption])
    for (holder in holders) {
      data <- remove_caption_and_adjust_order(data, holder, rare_caption)
    }
  }

  data
}
|
|
#' Review one caption image by image and optionally remove it
#'
#' For every image that carries `target_caption`, the image is opened with the
#' platform's default viewer and the user is asked whether the caption should
#' be removed. Answering "yes" removes it, "end" stops the review, and any
#' other answer keeps the caption. Answers are case-insensitive and surrounding
#' whitespace is ignored.
#'
#' @param data A data frame with `image_path`, `caption_order` and `caption`.
#' @param target_caption Caption to review.
#' @return `data` with the caption removed from the images the user confirmed.
edit_captions_interactively <- function(data, target_caption) {
  # An image can match more than once; ask about each image a single time.
  image_paths <- unique(search_by_caption(data, target_caption)$image_path)

  # The platform does not change while looping, so look it up once.
  os_name <- Sys.info()[["sysname"]]

  for (path in image_paths) {
    # Quote the path so spaces, quotes or shell metacharacters in a file name
    # cannot break or alter the command.
    if (os_name == "Windows") {
      shell(sprintf('start "" %s', shQuote(path, type = "cmd")), intern = TRUE)
    } else {
      opener <- if (os_name == "Darwin") "open" else "xdg-open"
      system2(opener, shQuote(path))
    }

    cat(sprintf("Do you want to remove the caption '%s' from image '%s'? (yes/no/end): ", target_caption, path))
    response <- tolower(trimws(readline()))

    if (response == "end") {
      break
    }
    if (response == "yes") {
      data <- remove_caption_and_adjust_order(data, path, target_caption)
    }
  }

  data
}
|
|
#' Insert a caption into an image's caption list at a given position
#'
#' Captions of `target_image_path` at or after the insertion position are
#' shifted one place down, then the new caption is appended to `data` as a new
#' row. If the image already has `target_caption`, `data` is returned
#' unchanged.
#'
#' @param data A data frame with `image_path`, `caption_order` and `caption`.
#' @param target_image_path Image that receives the caption.
#' @param target_caption Caption to add.
#' @param target_order 1-based position for the new caption. When `NULL`, the
#'   image's current captions are printed and the position is read from the
#'   console. A position that is missing, not a number, or outside
#'   `1..(number of captions + 1)` places the caption at the end.
#' @return `data` with the new caption row added.
add_caption_at_order <- function(data, target_image_path, target_caption, target_order = NULL) {
  on_image <- which(data$image_path == target_image_path)

  if (target_caption %in% data$caption[on_image]) {
    return(data)
  }

  existing_orders <- data$caption_order[on_image]
  existing_orders <- existing_orders[!is.na(existing_orders)]
  # max() of an empty vector is -Inf (with a warning); an image that has no
  # captions yet simply has a highest order of 0.
  max_order <- if (length(existing_orders) > 0) max(existing_orders) else 0

  if (is.null(target_order)) {
    print_image_captions_as_csv(data, target_image_path)
    cat("Enter the position (order) to insert the new caption (1 to", max_order + 1, "): ")
    # Non-numeric input becomes NA and is handled by the range check below.
    target_order <- suppressWarnings(as.numeric(readline()))
  }

  # Validate interactive and explicit positions alike so the ordering never
  # ends up with gaps or non-positive values.
  out_of_range <- length(target_order) != 1 ||
    is.na(target_order) ||
    target_order < 1 ||
    target_order > max_order + 1
  if (out_of_range) {
    target_order <- max_order + 1
  }

  # which() skips rows with a missing path or order instead of turning their
  # caption_order into NA.
  shift_rows <- which(
    data$image_path == target_image_path & data$caption_order >= target_order
  )
  data$caption_order[shift_rows] <- data$caption_order[shift_rows] + 1

  new_caption <- tibble(
    image_path = target_image_path,
    caption_order = target_order,
    caption = target_caption
  )

  bind_rows(data, new_caption)
}
|
|
#' Move an existing caption of an image to a new position
#'
#' The caption is first removed with `remove_caption_and_adjust_order()` and
#' then re-inserted with `add_caption_at_order()` at `new_order`.
#'
#' @param data A data frame with `image_path`, `caption_order` and `caption`.
#' @param target_image_path Image whose caption is moved.
#' @param target_caption Caption to move.
#' @param new_order Position passed on to `add_caption_at_order()`.
#' @return The updated data. If the image does not have the caption, a notice
#'   is printed and `data` is returned unchanged.
move_caption_order <- function(data, target_image_path, target_caption, new_order) {
  caption_found <- any(
    data$image_path == target_image_path & data$caption == target_caption
  )

  if (caption_found) {
    without_caption <- remove_caption_and_adjust_order(
      data, target_image_path, target_caption
    )
    return(
      add_caption_at_order(
        without_caption, target_image_path, target_caption, new_order
      )
    )
  }

  cat(sprintf("The caption '%s' does not exist for image '%s'.\n", target_caption, target_image_path))
  data
}
|
|
| |
#' Check whether an image has a given caption
#'
#' @param data A data frame with `image_path` and `caption` columns.
#' @param target_image_path Image to look at.
#' @param target_caption Caption to look for.
#' @return `TRUE` if at least one row matches both values, otherwise `FALSE`.
is_caption_present <- function(data, target_image_path, target_caption) {
  # na.rm = TRUE keeps this a strict TRUE/FALSE predicate: without it, a row
  # with a missing path or caption turns "no match" into NA.
  any(
    data$image_path == target_image_path & data$caption == target_caption,
    na.rm = TRUE
  )
}
|
|
| |
#' Print every distinct caption on one comma-separated line
#'
#' @param data A data frame with a `caption` column.
#' @return `NULL`; called for its console output. Captions appear in order of
#'   first occurrence.
print_all_unique_captions_as_csv <- function(data) {
  cat(toString(unique(data$caption)), "\n")
}
|
|
#' Print one image's captions on one comma-separated line
#'
#' @param data A data frame with `image_path`, `caption_order` and `caption`.
#' @param target_image_path Image whose captions are printed.
#' @return `NULL`; called for its console output. Captions are listed by
#'   ascending `caption_order`.
print_image_captions_as_csv <- function(data, target_image_path) {
  image_rows <- filter(data, image_path == target_image_path)
  ordered_rows <- arrange(image_rows, caption_order)
  cat(toString(pull(ordered_rows, caption)), "\n")
}
|
|
| |
#' Collapse a set of related captions onto one representative caption
#'
#' When the image has `representative_caption`, every other caption listed in
#' `related_captions` is removed from that image with
#' `remove_caption_and_adjust_order()`. When it does not, a notice is printed
#' and nothing is removed.
#'
#' @param data A data frame with `image_path`, `caption_order` and `caption`.
#' @param related_captions Captions considered equivalent to one another.
#' @param representative_caption The caption to keep.
#' @param target_image_path Image to clean up.
#' @return The updated data.
remove_related_captions_except_representative <- function(data, related_captions, representative_caption, target_image_path) {
  has_representative <- any(
    data$image_path == target_image_path &
      data$caption == representative_caption
  )

  if (has_representative) {
    for (candidate in related_captions) {
      if (candidate == representative_caption) {
        next
      }
      data <- remove_caption_and_adjust_order(
        data, target_image_path, candidate
      )
    }
    return(data)
  }

  cat(sprintf("The representative caption '%s' is not associated with image '%s'.\n", representative_caption, target_image_path))
  data
}