# notes
# sample names at times 0, 24, and 48 don't make it clear that those samples belong to each of the latter four conditions (aa, af, ff, fa)
# sample names should be adjusted so that, e.g., p0-1 becomes 0-aa1, 0-af1, 0-ff1, and 0-fa1
# there should actually be 8 conditions: ra52-ha, ra52-hrf, ra76-ha, ra76-hrf, rrf52-ha, rrf52-hrf, rrf76-ha, rrf76-hrf

# changes
# adjusted sample names such that there were separate observations for each condition/time_point


if (!require("pacman")) install.packages("pacman")

pacman::p_load(tidyverse, data.table, broom, janitor)


###############################################################
##################                           ##################
##################      ORGANIC ACIDS        ##################
##################                           ##################
###############################################################

# the organic_acids function is not used here because processing these data requires specific operations
# (primarily relating to sample names) that the function does not currently support

# Read every HPLC results export found in the results directory and stack them
# into a single table. Column names are normalised with janitor::clean_names(),
# every character column is whitespace-trimmed, and the first "K" in each
# sample name is removed.
#
# `pattern` in list.files() is a regular expression, not a shell glob, so the
# suffix is matched with an escaped dot and an end-of-string anchor; this keeps
# files such as "results.csv.bak" or "results_csv" from being read by accident.
acids <- list.files(
  path = "~/chapter-06_strawberry_genotypes/data/primary/VT_24/storage_scenarios/hplc/results/",
  pattern = "results\\.csv$",
  full.names = TRUE
) %>%
  map_df(
    ~ read_csv(.)
  ) %>%
  janitor::clean_names() %>%
  mutate(
    across(where(is.character), str_trim),
    sample_name = str_remove(sample_name, "K")
  )


# Read every extraction-weight sheet found in the extraction_weights directory
# and stack them into a single table. Column names are normalised with
# janitor::clean_names(), the first "K" in each sample ID is removed, and every
# character column is whitespace-trimmed.
#
# `pattern` in list.files() is a regular expression, not a shell glob, so the
# suffix is matched with an escaped dot and an end-of-string anchor; this keeps
# stray files whose names merely contain the text from being read by accident.
dryweight <- list.files(
  path = "~/chapter-06_strawberry_genotypes/data/primary/VT_24/storage_scenarios/hplc/extraction_weights/",
  pattern = "extraction_weights\\.csv$",
  full.names = TRUE
) %>%
  map_df(
    ~ read_csv(.)
  ) %>%
  janitor::clean_names() %>%
  mutate(
    sample = str_remove(sample, "K"),
    across(where(is.character), str_trim)
  )


# Narrow the raw HPLC table to the rows and columns used downstream:
#   * rows whose analyte `name` matches one of the listed patterns and whose
#     sample name does not contain "std"
#   * the sample ID (renamed from sample_name to `sample`), analyte name,
#     concentration, injection volume and amount
# Rows with a missing value in any of the kept columns are discarded.
acids_reduce <- acids %>%
  filter(
    str_detect(name, "ascorbic acid|AsA|asa|malic|citric|CRM|IHRM") &
      !str_detect(sample_name, "std")
  ) %>%
  select(
    sample = sample_name,
    name,
    concentration,
    injection_volume_ul,
    amount
  ) %>%
  drop_na()

# Attach the extraction weights to each HPLC measurement and express the
# measured concentration per gram of extracted material.
#
# The three constants below are applied to every row (their units are not
# stated in this file):
#   weight_correction = 0.89, extraction_volume = 5, dilution = 0.5
#
# Analyte names are harmonised ("asa"/"AsA" -> "ascorbic acid") and whitespace
# is replaced by underscores so they can later be used as column names.
# The result keeps experiment, sample, measure_var and value, sorted by sample,
# with any row containing a missing value removed.
acids_process <- acids_reduce %>%
  left_join(dryweight, by = "sample") %>%
  mutate(
    weight_correction = 0.89,
    extraction_volume = 5,
    dilution = 0.5,
    mg_per_g = (concentration / dilution) /
      ((weight_grams * weight_correction) / extraction_volume),
    experiment = "STS24",
    measure_var = str_replace_all(
      str_replace_all(name, "asa|AsA", "ascorbic acid"),
      "\\s", "_"
    )
  ) %>%
  arrange(sample) %>%
  select(experiment, sample, measure_var, value = mg_per_g) %>%
  drop_na()

# Remove the intermediate tables that are no longer needed; only
# acids_process is carried forward. Anything left in the workspace is written
# out by save.image() at the end of the script.
rm(acids, acids_reduce, dryweight)


# Load the sub-extraction weight sheet and derive one row of sample metadata
# per observation:
#   * condition : the first "-<digit>-" fragment is stripped
#   * perc_dw   : dry weight divided by fresh weight
#   * sample    : "<time>-<condition><rep>", except that any ID containing
#                 p1..p4 is collapsed to "p1-0".."p4-0"
# Only rows with a positive perc_dw are kept, ordered by time and sample.
sts24_samples <- read_csv(
  "~/chapter-06_strawberry_genotypes/data/primary/VT_24/storage_scenarios/sampling/20240708_STS_exp02_subextraction_weights.csv"
) %>%
  mutate(
    condition = str_remove(condition, "-[0-9]-"),
    perc_dw = dryweight_grams / freshweight_grams,
    experiment = "STS24",
    # same string that the two chained unite() calls would build
    sample = paste0(time, "-", condition, rep),
    sample = case_when(
      str_detect(sample, "p1") ~ "p1-0",
      str_detect(sample, "p2") ~ "p2-0",
      str_detect(sample, "p3") ~ "p3-0",
      str_detect(sample, "p4") ~ "p4-0",
      TRUE ~ sample
    )
  ) %>%
  filter(perc_dw > 0) %>%
  select(date, experiment, sample, time, condition, rep, perc_dw) %>%
  arrange(time, sample)

# Build the final long-format organic-acid dataset:
#   1. join the sample metadata to the per-analyte concentrations
#   2. expand every row into each of the eight storage scenarios it belongs to
#   3. rebuild the sample ID as "<time_point>-<scenario><rep>"
#   4. scale the acid values by perc_dw (columns suffixed *_fw)
#   5. return one row per sample / measure_var, sorted by time_point and sample
sts24_acids <- sts24_samples %>% 
  # many-to-many is declared explicitly so dplyr accepts repeated keys on both
  # sides (presumably the collapsed "p*-0" IDs repeat in sts24_samples while
  # acids_process holds one row per analyte -- confirm against the data)
  left_join(
    acids_process,
    by = c(
      "experiment", "sample"
      ),
    relationship = "many-to-many"
  ) %>% 
  rename(
    time_point = time
  ) %>% 
  mutate(
    # one helper column per scenario: it holds the scenario label when the
    # row's condition matches the pattern and NA otherwise. Any condition
    # containing "p" matches all eight patterns. The 52 and 76 variants use
    # identical patterns here; they are told apart by the filter() further down.
    tmp_1 = if_else(str_detect(condition, "p|^f$|ff"), "RRF52-HRF", NA),
    tmp_2 = if_else(str_detect(condition, "p|^f$|fa"), "RRF52-HA", NA),
    tmp_3 = if_else(str_detect(condition, "p|^a$|aa"), "RA52-HA", NA),
    tmp_4 = if_else(str_detect(condition, "p|^a$|af"), "RA52-HRF", NA),
    tmp_5 = if_else(str_detect(condition, "p|^f$|ff"), "RRF76-HRF", NA),
    tmp_6 = if_else(str_detect(condition, "p|^f$|fa"), "RRF76-HA", NA),
    tmp_7 = if_else(str_detect(condition, "p|^a$|aa"), "RA76-HA", NA),
    tmp_8 = if_else(str_detect(condition, "p|^a$|af"), "RA76-HRF", NA),
    # shift the time points: 0 stays 0, 24 becomes 28, and anything >= 48 gets
    # +5. There is no default branch, so any other value becomes NA (and is
    # then removed by the drop_na() below).
    time_point = case_when(
      time_point == 0 ~ time_point,
      time_point == 24 ~ time_point + 4,
      time_point >= 48 ~ time_point + 5
    )
  ) %>% 
  # stack the eight helper columns: one row per (observation, scenario)
  pivot_longer(
    starts_with("tmp"),
    names_to = "name",
    values_to = "tmp"
  ) %>% 
  # called without column arguments, so this drops rows with an NA in ANY
  # column (non-matching scenarios, but also unmatched joins and NA time points)
  drop_na() %>% 
  select(
    -c(condition,name)
  ) %>% 
  # the scenario label replaces the original condition code
  rename(
    condition = tmp
  ) %>% 
  # rebuild the sample ID in two steps: "<time_point>-<condition>", then append
  # rep. time_point and condition are kept; rep is consumed by the second unite.
  unite(
    "sample",
    c(
      time_point,condition
    ),
    sep = "-",
    remove = FALSE
  ) %>% 
  unite(
    "sample",
    c(
      sample,rep
    ),
    sep = ""
  ) %>%  
  # drop time point 101 from the *76 scenarios and 125 from the *52 scenarios
  filter(
    !str_detect(sample, "101-RA76|101-RRF76"),
    !str_detect(sample, "125-RA52|125-RRF52")
  ) %>% 
  # one column per analyte (ascorbic_acid, malic_acid, citric_acid are used below)
  pivot_wider(
    names_from = "measure_var",
    values_from = "value"
  ) %>% 
  mutate(
    # multiply by perc_dw (dry weight / fresh weight); ascorbic acid is further
    # scaled by 100, matching the "_mg_100g_fw" column name
    ascorbic_acid_mg_100g_fw = (ascorbic_acid * perc_dw) * 100,
    malic_acid_mg_per_g_fw = (malic_acid * perc_dw),
    citric_acid_mg_per_g_fw = (citric_acid * perc_dw),
    condition = as.factor(condition),
    time_point = as.factor(time_point)
  )  %>% 
  # remove the analyte columns whose names end in "acid"; the *_fw columns stay
  select(
    -ends_with("acid")
  ) %>% 
  # back to long format over every numeric column.
  # NOTE(review): perc_dw is still present and numeric at this point, so it
  # appears to be pivoted into measure_var as well -- confirm this is intended.
  pivot_longer(
    where(
      is.numeric
    ),
    names_to = "measure_var",
    values_to = "value"
  ) %>% 
  select(
    date,
    experiment,
    sample,
    time_point,
    condition,
    measure_var,
    value
  ) %>% 
  arrange(
    time_point, sample
  )

# write the whole current workspace (every object still defined at this point,
# not just sts24_acids) to a date-stamped .Rdata file that can be loaded into
# an R environment by the user
save.image(
  file = paste0(
    "~/chapter-06_strawberry_genotypes/data/processed/",
    format(Sys.Date(), "%Y%m%d"),
    "_STS_compiled_dataset.Rdata"
  )
)

# also export the compiled dataset as a date-stamped csv file to facilitate
# future analysis in other data analysis software/tools
write_csv(
  sts24_acids,
  paste0(
    "~/chapter-06_strawberry_genotypes/data/processed/",
    format(Sys.Date(), "%Y%m%d"),
    "_STS_compiled_dataset.csv"
  )
)
