# notes
# corrected_abs not being calculated for some samples in gae set 3, needs debugging
# see ./data/processed/2023_2024_variety_trials/*compiled_dataset.rds (or .csv) for full chap-05 dataset
# issues described above will be addressed when I next have access to my lab book.


# changes
# gae set 3 corrected_abs working for all samples in all sets, could not see issue that was referenced in above notes...
# have created separate extraction_weight input files for AX and TP due to changes in set numbers made to accommodate VT23 data


if (!require("pacman")) install.packages("pacman")

pacman::p_load(tidyverse, data.table, broom, janitor)


###############################################################
##################                           ##################
##################      ORGANIC ACIDS        ##################
##################                           ##################
###############################################################

# Load the project's organic-acid processing routine (versioned helper script).
source("~/chapter-06_strawberry_genotypes/code/acids_processing_func_v0.0.6.R")

# Process the HPLC organic-acid data for both trials. `hplc_data` and
# `dry_weight` each list one directory per trial (VT_23 first, then VT_24).
# NOTE(review): the return value is not assigned here, yet `acids_process` is
# used in the COMPILATION section -- presumably organic_acids() creates it as a
# side effect and/or writes to `out_dir`; confirm against the sourced file.
organic_acids(
  hplc_data = c(
    "~/chapter-06_strawberry_genotypes/data/primary/VT_23/hplc/results/",
    "~/chapter-06_strawberry_genotypes/data/primary/VT_24/variety_trial/hplc/results/"
  ),
  dry_weight = c(
    "~/chapter-06_strawberry_genotypes/data/primary/VT_23/hplc/extraction_weights/",
    "~/chapter-06_strawberry_genotypes/data/primary/VT_24/variety_trial/hplc/extraction_weights/"
  ),
  out_dir = "~/chapter-06_strawberry_genotypes/data/processed/"
)

###############################################################
##################                           ##################
##################     TEAC/ANTIOXIDANTS     ##################
##################                           ##################
###############################################################

#### CALIBRATION DATA ####

# Import the spectrophotometer exports for the Trolox calibration standards.
# Each file is cut down to the reading number and absorbance, scaled by 1000,
# and labelled with concentration and replicate.
calibration_uvspec_AX <- list.files(
  path = c(
    "~/chapter-06_strawberry_genotypes/data/primary/VT_23/spectrophotometer/calibration/teac/",
    "~/chapter-06_strawberry_genotypes/data/primary/VT_24/variety_trial/spectrophotmeter/calibration/teac/"
  ),
  pattern = "*calibration.csv",
  full.names = TRUE
) %>%
  map(function(csv_path) {
    read_csv(csv_path, skip = 1) %>%
      clean_names() %>%
      select(no, absorbtion_abs) %>%
      drop_na() %>%
      mutate(
        abs_x_1000 = absorbtion_abs * 10^3,
        # assumes each file holds exactly 15 readings ordered by concentration,
        # three consecutive readings per level -- TODO confirm against lab book
        conc_trolox_mM = rep(c(0, 0.1, 0.5, 1.0, 2.0), each = 3),
        rep = rep(1:3, times = 5)
      )
  })

# A machine fault meant that some spectrophotometric calibration readings had
# to be recorded by hand; these files are read separately and their columns
# renamed to match the instrument exports.
calibration_manualentry_AX <- list.files(
  path = c(
    "~/chapter-06_strawberry_genotypes/data/primary/VT_23/spectrophotometer/calibration/teac/",
    "~/chapter-06_strawberry_genotypes/data/primary/VT_24/variety_trial/spectrophotmeter/calibration/teac/"
  ),
  pattern = "*calibration_manualentry.csv",
  full.names = TRUE
) %>%
  map(function(csv_path) {
    read_csv(csv_path) %>%
      clean_names() %>%
      rename(
        conc_trolox_mM = trolox_conc_mmol,
        abs_x_1000 = abs
      )
  })

# Stack the manually entered and the instrument-recorded standards (manual
# files first) and number the calibration sets by position.
# NOTE(review): the hard-coded 1:6 / each = 15 assumes exactly six calibration
# files of 15 readings each; mutate() will error if that ever changes.
calibration_AX <- c(calibration_manualentry_AX, calibration_uvspec_AX) %>%
  map_df(function(standards) {
    select(standards, conc_trolox_mM, abs_x_1000, rep)
  }) %>%
  mutate(set = rep(1:6, each = 15))

# Calibration set 3 is duplicated and the sets renumbered 1-7, so the copy
# becomes set 4 and the later sets shift up by one.
# NOTE(review): presumably one calibration served two sample sets -- confirm.
# Each standard is then expressed as % inhibition relative to the blank (0 mM)
# reading of its own set and replicate.
calibration_AX <- calibration_AX %>%
  bind_rows(filter(calibration_AX, set == 3)) %>%
  arrange(set) %>%
  mutate(set = rep(1:7, each = 15)) %>%
  mutate(
    # compare the numeric concentration with a number, not the string "0"
    perc_inhib = ((abs_x_1000[conc_trolox_mM == 0] - abs_x_1000) / abs_x_1000[conc_trolox_mM == 0]) * 100,
    .by = c(set, rep)
  ) %>%
  select(set, conc_trolox_mM, abs_x_1000, perc_inhib)

# Fit one straight line per calibration set (% inhibition against Trolox
# concentration) and keep its slope.
model_calibs_AX <- calibration_AX %>%
  ungroup() %>%
  nest_by(set) %>%
  mutate(
    model = list(lm(perc_inhib ~ conc_trolox_mM, data = data)),
    gradient = coef(model)[2]
  )

# slope of each set's calibration line, used to convert sample readings
model_calibs_slope_AX <- tibble(
  set = model_calibs_AX[["set"]],
  gradient = model_calibs_AX[["gradient"]]
)

# model-level fit statistics for every calibration, with the slope attached
model_calibs_summary_AX <- model_calibs_AX %>%
  reframe(glance(model)) %>%
  left_join(model_calibs_slope_AX, by = "set")

#### TEAC ASSAY RESULTS ####

# Weights of the dried samples taken for extraction; exact duplicate rows
# across the input files are removed.
extraction_weights_AX <- list.files(
  path = c(
    "~/chapter-06_strawberry_genotypes/data/primary/VT_23/spectrophotometer/extraction_weights/",
    "~/chapter-06_strawberry_genotypes/data/primary/VT_24/variety_trial/spectrophotmeter/extraction_weights/"
  ),
  pattern = "*AX_extraction_weights.csv",
  full.names = TRUE
) %>%
  map_df(read_csv) %>%
  distinct()

# The spectrophotometer has no option to enter a unique sample ID, so the
# order in which samples were analysed was recorded manually instead.
sample_order_AX <- list.files(
  path = c(
    "~/chapter-06_strawberry_genotypes/data/primary/VT_23/spectrophotometer/extraction_weights/",
    "~/chapter-06_strawberry_genotypes/data/primary/VT_24/variety_trial/spectrophotmeter/extraction_weights/"
  ),
  pattern = "*AX_uvspec_sampleorder.csv",
  full.names = TRUE
) %>%
  map_df(read_csv)

# Attach the extraction weights to the recorded run order and tag every
# sample with the blank it follows.
extraction_weights_number_AX <- sample_order_AX %>%
  left_join(
    extraction_weights_AX,
    by = c("sample", "set")
  ) %>%
  drop_na(sample) %>%
  mutate(
    # number the blanks within each set; seq_len() replaces seq(1:n()), which
    # misbehaves when there is nothing to number, and the NA is typed to match
    sub_set = if_else(sample == "blank", seq_len(n()), NA_integer_),
    .by = c(set, sample)
  ) %>%
  # non-blank rows inherit the number of the closest blank above them
  # NOTE(review): the fill is not restricted to a set, so a set that does not
  # start with a blank would inherit the previous set's number -- confirm
  tidyr::fill(sub_set, .direction = "down") %>%
  mutate(
    # samples labelled W2 / W3 get fixed 2023 harvest dates; all other rows
    # keep the date supplied in the input files
    harvest_date = case_when(
      str_detect(sample, "W2") ~ "13/07/2023",
      str_detect(sample, "W3") ~ "20/07/2023",
      TRUE ~ harvest_date
    )
  )

# Import the original spectrophotometric TEAC readings for the samples.
# The files are stacked with `set` holding each file's position in the
# listing; the reading number becomes a numeric `sample_number`.
teac_results <- list.files(
  path = c(
    "~/chapter-06_strawberry_genotypes/data/primary/VT_23/spectrophotometer/results/teac/",
    "~/chapter-06_strawberry_genotypes/data/primary/VT_24/variety_trial/spectrophotmeter/results/teac/"
  ),
  pattern = "*results.csv",
  full.names = TRUE
) %>%
  map(function(csv_path) {
    read_csv(csv_path, skip = 1) %>%
      janitor::clean_names() %>%
      select(no, absorbtion_abs) %>%
      drop_na() %>%
      rename(sample_number = no)
  }) %>%
  rbindlist(idcol = "set") %>%
  mutate(sample_number = as.numeric(sample_number))


# Match every reading to its sample and weight, then convert absorbance to
# % inhibition relative to the blank of the same set / sub-set and on to
# Trolox equivalents using the calibration slope of that set.
teac_results_reduce <- teac_results %>%
  ungroup() %>%
  left_join(extraction_weights_number_AX, by = c("set", "sample_number")) %>%
  drop_na(sample) %>%
  # NOTE(review): set 1 is excluded here; the reason is not recorded in this
  # script
  filter(set > 1) %>%
  left_join(model_calibs_slope_AX, by = "set") %>%
  mutate(
    # sample labels containing "W" are treated as VT23, everything else VT24
    experiment = if_else(str_detect(sample, "W"), "VT23", "VT24"),
    abs_x_1000 = absorbtion_abs * 10^3,
    perc_inhib = ((abs_x_1000[sample == "blank"] - abs_x_1000) / abs_x_1000[sample == "blank"]) * 100,
    teac_mM = perc_inhib / gradient,
    teac_uM_per_g = (teac_mM * 10) / weight_grams,
    .by = c(set, sub_set)
  ) %>%
  select(
    sample, experiment, harvest_date, set, sub_set,
    abs_x_1000, perc_inhib, gradient, teac_mM, weight_grams, teac_uM_per_g
  )


# Reshape the TEAC results into the long date / experiment / sample /
# measure_var / value layout used for the other measurements.
teac_results_process <- teac_results_reduce %>%
  ungroup() %>%
  select(date = harvest_date, experiment, sample, teac_uM_per_g) %>%
  mutate(
    # strip the assay suffix (VT24) or the week prefix (VT23) from the label
    sample = if_else(
      experiment == "VT24",
      str_remove(sample, "-[A-Z]"),
      str_remove(sample, "W[0-9]-")
    )
  ) %>%
  pivot_longer(
    cols = teac_uM_per_g,
    names_to = "measure_var",
    values_to = "value"
  ) %>%
  drop_na() %>%
  # rows 90 and 91 are dropped because their values are inconsistent with the
  # other reps, likely the result of the sample-number matching problem
  # described at the head of this document
  filter(!row_number() %in% c(90, 91))

# discard the TEAC intermediates that are no longer needed
rm(
  calibration_AX, calibration_manualentry_AX, calibration_uvspec_AX,
  extraction_weights_AX, extraction_weights_number_AX,
  model_calibs_AX, model_calibs_slope_AX, sample_order_AX, teac_results
)


###############################################################
##################                           ##################
##################    GAE/TOTAL PHENOLICS    ##################
##################                           ##################
###############################################################

#### GALLIC ACID CALIBRATION DATA ####

# Import the gallic acid calibration readings, label them with the standard
# amount and replicate, and subtract the blank (0 ug) reading of the same set
# and replicate from every standard.
# NOTE(review): the hard-coded 1:6 / each = 15 assumes exactly six calibration
# files of 15 readings each, ordered by amount with three readings per level.
calibration_TP <- list.files(
  path = c(
    "~/chapter-06_strawberry_genotypes/data/primary/VT_23/spectrophotometer/calibration/gae/",
    "~/chapter-06_strawberry_genotypes/data/primary/VT_24/variety_trial/spectrophotmeter/calibration/gae"
  ),
  pattern = "*calibration.csv",
  full.names = TRUE
) %>%
  map(function(csv_path) {
    read_csv(csv_path, skip = 1) %>%
      clean_names() %>%
      select(no, absorbtion_abs) %>%
      drop_na() %>%
      mutate(
        abs_x_1000 = absorbtion_abs * 10^3,
        gallic_acid_ug = rep(c(0, 6, 12, 24, 48), each = 3),
        rep = rep(1:3, times = 5)
      )
  }) %>%
  map_df(function(standards) {
    select(standards, gallic_acid_ug, abs_x_1000, rep)
  }) %>%
  mutate(set = rep(1:6, each = 15)) %>%
  mutate(
    # compare the numeric amount with a number, not the string "0"
    corrected_abs = (abs_x_1000 - (abs_x_1000[gallic_acid_ug == 0])),
    .by = c(set, rep)
  ) %>%
  select(set, gallic_acid_ug, abs_x_1000, corrected_abs)


# Fit one straight line per calibration set (blank-corrected absorbance
# against gallic acid amount) and keep its intercept and slope.
model_calibs_TP <- calibration_TP %>%
  ungroup() %>%
  nest_by(set) %>%
  mutate(
    model = list(lm(corrected_abs ~ gallic_acid_ug, data = data)),
    intercept = coef(model)[1],
    gradient = coef(model)[2]
  )

# linear coefficients of each set's calibration line
model_calibs_slope_TP <- tibble(
  set = model_calibs_TP[["set"]],
  gradient = model_calibs_TP[["gradient"]],
  intercept = model_calibs_TP[["intercept"]]
)

# model-level fit statistics for every calibration, with coefficients attached
model_calibs_summary_TP <- model_calibs_TP %>%
  reframe(glance(model)) %>%
  left_join(model_calibs_slope_TP, by = "set")

#### GAE ASSAY RESULTS ####

# Weights of the dried samples extracted for the total phenolics assay.
extraction_weights_TP <- list.files(
  path = c(
    "~/chapter-06_strawberry_genotypes/data/primary/VT_23/spectrophotometer/extraction_weights/",
    "~/chapter-06_strawberry_genotypes/data/primary/VT_24/variety_trial/spectrophotmeter/extraction_weights/"
  ),
  pattern = "*TP_extraction_weights.csv",
  full.names = TRUE
) %>%
  map_df(read_csv)

# Manually recorded order in which samples were read on the spectrophotometer.
sample_order_TP <- list.files(
  path = c(
    "~/chapter-06_strawberry_genotypes/data/primary/VT_23/spectrophotometer/extraction_weights/",
    "~/chapter-06_strawberry_genotypes/data/primary/VT_24/variety_trial/spectrophotmeter/extraction_weights/"
  ),
  pattern = "*TP_uvspec_sampleorder.csv",
  full.names = TRUE
) %>%
  map_df(read_csv)

# Attach the weights to the run order; samples labelled W2 / W3 get fixed
# 2023 harvest dates, all other rows keep the date from the input files.
extraction_weights_number_TP <- sample_order_TP %>%
  left_join(extraction_weights_TP, by = c("set", "sample")) %>%
  mutate(
    harvest_date = case_when(
      str_detect(sample, "W2") ~ "13/07/2023",
      str_detect(sample, "W3") ~ "20/07/2023",
      TRUE ~ harvest_date
    )
  )

# Import the total phenolics absorbance readings for the samples. The files
# are stacked with `set` holding each file's position in the listing; the
# reading number becomes a numeric `sample_number`.
gae_results <- list.files(
  path = c(
    "~/chapter-06_strawberry_genotypes/data/primary/VT_23/spectrophotometer/results/gae/",
    "~/chapter-06_strawberry_genotypes/data/primary/VT_24/variety_trial/spectrophotmeter/results/gae/"
  ),
  pattern = "*results.csv",
  full.names = TRUE
) %>%
  map(function(csv_path) {
    read_csv(csv_path, skip = 1) %>%
      janitor::clean_names() %>%
      select(no, absorbtion_abs) %>%
      drop_na() %>%
      rename(sample_number = no)
  }) %>%
  rbindlist(idcol = "set") %>%
  mutate(sample_number = as.numeric(sample_number))

# Match every reading to its sample and weight, blank-correct the absorbance
# within each set / sub-set, and convert it to gallic acid equivalents with
# the calibration line of that set.
gae_results_reduce <- gae_results %>%
  ungroup() %>%
  left_join(
    extraction_weights_number_TP,
    by = c("set", "sample_number")
  ) %>%
  group_by(set, sample) %>%
  mutate(
    # number the blanks within each set (seq_len() instead of seq(1:n()))
    sub_set = if_else(sample == "blank", seq_len(n()), NA_integer_)
  ) %>%
  ungroup() %>%
  drop_na(sample) %>%
  # non-blank rows inherit the number of the closest blank above them
  tidyr::fill(sub_set, .direction = "down") %>%
  group_by(set, sub_set) %>%
  left_join(
    model_calibs_slope_TP,
    by = "set"
  ) %>%
  mutate(
    # the dilution factor is now used in the formula below rather than being
    # computed and ignored while the same 3 was hard-coded there
    dilution_factor = 3,
    abs_x_1000 = absorbtion_abs * 10^3,
    corrected_abs = (abs_x_1000 - (abs_x_1000[sample == "blank"])),
    # NOTE(review): the divisor 300 and the factor 10 are taken as given;
    # their derivation is not shown in this script
    gae_mg_per_ml = (((corrected_abs - intercept) / gradient) / 300) * dilution_factor,
    gae_mg_per_g = (gae_mg_per_ml * 10) / weight_grams,
    experiment = if_else(
      str_detect(harvest_date, "2023"),
      "VT23", "VT24"
    ),
    # relabel last: the blank lookup above needs the original sample labels
    sample = str_remove(sample, "-P")
  ) %>%
  # drop the grouping so the stored table is an ordinary ungrouped tibble
  ungroup() %>%
  select(
    sample,
    harvest_date,
    experiment,
    set,
    sub_set,
    abs_x_1000,
    corrected_abs,
    gradient,
    intercept,
    gae_mg_per_ml,
    weight_grams,
    gae_mg_per_g
  )

# Reshape the total phenolics results into the long date / experiment /
# sample / measure_var / value layout used for the other measurements.
# Rows without a harvest date are dropped.
gae_results_process <- gae_results_reduce %>%
  ungroup() %>%
  select(harvest_date, experiment, sample, gae_mg_per_g) %>%
  drop_na(harvest_date) %>%
  rename(date = harvest_date) %>%
  mutate(
    # strip the week prefix from the sample label
    sample = str_remove(sample, "W[0-9]-")
  ) %>%
  pivot_longer(
    cols = gae_mg_per_g,
    names_to = "measure_var",
    values_to = "value"
  )

# discard the total phenolics intermediates; each name is listed once
# ("sample_order_TP" used to appear twice, so the second removal warned that
# the object was not found)
rm(list = c(
  "calibration_TP", "extraction_weights_TP", "extraction_weights_number_TP",
  "sample_order_TP", "model_calibs_TP", "model_calibs_slope_TP", "gae_results"
))

###############################################################
##################                           ##################
##################          FIRMNESS         ##################
##################                           ##################
###############################################################

# Read in the penetrometer data. Columns are picked by position (1-4 and 6),
# so the layout of the exported files must not change.
firm <- list.files(
  path = "~/chapter-06_strawberry_genotypes/data/primary/VT_24/variety_trial/quality/",
  pattern = "*firmness.csv",
  full.names = TRUE
) %>%
  map_df(function(csv_path) read_csv(csv_path, skip = 1)) %>%
  janitor::clean_names() %>%
  select(c(1:4, 6))

# Tidy the penetrometer data to match the long layout of the other tables.
# The former `rep` column (derived from `fruit_test`) was removed: it was
# never carried into the output and shadowed base::rep() inside the mutate.
firm_process <- firm %>%
  mutate(
    sample = toupper(sample),
    experiment = "VT24"
  ) %>%
  rename(
    max_load_N = maximum_load_n_4,
    load_8mm_N = load_at_8mm_n_6
  ) %>%
  select(date, experiment, sample, max_load_N, load_8mm_N) %>%
  pivot_longer(
    ends_with("_N"),
    names_to = "measure_var",
    values_to = "value"
  )

rm(firm)


###############################################################
##################                           ##################
##################           COLOUR          ##################
##################                           ##################
###############################################################

# Read in the colorimeter data, keeping the reading name and the three colour
# coordinates (columns picked by position).
colour <- list.files(
  path = "~/chapter-06_strawberry_genotypes/data/primary/VT_24/variety_trial/quality/",
  pattern = "*fleshcolour.csv",
  full.names = TRUE
) %>%
  map_df(read_csv) %>%
  janitor::clean_names() %>%
  select(c(1, 4:6))

# Calculate hue and chroma from the a and b colour readings, then tidy the
# colorimeter data into the long layout of the other tables.
colour_process <- colour %>%
  mutate(
    data_name = toupper(data_name),
    # atan2() returns the hue angle in radians
    hue = atan2(b_d65, a_d65),
    chroma = sqrt(a_d65^2 + b_d65^2),
    # readings whose name matches M1-M8 are assigned the later date
    date = if_else(str_detect(data_name, "M[1-8]"), "27/06/2024", "10/06/2024"),
    experiment = "VT24"
  ) %>%
  separate(data_name, into = c("sample", "rep"), sep = "-") %>%
  select(
    date, experiment, sample,
    `L*` = l_d65, `a*` = a_d65, `b*` = b_d65,
    hue, chroma
  ) %>%
  pivot_longer(
    c(`L*`, `a*`, `b*`, hue, chroma),
    names_to = "measure_var",
    values_to = "value"
  )

rm(colour)

###############################################################
##################                           ##################
##################          TSS/BRIX         ##################
##################                           ##################
###############################################################

# Read in the TSS (Brix) data, keeping the plot identifiers, the date and
# every Brix reading column.
brix <- list.files(
  path = "~/chapter-06_strawberry_genotypes/data/primary/VT_24/variety_trial/quality/",
  pattern = "*brix.csv",
  full.names = TRUE
) %>%
  map_df(read_csv) %>%
  janitor::clean_names() %>%
  select(bag, block, date, starts_with("brix"))

# Tidy the TSS data for compatibility with the other tables: bag and block
# are pasted into one sample label and the repeat readings stacked under a
# single measure name.
brix_process <- brix %>%
  unite("sample", c(bag, block), sep = "") %>%
  pivot_longer(
    starts_with("brix"),
    names_to = "measure_var",
    values_to = "value"
  ) %>%
  mutate(
    measure_var = str_remove(measure_var, "_[1-3]"),
    experiment = "VT24"
  )

rm(brix)

###############################################################
##################                           ##################
##################           YIELD           ##################
##################                           ##################
###############################################################

# Read in the 2024 yield data and drop the columns that are not analysed.
yield_24 <- read_csv(
  "~/chapter-06_strawberry_genotypes/data/primary/VT_24/variety_trial/yield/20240809_P8_2024_yield.csv"
) %>%
  janitor::clean_names() %>%
  select(
    -week,
    -yield_number,
    -primary_subsample_freshweight_grams,
    -milled,
    -notes
  )

# Derive the 2024 yield parameters and stack every numeric column into the
# long layout. Note that perc_dw is the dry / fresh weight ratio (a fraction,
# not multiplied by 100).
yield_24_process <- yield_24 %>%
  mutate(
    experiment = str_replace(experiment, "VT", "VT24"),
    block = as.factor(block),
    perc_dw = subsample_dryweight_grams / final_subsample_freshweight_grams,
    class1_perc_number = (class1_number / (class1_number + class2_number + waste_number)) * 100,
    class1_perc_weight = (class1_weight_grams / yield_weight_grams) * 100,
    avg_berry_size = yield_weight_grams / (class1_number + class2_number + waste_number),
    avg_subsample_berry_size = final_subsample_freshweight_grams / subsample_number_of_berries
  ) %>%
  # the raw subsample columns are no longer needed once the ratios exist
  select(-starts_with(c("subsample", "freshweight", "final"))) %>%
  unite("sample", bag:block, sep = "") %>%
  pivot_longer(
    where(is.numeric),
    names_to = "measure_var",
    values_to = "value"
  )

# Read in the 2023 yield data and drop the columns that are not analysed.
yield_23 <- read_csv(
  "~/chapter-06_strawberry_genotypes/data/primary/VT_23/yield/20240604_P8_2023_yield.csv"
) %>%
  janitor::clean_names() %>%
  select(
    -week,
    -yield_number,
    -primary_subsample_freshweight_grams,
    -milled,
    -notes
  )

# 2023 samples were composited from samples collected in different weeks.
# The exact weight taken from each sample to make up each composite was
# recorded so that a weighted average dry-matter content can be calculated.
vt23_composite_weights <- read_csv(
  "~/chapter-06_strawberry_genotypes/data/primary/VT_23/yield/20240604_VT-FS_composite_samples.csv"
)

# Dry / fresh weight ratio of every individual 2023 sample, keyed by a label
# built as "<mmdd>-<experiment><bag>".
perc_dryweight <- yield_23 %>%
  select(
    date, experiment, bag,
    final_subsample_freshweight_grams, subsample_dryweight_grams
  ) %>%
  unite("bag", c("experiment", "bag"), sep = "") %>%
  mutate(date = format(lubridate::dmy(date), "%m%d")) %>%
  unite("sample", c("date", "bag"), sep = "-") %>%
  mutate(perc_dw = subsample_dryweight_grams / final_subsample_freshweight_grams) %>%
  drop_na() %>%
  select(sample, perc_dw)

# One row per component of each composite sample, carrying the share of the
# composite weight it contributed and the dry-weight ratio of that component.
composite_weighting <- vt23_composite_weights %>%
  pivot_longer(
    c(component_1, component_2),
    names_to = "component",
    values_to = "sample"
  ) %>%
  distinct() %>%
  pivot_longer(
    c(weight_grams_1, weight_grams_2),
    names_to = "name",
    values_to = "weight_grams"
  ) %>%
  select(
    sample_composite, component, name,
    sample, weight_grams, weight_grams_composite
  ) %>%
  mutate(
    component = str_remove(component, "component_"),
    name = str_remove(name, "weight_grams_"),
    weighting = weight_grams / weight_grams_composite
  ) %>%
  # the two pivots pair every component with every weight column; keep only
  # the pairs whose index (1 or 2) agrees
  filter(component == name) %>%
  select(-name) %>%
  drop_na() %>%
  left_join(perc_dryweight, by = "sample")

# Weighted average dry-matter ratio of each 2023 composite sample, labelled
# with the harvest date encoded by the week prefix of the composite name.
vt23_freshweight_conversion <- composite_weighting %>%
  group_by(sample_composite) %>%
  summarise(perc_dw = weighted.mean(perc_dw, weighting)) %>%
  rename(sample = sample_composite) %>%
  mutate(
    date = case_when(
      str_detect(sample, "W1") ~ "06/07/2023",
      str_detect(sample, "W2") ~ "13/07/2023",
      str_detect(sample, "W3") ~ "20/07/2023",
    ),
    sample = str_remove(sample, "W[0-9]-"),
    experiment = "VT23"
  ) %>%
  select(date, experiment, sample, perc_dw)

# Derive the 2023 yield parameters for the rows whose experiment code is "B",
# attach the weighted dry-matter ratio, and stack every numeric column into
# the long layout.
yield_23_process <- yield_23 %>%
  filter(experiment == "B") %>%
  unite("sample", experiment:bag, sep = "") %>%
  mutate(
    experiment = "VT23",
    class1_perc_number = (class1_number / (class1_number + class2_number + waste_number)) * 100,
    avg_berry_size = yield_weight_grams / (class1_number + class2_number + waste_number),
    avg_subsample_berry_size = final_subsample_freshweight_grams / subsample_number_of_berries
  ) %>%
  # the raw subsample columns are no longer needed once the ratios exist
  select(-starts_with(c("subsample", "freshweight", "final"))) %>%
  left_join(
    vt23_freshweight_conversion,
    by = c("date", "experiment", "sample")
  ) %>%
  pivot_longer(
    where(is.numeric),
    names_to = "measure_var",
    values_to = "value"
  ) %>%
  select(date, experiment, sample, measure_var, value)

# Stack the 2023 and 2024 yield tables into one data frame and discard any
# incomplete rows.
yield_23_24_process <- drop_na(
  bind_rows(yield_23_process, yield_24_process)
)

# discard the yield intermediates that are no longer needed
rm(
  yield_23, yield_24, yield_23_process, yield_24_process,
  vt23_composite_weights, vt23_freshweight_conversion,
  perc_dryweight, composite_weighting
)

###############################################################
##################                           ##################
##################        COMPILATION        ##################
##################                           ##################
###############################################################

# Read in the 2024 experimental layout.
# This information relates each sample to the variety and treatment
# combination it came from.
VT24_genotype_positions <- read_csv(
  "~/chapter-06_strawberry_genotypes/data/primary/VT_24/variety_trial/experimental_design/20240304_P8_genotype_design_positions.csv",
  trim_ws = TRUE
) %>%
  mutate(
    # derive a short code from the genotype name with a perl regex that
    # upper-cases the captured letters and drops every other character
    sample = gsub(
      '\\b(\\pL)\\pL{4,}|.','\\U\\1',
      genotype,
      perl = TRUE
    ),
    # two genotypes get fixed codes: names containing "+" become "V+" and
    # codes containing "G" become "RG"
    sample = case_when(
      str_detect(genotype, "\\+") ~ "V+",
      str_detect(sample, "G") ~ "RG",
      TRUE ~ sample
    ),
    # a scalar is recycled by mutate(); this replaces rep("VT24", time = n()),
    # which only worked through partial matching of `time` to `times`
    experiment = "VT24",
    block = as.factor(block)
  ) %>%
  # the sample label is the genotype code followed by the block number
  unite(
    "sample",
    sample, block,
    sep = "",
    remove = FALSE
  ) %>%
  select(
    experiment,
    sample,
    block,
    genotype
  )

# Read in the 2023 experimental layout.
# This information relates each sample to the variety and treatment
# combination it came from. The block is derived from the bag number found in
# the sample label: bags 1-5 are block 1, 6-10 block 2, and so on up to
# bags 26-30 in block 6.
VT23_genotype_positions <- read_csv(
  "~/chapter-06_strawberry_genotypes/data/primary/VT_23/experimental_design/20231220_P8_genotype_setup.csv",
  trim_ws = TRUE
) %>%
  mutate(
    bag = as.numeric(str_extract(sample, "(\\d)+")),
    block = as.factor(
      case_when(
        between(bag, 1, 5) ~ "1",
        between(bag, 6, 10) ~ "2",
        between(bag, 11, 15) ~ "3",
        between(bag, 16, 20) ~ "4",
        between(bag, 21, 25) ~ "5",
        between(bag, 26, 30) ~ "6"
      )
    ),
    experiment = "VT23"
  ) %>%
  select(experiment, sample, block, genotype)

# Combine both experimental designs into a single lookup table.
genotype_positions <- bind_rows(
  VT23_genotype_positions,
  VT24_genotype_positions
)

# combine all dataframes into an integrated dataset
# Steps: stack the long-format tables, attach genotype and block, spread the
# measures into columns, derive the fresh-weight based parameters, and return
# to long format.
vt23_24_compiled_data <- bind_rows(
  list(
    yield_23_24_process,
    acids_process,
    gae_results_process,
    teac_results_process,
    colour_process,
    firm_process,
    brix_process
  )
) %>%
  # look up the genotype and block of every sample
  left_join(
    genotype_positions,
    by = c("experiment","sample")
  ) %>% 
  mutate(
    # dates arrive as dd/mm/yyyy text and are parsed into Date values
    date = as.Date(
      str_replace_all(
        date, "\\/", "-"
      ),
      "%d-%m-%Y"
    ),
    # what is left of the sample label once capital letters are removed, as a
    # number; only used for sorting below (it is not selected afterwards)
    rep = as.numeric(
      str_remove_all(
        sample, "[A-Z]"
      )
    )
  )  %>% 
  arrange(
    date, rep
  ) %>%
  select(
    date,
    experiment,
    sample,
    genotype,
    block,
    measure_var,
    value
  ) %>% 
  # one column per measure; repeated readings of a measure for the same
  # date / sample are averaged
  pivot_wider(
    names_from = "measure_var",
    values_from = "value",
    values_fn = mean
  ) %>% 
  group_by(
    experiment, sample, genotype, block
  ) %>% 
  # calculate additional parameters from measured variables
  # code factor variables
  # each `_fw` value is the measured value multiplied by perc_dw (the
  # dry / fresh weight ratio computed in the YIELD section)
  mutate(
    ascorbic_acid_mg_100g_fw = (ascorbic_acid * perc_dw) * 100,
    malic_acid_mg_per_g_fw = (malic_acid * perc_dw),
    citric_acid_mg_per_g_fw = (citric_acid * perc_dw),
    # NOTE(review): the 6.4/6.71 factor is taken as given; its derivation is
    # not shown in this script
    theoretical_titratable_acidity = ((citric_acid_mg_per_g_fw / 10) + (malic_acid_mg_per_g_fw / 10)) / (6.4/6.71),
    # brix is summed over all rows of the group (missing values ignored)
    # before being divided by the acidity of each row
    tss_tta_ratio = (sum(brix, na.rm = TRUE) / theoretical_titratable_acidity),
    gae_mg_per_g_fw = (gae_mg_per_g * perc_dw),
    teac_uM_per_g_fw = (teac_uM_per_g * perc_dw),
    block = as.factor(block),
    experiment = as.factor(experiment),
    genotype = as.factor(genotype)
  ) %>% 
  ungroup() %>% 
  # drop every column whose name ends in "acid" plus the two source columns
  # that now have `_fw` counterparts
  select(
    -c(
      ends_with(
        "acid"
      ),
      gae_mg_per_g,
      teac_uM_per_g
    )
  ) %>% 
  # back to long format, keeping only the measures that have a value
  pivot_longer(
    where(
      is.numeric
    ),
    names_to = "measure_var",
    values_to = "value"
  ) %>% 
  drop_na()


# remove the per-measurement tables and design lookups now that they have
# been merged into vt23_24_compiled_data
rm(
  list = c(
    "acids_process", "brix_process", "colour_process",
    "firm_process", "gae_results_process", "teac_results_process",
    "yield_23_24_process", "VT23_genotype_positions", "VT24_genotype_positions",
    "genotype_positions"
  )
)

###############################################################
##################                           ##################
##################     ENVIRONMENTAL DATA    ##################
##################                           ##################
###############################################################

# Read every semicolon-delimited weather file and stack the measurement
# columns (third column onwards) into long format.
weather_data <- list.files(
  path = "~/chapter-06_strawberry_genotypes/data/primary/weather_data/",
  pattern = "*.csv",
  full.names = TRUE
) %>%
  map(function(csv_path) {
    station <- read_delim(csv_path, delim = ";")
    pivot_longer(
      station,
      3:ncol(station),
      names_to = "weather_var",
      values_to = "value"
    )
  }) %>%
  bind_rows()

# Monthly mean of every weather variable, rounded to two decimal places and
# sorted by year, month and variable.
weather_data_wrangle <- weather_data %>%
  mutate(
    date = lubridate::as_date(date, format = "%m/%d/%y"),
    year = lubridate::year(date),
    month = lubridate::month(date)
  ) %>%
  summarise(
    value = round(mean(value, na.rm = TRUE), digits = 2),
    .by = c(year, month, weather_var)
  ) %>%
  arrange(year, month, weather_var)

# remove redundant variables from environment
rm(weather_data)

# Write the outputs. The date-stamped path prefix is built once so the three
# files always share the same stamp (previously Sys.Date() was evaluated
# separately for each file). The prefix lives inside local() so that it does
# not become a global variable and end up in the saved image.
local({
  out_prefix <- paste0(
    "~/chapter-06_strawberry_genotypes/data/processed/", format(Sys.Date(), "%Y%m%d")
  )

  # capture an image of the relevant variables that can be loaded into an R
  # environment by the user
  save.image(
    paste0(out_prefix, "_VT_compiled_dataset.Rdata")
  )

  # save the relevant variables as separate csv files to facilitate future
  # analysis in other data analysis software/tools
  write_csv(
    vt23_24_compiled_data,
    paste0(out_prefix, "_VT_compiled_dataset.csv")
  )

  write_csv(
    weather_data_wrangle,
    paste0(out_prefix, "_VT_weather_data.csv")
  )

  invisible(NULL)
})