# notes


# changes
# added preliminary data from rotation project carried out between Sep 2020 and Jan 2021


if (!require("pacman")) install.packages("pacman")

pacman::p_load(tidyverse, data.table, broom, janitor)


###############################################################
##################                           ##################
##################      ORGANIC ACIDS        ##################
##################                           ##################
###############################################################

# Source the organic-acid processing helper script and run `organic_acids()`
# (presumably defined by the sourced file) on the HPLC results and extraction
# weights of the "FS" experiment.
# NOTE(review): `acids_process`, used in the COMPILATION section of this
# script, is never assigned here - presumably `organic_acids()` creates it in
# the global environment as a side effect; confirm against the sourced file.
source("~/chapter-05_strawberry_foliar_sprays/code/acids_processing_func_v0.0.7.R")

organic_acids(
  hplc_data = "~/chapter-05_strawberry_foliar_sprays/data/primary/hplc/results/", # directory of HPLC result files
  dry_weight = "~/chapter-05_strawberry_foliar_sprays/data/primary/hplc/extraction_weights/", # directory of extraction weights
  experiment = "FS", # experiment identifier passed to the helper
  out_dir = "~/chapter-05_strawberry_foliar_sprays/data/processed/" # directory passed as the output location
)

###############################################################
##################                           ##################
##################        PRELIM DATA        ##################
##################                           ##################
###############################################################

# reading in HPLC and UV-Spec data generated during rotation project (Sep 2020 - Jan 2021)
# These data are being used to justify the choice to use Vibrant and M.Centenary as genotypes in the FS experiment

# read all prelim datafiles into a list object
# NB: `pattern` is a regular expression, not a shell glob, so the ".csv"
# extension is anchored explicitly; "*.csv" would also match names such as
# "x.csv.bak" or any file merely containing "csv"
rotation_prelim <- list.files(
  path = "~/chapter-05_strawberry_foliar_sprays/data/primary/rotation_prelim_data/",
  pattern = "\\.csv$",
  full.names = TRUE
) %>%
  map(read_csv)

# assign names to the list elements
# names are assigned by position, so this relies on list.files() returning the
# AsA, Phen and TEAC files in that (alphabetical) order; fail early with a
# clear message if the directory does not hold exactly three csv files
stopifnot(
  "expected exactly three prelim csv files (AsA, Phen, TEAC)" =
    length(rotation_prelim) == 3L
)
names(rotation_prelim) <- c("prelim_AsA", "prelim_Phen", "prelim_TEAC")

# write each list element to a dataframe in the global environment
list2env(
  rotation_prelim,
  globalenv()
)

# Ascorbic acid (AsA): reduce the rotation-project data to one value per
# genotype / block / replicate
prelim_AsA <- prelim_AsA %>%
  # keep standard (N) extractions only; block 1 is excluded due to missing data
  filter(extraction_info == "N", block != "1") %>%
  # drop columns that are not needed for summarising
  select(sample, block, mean) %>%
  # split "<genotype>-A-<replicate>" so data can be grouped by genotype
  separate(sample, into = c("genotype", "replicate"), sep = "-A-") %>%
  # use the same genotype name for Malling Centenary as in the FS experiment
  mutate(genotype = str_replace(genotype, "M.Centenary", "Malling Centenary")) %>%
  # remove genotypes with missing data in blocks 3 and 4
  filter(str_detect(genotype, "CPEM0109|CPEM0162|SBDL122", negate = TRUE)) %>%
  # final column order; `mean` is renamed to the analyte it represents
  select(genotype, block, replicate, ascorbic_acid_per_100g = mean)


# Total phenolics: reduce the rotation-project data to one value per
# genotype / block / replicate
prelim_Phen <- prelim_Phen %>%
  # block 1 is excluded due to missing data
  filter(block != "1") %>%
  # drop columns that are not needed for summarising
  select(sample, block, mean) %>%
  # split "<genotype>-A-<replicate>" so data can be grouped by genotype
  separate(sample, into = c("genotype", "replicate"), sep = "-A-") %>%
  # use the same genotype name for Malling Centenary as in the FS experiment
  mutate(genotype = str_replace(genotype, "M.Centenary", "Malling Centenary")) %>%
  # remove genotypes with missing data in blocks 3 and 4
  filter(str_detect(genotype, "CPEM0109|CPEM0162|SBDL122", negate = TRUE)) %>%
  # gae = Gallic Acid Equivalents - measure of total phenolic content
  select(genotype, block, replicate, gae_g_per_g = mean)


# Antioxidant capacity (TEAC): reduce the rotation-project data to one value
# per genotype / block / replicate
prelim_TEAC <- prelim_TEAC %>%
  # block 1 is excluded due to missing data
  filter(block != "1") %>%
  # remove duplicate values for vibrant block 4 samples: data were generated on
  # both 9th and 10th Jan 2021, only data from 9th Jan has been kept
  # (row positions refer to the table AFTER the block filter above)
  slice(-(64:68)) %>%
  # drop columns that are not needed for summarising
  select(sample, block, mean) %>%
  # split "<genotype>-A-<replicate>" so data can be grouped by genotype
  separate(sample, into = c("genotype", "replicate"), sep = "-A-") %>%
  # use the same genotype name for Malling Centenary as in the FS experiment
  mutate(genotype = str_replace(genotype, "M.Centenary", "Malling Centenary")) %>%
  # remove genotypes with missing data in blocks 3 and 4
  filter(str_detect(genotype, "CPEM0109|CPEM0162|SBDL122", negate = TRUE)) %>%
  # teac = TROLOX Equivalent Antioxidant Capacity - measure of antioxidant capacity
  select(genotype, block, replicate, teac_uM_per_g = mean)

# combining ascorbic acid, phenolics, and antioxidant data into a single dataframe
# (one row per genotype / block / replicate, one column per analyte)
prelim_summary <- prelim_AsA %>%
  left_join(
    prelim_Phen,
    by = c("genotype", "block", "replicate")
  ) %>%
  left_join(
    prelim_TEAC,
    by = c("genotype", "block", "replicate")
  ) %>%
  # convert dry weight values to approximate fresh weight values using an
  # approximated dry matter content of 10%
  # the identifier columns are excluded explicitly: `block` is typically parsed
  # as a number by read_csv(), and scaling every numeric column (as a
  # pivot_longer(where(is.numeric)) round trip would) also rescales the block
  # label and removes it from the row identifiers
  mutate(
    across(
      where(is.numeric) & !any_of(c("block", "replicate")),
      ~ .x * 0.1
    )
  )

# drop the intermediate prelim objects; only `prelim_summary` is kept
rm(rotation_prelim, prelim_AsA, prelim_Phen, prelim_TEAC)

###############################################################
##################                           ##################
##################          TSS/BRIX         ##################
##################                           ##################
###############################################################

# Brix readings in long format: one row per date / sample / reading
brix <- read_csv("~/chapter-05_strawberry_foliar_sprays/data/primary/brix/20240730_P8_brix.csv") %>%
  # build the sample id from the experiment letter and bag number, e.g. "A" + 1 -> "A1"
  unite("sample", experiment, bag, sep = "") %>%
  pivot_longer(starts_with("brix"), names_to = "measure_var", values_to = "value") %>%
  mutate(
    # strip the reading suffix (_1 to _3) so all readings share one variable name
    measure_var = str_remove(measure_var, "_[1-3]"),
    experiment = "FS23"
  ) %>%
  select(date, experiment, sample, measure_var, value) %>%
  # keep only samples whose id contains "A"
  filter(str_detect(sample, "A"))


###############################################################
##################                           ##################
##################           YIELD           ##################
##################                           ##################
###############################################################

# the yield file also holds yield data from the 2023 variety trial (chapter-06);
# the rows relevant to this chapter are filtered in subsequent code chunks
yield_23 <- read_csv("~/chapter-05_strawberry_foliar_sprays/data/primary/yield/20240604_P8_2023_yield.csv") %>%
  janitor::clean_names() %>%
  # drop columns that are not used downstream
  select(-week, -primary_subsample_freshweight_grams, -milled, -notes)

# composite weights contain the weight (grams) taken from samples collected in
# week 1 and week 2 of the experiment
# see thesis section 5.2.5 for more details on the creation of sample composites
fs_composite_weights <- read_csv("~/chapter-05_strawberry_foliar_sprays/data/primary/yield/20240604_VT-FS_composite_samples.csv")

# dry matter fraction of every harvested sub-sample, keyed by "<mmdd>-<experiment><bag>"
# note: despite its name, `perc_dw` is a ratio (dry weight / fresh weight), not a percentage
perc_dryweight <- yield_23 %>%
  select(date, experiment, bag, final_subsample_freshweight_grams, subsample_dryweight_grams) %>%
  # experiment letter + bag number -> bag id, e.g. "A1"
  unite("bag", experiment, bag, sep = "") %>%
  mutate(
    # dates are parsed as day-month-year and reduced to month+day ("%m%d")
    date = format(lubridate::dmy(date), "%m%d"),
    perc_dw = subsample_dryweight_grams / final_subsample_freshweight_grams
  ) %>%
  unite("sample", date, bag, sep = "-") %>%
  # discard rows with any missing value (e.g. sub-samples without a dry weight)
  drop_na() %>%
  select(sample, perc_dw)

# Build one row per (composite sample, component sample) holding the share of
# the composite's weight contributed by that component (`weighting`) and the
# component's dry matter fraction (`perc_dw`, joined from `perc_dryweight`).
fs_composite_weighting <- fs_composite_weights %>% 
  # stack the two component id columns: `component` holds the source column
  # name, `sample` the component's sample id
  pivot_longer(
    c("component_1","component_2"),
    names_to = "component",
    values_to = "sample"
  ) %>% 
  # remove rows that are exact duplicates after the first reshape
  distinct() %>% 
  # stack the two component weight columns; every component row is now paired
  # with BOTH weights, the mismatched pairs are removed by the filter below
  pivot_longer(
    c("weight_grams_1","weight_grams_2"),
    names_to = "name",
    values_to = "weight_grams"
  ) %>% 
  select(
    sample_composite,
    component,
    name,
    sample,
    weight_grams,
    weight_grams_composite
  ) %>% 
  mutate(
    # reduce both labels to their numeric index ("1" / "2")
    component = str_remove(
      component, "component_"
    ),
    name = str_remove(
      name, "weight_grams_"
    ),
    # fraction of the composite weight contributed by this component weight
    weighting = weight_grams / weight_grams_composite
  ) %>% 
  # keep only rows where component k is paired with weight k
  filter(
    as.numeric(component) - as.numeric(name) == 0
  ) %>% 
  select(
    -name
  ) %>% 
  # drop rows with any missing value (e.g. composites lacking a second component)
  drop_na() %>% 
  # attach the dry matter fraction of each component; components without a
  # match in `perc_dryweight` get NA
  left_join(
    perc_dryweight, by = "sample"
  )

# dry matter fraction of each composite sample, calculated as the mean of its
# components' dry matter fractions weighted by their share of the composite weight
fs_freshweight_conversion <- fs_composite_weighting %>%
  group_by(sample_composite) %>%
  summarise(perc_dw = weighted.mean(perc_dw, weighting)) %>%
  rename(sample = sample_composite) %>%
  mutate(
    # map the week tag in the composite id onto the date used for that week
    date = case_when(
      str_detect(sample, "W1") ~ "06/07/2023",
      str_detect(sample, "W2") ~ "13/07/2023",
      str_detect(sample, "W3") ~ "20/07/2023"
    ),
    # strip the week prefix so the id matches the sample ids used elsewhere
    sample = str_remove(sample, "W[0-9]-"),
    experiment = "FS23"
  ) %>%
  select(date, experiment, sample, perc_dw)

# yield data of the foliar spray experiment (experiment code "A") in long
# format, with the composite dry matter fraction (`perc_dw`) attached
yield_23_process <- yield_23 %>%
  filter(experiment == "A") %>%
  # build the sample id ("A" + bag number) from the two columns by name; a
  # positional range (experiment:bag) would silently fold any column that sits
  # between them in the csv into the id
  unite("sample", experiment, bag, sep = "") %>%
  mutate(
    experiment = "FS23",
    # harvest of 05/07/2023 is aligned with the date assigned to the week 1
    # composites ("06/07/2023") so that the join below matches
    date = str_replace(date, "05/07/2023", "06/07/2023")
  ) %>%
  left_join(
    fs_freshweight_conversion,
    by = c("date", "experiment", "sample")
  ) %>%
  # all numeric columns (yield variables and perc_dw) become measure_var / value pairs
  pivot_longer(
    where(is.numeric),
    names_to = "measure_var",
    values_to = "value"
  ) %>%
  select(date, experiment, sample, measure_var, value)


# drop the intermediate yield objects; only `yield_23_process` is kept
rm(yield_23, fs_composite_weights, fs_freshweight_conversion, perc_dryweight, fs_composite_weighting)


###############################################################
##################                           ##################
##################          DUALEX           ##################
##################                           ##################
###############################################################

# read in dualex datafiles (one tibble per csv file)
# NB: `pattern` is a regular expression, not a shell glob, so the ".csv"
# extension is anchored explicitly
dualex <- list.files(
  path = "~/chapter-05_strawberry_foliar_sprays/data/primary/dualex_readings/",
  pattern = "\\.csv$",
  full.names = TRUE
) %>%
  map(
    # the first 4 lines of each file are skipped and columns 3-6, 10 and 15 are dropped
    # NOTE(review): readr documents `col_types` as a cols() specification or a
    # single compact string (e.g. "ctnnnnnnn"); confirm that this character
    # vector is applied to all nine columns as intended
    ~ read_csv(
      ., skip = 4, col_select = -c(3:6,10,15), col_types = c("c","t","n","n","n","n","n","n","n")
    )
  )

# stack all dualex files and reshape the readings to long format
dualex_process <- dualex %>%
  map(~ select(., -group)) %>%
  bind_rows() %>%
  janitor::clean_names() %>%
  mutate(
    # dates may be written day-month-year or year-month-day
    date = lubridate::parse_date_time(number_yyyy_mm_dd, orders = c("dmy", "ymd")),
    # bag numbers are assigned by row position: bags 1-30 with 4 consecutive
    # rows each, the whole sequence repeated 5 times (600 rows in total)
    # NOTE(review): assumes every file lists the readings in exactly this order - verify
    bag = rep(1:30, each = 4, times = 5),
    sample = str_glue("A{bag}"),
    experiment = "FS23"
  ) %>%
  select(date, experiment, sample, chl, flav, anth, nbi, temp) %>%
  pivot_longer(where(is.numeric), names_to = "measure_var", values_to = "value")



###############################################################
##################                           ##################
##################       FRUIT MINERALS      ##################
##################                           ##################
###############################################################

# mineral analysis data generated by yara lancrop laboratories:
# keep the sample reference plus columns 14 to 29 of the file
minerals <- read_csv(
  "~/chapter-05_strawberry_foliar_sprays/data/primary/minerals/20240607_minerals_lancrop.csv",
  col_select = c(FIELDREF, 14:29)
)

# tidy the mineral data into long format with all values expressed as ppm
minerals_process <- minerals %>%
  clean_names() %>%
  # the field reference is split into experiment, week and sample id
  separate(fieldref, into = c("experiment", "week", "sample")) %>%
  mutate(
    date = "06/07/2023",
    # columns 4-19 hold the mineral results; entries containing "<" are set
    # to 0 and everything is then converted to numeric
    across(
      4:19,
      ~ as.numeric(if_else(str_detect(., "<"), "0", as.character(.)))
    )
  ) %>%
  pivot_longer(where(is.numeric), names_to = "measure_var", values_to = "value") %>%
  mutate(
    # percentages are converted to ppm (x 10^4) and relabelled accordingly
    value = if_else(str_detect(measure_var, "percent"), value * 10^4, value),
    measure_var = str_replace(measure_var, "percent", "ppm")
  ) %>%
  select(date, experiment, sample, measure_var, value)


###############################################################
##################                           ##################
##################       WEATHER DATA        ##################
##################                           ##################
###############################################################

# weather data were gathered from a local weather station at Niab, East Malling
# every csv file is read, reshaped to long format (all columns from the third
# onwards become weather_var / value pairs) and the files are stacked
# NB: `pattern` is a regular expression, not a shell glob, so the ".csv"
# extension is anchored explicitly
weather_data <- list.files(
  path = "~/chapter-05_strawberry_foliar_sprays/data/primary/weather_data/",
  pattern = "\\.csv$",
  full.names = TRUE
) %>%
  map(
    ~ read_delim(., delim = ";")
  ) %>%
  map(
    ~ pivot_longer(
      ., 3:ncol(.),
      names_to = "weather_var",
      values_to = "value"
    )
  ) %>%
  bind_rows()

# weekly means of every weather variable, restricted to the weeks starting
# between 2023-05-28 and 2023-07-09
weather_data_wrangle <- weather_data %>%
  mutate(
    # dates are written month/day/two-digit-year
    date = lubridate::as_date(date, format = "%m/%d/%y"),
    # first day of the week each observation falls in
    week = lubridate::floor_date(date, unit = "week")
  ) %>%
  summarise(value = mean(value, na.rm = TRUE), .by = c(week, weather_var)) %>%
  arrange(week, weather_var) %>%
  mutate(
    value = round(value, digits = 2),
    month = lubridate::month(week),
    year = lubridate::year(week)
  ) %>%
  filter(week >= as.Date("2023-05-28") & week <= as.Date("2023-07-09"))


###############################################################
##################                           ##################
##################        COMPILATION        ##################
##################                           ##################
###############################################################

# read in experimental layout
# this information relates each sample to the genotype and treatment
# combination it came from
FS23_genotype_positions <- read_csv(
  "~/chapter-05_strawberry_foliar_sprays/data/primary/experimental_design/20230829_P8_nanoparticles_layout.csv",
  trim_ws = TRUE
) %>%
  mutate(
    # same sample id format as the measurement tables ("A" + bag number)
    sample = str_glue("A{bag}"),
    experiment = "FS23"
  ) %>%
  select(experiment, sample, genotype, treatment)

# combine the different analytes
# Steps: stack the long-format yield, organic acid, brix and mineral tables,
# parse their dd/mm/YYYY date strings, append the dualex readings, attach
# genotype/treatment, derive the block from the bag number, spread to wide
# format to calculate fresh-weight based variables, then return to long format.
FS23_compiled_data <- bind_rows(
  list(
    yield_23_process,
    acids_process,
    brix,
    minerals_process
  )
) %>%
  # dates arrive as "dd/mm/YYYY" strings; "/" is swapped for "-" before parsing
  mutate(
    date = as.Date(
      str_replace_all(
        date, "\\/", "-"
      ),
      "%d-%m-%Y"
    )) %>% 
  # dualex data are appended after the string dates have been parsed
  # (their date column is created with parse_date_time() rather than read as text)
  bind_rows(
    dualex_process
  ) %>% 
  left_join(
    FS23_genotype_positions,
    by = c("experiment","sample")
  ) %>% 
  mutate(
    # bag number = sample id without its capital letter(s)
    bag = as.numeric(
      str_remove_all(
        sample, "[A-Z]"
      )
    ),
    # six consecutive bags per block. case_when() uses the FIRST matching
    # condition, so the shared boundaries (6, 12, 18, 24) fall in the lower
    # block: 1-6 -> 1, 7-12 -> 2, 13-18 -> 3, 19-24 -> 4, 25-30 -> 5;
    # bag numbers above 30 give NA
    block = as.factor(
      case_when(
          bag <= 6 ~ 1,
          between(bag, 6, 12) ~ 2,
          between(bag, 12, 18) ~ 3,
          between(bag, 18, 24) ~ 4,
          between(bag, 24, 30) ~ 5
          )
    )
  )  %>% 
  arrange(
    date
  ) %>%
  select(
    date,
    experiment,
    sample,
    genotype,
    treatment,
    block,
    measure_var,
    value
  ) %>% 
  # one column per measured variable; repeated values for the same
  # date / sample / variable (e.g. replicate readings) are averaged
  pivot_wider(
    names_from = "measure_var",
    values_from = "value",
    values_fn = mean
  ) %>% 
  # grouping per sample (across dates) is only needed for sum(brix) below
  group_by(
    experiment, sample, genotype, treatment, block
  ) %>% 
  mutate(
    # dry-weight based acid contents are multiplied by the dry matter
    # fraction (perc_dw) to express them on a fresh weight basis
    ascorbic_acid_mg_100g_fw = (ascorbic_acid * perc_dw) * 100,
    malic_acid_mg_per_g_fw = (malic_acid * perc_dw),
    citric_acid_mg_per_g_fw = (citric_acid * perc_dw),
    theoretical_titratable_acidity = ((citric_acid_mg_per_g_fw / 10) + (malic_acid_mg_per_g_fw / 10)) / (6.4/6.71),
    # NOTE(review): sum(brix) adds up all brix values of the sample across
    # dates - presumably there is a single brix date per sample, so this just
    # carries that value onto the rows of the acid dates; confirm, as several
    # brix dates per sample would inflate the ratio
    tss_tta_ratio = (sum(brix, na.rm = TRUE) / theoretical_titratable_acidity),
    avg_berry_weight_grams = yield_weight_grams / yield_number,
    block = as.factor(block),
    experiment = as.factor(experiment),
    genotype = as.factor(genotype)
  ) %>% 
  ungroup() %>% 
  # drop the dry-weight based acid columns (names ending in "acid") and the
  # sub-sample columns that were only needed for the conversions
  select(
    -c(
      ends_with(
        "acid"
      ),
      final_subsample_freshweight_grams,
      subsample_dryweight_grams,
      subsample_number_of_berries
    )
  ) %>% 
  # back to long format: one row per date / sample / variable
  pivot_longer(
    where(
      is.numeric
    ),
    names_to = "measure_var",
    values_to = "value"
  ) %>% 
  # remove rows with a missing value in ANY column (including genotype and treatment)
  drop_na()

# remove redundant objects (intermediate tables and the sourced helper
# function) so that they are not included in the saved image
rm(
  acids_process, brix, dualex, dualex_process,
  minerals, minerals_process, yield_23_process, FS23_genotype_positions,
  organic_acids, weather_data
)

# save every object remaining in the global environment to a date-stamped
# .Rdata image that can be loaded into an R environment by the user
save.image(
  file = paste0(
    "~/chapter-05_strawberry_foliar_sprays/data/processed/",
    format(Sys.Date(), "%Y%m%d"),
    "_FS23_compiled_dataset.Rdata"
  )
)

# the same tables are also written as date-stamped csv files to facilitate
# future analysis in other data analysis software/tools

# compiled FS23 dataset
write_csv(
  FS23_compiled_data,
  paste0(
    "~/chapter-05_strawberry_foliar_sprays/data/processed/",
    format(Sys.Date(), "%Y%m%d"),
    "_FS23_compiled_dataset.csv"
  )
)

# preliminary (rotation project) nutrient data
write_csv(
  prelim_summary,
  paste0(
    "~/chapter-05_strawberry_foliar_sprays/data/processed/",
    format(Sys.Date(), "%Y%m%d"),
    "_rotation_prelim_nutrients.csv"
  )
)

# weekly weather summary
write_csv(
  weather_data_wrangle,
  paste0(
    "~/chapter-05_strawberry_foliar_sprays/data/processed/",
    format(Sys.Date(), "%Y%m%d"),
    "_FS23_weather_data.csv"
  )
)

