# notes
# tidied code originally written in `chap-06_analysis.Rmd` - now chap-05 after chapter order was changed.

# changes
# preparation of adjusted/additional figures to meet the examiner's suggested amendments

# load required packages
pacman::p_load(
  tidyverse,
  data.table,
  broom,
  janitor,
  plotrix,
  ggpmisc,
  ggstatsplot,
  corrr,
  ggcorrplot,
  ggsci,
  scales,
  correlation,
  schemr,
  gt,
  gtsummary,
  ggpubr,
  ggtext,
  multcompView,
  ggrepel,
  flextable,
  ftExtra,
  car
)


# loading required datasets
# the two .Rdata files below are derived from the data-compilation scripts:
  # '~/chapter-06_strawberry_genotypes/code/datacompilation_VT_v0.1.3.R'
  # '~/chapter-06_strawberry_genotypes/code/datacompilation_STS_v0.0.4.R'
# NOTE(review): load() restores objects under the names they were saved with,
# so objects used later in this script (e.g. `vt23_24_compiled_data`,
# `weather_data_wrangle`) are presumably created here - confirm against the
# compilation scripts.
# the paths are relative: run the script from a working directory in which
# "../data_submission/" resolves.

load("../data_submission/chapter-06_strawberry_genotypes/data/processed/20241009_VT_compiled_dataset.Rdata")

load("../data_submission/chapter-06_strawberry_genotypes/data/processed/20241008_STS_compiled_dataset.Rdata")


# Shared ggplot2 theme applied to every figure so they look consistent.
# Starts from ggstatsplot's theme, then: removes axis ticks and all grid
# lines, draws grey axis lines and a grey panel border, makes facet strip
# labels bold on a transparent strip, and pads the axis titles by 0.5 cm.
# Takes no arguments; returns a theme object to be added to a plot with `+`.
theme_thesis <- function() {
  frame_colour <- "grey50"
  title_gap_cm <- 0.5

  overrides <- theme(
    # axes
    axis.ticks = element_blank(),
    axis.line = element_line(colour = frame_colour),
    axis.title.x = element_text(margin = margin(t = title_gap_cm, unit = "cm")),
    axis.title.y = element_text(margin = margin(r = title_gap_cm, unit = "cm")),
    # grid: a colour is set on the parent element, but every grid line that
    # would be drawn is then blanked
    panel.grid = element_line(color = "#b4aea9"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    # panel frame and tag position
    panel.border = element_rect(color = frame_colour, fill = NA, linewidth = 0.5),
    plot.tag.position = c(0.12, 1),
    # facet strips
    strip.text = element_text(face = "bold"),
    strip.background = element_rect(fill = NA, color = NA),
    strip.placement = "inside"
  )

  theme_ggstatsplot() + overrides
}


##### Tidying compiled data #####

# convert the design variables to factors
# genotype becomes an ordered factor with an explicit level order
VT23_VT24 <- vt23_24_compiled_data %>%
  mutate(
    across(c(block, experiment), as.factor),
    genotype = ordered(
      genotype,
      levels = c(
        "Sonata", "Malling Vitality", "Red Gauntlet", "Malling Centenary",
        "Malwina", "Vibrant", "Vibrant+", "Elsanta"
      )
    )
  )

##### Table 6.1 - Experimental design modifications #####

# hand-written summary of how the 2024 experiment differed from 2023 and why
# each vector below is one table column; entries are aligned by position,
# one row per design element
tab1 <- dplyr::tibble(
  "Design Element" = c(
    "Number of Biological Replicates",
    "Number of Genotypes",
    "Stock Plant Quality",
    "Planting Date",
    "Fertigation Control",
    "Yield Assessments",
    "Sensory Quality Assessments",
    "Composite Sampling",
    "Post-harvest Handling Scenarios"
  ),
  "2023" = c(
    "Six",
    "Five",
    "A+ Grade",
    "14 June",
    "Manually adjusted timer",
    "Total weight and berry number. Berry number for Class I/II and waste berries.",
    "None",
    "Yes",
    "No"
  ),
  "2024" = c(
    "Eight",
    "Seven",
    "A Grade",
    "09 April",
    "Automated by environmental data loggers.",
    "Total weight and berry number. Weight and berry number for Class I/II and waste.",
    "Berry colour, firmness, and sweetness.",
    "No",
    "Yes"
  ),
  "Reason for Modification" = c(
    "Increased statistical power.",
    "Increased genetic diversity.",
    "Shortage of A+ plant material in 2024.",
    "To better align the experimental period with commercial planting times for June-bearing genotypes.",
    "To make it easier to ensure that all plants were receiving sufficient fertigation.",
    "To align the 2024 yield assessments with commonly reported measures, namely Class I grams/plant and percentage Class I.",
    "To quantify subjective assessments of sensory qualities made in 2023.",
    "Sufficient sample material to conduct all chemical analysis was collected from a single pick in 2024.",
    "To investigate a hypothesis that was developed based on the findings of the 2023 experiment."
  )
) %>%
  flextable() %>%
  colformat_md() %>%
  # the piped table is already the first argument; only the layout options
  # are passed here
  set_table_properties(width = 1, layout = "autofit")

# save table to .docx
# can be saved to other formats depending on the user's needs, see the flextable package for more save options
# the output file name is kept exactly as it was so existing references to it still resolve
tab1 %>%
  save_as_docx(
    path = paste0("~/chapter-06_strawberry_genotypes/figures/", format(Sys.Date(), "%Y%m%d"), "_VT_experimental_desigin_mods.docx")
  )

##### Table 6.2 - Weather data summaries #####

# one column per weather variable (any rainfall columns excluded), limited to
# June-August 2023 and April-July 2024, then laid out as a flextable grouped
# by year and month
tab2 <- weather_data_wrangle %>%
  pivot_wider(names_from = "weather_var", values_from = "value") %>%
  select(!contains("rainfall")) %>%
  filter(
    (year == 2023 & between(month, 6, 8)) |
      (year == 2024 & between(month, 4, 7))
  ) %>%
  mutate(
    # month number -> English month name (NA for anything that is not 1-12)
    month = month.name[match(month, 1:12)],
    year = as.factor(year)
  ) %>%
  group_by(year, month) %>%
  as_grouped_data(groups = c("year", "month")) %>%
  flextable() %>%
  set_table_properties(width = 1, layout = "autofit")

# write the table to a date-stamped .docx file
save_as_docx(
  tab2,
  path = paste0("~/chapter-06_strawberry_genotypes/figures/", format(Sys.Date(), "%Y%m%d"), "_VT_weather_summary.docx")
)

##### ANOVA summary for ascorbic acid  #####

# ascorbic acid observations only: exact duplicate rows removed, values must
# be positive, rows containing any NA dropped, and a numeric harvest year
# derived from the experiment code
vt_ascorbic_acid <- VT23_VT24 %>%
  distinct() %>%
  filter(measure_var == "ascorbic_acid_mg_100g_fw", value > 0) %>%
  ungroup() %>%
  drop_na() %>%
  mutate(year = if_else(experiment == "VT23", 2023, 2024))

# stats #

# test and visually assess procedure assumptions (kept commented out; run interactively)
# visual plots indicate an acceptable distribution
# corrections should be implemented to account for different sample sizes between genotypes and years
# vt_ascorbic_model <- aov(value ~ genotype + experiment, data = vt_ascorbic_acid)
# shapiro.test(vt_ascorbic_acid$value) # normality
# leveneTest(value ~ genotype, data = vt_ascorbic_acid) # homogeneity of variances
# plot(vt_ascorbic_model) # visual assessment

# run ANOVA with Huber-White adjustment for heteroscedasticity
# see thesis section 6.2.4 for more details
# the model is fitted on `vt_ascorbic_acid`, the filtered data created above
vt_ascorbic_model <- car::Anova(
  lm(value ~ genotype + experiment, data = vt_ascorbic_acid),
  white.adjust = TRUE
)

# post-hoc analysis
# using Games-Howell to account for unequal sample sizes
# the two group columns are merged into "A-B" row names, the format used to
# name the comparisons passed to multcompLetters() below
vt_posthoc_ascorbic <- vt_ascorbic_acid %>%
  rstatix::games_howell_test(value ~ genotype) %>%
  unite("comp", starts_with("group"), sep = "-") %>%
  column_to_rownames(var = "comp")

# generating letters to signify a statistically significant difference between genotypes
# TRUE marks a pairwise comparison that is significant at the 0.05 level
vt_pvals_ascorbic <- vt_posthoc_ascorbic$p.adj < 0.05

names(vt_pvals_ascorbic) <- row.names(vt_posthoc_ascorbic)

# summary table of means and between genotype differences
# the letters are joined back onto the same filtered data the model was fitted on
vt_ascorbic_letters <- as.data.frame.list(
  multcompLetters(vt_pvals_ascorbic)
) %>%
  rownames_to_column(var = "genotype") %>%
  right_join(vt_ascorbic_acid, by = "genotype") %>%
  summarise(
    mean = mean(value),
    se = std.error(value),
    max = max(value),
    min = min(value),
    .by = c(genotype, Letters)
  )

# the logical vector is only needed to build the letters
rm(list = c("vt_pvals_ascorbic"))


##### Figure 6.4 - Box-violin plots, ascorbic acid in different genotypes faceted by harvest year #####

# layers, in drawing order: violin outline, box plot (outliers made
# transparent because the raw points are drawn anyway), jittered observations
# coloured by genotype, and the genotype mean as a larger dark-red point
fig4 <- ggplot(
  data = vt_ascorbic_acid,
  mapping = aes(x = genotype, y = value, group = genotype)
) +
  geom_violin(
    position = position_dodge(0.9), width = 0.8, linewidth = 0.4,
    fill = NA, color = "grey40"
  ) +
  geom_boxplot(
    position = position_dodge(0.9), width = 0.7,
    fill = NA, color = "grey40", outlier.alpha = 0
  ) +
  geom_point(
    aes(colour = genotype),
    position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.1),
    size = 1.8, alpha = 0.4
  ) +
  geom_point(
    stat = "summary", fun = mean,
    position = position_dodge(0.9), size = 3, color = "#8a0f00"
  ) +
  # letters of significance can be added here
  # not included in the final thesis as the plots became cluttered
  # geom_text(
  #   data = vt_ascorbic_letters,
  #   aes(x = genotype, y = max, label = Letters),
  #   vjust = -1.3, fontface = "bold"
  # ) +
  facet_grid(. ~ year, scales = "free", space = "free") +
  scale_x_discrete(name = "Genotype") +
  scale_y_continuous(
    name = "Total Ascorbic Acid\n (mg/100g Fresh Weight)",
    limits = c(45, 125),
    breaks = seq(45, 125, by = 10)
  ) +
  scale_colour_frontiers() +
  labs(colour = "Genotype") +
  theme_thesis() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(face = "italic", angle = 45, hjust = 1, vjust = 1)
  )


# write the figure to a date-stamped ".png" file
# the specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_VT_ascorbicacid.png"),
  plot = fig4,
  width = 6.5, height = 4, units = "in",
  dpi = 900, limitsize = FALSE
)


##### ANOVA summary for TROLOX Equivalent Antioxidant Capacity (TEAC) #####

# TEAC observations only: exact duplicate rows removed, values must be
# positive, rows containing any NA dropped, and a numeric harvest year
# derived from the experiment code
vt_teac <- VT23_VT24 %>%
  distinct() %>%
  filter(measure_var == "teac_uM_per_g_fw" & value > 0) %>%
  ungroup() %>%
  drop_na() %>%
  mutate(year = if_else(experiment == "VT23", 2023, 2024))

# stats #

# assumption checks, kept commented out for interactive use
# author's notes: visual plots indicate an acceptable distribution;
# corrections should be implemented to account for different sample sizes between genotypes and years
# vt_teac_model <- aov(value ~ genotype + experiment, data = vt_teac)
# shapiro.test(vt_teac$value) # normality
# leveneTest(value ~ genotype, data = vt_teac) # homogeneity of variances
# plot(vt_teac_model) # visual assessment

# ANOVA with the Huber-White adjustment for heteroscedasticity
# see thesis section 6.2.4 for more details
vt_teac_model <- car::Anova(
  lm(value ~ genotype + experiment, data = vt_teac),
  white.adjust = TRUE
)

# post-hoc analysis
# Games-Howell, chosen to conservatively account for unequal sample sizes
# the two group columns are merged into "A-B" row names
vt_posthoc_teac <- rstatix::games_howell_test(vt_teac, value ~ genotype) %>%
  unite("comp", starts_with("group"), sep = "-") %>%
  column_to_rownames(var = "comp")

# logical vector, named by comparison: TRUE where the adjusted p-value is below 0.05
vt_pvals_teac <- vt_posthoc_teac[["p.adj"]] < 0.05
names(vt_pvals_teac) <- row.names(vt_posthoc_teac)

# per-genotype mean, standard error, max and min, with the significance letters attached
vt_teac_letters <- multcompLetters(vt_pvals_teac) %>%
  as.data.frame.list() %>%
  rownames_to_column(var = "genotype") %>%
  right_join(vt_teac, by = "genotype") %>%
  summarise(
    mean = mean(value),
    se = std.error(value),
    max = max(value),
    min = min(value),
    .by = c(genotype, Letters)
  )

##### Figure 6.5 - Box-plots, TEAC in different genotypes faceted by harvest year #####

# plotted from `vt_teac`, the filtered TEAC data prepared in the ANOVA section
# layers, in drawing order: violin outline, box plot (outliers made
# transparent because the raw points are drawn anyway), jittered observations
# coloured by genotype, and the genotype mean as a larger dark-red point
fig5 <- vt_teac %>%
  ggplot(aes(x = genotype, y = value, group = genotype)) +
  geom_violin(
    fill = NA, color = "grey40",
    width = 0.8, linewidth = 0.4,
    position = position_dodge(0.9)
  ) +
  geom_boxplot(
    outlier.alpha = 0,
    fill = NA, color = "grey40",
    width = 0.7,
    position = position_dodge(0.9)
  ) +
  geom_point(
    aes(colour = genotype),
    position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.1),
    size = 1.8, alpha = 0.4
  ) +
  geom_point(
    stat = "summary", fun = mean,
    size = 3, color = "#8a0f00",
    position = position_dodge(0.9)
  ) +
  # optional letters of significance (left out to avoid clutter)
  # geom_text(
  #   data = vt_teac_letters,
  #   aes(x = genotype, y = max, label = Letters),
  #   vjust = -1.3, fontface = "bold"
  # ) +
  scale_y_continuous(
    "TROLOX Equivalent\n Antioxidant Capacity (uM/g Fresh-Weight)",
    limits = c(10.5, 21.5),
    breaks = seq(from = 10.5, to = 21.5, by = 1.5)
  ) +
  scale_x_discrete("Genotype") +
  facet_grid(. ~ year, scales = "free", space = "free") +
  scale_colour_frontiers() +
  labs(colour = "Genotype") +
  theme_thesis() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, face = "italic")
  )


# save the plot to a date-stamped ".png" file
# the specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_VT_teac.png"),
  plot = fig5,
  width = 6,
  height = 4,
  units = "in",
  dpi = 900,
  limitsize = FALSE
)

##### ANOVA summary for Gallic Acid Equivalent (GAE) Total Phenolics Content (TPC) #####

# GAE observations only: exact duplicate rows removed, values must be
# positive, rows containing any NA dropped, and a numeric harvest year
# derived from the experiment code
vt_gae <- VT23_VT24 %>%
  distinct() %>%
  filter(measure_var == "gae_mg_per_g_fw" & value > 0) %>%
  ungroup() %>%
  drop_na() %>%
  mutate(year = if_else(experiment == "VT23", 2023, 2024))

# stats #

# assumption checks, kept commented out for interactive use
# author's notes: visual plots indicate an acceptable distribution, but there is definitely some deviation from normal;
# corrections should be implemented to account for different sample sizes between genotypes and years
# vt_gae_model <- aov(value ~ genotype + experiment, data = vt_gae)
# shapiro.test(vt_gae$value) # normality
# leveneTest(value ~ genotype, data = vt_gae) # homogeneity of variances
# plot(vt_gae_model) # visual assessment

# ANOVA with the Huber-White adjustment for heteroscedasticity
# see thesis section 6.2.4 for more details
vt_gae_model <- car::Anova(
  lm(value ~ genotype + experiment, data = vt_gae),
  white.adjust = TRUE
)

# post-hoc analysis
# Games-Howell, chosen to conservatively account for unequal sample sizes
# the two group columns are merged into "A-B" row names
vt_posthoc_gae <- rstatix::games_howell_test(vt_gae, value ~ genotype) %>%
  unite("comp", starts_with("group"), sep = "-") %>%
  column_to_rownames(var = "comp")

# logical vector, named by comparison: TRUE where the adjusted p-value is below 0.05
vt_pvals_gae <- vt_posthoc_gae[["p.adj"]] < 0.05
names(vt_pvals_gae) <- row.names(vt_posthoc_gae)

# per-genotype mean, standard error, max and min, with the significance letters attached
vt_gae_letters <- multcompLetters(vt_pvals_gae) %>%
  as.data.frame.list() %>%
  rownames_to_column(var = "genotype") %>%
  right_join(vt_gae, by = "genotype") %>%
  summarise(
    mean = mean(value),
    se = std.error(value),
    max = max(value),
    min = min(value),
    .by = c(genotype, Letters)
  )

##### Figure 6.6 - Box-plots, GAE in different genotypes faceted by harvest year #####

# plotted from `vt_gae`, the filtered GAE data prepared in the ANOVA section
# layers, in drawing order: violin outline, box plot (outliers made
# transparent because the raw points are drawn anyway), jittered observations
# coloured by genotype, and the genotype mean as a larger dark-red point
fig6 <- vt_gae %>%
  ggplot(aes(x = genotype, y = value, group = genotype)) +
  geom_violin(
    fill = NA, color = "grey40",
    width = 0.8, linewidth = 0.4,
    position = position_dodge(0.9)
  ) +
  geom_boxplot(
    outlier.alpha = 0,
    fill = NA, color = "grey40",
    width = 0.7,
    position = position_dodge(0.9)
  ) +
  geom_point(
    aes(colour = genotype),
    position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.1),
    size = 1.8, alpha = 0.4
  ) +
  geom_point(
    stat = "summary", fun = mean,
    size = 3, color = "#8a0f00",
    position = position_dodge(0.9)
  ) +
  # optional letters of significance (left out to avoid clutter)
  # geom_text(
  #   data = vt_gae_letters,
  #   aes(x = genotype, y = max, label = Letters),
  #   vjust = -1.3, fontface = "bold"
  # ) +
  scale_y_continuous(
    "Gallic Acid Equivalent TPC\n (mg/g Fresh-Weight)",
    limits = c(1.2, 2.45),
    breaks = seq(from = 1.2, to = 2.45, by = 0.2)
  ) +
  scale_x_discrete("Genotype") +
  facet_grid(. ~ year, scales = "free", space = "free") +
  scale_colour_frontiers() +
  labs(colour = "Genotype") +
  theme_thesis() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, face = "italic")
  )

# save the plot to a date-stamped ".png" file
# the specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_VT_gae_phenolics.png"),
  plot = fig6,
  width = 6,
  height = 4,
  units = "in",
  dpi = 900,
  limitsize = FALSE
)

##### Figure 6.7 - Correlation analysis of ascorbate, TEAC, and GAE #####

# prepare ascorbic acid, TEAC and GAE data for correlation analysis
# one column per measure; only rows with no NA in any column are kept
vt_correlate_nutrivars <- VT23_VT24 %>%
  filter(str_detect(measure_var, "teac|gae|ascorbic")) %>%
  pivot_wider(names_from = "measure_var", values_from = "value") %>%
  drop_na()

# run the individual correlations to display model parameters and statistics
# (kept commented out; run interactively)

# vt_correlate_nutrivars %>%
#   correlation(
#     select = "gae_mg_per_g_fw",
#     select2 = "ascorbic_acid_mg_100g_fw"
#   )
#
# vt_correlate_nutrivars %>%
#   correlation(
#     select = "gae_mg_per_g_fw",
#     select2 = "teac_uM_per_g_fw"
#   )
#
# vt_correlate_nutrivars %>%
#   correlation(
#     select = "ascorbic_acid_mg_100g_fw",
#     select2 = "teac_uM_per_g_fw"
#   )

# plot correlation matrix
# author's note: ggcorrmat also uses correlation::correlation() to compute Pearson's R and Holm-adjusted p-values
# the display names are matched to `cor.vars` by position
fig7 <- ggcorrmat(
  data = vt_correlate_nutrivars,
  cor.vars = c(
    "ascorbic_acid_mg_100g_fw",
    "teac_uM_per_g_fw",
    "gae_mg_per_g_fw"
  ),
  cor.vars.names = c(
    "Total Ascorbic Acid\n (mg/100g)",
    "Antioxidant Capacity\n  (\u03BCM TROLOX/g)",
    "Total Phenolic Content\n (mg Gallic Acid/g)"
  ),
  type = "parametric",
  colors = c("#164194FF", "white", "#F39200FF"),
  matrix.type = "lower"
) +
  theme(
    axis.ticks = element_blank(),
    panel.grid = element_line(color = "#b4aea9"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_blank()
  )

# save the plot to a date-stamped ".png" file
# the specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
# note: asterisks on significant correlations were manually added to the matrix in PowerPoint, having found no simple way to integrate asterisks with ggcorrmat
# the output file name is kept exactly as it was so existing references to it still resolve
ggsave(
  filename = paste0("../../figures/chap-05/", format(Sys.Date(), "%Y%m%d"), "_VT_nutri_correlation_matirx.png"),
  plot = fig7,
  width = 6,
  height = 4,
  units = "in",
  dpi = 900,
  limitsize = FALSE
)

##### ANOVA summaries for colour, firmness, and sweetness #####

# filter colour, firmness, tss and tss/tta ratios for the 2024 experiment
# and clean variable names
# `date` is removed before widening, and where several values fall into the
# same row/variable cell they are averaged (values_fn = mean)
vt_sensory_traits <- VT23_VT24 %>%
  filter(
    str_detect(measure_var, "max_load|\\*|chroma|hue|tss"),
    experiment == "VT24"
  ) %>%
  select(-date) %>%
  pivot_wider(
    names_from = "measure_var",
    values_from = "value",
    values_fn = mean
  ) %>%
  rename(
    l = `L*`,
    a = `a*`,
    b = `b*`
  ) %>%
  # keep only rows with a complete L*a*b* triplet: the hex codes below are
  # attached by row position, so the colour table and this table must contain
  # exactly the same rows
  drop_na(l, a, b)

# generate hex codes from Lab colour space - potential utility for some users
hex_convert <- vt_sensory_traits %>%
  select(l, a, b)

# as_tibble() on the lab_to_hex() result gives a single column named `value`,
# which is renamed and bound column-wise onto the trait data
vt_sensory_traits_hex <- as_tibble(lab_to_hex(hex_convert)) %>%
  rename(hex_col_space = value) %>%
  bind_cols(vt_sensory_traits) %>%
  select(
    genotype,
    block,
    chroma,
    hue,
    l,
    a,
    b,
    hex_col_space,
    max_load_N,
    tss_tta_ratio
  )

# remove redundant variables from environment
rm(
  list = c(
    "hex_convert", "vt_sensory_traits"
  )
)

# stats for colour

# assumption checks, kept commented out for interactive use
# author's notes: visual plots indicate an acceptable distribution, but there is definitely some deviation from normal;
# some inequality in variances too, likely due to smaller than ideal sample size
# vt_colour_model <- aov(chroma ~ genotype + block, data = vt_sensory_traits_hex)
# shapiro.test(vt_sensory_traits_hex$chroma) # normality
# leveneTest(chroma ~ genotype, data = vt_sensory_traits_hex) # homogeneity of variances
# plot(vt_colour_model) # visual assessment

# ANOVA with the Huber-White adjustment for heteroscedasticity
# see thesis section 6.2.4 for more details
vt_colour_model <- car::Anova(
  lm(chroma ~ genotype + block, data = vt_sensory_traits_hex),
  white.adjust = TRUE
)

# post-hoc analysis
# Games-Howell, chosen to conservatively account for unequal sample sizes
# the two group columns are merged into "A-B" row names
vt_colour_posthoc <- rstatix::games_howell_test(vt_sensory_traits_hex, chroma ~ genotype) %>%
  unite("comp", starts_with("group"), sep = "-") %>%
  column_to_rownames(var = "comp")

# logical vector, named by comparison: TRUE where the adjusted p-value is below 0.05
vt_colour_pvals <- vt_colour_posthoc[["p.adj"]] < 0.05
names(vt_colour_pvals) <- row.names(vt_colour_posthoc)

# per-genotype mean, standard error, max and min of chroma, with the significance letters attached
vt_colour_letters <- multcompLetters(vt_colour_pvals) %>%
  as.data.frame.list() %>%
  rownames_to_column(var = "genotype") %>%
  right_join(vt_sensory_traits_hex, by = "genotype") %>%
  summarise(
    mean = mean(chroma),
    se = std.error(chroma),
    max = max(chroma),
    min = min(chroma),
    .by = c(genotype, Letters)
  )

# stats for firmness

# assumption checks, kept commented out for interactive use
# author's notes: visual plots indicate a non-normal distribution and unequal variances;
# ANOVA is relatively robust to non-normality, not too concerned about that here;
# use adjustment methods to account for unequal variance
# vt_firm_model <- aov(max_load_N ~ genotype + block, data = vt_sensory_traits_hex)
# shapiro.test(vt_sensory_traits_hex$max_load_N) # normality
# leveneTest(max_load_N ~ genotype, data = vt_sensory_traits_hex) # homogeneity of variances
# plot(vt_firm_model) # visual assessment

# ANOVA with the Huber-White adjustment for heteroscedasticity
# see thesis section 6.2.4 for more details
vt_firm_model <- car::Anova(
  lm(max_load_N ~ genotype + block, data = vt_sensory_traits_hex),
  white.adjust = TRUE
)

# post-hoc analysis
# Games-Howell, chosen to conservatively account for unequal sample sizes
# the two group columns are merged into "A-B" row names
vt_firm_posthoc <- rstatix::games_howell_test(vt_sensory_traits_hex, max_load_N ~ genotype) %>%
  unite("comp", starts_with("group"), sep = "-") %>%
  column_to_rownames(var = "comp")

# logical vector, named by comparison: TRUE where the adjusted p-value is below 0.05
vt_firm_pvals <- vt_firm_posthoc[["p.adj"]] < 0.05
names(vt_firm_pvals) <- row.names(vt_firm_posthoc)

# per-genotype mean, standard error, max and min of maximum load, with the significance letters attached
vt_firm_letters <- multcompLetters(vt_firm_pvals) %>%
  as.data.frame.list() %>%
  rownames_to_column(var = "genotype") %>%
  right_join(vt_sensory_traits_hex, by = "genotype") %>%
  summarise(
    mean = mean(max_load_N),
    se = std.error(max_load_N),
    max = max(max_load_N),
    min = min(max_load_N),
    .by = c(genotype, Letters)
  )

# stats for sweetness (TSS/TTA)

# drop a single NA value in tss_tta_ratio before analysis
# this NA-free table is used for the model, the post-hoc test and the summary
# below, so all three are based on exactly the same rows
vt_sensory_traits_hex_tss <- vt_sensory_traits_hex %>%
  drop_na(tss_tta_ratio)

# test and visually assess procedure assumptions
# (kept commented out, as in the other sections, so that running the script
# does not open diagnostic plots; run interactively when needed)
# visual plots indicate a non-normal distribution and unequal variances
# ANOVA is relatively robust to non-normality, not too concerned about that here, especially with the observed effect sizes
# use adjustment methods to account for unequal variance
# vt_sweet_model <- aov(tss_tta_ratio ~ genotype + block, data = vt_sensory_traits_hex_tss)
# shapiro.test(vt_sensory_traits_hex_tss$tss_tta_ratio) # normality
# leveneTest(tss_tta_ratio ~ genotype, data = vt_sensory_traits_hex_tss) # homogeneity of variances
# plot(vt_sweet_model) # visual assessment

# run ANOVA with Huber-White adjustment for heteroscedasticity
# see thesis section 6.2.4 for more details
vt_sweet_model <- car::Anova(
  lm(
    tss_tta_ratio ~ genotype + block,
    data = vt_sensory_traits_hex_tss
  ),
  white.adjust = TRUE
)

# post-hoc analysis
# using Games-Howell to conservatively account for unequal sample sizes
# the two group columns are merged into "A-B" row names
vt_sweet_posthoc <- vt_sensory_traits_hex_tss %>%
  rstatix::games_howell_test(tss_tta_ratio ~ genotype) %>%
  unite("comp", starts_with("group"), sep = "-") %>%
  column_to_rownames(var = "comp")

# generating letters to signify a statistically significant difference between genotypes
# TRUE marks a pairwise comparison that is significant at the 0.05 level
vt_sweet_pvals <- vt_sweet_posthoc$p.adj < 0.05

names(vt_sweet_pvals) <- row.names(vt_sweet_posthoc)

# summary table of means and between genotype differences
vt_sweet_letters <- as.data.frame.list(
  multcompLetters(vt_sweet_pvals)
) %>%
  rownames_to_column(var = "genotype") %>%
  right_join(vt_sensory_traits_hex_tss, by = "genotype") %>%
  summarise(
    mean = mean(tss_tta_ratio),
    se = std.error(tss_tta_ratio),
    max = max(tss_tta_ratio),
    min = min(tss_tta_ratio),
    .by = c(genotype, Letters)
  )

##### Figure 6.8 - Box-violin plots, epidermal chroma in different genotypes #####

# layers, in drawing order: violin outline, box plot (outliers made
# transparent because the raw points are drawn anyway), jittered observations
# coloured by genotype, and the genotype mean as a larger dark-red point
fig8 <- ggplot(
  data = vt_sensory_traits_hex,
  mapping = aes(x = genotype, y = chroma, group = genotype)
) +
  geom_violin(
    position = position_dodge(0.9), width = 0.8, linewidth = 0.4,
    fill = NA, color = "grey40"
  ) +
  geom_boxplot(
    position = position_dodge(0.9), width = 0.7,
    fill = NA, color = "grey40", outlier.alpha = 0
  ) +
  geom_point(
    aes(colour = genotype),
    position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.1),
    size = 1.8, alpha = 0.4
  ) +
  geom_point(
    stat = "summary", fun = mean,
    position = position_dodge(0.9), size = 3, color = "#8a0f00"
  ) +
  # optional letters of significance (left out to avoid clutter)
  # geom_text(
  #   data = vt_colour_letters,
  #   aes(x = genotype, y = max, label = Letters),
  #   vjust = -1.3, fontface = "bold"
  # ) +
  scale_x_discrete(name = "Genotype") +
  scale_y_continuous(name = "Chroma", breaks = seq(30, 60, by = 2.5)) +
  scale_colour_frontiers() +
  labs(colour = "Genotype") +
  theme_thesis() +
  theme(
    legend.position = "none",
    # same 0.5 cm right-hand margin as theme_thesis(), with the title in italics
    axis.title.y = element_text(face = "italic", margin = margin(r = 0.5, unit = "cm")),
    axis.text.x = element_text(face = "italic", angle = 45, hjust = 1, vjust = 1)
  )


# write the figure to a date-stamped ".png" file
# the specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_VT_colour.png"),
  plot = fig8,
  width = 6, height = 4, units = "in",
  dpi = 900, limitsize = FALSE
)

##### Figure 6.10 - Box-violin plots, berry firmness in different genotypes #####

# layers, in drawing order: violin outline, box plot (outliers made
# transparent because the raw points are drawn anyway), jittered observations
# coloured by genotype, and the genotype mean as a larger dark-red point
fig10 <- ggplot(
  data = vt_sensory_traits_hex,
  mapping = aes(x = genotype, y = max_load_N, group = genotype)
) +
  geom_violin(
    position = position_dodge(0.9), width = 0.8, linewidth = 0.4,
    fill = NA, color = "grey40"
  ) +
  geom_boxplot(
    position = position_dodge(0.9), width = 0.7,
    fill = NA, color = "grey40", outlier.alpha = 0
  ) +
  geom_point(
    aes(colour = genotype),
    position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.1),
    size = 1.8, alpha = 0.4
  ) +
  geom_point(
    stat = "summary", fun = mean,
    position = position_dodge(0.9), size = 3, color = "#8a0f00"
  ) +
  # optional letters of significance (left out to avoid clutter)
  # geom_text(
  #   data = vt_firm_letters,
  #   aes(x = genotype, y = max, label = Letters),
  #   vjust = -1.3, fontface = "bold"
  # ) +
  scale_x_discrete(name = "Genotype") +
  scale_y_continuous(name = "Maximum Load (N)", breaks = seq(2, 8.5, by = 0.5)) +
  scale_colour_frontiers() +
  labs(colour = "Genotype") +
  theme_thesis() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(face = "italic", angle = 45, hjust = 1, vjust = 1)
  )


# write the figure to a date-stamped ".png" file
# the specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_VT_firmness.png"),
  plot = fig10,
  width = 6, height = 4, units = "in",
  dpi = 900, limitsize = FALSE
)


##### Figure 6.11 - Box-violin plots, sweetness (tss/tta) in different genotypes #####

# plotted from the table with missing tss_tta_ratio values already removed
# layers, in drawing order: violin outline, box plot (outliers made
# transparent because the raw points are drawn anyway), jittered observations
# coloured by genotype, and the genotype mean as a larger dark-red point
fig11 <- ggplot(
  data = vt_sensory_traits_hex_tss,
  mapping = aes(x = genotype, y = tss_tta_ratio, group = genotype)
) +
  geom_violin(
    position = position_dodge(0.9), width = 0.8, linewidth = 0.4,
    fill = NA, color = "grey40"
  ) +
  geom_boxplot(
    position = position_dodge(0.9), width = 0.7,
    fill = NA, color = "grey40", outlier.alpha = 0
  ) +
  geom_point(
    aes(colour = genotype),
    position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.1),
    size = 1.8, alpha = 0.4
  ) +
  geom_point(
    stat = "summary", fun = mean,
    position = position_dodge(0.9), size = 3, color = "#8a0f00"
  ) +
  # optional letters of significance (left out to avoid clutter)
  # geom_text(
  #   data = vt_sweet_letters,
  #   aes(x = genotype, y = max, label = Letters),
  #   vjust = -1.3, fontface = "bold"
  # ) +
  scale_x_discrete(name = "Genotype") +
  scale_y_continuous(name = "TSS / TTA", breaks = seq(6, 11, by = 0.5)) +
  scale_colour_frontiers() +
  labs(colour = "Genotype") +
  theme_thesis() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(face = "italic", angle = 45, hjust = 1, vjust = 1)
  )


# write the figure to a date-stamped ".png" file
# the specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_VT_sweetness.png"),
  plot = fig11,
  width = 6, height = 4, units = "in",
  dpi = 900, limitsize = FALSE
)


##### Yield Analysis  #####

# total each primary yield measurement per experiment / sample / genotype /
# block, derive the common yield parameters, and return them in long format
# keeping only the four derived measures named in the final filter
vt_yield <- VT23_VT24 %>%
  # primary yield measurements only; percentage variables are excluded
  filter(
    str_detect(measure_var, "class|waste|yield|berry") &
      !str_detect(measure_var, "perc")
  ) %>%
  summarise(
    value = sum(value, na.rm = TRUE),
    .by = c(experiment, sample, genotype, block, measure_var)
  ) %>%
  pivot_wider(names_from = "measure_var", values_from = "value") %>%
  # calculate common yield parameters from primary yield measurements
  # NOTE(review): the divisor 6 is presumably the number of plants per sample - confirm
  mutate(
    yield_per_plant = yield_weight_grams / 6,
    class1_per_plant = class1_weight_grams / 6,
    class1_perc_weight = (class1_weight_grams / yield_weight_grams) * 100,
    class1_perc_number = (class1_number / (class1_number + class2_number + waste_number)) * 100,
    class1_avg_berry_size = class1_weight_grams / class1_number,
    avg_berry_size = yield_weight_grams / (class1_number + class2_number + waste_number),
    .by = sample
  ) %>%
  # back to long format: every numeric column becomes a measure_var / value pair
  pivot_longer(where(is.numeric), names_to = "measure_var", values_to = "value") %>%
  filter(
    str_detect(
      measure_var,
      "class1_per_plant|class1_perc_weight|class1_avg|yield_per_plant"
    )
  )



##### Figure 6.12 - Dot plots showing 2023 yield profiles, faceted by genotype #####

# Yield profile for the 2023 trial: yield weight against harvest date, one
# panel per genotype, with the per-date mean overlaid in red.
#
# The data are read from `VT23_VT24` rather than `vt_yield`. `vt_yield` is
# summarised to one row per sample and measurement (which drops `date`) and is
# then filtered to the derived per-plant parameters only, so it contains no
# "yield_weight_grams" rows and no `date` column; the figure therefore had
# nothing to plot.
# NOTE(review): assumes `VT23_VT24` carries a `date` column of class Date, as
# required by `scale_x_date()` below — confirm against the compiled dataset.
fig12 <- VT23_VT24 %>%
  filter(
    experiment == "VT23",
    measure_var == "yield_weight_grams"
  ) %>%
  ggplot(
    aes(
      x = date, y = value
    )
  ) +
  # individual observations
  geom_point(
    fill = "#164194FF",
    colour = "#164194FF",
    size = 1.5,
    alpha = 0.4
  ) +
  # mean of the observations on each date (no dodging needed: there is a single
  # group per date within a panel)
  geom_point(
    stat = "summary",
    fun = mean,
    size = 2,
    color = "#8a0f00"
  ) +
  scale_x_date(
    "Date",
    date_breaks = "3 days",
    date_labels = "%d %b"
  ) +
  scale_y_continuous(
    "Yield (grams)"
  ) +
  # independent y-axes so each genotype's profile uses its full panel height
  facet_wrap(
    genotype ~ .,
    ncol = 1,
    scales = "free_y"
  ) +
  theme_thesis() +
  theme(
    legend.position = "none",
    strip.text = element_text(
      face = "italic"
    ),
    axis.text.x = element_text(
      angle = 45,
      vjust = 1,
      hjust = 1
    )
  )


# Write `fig12` to a date-stamped ".png" file in ./figures.
# A 6 x 7 inch canvas at 900 dpi has been found to keep good image quality when the figure is inserted into .docx documents.
ggsave(
  paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_VT23_yield_profiles.png"),
  fig12,
  dpi = 900,
  units = "in",
  width = 6,
  height = 7,
  limitsize = FALSE
)

##### Figure 6.13 - Dot plots showing 2024 yield profiles, faceted by genotype #####

# Yield profile for the 2024 trial: yield weight against harvest date, one
# panel per genotype, with the per-date mean overlaid in red.
#
# The data are read from `VT23_VT24` rather than `vt_yield`. `vt_yield` is
# summarised to one row per sample and measurement (which drops `date`) and is
# then filtered to the derived per-plant parameters only, so it contains no
# "yield_weight_grams" rows and no `date` column; the figure therefore had
# nothing to plot.
# NOTE(review): assumes `VT23_VT24` carries a `date` column of class Date, as
# required by `scale_x_date()` below — confirm against the compiled dataset.
fig13 <- VT23_VT24 %>%
  filter(
    experiment == "VT24",
    measure_var == "yield_weight_grams"
  ) %>%
  ggplot(
    aes(
      x = date, y = value
    )
  ) +
  # individual observations
  geom_point(
    fill = "#164194FF",
    colour = "#164194FF",
    size = 1.5,
    alpha = 0.4
  ) +
  # mean of the observations on each date (no dodging needed: there is a single
  # group per date within a panel)
  geom_point(
    stat = "summary",
    fun = mean,
    size = 2,
    color = "#8a0f00"
  ) +
  scale_x_date(
    "Date",
    date_breaks = "3 days",
    date_labels = "%d %b"
  ) +
  scale_y_continuous(
    "Yield (grams)"
  ) +
  # independent y-axes so each genotype's profile uses its full panel height
  facet_wrap(
    genotype ~ .,
    ncol = 1,
    scales = "free_y"
  ) +
  theme_thesis() +
  theme(
    legend.position = "none",
    strip.text = element_text(
      face = "italic"
    ),
    axis.text.x = element_text(
      angle = 45,
      vjust = 1,
      hjust = 1
    )
  )


# Write `fig13` to a date-stamped ".png" file in ./figures.
# A 6 x 7 inch canvas at 900 dpi has been found to keep good image quality when the figure is inserted into .docx documents.
ggsave(
  paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_VT24_yield_profiles.png"),
  fig13,
  dpi = 900,
  units = "in",
  width = 6,
  height = 7,
  limitsize = FALSE
)
##### Table 6.3 - Statistical summary of 2023 total yields #####

# 2023 trial only: drop incomplete rows, spread the yield parameters into one
# column each, and give total yield its display label for the summary table.
vt_yield_23 <- vt_yield %>%
  filter(experiment == "VT23") %>%
  drop_na() %>%
  pivot_wider(
    names_from = measure_var,
    values_from = value
  ) %>%
  rename(`Total Yield (g/plant)` = yield_per_plant)

# Table 6.3: mean (standard error) of total yield per plant for each genotype,
# with a one-way test across genotypes, rendered as a flextable.
tab3 <- tbl_summary(
  vt_yield_23,
  by = genotype,
  include = "Total Yield (g/plant)",
  # "continuous2" puts the statistic on its own labelled row
  type = all_continuous() ~ "continuous2",
  statistic = all_continuous() ~ "{mean} ({std.error})",
  missing = "no"
) %>%
  italicize_levels() %>%
  bold_labels() %>%
  # stats::oneway.test supplies the F statistic and p-value for the yield data
  add_p(test = everything() ~ "oneway.test") %>%
  # show the test statistic and label it as the F-value
  modify_fmt_fun(statistic ~ style_sigfig) %>%
  modify_header(statistic ~ "**F-value**") %>%
  # genotype columns: italic level name over the group size
  modify_header(all_stat_cols() ~ "_{level}_\n _n_ = {n}") %>%
  modify_spanning_header(all_stat_cols() ~ "**Genotype**") %>%
  # the 2nd, 3rd and 7th genotype columns are left out of the printed table
  modify_column_hide(columns = c("stat_2", "stat_3", "stat_7")) %>%
  as_flex_table() %>%
  set_table_properties(width = 1, layout = "autofit")

# export Table 6.3 as a date-stamped Word document
save_as_docx(
  tab3,
  path = paste0("~/chapter-06_strawberry_genotypes/figures/", format(Sys.Date(), "%Y%m%d"), "_VT23_yield_summary.docx")
)

  

##### Table 6.4 - Statistical summary of 2024 yield parameters #####

# Table 6.4: mean (standard error) of the four 2024 yield parameters for each
# genotype, with a one-way test across genotypes, rendered as a flextable.
#
# Fix: the pipeline previously started from `yield`, which is not created in
# this script section. The four parameters renamed below are exactly the
# `measure_var` values retained in `vt_yield` (the object Table 6.3 is built
# from), so `vt_yield` is used here.
tab4 <- vt_yield %>%  
  filter(
    experiment == "VT24"
  ) %>% 
  # one column per yield parameter, as required by tbl_summary()
  pivot_wider(
    names_from = "measure_var",
    values_from = "value"
  ) %>% 
  # display labels used as row headings in the table
  rename(
    "Total Yield (g/plant)" = yield_per_plant,
    "Class 1 Yield (g/plant)" = class1_per_plant,
    "Percentage Class 1 (%)" = class1_perc_weight,
    "Mean Class 1 Berry Size (g)" = class1_avg_berry_size
  ) %>% 
  tbl_summary(
    by = genotype,
    include = c(
      "Total Yield (g/plant)",
      "Class 1 Yield (g/plant)",
      "Percentage Class 1 (%)",
      "Mean Class 1 Berry Size (g)"
    ),
    # "continuous2" puts the statistic on its own labelled row
    type = all_continuous() ~ "continuous2",
    statistic =
      all_continuous() ~ c("{mean} ({std.error})"
      ),
    missing = "no"
  ) %>%  
  italicize_levels() %>% 
  bold_labels() %>% 
  # stats::oneway.test supplies the F statistic and p-value for each parameter
  add_p(
    test = everything() ~ "oneway.test"
  ) %>% 
  bold_p() %>% 
  # show the test statistic and label it as the F-value
  modify_fmt_fun(statistic ~ style_sigfig) %>% 
  modify_header(statistic ~ "**F-value**") %>% 
  # genotype columns: italic level name over the group size
  modify_header(all_stat_cols() ~ "_{level}_\n _n_ = {n}") %>% 
  modify_spanning_header(all_stat_cols() ~ "**Genotype**") %>% 
  as_flex_table() %>%
  colformat_md() %>%
  set_table_properties(width = 0.9, layout = "autofit")
# export Table 6.4 as a date-stamped Word document
save_as_docx(
  tab4,
  path = paste0("~/chapter-06_strawberry_genotypes/figures/", format(Sys.Date(), "%Y%m%d"), "_VT24_yield_summary.docx")
)

##### Two-way ANOVA summary for storage experiment ascorbic acid #####

# Keep the ascorbic acid and dry weight measurements (names containing
# "ascorbic" or "perc") from the primary storage-trial data, then relabel the
# time points and scenario names so they show the corrected number of hours
# post harvest.
sts_prep <- sts24_acids %>%
  filter(str_detect(measure_var, "ascorbic|perc")) %>%
  mutate(
    # facet label, derived from the original scenario name before it is relabelled below
    plot_id = if_else(
      str_detect(condition, "52"),
      "Retail Stage - 54 hours",
      "Retail Stage - 78 hours"
    ),
    # recorded hours -> corrected hours, returned as numeric;
    # any time point not listed here becomes NA
    time_point = case_when(
      time_point == "0" ~ 0,
      time_point == "28" ~ 30,
      time_point == "53" ~ 55,
      time_point == "101" ~ 103,
      time_point == "125" ~ 127
    ),
    # scenario names: 52 -> 54 and 76 -> 78
    condition = condition %>%
      str_replace_all("52", "54") %>%
      str_replace_all("76", "78")
  )

# Run a two-way ANOVA (time point x storage scenario, including their
# interaction) separately for each measurement in `sts_prep`, and stack the
# tidied ANOVA tables into a single data frame keyed by `measure_var`.
#
# Fix: the pipeline previously started from `sts24_prep`, which is not created
# in this script section; the prepared dataset built for this analysis (and
# used for Figure 6.14) is `sts_prep`.
#
# NOTE(review): `time_point` is numeric in `sts_prep`, so aov() fits it as a
# single-df linear term rather than as a categorical factor — confirm this is
# the intended model, or wrap it in factor().
sts24_models <- sts_prep %>% 
  ungroup() %>%
  # one row per measurement, with that measurement's rows held in `data`
  nest_by(
    measure_var
  ) %>%
  mutate(
    model = list(aov(value ~ time_point * condition, data = data))
  ) %>%
  # broom::tidy() turns each fitted model into one row per model term
  reframe(
    tidy(
      model
    )
  )


##### Figure 6.14 - Line-graphs showing changes in ascorbic acid during 8 post-harvest storage scenarios #####

# Mean total ascorbic acid (with standard-error bars) at each time point for
# every storage scenario, drawn as one line per scenario and split into one
# panel per `plot_id`.
fig14 <- sts_prep %>%
  # summary statistics per time point x scenario x measurement x panel
  summarise(
    mean = mean(value),
    sd = sd(value),
    se = std.error(value),
    .by = c(time_point, condition, measure_var, plot_id)
  ) %>%
  # only the ascorbic acid measurement is plotted
  filter(measure_var == "ascorbic_acid_mg_100g_fw") %>%
  # fix the scenario order used for the legend and the colour mapping
  mutate(
    condition = factor(
      condition,
      levels = c(
        "RA54-HA", "RA54-HRF", "RRF54-HA", "RRF54-HRF",
        "RA78-HA", "RA78-HRF", "RRF78-HA", "RRF78-HRF"
      )
    )
  ) %>%
  ggplot(
    aes(
      # time points are treated as discrete, evenly spaced categories
      x = as.factor(time_point),
      y = mean,
      colour = condition,
      group = condition
    )
  ) +
  geom_errorbar(
    aes(ymin = mean - se, ymax = mean + se),
    alpha = 0.8,
    linewidth = 0.5,
    width = 0.2
  ) +
  geom_point() +
  geom_line(alpha = 0.8, linewidth = 0.8) +
  scale_x_discrete(name = "Time Point\n(Hours Since Harvest)") +
  scale_y_continuous(
    name = "Total Ascorbic Acid\n(mg/100g Fresh Weight)",
    limits = c(70, 115),
    breaks = seq(70, 110, by = 10)
  ) +
  # one colour per scenario, in the level order set above
  scale_colour_manual(
    values = c(
      "#D51317FF", "#F39200FF", "#007B3DFF", "#0094CDFF",
      "#EFD500FF", "#95C11FFF", "#164194FF", "#6F286AFF"
    )
  ) +
  labs(colour = "Scenario") +
  facet_wrap(vars(plot_id), ncol = 2) +
  theme_thesis()


# Write `fig14` to a date-stamped ".png" file in ./figures.
# A 6 x 4 inch canvas at 900 dpi has been found to keep good image quality when the figure is inserted into .docx documents.
ggsave(
  paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_STS_ascorbic_acid.png"),
  fig14,
  dpi = 900,
  units = "in",
  width = 6,
  height = 4,
  limitsize = FALSE
)








