# notes
# tidied code originally written in `chap-06_analysis.Rmd` - now chap-05 after chapter order was changed.

# changes
# preparation of adjusted/additional figures to meet the examiner's suggested amendments

# load required packages
pacman::p_load(
  flextable, # needed for Table 5.1; listed first so packages loaded later keep precedence on any name clashes
  tidyverse,
  data.table,
  broom,
  janitor,
  plotrix,
  ggpmisc,
  ggstatsplot,
  viridis,
  ggsci,
  scales,
  car,
  ggpubr,
  ggrepel,
  gt,
  gtsummary,
  cardx,
  multcompView,
  multcomp
)


# loading required datasets
# datasets derived from '~/chapter-05_strawberry_foliar_sprays/code/datacompilation_FS_v0.0.10.R'
# the .Rdata file restores three data frames into the global environment:
  # FS23_compiled_data   - used below to build fs_compiled_data
  # prelim_summary       - used below for Figure 5.2
  # weather_data_wrangle - used below for Figure 5.4
# NOTE: the path is relative, so the script must be run from the working directory it was written for

load("../data_submission/chapter-05_strawberry_foliar_sprays/data/processed/20250625_FS23_compiled_dataset.Rdata")


# Shared ggplot2 theme for the thesis figures.
# Starts from ggstatsplot's theme and layers the house style on top, so every
# figure that adds `theme_thesis()` gets the same look.
# Takes no arguments; returns a ggplot2 theme object.
theme_thesis <- function() {
  # gap between each axis title and its axis
  x_title_gap <- margin(t = 0.5, r = 0, b = 0, l = 0, unit = "cm")
  y_title_gap <- margin(t = 0, r = 0.5, b = 0, l = 0, unit = "cm")

  house_style <- theme(
    # axes and panel frame
    axis.ticks = element_blank(),
    axis.line = element_line(colour = "grey50"),
    axis.title.x = element_text(margin = x_title_gap),
    axis.title.y = element_text(margin = y_title_gap),
    panel.border = element_rect(color = "grey50", fill = NA, linewidth = 0.5),
    # grid lines: a colour is declared, but the minor and both major grids are blanked
    panel.grid = element_line(color = "#b4aea9"),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    # plot tag and facet strips
    plot.tag.position = c(0.12, 1),
    strip.text = element_text(face = "bold"),
    strip.background = element_rect(fill = NA, color = NA),
    strip.placement = "inside"
  )

  theme_ggstatsplot() + house_style
}


##### Tidying compiled data #####

# Recode the columns from `experiment` through `block` as factors, rename the
# spray treatments to the labels used in the thesis, and rewrite dates with "/"
# separators. Treatment values other than the three listed pass through unchanged.
fs_compiled_data <- mutate(
  FS23_compiled_data,
  across(experiment:block, as.factor),
  treatment = case_when(
    treatment == "iron" ~ "Fe-NP",
    treatment == "selenium" ~ "SEL",
    treatment == "control" ~ "Control",
    TRUE ~ treatment
  ),
  date = str_replace_all(date, "-", "/")
)

##### Figure 5.2 - Nutritional components of four strawberry varieties assessed during PhD rotation project #####

# Reshape the preliminary screen to long format (one row per numeric variable),
# then take the mean and standard error of each variable within each genotype.
fig2_prep <- prelim_summary %>%
  pivot_longer(cols = where(is.numeric), names_to = "var", values_to = "value") %>%
  # no dry matter data for these samples, assuming dry-matter content of 10%
  # based on data in FS23_compiled_data
  mutate(value = value * 0.1) %>%
  # `n` = number of long-format rows per genotype (observations x numeric variables)
  add_count(genotype) %>%
  # keep only genotypes with more than 36 long-format rows
  # NOTE(review): originally described as "at least 3 observations" - confirm
  # that a 36-row cut-off corresponds to that
  filter(n > 36) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    se = std.error(value, na.rm = TRUE),
    .by = c(genotype, var)
  )

# bar plots for each nutritional variable
# The three panels differ only in the variable plotted, the y-axis label/scale
# and whether the x-axis is drawn, so a single helper builds all of them.

# Build one Figure 5.2 panel: a bar chart of mean +/- SE per genotype for a
# single nutritional variable.
#   summary_data  data frame with columns genotype, var, mean and se
#   panel_var     value of `var` selecting the rows to plot
#   y_label       y-axis title
#   y_max         upper limit of the y-axis (the axis always starts at 0)
#   y_step        spacing between y-axis breaks
#   show_x_axis   draw the genotype labels and x-axis title? Defaults to FALSE
#                 because only the bottom panel of the stacked figure needs them
# Returns a ggplot object.
fig2_panel <- function(summary_data, panel_var, y_label, y_max, y_step,
                       show_x_axis = FALSE) {
  panel <- summary_data %>%
    filter(.data$var == panel_var) %>%
    ggplot(aes(x = genotype, y = mean, fill = genotype)) +
    geom_col(colour = "black", width = 0.5) +
    geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.3) +
    scale_fill_manual(
      values = c("#F39200FF", "#164194FF", "#EFD500FF", "#6F286AFF", "#636363FF")
    ) +
    scale_x_discrete(expand = c(0.2, 0)) +
    scale_y_continuous(
      limits = c(0, y_max),
      breaks = seq(0, y_max, by = y_step),
      expand = c(0, 0)
    ) +
    labs(x = "Genotype", y = y_label) +
    theme_thesis() +
    theme(legend.position = "none")

  if (!show_x_axis) {
    panel <- panel +
      theme(axis.text.x = element_blank(), axis.title.x = element_blank())
  }

  panel
}

fig2_asa <- fig2_panel(
  fig2_prep, "asa",
  y_label = "Total Ascorbic Acid\n (mg/100g Fresh Weight)",
  y_max = 90, y_step = 15
)

fig2_teac <- fig2_panel(
  fig2_prep, "teac",
  y_label = "Total Antioxidant Capacity\n (\u03BCM TE/g Fresh Weight)",
  y_max = 20, y_step = 4
)

# bottom panel of the combined figure, so it keeps its x-axis
fig2_phen <- fig2_panel(
  fig2_prep, "phen",
  y_label = "Total Phenolic Content\n (mg GAE/g Fresh Weight)",
  y_max = 2.5, y_step = 0.5,
  show_x_axis = TRUE
)

# Stack the three panels vertically; the last panel is given more height than
# the other two.
fig2 <- ggarrange(fig2_asa, fig2_teac, fig2_phen, nrow = 3, heights = c(0.75, 0.75, 1))

# Write the combined figure to a date-stamped ".png" file.
# The specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  plot = fig2,
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_prelim_genotype_screen_asa_teac_phen.png"),
  width = 5, height = 6.2, units = "in",
  dpi = 900, limitsize = FALSE
)


##### Table 5.1 - Fertigation recipes #####

# Fertigation recipe table: one row per chemical, grouped by stock tank.
# Quantities are kept as text so they print exactly as typed (e.g. "0.300").
# The flextable functions are namespace-qualified because flextable is not
# among the packages attached at the top of the script.
tab1 <- dplyr::tibble(
  "Tank" = rep(c("Tank A", "Tank B", "Tank C"), times = c(3, 9, 1)),
  "Chemical" = c(
    # Tank A
    "Potassium Nitrate", "Calcium Nitrate", "Ammonium Nitrate",
    # Tank B
    "Monopotassium Phosphate", "Magnesium Nitrate", "Magnesium Sulphate",
    "Manganese Sulphate", "Solubor", "Copper Sulphate",
    "Zinc Sulphate", "Sodium Molybdate", "Iron EDTA",
    # Tank C
    "Nitric Acid"
  ),
  "Vegetative Feed\n (g/25 L)" = c(
    "662.75", "1716.00", "100.00",
    "423.50", "728.25", "292.75",
    "10.75", "1.50", "0.25",
    "5.00", "0.25", "48.75",
    "0.300"
  ),
  "Fruiting Feed\n (g/25 L)" = c(
    "916.50", "1450.00", "0.00",
    "423.50", "541.75", "216.75",
    "8.00", "1.50", "0.25",
    "3.00", "0.25", "35.75",
    "0.300"
  )
) %>%
  # insert a heading row for each tank, then render as a full-width flextable
  flextable::as_grouped_data(groups = "Tank") %>%
  flextable::flextable() %>%
  flextable::set_table_properties(width = 1, layout = "autofit")

# save table to .docx
# can be saved to other formats depending on the users needs, see flextable:: package for more save options
tab1 %>%
  flextable::save_as_docx(
    path = paste0("~/chapter-05_strawberry_foliar_sprays/figures/", format(Sys.Date(), "%Y%m%d"), "_FS_feed_recipe.docx")
  )

##### Figure 5.4 - Weather data summaries #####

# Drop the rainfall and average-irradiance variables, tag each remaining row as
# a "min" or "max" series ("na" when the variable name contains neither), and
# strip the _min/_max suffix so both series share one variable name (and facet).
weather_summary <- weather_data_wrangle %>%
  filter(!str_detect(weather_var, "rainfall|avg_watts")) %>%
  mutate(
    grp = case_when(
      str_detect(weather_var, "min") ~ "min",
      str_detect(weather_var, "max") ~ "max",
      TRUE ~ "na"
    ),
    weather_var = str_remove(weather_var, "_min|_max")
  )

# Facet strip titles for Figure 5.4, keyed by the values of `weather_var`.
fig4_labeller <- as_labeller(c(
  air_temp = "Air temperature\n (°C)",
  perc_relative_humidity = "Relative Humidity\n (%)",
  # pyranometer_avg_watts_per_m2 = "Daily\n Solar Irradiance\n (W/m²)",
  pyranometer_sum_watts_per_m2 = "Cumulative\n Solar Irradiance\n (W/m²)"
))

# Line graphs of the weather variables, one facet per variable.
# The facet strips sit on the left, outside the panel, and stand in for the
# y-axis titles (which are removed).
fig4 <- weather_summary %>%
  ggplot(aes(x = week, y = value, colour = grp)) +
  geom_point(size = 0.5) +
  geom_line(linewidth = 0.8) +
  # only the min/max series appear in the legend; "na" still gets a colour
  scale_colour_manual(
    name = "Air Temperature",
    breaks = c("min", "max"),
    labels = c("Minimum", "Maximum"),
    values = c("min" = "#164194FF", "max" = "#F39200FF", "na" = "#6F286AFF")
  ) +
  scale_x_date(
    name = "Date",
    breaks = seq(as.Date("2023-05-28"), as.Date("2023-07-09"), by = "1 week")
  ) +
  facet_wrap(
    . ~ weather_var, # faceting by measure variable
    nrow = 4,
    scales = "free_y", # each variable has its own numerical range
    strip.position = "left", # strips replace the normal y axis labels
    labeller = fig4_labeller # readable strip titles
  ) +
  ylab(NULL) +
  theme_thesis() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    strip.background = element_blank(),
    strip.placement = "outside",
    panel.grid.major.x = element_line(colour = "grey95"),
    legend.position = "bottom"
  )

# Write the figure to a date-stamped ".png" file.
# The specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  plot = fig4,
  filename = paste0("./figures/chap-06/", format(Sys.Date(), "%Y%m%d"), "_FS_weather_summary.png"),
  width = 5, height = 5, units = "in",
  dpi = 900, limitsize = FALSE
)

##### ANOVA summary for ascorbic acid  #####

# Ascorbic acid records only: de-duplicated, strictly positive values, and no
# row with a missing value in any column.
fs_ascorbic_acid <- fs_compiled_data %>%
  distinct() %>%
  filter(measure_var == "ascorbic_acid_mg_100g_fw" & value > 0) %>%
  ungroup() %>%
  drop_na()

# # stats 
# fs_ascorbic_model <- aov(value ~ genotype*treatment*as.factor(date) + block, data = fs_ascorbic_acid)
# summary(fs_ascorbic_model)

# #test and visually assess procedure assumptions
# shapiro.test(fs_ascorbic_acid$value) # normality
# leveneTest(value ~ genotype*treatment*date, data = fs_ascorbic_acid) # homegeneity of variances
# plot(fs_model) # visual assessment

# # post-hoc analysis 
# fs_posthoc_ascorbic<- multcompLetters4(
#   fs_ascorbic_model, TukeyHSD(fs_ascorbic_model), 
#   reversed = TRUE
#   )
# 
# fs_ascorbic_summary <- fs_ascorbic_acid %>%
#   summarise(
#     mean = mean(value),
#     stderr = std.error(value),
#     .by = c(date)
#   )
# 
# # generate letters to signify statistically significant differences that can be added to the plot
# fs_ascorbic_letters <- as.data.frame.list(
#   fs_posthoc_ascorbic$`genotype:treatment:as.factor(date)`
#   ) %>% 
#   rownames_to_column(
#     var = "levels"
#   ) %>% 
#   separate(
#     levels,
#     c("genotype","treatment","date"),
#     sep = ":"
#   ) %>% 
#   dplyr::dplyr::select(
#     genotype,
#     treatment,
#     date,
#     Letters
#   ) %>% 
#   right_join(
#     fs_ascorbic_acid,
#     by = c("genotype","treatment","date")
#   ) %>% 
#   summarise(
#      mean = mean(value),
#      se = std.error(value),
#      max = max(value),
#      min = min(value),
#      .by = c(genotype, treatment, date, Letters)
#    )

##### Figure 5.5 - Box-plots, ascorbic acid in Vibrant and Centenary under each spray treatment #####

# Dodged box-plots of ascorbic acid per treatment and genotype, one facet per
# sampling week (dates are floored to the Monday of their week).
fig5 <- fs_ascorbic_acid %>%
  group_by(genotype) %>%
  ggplot(aes(x = treatment, y = value, group = interaction(treatment, genotype))) +
  # one box per treatment x genotype; outliers are made transparent because the
  # raw points are drawn on top
  geom_boxplot(
    colour = "grey40", fill = NA, width = 0.7,
    outlier.alpha = 0,
    position = position_dodge(0.9)
  ) +
  # raw observations, jittered within their dodged box
  geom_point(
    aes(colour = genotype),
    size = 1.8, alpha = 0.4,
    position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.1)
  ) +
  # group means
  geom_point(
    stat = "summary", fun = mean,
    size = 3, color = "#8a0f00",
    position = position_dodge(0.9)
  ) +
  scale_x_discrete(name = "Treatment") +
  # can add letters of significance here
  # not included in final thesis as plots became cluttered
  # geom_text(
  #   data = fs_ascorbic_letters,
  #   aes(
  #     x = treatment,
  #     y = max,
  #     label = Letters,
  #     fill = genotype
  #   ),
  #   vjust = -1.3,
  #   fontface = "bold",
  #   position=position_dodge(-0.8),
  #   inherit.aes = FALSE
  # ) +
  scale_y_continuous(
    name = "Total Ascorbic Acid\n (mg/100g Fresh Weight)",
    limits = c(40, 115),
    breaks = seq(40, 115, by = 15)
  ) +
  scale_color_manual(name = "Genotype", values = pal_frontiers("default")(10)[c(2, 8)]) +
  facet_wrap(
    format(lubridate::floor_date(as.Date(date), unit = "week", week_start = 1), "%b %d") ~ .,
    ncol = 2
  ) +
  theme_thesis() +
  theme(legend.text = element_text(face = "italic"))

# Write the figure to a date-stamped ".png" file.
# The specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  plot = fig5,
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_FS_ascorbicacid.png"),
  width = 6.5, height = 4, units = "in",
  dpi = 900, limitsize = FALSE
)


##### ANOVA summary for malic acid #####
# Malic acid records only: de-duplicated, strictly positive values, and no row
# with a missing value in any column.
fs_malic_acid <- fs_compiled_data %>%
  distinct() %>%
  filter(measure_var == "malic_acid_mg_per_g_fw" & value > 0) %>%
  ungroup() %>%
  drop_na()

# # stats #
# fs_malic_model <- aov(value ~ genotype*treatment*as.factor(date) + block, data = fs_malic_acid)
# summary(fs_malic_model)
# 
# # test and visually assess procedure assumptions #
# shapiro.test(fs_malic_model$value) # normality
# leveneTest(value ~ genotype*treatment*date, data = fs_malic_acid) # homogeneity of variances
# plot(fs_malic_model) # visual assessment
# 
# # post-hoc analysis #
# TukeyHSD(fs_malic_model)
# 
# fs_malic_summary <- fs_malic_acid %>%
#   summarise(
#     mean = mean(value),
#     stderr = std.error(value),
#     .by = c(date)
#   )

##### Figure 5.6 - Box-plots, malic acid in Vibrant and Centenary under each spray treatment #####

# Dodged box-plots of malic acid per treatment and genotype, one facet per
# sampling week (dates are floored to the Monday of their week).
fig6 <- fs_malic_acid %>%
  group_by(genotype) %>%
  ggplot(aes(x = treatment, y = value, group = interaction(treatment, genotype))) +
  # one box per treatment x genotype; outliers are made transparent because the
  # raw points are drawn on top
  geom_boxplot(
    colour = "grey40", fill = NA, width = 0.7,
    outlier.alpha = 0,
    position = position_dodge(0.9)
  ) +
  # raw observations, jittered within their dodged box
  geom_point(
    aes(colour = genotype),
    size = 1.8, alpha = 0.4,
    position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.1)
  ) +
  # group means
  geom_point(
    stat = "summary", fun = mean,
    size = 3, color = "#8a0f00",
    position = position_dodge(0.9)
  ) +
  scale_x_discrete(name = "Treatment") +
  scale_y_continuous(
    name = "Malic Acid\n (mg/g Fresh Weight)",
    limits = c(1.25, 3.5),
    breaks = seq(1.25, 3.5, by = 0.25)
  ) +
  scale_color_manual(name = "Genotype", values = pal_frontiers("default")(10)[c(2, 8)]) +
  facet_wrap(
    format(lubridate::floor_date(as.Date(date), unit = "week", week_start = 1), "%b %d") ~ .,
    ncol = 2
  ) +
  theme_thesis() +
  theme(legend.text = element_text(face = "italic"))

# Write the figure to a date-stamped ".png" file.
# The specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  plot = fig6,
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_FS_malicacid.png"),
  width = 6.5, height = 4, units = "in",
  dpi = 900, limitsize = FALSE
)

##### ANOVA summary for citric acid #####
# Citric acid records only: de-duplicated, strictly positive values, and no row
# with a missing value in any column.
fs_citric_acid <- fs_compiled_data %>%
  distinct() %>%
  filter(measure_var == "citric_acid_mg_per_g_fw" & value > 0) %>%
  ungroup() %>%
  drop_na()

# # stats #
# fs_citric_model <- aov(value ~ genotype*treatment*as.factor(date) + block, data = fs_citric_acid)
# summary(fs_citric_model)
# 
# # test and visually assess procedure assumptions #
# shapiro.test(fs_citric_model$value) # normality
# leveneTest(value ~ genotype*treatment*date, data = fs_citric_acid) # homogeneity of variances
# plot(fs_citric_model) # visual assessment
# 
# # post-hoc analysis #
# TukeyHSD(fs_citric_model)
# 
# fs_citric_summary <- fs_citric_acid %>%
#   summarise(
#     mean = mean(value),
#     stderr = std.error(value),
#     .by = c(date)
#   )

##### Figure 5.7 - Box-plots, citric acid in Vibrant and Centenary under each spray treatment #####

# Dodged box-plots of citric acid per treatment and genotype, one facet per
# sampling week (dates are floored to the Monday of their week).
fig7 <- fs_citric_acid %>%
  group_by(genotype) %>%
  ggplot(aes(x = treatment, y = value, group = interaction(treatment, genotype))) +
  # one box per treatment x genotype; outliers are made transparent because the
  # raw points are drawn on top
  geom_boxplot(
    colour = "grey40", fill = NA, width = 0.7,
    outlier.alpha = 0,
    position = position_dodge(0.9)
  ) +
  # raw observations, jittered within their dodged box
  geom_point(
    aes(colour = genotype),
    size = 1.8, alpha = 0.4,
    position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.1)
  ) +
  # group means
  geom_point(
    stat = "summary", fun = mean,
    size = 3, color = "#8a0f00",
    position = position_dodge(0.9)
  ) +
  scale_x_discrete(name = "Treatment") +
  scale_y_continuous(
    name = "Citric Acid\n (mg/g Fresh Weight)",
    limits = c(6, 14),
    breaks = seq(6, 14, by = 1)
  ) +
  scale_color_manual(name = "Genotype", values = pal_frontiers("default")(10)[c(2, 8)]) +
  facet_wrap(
    format(lubridate::floor_date(as.Date(date), unit = "week", week_start = 1), "%b %d") ~ .,
    ncol = 2
  ) +
  theme_thesis() +
  theme(legend.text = element_text(face = "italic"))

# Write the figure to a date-stamped ".png" file.
# The specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  plot = fig7,
  filename = paste0("./figures/chap-06/", format(Sys.Date(), "%Y%m%d"), "_FS_citricacid.png"),
  width = 6.5, height = 4, units = "in",
  dpi = 900, limitsize = FALSE
)

##### ANOVA summary for iron #####
# Iron records only: de-duplicated, strictly positive values, and no row with a
# missing value in any column.
fs_iron <- fs_compiled_data %>%
  distinct() %>%
  filter(measure_var == "iron_ppm" & value > 0) %>%
  ungroup() %>%
  drop_na()

# # stats #
# fs_iron_model <- aov(value ~ genotype*treatment + block, data = fs_iron)
# summary(fs_iron_model)
# 
# # test and visually assess procedure assumptions #
# shapiro.test(fs_iron_model$value) # normality
# leveneTest(value ~ genotype*treatment*date, data = fs_iron) # homogeneity of variances
# plot(fs_iron_model) # visual assessment
# 
# # post-hoc analysis #
# TukeyHSD(fs_iron_model)
# 
# fs_iron %>%
#   summarise(
#     mean = mean(value),
#     stderr = std.error(value),
#     .by = c(treatment)
#   )

##### Figure 5.8 - Box-plots, iron content in Vibrant and Centenary under each spray treatment #####

# Dodged box-plots of berry iron content per treatment and genotype
# (a single panel, no faceting).
fig8 <- fs_iron %>%
  filter(str_detect(measure_var, "iron")) %>%
  group_by(genotype, measure_var) %>%
  ggplot(aes(x = treatment, y = value, group = interaction(treatment, genotype))) +
  # one box per treatment x genotype; outliers are made transparent because the
  # raw points are drawn on top
  geom_boxplot(
    colour = "grey40", fill = NA, width = 0.7,
    outlier.alpha = 0,
    position = position_dodge(0.9)
  ) +
  # raw observations, jittered within their dodged box
  geom_point(
    aes(colour = genotype),
    size = 1.8, alpha = 0.4,
    position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.1)
  ) +
  # group means
  geom_point(
    stat = "summary", fun = mean,
    size = 3, color = "#8a0f00",
    position = position_dodge(0.9)
  ) +
  scale_x_discrete(name = "Treatment") +
  scale_y_continuous(
    name = "Berry Iron Content\n (ppm)",
    limits = c(16, 42),
    breaks = seq(16, 40, by = 4)
  ) +
  scale_color_manual(name = "Genotype", values = pal_frontiers("default")(10)[c(2, 8)]) +
  theme_thesis() +
  theme(legend.text = element_text(face = "italic"))

# Write the figure to a date-stamped ".png" file.
# The specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  plot = fig8,
  filename = paste0("./figures/chap-06/", format(Sys.Date(), "%Y%m%d"), "_FS_iron.png"),
  width = 6, height = 4, units = "in",
  dpi = 900, limitsize = FALSE
)


##### ANOVA summary for selenium #####

# Selenium records only: de-duplicated, strictly positive values, and no row
# with a missing value in any column.
fs_SEL <- fs_compiled_data %>%
  distinct() %>%
  filter(measure_var == "selenium_ppm" & value > 0) %>%
  ungroup() %>%
  drop_na()

# # stats #
# fs_SEL_model <- aov(value ~ genotype*treatment + block, data = fs_SEL)
# summary(fs_SEL_model)
# 
# # test and visually assess procedure assumptions #
# # data fail to meet procedure assumptions #
# shapiro.test(fs_SEL_model$value) # normality
# leveneTest(value ~ genotype*treatment*date, data = fs_SEL) # homogeneity of variances
# plot(fs_SEL_model) # visual assessment
 
# fs_SEL %>%
#   summarise(
#     mean = mean(value),
#     stderr = std.error(value),
#     .by = c(treatment)
#   )

##### Figure 5.9 - Box-plots, selenium content in Vibrant and Centenary under each spray treatment #####

# Dodged box-plots of berry selenium content per treatment and genotype
# (a single panel, no faceting).
fig9 <- fs_SEL %>%
  filter(str_detect(measure_var, "selenium")) %>%
  group_by(genotype, measure_var) %>%
  ggplot(aes(x = treatment, y = value, group = interaction(treatment, genotype))) +
  # one box per treatment x genotype; outliers are made transparent because the
  # raw points are drawn on top
  geom_boxplot(
    colour = "grey40", fill = NA, width = 0.7,
    outlier.alpha = 0,
    position = position_dodge(0.9)
  ) +
  # raw observations, jittered within their dodged box
  geom_point(
    aes(colour = genotype),
    size = 1.8, alpha = 0.4,
    position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.1)
  ) +
  # group means
  geom_point(
    stat = "summary", fun = mean,
    size = 3, color = "#8a0f00",
    position = position_dodge(0.9)
  ) +
  scale_x_discrete(name = "Treatment") +
  scale_y_continuous(
    name = "Berry Selenium Content\n (ppm)",
    limits = c(0.00, 0.13),
    breaks = seq(0.00, 0.12, by = 0.03)
  ) +
  scale_color_manual(name = "Genotype", values = pal_frontiers("default")(10)[c(2, 8)]) +
  theme_thesis() +
  theme(legend.text = element_text(face = "italic"))

# Write the figure to a date-stamped ".png" file.
# The specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  plot = fig9,
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_FS_SEL.png"),
  width = 6, height = 4, units = "in",
  dpi = 900, limitsize = FALSE
)

##### ANOVA summary for TSS and TSS/TTA #####

# calculate TTA and subsequently the TSS/TTA ratios for each sample
# Result: one row per experiment/week/sample/genotype/treatment/block with the
# columns brix, theoretical_titratable_acidity and tss_tta_ratio.
fs_tss_tta <- fs_compiled_data %>%
  distinct() %>%
  filter(str_detect(measure_var, "brix|citric|malic") & value > 0) %>%
  ungroup() %>%
  drop_na() %>%
  # assign each sampling date to its week label; dates not listed get NA and
  # are dropped in the next step
  mutate(
    week = case_when(
      as.character(date) %in% c("2023/07/03", "2023/07/06") ~ "Jul 03",
      as.character(date) %in% c("2023/07/10", "2023/07/13") ~ "Jul 10"
    )
  ) %>%
  drop_na(week) %>%
  # average the measurements that share a week label
  summarise(
    value = mean(value),
    .by = c(experiment, week, sample, genotype, treatment, block, measure_var)
  ) %>%
  pivot_wider(names_from = "measure_var", values_from = "value") %>%
  # theoretical acidity from the two organic acids, then the sweetness ratio
  mutate(
    theoretical_titratable_acidity = ((citric_acid_mg_per_g_fw / 10) + (malic_acid_mg_per_g_fw / 10)) / (6.4/6.71),
    tss_tta_ratio = (brix / theoretical_titratable_acidity),
    .by = c(sample, genotype, treatment, block)
  ) %>%
  # go long again to keep just the three variables of interest and drop their
  # missing values, then return to one column per variable
  pivot_longer(cols = where(is.numeric), names_to = "measure_var", values_to = "value") %>%
  filter(str_detect(measure_var, "brix|theoretical_titratable_acidity|tss_tta_ratio")) %>%
  drop_na(value) %>%
  pivot_wider(names_from = "measure_var", values_from = "value")

# # stats #
# fs_sweet_model <- aov(tss_tta_ratio ~ genotype*treatment*week + block, data = fs_tss_tta)
# fs_brix_model <- aov(brix ~ genotype*treatment*week + block, data = fs_tss_tta)
# fs_acid_model <- aov(theoretical_titratable_acidity ~ genotype*treatment*week + block, data = fs_tss_tta)
# summary(fs_acid_model)
# summary(fs_sweet_model)
# summary(fs_brix_model)
# 
# # test and visually assess procedure assumptions #
# # swap in each model to compute statistics # 
# shapiro.test(theoretical_titratable_acidity$value) # normality
# leveneTest(value ~ genotype*treatment*date, data = fs_tss_tta) # homogeneity of variances
# plot(theoretical_titratable_acidity) # visual assessment

# # post-hoc analysis #
# # swap in each model to compute statistics # 
# TukeyHSD(fs_sweet_model)


##### Figure 5.10 - Box-plots, TSS in Vibrant and Centenary under each spray treatment #####

# Dodged box-plots of TSS (brix) per treatment and genotype, one facet per week.
# The plot is stored as `fig10`, the name the ggsave() call below uses.
# Previously only `fs_brix_main_vis` was created, so the save step referred to
# an object that did not exist.
fig10 <- fs_tss_tta %>%
  group_by(genotype) %>%
  ggplot(aes(x = treatment, y = brix, group = interaction(treatment, genotype))) +
  # one box per treatment x genotype; outliers are made transparent because the
  # raw points are drawn on top
  geom_boxplot(
    colour = "grey40", fill = NA, width = 0.7,
    outlier.alpha = 0,
    position = position_dodge(0.9)
  ) +
  # raw observations, jittered within their dodged box
  geom_point(
    aes(colour = genotype),
    size = 1.8, alpha = 0.4,
    position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.1)
  ) +
  # group means
  geom_point(
    stat = "summary", fun = mean,
    size = 3, color = "#8a0f00",
    position = position_dodge(0.9)
  ) +
  scale_x_discrete(name = "Treatment") +
  # the axis title is markdown (line break + italics), rendered by
  # element_markdown() in the theme() call below
  scale_y_continuous(
    name = "TSS<br>(\u00B0*Brix*)",
    limits = c(5, 10),
    breaks = seq(5, 10, by = 0.5),
    labels = label_number(accuracy = 0.1)
  ) +
  scale_color_manual(name = "Genotype", values = pal_frontiers("default")(10)[c(2, 8)]) +
  facet_wrap(week ~ ., ncol = 2) +
  theme_thesis() +
  theme(
    legend.text = element_text(face = "italic"),
    axis.title.y = ggtext::element_markdown(
      margin = margin(0, 0.5, 0, 0, unit = "cm")
    )
  )

# keep the earlier object name available in case other code refers to it
fs_brix_main_vis <- fig10

# Write the figure to a date-stamped ".png" file.
# The specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_FS_tss.png"),
  plot = fig10,
  width = 6.5,
  height = 4,
  units = "in",
  dpi = 900,
  limitsize = FALSE
)


##### Figure 5.11 - Box-plots, TSS/TTA ratios in Vibrant and Centenary under each spray treatment #####

# Dodged box-plots of the TSS/TTA ratio per treatment and genotype, one facet
# per week.
# The plot is stored as `fig11`, the name the ggsave() call below uses.
# Previously only `fs_sweet_main_vis` was created, so the save step referred to
# an object that did not exist.
fig11 <- fs_tss_tta %>%
  group_by(genotype) %>%
  ggplot(aes(x = treatment, y = tss_tta_ratio, group = interaction(treatment, genotype))) +
  # one box per treatment x genotype; outliers are made transparent because the
  # raw points are drawn on top
  geom_boxplot(
    colour = "grey40", fill = NA, width = 0.7,
    outlier.alpha = 0,
    position = position_dodge(0.9)
  ) +
  # raw observations, jittered within their dodged box
  geom_point(
    aes(colour = genotype),
    size = 1.8, alpha = 0.4,
    position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.1)
  ) +
  # group means
  geom_point(
    stat = "summary", fun = mean,
    size = 3, color = "#8a0f00",
    position = position_dodge(0.9)
  ) +
  scale_x_discrete(name = "Treatment") +
  scale_y_continuous(
    name = "Sweetness Index\n(TSS / TTA)",
    limits = c(3.8, 9.5),
    breaks = seq(4, 10, by = 1.0),
    labels = label_number(accuracy = 0.1)
  ) +
  scale_color_manual(name = "Genotype", values = pal_frontiers("default")(10)[c(2, 8)]) +
  facet_wrap(week ~ ., ncol = 2) +
  theme_thesis() +
  theme(legend.text = element_text(face = "italic"))

# keep the earlier object name available in case other code refers to it
fs_sweet_main_vis <- fig11

# Write the figure to a date-stamped ".png" file.
# The specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_FS_sweetness.png"),
  plot = fig11,
  width = 6.5,
  height = 4,
  units = "in",
  dpi = 900,
  limitsize = FALSE
)


##### ANOVA summaries for dualex variables  #####

# Dualex variables (names matching chl / nbi / flav), excluding the 2023/07/27
# records; date becomes a factor so it can be used as a discrete variable.
fs_dualex <- fs_compiled_data %>%
  filter(
    str_detect(measure_var, "chl|nbi|flav"),
    date != "2023/07/27"
  ) %>%
  mutate(date = as.factor(date))

# # stats #
# fs_dualex_models <- fs_dualex %>% 
#   ungroup() %>%
#   nest_by(
#     measure_var
#   ) %>%
#   mutate(
#     model = list(aov(value ~ genotype * treatment + block + Error(date), data = data))
#   ) %>%
#   reframe(
#     tidy(
#       model
#     )
#   )

# fs_dualex %>%
#   summarise(
#     mean = mean(value, na.rm = TRUE),
#     se = std.error(value, na.rm = TRUE),
#     .by = c(genotype, measure_var)
#   )

##### Figure 5.12 - Bar graphs of CHL and NBI at different dates during the experiment #####

# Mean and standard error of each dualex variable for every
# date x genotype x treatment combination; measure_var gets a fixed level order.
fig12_prep <- fs_dualex %>%
  mutate(measure_var = factor(measure_var, levels = c("chl", "flav", "nbi"))) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    stderr = std.error(value, na.rm = TRUE),
    .by = c(date, genotype, treatment, measure_var)
  )

# plot for CHL
# Top panel (tag "A") of Figure 5.12: dodged bars of mean CHL +/- SE per date
# and treatment, one facet per genotype. Its x-axis title and labels are blanked.

fig12a <- fig12_prep %>%
  filter(measure_var == "chl") %>%
  ggplot(aes(x = date, y = mean, fill = treatment)) +
  geom_bar(
    mapping = aes(colour = treatment, fill = treatment),
    stat = "identity",
    position = position_dodge(0.75),
    width = 0.7, alpha = 0.8,
    colour = "black", linewidth = 0.2
  ) +
  geom_errorbar(
    aes(ymin = mean - stderr, ymax = mean + stderr),
    position = position_dodge(0.75),
    width = 0.4, linewidth = 0.3
  ) +
  facet_wrap(genotype ~ ., ncol = 2) +
  # labels are positional: they assume exactly these four date levels, in order
  scale_x_discrete(
    name = "Date (Day-Month, 2023)",
    labels = c("28 Jun", "03 Jul", "06 Jul", "13 Jul")
  ) +
  scale_y_continuous(
    name = "CHL\n (Dx Units)",
    limits = c(0, 40),
    breaks = seq(0, 40, by = 8),
    expand = c(0.01, 0.01)
  ) +
  # NOTE(review): the label names "Iron"/"Selenium" do not match the treatment
  # values as recoded earlier in this script ("Fe-NP"/"SEL") - confirm whether
  # these labels have any effect on the legend
  scale_fill_manual(
    name = "Treatment",
    labels = c("Iron" = "FE-NP", "Selenium" = "SEL", "Control" = "Control"),
    values = pal_frontiers("default")(10)[c(8, 2, 3)]
  ) +
  scale_colour_manual(
    name = "Treatment",
    labels = c("Iron" = "FE-NP", "Selenium" = "SEL", "Control" = "Control"),
    values = pal_frontiers("default")(10)[c(8, 2, 3)]
  ) +
  labs(tag = "A") +
  theme_thesis() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    strip.text = element_text(face = "italic")
  )

# plot for NBI
# Bottom panel (tag "B") of Figure 5.12: dodged bars of mean NBI +/- SE per
# date and treatment, one facet per genotype. The facet strips are blanked and
# the date labels are drawn at 45 degrees.

fig12b <- fig12_prep %>%
  filter(measure_var == "nbi") %>%
  ggplot(aes(x = date, y = mean, group = treatment)) +
  geom_bar(
    mapping = aes(colour = treatment, fill = treatment),
    stat = "identity",
    position = position_dodge(0.75),
    width = 0.7, alpha = 0.8,
    colour = "black", linewidth = 0.2
  ) +
  geom_errorbar(
    aes(ymin = mean - stderr, ymax = mean + stderr),
    position = position_dodge(0.75),
    width = 0.4, linewidth = 0.3
  ) +
  facet_wrap(genotype ~ ., ncol = 2) +
  # labels are positional: they assume exactly these four date levels, in order
  scale_x_discrete(
    name = "Date (Day-Month, 2023)",
    labels = c("28 Jun", "03 Jul", "06 Jul", "13 Jul")
  ) +
  scale_y_continuous(
    name = "\n NBI",
    limits = c(0, 22),
    breaks = seq(0, 22, by = 4),
    expand = c(0.01, 0.01)
  ) +
  scale_fill_manual(
    name = "Treatment",
    values = pal_frontiers("default")(10)[c(8, 2, 3)]
  ) +
  # NOTE(review): the label names "Iron"/"Selenium" do not match the treatment
  # values as recoded earlier in this script ("Fe-NP"/"SEL") - confirm whether
  # these labels have any effect on the legend
  scale_colour_manual(
    name = "Treatment",
    labels = c("Iron" = "FE-NP", "Selenium" = "SEL", "Control" = "Control"),
    values = pal_frontiers("default")(10)[c(8, 2, 3)]
  ) +
  labs(tag = "B") +
  theme_thesis() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
    strip.text = element_blank(),
    strip.background = element_blank()
  )

# Stack panel A above panel B with one shared legend on the right; the lower
# panel is given more height than the upper one.
fig12 <- ggarrange(
  fig12a, fig12b,
  nrow = 2, ncol = 1,
  heights = c(1, 1.25),
  common.legend = TRUE, legend = "right"
)

# Write the combined figure to a date-stamped ".png" file.
# The specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  plot = fig12,
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_FS_dualex.png"),
  width = 6.5, height = 5, units = "in",
  dpi = 900, limitsize = FALSE
)

##### Figure 5.13 - Dot plots of strawberry yield profiles #####

# keep only the rows whose `measure_var` matches one of the yield-related
# patterns, and make sure `date` is of class Date (the yield profile plot
# below uses scale_x_date())
yield_profiles <- fs_compiled_data %>%
  filter(str_detect(measure_var, "number|weight|waste|size|class1")) %>%
  # values are kept as recorded: the per-plant conversion (value / 4) is not
  # needed for this plot
  mutate(date = as.Date(date))

# Yield weight (grams) against date, one facet per genotype:
#   - translucent dots: individual observations, dodged by treatment
#   - filled diamonds : mean per treatment at each date
#   - dark red bar    : mean of all observations at each date (this layer has
#                       no treatment grouping)
fig13 <- yield_profiles %>%
  filter(measure_var == "yield_weight_grams") %>%
  ggplot(
    aes(
      x = date,
      y = value
    )
  ) +
  geom_point(
    aes(
      fill = treatment,
      colour = treatment,
      group = treatment
    ),
    size = 1.5,
    alpha = 0.4,
    position = position_dodge(1.9)
  ) +
  # shape 23 is a fillable diamond: the fill carries the treatment colour and
  # the outline is a constant black, so `colour` is not mapped in this layer
  geom_point(
    aes(
      fill = treatment,
      group = treatment
    ),
    stat = "summary",
    fun = mean,
    shape = 23,
    size = 2,
    colour = "black",
    position = position_dodge(1.9)
  ) +
  # only `fun` is supplied, so the crossbar is drawn as a single line at the mean
  stat_summary(
    fun = mean,
    geom = "crossbar",
    colour = "#8a0f00",
    linewidth = 0.2
  ) +
  scale_x_date(
    "Date",
    date_breaks = "3 days",
    date_labels = "%d %b"
  ) +
  # NOTE(review): fixed limits apply to every facet, so `scales = "free_y"`
  # below cannot change the y range between genotypes; observations outside
  # 0-400 would also be dropped before the means are computed - confirm the
  # data stay within this range.
  scale_y_continuous(
    "Yield (grams)",
    limits = c(0, 400),
    breaks = seq(0, 400, by = 100)
  ) +
  scale_color_manual(
    name = "Treatment",
    values = pal_frontiers("default")(10)[c(2, 8, 10)]
  ) +
  scale_fill_manual(
    name = "Treatment",
    values = pal_frontiers("default")(10)[c(2, 8, 10)]
  ) +
  facet_wrap(
    genotype ~ .,
    ncol = 1,
    scales = "free_y"
  ) +
  theme_thesis() +
  theme(
    axis.text.x = element_text(
      angle = 45,
      vjust = 1,
      hjust = 1
    )
  )


# save the yield profile plot to a date-stamped ".png" file.
# Written to "./figures/" like the other figures saved in this script (the
# previous "./../../figures/chap-06/" path dated from the old chapter order).
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_FS_yield_profiles.png"),
  plot = fig13,
  width = 6.5,
  height = 5,
  units = "in",
  dpi = 900,
  limitsize = FALSE
)

##### ANOVA summaries for selected yield parameters #####

# Long-format table of the derived yield parameters (one row per
# genotype x treatment x block x parameter), built from the totals of the
# yield-related variables for each sample.
yield <- fs_compiled_data %>%
  filter(
    str_detect(measure_var, "class|waste|yield|berry")
  ) %>%
  # total `value` over all rows that share sample and measurement variable
  summarise(
    value = sum(value, na.rm = TRUE),
    .by = c(experiment, sample, genotype, treatment, block, measure_var)
  ) %>%
  # one column per measurement variable so the derived parameters can be
  # computed row by row
  pivot_wider(
    names_from = "measure_var",
    values_from = "value"
  ) %>%
  # all calculations are elementwise, so no grouping is required.
  # Dividing by 4 gives the per-plant value (presumably four plants per
  # sample - confirm against the experimental design).
  mutate(
    yield_per_plant = yield_weight_grams / 4,
    class1_number_per_plant = class1_number / 4,
    class2_waste_number_per_plant = (class2_number + waste_number) / 4,
    # waste berries are not counted in the denominator
    avg_berry_size = yield_weight_grams / (class1_number + class2_number)
  ) %>%
  dplyr::select(
    genotype,
    treatment,
    block,
    yield_per_plant,
    class1_number_per_plant,
    class2_waste_number_per_plant,
    avg_berry_size
  ) %>%
  # name the value columns explicitly so an identifier column (e.g. a numeric
  # `block`) can never be melted into the values by accident
  pivot_longer(
    cols = c(
      yield_per_plant,
      class1_number_per_plant,
      class2_waste_number_per_plant,
      avg_berry_size
    ),
    names_to = "measure_var",
    values_to = "value"
  ) %>%
  arrange(
    measure_var
  )

# yield_models <- yield %>% 
#   pivot_longer(
#     where(is.numeric),
#     names_to = "measure_var",
#     values_to = "value"
#   ) %>% 
#   ungroup() %>%
#   nest_by(
#     measure_var
#   ) %>%
#   mutate(
#     model = list(aov(value ~ genotype * treatment + block, data = data))
#   ) %>%
#   reframe(
#     tidy(
#       model
#     )
#   )
# 
# yield %>%
#   summarise(
#     mean = mean(value, na.rm = TRUE),
#     stderr = std.error(value, na.rm = TRUE),
#     .by = c(genotype, measure_var)
#   ) %>%
#   arrange(
#     measure_var
#   )


##### Figure 5.14 - Box-plots for selected yield parameters #####

# Builds one panel of Figure 5.14. All four panels share the same layers and
# styling and differ only in the parameter shown, the y-axis specification and
# whether the x axis is drawn, so they are generated by a single helper.
#
# data        long-format data frame with columns `measure_var`, `treatment`,
#             `genotype` and `value`
# measure     value of `measure_var` to plot
# y_name      y-axis title
# y_limits    numeric vector of length 2 with the y-axis limits
# y_breaks    numeric vector of y-axis breaks
# show_x_axis if FALSE (default) the x-axis text and title are blanked; set to
#             TRUE for the bottom panel of the stacked figure
#
# Returns a ggplot object.
#
# NOTE(review): `y_limits` are scale limits, so observations outside them are
# removed before the box-plot statistics and means are computed - confirm the
# limits cover the full range of each parameter.
fig14_panel <- function(data, measure, y_name, y_limits, y_breaks,
                        show_x_axis = FALSE) {
  panel <- data %>%
    filter(measure_var == .env$measure) %>%
    ggplot(
      aes(
        x = treatment,
        y = value,
        group = interaction(treatment, genotype)
      )
    ) +
    # outliers are hidden here because every observation is drawn by the
    # jittered point layer below
    geom_boxplot(
      outlier.alpha = 0,
      fill = NA,
      width = 0.7,
      position = position_dodge(0.9),
      colour = "grey40"
    ) +
    geom_point(
      aes(
        colour = genotype
      ),
      position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.1),
      size = 1.8,
      alpha = 0.4
    ) +
    # group mean, dodged with the same width as the boxes
    geom_point(
      stat = "summary",
      fun = mean,
      size = 3,
      colour = "#8a0f00",
      position = position_dodge(0.9)
    ) +
    scale_x_discrete(
      name = "Treatment"
    ) +
    scale_y_continuous(
      name = y_name,
      limits = y_limits,
      breaks = y_breaks
    ) +
    scale_color_manual(
      name = "",
      values = pal_frontiers("default")(10)[c(2, 8)]
    ) +
    theme_thesis() +
    theme(
      legend.text = element_text(
        face = "italic"
      )
    )

  if (!show_x_axis) {
    panel <- panel +
      theme(
        axis.text.x = element_blank(),
        axis.title.x = element_blank()
      )
  }

  panel
}

# yield per plant
fig14_ypp <- fig14_panel(
  yield,
  measure = "yield_per_plant",
  y_name = "Yield per Plant\n (grams)",
  y_limits = c(100, 350),
  y_breaks = seq(100, 350, by = 50)
)

# mean berry size
fig14_bs <- fig14_panel(
  yield,
  measure = "avg_berry_size",
  y_name = "Berry Size\n (grams)",
  y_limits = c(11, 16),
  y_breaks = seq(11, 16, by = 1)
)

# number of class I berries per plant
# breaks step by 5 (5, 10, 15, 20) so the ticks span the full axis range; the
# previous step of 4 produced 5, 9, 13, 17 and left the upper limit unlabelled
fig14_c1b <- fig14_panel(
  yield,
  measure = "class1_number_per_plant",
  y_name = "Class I\n (berries/plant)",
  y_limits = c(5, 20),
  y_breaks = seq(5, 20, by = 5)
)

# number of class II and waste berries per plant
# bottom panel of the stacked figure, so it keeps the treatment axis
fig14_c2b <- fig14_panel(
  yield,
  measure = "class2_waste_number_per_plant",
  y_name = "Class II & Waste\n (berries/plant)",
  y_limits = c(2, 8),
  y_breaks = seq(2, 8, by = 1),
  show_x_axis = TRUE
)


# stack the four panels vertically with a single shared legend underneath;
# the last panel gets extra height (1 vs 0.75) because it is the only one
# that draws the treatment axis
fig14 <- ggarrange(
  fig14_ypp,
  fig14_bs,
  fig14_c1b,
  fig14_c2b,
  nrow = 4,
  heights = c(0.75, 0.75, 0.75, 1),
  legend = "bottom",
  common.legend = TRUE
)

# write the combined figure to a date-stamped ".png" file (YYYYMMDD prefix).
# These dimensions/resolution have been found to maintain good image quality
# when inserting figures in .docx documents
ggsave(
  plot = fig14,
  filename = paste0(
    "./figures/",
    format(Sys.Date(), "%Y%m%d"),
    "_fs_yield.png"
  ),
  units = "in",
  width = 5.5,
  height = 7,
  dpi = 900,
  limitsize = FALSE
)








