# notes
# tidied code originally written in `chap-04_analysis.Rmd`

# changes
# preparation of adjusted figure 4.4 to meet examiner's suggested amendments - include everbearer/junebearer info
# moved retailer anonymisation code to StrawbRetailers_v1.1.0.R
# adjusted fig1 plotting code to accommodate the move of pseudonymisation code

# load required packages
pacman::p_load(
  tidyverse,
  data.table,
  broom,
  janitor,
  plotrix,
  ggpmisc,
  ggstatsplot,
  viridis,
  corrr,
  ggcorrplot,
  ggsci,
  scales,
  car,
  ggpubr,
  rcompanion,
  ggridges,
  gt,
  gtsummary,
  hrbrthemes,
  treemapify,
  GGally,
  ggrepel,
  rnaturalearth,
  rnaturalearthdata,
  sf,
  countrycode,
  flextable,
  ftExtra
)


# loading required datasets
# datasets derived from 
#   '~/chapter-04_strawberry_analytical_survey/code/datacompilation_SS_v0.0.4.R' and 
#   '~/chapter-04_strawberry_analytical_survey/code/StrawbRetailers_v1.0.2.R'

# Compiled strawberry survey dataset. The code further down filters it on
# `measure_var` and reads `value`, so it is used in long format.
ss_compiled_data <- readr::read_csv(
  file = "~/chapter-04_strawberry_analytical_survey/data/processed/20241002_SS_compiled_dataset.csv"
)

# Kantar retailer-level strawberry data.
kantar_valume <- readr::read_csv(
  file = "~/chapter-04_strawberry_analytical_survey/data/processed/20250625_SS_kantar_retailers_strawberries.csv"
)


# function to specify theme arguments that will be applied to all figures to maintain consistency
# Shared ggplot2 theme applied to every thesis figure for a consistent look.
# Starts from ggstatsplot's theme and then: removes axis ticks and grid lines,
# draws grey axis lines and a thin grey panel border, makes facet strip labels
# bold on a transparent background, and pads the axis titles by 0.5 cm.
# Takes no arguments; returns a theme object to add to a plot with `+`.
theme_thesis <- function() {
  blank <- element_blank()

  # axis title with a margin (cm) on the side facing the panel
  padded_title <- function(top = 0, right = 0) {
    element_text(margin = margin(top, right, 0, 0, unit = "cm"))
  }

  overrides <- theme(
    axis.ticks = blank,
    axis.line = element_line(colour = "grey50"),
    panel.grid = element_line(color = "#b4aea9"),
    panel.grid.minor = blank,
    panel.grid.major.x = blank,
    panel.grid.major.y = blank,
    plot.tag.position = c(0.12, 1),
    strip.text = element_text(face = "bold"),
    strip.background = element_rect(fill = NA, color = NA),
    strip.placement = "inside",
    panel.border = element_rect(color = "grey50", fill = NA, linewidth = 0.5),
    axis.title.x = padded_title(top = 0.5),
    axis.title.y = padded_title(right = 0.5)
  )

  theme_ggstatsplot() + overrides
}


##### Figure 4.1 - Retailers Treemap #####

# Total strawberry volume (kg) per retailer, largest first. Retailers are then
# relabelled "A", "B", ... in rank order so that no real names reach the figure.
# Reads `kantar_valume`, the Kantar dataset loaded at the top of this script;
# the previous source, `valume_strawb_retailer`, is never created here.
kantar_retailer_lvl <- kantar_valume %>%
  summarise(
    volume_kg = sum(volume_kg),
    .by = retailer_id
  ) %>%
  arrange(desc(volume_kg)) %>%
  mutate(
    # LETTERS has 26 entries: more than 26 retailers would produce NA labels
    retailer_id = as_factor(LETTERS[seq_len(n())]) # Anonymise retailer names
  )

# see https://r-graph-gallery.com/treemap.html for excellent examples of treemaps

# Treemap in which each tile is one anonymised retailer, sized by volume
# (tonnes) and labelled with its share of the total volume.
fig1 <- kantar_retailer_lvl %>%
  mutate(
    volume_tonnes = volume_kg / 1000,
    # relative market share (%), rounded to 2 d.p., for ease of comparison
    perc_market = round((volume_tonnes / sum(volume_tonnes)) * 100, digits = 2)
  ) %>%
  ggplot(
    aes(
      area = volume_tonnes,
      fill = retailer_id,
      label = paste0("Retailer ", retailer_id, "\n\n", perc_market, " %")
    )
  ) +
  geom_treemap(colour = "black") +
  geom_treemap_text(colour = "black", place = "centre", size = 12) +
  scale_fill_brewer(palette = "Spectral") +
  theme(legend.position = "none")

# Write the treemap to a date-stamped ".png" file.
# The specified dimensions have been found to maintain good image quality when
# inserting figures in .docx documents
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_SS_StrawberryRetailers.png"),
  plot = fig1,
  width = 6, height = 4, units = "in",
  dpi = 900,
  limitsize = FALSE
)


##### Figure 4.3 - Seasonal availability of UK-marketed fresh strawberries #####

# One row per unique combination of the sample-level variables below; keeping
# only these columns and de-duplicating collapses the repeated rows that the
# compiled dataset holds for each sample.
ss_counts <- ss_compiled_data %>%
  distinct(sample_name, product_origin_country, month, purchase_date, variety)

# create time-series line graph
# Time-series line graph of the number of samples counted in each month.
fig3 <- ss_counts %>%
  mutate(month = lubridate::as_date(month, format = "%m%y")) %>%
  count(month) %>%
  ggplot(aes(x = month, y = n)) +
  geom_line(colour = "#164194FF", linewidth = 1.2) +
  scale_x_date("Date", date_breaks = "1 months", date_labels = "%b-%Y") +
  scale_y_continuous(
    "Number of Samples",
    limits = c(0, 50),
    breaks = seq(0, 50, by = 5)
  ) +
  theme_thesis()

# Write the line graph to a date-stamped ".png" file.
# The specified dimensions have been found to maintain good image quality when
# inserting figures in .docx documents
ggsave(
  filename = paste0("../../figures/chap-04/", format(Sys.Date(), "%Y%m%d"), "_SS_sampling_months.png"),
  plot = fig3,
  width = 6, height = 4, units = "in",
  dpi = 900,
  limitsize = FALSE
)

##### Figure 4.4 - Relative and actual sampling frequencies of unique strawberry varieties #####

# generate lollipop plot of actual variety frequencies

# Number of samples per variety; `variety` becomes a factor ordered by that
# count so the lollipop plot is drawn from least to most frequently sampled.
fig4a_prep <- ss_counts %>%
  count(variety) %>%
  mutate(variety = forcats::fct_reorder(variety, n))

# Lollipop plot of sample counts per variety. The axes are flipped so that
# varieties run down the page; variety labels and axis title are blanked.
fig4a <- fig4a_prep %>%
  ggplot(aes(x = variety)) +
  # stem of each lollipop, from zero up to the count
  geom_segment(
    aes(xend = variety, y = 0, yend = n),
    colour = "lightgray",
    linewidth = 1
  ) +
  # head of each lollipop
  geom_point(aes(y = n), colour = "#164194FF", size = 1.8) +
  # could add text labels, but it makes the plot quite cluttered
  # geom_text(
  #   aes(x = variety, y = n, label = n),
  #   size = 3,
  #   hjust = -1.5
  # ) +
  scale_y_continuous(
    "Number of Samples",
    limits = c(0, 35),
    breaks = seq(0, 35, by = 5),
    expand = c(0, 0)
  ) +
  scale_x_discrete("Strawberry Variety") +
  coord_flip() +
  theme_thesis() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_line(),
    axis.title.x = element_text(margin = margin(1.4, 0, 0, 0, unit = "cm")),
    axis.text.y = element_blank(),
    axis.title.y = element_blank()
  )

# generate heatmap of relative monthly frequencies of strawberry varieties

# Number of unique samples of each variety in each month. `month` is parsed to
# a Date before de-duplicating; the result is sorted by month, then variety.
fig4b_prep <- ss_compiled_data %>%
  select(
    sample_name, month, season, product_quality, product_origin_country, variety
  ) %>%
  mutate(month = lubridate::as_date(month, format = "%m%y")) %>%
  distinct() %>%
  count(month, variety) %>%
  arrange(month, variety)


# Heatmap input: every variety x month combination over the 12 sampled months
# (Mar-2023 to Feb-2024). Months in which a variety was not sampled get a count
# of zero, and `perc_month` is each variety's share of that month's samples.
#
# This replaces a pipeline that could not run: it joined an object that is
# never created (`ss_genotype_xplr`) and summarised by `season`, a column that
# no longer exists after `count(month, variety)`, while the plot below needs
# `month`. NOTE(review): the monthly share is a reconstruction from the plot's
# aesthetics (x = month, fill = perc_month) - confirm against the thesis figure.
heatmap_months <- seq(as.Date("2023-03-01"), as.Date("2024-02-01"), by = "month")

fig4b_hm_prep <- fig4b_prep %>%
  distinct(variety) %>%
  tidyr::crossing(month = heatmap_months) %>%
  left_join(
    fig4b_prep,
    by = c("variety", "month")
  ) %>%
  replace_na(list(n = 0)) %>%
  mutate(
    perc_month = n / sum(n),
    .by = month
  )

# Kept from the original script; nothing in this file reads it.
levels <- ss_counts$variety

# Heatmap of relative monthly sampling frequency per variety. Varieties are
# ordered by their overall sample count (`n.y`, from `fig4a_prep`) so the rows
# line up with the lollipop plot when the two are combined.
fig4b <- fig4b_hm_prep %>%
  left_join(
    fig4a_prep,
    by = "variety"
  ) %>%
  mutate(variety = fct_reorder(variety, `n.y`)) %>%
  ggplot(aes(x = month, y = variety, fill = perc_month)) +
  geom_tile() +
  scale_x_date(
    "Date",
    date_breaks = "1 month",
    date_labels = "%b-%Y",
    expand = c(0, 0)
  ) +
  scale_y_discrete("Strawberry Variety") +
  scale_fill_distiller(
    name = "Number of Samples (%)",
    palette = "PuBu",
    direction = 1
  ) +
  theme_thesis() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_line(),
    panel.border = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )


# combine the heatmap and lollipop into an integrated figure
# Heatmap (left, two thirds of the width) and lollipop plot (right) side by side.
fig4 <- ggarrange(
  fig4b, fig4a,
  nrow = 1,
  ncol = 2,
  widths = c(2, 1),
  heights = c(2, 1)
)

# save the combined plot to ".png" file.
# The specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
# `plot` is `fig4`, the object created just above; the previous value
# (`ss_variety_combine`) is not defined anywhere in this script.
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_SS_genotype_heatmap_dots_timeseries_combi.png"),
  plot = fig4,
  width = 6,
  height = 6.5,
  units = "in",
  dpi = 900,
  limitsize = FALSE
)

##### Figure 4.5 - Geographic origins of UK-marketed fresh strawberries  #####

# Number of unique samples per country of origin (ISO-3 code) and season.
# "Scotland" is recoded to "United Kingdom" before the country-code lookup.
fig5_prep <- ss_compiled_data %>%
  select(
    sample_name, product_origin_country, season, month, purchase_date, variety
  ) %>%
  distinct() %>%
  mutate(
    product_origin_country = str_replace(product_origin_country, "Scotland", "United Kingdom"),
    iso_a3 = countrycode(product_origin_country, "country.name", "iso3c")
  ) %>%
  # count() has no `.by` argument: passing `.by = season` only worked because
  # it created a grouping column literally named `.by`, which then had to be
  # renamed. Counting by both variables directly gives the same table.
  count(iso_a3, season)

# prepare world map plotting coordinates 

# Mollweide projection string (not referenced again in the code below).
target_crs <- "+proj=moll"

# Medium-scale country polygons as an sf object, without Antarctica.
world_map <- filter(
  ne_countries(scale = "medium", returnclass = "sf"),
  admin != "Antarctica"
)

# Two corner points (longitude, latitude; CRS 4326) as a coordinate matrix with
# X/Y columns; used as the x/y limits of the Figure 4.5 map.
ss_euro_africa_coord <- st_coordinates(
  st_sfc(
    st_point(c(-18.81, 20.96)),
    st_point(c(40.52, 63.07)),
    crs = 4326
  )
)


# Map data for the seasonal choropleth: every country polygon is repeated once
# per season, joined to the seasonal sample counts, restricted to Europe,
# Africa and Asia, and given `perc_season`, its percentage of that season's
# samples.
fig5a_prep <- world_map %>%
  slice(
    rep(seq_len(n()), times = 4) # four stacked copies of the map
  ) %>%
  dplyr::mutate(
    # One season per copy of the map, used to facet the plot. `each` labels the
    # copies block by block, so every country gets all four seasons. The
    # previous `times = n()/4` cycled the seasons row by row, which only pairs
    # each country with four different seasons when the number of countries
    # happens to be odd.
    season = factor(
      rep(c("winter", "spring", "summer", "autumn"), each = n() / 4)
    )
  ) %>%
  left_join(
    fig5_prep,
    by = c("iso_a3", "season")
  ) %>%
  filter(continent %in% c("Europe", "Africa", "Asia")) %>%
  dplyr::mutate(
    perc_season = (n / sum(n, na.rm = TRUE)) * 100,
    .by = season
  )


# Facet labels: lower-case season keys mapped to title-case display names.
season_names <- c(
  "spring" = "Spring", "summer" = "Summer",
  "autumn" = "Autumn", "winter" = "Winter"
)

# Choropleth map, one facet per season, filled by each country's percentage of
# that season's samples; countries without samples are drawn white.
fig5a <- fig5a_prep %>%
  dplyr::mutate(
    # put the facets in calendar order rather than alphabetical order
    season = factor(season, levels = c("spring", "summer", "autumn", "winter"))
  ) %>%
  ggplot() +
  geom_sf(aes(fill = perc_season)) +
  # could add labels, but plot is too small to not get cluttered
  # ggrepel::geom_label_repel(
  #   aes(
  #     label = round((perc_season * 100), digits = 2),
  #     geometry = geometry
  #   ),
  #   stat = "sf_coordinates",
  #   min.segment.length = 0.1,
  #   segment.size = 0.3,
  #   size = 2.5,
  #   label.padding = 0.2,
  #   box.padding = 0.5
  # ) +
  facet_wrap(vars(season), labeller = as_labeller(season_names)) +
  scale_fill_bs5(
    name = "(%)",
    limits = c(0, 100),
    breaks = seq(0, 100, by = 25),
    palette = "blue",
    na.value = "white"
  ) +
  labs(tag = "A") +
  coord_sf(
    xlim = ss_euro_africa_coord[, "X"],
    ylim = ss_euro_africa_coord[, "Y"],
    expand = FALSE
  ) +
  theme_thesis() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    legend.position = "right",
    plot.tag = element_text(face = "bold"),
    strip.background = element_rect(fill = "grey75", color = NA)
  )

# generate stacked bar graph of sampling frequency from each recorded country of origin in each season
# Stacked bar chart of the number of samples from each country of origin in
# each season. Named `fig5b` (was `fig5_b`) because that is the object the
# ggsave() call further down writes to file, and it matches `fig5a`.
fig5b <- fig5_prep %>%
  mutate(
    season = factor(
      str_to_title(season),
      levels = c("Spring", "Summer", "Autumn", "Winter")
    )
  ) %>%
  ggplot(
    aes(
      x = season,
      y = n,
      fill = iso_a3,
      group = -n # stack the segments within each bar by count
    )
  ) +
  geom_bar(
    position = "stack",
    stat = "identity",
    colour = "black",
    alpha = 0.8
  ) +
  # print the count inside a segment only when it is above 5
  geom_text(
    aes(label = if_else(n > 5, as.character(n), "")),
    colour = "black",
    size = 2.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_y_continuous(
    "Number of Samples",
    limits = c(0, 110),
    breaks = seq(0, 110, by = 15)
  ) +
  scale_x_discrete("Season") +
  scale_fill_brewer(
    palette = "RdYlBu",
    direction = -1,
    # labels are positional: they rely on the iso_a3 codes sorting as
    # BEL, EGY, ESP, GBR, JOR, MAR, NLD, PRT
    labels = c("Belgium", "Egypt", "Spain", "United Kingdom", "Jordan", "Morocco", "Netherlands", "Portugal")
  ) +
  labs(
    fill = "Country of Origin",
    tag = "B"
  ) +
  theme_thesis() +
  theme(
    legend.position = "right",
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )


# Couldn't get the aspect ratios correct in R, so the two plots were combined
# manually in Microsoft PowerPoint. Earlier attempt kept for reference:
# ss_map_combine <- ggarrange(
#   ss_origin_map, map_stackbar,
#   ncol = 1,
#   nrow = 2,
#   widths = c(4,1)
# )

# Each panel is written to its own date-stamped ".png" file.
# The specified dimensions have been found to maintain good image quality when
# inserting figures in .docx documents

# panel B: stacked bar chart
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_SS_seasonal_origin_stack.png"),
  plot = fig5b,
  width = 5, height = 5, units = "in",
  dpi = 900,
  limitsize = FALSE
)

# panel A: seasonal map
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_SS_seasonal_origin_map.png"),
  plot = fig5a,
  width = 7, height = 4, units = "in",
  dpi = 900,
  limitsize = FALSE
)

##### Figure 4.6 - HMRC strawberry imports #####

# see ~/chapter-04_strawberry_analytical_survey/README.txt for guidance on accessing the relevant HMRC data
# HMRC strawberry trade data: column names are cleaned to snake_case and only
# the month, country code and weight (kg) columns are kept.
hmrc_trade_data <- "./data/20240815_HMRC_strawberrytradedata_2023_2024.csv" %>%
  read_csv() %>%
  clean_names() %>%
  select(month, country_code, weight_kg)

# Cleaned HMRC records shared by the two summaries below: the "year-month"
# field is split, country code "XS" is dropped, each month is assigned to a
# season, and countries outside the strawberry survey sample set are grouped
# as "Rest of World".
hmrc_clean <- hmrc_trade_data %>%
  separate(
    month,
    into = c("year", "month"),
    sep = "-"
  ) %>%
  filter(country_code != "XS") %>%
  dplyr::mutate(
    month = as.numeric(month),
    season = case_when(
      month == 12 | between(month, 1, 2) ~ "winter",
      between(month, 3, 5) ~ "spring",
      between(month, 6, 8) ~ "summer",
      between(month, 9, 11) ~ "autumn"
    ),
    iso_a3 = countrycode(country_code, "iso2c", "iso3c")
  ) %>%
  mutate(
    iso_a3_group_region = if_else( # grouping all countries that aren't in the SS sample set
      str_detect(iso_a3, "BEL|EGY|ESP|JOR|MAR|NLD|PRT"), iso_a3, "Rest of World"
    )
  )

# Import weight per country over the whole period, with each country's share
# of the total (%) and the weight expressed in thousand tonnes (kg / 1e6).
fig6_prep <- hmrc_clean %>%
  summarise(
    weight_kg = sum(weight_kg, na.rm = TRUE),
    .by = c(iso_a3, iso_a3_group_region)
  ) %>%
  select(iso_a3, iso_a3_group_region, weight_kg) %>%
  drop_na() %>%
  mutate(
    perc = round((weight_kg / sum(weight_kg)) * 100, digits = 2),
    weight_thousandton = weight_kg / 1000000
  )

# Seasonal version of the same summary. `hmrc_map_data` is what the Figure 4.6
# map and stacked bar chart read (they join and group on `season`), but it was
# never created in this script.
# NOTE(review): `perc` is computed here as the share of each season's imports,
# to suit a map that is faceted by season - confirm this matches the thesis.
hmrc_map_data <- hmrc_clean %>%
  summarise(
    weight_kg = sum(weight_kg, na.rm = TRUE),
    .by = c(season, iso_a3, iso_a3_group_region)
  ) %>%
  drop_na() %>%
  mutate(
    perc = round((weight_kg / sum(weight_kg)) * 100, digits = 2),
    .by = season
  ) %>%
  mutate(weight_thousandton = weight_kg / 1000000)


# prepare world map plotting coordinates
# Two corner points (longitude, latitude; CRS 4326) as a coordinate matrix with
# X/Y columns; used as the x/y limits of the Figure 4.6 map.
hmrc_map_coord <- st_coordinates(
  st_sfc(
    st_point(c(-172.6, -57.9)),
    st_point(c(59.1, 71.4)),
    crs = 4326
  )
)

# Map data for the seasonal import choropleth: every country polygon is
# repeated once per season and joined to the seasonal import weights in
# `hmrc_map_data` (which must provide `iso_a3`, `season` and `weight_kg`).
# `perc_season` is each country's fraction (0-1) of that season's weight.
fig6a_prep <- world_map %>%
  slice(
    rep(seq_len(n()), times = 4) # four stacked copies of the map
  ) %>%
  dplyr::mutate(
    # `each` labels the copies block by block, so every country gets all four
    # seasons. The previous `times = n()/4` cycled the seasons row by row,
    # which only pairs each country with four different seasons when the
    # number of countries happens to be odd.
    season = factor(
      rep(c("winter", "spring", "summer", "autumn"), each = n() / 4)
    )
  ) %>%
  left_join(
    hmrc_map_data,
    by = c("iso_a3", "season")
  ) %>%
  dplyr::mutate(
    perc_season = weight_kg / sum(weight_kg, na.rm = TRUE),
    .by = season
  )

# Choropleth map of strawberry imports, one facet per season; countries with no
# value are drawn white.
# NOTE(review): the fill is mapped to `perc`, not to the `perc_season` column
# computed in `fig6a_prep` - confirm which of the two is intended.
fig6a <- fig6a_prep %>%
  dplyr::mutate(
    # put the facets in calendar order rather than alphabetical order
    season = factor(season, levels = c("spring", "summer", "autumn", "winter"))
  ) %>%
  ggplot() +
  geom_sf(aes(fill = perc), colour = "#b4aea9") +
  facet_wrap(vars(season), labeller = as_labeller(season_names)) +
  scale_fill_bs5(
    name = "(%)",
    limits = c(0, 100),
    breaks = seq(0, 100, by = 25),
    palette = "blue",
    na.value = "white"
  ) +
  coord_sf(
    xlim = hmrc_map_coord[, "X"],
    ylim = hmrc_map_coord[, "Y"],
    expand = FALSE
  ) +
  theme_thesis() +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    legend.position = "right",
    plot.tag = element_text(face = "bold"),
    strip.background = element_rect(fill = "grey75", color = NA)
  )

# create a stacked bar chart for strawberry import volumes in each season
# Stacked bar chart of strawberry import volume (thousand tonnes) per season,
# split by country of origin, with non-surveyed countries pooled.
fig6b <- hmrc_map_data %>%
  mutate(
    season = factor(
      str_to_title(season),
      levels = c("Spring", "Summer", "Autumn", "Winter")
    )
  ) %>%
  summarise(
    weight_thousandton = sum(weight_thousandton),
    .by = c(iso_a3_group_region, season)
  ) %>%
  arrange(iso_a3_group_region) %>%
  ggplot(
    aes(
      x = season,
      y = weight_thousandton,
      fill = iso_a3_group_region,
      group = -weight_thousandton # stack the segments within each bar by size
    )
  ) +
  geom_col(position = "stack", colour = "black", alpha = 0.8) +
  # print the value inside a segment only when it is above 1 thousand tonnes
  geom_text(
    aes(
      label = if_else(
        weight_thousandton > 1,
        as.character(round(weight_thousandton, digits = 2)),
        ""
      )
    ),
    colour = "black",
    size = 2.5,
    position = position_stack(vjust = 0.5)
  ) +
  scale_y_continuous(
    "Strawberry Imports\n(Thousand Tons)",
    limits = c(0, 32),
    breaks = seq(0, 32, by = 4)
  ) +
  scale_x_discrete("Season") +
  scale_fill_brewer(
    palette = "RdYlBu",
    direction = -1,
    # labels are positional: they rely on the group codes sorting as
    # BEL, EGY, ESP, JOR, MAR, NLD, PRT, "Rest of World"
    labels = c("Belgium", "Egypt", "Spain", "Jordan", "Morocco", "Netherlands", "Portugal", "Rest of World")
  ) +
  labs(fill = "Country of Origin") +
  theme_thesis() +
  theme(
    legend.position = "right",
    legend.title = element_text(hjust = 0.5),
    axis.ticks = element_blank(),
    plot.tag = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

# As with fig5, the aspect ratios couldn't be got right in R, so the two plots
# were combined manually in Microsoft PowerPoint.

# Each panel is written to its own date-stamped ".png" file.
# The specified dimensions have been found to maintain good image quality when
# inserting figures in .docx documents

# stacked bar chart
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_SS_hmrc_mapstackbar.png"),
  plot = fig6b,
  width = 5, height = 5, units = "in",
  dpi = 900,
  limitsize = FALSE
)

# seasonal map
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_SS_hmrc_seasonal_trade_map.png"),
  plot = fig6a,
  width = 6, height = 4, units = "in",
  dpi = 900,
  limitsize = FALSE
)


##### Figure 4.7 - Distribution of measured vitamin C values  #####

# Ascorbic acid measurements only; `month` is replaced by the calendar month
# number of the purchase date, stored as a factor.
ss_distributions <- ss_compiled_data %>%
  filter(measure_var == "ascorbic_acid_mg_100g_fw") %>%
  mutate(month = as.factor(lubridate::month(purchase_date)))

# Descriptive statistics for the sample
# mean(ss_distributions$value, na.rm = T)
# sd(ss_distributions$value, na.rm = T)
# median(ss_distributions$value, na.rm = T)
# min(ss_distributions$value, na.rm = T)
# max(ss_distributions$value, na.rm = T)

# Histogram (density scale, 4-unit bins) with an overlaid density curve of the
# measured ascorbic acid values, plus a dotted vertical line and a "Mean" label.
fig7 <- ss_distributions %>%
  ggplot(aes(value)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 4,
    colour = "grey",
    fill = "white",
    linewidth = 1
  ) +
  geom_density(
    alpha = 0.5,
    fill = "steelblue",
    colour = "steelblue"
  ) +
  # vertical line marking the sample mean. The value is hard-coded;
  # NOTE(review): confirm it still equals mean(ss_distributions$value, na.rm = TRUE).
  # Passed as a constant rather than inside aes() so the line is drawn once
  # instead of once per data row.
  geom_vline(
    xintercept = 60.44,
    colour = "black",
    linetype = "11",
    linewidth = 0.8
  ) +
  scale_x_continuous(
    name = "Total Ascorbic acid\n (mg/100g FW)",
    expand = c(0, 0.8),
    limits = c(0, 130),
    breaks = seq(0, 130, by = 15)
  ) +
  scale_y_continuous(
    name = "Density",
    expand = c(0.01, 0)
  ) +
  # text label next to the mean line
  annotate(
    "text",
    x = 58,
    y = 0.020,
    label = "Mean",
    angle = 90
  ) +
  # was `theme_theis()`, which does not exist; the shared theme is theme_thesis()
  theme_thesis() +
  theme(
    legend.position = "none",
    panel.grid.major.x = element_line()
  )

# save the plot to ".png" file.
# The specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  filename = paste0("../../figures/chap-04/", format(Sys.Date(), "%Y%m%d"), "_SS_ascorbicacid_distribution.png"),
  plot = fig7,
  width = 6,
  height = 4,
  units = "in",
  dpi = 900,
  limitsize = FALSE
)
##### ANOVA Summaries for ascorbate, TSS/TTA, and cost/Kg #####

# Rows whose measured variable name contains "acid", "tss", "brix" or "cost".
# `month` is re-expressed as a "Mon-YYYY" label and stored as a factor whose
# levels run chronologically from Mar-2023 to Feb-2024.
ss_acids <- ss_compiled_data %>%
  filter(str_detect(measure_var, "acid|tss|brix|cost")) %>%
  mutate(
    month = factor(
      format(lubridate::as_date(month, format = "%m%y"), "%b-%Y"),
      levels = c(
        "Mar-2023", "Apr-2023", "May-2023", "Jun-2023",
        "Jul-2023", "Aug-2023", "Sep-2023", "Oct-2023",
        "Nov-2023", "Dec-2023", "Jan-2024", "Feb-2024"
      )
    )
  )

# One-way ANOVA of value ~ month, fitted separately for every measured variable
# in `ss_acids` without assuming equal variances (var.equal = FALSE).
# Refer to this summary table for the statistics reported in the thesis.
ss_acids_models <- ss_acids %>%
  ungroup() %>%
  nest_by(measure_var) %>% # one row per variable; its rows sit in `data`
  mutate(
    model = list(
      oneway.test(value ~ month, data = data, var.equal = FALSE)
    )
  ) %>%
  reframe(tidy(model)) # one tidy row of test results per variable

##### Figure 4.8 - Box- and Violin-plots showing the mean total ascorbic acid in each month #####

# Figures 4.8 - 4.10 are the same plot drawn for three different measured
# variables, so the plotting code lives in one helper.

# Mean of `value` per month; used for the seasonal trend line.
monthly_means <- function(data) {
  summarise(
    data,
    value = mean(value, na.rm = TRUE),
    .by = month
  )
}

# Box- and violin-plot of `value` per month with jittered observations, a large
# red point at each monthly mean and a trend line through those means.
#   data     - rows for ONE measured variable, with columns `month` and `value`
#   trend    - monthly means of the same rows (see monthly_means())
#   y_title  - y-axis title
#   y_limits - y-axis limits, c(min, max)
#   y_breaks - y-axis break positions
# Returns a ggplot object.
plot_monthly_boxviolin <- function(data, trend, y_title, y_limits, y_breaks) {
  ggplot(data, aes(x = month, y = value, group = month)) +
    geom_violin(
      fill = NA,
      color = "grey40",
      width = 0.8,
      linewidth = 0.4,
      position = position_dodge(0.9)
    ) +
    geom_boxplot(
      outlier.alpha = 0, # outliers already appear among the jittered points
      fill = NA,
      width = 0.7,
      position = position_dodge(0.9),
      color = "grey40"
    ) +
    geom_point(
      aes(colour = month),
      position = position_jitterdodge(dodge.width = 0.9, jitter.width = 0.1),
      size = 1.8,
      alpha = 0.4
    ) +
    scale_colour_igv() +
    # make mean values very clear
    geom_point(
      stat = "summary",
      size = 3,
      color = "#8a0f00",
      position = position_dodge(0.9),
      fun = mean
    ) +
    scale_y_continuous(y_title, limits = y_limits, breaks = y_breaks) +
    scale_x_discrete("Date") +
    # add seasonal trend line
    geom_line(
      data = trend,
      aes(x = month, y = value, group = 1),
      colour = "#6F286AFF",
      linewidth = 0.7
    ) +
    theme_thesis() +
    theme(
      legend.position = "none",
      axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
    )
}

# Monthly mean ascorbic acid for the trend line. The filter is required:
# `ss_acids` holds several measured variables, and without it the means mix
# all of them (the previous version had no filter).
fig8_trend_prep <- ss_acids %>%
  filter(measure_var == "ascorbic_acid_mg_100g_fw") %>%
  monthly_means()

# Generate box-violin plot (the trend line now uses `fig8_trend_prep`; the
# previously referenced `ss_acids_trend_prep` is not defined in this script)
fig8 <- ss_acids %>%
  filter(measure_var == "ascorbic_acid_mg_100g_fw") %>%
  plot_monthly_boxviolin(
    trend = fig8_trend_prep,
    y_title = "Total Ascorbic acid\n (mg/100g Fresh Weight)",
    y_limits = c(20, 125),
    y_breaks = seq(20, 120, by = 10)
  )

# save the plot to ".png" file.
# The specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_SS_ascorbicacid_month.png"),
  plot = fig8,
  width = 6,
  height = 4,
  units = "in",
  dpi = 900,
  limitsize = FALSE
)


##### Figure 4.9 - Box- and Violin-plots showing the mean TSS/TTA ratios in each month #####

# monthly mean TSS/TTA ratio for the trend line
fig9_trend_prep <- ss_acids %>%
  filter(measure_var == "tss_tta_ratio") %>%
  monthly_means()

# Generate the box-violin plot
fig9 <- ss_acids %>%
  filter(measure_var == "tss_tta_ratio") %>%
  plot_monthly_boxviolin(
    trend = fig9_trend_prep,
    y_title = "Sweetness\n(TSS/TTA)",
    y_limits = c(3.5, 14),
    y_breaks = seq(3.5, 14, by = 3.5)
  )

# save the plot to ".png" file.
# The specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_SS_tsstta_month.png"),
  plot = fig9,
  width = 6,
  height = 4,
  units = "in",
  dpi = 900,
  limitsize = FALSE
)

##### Figure 4.10 - Box- and Violin-plots showing the retail price (£/Kg) in each month #####

# monthly mean retail price for the trend line
fig10_trend_prep <- ss_acids %>%
  filter(str_detect(measure_var, "cost")) %>%
  monthly_means()

# Generate the box-violin plot (the trend line now uses `fig10_trend_prep`; the
# previously referenced `ss_cost_trend_prep` is not defined in this script)
fig10 <- ss_acids %>%
  filter(str_detect(measure_var, "cost")) %>%
  plot_monthly_boxviolin(
    trend = fig10_trend_prep,
    y_title = "Retail Price\n(£/Kg)",
    y_limits = c(0, 15),
    y_breaks = seq(0, 15, by = 3)
  )

# save the plot to ".png" file.
# The specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
# `plot` is `fig10`, the object created just above; the previous value
# (`ss_cost_vis`) is not defined anywhere in this script.
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_SS_cost_month.png"),
  plot = fig10,
  width = 6,
  height = 4,
  units = "in",
  dpi = 900,
  limitsize = FALSE
)

##### Figure 4.11 - Correlation matrix of cost, ascorbate, and TSS/TTA #####

# wrangle shape of data to be compatible with a correlation matrix
# Wide table for the correlation analysis: one column per measured variable
# whose name contains "cost", "ascorbic" or "tss". Rows with any missing value
# are dropped, and ascorbic acid is also expressed per kg.
fig11_prep <- ss_compiled_data %>%
  select(
    sample_name, purchase_date, product_origin_country, product_quality,
    measure_var, value
  ) %>%
  filter(str_detect(measure_var, "cost|ascorbic|tss")) %>%
  pivot_wider(names_from = "measure_var", values_from = "value") %>%
  drop_na() %>%
  dplyr::mutate(
    # convert ascorbate values from /100g to /Kg to be consistent with cost/Kg
    ascorbic_acid_mg_kg = ascorbic_acid_mg_100g_fw * 10
  )

# summary table of correlation coefficients, as reported in  thesis section 4.3.3
# Pairwise correlations between ascorbic acid (mg/kg), retail price and
# TSS/TTA ratio.
# corr.test() is provided by the psych package, which is not among the packages
# loaded with p_load() at the top of this script, so it is called through its
# namespace. NOTE(review): psych must be installed; consider adding it to the
# p_load() list.
ss_corr_model <- fig11_prep %>%
  select(
    ascorbic_acid_mg_kg,
    cost_per_kg,
    tss_tta_ratio
  ) %>%
  psych::corr.test()


# generate the correlation matrix
# The data are the three columns of `fig11_prep` that the axis labels in
# `cor.vars.names` describe, in the same order. The previously referenced
# `ss_corrmat` is not defined anywhere in this script.
fig11 <- ggcorrmat(
  data = dplyr::select(
    fig11_prep,
    ascorbic_acid_mg_kg,
    cost_per_kg,
    tss_tta_ratio
  ),
  cor.vars.names = c(
    "Total Ascorbic Acid\n(mg/kg)",
    "Retail Price\n(£/Kg)",
    "Sweetness\n(TSS/TTA)"
  ),
  colors = c(
    "#164194FF",
    "white",
    "#F39200FF"
  ),
  matrix.type = "lower",
  pch = 1,
  ggcorrplot.args = list(
    pch.col = "transparent" # remove the crosses for non-significant correlations, as requested by thesis examiner
  )
) +
  theme(
    axis.ticks = element_blank(),
    panel.grid = element_line(color = "#b4aea9"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.major.x = element_blank()
  )

# save the plot to ".png" file.
# The specified dimensions have been found to maintain good image quality when inserting figures in .docx documents
# note: asterisks on significant correlations were manually added to the matrix in Powerpoint having found no simple solution to integrate asterisks with ggcorrmat
ggsave(
  filename = paste0("./figures/", format(Sys.Date(), "%Y%m%d"), "_SS_correlation.png"),
  plot = fig11,
  width = 6,
  height = 4,
  units = "in",
  dpi = 900,
  limitsize = FALSE
)

##### ANOVA summary for product quality #####

# Statistical summaries for measured variables given product quality
# One-way ANOVA of value ~ product_quality, fitted separately for every
# measured variable in `ss_acids` without assuming equal variances.
ss_pq_models <- ss_acids %>%
  ungroup() %>%
  nest_by(measure_var) %>% # one row per variable; its rows sit in `data`
  mutate(
    model = list(
      oneway.test(value ~ product_quality, data = data, var.equal = FALSE)
    )
  ) %>%
  reframe(tidy(model)) # one tidy row of test results per variable


##### Table 4.1 - ascorbate, TSS/TAA, and cost/Kg in different product categories #####

# Summary table: mean (SD) of vitamin C, sweetness and price per product
# category, exported as a Word document.
#
# `ss_acids` is in long format (one row per sample and `measure_var`), so the
# three variables are first spread into their own columns; renaming them
# directly fails because those columns do not exist yet. The identifier columns
# are the same ones used to build `fig11_prep`.
tab1 <- ss_acids %>%
  select(
    sample_name,
    purchase_date,
    product_origin_country,
    product_quality,
    measure_var,
    value
  ) %>%
  filter(
    measure_var %in% c("ascorbic_acid_mg_100g_fw", "tss_tta_ratio", "cost_per_kg")
  ) %>%
  pivot_wider(
    names_from = "measure_var",
    values_from = "value"
  ) %>%
  rename(
    `Vitamin C` = ascorbic_acid_mg_100g_fw,
    `Berry Sweetness` = tss_tta_ratio,
    `Price` = cost_per_kg
  ) %>%
  mutate(
    product_quality = str_to_title(product_quality)
  ) %>%
  tbl_summary(
    by = product_quality,
    include = c(
      `Vitamin C`,
      `Berry Sweetness`,
      `Price`
    ),
    statistic =
      all_continuous() ~ "{mean} ({sd})",
    missing = "no",
    digits = list(all_continuous() ~ c(2, 2))
  ) %>%
  modify_header(all_stat_cols() ~ "**{level}**\n _n_ = {n}") %>%
  modify_spanning_header(all_stat_cols() ~ "**Product Category**") %>%
  # one footnote per row explaining how the variable was measured
  modify_table_styling(
    columns = label,
    rows = label == "Vitamin C",
    footnote = "Measured as Total Ascorbic Acid (mg/100g Fresh Weight)"
  ) %>%
  modify_table_styling(
    columns = label,
    rows = label == "Berry Sweetness",
    footnote = "Measured as the ratio of Total Soluble Solids to Theoretical Titratable Acidity (TSS:TTA)"
  ) %>%
  modify_table_styling(
    columns = label,
    rows = label == "Price",
    footnote = "Expressed as Great British Pounds per Kilogram (£/Kg)"
  ) %>%
  as_flex_table() %>%
  colformat_md() %>% # render the markdown used in the headers
  set_table_properties(width = 1, layout = "autofit")

# write the table to a date-stamped ".docx" file
tab1 %>%
  save_as_docx(
    path = paste0("~/chapter-04_strawberry_analytical_survey/figures/", format(Sys.Date(), "%Y%m%d"), "_SS_product_quality.docx")
  )

