## Post-SDM analysis script for study titled:
## Projecting the Mesoamerican montane-specialist tree community under 
## climate change: Implications for conservation

## POST-SDM SCRIPT CONTAINING:
## - Richness maps current and future
##   + Generate clear layers (no background zeroes) for ArcMap
## - Richness differences map
## - Richness losses and gains
## - Beta turnover matrix
## - Jaccard index calculation
## - Create georeferenced turnover map
## - Area estimation in km2
## - Correlations by taxonomic group, size and latitudinal range
## - Species boxplots by elevation (current, future and combined)
## - Generating plots for atitudinal and altitudinal classification
## - GLMs of range losses by latitudinal and altitudinal classes
##   + Conifers vs Angiosperms range comparison
## - Linear regression of range losses (%) by current extent

# Set working directory to folder storing projections from 'biomod2'
# Load necessary libraries
library(raster)
library(dplyr)
library(tidyr)
library(ggplot2)

######## CURRENT RICHNESS MAP - WITH ALL RUNS LAYER ####################

# Create an empty raster stack to hold the binary maps
empty_stack <- stack()

#EXTRACT 6TH MAP FOR SPECIES WITH FEW OCCURRENCE RECORDS
SpFew <- read.csv("AllSppFewMany.csv") # csv file containing list of spp. with few or many records
SpFew <- levels(as.factor(SpFew$SpeciesFew))
i<-1
allRunsFew <- 6 # 6th layer (combined 5 runs for spp. with few records)

# Loop through the binary maps and stack them
for (i in 1:length(SpFew)) {
  # Read the binary map and choose 6th layer
  binary_map <- stack(paste0("projections/proj_current_", SpFew[i], "_ensemble_TSSbin.tif"))
  allRuns_p <- binary_map[[allRunsFew]]
  
  # Add the binary map to the raster stack
  empty_stack <- addLayer(empty_stack, allRuns_p)
}

#EXTRACT 11TH MAP FOR SPECIES WITH MANY OCCURRENCE RECORDS
SpMany <- read.csv("AllSppFewMany.csv") # csv file containing list of spp. with few or many records
SpMany <- levels(as.factor(SpMany$SpeciesMany))
SpMany <- SpMany[2:107] # delete first empty row
i<-1
allRunsMany <- 11 # 11th layer (combined 10 runs for spp. with many records)

# Loop through the binary maps and stack them
for (i in 1:length(SpMany)) {
  # Read the binary map and choose 11th layer
  binary_map <- stack(paste0("projections/proj_current_", SpMany[i], "_ensemble_TSSbin.tif"))
  allRuns_p <- binary_map[[allRunsMany]]
  
  # Add the binary map to the raster stack
  empty_stack <- addLayer(empty_stack, allRuns_p)
}

# Combine the binary maps to get species richness
current_richness_map <- sum(empty_stack)
plot(current_richness_map, main = "Current TMF tree species richness in Mesoamerica")

# Save the combined richness map to a file
writeRaster(current_richness_map, "current_richness_map.tif", format = "GTiff", overwrite = TRUE)

# Clear layer for ArcMap
current_richness_map[current_richness_map == 0] <- NA
writeRaster(current_richness_map, "current_richness_clear.tif", format = "GTiff", overwrite = TRUE)

######## FUTURE RICHNESS MAP - WITH ALL RUNS LAYER ###################

#EXTRACT 6TH MAP FOR SPECIES WITH FEW OCCURRENCE RECORDS
SpFew <- read.csv("AllSppFewMany.csv")
SpFew <- levels(as.factor(SpFew$SpeciesFew))
i<-1
allRunsFew <- 6 

#COMBINE PROJECTIOS FROM 3 GCMs
for(i in 1:length(SpFew)) {  
  
  EFpreds_futG_bin <- stack(paste0(getwd(),"/projections/proj_futureG_", SpFew[i], "_ensemble_TSSbin.tif"))
  EFpreds_futM_bin <- stack(paste0(getwd(),"/projections/proj_futureM_", SpFew[i], "_ensemble_TSSbin.tif"))
  EFpreds_futU_bin <- stack(paste0(getwd(),"/projections/proj_futureU_", SpFew[i], "_ensemble_TSSbin.tif"))
  
  # Select the 6th layers from each raster variable
  allRuns_G <- EFpreds_futG_bin[[allRunsFew]]
  allRuns_M <- EFpreds_futM_bin[[allRunsFew]]
  allRuns_U <- EFpreds_futU_bin[[allRunsFew]]
  
  # Stack the layers
  stacked_layers <- stack(allRuns_G, allRuns_M, allRuns_U)
  
  # Calculate the sum of the stacked layers without changing the background value
  combined_layer <- (allRuns_G > 0) + (allRuns_M > 0) + (allRuns_U > 0)
  
  # Reclassify values 0 and 1 as 0, and values 2 and 3 as 1
  combined_future <- combined_layer
  combined_future[combined_layer %in% c(0, 1)] <- 0
  combined_future[combined_layer %in% c(2, 3)] <- 1
  
  # Save the 'combined_future' object as a .tif file
  filename <- paste("projFutComb/", SpFew[i], "_futureCombined.tif", sep = "")
  writeRaster(combined_future, filename, format = "GTiff", overwrite = TRUE)
}

#EXTRACT 11TH MAP FOR SPECIES WITH MANY OCCURRENCE RECORDS
SpMany <- read.csv("AllSppFewMany.csv")
SpMany <- levels(as.factor(SpMany$SpeciesMany))
SpMany <- SpMany[2:107]
i<-1
allRunsMany <- 11 

#COMBINE PROJECTIOS FROM 3 GCMs
for(i in 1:length(SpMany)) {  
  
  EFpreds_futG_bin <- stack(paste0(getwd(),"/projections/proj_futureG_", SpMany[i], "_ensemble_TSSbin.tif"))
  EFpreds_futM_bin <- stack(paste0(getwd(),"/projections/proj_futureM_", SpMany[i], "_ensemble_TSSbin.tif"))
  EFpreds_futU_bin <- stack(paste0(getwd(),"/projections/proj_futureU_", SpMany[i], "_ensemble_TSSbin.tif"))
  
  # Select the 6th layers from each raster variable
  allRuns_G <- EFpreds_futG_bin[[allRunsMany]]
  allRuns_M <- EFpreds_futM_bin[[allRunsMany]]
  allRuns_U <- EFpreds_futU_bin[[allRunsMany]]
  
  # Stack the layers
  stacked_layers <- stack(allRuns_G, allRuns_M, allRuns_U)
  
  # Calculate the sum of the stacked layers without changing the background value
  combined_layer <- (allRuns_G > 0) + (allRuns_M > 0) + (allRuns_U > 0)
  
  # Reclassify values 0 and 1 as 0, and values 2 and 3 as 1
  combined_future <- combined_layer
  combined_future[combined_layer %in% c(0, 1)] <- 0
  combined_future[combined_layer %in% c(2, 3)] <- 1
  
  # Save the 'combined_future' object as a .tif file
  filename <- paste("projFutComb/", SpMany[i], "_futureCombined.tif", sep = "")
  writeRaster(combined_future, filename, format = "GTiff", overwrite = TRUE)
}

species_data <- read.csv("AllSppLocs_thin.csv")
SpListSpp <- levels(as.factor(species_data$Species))
SpListSpp <- SpListSpp[2:273]
i<-1

#ADD COMBINED PROJECTIONS LAYERS TO CREATE A SINGLE FUTURE RICHNESS MAP
# Create an empty raster stack to hold the binary maps
empty_stack <- stack()

# Loop through the binary maps and stack them
for (i in 1:length(SpListSpp)) {
  file_name <- paste0("projFutComb/", SpListSpp[i], "_futureCombined.tif")
  
  # Read the binary map
  binary_map <- raster(file_name)
  
  # Add the binary map to the raster stack
  empty_stack <- addLayer(empty_stack, binary_map)
}

# Combine the binary maps to get species richness
future_richness_map <- sum(empty_stack)
plot(future_richness_map, main = "Future TMF tree species richness in Mesoamerica")

# Save the combined richness map to a file
writeRaster(future_richness_map, "future_richness_map.tif", format = "GTiff", overwrite = TRUE)

# Clear layer for ArcMap
future_richness_map[future_richness_map == 0] <- NA
writeRaster(future_richness_map, "future_richness_clear.tif", format = "GTiff", overwrite = TRUE)

######## SUBTRACTING FUTURE RICHNESS FROM CURRENT RICHNESS ################

# Import current and future richness maps if needed
#current_richness_map <- raster("current_richness_map.tif")
#future_richness_map <- raster("future_richness_map.tif")

richness_difference <- future_richness_map - current_richness_map
plot(richness_difference)

writeRaster(richness_difference, "richness_difference.tif", format = "GTiff", overwrite = TRUE)

######## SEPARATING GAINS AND LOSSES #######################

# Create a raster for 'richness_loss' containing only negative values
richness_loss <- raster::calc(richness_difference, fun = function(x) ifelse(x < 0, x, NA))
writeRaster(richness_loss, "richness_loss.tif", format = "GTiff", overwrite = TRUE)

# Create a raster for 'richness_gain' containing only positive values
richness_gain <- raster::calc(richness_difference, fun = function(x) ifelse(x > 0, x, NA))
writeRaster(richness_gain, "richness_gain.tif", format = "GTiff", overwrite = TRUE)

######## BETA DIVERSITY MATRICES #########################

## CURRENT DIVERSITY MATRIX

#EXTRACT 6TH MAP FOR SPECIES WITH FEW OCCURRENCE RECORDS
SpFew <- read.csv("AllSppFewMany.csv")
SpFew <- levels(as.factor(SpFew$SpeciesFew))
i<-1
allRunsFew <- 6 

# Define a region mask from one of the binary maps (assuming they all have the same extent)
region_mask <- raster("projections/proj_current_Abies.guatemalensis_ensemble_TSSbin.tif")  # Any Species.name

# Get the dimensions of the mask (assuming all binary maps have the same dimensions)
mask_dim <- dim(region_mask)

# Create an empty matrix to hold the presence/absence data
presence_matrix_few <- matrix(NA, nrow = prod(mask_dim), ncol = length(SpFew),
                              dimnames = list(NULL, SpFew))

# Loop through the binary maps and populate the presence_matrix
for (i in 1:length(SpFew)) {
  # Read the binary map and select 6th later
  binary_map <- stack(paste0("projections/proj_current_", SpFew[i], "_ensemble_TSSbin.tif"))
  allRuns_p <- binary_map[[allRunsFew]]
  
  # Mask the binary map to the region of interest
  masked_map <- mask(allRuns_p, region_mask)
  
  # Convert masked binary map to a matrix and update the presence_matrix
  binary_matrix <- as.matrix(masked_map)
  presence_matrix_few[, i] <- binary_matrix
}

# Add a 'pixel' column containing row numbers
presence_matrix_few <- cbind(pixel = 1:nrow(presence_matrix_few), presence_matrix_few)

#EXTRACT 11TH MAP FOR SPECIES WITH MANY OCCURRENCE RECORDS
SpMany <- read.csv("AllSppFewMany.csv")
SpMany <- levels(as.factor(SpMany$SpeciesMany))
SpMany <- SpMany[2:107]
i<-1
allRunsMany <- 11 

# Create an empty matrix to hold the presence/absence data
presence_matrix_many <- matrix(NA, nrow = prod(mask_dim), ncol = length(SpMany),
                               dimnames = list(NULL, SpMany))

# Loop through the binary maps and populate the presence_matrix
for (i in 1:length(SpMany)) {
  # Read the binary map and select 6th later
  binary_map <- stack(paste0("projections/proj_current_", SpMany[i], "_ensemble_TSSbin.tif"))
  allRuns_p <- binary_map[[allRunsMany]]
  
  # Mask the binary map to the region of interest
  masked_map <- mask(allRuns_p, region_mask)
  
  # Convert masked binary map to a matrix and update the presence_matrix
  binary_matrix <- as.matrix(masked_map)
  presence_matrix_many[, i] <- binary_matrix
}

# Merge the matrices by combining columns
presence_matrix <- cbind(presence_matrix_few, presence_matrix_many)

# Save the presence matrix in RDS format
saveRDS(presence_matrix, "presence_matrix_current.rds")


## FUTURE DIVERSITY MATRIX

#EXTRACT 6TH MAP FOR SPECIES WITH FEW OCCURRENCE RECORDS
SpFew <- read.csv("AllSppFewMany.csv")
SpFew <- levels(as.factor(SpFew$SpeciesFew))
i<-1
allRunsFew <- 6 

# Define a region mask from one of the binary maps (assuming they all have the same extent)
region_mask <- raster("projections/proj_current_Abies.guatemalensis_ensemble_TSSbin.tif")  # Any Species.name

# Get the dimensions of the mask (assuming all binary maps have the same dimensions)
mask_dim <- dim(region_mask)

# Create an empty matrix to hold the presence/absence data
presence_matrix_few <- matrix(NA, nrow = prod(mask_dim), ncol = length(SpFew),
                              dimnames = list(NULL, SpFew))

# Loop through the binary maps and populate the presence_matrix
for (i in 1:length(SpFew)) {
  # Read the binary map and select 6th later
  binary_mapG <- stack(paste0("projections/proj_futureG_", SpFew[i], "_ensemble_TSSbin.tif"))
  binary_mapM <- stack(paste0("projections/proj_futureM_", SpFew[i], "_ensemble_TSSbin.tif"))
  binary_mapU <- stack(paste0("projections/proj_futureU_", SpFew[i], "_ensemble_TSSbin.tif"))
  
  allRuns_G <- binary_mapG[[allRunsFew]]
  allRuns_M <- binary_mapM[[allRunsFew]]
  allRuns_U <- binary_mapU[[allRunsFew]]
  
  # Stack the layers
  stacked_layers <- stack(allRuns_G, allRuns_M, allRuns_U)
  
  # Calculate the sum of the stacked layers without changing the background value
  combined_layer <- (allRuns_G > 0) + (allRuns_M > 0) + (allRuns_U > 0)
  
  # Reclassify values 0 and 1 as 0, and values 2 and 3 as 1
  combined_future <- combined_layer
  combined_future[combined_layer %in% c(0, 1)] <- 0
  combined_future[combined_layer %in% c(2, 3)] <- 1
  
  # Mask the binary map to the region of interest
  masked_map <- mask(combined_future, region_mask)
  
  # Convert masked binary map to a matrix and update the presence_matrix
  binary_matrix <- as.matrix(masked_map)
  presence_matrix_few[, i] <- binary_matrix
}

# Add a 'pixel' column containing row numbers
presence_matrix_few <- cbind(pixel = 1:nrow(presence_matrix_few), presence_matrix_few)

#EXTRACT 11TH MAP FOR SPECIES WITH MANY OCCURRENCE RECORDS
SpMany <- read.csv("AllSppFewMany.csv")
SpMany <- levels(as.factor(SpMany$SpeciesMany))
SpMany <- SpMany[2:107]
i<-1
allRunsMany <- 11 

# Create an empty matrix to hold the presence/absence data
presence_matrix_many <- matrix(NA, nrow = prod(mask_dim), ncol = length(SpMany),
                               dimnames = list(NULL, SpMany))

# Loop through the binary maps and populate the presence_matrix
for (i in 1:length(SpMany)) {
  # Read the binary map and select 6th later
  binary_mapG <- stack(paste0("projections/proj_futureG_", SpMany[i], "_ensemble_TSSbin.tif"))
  binary_mapM <- stack(paste0("projections/proj_futureM_", SpMany[i], "_ensemble_TSSbin.tif"))
  binary_mapU <- stack(paste0("projections/proj_futureU_", SpMany[i], "_ensemble_TSSbin.tif"))
  
  allRuns_G <- binary_mapG[[allRunsMany]]
  allRuns_M <- binary_mapM[[allRunsMany]]
  allRuns_U <- binary_mapU[[allRunsMany]]
  
  # Stack the layers
  stacked_layers <- stack(allRuns_G, allRuns_M, allRuns_U)
  
  # Calculate the sum of the stacked layers without changing the background value
  combined_layer <- (allRuns_G > 0) + (allRuns_M > 0) + (allRuns_U > 0)
  
  # Reclassify values 0 and 1 as 0, and values 2 and 3 as 1
  combined_future <- combined_layer
  combined_future[combined_layer %in% c(0, 1)] <- 0
  combined_future[combined_layer %in% c(2, 3)] <- 1
  
  # Mask the binary map to the region of interest
  masked_map <- mask(combined_future, region_mask)
  
  # Convert masked binary map to a matrix and update the presence_matrix
  binary_matrix <- as.matrix(masked_map)
  presence_matrix_many[, i] <- binary_matrix
}

# Merge the matrices by combining columns
presence_matrix <- cbind(presence_matrix_few, presence_matrix_many)

# Save the presence matrix in RDS format
saveRDS(presence_matrix, "presence_matrix_future.rds")

######## ESTIMATE JACCARD INDEX FOR ALL SITES WITH A LEAST 1 SPECIES ###########

# LOAD MATRICES AND REDUCE THEM BY ELIMINATING N/A AND ABSENCE-ONLY PIXELS

library(betapart)

# Load data if needed
presence_matrix_current <- readRDS("presence_matrix_current.rds")
presence_matrix_future <- readRDS("presence_matrix_future.rds")

# Convert matrices to data frames if needed
presence_matrix_current <- as.data.frame(presence_matrix_current)
presence_matrix_future <- as.data.frame(presence_matrix_future)

# Identify rows with at least 1 species present
filtered_current <- presence_matrix_current %>%
  filter(rowSums(.[, -1], na.rm = TRUE) > 1)

filtered_future <- presence_matrix_future %>%
  filter(rowSums(.[, -1], na.rm = TRUE) > 1)

# Find pixel numbers with at least 1 record
shared_pixels <- union(filtered_current$pixel, filtered_future$pixel)

# Filter rows based on shared pixel numbers
shared_rows_current <- presence_matrix_current %>%
  filter(pixel %in% shared_pixels)

shared_rows_future <- presence_matrix_future %>%
  filter(pixel %in% shared_pixels)

# CALCULATE JACCARD INDEX FOR EACH SITE (PIXEL)

# Create a function to calculate turnover for a single site
calculate_turnover <- function(site) {
  common_species <- sum(shared_rows_current[site, ] & shared_rows_future[site, ])
  total_species <- sum(shared_rows_current[site, ] | shared_rows_future[site, ])
  
  if (is.na(total_species) || total_species == 0) {
    return(0)  # No species, no turnover
  } else {
    return(1 - common_species / total_species)
  }
}

num_sites <- nrow(shared_rows_current)

# Calculate turnover for each site
turnover_values <- numeric(num_sites)
for (i in 1:num_sites) {
  turnover_values[i] <- calculate_turnover(i)
}

# Create a data frame with 'pixel' and 'turnover' columns
pixel_column <- shared_rows_current$pixel
turnover_df <- data.frame(pixel = pixel_column, turnover = turnover_values)

# Print or save the turnover data frame
print(turnover_df)
saveRDS(turnover_df, "beta_turnover.rds")

######## TURNOVER GEOREFERENCED MAP ######################## 

basemap <- raster('current_richness_map.tif')
turnover_df <- readRDS('beta_turnover.rds')

# Assuming 'turnover_df' has columns 'pixel' and 'turnover'
grid_size_x <- 5327
grid_size_y <- 3413
empty_grid <- matrix(NA, nrow = grid_size_y, ncol = grid_size_x)  # Create an empty matrix

# Extract pixel and turnover values from the data frame
pixels <- turnover_df$pixel
turnover_values <- turnover_df$turnover

# Create a custom color palette going from blue to red
color_palette <- colorRampPalette(c("blue", "red"))  # Blue to red gradient

# Map the turnover values to numeric values between 1 and 6
numeric_pixels <- cut(turnover_values, breaks = 6)

# Replace the specified pixels with the numeric values
empty_grid[pixels] <- numeric_pixels

# Create a raster object from the numeric matrix
turnover_raster <- raster(empty_grid)

# Define the extent of the raster object (adjust as needed)
extent(turnover_raster) <- extent(1, grid_size_x, 1, grid_size_y)

# Set the extent of the turnover_raster to match that of the basemap
turnover_raster <- setExtent(turnover_raster, extent(basemap))
writeRaster(turnover_raster, filename = "turnover_raster.tif", format = "GTiff")

######## RICHNESS MAPS AREA ESTIAMTION IN KM2 #############

current_rich <- raster("current_richness_map.tif")
future_rich <- raster("future_richness_map.tif")

# Define the UTM zone appropriate for study area (zone 15 for Mexico)
utm_zone <- 15

# Create a CRS object for the selected UTM zone
utm_crs <- CRS(paste0("+proj=utm +zone=", utm_zone, " +datum=WGS84 +units=m +no_defs"))

# Reproject the raster to the UTM projection with a 1 km? cell size
current_reproj <- projectRaster(current_rich, crs = utm_crs, res = 1000, method = "bilinear")
future_reproj <- projectRaster(future_rich, crs = utm_crs, res = 1000, method = "bilinear")

# Print information about the reprojected raster
print(current_reproj) ; print(future_reproj)

## FREQUENCY TABLES FOR UNIQUE VALUES IN EACH RASTER

# Round 'current_reproj' and 'future_reproj' rasters
current_reproj <- round(current_reproj)
future_reproj <- round(future_reproj)

# Create a function to generate frequency tables for a raster
generate_frequency_table <- function(raster) {
  values <- as.vector(raster)
  freq_table <- table(values)
  freq_df <- data.frame('Value' = as.numeric(names(freq_table)), 'Frequency' = as.numeric(freq_table))
  return(freq_df)
}

# Generate frequency tables for each raster
current_rich_freq_table <- generate_frequency_table(current_rich)
future_rich_freq_table <- generate_frequency_table(future_rich)
current_reproj_freq_table <- generate_frequency_table(current_reproj)
future_reproj_freq_table <- generate_frequency_table(future_reproj)

# Print the frequency tables
print("Current Rich Frequency Table:")
print(current_rich_freq_table)

print("Future Rich Frequency Table:")
print(future_rich_freq_table)

print("Current Reproj Frequency Table:")
print(current_reproj_freq_table)

print("Future Reproj Frequency Table:")
print(future_reproj_freq_table)

# Merge the frequency tables with a left join
combined_freq_table <- merge(current_rich_freq_table, future_rich_freq_table, by = "Value", all = TRUE) %>%
  merge(., current_reproj_freq_table, by = "Value", all = TRUE) %>%
  merge(., future_reproj_freq_table, by = "Value", all = TRUE)

# Rename the columns
colnames(combined_freq_table) <- c("Value", "Current_Rich_Freq", "Future_Rich_Freq", "Current_Reproj_Freq", "Future_Reproj_Freq")

# Fill missing values with 0
combined_freq_table[is.na(combined_freq_table)] <- 0

# Print the combined frequency table
print(combined_freq_table)
write.csv(combined_freq_table, 'TMF_area_projections.csv')

######## CORRELATIONS FOR TAXON, SIZE & RANGE ###############

# Import dataset 
postSDM <- read.csv("SI_SDM_projections.csv", header = T)
postSDM <- as.data.frame(postSDM)

# Convert categorical variables to factors
postSDM$Family <- as.factor(postSDM$Family)
postSDM$Genus <- as.factor(postSDM$Genus)
postSDM$Species <- as.factor(postSDM$Species)
postSDM$Distribution <- as.factor(postSDM$Distribution)

# Select only the numeric (continuous) variables for correlation
numeric_vars <- postSDM[, c('Loss', 'Stable1', 'Gain', 'PercLoss', 'PercGain', 'RangeChange', 'CurrentSize', 'FutNoDisp', 'FutFullDisp')]

# Calculate the correlation matrix
cor_matrix <- cor(numeric_vars, use = "pairwise.complete.obs", method = "pearson")

# Create an empty matrix to store p-values
p_values <- matrix(NA, nrow = ncol(numeric_vars), ncol = ncol(numeric_vars))

# Calculate p-values for each pair of variables
for (i in 1:(ncol(numeric_vars) - 1)) {
  for (j in (i + 1):ncol(numeric_vars)) {
    cor_test_result <- cor.test(numeric_vars[, i], numeric_vars[, j], method = "pearson")
    p_values[i, j] <- cor_test_result$p.value
    p_values[j, i] <- cor_test_result$p.value
  }
}

# Convert the p-values matrix to a data frame with variable names
colnames(p_values) <- colnames(cor_matrix)
rownames(p_values) <- colnames(cor_matrix)

# Print the correlation matrix
print("Correlation Matrix:")
print(cor_matrix)
write.csv(cor_matrix, "correlations_postSDM.csv")

# Print the p-values table
print("P-Values Table:")
print(p_values)
write.csv(p_values, "correlations_postSDM_pvals.csv")

######## GENERATING ELEVATION BOXPLOTS FOR EVERY SPECIES #############

elev <- raster('elev_clip.tif') #elevation layer clipped to study area from DEM

## CURRENT PROJECTIONS BOXPLOTS

presence_matrix_current <- readRDS("presence_matrix_current.rds")
presence_matrix_current <- as.data.frame(presence_matrix_current)

# Sort alphabetically
column_names <- names(presence_matrix_current)[-1]
sorted_column_names <- sort(column_names)
presence_matrix_current <- presence_matrix_current[c(names(presence_matrix_current)[1], sorted_column_names)]

presence_matrix <- presence_matrix_current[c(1, 2:6)] # subset as needed, 5 spp. here as example

# Define a region mask from one of the binary maps (assuming they all have the same extent)
region_mask <- raster("projections/proj_current_Abies.guatemalensis_ensemble_TSSbin.tif")  # Any Species.name

# Get the dimensions of the mask (assuming all binary maps have the same dimensions)
mask_dim <- dim(region_mask)

# Mask the elevation map to the region of interest
masked_elev <- mask(elev, region_mask)

# Convert masked binary map to a matrix and update the presence_matrix_current
elev_matrix <- as.matrix(masked_elev)

pixel_count <- nrow(elev) * ncol(elev)
elev_matrix <- data.frame(
  pixel = 1:pixel_count,
  elev = as.vector(elev_matrix)
)

presence_matrix$elev <- elev_matrix$elev

## COMBINED BOXPLOTS

# Step 1: Remove rows with NA values (excluding 'pixel' column)
presence_matrix_filtered <- presence_matrix[complete.cases(presence_matrix[, -1]), ]

# Step 2: Remove rows where middle columns are all zeroes
presence_matrix_filtered <- presence_matrix_filtered[rowSums(presence_matrix_filtered[, -c(1, ncol(presence_matrix_filtered))]) > 0, ]

# Open a new graphics device
dev.new()

# Combine the data into a long format suitable for ggplot2
presence_matrix_long <- gather(presence_matrix_filtered, key = "Species", value = "Presence", -c(pixel, elev))
presence_matrix_long <- presence_matrix_long[presence_matrix_long$Presence == 1, ]

# Calculate the median for each species
species_median <- presence_matrix_long %>%
  group_by(Species) %>%
  summarize(median_elev = median(elev))

# Reorder the species based on median elevation
presence_matrix_long$Species <- factor(presence_matrix_long$Species, levels = species_median$Species[order(species_median$median_elev)])

# Create a horizontal boxplot using ggplot2
ggplot(presence_matrix_long, aes(x = elev, y = Species)) +
  geom_boxplot(fill = 'lightblue', color = 'black') +
  labs(x = "Elevation (m asl)", y = "Species") +
  theme_minimal() + # Optional: customize the plot theme if desired
  scale_x_continuous(breaks = seq(0, 5000, 1000))

## FUTURE PROJECTIONS BOXPLOTS

presence_matrix_future <- readRDS("presence_matrix_future.rds")
presence_matrix_future <- as.data.frame(presence_matrix_future)

# Sort alphabetically
column_names <- names(presence_matrix_future)[-1]
sorted_column_names <- sort(column_names)
presence_matrix_future <- presence_matrix_future[c(names(presence_matrix_future)[1], sorted_column_names)]

presence_matrix <- presence_matrix_future#[c(1, 227:273)] # subset as needed

# Define a region mask from one of the binary maps (assuming they all have the same extent)
region_mask <- raster("projections/proj_current_Abies.guatemalensis_ensemble_TSSbin.tif")  # Any Species.name

# Get the dimensions of the mask (assuming all binary maps have the same dimensions)
mask_dim <- dim(region_mask)

# Mask the elevation map to the region of interest
masked_elev <- mask(elev, region_mask)

# Convert masked binary map to a matrix and update the presence_matrix_current
elev_matrix <- as.matrix(masked_elev)

pixel_count <- nrow(elev) * ncol(elev)
elev_matrix <- data.frame(
  pixel = 1:pixel_count,
  elev = as.vector(elev_matrix)
)

presence_matrix$elev <- elev_matrix$elev

# COMBINED BOXPLOTS

# Step 1: Remove rows with NA values (excluding 'pixel' column)
presence_matrix_filtered <- presence_matrix[complete.cases(presence_matrix[, -1]), ]

# Step 2: Remove rows where middle columns are all zeroes
presence_matrix_filtered <- presence_matrix_filtered[rowSums(presence_matrix_filtered[, -c(1, ncol(presence_matrix_filtered))]) > 0, ]

# Open a new graphics device
dev.new()

# Combine the data into a long format suitable for ggplot2
presence_matrix_long <- gather(presence_matrix_filtered, key = "Species", value = "Presence", -c(pixel, elev))
presence_matrix_long <- presence_matrix_long[presence_matrix_long$Presence == 1, ]

# Calculate the median for each species
species_median <- presence_matrix_long %>%
  group_by(Species) %>%
  summarize(median_elev = median(elev))

# Reorder the species based on median elevation
presence_matrix_long$Species <- factor(presence_matrix_long$Species, levels = species_median$Species[order(species_median$median_elev)])

# Create a horizontal boxplot using ggplot2
ggplot(presence_matrix_long, aes(x = elev, y = Species)) +
  geom_boxplot(fill = 'lightblue', color = 'black') +
  labs(x = "Elevation (m asl)", y = "Species") +
  theme_minimal() + # Optional: customize the plot theme if desired
  scale_x_continuous(breaks = seq(0, 5000, 1000))

######## COMBINED CHANGE-IN-ELEVATION BOXPLOTS ##############

## Run the code from previous section up until COMBINED BOXPLOTS, STEP 2.

## CURRENT ELEVATION STATS TABLE

# Initialize an empty data frame to store the results
elev_table_current <- data.frame()

# Iterate through the species columns and calculate statistics for each species
for (col in names(presence_matrix_filtered)[-1]) {
  species <- col
  elev_values <- presence_matrix_filtered$elev[presence_matrix_filtered[[col]] == 1]
  
  if (length(elev_values) > 0) {
    c_mean <- mean(elev_values, na.rm = TRUE)
    c_median <- median(elev_values, na.rm = TRUE)
    c_q1 <- quantile(elev_values, 0.25, na.rm = TRUE)
    c_q3 <- quantile(elev_values, 0.75, na.rm = TRUE)
    c_max <- max(elev_values, na.rm = TRUE)
    c_min <- min(elev_values, na.rm = TRUE)
    c_sd <- sd(elev_values, na.rm = TRUE)
    c_se <- c_sd / sqrt(length(elev_values))
    
    # Create a data frame for the current species
    species_df <- data.frame(species, c_mean, c_median, c_q1, c_q3, c_max, c_min, c_sd, c_se)
    
    # Append the data frame to the result data frame
    elev_table_current <- bind_rows(elev_table_current, species_df)
  }
}

# Rename the columns
colnames(elev_table_current) <- c('species', 'c_mean', 'c_median', 'c_q1', 'c_q3', 'c_max', 'c_min', 'c_sd', 'c_se')

# Print or save the result data frame
print(elev_table_current)

## FUTURE ELEVATION STATS TABLE

# Initialize an empty data frame to store the results
elev_table_future <- data.frame()

# Iterate through the species columns and calculate statistics for each species
for (col in names(presence_matrix_filtered)[-1]) {
  species <- col
  elev_values <- presence_matrix_filtered$elev[presence_matrix_filtered[[col]] == 1]
  
  if (length(elev_values) > 0) {
    f_mean <- mean(elev_values, na.rm = TRUE)
    f_median <- median(elev_values, na.rm = TRUE)
    f_q1 <- quantile(elev_values, 0.25, na.rm = TRUE)
    f_q3 <- quantile(elev_values, 0.75, na.rm = TRUE)
    f_max <- max(elev_values, na.rm = TRUE)
    f_min <- min(elev_values, na.rm = TRUE)
    f_sd <- sd(elev_values, na.rm = TRUE)
    f_se <- f_sd / sqrt(length(elev_values))
    
    # Create a data frame for the current species
    species_df <- data.frame(species, f_mean, f_median, f_q1, f_q3, f_max, f_min, f_sd, f_se)
    
    # Append the data frame to the result data frame
    elev_table_future <- bind_rows(elev_table_future, species_df)
  }
}

# Rename the columns
colnames(elev_table_future) <- c('species', 'f_mean', 'f_median', 'f_q1', 'f_q3', 'f_max', 'f_min', 'f_sd', 'f_se')

# Print or save the result data frame
print(elev_table_future)

## COMBINE BOTH TABLES & DELETE LAST ROW

elev_table <- cbind(elev_table_current, elev_table_future[-1]) 
elev_table <- head(elev_table, n = nrow(elev_table) - 1)

write.csv(elev_table, 'elev_table_stats.csv')

######## CLASSIFY SPECIES BY LATITUDINAL AND ALTITUDINL RANGE (FOR GLM) ###############

## BY LATITUDINAL RANGE

#EXTRACT 6TH MAP FOR SPECIES WITH FEW OCCURRENCE RECORDS
SpFew <- read.csv("AllSppFewMany.csv")
SpFew <- levels(as.factor(SpFew$SpeciesFew))
allRunsFew <- 6 

# Use lapply to create and store histograms in a list
histogram_listF <- lapply(SpFew, function(species) {
  # Read the binary map and choose the 6th layer
  binary_map <- stack(paste0("projections/proj_current_", species, "_ensemble_TSSbin.tif"))
  allRuns_p <- binary_map[[allRunsFew]]  # Extract the 6th layer
  
  # Get the presence pixels (where value is 1)
  presence_pixels <- which(allRuns_p[] == 1, arr.ind = TRUE)
  
  # Get latitude information for each presence pixel
  latitudes <- raster::xyFromCell(allRuns_p, presence_pixels, spatial = TRUE)$y
  
  # Create a histogram of latitudes
  histogram <- ggplot() +
    geom_histogram(aes(x = latitudes), bins = 100, fill = "blue", color = "black", alpha = 0.7) +
    labs(title = paste("Latitudinal Histogram for Species:", species),
         x = "Latitude", y = "Frequency") +
    scale_x_continuous(breaks = seq(5, 30, 2.5), limits = c(5, 30)) + # Set x-axis limits
    geom_vline(xintercept = c(12.5, 17.5), linetype = "dashed", color = "red")  # Add vertical lines

  return(histogram)
})

# Convert the list to a named list using the species names
names(histogram_listF) <- SpFew

#EXTRACT 11TH MAP FOR SPECIES WITH MANY OCCURRENCE RECORDS
SpMany <- read.csv("AllSppFewMany.csv")
SpMany <- levels(as.factor(SpMany$SpeciesMany))
SpMany <- SpMany[2:107]
i<-1
allRunsMany <- 11

# Use lapply to create and store histograms in a list
histogram_listM <- lapply(SpMany, function(species) {
  # Read the binary map and choose the 6th layer
  binary_map <- stack(paste0("projections/proj_current_", species, "_ensemble_TSSbin.tif"))
  allRuns_p <- binary_map[[allRunsMany]]  # Extract the 11th layer
  
  # Get the presence pixels (where value is 1)
  presence_pixels <- which(allRuns_p[] == 1, arr.ind = TRUE)
  
  # Get latitude information for each presence pixel
  latitudes <- raster::xyFromCell(allRuns_p, presence_pixels, spatial = TRUE)$y
  
  # Create a histogram of latitudes
  histogram <- ggplot() +
    geom_histogram(aes(x = latitudes), bins = 100, fill = "blue", color = "black", alpha = 0.7) +
    labs(title = paste("Latitudinal Histogram for Species:", species),
         x = "Latitude", y = "Frequency") +
    scale_x_continuous(breaks = seq(5, 30, 2.5), limits = c(5, 30)) + # Set x-axis limits
    geom_vline(xintercept = c(12.5, 17.5), linetype = "dashed", color = "red")  # Add vertical lines
  
  return(histogram)
})

# Convert the list to a named list using the species names
names(histogram_listM) <- SpMany

# Merge the two lists into a single list
histogram_list <- c(histogram_listF, histogram_listM)

# Order the elements alphabetically by names
histogram_list <- histogram_list[order(names(histogram_list))]

# Plot each histogram
plot(histogram_list[1]) # Change number to select histogram

## BY ALTITUDINAL RANGE

# Plot horizontal boxplots with cuts at 1000, 1500 and 2000 m asl to classify spp. by elevation.

elev_table <- read.csv('elev_table_stats.csv')

subset_data <- elev_table[1:50, ] # Change numbers to select species to plot

ggplot(subset_data, aes(x = species, ymin = c_min, lower = c_q1, middle = c_median, upper = c_q3, ymax = c_max)) +
  geom_boxplot(stat = "identity", fill = "lightblue", color = "black") +
  labs(x = "Species", y = "Elevation") +
  geom_hline(yintercept = c(1000, 1500, 2000), linetype = "dashed", color = "red") +
  coord_flip() +
  theme_minimal()

######## CALCULATE AREA RANGE LOSSES AND  CHANGES PER SPECIES IN KM2 ###########

## CURRENT MAPS

#EXTRACT 6TH MAP FOR SPECIES WITH FEW OCCURRENCE RECORDS
SpFew <- read.csv("AllSppFewMany.csv")
SpFew <- levels(as.factor(SpFew$SpeciesFew))
i<-1
allRunsFew <- 6 

# Define the UTM zone appropriate for study area (UTM zone 15 for Mexico)
utm_zone <- 15

# Create a CRS object for the selected UTM zone
utm_crs <- CRS(paste0("+proj=utm +zone=", utm_zone, " +datum=WGS84 +units=m +no_defs"))

# Create a function to generate frequency tables for a raster
generate_frequency_table <- function(raster) {
  values <- as.vector(raster)
  freq_table <- table(values)
  freq_df <- data.frame('Value' = as.numeric(names(freq_table)), 'Frequency' = as.numeric(freq_table))
  return(freq_df)
}

# Create an empty data frame to store the results
reproj_few <- data.frame('Species' = character(0), 'Current_0' = numeric(0), 'Current_1' = numeric(0), 'Reproj_0' = numeric(0), 'Reproj_1' = numeric(0))

# Apply reprojection function to each species
for (i in 1:length(SpFew)) {
  # Read the binary map and choose 6th layer
  binary_map <- stack(paste0("projections/proj_current_", SpFew[i], "_ensemble_TSSbin.tif"))
  allRuns_p <- binary_map[[allRunsFew]]
  
  # Reproject the raster to the UTM projection with a 1 km? cell size
  current_reproj <- projectRaster(allRuns_p, crs = utm_crs, res = 1000, method = "bilinear")
  
  # Round 'current_reproj' raster
  current_reproj <- round(current_reproj)
  
  # Generate frequency tables for each raster
  current_sp_freq_table <- generate_frequency_table(allRuns_p)
  current_reproj_freq_table <- generate_frequency_table(current_reproj)
  
  # Store the results in the data frame
  reproj_few <- rbind(reproj_few, c(SpFew[i], current_sp_freq_table$Frequency[1], current_sp_freq_table$Frequency[2], 
                                    current_reproj_freq_table$Frequency[1], current_reproj_freq_table$Frequency[2]))
}  

# Rename the columns for clarity
colnames(reproj_few) <- c('Species', 'Current_0', 'Current_1', 'Reproj_C_0', 'Reproj_C_1')

# Print the final result
print(reproj_few)

#write.csv(reproj_few, 'reproj_few_current.csv')

#EXTRACT 11TH MAP FOR SPECIES WITH MANY OCCURRENCE RECORDS
SpMany <- read.csv("AllSppFewMany.csv")
SpMany <- levels(as.factor(SpMany$SpeciesMany))
SpMany <- SpMany[2:107]
i<-1
allRunsMany <- 11 

# Create an empty data frame to store the results
reproj_many <- data.frame('Species' = character(0), 'Current_0' = numeric(0), 'Current_1' = numeric(0), 'Reproj_0' = numeric(0), 'Reproj_1' = numeric(0))

# Apply reprojection funtion to each species
for (i in 1:length(SpMany)) {
  # Read the binary map and choose 6th layer
  binary_map <- stack(paste0("projections/proj_current_", SpMany[i], "_ensemble_TSSbin.tif"))
  allRuns_p <- binary_map[[allRunsMany]]
  
  # Reproject the raster to the UTM projection with a 1 km? cell size
  current_reproj <- projectRaster(allRuns_p, crs = utm_crs, res = 1000, method = "bilinear")
  
  # Round 'current_reproj' raster
  current_reproj <- round(current_reproj)
  
  # Generate frequency tables for each raster
  current_sp_freq_table <- generate_frequency_table(allRuns_p)
  current_reproj_freq_table <- generate_frequency_table(current_reproj)
  
  # Store the results in the data frame
  reproj_many <- rbind(reproj_many, c(SpMany[i], current_sp_freq_table$Frequency[1], current_sp_freq_table$Frequency[2], 
                                      current_reproj_freq_table$Frequency[1], current_reproj_freq_table$Frequency[2]))
}  

# Rename the columns for clarity
colnames(reproj_many) <- c('Species', 'Current_0', 'Current_1', 'Reproj_C_0', 'Reproj_C_1')

# Print the final result
print(reproj_many)

# Stack the data frames vertically
reproj_current <- rbind(reproj_few, reproj_many)

# Order the rows alphabetically by 'Species'
reproj_current <- reproj_current[order(reproj_current$Species), ]

# Print the final result
print(reproj_current)

write.csv(reproj_current, 'reproj_spp_curr.csv')

## FUTURE MAPS

species_data <- read.csv("AllSppLocs_thin.csv")
SpListSpp <- levels(as.factor(species_data$Species))
SpListSpp <- SpListSpp[2:273]
i<-1

# Create an empty data frame to store the results
reproj_future <- data.frame('Species' = character(0), 'Future_0' = numeric(0), 'Future_1' = numeric(0), 'Reproj_0' = numeric(0), 'Reproj_1' = numeric(0))

# Use already saved combined future projections
for (i in 1:length(SpListSpp)) {
  file_name <- paste0("projFutComb/", SpListSpp[i], "_futureCombined.tif")
  binary_map <- raster(file_name)
  
  # Reproject the raster to the UTM projection with a 1 km? cell size
  future_reproj <- projectRaster(binary_map, crs = utm_crs, res = 1000, method = "bilinear")
  
  # Round 'future_reproj' raster
  future_reproj <- round(future_reproj)
  
  # Generate frequency tables for each raster
  future_sp_freq_table <- generate_frequency_table(binary_map)
  future_reproj_freq_table <- generate_frequency_table(future_reproj)
  
  # Store the results in the data frame
  reproj_future <- rbind(reproj_future, c(SpListSpp[i], future_sp_freq_table$Frequency[1], future_sp_freq_table$Frequency[2], 
                                          future_reproj_freq_table$Frequency[1], future_reproj_freq_table$Frequency[2]))
}  

# Rename the columns for clarity
colnames(reproj_future) <- c('Species', 'Future_0', 'Future_1', 'Reproj_F_0', 'Reproj_F_1')

# Print the final result
print(reproj_future)

write.csv(reproj_future, 'reproj_spp_fut.csv')

######## GLMs (+ Conifers vs Angiosperm range comparison) #################

postSDM <- read.csv("SI_SDM_projections.csv", header = T)
postSDM <- as.data.frame(postSDM)

# Define the order for 'Distribution'
dist_order <- c('N', 'N - C', 'C', 'C - S', 'S', 'W')
# Define the order for 'ElevClass'
elev_order <- c('VL', 'L', 'ML', 'M', 'MH', 'H', 'VH')

# Convert categorical variables to factors
postSDM$Species <- as.factor(postSDM$Species)
postSDM$Distribution <- factor(postSDM$Distribution, levels = dist_order, ordered = TRUE)
postSDM$ElevClass <- factor(postSDM$ElevClass, levels = elev_order, ordered = TRUE)

elev_stats <- read.csv('elev_table_stats.csv', header = T) # generated above (771)

postSDM$mean_elev <- elev_stats$c_mean

# Generate second-stage latitudinal and altitudinal classes
# Reclassify 'Distribution' into a new column 'Dist2'
postSDM <- postSDM %>%
  mutate(Dist2 = case_when(
    Distribution %in% c('N', 'N - C') ~ 'northern',
    Distribution %in% c('C - S', 'S') ~ 'southern',
    Distribution %in% c('C', 'W') ~ 'centred',
    TRUE ~ as.character(Distribution)  # If none of the above conditions match, keep the original value
  ))

# Reclassify 'ElevClass' into a new column 'Elev2'
postSDM <- postSDM %>%
  mutate(Elev2 = case_when(
    ElevClass %in% c('VL', 'L') ~ 'low',
    ElevClass %in% c('ML', 'M', 'MH') ~ 'mid',
    ElevClass %in% c('H', 'VH') ~ 'high',
    TRUE ~ as.character(ElevClass)  # If none of the above conditions match, keep the original value
  ))

Dist2_order <- c('northern', 'centred', 'southern')
Elev2_order <- c('low', 'mid', 'high')
postSDM$Dist2 <- factor(postSDM$Dist2, levels = Dist2_order, ordered = TRUE)
postSDM$Elev2 <- factor(postSDM$Elev2, levels = Elev2_order, ordered = TRUE)

# Conifers vs Angiosperms
# Non-parametric tests (Kruskal-Wallis)
kruskal.test(log(CurrentSize) ~ Clade, data = postSDM)
kruskal.test(log(FutFullDisp) ~ Clade, data = postSDM)
kruskal.test(PercLoss ~ Clade, data = postSDM)

# GLM1 - PercLoss as response variable with Gamma distribution
model.gm1 <- glm(PercLoss ~ Dist2 + Elev2 + Dist2:Elev2, data = postSDM[-c(77,105,108,115,146,148,227,256),], family = Gamma(link = "log"))
summary(model.gm1)
par(mfrow = c(2,2))
plot(model.gm1, 1:4)

# Bartlett test for heteroscedasticity
print(bartlett.test(residuals(model.gm1) ~ fitted(model.gm1)))

# GLM2 - log(Loss) as response variable with Gamma distribution
model.gm2 <- glm(log(Loss) ~ Dist2 + Elev2 + Dist2 + Dist2:Elev2, data = postSDM[-c(25,57,108,115,146,207,271),], family = Gamma(link = "log"))
summary(model.gm2)
par(mfrow = c(2,2))
plot(model.gm2, 1:4)

# Bartlett test for heteroscedasticity
print(bartlett.test(residuals(model.gm2) ~ fitted(model.gm2)))

# Interaction boxplot of log(Loss) ~ Dist2 + Elev2
ggplot(postSDM[-c(25,57,108,115,146,207,271),], aes(x = Dist2, y = log(Loss), fill = Elev2)) +
  geom_boxplot(position = 'dodge', color = 'black') +
  labs(x = 'Latitudinal class', y = 'log(Loss)') +
  scale_fill_manual(values = c('aliceblue', 'antiquewhite4', 'darkslategrey')) +
  theme_minimal() +
  theme(legend.title = element_blank(), panel.border = element_rect(colour = 'black', fill = NA))


######## LINEAR REGRESSION OF RANGE LOSSES BY CURRENT EXTENT ###########

lm1 <- lm(PercLoss ~ log(CurrentSize), data=postSDM[-c(22,105,108,115,146),])
summary(lm1)
par(mfrow = c(2,2))
plot(lm1, 1:4)

ggplot(postSDM[-c(22,105,108,115,146),], aes(x=log(CurrentSize), y=PercLoss)) +
  geom_point(shape=16, color='grey40')+
  geom_smooth(method = "lm", se = T, color='seagreen4', fill='paleturquoise4') +
  labs(x='log(Current extent in pixels)', y ='Loss (%)') +
  theme_minimal() +
  theme(panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())