## SDM script for study titled:
## Projecting the Mesoamerican montane-specialist tree community under 
## climate change: Implications for conservation

#LOAD PACKAGES "AND ADDITIONAL FUNCTIONS
library("foreach")
library("doParallel")

# Set the number of cores to use
num_cores <- 32 # Number of species per batch +1

# Register the parallel backend using 'doParallel'
cl <- makeCluster(num_cores)
registerDoParallel(cl)

#SET THE DIRECTORY WHERE THE ENVIONMENTAL DATA FOR THE PRESENT ARE STORED
directoryEnv=paste0(getwd(),'/Ascii_present/')
#SET THE DIRECTORY WHERE THE ENVIONMENTAL DATA FOR THE FUTURE ARE STORED
directoryEnv1=paste0(getwd(),'/G85/')
directoryEnv2=paste0(getwd(),'/M85/')
directoryEnv3=paste0(getwd(),'/U85/')
#SET THE DIRECTORY WHERE THE SPECIES DATASET IS STORED
directorySp=paste0(getwd(),'')

#ADD LIST OF SPECIES AND CREATE ITERATION VARIABLE
SpList <- read.csv("locs_batchXX.csv")  # Replace XX with batch number
SpListSpp <- levels(as.factor(SpList$Species))

# Export required objects to parallel workers
clusterExport(cl, c("SpListSpp", "directoryEnv", "directoryEnv1", "directoryEnv2","directoryEnv3", "directorySp"))

#LOOPS SETUP STARTS HERE
foreach(i = 1:length(SpListSpp)) %dopar% {
library("raster")
library("PresenceAbsence")
library("dismo")
library("sf")
library("biomod2")
library("tidyterra")
library("rasterVis")
library("dplyr")

#LOAD ENVIRONMENTAL PREDICTORS FOR FITTING
path <- list.files(directoryEnv,
                   pattern = ".asc$", 
                   full.names = TRUE) 
s_pres <- raster::stack(path)
crs(s_pres) <- '+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0'

#LOAD ENVIRONMENTAL PREDICTORS FOR PROJECTIONS
path <- list.files(directoryEnv1,
                   pattern = ".asc$", 
                   full.names = TRUE) 
s_futG <- raster::stack(path)
crs(s_futG) <- '+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0'

path <- list.files(directoryEnv2,
                   pattern = ".asc$", 
                   full.names = TRUE) 
s_futM <- raster::stack(path)
crs(s_futM) <- '+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0'

path <- list.files(directoryEnv3,
                   pattern = ".asc$", 
                   full.names = TRUE) 
s_futU <- raster::stack(path)
crs(s_futU) <- '+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0'


   result_dir <- paste0(getwd(), "/", SpListSpp[i])
  if (!dir.exists(result_dir)) {
    dir.create(result_dir)
  }
  preds_pres_file <- paste0(result_dir, "/proj_current/proj_current_", SpListSpp[i], "_ensemble_TSSbin.tif")
  preds_futG_file <- paste0(result_dir, "/proj_futureG/proj_futureG_", SpListSpp[i], "_ensemble_TSSbin.tif")
  preds_futM_file <- paste0(result_dir, "/proj_futureM/proj_futureM_", SpListSpp[i], "_ensemble_TSSbin.tif")
  preds_futU_file <- paste0(result_dir, "/proj_futureU/proj_futureU_", SpListSpp[i], "_ensemble_TSSbin.tif")
  
  PresData <- SpList[SpList$Species==SpListSpp[i],]
  
  #LOAD THE DATASET WITH SPECIES DATA POINTS
  PresData$PA<-1 #add a column that specifies these are presence points
  sp<-PresData$Species[1]
  
  #Sample background data points (note this can be done using biomod 
  #function automatically, but then we want to do it now in order to check 
  #variables before using biomod)
  set.seed(10)
  xy<-sampleRandom(s_pres, size=1000, xy=TRUE)[,1:2] #sample random point using one of the environmental layers
  xy<-as.data.frame(xy)
  xy<-data.frame(Species=sp, x=xy[,1], y=xy[,2], PA=0)
  SpData<-rbind(PresData, xy) #join presence and background data
  
  #extract environmental values
  Vars<-extract(s_pres, SpData[,c('x','y')])
  #bind variables with species data
  SpData<-cbind(SpData, Vars)
  #only retain point data with complete environmental information (remove coordinates with no environmental values)
  SpData<-SpData[complete.cases(SpData),] 

  #PREPARE THE BIOMOD DATASET
  ss_pres<-stack(s_pres[[c('bio04', 'bio08', 'bio09', 'bio14', 'bio16', 'bio19',  'hwsd','ruggedness', 'slope')]])
  
  ss_futG<-stack(s_futG[[c('bio04', 'bio08', 'bio09', 'bio14', 'bio16', 'bio19')]], s_pres[[c('hwsd','ruggedness', 'slope')]])
  ss_futM<-stack(s_futM[[c('bio04', 'bio08', 'bio09', 'bio14', 'bio16', 'bio19')]], s_pres[[c('hwsd','ruggedness', 'slope')]])
  ss_futU<-stack(s_futU[[c('bio04', 'bio08', 'bio09', 'bio14', 'bio16', 'bio19')]], s_pres[[c('hwsd','ruggedness', 'slope')]])
  
  dataset <- BIOMOD_FormatingData(
    resp.var = SpData$PA, #response variable (0, 1)
    expl.var = ss_pres, #explanatory variables provided as a raster stack
    resp.name = SpData$Species[1], #name of the species
    resp.xy = SpData[,c('x','y')], #coordinates of the response variable
    eval.resp.var = SpData$PA,
    eval.expl.var = s_pres,
    eval.resp.xy = SpData[,c('x', 'y')]
  )
  
  #FIT THE MODEL(S)
  N_Runs=5 # 5 for spp. with <50 records, 10 for spp. with >50 records
  
  #ADAPTED CODE FOR 'biomod2' v.4.2 FROM A PREVIOUS VERSION:
  models <- BIOMOD_Modeling(
    dataset,
    modeling.id = paste(sp,"FirstModeling",sep=""),
    models = c('GAM','GBM','ANN', 'MAXNET'),
    bm.options = BIOMOD_ModelingOptions(),
    nb.rep = N_Runs,
    data.split.perc = 80,
    data.split.table = NULL,
    do.full.models = TRUE,
    weights = NULL,
    prevalence = 0.5,
    metric.eval = "TSS",
    var.import = N_Runs,
    scale.models = FALSE,
    nb.cpu = 1,
    seed.val = NULL,
    do.progress = TRUE
  )
  
  #EVALUATE MODELS
  eval <- get_evaluations(models)
  eval
  write.csv(eval, paste(getwd(),"/evals/",SpListSpp[i],"pilot_evals.csv",sep=""))
  
  #ESTIMATE VARIABLE IMPORTANCE
  imp<-get_variables_importance(models)
  write.csv(imp, paste(getwd(),"/evals/",SpListSpp[i],"pilot_imp.csv",sep=""))

  #PRESENT PROJECTIONS - ADAPTED FOR NEW 'biomod2'
  preds_pres <- BIOMOD_Projection(
    bm.mod = models,
    proj.name = 'current',
    new.env = ss_pres,
    models.chosen = 'all',
    metric.binary = 'TSS',
    compress = F, #I changed this argument
    output.format = '.grd'
  )

  #FUTURE PREDICTIONS - ADAPTED FOR NEW 'biomod2'
  preds_futG <- BIOMOD_Projection(
    bm.mod = models,
    proj.name = 'futureG',
    new.env = ss_futG,
    models.chosen = 'all',
    metric.binary = 'TSS',
    compress = F, #I added this argument here
    output.format = '.grd'
  )

  preds_futM <- BIOMOD_Projection(
    bm.mod = models,
    proj.name = 'futureM',
    new.env = ss_futM,
    models.chosen = 'all',
    metric.binary = 'TSS',
    compress = F, #I added this argument here
    output.format = '.grd'
  )
  
  preds_futU <- BIOMOD_Projection(
    bm.mod = models,
    proj.name = 'futureU',
    new.env = ss_futU,
    models.chosen = 'all',
    metric.binary = 'TSS',
    compress = F, #I added this argument here
    output.format = '.grd'
  )
  
  #GENERATE ENSEMBLE MODEL - ADAPTED FOR NEW 'biomod2'
  EM <- BIOMOD_EnsembleModeling(
    bm.mod = models,
    models.chosen = 'all',
    em.by = 'PA+run',
    em.algo = 'EMmean', #not sure about this line
    metric.select = c('TSS'),
    metric.select.thresh = c(0.5),
    EMci.alpha = 0.05,
    EMwmean.decay = 'proportional'
  )
  
  #Project ensemble to the present (current) - ADAPTED FOR NEW 'biomod2'
  EFpres <- BIOMOD_EnsembleForecasting(
    bm.em = EM,
    bm.proj = NULL,
    models.chosen = 'all',
    metric.binary = 'all',
    metric.filter = 'all',
    new.env = s_pres,
    proj.name = "current")
  
  EFpres_preds <- get_predictions(EFpres)
  
  EFpres_preds_bin <- stack(preds_pres_file)
  
  #project ensemble to the future - ADAPTED FOR NEW 'biomod2'
  EFfutG <- BIOMOD_EnsembleForecasting(
    bm.em = EM,
    bm.proj = NULL,
    models.chosen = 'all',
    metric.binary = 'all',
    metric.filter = 'all',
    new.env = s_futG,
    proj.name = "futureG")
  
  EFfutG_preds <- get_predictions(EFfutG)
  EFpreds_futG_bin <- stack(preds_futG_file)
  
  EFfutM <- BIOMOD_EnsembleForecasting(
    bm.em = EM,
    bm.proj = NULL,
    models.chosen = 'all',
    metric.binary = 'all',
    metric.filter = 'all',
    new.env = s_futM,
    proj.name = "futureM")
  
  EFfutM_preds <- get_predictions(EFfutM)
  EFpreds_futM_bin <- stack(preds_futM_file)
  
  EFfutU <- BIOMOD_EnsembleForecasting(
    bm.em = EM,
    bm.proj = NULL,
    models.chosen = 'all',
    metric.binary = 'all',
    metric.filter = 'all',
    new.env = s_futU,
    proj.name = "futureU")
  
  EFfutU_preds <- get_predictions(EFfutU)  
  EFpreds_futU_bin <- stack(preds_futU_file)
  
  rangeShift_EF_G<-BIOMOD_RangeSize(EFpres_preds_bin, EFpreds_futG_bin)
  save(list = "rangeShift_EF_G", file = paste(getwd(),"/rangeShifts/",SpListSpp[i],"_RS_G.RData",sep=""))
  
  rangeShift_EF_M<-BIOMOD_RangeSize(EFpres_preds_bin, EFpreds_futM_bin)
  save(list = "rangeShift_EF_M", file = paste(getwd(),"/rangeShifts/",SpListSpp[i],"_RS_M.RData",sep=""))
  
  rangeShift_EF_U<-BIOMOD_RangeSize(EFpres_preds_bin, EFpreds_futU_bin)
  save(list = "rangeShift_EF_U", file = paste(getwd(),"/rangeShifts/",SpListSpp[i],"_RS_U.RData",sep=""))
  
}

# Stop the parallel backend
stopCluster(cl)