################################################################################################################
#                                             LMM ANALYSES                                                     #
################################################################################################################

rm(list = ls()); dev.off()

setwd("/Volumes/MattSSD Files/Users/matt_a/PHDTHESIS/MA_PHDTHESIS_FILES/RapidSceneCat/Data_Preprocessed/")
#setwd("/Users/matt_a/OneDrive - University of Southampton/PostYork_Docs/RapidSceneCat/Data_Preprocessed/")
data = read.csv('STR_categorization.txt',header = T, sep = ";")
numtrials = nrow(data)

head(data); sapply(data, class)
var_factors = colnames(data)
numericvars = c(4,8,9,10,11,12,13,17,20,21)
var_factors = var_factors[-numericvars]
data[var_factors] = lapply(data[var_factors], factor)
sapply(data,class)

# check presentation time precision
tapply(data$Elapsed, list(data$Pres_Time, data$Colour_GrayScale), mean)

# Remove responses that were too long or too short.
data = data[!data$ResponsePeriod > 20,]

# remove any outlier subjects
logodds = function(x){
  x = as.numeric(x)-1
  return(log(mean(x)/(1-mean(x))))
}

submeans = tapply(as.numeric(data$Cat_correct), data$Ppt_No, logodds)
plot(submeans)
outlier = which((mean(submeans) - 3*sd(submeans)) > submeans)

if (length(outlier) != 0){
  print('Outlier/s Detected')
  data = data[data$Ppt_No != outlier,]} else {
    print('No Outlier/s Detected')}


# let's test whether log odds varies linearly with presentation time
logodds_prestime = tapply(data$Cat_correct, data$Pres_Time, logodds)
plot(c(1,2,4,8),logodds_prestime, type="l")
plot(log(c(1,2,4,8)), logodds_prestime, type="l")

# log odds varies linearly with the log presentation time. 
data$Pres_Time = log(data$Pres_Time)

# mean-center our covariate
data$Pres_Time = data$Pres_Time - mean(data$Pres_Time)

# check whether variance is homogenous across groups. No obvious outliers can be detected
bvariance = function(x){
  n = length(x)
  p = mean(as.numeric(x)-1)
  return(n*p*(1-p))
}
plot(tapply(data$Cat_correct, data$Ppt_No, bvariance))
plot(tapply(data$Cat_correct, data$imageID, bvariance))
plot(tapply(data$Cat_correct, data$GT_Category, bvariance))

# For post-hocs & main effects, Set up the appropriate contrasts. There will be 3*2 = 6 unique conditions
unique_conds = unique(cbind(data$Colour_GrayScale, data$Stereo_Cond))
unique_conds = unique_conds[order(unique_conds[,1],unique_conds[,2]),]

# ORDER: grayscale/colour - 2D/3D/3DR 
data$AllConditions = rep(0,nrow(data))
for (k in 1:nrow(unique_conds)){
  data$AllConditions[(which(apply(cbind(data$Colour_GrayScale, data$Stereo_Cond)
                                  , 1, function(x) all(x == unique_conds[k,]))))] = k
}
data$AllConditions = as.factor(data$AllConditions)
data$AllConditions = factor(data$AllConditions, labels = c("mono_gs", "stereo_gs", "stereoreversed_gs",
                                                           "mono_col", "stereo_col", "stereoreversed_col"))
data$Stereo_Cond = factor(data$Stereo_Cond, labels = c("mono", "stereo", "stereoreversed"))
data$Colour_GrayScale = factor(data$Colour_GrayScale, labels = c("gs", "col"))

# overall loss?
loss = (numtrials - nrow(data))/nrow(data) # data loss = 2.22%
print(c("loss = ",loss))

#########################################################################################
#                                   Semantic LMMs                                       #
#########################################################################################

# optimization methods
library(lme4); library(nloptr); library(optimx); library(dfoptim)
optimizers = c("bobyqa", "Nelder_Mead", "nlminbwrap", "nmkbw", "optimx", "nloptwrap" )

# generalized inverse function:
library(MASS)
ginv2 = function(x) fractions(provideDimnames(ginv(x), base = dimnames(x)[2:1]))

# set up contrasts and two two-way interactions
h_matrix = rbind(StereoH1 = c(mono_gs = 0,stereo_gs = .5,stereoreversed_gs = -0.5,
                              mono_col = 0,stereo_col = .5,stereoreversed_col = -0.5),
                 
                 StereoH2 = c(mono_gs = -0.5,stereo_gs = .25,stereoreversed_gs = .25,
                              mono_col = -0.5,stereo_col = .25,stereoreversed_col = .25),

                 ColourH1 = c(mono_gs = -1/3,stereo_gs = -1/3,stereoreversed_gs = -1/3,
                              mono_col = 1/3,stereo_col = 1/3,stereoreversed_col = 1/3),
                 
                 Stereo_Colour_H1 = c(mono_gs = 0,stereo_gs = .5,stereoreversed_gs = -.5,
                                      mono_col = 0,stereo_col = -.5,stereoreversed_col = .5),
                 
                 Stereo_Colour_H2 = c(mono_gs = -.5,stereo_gs = .25,stereoreversed_gs = .25,
                                      mono_col = .5,stereo_col = -.25,stereoreversed_col = -.25))

# Save output to a txt file:
# sink("GLMM OUTPUT/Structure_Categorization_LMM_SemanticFinal_rsquared_logtransformed_3.txt")
print("Hypothesis Matrix:")
print(t(h_matrix))

# It is not wholly necessary to compute the generalized inverse if our factors have a correlation of 0
cor(h_matrix[,1],h_matrix[,2])

# The intercept needs to be explicitly modelled when our contrasts are non-centered
rowSums(h_matrix)
print("Contrast Matrix:")
c_matrix = ginv2(h_matrix)
print(c_matrix)

print("LMM OUTPUT:")
print("---------------------------------------------------------")
logitlmertest = glmer(Cat_correct ~ AllConditions * Pres_Time + (1 + Pres_Time|Ppt_No) + (1 + Pres_Time|imageID),
                      data = data, family = binomial, contrasts = list(AllConditions = c_matrix), 
                      control=glmerControl(optimizer = "optimx", optCtrl=list(method = 'bobyqa', maxfun=1000000)))
summary(logitlmertest)
confint.merMod(object = logitlmertest, level = .95, method = "Wald")

# library(partR2)
# (R2 <- partR2(logitlmertest,  partvars = c("Pres_Time"), data=data,
#              R2_type = "marginal", nboot = 10, CI = 0.95))

# preds = predict(logitlmertest, newdata = data)
# data2 = data[,c("Ppt_No","Stereo_Cond","Colour_GrayScale","Pres_Time")]
# x = aggregate(preds, list(Ppt_No = data$Ppt_No, Stereo_Cond = data$Stereo_Cond, Colour_GrayScale = data$Colour_GrayScale, Pres_Time = data$Pres_Time),mean)
# write.csv(x,'GLMMPreds_STR.txt')
# tapply(data$preds,data$Ppt_No,mean)

# For each fixed effect, we also want an r^2 effect size measure. First we compute the overall effect
library(arm)
designmatrix = as.data.frame(model.matrix(logitlmertest))
fixedestimates = fixef(logitlmertest)
fixedpreds = var(as.vector(fixedestimates %*% t(designmatrix)))

# Overall model fit:
randomeffect_variance = as.data.frame(VarCorr(logitlmertest))
randomeffect_variance = sum(randomeffect_variance$vcov)
residual = pi^2/3

rsquared = fixedpreds / (fixedpreds + randomeffect_variance + residual)
print(rsquared)

# effect sizes for all our tested effects
sumrsquared = 0
for (f in 1:length(fixedestimates)){
  single_pred = var(as.vector(fixedestimates[f] %*% t(designmatrix[,f])))
  rsquared = single_pred / (fixedpreds + randomeffect_variance + residual)
  sumrsquared = sumrsquared + rsquared
  print(fixedestimates[f])
  print(rsquared)
}

sink()


# effect of inter-observer agreement? 
logitlmertest = glmer(Cat_correct ~ cat_agreement + (1 + Pres_Time|Ppt_No) + (1 + Pres_Time|imageID),
                      data = data, family = binomial, contrasts = list(AllConditions = c_matrix), 
                      control=glmerControl(optimizer = "optimx", optCtrl=list(method = 'bobyqa', maxfun=1000000)))
summary(logitlmertest)


# Lets investigate the time at which discrimination exceeds chance. We have to factorize prestime 
data$Pres_Time = as.factor(data$Pres_Time)

data$Pres_Time = factor(data$Pres_Time, labels = c("P1", "P2", "P3", "P4"))

# let's try a different coding
h_matrix = rbind(Time1 = c(P1 = 1, P2 = 0, P3 = 0, P4 = 0),
                 
                 Time2 = c(P1 = 0, P2 = 1, P3 = 0, P4 = 0),
                 
                 Time3 = c(P1 = 0, P2 = 0, P3 = 1, P4 = 0),
                 
                 Time4 = c(P1 = 0, P2 = 0, P3 = 0, P4 = 1))
library(MASS)
ginv2 = function(x) fractions(provideDimnames(ginv(x), base = dimnames(x)[2:1]))
c_matrix = ginv2(h_matrix)
print(c_matrix)

logitlmertest_prestime = glmer(Cat_correct ~ 0 + Pres_Time + (1|Ppt_No) + (1|imageID), data = data, family = binomial, 
                               contrasts = list(Pres_Time = c_matrix))
summ1 = summary(logitlmertest_prestime)
print(summ1)

# logitlmer automatically uses a baseline of 0.5. We want chance to be 1/C
chance = -log(-1 + (1/(1/4)))
oddsratios = summ1$coefficients[,1]-chance
zscores = (summ1$coefficients[,1]-chance)/(summ1$coefficients[,2])
pvalues = (1 - pnorm(abs(zscores))) * 2
print(oddsratios)
print(zscores)
print(pvalues)

# let's get the effect sizes as well now
designmatrix = as.data.frame(model.matrix(logitlmertest_prestime))
fixedestimates = fixef(logitlmertest_prestime)-chance
fixedpreds = var(as.vector(fixedestimates %*% t(designmatrix)))

# Overall model fit:
randomeffect_variance = as.data.frame(VarCorr(logitlmertest_prestime))
randomeffect_variance = sum(randomeffect_variance$vcov)
residual = pi^2/3

rsquared = fixedpreds / (fixedpreds + randomeffect_variance + residual)
print(rsquared)

# effect sizes for all our tested effects
sumrsquared = 0
for (f in 1:length(fixedestimates)){
  single_pred = var(as.vector(fixedestimates[f] %*% t(designmatrix[,f])))
  rsquared = single_pred / (fixedpreds + randomeffect_variance + residual)
  sumrsquared = sumrsquared + rsquared
  print(fixedestimates[f])
  print(rsquared)
  print("------")
}
