source('R/load_exp2.R')
source('R/function_exp2.R')

#data from 2021

# List every participant file found in the 2021 data directory.
all_file_names <- list.files('data_2021/')

# Accumulator: each participant's rows are appended below.
data_2021 <- data.frame()

# seq_along() gives an empty sequence when no files are found; 1:length()
# would instead iterate over c(1, 0) and try to read 'data_2021/NA'.
for (i in seq_along(all_file_names)) {

  # create file path for the participant's data
  path <- paste0('data_2021/', all_file_names[i])

  # read in the file
  x <- fromJSON(path)

  # append the participant's data to the end of the dataframe
  data_2021 <- rbind(data_2021, x)

}


# Swap the opaque subject identifiers in the 2021 data for participant
# numbers 14-18 (the 2020 participants are numbered 1-13 further down).
subject_numbers <- c("1qbojuge7ensl1t" = 14,
                     "k0qrwgera2hukyb" = 15,
                     "gq3eo2a6m2oxjz0" = 16,
                     "puk5g1yv4rpr6fy" = 17,
                     "ch94ue27dkheb77" = 18)

for (original_id in names(subject_numbers)) {
  data_2021$subject[data_2021$subject == original_id] <- subject_numbers[[original_id]]
}

# Save the combined, renumbered 2021 data as a single CSV.
write.csv(data_2021, file = 'data_2021.csv')


# ---- data from 2020 ----
# One CSV per questionnaire, plus the demographics file; each is read into
# its own data frame.
asi_2020  <- read.csv(file = "data_2020/anxiety_sensitivity_index.csv")
sas_2020  <- read.csv(file = "data_2020/somatosensory_amplification_scale.csv")
fop_2020  <- read.csv(file = "data_2020/fear_of_pain.csv")
pcs_2020  <- read.csv(file = "data_2020/pain_catastrophising.csv")
bdi_2020  <- read.csv(file = "data_2020/beck_depression.csv")
stai_2020 <- read.csv(file = "data_2020/state_trait_anxiety_inventory.csv")
demo_2020 <- read.csv(file = "data_2020/Demographics.csv")

# ---- asi ----
# Pull the 2021 responses and give the 2020 file the column names
# subject, asi_1 ... asi_18 before stacking the two cohorts.
asi_2021 <- jatos_tidy(data_2021, 'asi_1')

colnames(asi_2020) <- c('subject', paste0('asi_', 1:18))

asi_all <- rbind(asi_2020, asi_2021)
asi_all[, 2:19] <- lapply(asi_all[, 2:19], as.numeric)

# items that make up each subscale
asi_social_items    <- c("asi_1", "asi_6", "asi_9", "asi_11", "asi_13", "asi_17")
asi_physical_items  <- c("asi_3", "asi_4", "asi_7", "asi_8", "asi_12", "asi_15")
asi_cognitive_items <- c("asi_2", "asi_5", "asi_10", "asi_14", "asi_16", "asi_18")

# subscale score = item-by-item sum (an NA in any item gives an NA score);
# total = sum of the three subscales
asi_scored <- asi_all %>%
  mutate(asi_social = Reduce(`+`, asi_all[, asi_social_items]),
         asi_physical = Reduce(`+`, asi_all[, asi_physical_items]),
         asi_cognitive = Reduce(`+`, asi_all[, asi_cognitive_items]),
         asi_total = asi_social + asi_physical + asi_cognitive)

# internal consistency per subscale
# social
cronbach.alpha(asi_scored[, asi_social_items])

# physical
cronbach.alpha(asi_scored[, asi_physical_items])

# cognitive
cronbach.alpha(asi_scored[, asi_cognitive_items])

# ---- sas ----
# item columns (column 1 is the subject id)
sas_item_cols <- 2:11

# 2021 responses: make each item numeric, then pass it through add_one()
sas_2021 <- jatos_tidy(data_2021, 'sas_1')
sas_2021[, sas_item_cols] <- lapply(
  sas_2021[, sas_item_cols],
  function(item) add_one(as.numeric(item))
)

colnames(sas_2020) <- c('subject', paste0('sas_', 1:10))

# stack both cohorts and make sure every item is numeric
sas_all <- rbind(sas_2020, sas_2021)
sas_all[, sas_item_cols] <- lapply(sas_all[, sas_item_cols], as.numeric)

# total score = sum of the ten items
sas_scored <- sas_all %>%
  mutate(sas_total = rowSums(sas_all[, sas_item_cols]))

cronbach.alpha(sas_scored[, sas_item_cols])

# ---- fop ----
# item columns (column 1 is the subject id)
fop_item_cols <- 2:31

# 2021 responses: make each item numeric, then pass it through add_one()
fop_2021 <- jatos_tidy(data_2021, 'fop')
fop_2021[, fop_item_cols] <- lapply(
  fop_2021[, fop_item_cols],
  function(item) add_one(as.numeric(item))
)

colnames(fop_2020) <- c('subject', paste0('fop_', 1:30))
fop_all <- rbind(fop_2020, fop_2021)

# column positions of each subscale (item number + 1, because of the id column)
fop_minor_cols   <- c(3, 5, 8, 13, 20, 23, 24, 25, 29, 31)
fop_medical_cols <- c(9, 12, 15, 16, 17, 18, 21, 22, 27, 30)
fop_severe_cols  <- c(2, 4, 6, 7, 10, 11, 14, 19, 26, 28)

fop_scored <- fop_all %>%
  mutate(fop_minor = rowSums(fop_all[, fop_minor_cols]),
         fop_medical = rowSums(fop_all[, fop_medical_cols]),
         fop_severe = rowSums(fop_all[, fop_severe_cols]),
         fop_total = fop_minor + fop_medical + fop_severe)

# internal consistency
# minor
cronbach.alpha(fop_all[, fop_minor_cols])

# medical
cronbach.alpha(fop_all[, fop_medical_cols])

# severe
cronbach.alpha(fop_all[, fop_severe_cols])

# total
cronbach.alpha(fop_all[, fop_item_cols])

#pcs
# Pain Catastrophizing Scale: 13 items scored into three subscales.
pcs_2021 <- jatos_tidy(data_2021, 'pcs')
pcs_2021[,2:14] <- lapply(pcs_2021[,2:14], as.numeric)

colnames(pcs_2020) <- c('subject', paste0('pcs_', 1:13))

# rbind() takes its column names from the first data frame, so pcs_all is
# guaranteed to carry the pcs_1 ... pcs_13 names set just above.
pcs_all <- rbind(pcs_2020, pcs_2021)

# Subscale membership by item name (Sullivan et al., 1995):
#   rumination 8-11, magnification 6, 7, 13, helplessness 1-5 and 12.
# The helplessness set previously used column 12 (= pcs_11), which
# double-counted a rumination item and left pcs_12 out of every score.
pcs_rumination_items    <- c('pcs_8', 'pcs_9', 'pcs_10', 'pcs_11')
pcs_magnification_items <- c('pcs_6', 'pcs_7', 'pcs_13')
pcs_helplessness_items  <- c('pcs_1', 'pcs_2', 'pcs_3', 'pcs_4', 'pcs_5', 'pcs_12')

pcs_scored <- pcs_all %>%
  mutate(pcs_rumination = rowSums(pcs_all[, pcs_rumination_items]),
         pcs_magnification = rowSums(pcs_all[, pcs_magnification_items]),
         pcs_helplessness = rowSums(pcs_all[, pcs_helplessness_items]),
         pcs_total = pcs_rumination + pcs_magnification + pcs_helplessness)

#rumination
cronbach.alpha(pcs_all[, pcs_rumination_items])

#magnification
cronbach.alpha(pcs_all[, pcs_magnification_items])

#helplessness
cronbach.alpha(pcs_all[, pcs_helplessness_items])

#total
cronbach.alpha(pcs_all[,2:14])

# ---- bdi ----
# 2021 responses: go long, strip every letter and whitespace character from
# the answer so that only the numeric part is left, then go wide again.
bdi_2021_long <- jatos_tidy(data_2021, 'bdi') %>%
  gather(question, answer, -subject)
bdi_2021_long$answer <- as.numeric(
  str_remove_all(bdi_2021_long$answer, "[[:alpha:]]|[[:space:]]")
)
bdi_2021 <- spread(bdi_2021_long, question, answer)

colnames(bdi_2020) <- c('subject', paste0('bdi_', 1:21))

bdi_all <- rbind(bdi_2020, bdi_2021)

# item columns (column 1 is the subject id)
bdi_item_cols <- 2:22

# total score = sum of the 21 items
bdi_scored <- bdi_all %>%
  mutate(bdi_total = rowSums(bdi_all[, bdi_item_cols]))

# total
cronbach.alpha(bdi_all[, bdi_item_cols])

# ---- stai ----
# item columns (column 1 is the subject id)
stai_item_cols <- 2:41

# 2021 responses: make each item numeric, then pass it through add_one()
stai_2021 <- jatos_tidy(data_2021, 'stai')
stai_2021[, stai_item_cols] <- lapply(
  stai_2021[, stai_item_cols],
  function(item) add_one(as.numeric(item))
)

colnames(stai_2020) <- c('subject', paste0('stai_', 1:40))

stai_all <- rbind(stai_2020, stai_2021)

# reverse score anxiety absent items (new value = 5 - old value)
stai_reversed_cols <- c(2, 3, 6, 9, 11, 12, 16, 17, 20, 21,
                        22, 24, 27, 28, 31, 34, 35, 37, 40)
stai_all[, stai_reversed_cols] <- 5 - stai_all[, stai_reversed_cols]

# the first 20 item columns form the state score, the last 20 the trait score
stai_state_cols <- 2:21
stai_trait_cols <- 22:41

stai_scored <- stai_all %>%
  mutate(stai_state = rowSums(stai_all[, stai_state_cols]),
         stai_trait = rowSums(stai_all[, stai_trait_cols]))

# state
cronbach.alpha(stai_all[, stai_state_cols])

# trait
cronbach.alpha(stai_all[, stai_trait_cols])

#handedness
# 2020 answers sit in columns 5-8 of the demographics file (column 1 is the
# id); the 2021 answers come from jatos_tidy().
handedness_2020 <- demo_2020[,c(1, 5:8)]
handedness_2021 <- jatos_tidy(data_2021, 'writing_hand')

str(handedness_2021)

colnames(handedness_2020) <- c('subject', 'writing_hand', 'throwing_hand', 'toothbrush_hand', 'spoon_hand')

# 2020: recode the text labels onto a -50 ... 50 scale
handedness_2020[handedness_2020 == "Always right"] <- 50
handedness_2020[handedness_2020 == "Usually right"] <- 25
handedness_2020[handedness_2020 == "Both equally"] <- 0
handedness_2020[handedness_2020 == "Usually left"] <- -25
handedness_2020[handedness_2020 == "Always left"] <- -50

# Reliability of the 2020 items. This has to run after the recode: on the raw
# text labels the items are not numeric scores, so the alpha is meaningless.
cronbach.alpha(data.frame(lapply(handedness_2020[,2:5], as.numeric)))

# 2021: recode the 0-4 response codes onto the same -50 ... 50 scale.
# Order matters because the replacements are applied one after another to the
# same cells: "0" must be recoded BEFORE "2" is turned into "0", otherwise
# every "2" answer would end up as "-50". In this order no freshly written
# value ("-50", "-25", "0", "25") matches a code that is still to be recoded.
handedness_2021[handedness_2021 == "0"] <- "-50"
handedness_2021[handedness_2021 == "1"] <- "-25"
handedness_2021[handedness_2021 == "2"] <- "0"
handedness_2021[handedness_2021 == "3"] <- "25"
handedness_2021[handedness_2021 == "4"] <- "50"

# stack both cohorts and make the four items numeric
handedness_all <- rbind(handedness_2020, handedness_2021)
handedness_all[,2:5] <- lapply(handedness_all[,2:5], as.numeric)

# total = sum of the four items
handedness_scored <- handedness_all %>%
  mutate(handedness_total = rowSums(handedness_all[,2:5]))

cronbach.alpha(handedness_all[,2:5])


# ---- demographics ----
# 2021: three separate jatos_tidy() pulls, joined on subject
demo1_2021 <- jatos_tidy(data_2021, 'age_years')
demo2_2021 <- jatos_tidy(data_2021, 'occupation')
demo3_2021 <- jatos_tidy(data_2021, 'gender')

demo_2021 <- Reduce(
  function(joined, next_part) left_join(joined, next_part, by = 'subject'),
  list(demo1_2021, demo2_2021, demo3_2021)
)

# 2020: drop the handedness columns and Family.chronic.pain, then rename the
# seven columns that remain
demo_2020 <- dplyr::select(demo_2020,
                           -Writing, -Throwing, -Toothbrush, -Spoon,
                           -Family.chronic.pain)

colnames(demo_2020) <- c('subject', 'age_years', 'age_months', 'gender',
                         'occupation', 'education', 'family_pain')

demo_all <- rbind(demo_2020, demo_2021)
demo_all[, 2:3] <- lapply(demo_all[, 2:3], as.numeric)

demo_scored <- demo_all %>%
  mutate(
    # age in years, with the months as a fraction of a year
    age_exact = ((age_years * 12) + age_months) / 12,
    # removes every occurrence of 'na' or 'no' from the text
    # NOTE(review): this also hits those letters inside longer words - confirm
    family_pain = str_remove_all(family_pain, 'na|no'),
    # tidy up one specific free-text occupation entry
    occupation = str_replace(occupation, 'Trainee TeacherPhD phenomenon', 'PhD Student')
  ) %>%
  dplyr::select(-age_years, -age_months)

# ---- ambiguous scenarios task ----
# 2020 training data: one file per training condition
ast_neutral_2020 <- read.csv(file = 'data_2020/Ambiguous scenarios neutral cond 1.csv')
ast_pain_2020 <- read.csv(file = 'data_2020/Ambiguous scenarios pain cond 2.csv')

# tag every row with the condition number taken from the file it came from
ast_neutral_2020[['condition']] <- 1
ast_pain_2020[['condition']] <- 2

# Answer key for the 2020 comprehension questions: a 30-answer sequence that
# is repeated twice, giving 60 answers in total.
benign_ibm_key_half_2020 <- c('No', 'No', 'Yes', 'No', 'No', 'No', 'Yes', 'No', 'Yes', 'Yes',
                              'No', 'No', 'Yes', 'No', 'Yes', 'No', 'No', 'No', 'No', 'No',
                              rep('No', 10))
benign_ibm_correct_answers_2020 <- rep(benign_ibm_key_half_2020, times = 2)

# give both files the same column names so they can be stacked
names(ast_neutral_2020) <- names(ast_pain_2020)

ast_all_2020 <- rbind(ast_neutral_2020, ast_pain_2020)

# training condition per participant: 2020 from the files, 2021 from jatos_tidy()
ast_condition_2020 <- ast_all_2020 %>%
  dplyr::select(subject = ID, condition)

ast_condition <- jatos_tidy(data_2021, '\"condition\"')

ast_condition_all <- rbind(ast_condition_2020, ast_condition)
  

# Reshape the 2020 training data to one row per participant and question,
# keeping only the rows whose answer is 'Yes' or 'No'.
ast_organise_2020 <- ast_all_2020 %>%
  gather(question, answer, -ID, -condition) %>%
  filter(answer %in% c('Yes', 'No'))

ast_comprehension_2020 <- data.frame()

# 2020 participants are numbered 1 to 13
participant_ids_2020 <- as.numeric(1:13)

for (id in participant_ids_2020) {

  # this participant's answers with the answer key alongside
  # (the key has 60 entries, so 60 answer rows are expected per participant)
  filtered_data_2020 <- ast_organise_2020 %>%
    filter(ID == id) %>%
    mutate(correct_answer = benign_ibm_correct_answers_2020)

  # In condition "1" an answer that matches the key scores 1; in any other
  # condition the scoring is reversed (a match scores 0).
  if (filtered_data_2020$condition[1] == "1") {
    score_match <- 1
    score_mismatch <- 0
  } else {
    score_match <- 0
    score_mismatch <- 1
  }

  ast_correct_answers_2020 <- filtered_data_2020 %>%
    mutate(comprehension_correct = ifelse(correct_answer == answer,
                                          score_match, score_mismatch))

  # collapse to a single row: the participant's proportion correct
  ast_training_prop_correct_2020 <- ast_correct_answers_2020 %>%
    transmute(ID, training_proportion_correct = mean(comprehension_correct)) %>%
    distinct()

  ast_comprehension_2020 <- rbind(ast_comprehension_2020,
                                  ast_training_prop_correct_2020)
}

# ---- 2021 ast training comprehension ----
# unique 2021 subject ids, as a list to loop over
participant_ids <- distinct(dplyr::select(data_2021, subject))
participant_ids_list <- as.list(participant_ids$subject)

# Answer key for the 2021 comprehension questions: a 30-answer sequence in
# which every answer appears twice in a row, giving 60 entries.
benign_ibm_correct_answers <- rep(
  c('No', 'No', 'Yes', 'No', 'No', 'No', 'Yes', 'No', 'Yes', 'Yes',
    'No', 'No', 'Yes', 'No', 'Yes', 'No', 'No', 'No', 'No', 'No',
    'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No', 'No'),
  each = 2
)

ast_comprehension <- data.frame()

for (id in participant_ids_list) {

  # this participant's training comprehension responses, split at ':' into a
  # question part and an answer part and stripped down to digits / bare text
  filtered_data <- data_2021 %>%
    dplyr::filter(subject == id,
                  str_detect(responses, 'ast_q'),
                  str_detect(responses, 'comp')) %>%
    dplyr::select(subject, responses) %>%
    separate(col = responses, into = c('question', 'answer'), sep = ':') %>%
    mutate(question = str_replace_all(question, "[[:punct:]]|[[:alpha:]]", ""),
           answer = str_replace_all(answer, "[[:punct:]]", ""))

  # first 60 responses in question order, with the condition and the
  # lower-cased answer key attached
  ast_training_sorted <- filtered_data %>%
    slice_head(n = 60) %>%
    arrange(as.numeric(question)) %>%
    inner_join(ast_condition, by = 'subject') %>%
    mutate(correct_answer = tolower(benign_ibm_correct_answers))

  # In condition "1" an answer that matches the key scores 1; in any other
  # condition the scoring is reversed (a match scores 0).
  if (ast_training_sorted$condition[1] == "1") {
    score_match <- 1
    score_mismatch <- 0
  } else {
    score_match <- 0
    score_mismatch <- 1
  }

  ast_correct_answers <- ast_training_sorted %>%
    mutate(comprehension_correct = ifelse(correct_answer == answer,
                                          score_match, score_mismatch))

  # collapse to a single row: the participant's proportion correct
  ast_training_prop_correct <- ast_correct_answers %>%
    transmute(subject, training_proportion_correct = mean(comprehension_correct)) %>%
    distinct()

  ast_comprehension <- rbind(ast_comprehension, ast_training_prop_correct)
}

# line the 2020 column names up with 2021 and stack the two cohorts
colnames(ast_comprehension_2020) <- c('subject', 'training_proportion_correct')

ast_training_comprehension_all <- rbind(ast_comprehension_2020, ast_comprehension)


# ---- ast test comprehension 2020 ----

ast_test_data_2020 <- read.csv(file = 'data_2020/Ambiguous scenarios test.csv')

ast_test_comprehension_2020 <- data.frame()

# one row per participant and question, keeping only 'Yes' / 'No' answers
ast_test_2020_organise <- ast_test_data_2020 %>%
  gather(question, answer, -ID) %>%
  filter(answer %in% c('Yes', 'No'))

for (id in participant_ids_2020) {

  # proportion of this participant's answers that are 'Yes'
  # (a 'Yes' is what counts as correct here)
  ast_test_comp_2020 <- ast_test_2020_organise %>%
    filter(ID == id) %>%
    transmute(ID, test_proportion_correct = mean(ifelse(answer == 'Yes', 1, 0))) %>%
    distinct()

  ast_test_comprehension_2020 <- rbind(ast_test_comprehension_2020,
                                       ast_test_comp_2020)
}

# ---- ast test comprehension 2021 ----
ast_test_comprehension <- data.frame()

for (id in participant_ids_list) {

  # this participant's test comprehension responses, split at ':' into a
  # question part and an answer part and stripped down to digits / bare text
  ast_test_comp <- data_2021 %>%
    dplyr::filter(subject == id,
                  str_detect(responses, 'ast_test'),
                  str_detect(responses, 'comp')) %>%
    dplyr::select(subject, responses) %>%
    separate(col = responses, into = c('question', 'answer'), sep = ':') %>%
    mutate(question = str_replace_all(question, "[[:punct:]]|[[:alpha:]]", ""),
           answer = str_replace_all(answer, "[[:punct:]]", ""))

  ast_test_comp_sorted <- arrange(ast_test_comp, as.numeric(question))

  # proportion of answers that are 'yes' (a 'yes' is what counts as correct)
  ast_test_proportion_correct <- ast_test_comp_sorted %>%
    transmute(subject, test_proportion_correct = mean(ifelse(answer == 'yes', 1, 0))) %>%
    distinct()

  ast_test_comprehension <- rbind(ast_test_comprehension,
                                  ast_test_proportion_correct)
}

# give the 2020 table the 2021 column names, then stack the two cohorts
names(ast_test_comprehension_2020) <- names(ast_test_comprehension)

ast_test_comprehension_all <- rbind(ast_test_comprehension_2020,
                                    ast_test_comprehension)

## ast ending ratings
# 2020
# include only ending ratings: the id column plus columns 22-61
ending_items_2020 <- ast_test_data_2020[, c(1, 22:61)]

# Adds up the ten rating columns named <prefix>1 ... <prefix>10, item by item.
sum_ending_items <- function(prefix) {
  Reduce(`+`, ending_items_2020[, paste0(prefix, 1:10)])
}

# four summed scores, then the bias score (negative targets minus benign targets)
ending_ratings_2020 <- ending_items_2020 %>%
  mutate(benign_target_score = sum_ending_items('benign.target.'),
         negative_target_score = sum_ending_items('negative.target.'),
         benign_foil_score = sum_ending_items('benign.foil.'),
         negative_foil_score = sum_ending_items('negative.foil.'),
         ast_ib_score = negative_target_score - benign_target_score) %>%
  rename(subject = ID)

# 2021 ending ratings, built up one participant at a time
ending_ratings_2021 <- data.frame()

participant_ids_2021 <- as.numeric(14:18)

# the four rating types packed into each response string, in order
ending_rating_cols <- c('benign_target', 'negative_target', 'benign_foil', 'negative_foil')

for (id in participant_ids_2021) {

  # rows holding this participant's ending ratings, split at ',' into the
  # four rating types
  ast_test_endings <- data_2021 %>%
    dplyr::filter(subject == id,
                  str_detect(responses, 'benign_target')) %>%
    dplyr::select(subject, responses, test_title) %>%
    separate(col = responses, into = ending_rating_cols, sep = ',')

  # strip punctuation and letters so only the digits remain, convert to
  # numeric and add 1 to every rating
  for (rating_col in ending_rating_cols) {
    digits_only <- str_replace_all(ast_test_endings[[rating_col]],
                                   "[[:punct:]]|[[:alpha:]]", "")
    ast_test_endings[[rating_col]] <- as.numeric(digits_only) + 1
  }

  # one row per participant: each rating type summed over all rows
  ast_test_ending_scores <- ast_test_endings %>%
    transmute(subject,
              benign_target_score = sum(benign_target),
              negative_target_score = sum(negative_target),
              benign_foil_score = sum(benign_foil),
              negative_foil_score = sum(negative_foil)) %>%
    distinct()

  ending_ratings_2021 <- rbind(ending_ratings_2021, ast_test_ending_scores)
}

# bias score: negative targets minus benign targets
ending_ratings_2021_scored <- ending_ratings_2021 %>%
  mutate(ast_ib_score = negative_target_score - benign_target_score)

# keep the id and the five score columns from 2020 and stack with 2021
ending_score_cols_2020 <- c(1, 42:46)
ending_ratings_all <- rbind(ending_ratings_2020[, ending_score_cols_2020],
                            ending_ratings_2021_scored)

#visual imagery
#2020
# the id column and column 122 of the stacked 2020 training data
visual_imagery_2020 <- ast_all_2020[,c(1,122)]
colnames(visual_imagery_2020) <- c('subject', 'visual_imagery')

#2021
visual_imagery_2021 <- jatos_tidy(data_2021, 'visual_imagery')
# add_one() is applied to whole columns elsewhere in this script, so it is
# called directly on the numeric vector here. Wrapping it in lapply() over a
# vector returned a list, which made visual_imagery a list column and turned
# the stacked column below into a list as well.
visual_imagery_2021$visual_imagery <- add_one(as.numeric(visual_imagery_2021$visual_imagery))

visual_imagery_all <- rbind(visual_imagery_2020, visual_imagery_2021)

#incidental learning task
#2020
##incidental learning task
# Reads one tab-separated RESULTS_FILE.txt per participant (1-13) for the
# learning phase (folders ending in _l) and the testing phase (folders ending
# in _t) and stacks them into ilt_l and ilt_t.

#import learning phase data
# participant 1 seeds the data frame; participants 2-13 are appended below
ilt_l <- read.table("../Data/Incidental learning task_deploy/results/e2_p1_l/RESULTS_FILE.txt", sep = "\t", header=TRUE)

#combine into one dataframe
for (pp_number in 2:13){
  results_file_learning <- paste("../Data/Incidental learning task_deploy/results/e2_p", pp_number, "_l/RESULTS_FILE.txt",sep="")
  ilt_l_new_data <- read.table(results_file_learning, sep = "\t", header=TRUE)
  ilt_l<-rbind(ilt_l,ilt_l_new_data)
}

#import testing phase data
# participant 1 seeds the data frame; participants 2-13 are appended below
ilt_t <- read.table("../Data/Incidental learning task_deploy/results/e2_p1_t/RESULTS_FILE.txt", sep = "\t", header=TRUE)

#combine into one dataframe
for (pp_number in 2:13){
  results_file_testing <- paste("../Data/Incidental learning task_deploy/results/e2_p", pp_number, "_t/RESULTS_FILE.txt",sep="")
  ilt_t_new_data <- read.table(results_file_testing, sep = "\t", header=TRUE)
  ilt_t<-rbind(ilt_t,ilt_t_new_data)
}

#make clear participant numbers
ilt_l$Session_Name_ <- as.character(ilt_l$Session_Name_)
ilt_t$Session_Name_ <- as.character(ilt_t$Session_Name_)

# Strip lower-case letters, underscores and the "e2" prefix from the session
# name, leaving whatever else it contains (presumably just the participant
# number - confirm against the raw Session_Name_ values).
# NOTE(review): with this alternation order, whether "e2" is removed as a unit
# or the "e" is removed on its own (leaving the "2" behind) depends on how the
# regex engine resolves alternation - check that the resulting IDs are 1-13.
ilt_l$Session_Name_ <- gsub(pattern = "[a-z]|_|e2", replacement = "", ilt_l$Session_Name_)
ilt_t$Session_Name_ <- gsub(pattern = "[a-z]|_|e2", replacement = "", ilt_t$Session_Name_)

# Three cleaning steps, run separately on the learning (ilt_l) and testing
# (ilt_t) data. After each step the row counts before and after are shown,
# followed by the rows removed in that step as a percentage of the ORIGINAL
# number of rows (nrow(ilt_l) / nrow(ilt_t)). The percentages in the comments
# are values recorded from an earlier run.

#remove incorrect responses
# keep a trial only when the key pressed matches the expected side
ilt_l2 <- ilt_l %>%
  filter((KEYPRESS == "Left" & expected == "left")| (KEYPRESS == "Right" & expected == "right"))
nrow(ilt_l)
nrow(ilt_l2)
  
((nrow(ilt_l) - nrow(ilt_l2))/nrow(ilt_l))*100
#0.53% removed

ilt_t2 <- ilt_t %>%
  filter((KEYPRESS == "Left" & expected == "left")| (KEYPRESS == "Right" & expected == "right"))
nrow(ilt_t)
nrow(ilt_t2)
((nrow(ilt_t) - nrow(ilt_t2))/nrow(ilt_t))*100
#0.64% removed

#select only relevant variables, remove very fast and very slow responses
# keeps ID, probe location, word, RT and congruency; RT must lie in [200, 800]
ilt_l3 <- ilt_l2 %>%
  transmute(ID = Session_Name_, probe_location = expected, word, RT, congruent)%>%
  filter(RT >= 200 & RT <= 800)
nrow(ilt_l2)
nrow(ilt_l3)
(nrow(ilt_l2)-nrow(ilt_l3))/nrow(ilt_l)*100
#1.07%

ilt_t3 <- ilt_t2 %>%
  transmute(ID = Session_Name_, probe_location = expected, word, RT, congruent)%>%
  filter(RT >= 200 & RT <= 800)
nrow(ilt_t2)
nrow(ilt_t3)
(nrow(ilt_t2)-nrow(ilt_t3))/nrow(ilt_t)*100
#1.28%

#remove values 3sd from each participant's mean
# mean_RT and three_sd_RT are computed within each ID; the result stays
# grouped by ID and keeps only ID, congruent, RT and the two helper columns
ilt_l4 <- ilt_l3 %>%
  group_by(ID) %>%
  transmute(congruent, RT, mean_RT = mean(RT), 
            three_sd_RT = sd(RT)*3) %>%
  filter(RT > (mean_RT - three_sd_RT) & 
           RT < (mean_RT + three_sd_RT) )
nrow(ilt_l3)
nrow(ilt_l4)
(nrow(ilt_l3) - nrow(ilt_l4))/nrow(ilt_l)*100
#1.92%

ilt_t4 <- ilt_t3 %>%
  group_by(ID) %>%
  transmute(congruent, RT, mean_RT = mean(RT), 
            three_sd_RT = sd(RT)*3) %>%
  filter(RT > (mean_RT - three_sd_RT) & 
           RT < (mean_RT + three_sd_RT))
nrow(ilt_t3)
nrow(ilt_t4)

((nrow(ilt_t3) - nrow(ilt_t4))/nrow(ilt_t))*100
#1.71%

#find mean for each participant's congruent and incongruent variables 
#then make a column of the difference between them
# Each summarize(mean(RT)) produces a column literally named `mean(RT)`; the
# join suffixes turn these into `mean(RT)_congruent` / `mean(RT)_incongruent`,
# which is why the difference below refers to them with backticks.

# learning phase: mean RT per participant on congruent ("Y") trials
ilt_learning_congruent <- ilt_l4 %>%
  filter(congruent == "Y")%>%
  group_by(ID)%>%
  summarize(mean(RT))

# learning phase: mean RT per participant on incongruent ("N") trials
ilt_learning_incongruent <- ilt_l4 %>%
  filter(congruent == "N")%>%
  group_by(ID)%>%
  summarize(mean(RT))

# congruency effect = incongruent mean RT minus congruent mean RT
ilt_learning_summary <- full_join(ilt_learning_congruent, ilt_learning_incongruent, by = "ID", 
                                  suffix = c("_congruent", "_incongruent"))%>%
  mutate(learning_congruency_effect = `mean(RT)_incongruent` - `mean(RT)_congruent`)

# testing phase: mean RT per participant on congruent ("Y") trials
ilt_testing_congruent <- ilt_t4 %>%
  filter(congruent == "Y")%>%
  group_by(ID)%>%
  summarize(mean(RT))

# testing phase: mean RT per participant on incongruent ("N") trials
ilt_testing_incongruent <- ilt_t4 %>%
  filter(congruent == "N")%>%
  group_by(ID)%>%
  summarize(mean(RT))

# interpretation bias index = incongruent mean RT minus congruent mean RT
ilt_testing_summary <- full_join(ilt_testing_congruent, ilt_testing_incongruent, by = "ID", 
                                 suffix = c("_congruent", "_incongruent"))%>%
  mutate(interpretation_bias_index = `mean(RT)_incongruent` - `mean(RT)_congruent`)


#2021
# Incidental learning task, learning phase, 2021 participants.
# For every participant the loop below takes a fixed window of rows from their
# data, re-attaches each word to the response row it belongs to, labels the
# word as pain/neutral and the trial as congruent/incongruent, and appends the
# result to ilt_learning_2021_all.

# word lists used to classify the stimuli
pain_words <- c('aches', 'harmful', 'painful', 'hurting', 'pinching',
                'stinging', 'headache', 'soreness', 'agonising')

neutral_words <- c('grasp', 'banking', 'trailer', 'setting', 'animated',
                   'boarding', 'compound', 'inclined', 'promotion')

all_words <- c(pain_words, neutral_words)

ilt_learning_2021_all <- data.frame()

for (id in participant_ids_2021) {
# all rows belonging to this participant
subject_data_2021 <- data_2021 %>%
  filter(subject == id)

# this participant's ilt_condition value (compared with 'A' further down)
subject_condition <- jatos_tidy(subject_data_2021, 'ilt_condition')

# Rows 254-470 of the participant's data are taken as the learning phase.
# NOTE(review): the window is positional, so it assumes every participant's
# file has the same row layout - confirm.
ilt_data_2021_learning <- subject_data_2021[254:470,] %>%
  dplyr::select(rt, subject, stimulus, key_press, target_type, test_part) %>%
  #remove fixation rows
  # (only rows where test_part is NA are kept)
  filter(is.na(test_part))%>%
  dplyr::select(-test_part) %>%
  #remove stimulus info for dots
  # (stimulus is kept only on rows that have no rt; rows with an rt get NA)
  mutate(stimulus = ifelse(is.na(rt),stimulus, NA))

#strip additional text from stimulus to leave just the word
ilt_data_2021_learning$stimulus <- str_remove_all(ilt_data_2021_learning$stimulus, '<div style=\"font-size:60px;\">')
ilt_data_2021_learning$stimulus <- str_remove_all(ilt_data_2021_learning$stimulus, '</div>')

#make stimuli into a list
# (a character vector of the non-missing words, in their original row order)
stimuli <- ilt_data_2021_learning$stimulus
stimuli_only <- stimuli[!is.na(stimuli)]   

#remove rows containing the stimuli from the dataframe
# (only rows with a target_type are kept; this relies on there being exactly
# one such row per word so that stimuli_only lines up with them)
ilt_data_2021_learning2 <- ilt_data_2021_learning %>%
  filter(!is.na(target_type))%>%
  #add the words back in, in their original order
  mutate(stimulus = stimuli_only) %>%
  #create new column which = pain if stimulus is pain word and =neutral if stimulus is a neutral word
  # (anything that is not in pain_words is labelled 'neutral')
  mutate(stimulus_type = ifelse(stimulus %in% pain_words, 'pain', 'neutral'))
 
#create a new column for congruency of dot location 
# condition 'A': pain + left target or neutral + right target is congruent;
# any other condition: the sides are swapped
if(subject_condition$ilt_condition == 'A') {
  
  ilt_data_2021_learning3 <- ilt_data_2021_learning2 %>%
    mutate(congruency = ifelse((stimulus_type == 'pain' & target_type == 'left')|
                                 (stimulus_type == 'neutral' & target_type == 'right'), 'congruent', 'incongruent'))
  
} else {

  ilt_data_2021_learning3 <- ilt_data_2021_learning2 %>%
    mutate(congruency = ifelse((stimulus_type == 'pain' & target_type == 'right')|
                               (stimulus_type == 'neutral' & target_type == 'left'), 'congruent', 'incongruent'))
}

# append this participant's labelled trials to the combined data frame
ilt_learning_2021_all <- rbind(ilt_learning_2021_all, ilt_data_2021_learning3)

}

# All 2021 participants are now stacked in ilt_learning_2021_all.
# The cleaning steps below mirror the 2020 ones, except that each percentage
# is relative to the number of rows going INTO that step rather than to the
# original number of rows.

#remove incorrect responses
# keep a trial only when key_press 39 goes with a right target or key_press 37
# with a left target (presumably the right/left arrow key codes - confirm)
ilt_data_2021_learning4 <- ilt_learning_2021_all %>%
  filter((target_type == 'right' & key_press == 39)|(target_type == 'left' & key_press == 37))
nrow(ilt_learning_2021_all) 
nrow(ilt_data_2021_learning4)

(nrow(ilt_learning_2021_all) - nrow(ilt_data_2021_learning4))/nrow(ilt_learning_2021_all)*100

##select only relevant variables, remove very fast (<200ms) and very slow (>800ms) responses
ilt_data_2021_learning5 <- ilt_data_2021_learning4 %>%
  filter(rt >= 200 & rt <= 800)
nrow(ilt_data_2021_learning5)
nrow(ilt_data_2021_learning4)
(nrow(ilt_data_2021_learning4) - nrow(ilt_data_2021_learning5))/nrow(ilt_data_2021_learning4)*100


##remove values 3sd from each participant's mean
# mean_rt and three_sd_rt are computed within each subject; the result stays
# grouped by subject
ilt_data_2021_learning6 <- ilt_data_2021_learning5 %>%
  group_by(subject)%>%
  mutate(mean_rt = mean(rt),three_sd_rt = sd(rt)*3) %>%
  filter(rt > (mean_rt - three_sd_rt) & 
           rt < (mean_rt + three_sd_rt))
nrow(ilt_data_2021_learning5)
nrow(ilt_data_2021_learning6)

(nrow(ilt_data_2021_learning5) - nrow(ilt_data_2021_learning6))/nrow(ilt_data_2021_learning5)*100

#find mean for each participant's congruent and incongruent variables 
# one row per subject with a 'congruent' and an 'incongruent' mean-RT column
ilt_summary_2021 <- ilt_data_2021_learning6 %>%
  group_by(subject, congruency)%>%
  summarise(grouped_mean_rt = mean(rt))%>%
  spread(congruency, grouped_mean_rt)%>%
#then make a column of the difference between them
  mutate(learning_congruency_effect = incongruent - congruent)

#incidental learning task testing
# Same per-participant procedure as the 2021 learning phase, applied to the
# testing-phase rows. Here congruency depends only on the side of the target
# and the participant's condition, not on the type of word.
#ambiguous_words <- c('tight', 'squeeze', 'tension', 'pulsing', 'piercing', 
 #                    'pounding','drilling', 'pressing', 'splitting')

ilt_testing_2021_all <- data.frame()

for (id in participant_ids_2021) {
  # all rows belonging to this participant
  subject_data_2021 <- data_2021 %>%
    filter(subject == id)
  
  # this participant's ilt_condition value (compared with 'A' further down)
  subject_condition <- jatos_tidy(subject_data_2021, 'ilt_condition')
  
  # Rows 471-578 of the participant's data are taken as the testing phase.
  # NOTE(review): the window is positional, so it assumes every participant's
  # file has the same row layout - confirm.
  ilt_data_2021_testing <- subject_data_2021[471:578,] %>%
    dplyr::select(rt, subject, stimulus, key_press, target_type, test_part) %>%
    #remove fixation rows
    # (only rows where test_part is NA are kept)
    filter(is.na(test_part))%>%
    dplyr::select(-test_part) %>%
    #remove stimulus info for dots
    # (stimulus is kept only on rows that have no rt; rows with an rt get NA)
    mutate(stimulus = ifelse(is.na(rt),stimulus, NA))
  
  #strip additional text from stimulus to leave just the word
  ilt_data_2021_testing$stimulus <- str_remove_all(ilt_data_2021_testing$stimulus, '<div style=\"font-size:60px;\">')
  ilt_data_2021_testing$stimulus <- str_remove_all(ilt_data_2021_testing$stimulus, '</div>')
  
  #make stimuli into a list
  # (a character vector of the non-missing words, in their original row order)
  stimuli <- ilt_data_2021_testing$stimulus
  stimuli_only <- stimuli[!is.na(stimuli)]   
  
  #remove rows containing the stimuli from the dataframe
  # (only rows with a target_type are kept; this relies on there being exactly
  # one such row per word so that stimuli_only lines up with them)
  ilt_data_2021_testing2 <- ilt_data_2021_testing %>%
    filter(!is.na(target_type))%>%
    #add the words back in, in their original order
    mutate(stimulus = stimuli_only) 
  
  #create a new column for congruency of dot location to location associated with pain
  # condition 'A': a left target is congruent; any other condition: a right
  # target is congruent
  if(subject_condition$ilt_condition == 'A') {
    
    ilt_data_2021_testing3 <- ilt_data_2021_testing2 %>%
      mutate(congruency = ifelse(target_type == 'left', 'congruent', 'incongruent'))
    
  } else {
    
    ilt_data_2021_testing3 <- ilt_data_2021_testing2 %>%
      mutate(congruency = ifelse(target_type == 'right', 'congruent', 'incongruent'))
  }
  
  # append this participant's labelled trials to the combined data frame
  ilt_testing_2021_all <- rbind(ilt_testing_2021_all, ilt_data_2021_testing3)
  
}

# Cleaning of the stacked 2021 testing-phase trials. Each percentage below is
# relative to the number of rows going INTO that step.

#remove incorrect responses
# keep a trial only when key_press 39 goes with a right target or key_press 37
# with a left target (presumably the right/left arrow key codes - confirm)
ilt_data_2021_testing4 <- ilt_testing_2021_all %>%
  filter((target_type == 'right' & key_press == 39)|(target_type == 'left' & key_press == 37))
nrow(ilt_data_2021_testing4)
nrow(ilt_testing_2021_all)


(nrow(ilt_testing_2021_all) - nrow(ilt_data_2021_testing4))/nrow(ilt_testing_2021_all)*100

##select only relevant variables, remove very fast (<200ms) and very slow (>800ms) responses
ilt_data_2021_testing5 <- ilt_data_2021_testing4 %>%
  filter(rt >= 200 & rt <= 800)
nrow(ilt_data_2021_testing4)
nrow(ilt_data_2021_testing5)
(nrow(ilt_data_2021_testing4) - nrow(ilt_data_2021_testing5))/nrow(ilt_data_2021_testing4)*100


##remove values 3sd from each participant's mean
# mean_rt and three_sd_rt are computed within each subject; the result stays
# grouped by subject
ilt_data_2021_testing6 <- ilt_data_2021_testing5 %>%
  group_by(subject)%>%
  mutate(mean_rt = mean(rt),three_sd_rt = sd(rt)*3) %>%
  filter(rt > (mean_rt - three_sd_rt) & 
           rt < (mean_rt + three_sd_rt))
nrow(ilt_data_2021_testing6)
nrow(ilt_data_2021_testing5)
(nrow(ilt_data_2021_testing5) - nrow(ilt_data_2021_testing6))/nrow(ilt_data_2021_testing5)*100

#find mean for each participant's congruent and incongruent variables 
# one row per subject with a 'congruent' and an 'incongruent' mean-RT column
ilt_summary_testing_2021 <- ilt_data_2021_testing6 %>%
  group_by(subject, congruency)%>%
  summarise(grouped_mean_rt = mean(rt))%>%
  spread(congruency, grouped_mean_rt)%>%
  #then make a column of the difference between them
  mutate(ilt_ib_index = incongruent - congruent)

# ---- join together 2020 and 2021 ilt scores ----
# The 2020 summaries take over the column names of their 2021 counterparts
# (by position) so that the two cohorts can be stacked.

# learning phase
names(ilt_learning_summary) <- names(ilt_summary_2021)
ilt_learning_summary_all <- rbind(ilt_learning_summary, ilt_summary_2021)

# testing phase
names(ilt_testing_summary) <- names(ilt_summary_testing_2021)
ilt_testing_summary_all <- rbind(ilt_testing_summary, ilt_summary_testing_2021)

#sentence ratings
sentence_ratings <- read.csv('data_2020/sentence_ratings.csv')

# The nine ambiguous words whose sentences are scored. The previous filter
# listed "Pulsing" twice and left out "Squeeze", so sentences for that word
# were silently dropped from the proportion.
# NOTE(review): "Squeeze" is assumed to be capitalised like the other words
# in the file - confirm against the raw data.
sgt_ambiguous_words <- c("Tight", "Squeeze", "Tension", "Pulsing", "Piercing",
                         "Pounding", "Drilling", "Pressing", "Splitting")

# Per participant: the number of sentences rated "p" divided by the number
# rated either "p" or "n".
sentence_generation_scored <- sentence_ratings %>%
  filter(word %in% sgt_ambiguous_words)%>%
  group_by(ID)%>%
  mutate(pain = sum(pain.related == "p"),
         neutral = sum(pain.related == "n"),
         total = sum(pain.related =="p"|pain.related == "n"),
         sgt_proportion_pain = pain/total)%>%
  dplyr::select(ID, sgt_proportion_pain)%>%
  unique()%>%
  rename(subject = ID)
# subject is stored as text here so it matches the type used by the other
# tables in the final join
sentence_generation_scored$subject <- as.character(sentence_generation_scored$subject)

# ---- pain thresholds ----
pain_thresholds <- read.csv(file = 'data_2020/pain_thresholds_2020_and_2021.csv')

# columns 2-4 are averaged into one threshold per row
threshold_cols <- 2:4

# mean threshold per row; the id column is renamed and stored as text
pain_thresholds_scored <- pain_thresholds %>%
  mutate(mean_threshold = rowMeans(pain_thresholds[, threshold_cols])) %>%
  rename(subject = ID) %>%
  mutate(subject = as.character(subject))

# ---- interpretation of ambiguous sensations ----
sensations_ratings <- read.csv(file = 'data_2020/somatosensory_interpretation_2020_and_2021.csv')

# For every rating type and every level (60, 80, 100, 120, 140) average the
# three repeats, e.g. hot60_1_intensity, hot60_2_intensity, hot60_3_intensity
# -> hot60_intensity. New columns are added in the order: all intensity
# means first, then all unpleasantness means.
sensations_ratings_scored <- sensations_ratings

for (rating_type in c('intensity', 'unpleasantness')) {
  for (level in c(60, 80, 100, 120, 140)) {
    repeat_cols <- paste0('hot', level, '_', 1:3, '_', rating_type)
    mean_col <- paste0('hot', level, '_', rating_type)
    sensations_ratings_scored[[mean_col]] <-
      Reduce(`+`, sensations_ratings[repeat_cols]) / 3
  }
}

# the id column is renamed and stored as text
sensations_ratings_scored <- sensations_ratings_scored %>%
  rename(subject = ID) %>%
  mutate(subject = as.character(subject))
#exclude means where full temp could not be reached in analysis

# training condition for every participant (2020 and 2021)
ast_condition_all <- rbind(ast_condition_2020, ast_condition)

# ---- join together all variables into one dataframe ----
# Every table is left-joined onto the ASI scores by subject, in this order.
tables_to_join <- list(
  asi_scored,
  sas_scored,
  fop_scored,
  pcs_scored,
  bdi_scored,
  stai_scored,
  handedness_scored,
  demo_scored,
  ast_training_comprehension_all,
  ast_test_comprehension_all,
  ending_ratings_all,
  visual_imagery_all,
  ilt_learning_summary_all,
  ilt_testing_summary_all,
  sentence_generation_scored,
  pain_thresholds_scored,
  sensations_ratings_scored,
  ast_condition_all
)

joined_data <- Reduce(
  function(joined, next_table) left_join(joined, next_table, by = 'subject'),
  tables_to_join
)

# keep only the scored variables used in the analysis
organised_data <- joined_data %>%
  dplyr::select(
    subject,
    # questionnaires
    asi_social, asi_physical, asi_cognitive, asi_total,
    sas_total,
    fop_minor, fop_medical, fop_severe, fop_total,
    pcs_rumination, pcs_magnification, pcs_helplessness, pcs_total,
    bdi_total,
    stai_state, stai_trait,
    handedness_total,
    # demographics
    gender, education, occupation, family_pain, age_exact,
    # ambiguous scenarios task
    training_proportion_correct,
    test_proportion_correct,
    ast_ib_score,
    visual_imagery,
    # incidental learning task
    learning_congruency_effect,
    ilt_ib_index,
    # sentence generation, thresholds and sensation ratings
    sgt_proportion_pain,
    mean_threshold,
    hot60_intensity, hot80_intensity, hot100_intensity, hot120_intensity, hot140_intensity,
    hot60_unpleasantness, hot80_unpleasantness, hot100_unpleasantness, hot120_unpleasantness, hot140_unpleasantness,
    # training condition
    condition
  )
