##Multiple imputation results - parity 0
library(ggplot2)
library(dplyr)
library(boot)
library(rstan)
windowsFonts(Calibri=windowsFont("Calibri"))
load("chap4/data/ONS2018_allc.RData")
load("chap4/results/QMI.RData")
source("chap4/scripts/important_functions.r")

#Indicate integrated model type fitted to mean Q imputation and 10 imputations from Step 3
mod <- c("50/50","33/67","25/75","20/80","10/90")[2]

#ONS data set-up
data4 <- list()
for (i in 1:4) {
  data4[[i]] <- data.frame(age=ONS_births_dat[[i]]$Group.1,coh=ONS_births_dat[[i]]$Group.2,N=ONS_expos_dat[[i]]$x,n=ONS_births_dat[[i]]$x,ONS=ONS_rates_dat[[i]]$x)
  data4[[i]] <- data4[[i]][which(data4[[i]]$N>0 & data4[[i]]$n>=0 & data4[[i]]$n <= data4[[i]]$N & data4[[i]]$age %in% c(15:44)),]
}
data4l <- rbind(data.frame(parity=0,data4[[1]]),
                data.frame(parity=1,data4[[2]]),
                data.frame(parity=2,data4[[3]]),
                data.frame(parity=3,data4[[4]]))

#Set-up (mean imputation)
files <- paste0("chap4/results/p0_",gsub("/","",mod),".RData")
results <- list()
results$Yhatm <- results$Adat <- results$AQAQdat <- results$Cdat <- results$Tdat <- results$ACACdat <- results$ACdat <- results$fdat <- results$probdat <- list()
results$psurvf <- results$psurvfm <- results$psurvfl <- results$psurvfu <- results$psumfm <- results$psumfl <- results$psumfu <- results$wsumfm <- results$wsumfl <- results$wsumfu <- list()
results$probfitlow <- results$probfitupp <- results$psurvflow <- results$psurvfupp <- list()

#Extract results from fitted model
load("chap3/results/Qmeanimp.RData")
source("chap4/scripts/p0_setup.r") 
ind <- -c(1:64)
load(files)
source("chap4/scripts/p0clean.r")
results0 <- results

#Set-up (10 imputations)
files <- paste0("chap4/results/p0_",gsub("/","",mod),"_imp",1:10,".RData")
results <- list()
results$Yhatm <- results$Adat <- results$AQAQdat <- results$Cdat <- results$Tdat <- results$ACACdat <- results$ACdat <- results$fdat <- results$probdat <- list()
results$psurvf <- results$psurvfm <- results$psurvfl <- results$psurvfu <- results$psumfm <- results$psumfl <- results$psumfu <- results$wsumfm <- results$wsumfl <- results$wsumfu <- list()
results$probfitlow <- results$probfitupp <- results$psurvflow <- results$psurvfupp <- list()

#Extract results from fitted models
for (n in 1:10) {
  print(n)
  qualf4imp_0 <- qualf4MI_0[[n]]
  source("chap4/scripts/p0_setup.r")
  ind <- -c(1:64)
  load(files[n])
  source("chap4/scripts/p0cleanMI.r")
}

#Labels
supp.labsp <- c(paste0("Imputation ",1:10),"ONS data")
names(supp.labsp) <- 1:11
supp.labsq4 <- c("< GCSE","GCSE","A Level","Degree")
names(supp.labsq4) <- c(1,2,3,4)

#Processing
agerange <- 15:44
cohrange <- 1945:2003
parity <- rep(0,10)
Adat <- data.frame(type=rep(1:10,each=length(agerange)),Reduce(rbind,results$Adat))
Cdat <- data.frame(type=rep(1:10,each=length(cohrange)),Reduce(rbind,results$Cdat))
ACACdat <- data.frame(type=rep(1:10,each=length(agerange)*length(cohrange)),
                      Reduce(rbind,results$ACACdat))
ACACdatp <- ACACdatf <- ACACdat
ACACdatp[which(ACACdatp$age+ACACdatp$coh>2018),] <- NA
ACACdatf[which(ACACdatf$age+ACACdatf$coh<=2018),] <- NA
ACdat <- data.frame(type=rep(1:10,each=length(agerange)*length(cohrange)),
                    Reduce(rbind,results$ACdat))
ACdatp <- ACdatf <- ACdat
ACdatp[which(ACdatp$age+ACdatp$coh>2018),] <- NA
ACdatf[which(ACdatf$age+ACdatf$coh<=2018),] <- NA
fdatACall <- data.frame(type=rep(1:10,each=length(agerange)*length(cohrange)),
                        parity=rep(parity,each=length(agerange)*length(cohrange)),
                        age=rep(rep(agerange,length(cohrange)),length(files)),
                        coh=rep(rep(cohrange,each=length(agerange)),length(files)),
                        mean=unlist(results$psurvfm),
                        lower=unlist(results$psurvfl),
                        upper=unlist(results$psurvfu))
fdatACall0 <- data.frame(type=1,age=rep(agerange,length(cohrange)),
                         coh=rep(cohrange,each=length(agerange)),
                         mean=unlist(results0$psurvfm)[1:1770],
                         lower=unlist(results0$psurvfl)[1:1770],
                         upper=unlist(results0$psurvfu)[1:1770])
ACoverlay <- data.frame(type=1,age=rep(agerange,length(cohrange)),
                        coh=rep(cohrange,each=length(agerange)),
                        MIlower=0.025*apply(Reduce(rbind,results$psurvflow),2,sort)[250,]+
                          0.975*apply(Reduce(rbind,results$psurvflow),2,sort)[251,],
                        MIupper=0.025*apply(Reduce(rbind,results$psurvfupp),2,sort,decreasing=T)[250,]+
                          0.975*apply(Reduce(rbind,results$psurvfupp),2,sort,decreasing=T)[251,],
                        MImean=apply(1000*Reduce(rbind,results$psurvfm),2,function(x) sum(x)/10000))
fdatACall <- left_join(fdatACall,data4l[,c("parity","age","coh","ONS")],by=c("parity","age","coh"))
fdatACall <- rbind(fdatACall,data.frame(data4l[,c("parity","age","coh")],mean=data4l$ONS,lower=NA,upper=NA,ONS=NA,type=11)[data4l$parity==0,])
fdatACall$type <- factor(fdatACall$type,levels=1:11)
fdatACallp <- fdatACallf <- fdatACall
fdatACallp[which(fdatACallp$age+fdatACallp$coh>2018),] <- NA
fdatACallf[which(fdatACallf$age+fdatACallf$coh<=2018),] <- NA
fdatACallred <- fdatACall[which(!is.na(fdatACall$ONS) | fdatACall$type==11),]
psumONS <- aggregate(data4l$ONS,by=list(data4l$coh,data4l$parity),sum)
psumONS[which(psumONS$Group.1>(2018-44)),"x"] <- NA
colnames(psumONS) <- c("cov","parity","ONS")
psumdat <- data.frame(type=rep(1:10,each=length(cohrange)),
                      parity=rep(parity,each=length(cohrange)),
                      cov=rep(cohrange,length(files)),
                      mean=unlist(results$psumfm),
                      lower=unlist(results$psumfl),
                      upper=unlist(results$psumfu))
psumdat <- left_join(psumdat,psumONS)
psumdatp <- psumdatf <- psumdat
psumdatp[which(psumdat$cov+44>2018),] <- NA
psumdatf[which(psumdat$cov+44<=2018),] <- NA

probdatall <- data.frame(type=rep(1:10,each=4*length(agerange)*length(cohrange)),
                         Reduce(rbind,results$probdat))
probdatall$qualf <- factor(supp.labsq4[probdatall$qualf],levels=supp.labsq4)
probdatallp <- probdatallf <- probdatall
probdatallp[which(probdatallp$age+probdatallp$coh>2018),] <- NA
probdatallf[which(probdatallf$age+probdatallf$coh<=2018),] <- NA

probdatall0 <- data.frame(type=1,results0$probdat[[1]])
probdatall0$qualf <- factor(supp.labsq4[probdatall0$qualf],levels=supp.labsq4)

q1l <- q2l <- q3l <- q4l <- q1u <- q2u <- q3u <- q4u <- list()
for (i in 1:10) {
  q1l[[i]] <- matrix(unlist(results$probfitlow[[i]][[1]]),nrow=251)
  q2l[[i]] <- matrix(unlist(results$probfitlow[[i]][[2]]),nrow=251)
  q3l[[i]] <- matrix(unlist(results$probfitlow[[i]][[3]]),nrow=251)
  q4l[[i]] <- matrix(unlist(results$probfitlow[[i]][[4]]),nrow=251)
  q1u[[i]] <- matrix(unlist(results$probfitupp[[i]][[1]]),nrow=251)
  q2u[[i]] <- matrix(unlist(results$probfitupp[[i]][[2]]),nrow=251)
  q3u[[i]] <- matrix(unlist(results$probfitupp[[i]][[3]]),nrow=251)
  q4u[[i]] <- matrix(unlist(results$probfitupp[[i]][[4]]),nrow=251)
}
probdatfit <- Reduce(rbind,lapply(results$probdat,function(x) x$fit))

ACQoverlay <- data.frame(type=1,age=rep(rep(agerange,length(cohrange)),4),
                         coh=rep(rep(cohrange,each=length(agerange)),4),
                         qualf=factor(rep(supp.labsq4,each=length(agerange)*length(cohrange)),levels=supp.labsq4),
                         MIlower=c(0.025*apply(Reduce(rbind,q1l),2,sort)[250,]+
                                     0.975*apply(Reduce(rbind,q1l),2,sort)[251,],
                                   0.025*apply(Reduce(rbind,q2l),2,sort)[250,]+
                                     0.975*apply(Reduce(rbind,q2l),2,sort)[251,],
                                   0.025*apply(Reduce(rbind,q3l),2,sort)[250,]+
                                     0.975*apply(Reduce(rbind,q3l),2,sort)[251,],
                                   0.025*apply(Reduce(rbind,q4l),2,sort)[250,]+
                                     0.975*apply(Reduce(rbind,q4l),2,sort)[251,]),
                         MIupper=c(0.025*apply(Reduce(rbind,q1u),2,sort,decreasing=T)[250,]+
                                     0.975*apply(Reduce(rbind,q1u),2,sort,decreasing=T)[251,],
                                   0.025*apply(Reduce(rbind,q2u),2,sort,decreasing=T)[250,]+
                                     0.975*apply(Reduce(rbind,q2u),2,sort,decreasing=T)[251,],
                                   0.025*apply(Reduce(rbind,q3u),2,sort,decreasing=T)[250,]+
                                     0.975*apply(Reduce(rbind,q3u),2,sort,decreasing=T)[251,],
                                   0.025*apply(Reduce(rbind,q4u),2,sort,decreasing=T)[250,]+
                                     0.975*apply(Reduce(rbind,q4u),2,sort,decreasing=T)[251,]),
                         MImean=apply(1000*probdatfit,2,function(x) sum(x)/10000))

#Plots
#Posterior mean marginalised probabilities (Figure 4.33) 
seqc <- seq(1981,2003,2)
intdat <- data.frame(coh=seqc,xintercept=2018-seqc)

png(file="chap4/plots/fig33.png",width=15,height=12,units="cm",res=400)
ggplot(fdatACall[fdatACall$type %in% c(1:10) & fdatACall$parity==0 & fdatACall$coh %in% seqc,], aes(x=age, y=mean, group=type)) +
  geom_line(aes(color="Multiple imputation (separate)"),size=0.7) +
  geom_line(aes(y=lower,color="Multiple imputation (separate)"),size=0.3) +
  geom_line(aes(y=upper,color="Multiple imputation (separate)"),size=0.3) +
  geom_line(data=ACoverlay[ACoverlay$coh %in% seqc,],aes(x=age,y=MImean,linetype="Multiple imputation (pooled)"), size=0.7) +
  geom_line(data=ACoverlay[ACoverlay$coh %in% seqc,],aes(x=age,y=MIlower),size=0.3) +
  geom_line(data=ACoverlay[ACoverlay$coh %in% seqc,],aes(x=age,y=MIupper),size=0.3) +
  geom_line(data=fdatACall0[fdatACall0$coh %in% seqc,],aes(x=age,y=mean,linetype="Mean imputation"), size=0.7)+
  geom_line(data=fdatACall0[fdatACall0$coh %in% seqc,],aes(x=age,y=lower),linetype=2,size=0.3)+
  geom_line(data=fdatACall0[fdatACall0$coh %in% seqc,],aes(x=age,y=upper),linetype=2,size=0.3)+
  scale_color_manual(values=c("Multiple imputation (separate)" = "#F8766D"), guide_legend(title="")) +
  scale_linetype_manual(values=c("Multiple imputation (pooled)" = 1,"Mean imputation" = 2), guide=guide_legend(title="",nrow=2)) +
  geom_vline(aes(xintercept=xintercept),data=intdat) +
  labs(x = "Age", y = "Probability", color="Imputation")+
  scale_x_continuous(limits=c(15,44), breaks=seq(15,44,5), minor_breaks=setdiff(15:44,seq(15,44,5)), expand=c(0.02,0.02)) +
  coord_cartesian(ylim=c(0,0.3)) +
  facet_wrap(~paste0(coh," cohort"),nrow=3) + 
  theme_bw() + theme(text = element_text("Calibri"), legend.position="bottom", legend.spacing.x = unit(0.1, "cm"))
dev.off()

#ACQ posterior mean probabilities (Figure 4.34)
seqc <- c(1985,1990,1995,2000)
intdat <- data.frame(coh=seqc,xintercept=2018-seqc)

png(file="chap4/plots/fig34.png",width=15,height=15,units="cm",res=400)
ggplot(probdatall[probdatall$coh %in% seqc,], aes(x=age, y=fit, group=type)) +
  geom_line(aes(color="Multiple imputation (separate)"),size=0.7) +
  geom_line(aes(y=lower,color="Multiple imputation (separate)"),size=0.3) +
  geom_line(aes(y=upper,color="Multiple imputation (separate)"),size=0.3) +
  geom_line(data=ACQoverlay[ACQoverlay$coh %in% seqc,],aes(x=age,y=MImean,linetype="Multiple imputation (pooled)"),size=0.7) +
  geom_line(data=ACQoverlay[ACQoverlay$coh %in% seqc,],aes(x=age,y=MIlower),size=0.3) +
  geom_line(data=ACQoverlay[ACQoverlay$coh %in% seqc,],aes(x=age,y=MIupper),size=0.3) +
  geom_line(data=probdatall0[probdatall0$coh %in% seqc,],aes(x=age,y=fit,linetype="Mean imputation"),size=0.7)+
  geom_line(data=probdatall0[probdatall0$coh %in% seqc,],aes(x=age,y=lower),linetype=2,size=0.3)+
  geom_line(data=probdatall0[probdatall0$coh %in% seqc,],aes(x=age,y=upper),linetype=2,size=0.3)+
  scale_color_manual(values=c("Multiple imputation (separate)" = "#F8766D"), guide_legend(title="")) +
  scale_linetype_manual(values=c("Multiple imputation (pooled)" = 1,"Mean imputation" = 2), guide=guide_legend(title="",nrow=2)) +
  geom_vline(aes(xintercept=xintercept),data=intdat) +
  labs(x = "Age", y = "Probability", color="Imputation")+
  scale_x_continuous(limits=c(15,44), breaks=seq(15,44,5), minor_breaks=setdiff(15:44,seq(15,44,5)), expand=c(0.02,0.02)) +
  coord_cartesian(ylim=c(0,0.3)) +
  facet_grid(qualf~paste0(coh," cohort")) + 
  theme_bw() + theme(text = element_text("Calibri"), legend.position="bottom", legend.spacing.x = unit(0.1, "cm"))
dev.off()
