##Plotting results for mean Q imputation
library(ggplot2)
library(dplyr)
library(boot)
library(rstan)
windowsFonts(Calibri=windowsFont("Calibri"))
load("chap4/data/ONS2018_allc.RData")
load("chap3/results/Qmeanimp.RData")
source("chap4/scripts/important_functions.r")

#Indicate models fitted
indmod <- expand.grid(mod=c("50/50","33/67","25/75","20/80","10/90","100/0"),parity=0:3)
indTF <- c(T, T, T, T, T, T)  # change T to F depending on models fitted (assumes same models fitted for each parity)
indmod$fit <- rep(indTF,4)
if (all(!indTF)) stop("Analysis requires at least one model type to have been fitted to each parity")
if (indTF[6] & sum(indTF[1:5])==0) stop("Analysis of UKHLS-only model requires at least one integrated model to have been fitted")
indmod$type <- rep(c(3:7,2),4)
indfit <- which(indmod$fit)
parity <- indmod$parity[indfit]
mod <- indmod$mod[indfit]
mod2 <- indmod$mod[which(indTF)]
type <- indmod$type[which(indTF)]

#ONS data set-up
data4 <- list()
for (i in 1:4) {
  data4[[i]] <- data.frame(age=ONS_births_dat[[i]]$Group.1,coh=ONS_births_dat[[i]]$Group.2,N=ONS_expos_dat[[i]]$x,n=ONS_births_dat[[i]]$x,ONS=ONS_rates_dat[[i]]$x)
  data4[[i]] <- data4[[i]][which(data4[[i]]$N>0 & data4[[i]]$n>=0 & data4[[i]]$n <= data4[[i]]$N & data4[[i]]$age %in% c(15:44)),]
}
data4l <- rbind(data.frame(parity=0,data4[[1]]),
                data.frame(parity=1,data4[[2]]),
                data.frame(parity=2,data4[[3]]),
                data.frame(parity=3,data4[[4]]))

#UKHLS data set-up
ACtab0 <- aggregate(birth.bin_0, by=list(age_0,coh_0), mean)
ACwt0 <- aggregate(weights_0st, by=list(age_0,coh_0), sum)
ACtab0 <- ACtab0[which(ACwt0$x>0),]
names(ACtab0) <- c("age","coh","UKHLS")

ACtab1 <- aggregate(birth.bin_1, by=list(age_1,coh_1), mean)
ACwt1 <- aggregate(weights_1st, by=list(age_1,coh_1), sum)
ACtab1 <- ACtab1[which(ACwt1$x>0),]
names(ACtab1) <- c("age","coh","UKHLS")

ACtab2 <- aggregate(birth.bin_2, by=list(age_2,coh_2), mean)
ACwt2 <- aggregate(weights_2st, by=list(age_2,coh_2), sum)
ACtab2 <- ACtab2[which(ACwt2$x>0),]
names(ACtab2) <- c("age","coh","UKHLS")

ACtab3 <- aggregate(birth.bin_3, by=list(age_3,coh_3), mean)
ACwt3 <- aggregate(weights_3st, by=list(age_3,coh_3), sum)
ACtab3 <- ACtab3[which(ACwt3$x>0),]
names(ACtab3) <- c("age","coh","UKHLS")

data4u <- list()
data4u[[1]] <- ACtab0
data4u[[2]] <- ACtab1
data4u[[3]] <- ACtab2
data4u[[4]] <- ACtab3

data4ul <- rbind(data.frame(parity=0,data4u[[1]]),
                 data.frame(parity=1,data4u[[2]]),
                 data.frame(parity=2,data4u[[3]]),
                 data.frame(parity=3,data4u[[4]]))

ACQtab0 <- aggregate(birth.bin_0, by=list(age_0,coh_0,qualf4imp_0), mean)
ACQwt0 <- aggregate(weights_0st,by=list(age_0,coh_0,qualf4imp_0),sum)
ACQtab0 <- ACQtab0[which(ACQwt0$x>0),]
names(ACQtab0) <- c("age","coh","qualf","UKHLS")

ACTQtab1 <- aggregate(birth.bin_1, by=list(age_1,coh_1,gapc_1,qualf2bimp_1), mean)
ACTQwt1 <- aggregate(weights_1st,by=list(age_1,coh_1,gapc_1,qualf2bimp_1),sum)
ACTQtab1 <- ACTQtab1[which(ACTQwt1$x>0),]
names(ACTQtab1) <- c("age","coh","gapc","qualf","UKHLS")

ACTQtab2 <- aggregate(birth.bin_2, by=list(age_2,coh_2,gapc_2,qualf3bimp_2), mean)
ACTQwt2 <- aggregate(weights_2st,by=list(age_2,coh_2,gapc_2,qualf3bimp_2),sum)
ACTQtab2 <- ACTQtab2[which(ACTQwt2$x>0),]
names(ACTQtab2) <- c("age","coh","gapc","qualf","UKHLS")

ACTtab3 <- aggregate(birth.bin_3, by=list(age_3,coh_3,gapc_3), mean)
ACTwt3 <- aggregate(weights_3st,by=list(age_3,coh_3,gapc_3),sum)
ACTtab3 <- ACTtab3[which(ACTwt3$x>0),]
names(ACTtab3) <- c("age","coh","gapc","UKHLS")

#Set-up
files <- c(paste0("chap4/results/p0_",c("5050","3367","2575","2080","1090","UKHLS_only"),".RData"),
           paste0("chap4/results/p1_",c("5050","3367","2575","2080","1090","UKHLS_only"),".RData"),
           paste0("chap4/results/p2_",c("5050","3367","2575","2080","1090","UKHLS_only"),".RData"),
           paste0("chap4/results/p3_",c("5050","3367","2575","2080","1090","UKHLS_only"),".RData"))[indfit]
results <- list()
results$Yhatm <- results$Adat <- results$AQAQdat <- results$Cdat <- results$Tdat <- results$ACACdat <- results$ACdat <- results$fdat <- results$probdat <- list()
results$psurvf <- results$psurvfm <- results$psurvfl <- results$psurvfu <- results$psumfm <- results$psumfl <- results$psumfu <- results$wsumfm <- results$wsumfl <- results$wsumfu <- list()

#Extract results from fitted models
for (n in 1:length(files)) {
  print(n)
  p <- parity[n]
  m <- mod[n]
  if (p == 0) source("chap4/scripts/p0_setup.r")
  if (p == 1) source("chap4/scripts/p1_setup.r")
  if (p == 2) source("chap4/scripts/p2_setup.r")
  if (p == 3) source("chap4/scripts/p3_setup.r")
  ind <- -c(1:64)
  load(files[n])
  if (mod[n] != "100/0") source(paste0("chap4/scripts/p",p,"clean.r"))
  if (mod[n] == "100/0") source(paste0("chap4/scripts/p",p,"cleanUKHLS.r"))
}

#Processing
agerange <- 15:44
cohrange <- 1945:2003
Adat <- data.frame(type=rep(rep(type,4),each=length(agerange)),Reduce(rbind,results$Adat))
Cdat <- data.frame(type=rep(rep(type,4),each=length(cohrange)),Reduce(rbind,results$Cdat))
Tdat <- data.frame(type=rep(rep(type,3),each=11),Reduce(rbind,results$Tdat))
AQAQdat <- data.frame(type=c(rep(type,each=4*length(agerange)),
                             rep(type,each=2*length(agerange))),
                      Reduce(rbind,results$AQAQdat))
ACACdat <- data.frame(type=rep(rep(type,4),each=length(agerange)*length(cohrange)),
                      Reduce(rbind,results$ACACdat))
ACACdatp <- ACACdatf <- ACACdat
ACACdatp[which(ACACdatp$age+ACACdatp$coh>2018&ACACdatp$type!=2),] <- NA
ACACdatf[which(ACACdatf$age+ACACdatf$coh<=2018&ACACdatf$type!=2),] <- NA
ACACdatp[which(ACACdatp$age+ACACdatp$coh>2007&ACACdatp$type==2),] <- NA
ACACdatf[which(ACACdatf$age+ACACdatf$coh<=2007&ACACdatf$type==2),] <- NA
ACdat <- data.frame(type=rep(rep(type,4),each=length(agerange)*length(cohrange)),
                    Reduce(rbind,results$ACdat))
ACdatp <- ACdatf <- ACdat
ACdatp[which(ACdatp$age+ACdatp$coh>2018&ACdatp$type!=2),] <- NA
ACdatf[which(ACdatf$age+ACdatf$coh<=2018&ACdatf$type!=2),] <- NA
ACdatp[which(ACdatp$age+ACdatp$coh>2007&ACdatp$type==2),] <- NA
ACdatf[which(ACdatf$age+ACdatf$coh<=2007&ACdatf$type==2),] <- NA
fdatACall <- data.frame(type=rep(rep(type,4),each=length(agerange)*length(cohrange)),
                        parity=rep(parity,each=length(agerange)*length(cohrange)),
                        age=rep(rep(agerange,length(cohrange)),length(files)),
                        coh=rep(rep(cohrange,each=length(agerange)),length(files)),
                        mean=unlist(results$psurvfm),
                        lower=unlist(results$psurvfl),
                        upper=unlist(results$psurvfu))
fdatACall <- left_join(fdatACall,data4l[,c("parity","age","coh","ONS")],by=c("parity","age","coh"))
fdatACall <- left_join(fdatACall,data4ul[,c("parity","age","coh","UKHLS")],by=c("parity","age","coh"))
fdatACall <- rbind(fdatACall,data.frame(data4l[,c("parity","age","coh")],mean=data4l$ONS,lower=NA,upper=NA,ONS=NA,UKHLS=NA,type=8))
fdatACall <- rbind(fdatACall,data.frame(data4ul[,c("parity","age","coh")],mean=data4ul$UKHLS,lower=NA,upper=NA,ONS=NA,UKHLS=NA,type=1))

ACQdat0 <- data.frame(type=rep(type,each=4*length(agerange)*length(cohrange)),
                      Reduce(rbind,results$probdat[1:6]))
ACTQdat1 <- data.frame(type=rep(type,each=22*length(agerange)*length(cohrange)),
                       Reduce(rbind,results$probdat[7:12]))
ACTQdat2 <- data.frame(type=rep(type,each=33*length(agerange)*length(cohrange)),
                       Reduce(rbind,results$probdat[13:18]))
ACTdat3 <- data.frame(type=rep(type,each=11*length(agerange)*length(cohrange)),
                      Reduce(rbind,results$probdat[19:24]))
ACQdat0 <- left_join(ACQdat0,ACQtab0,by=c("age","coh","qualf"))
ACTQdat1 <- left_join(ACTQdat1,ACTQtab1,by=c("age","coh","gapc","qualf"))
ACTQdat2 <- left_join(ACTQdat2,ACTQtab2,by=c("age","coh","gapc","qualf"))
ACTdat3 <- left_join(ACTdat3,ACTtab3,by=c("age","coh","gapc"))
ACQdat0p <- ACQdat0f <- ACQdat0
ACQdat0p[which(ACQdat0p$age+ACQdat0p$coh>2018&ACQdat0p$type!=2),] <- NA
ACQdat0f[which(ACQdat0f$age+ACQdat0f$coh<=2018&ACQdat0f$type!=2),] <- NA
ACQdat0p[which(ACQdat0p$age+ACQdat0p$coh>2007&ACQdat0p$type==2),] <- NA
ACQdat0f[which(ACQdat0f$age+ACQdat0f$coh<=2007&ACQdat0f$type==2),] <- NA
ACTQdat1p <- ACTQdat1f <- ACTQdat1
ACTQdat1p[which(ACTQdat1p$age+ACTQdat1p$coh>2018&ACTQdat1p$type!=2),] <- NA
ACTQdat1f[which(ACTQdat1f$age+ACTQdat1f$coh<=2018&ACTQdat1f$type!=2),] <- NA
ACTQdat1p[which(ACTQdat1p$age+ACTQdat1p$coh>2007&ACTQdat1p$type==2),] <- NA
ACTQdat1f[which(ACTQdat1f$age+ACTQdat1f$coh<=2007&ACTQdat1f$type==2),] <- NA
ACTQdat2p <- ACTQdat2f <- ACTQdat2
ACTQdat2p[which(ACTQdat2p$age+ACTQdat2p$coh>2018&ACTQdat2p$type!=2),] <- NA
ACTQdat2f[which(ACTQdat2f$age+ACTQdat2f$coh<=2018&ACTQdat2f$type!=2),] <- NA
ACTQdat2p[which(ACTQdat2p$age+ACTQdat2p$coh>2007&ACTQdat2p$type==2),] <- NA
ACTQdat2f[which(ACTQdat2f$age+ACTQdat2f$coh<=2007&ACTQdat2f$type==2),] <- NA
ACTdat3p <- ACTdat3f <- ACTdat3
ACTdat3p[which(ACTdat3p$age+ACTdat3p$coh>2018&ACTdat3p$type!=2),] <- NA
ACTdat3f[which(ACTdat3f$age+ACTdat3f$coh<=2018&ACTdat3f$type!=2),] <- NA
ACTdat3p[which(ACTdat3p$age+ACTdat3p$coh>2007&ACTdat3p$type==2),] <- NA
ACTdat3f[which(ACTdat3f$age+ACTdat3f$coh<=2007&ACTdat3f$type==2),] <- NA
ACTdat3$gapc <- factor(ACTdat3$gapc,levels=1:11)

#Forecasted exposures
expf0 <- expf1 <- expf2 <- expf3 <- data.frame(expand.grid(age=agerange,coh=cohrange))
UKHLSexp0 <- aggregate(birth.bin_0, by=list(age_0,coh_0), length)
UKHLSexp1 <- aggregate(birth.bin_1, by=list(age_1,coh_1), length)
UKHLSexp2 <- aggregate(birth.bin_2, by=list(age_2,coh_2), length)
UKHLSexp3 <- aggregate(birth.bin_3, by=list(age_3,coh_3), length)
names(UKHLSexp0) <- names(UKHLSexp1) <- names(UKHLSexp2) <- names(UKHLSexp3) <- c("age","coh","UN")
ONSexp0 <- data4[[1]][,c(1,2,3)]
ONSexp1 <- data4[[2]][,c(1,2,3)]
ONSexp2 <- data4[[3]][,c(1,2,3)]
ONSexp3 <- data4[[4]][,c(1,2,3)]
names(ONSexp0)[3] <- names(ONSexp1)[3] <- names(ONSexp2)[3] <- names(ONSexp3)[3] <- "ON"
expf0 <- left_join(expf0,UKHLSexp0)
expf0 <- left_join(expf0,ONSexp0)
expf1 <- left_join(expf1,UKHLSexp1)
expf1 <- left_join(expf1,ONSexp1)
expf2 <- left_join(expf2,UKHLSexp2)
expf2 <- left_join(expf2,ONSexp2)
expf3 <- left_join(expf3,UKHLSexp3)
expf3 <- left_join(expf3,ONSexp3)
expf0$UN[which(is.na(expf0$UN) & expf0$age+expf0$coh<=2007)] <- 0
expf1$UN[which(is.na(expf1$UN) & expf0$age+expf0$coh<=2007)] <- 0
expf2$UN[which(is.na(expf2$UN) & expf0$age+expf0$coh<=2007)] <- 0
expf3$UN[which(is.na(expf3$UN) & expf0$age+expf0$coh<=2007)] <- 0
expf0$ON[which(is.na(expf0$ON) & expf0$age+expf0$coh<=2018)] <- 0
expf1$ON[which(is.na(expf1$ON) & expf0$age+expf0$coh<=2018)] <- 0
expf2$ON[which(is.na(expf2$ON) & expf0$age+expf0$coh<=2018)] <- 0
expf3$ON[which(is.na(expf3$ON) & expf0$age+expf0$coh<=2018)] <- 0

for (i in agerange) {
  expf0$UN[which(is.na(expf0$UN) & expf0$age==i)] <- tail(expf0$UN[which(!is.na(expf0$UN) & expf0$age==i)],1)
  expf0$ON[which(is.na(expf0$ON) & expf0$age==i)] <- tail(expf0$ON[which(!is.na(expf0$ON) & expf0$age==i)],1)
  expf1$UN[which(is.na(expf1$UN) & expf1$age==i)] <- tail(expf1$UN[which(!is.na(expf1$UN) & expf1$age==i)],1)
  expf1$ON[which(is.na(expf1$ON) & expf1$age==i)] <- tail(expf1$ON[which(!is.na(expf1$ON) & expf1$age==i)],1)
  expf2$UN[which(is.na(expf2$UN) & expf2$age==i)] <- tail(expf2$UN[which(!is.na(expf2$UN) & expf2$age==i)],1)
  expf2$ON[which(is.na(expf2$ON) & expf2$age==i)] <- tail(expf2$ON[which(!is.na(expf2$ON) & expf2$age==i)],1)
  expf3$UN[which(is.na(expf3$UN) & expf3$age==i)] <- tail(expf3$UN[which(!is.na(expf3$UN) & expf3$age==i)],1)
  expf3$ON[which(is.na(expf3$ON) & expf3$age==i)] <- tail(expf3$ON[which(!is.na(expf3$ON) & expf3$age==i)],1)
}
expf <- rbind(data.frame(parity=0,expf0),
              data.frame(parity=1,expf1),
              data.frame(parity=2,expf2),
              data.frame(parity=3,expf3))

#Incorporate additional binomial uncertainty
for (n in 1:length(files)) {
  p <- parity[n]
  t <- rep(type,4)[n]
  psurvfn <- results$psurvf[[n]]
  Upsurvfr <- Opsurvfr <- psurvfn
  for (i in 1:1000) {
    Upsurvfr[i,] <- rbinom(length(agerange)*length(cohrange),expf$UN[expf$parity==p],psurvfn[i,])/expf$UN[expf$parity==p]
    Opsurvfr[i,] <- rbinom(length(agerange)*length(cohrange),expf$ON[expf$parity==p],psurvfn[i,])/expf$ON[expf$parity==p]
  }
  resdat <- data.frame(type=t,parity=p,age=rep(agerange,length(cohrange)),
                       coh=rep(cohrange,each=length(agerange)),
                       meanU=apply(Upsurvfr,2,mean),
                       lowerU=apply(Upsurvfr,2,quantile,p=0.025,na.rm=T),
                       upperU=apply(Upsurvfr,2,quantile,p=0.975,na.rm=T),
                       meanO=apply(Opsurvfr,2,mean),
                       lowerO=apply(Opsurvfr,2,quantile,p=0.025,na.rm=T),
                       upperO=apply(Opsurvfr,2,quantile,p=0.975,na.rm=T))
  if (n == 1) fdatACall <- left_join(fdatACall,resdat)
  if (n > 1) fdatACall[fdatACall$type==t&fdatACall$parity==p,10:15] <- resdat[,5:10]
}

fdatACall$type <- factor(fdatACall$type,levels=1:8)
fdatACallp <- fdatACallf <- fdatACall
fdatACallp[which(fdatACallp$age+fdatACallp$coh>2018 & fdatACallp$type!=2),] <- NA
fdatACallf[which(fdatACallf$age+fdatACallf$coh<=2018 & fdatACallf$type!=2),] <- NA
fdatACallp[which(fdatACallp$age+fdatACallp$coh>2007 & fdatACallp$type==2),] <- NA
fdatACallf[which(fdatACallf$age+fdatACallf$coh<=2007 & fdatACallf$type==2),] <- NA
fdatACallred <- fdatACall[which((!is.na(fdatACall$ONS)&fdatACall$type!=2) | 
                                  (!is.na(fdatACall$UKHLS)&fdatACall$type==2) |
                                  fdatACall$type %in% c(1,8)),]
psumONS <- aggregate(data4l$ONS,by=list(data4l$coh,data4l$parity),sum)
psumONS[which(psumONS$Group.1>(2018-44)),"x"] <- NA
colnames(psumONS) <- c("cov","parity","ONS")
psumdat <- data.frame(type=rep(rep(type,1),each=length(cohrange)),
                      parity=rep(parity,each=length(cohrange)),
                      cov=rep(cohrange,length(files)),
                      mean=unlist(results$psumfm),
                      lower=unlist(results$psumfl),
                      upper=unlist(results$psumfu))
psumdat <- left_join(psumdat,psumONS)
psumdatp <- psumdatf <- psumdat
psumdatp[which(psumdat$cov+44>2018),] <- NA
psumdatf[which(psumdat$cov+44<=2018),] <- NA

#Save marginalised probability samples for Step 8
nmod <- length(mod2)
psurvf <- list()
for (n in 1:nmod) {
  for (i in 1:1000) {
  psurvf[[i]] <- data.frame(expand.grid(age=15:44,coh=1945:2003),
                            p1=results$psurvf[[n]][i,],
                            p2=results$psurvf[[n+nmod]][i,],
                            p3=results$psurvf[[n+2*nmod]][i,],
                            p4=results$psurvf[[n+3*nmod]][i,])
  }
  save(psurvf,file=paste0("chap4/results/ACprob_",gsub("/","",mod2[n]),".RData"))
}

#Plots
#Labels
supp.labsp <- paste0("Parity ",c(0,1,2,"3+"))
names(supp.labsp) <- c(0,1,2,3)
supp.labst <- c("UKHLS data","100% UKHLS, 0% ONS","50% UKHLS, 50% ONS",
                "33% UKHLS, 67% ONS","25% UKHLS, 75% ONS","20% UKHLS, 80% ONS",
                "10% UKHLS, 90% ONS","ONS data","100% UKHLS (WPU)",
                "33% UKHLS (WPU)")
names(supp.labst) <- 1:10
supp.labsq4 <- c("< GCSE","GCSE","A Level","Degree")
names(supp.labsq4) <- c(1,2,3,4)
supp.labsq2b <- c("< A Level","At least A Level")
names(supp.labsq2b) <- c(1,2)
supp.labsq3b <- c("< GCSE","GCSE/A Level","Degree")
names(supp.labsq3b) <- c(1,2,3)
supp.labsg <- paste0("T = ",1:11)
names(supp.labsg) <- 1:11

#Posterior mean marginalised probabilities (Figure 4.17)
png(file=paste0("chap4/plots/fig17.png"),width=30,height=15,units="cm",res=400)
pind <- fdatACall$parity>=0 & fdatACall$type %in% c(1,type,8)
ggplot(fdatACall[pind,], aes(x=age, y=mean, color=coh)) +
  geom_line(aes(x=fdatACallp$age[pind], y=fdatACallp$mean[pind], group=fdatACallp$coh[pind]), size=0.3) +
  geom_line(aes(x=fdatACallf$age[pind], y=fdatACallf$mean[pind], group=fdatACallf$coh[pind]), linetype=2, size=0.3) +
  labs(x = "Age", y = "Observed rate/Fitted probability", color="Cohort")+
  scale_color_gradientn(colours=rainbow(100, start=0.3, end=1), guide = guide_colorbar(barheight = 20,frame.colour="black",ticks.colour="black"), breaks = seq(1945,2000,5), limits=c(1945,2003)) +
  scale_x_continuous(limits=c(15,44), breaks=seq(15,44,5), minor_breaks=setdiff(15:44,seq(15,44,5)), expand=c(0.02,0.02)) +
  coord_cartesian(ylim=c(0,0.35)) +
  theme_bw() + theme(text = element_text("Calibri")) +
  facet_grid(parity~type,labeller=labeller(parity=supp.labsp,type=supp.labst))
dev.off()

#Age main effects (Figure 4.18)
#Assumes the UKHLS-only model (type=2) and 33/67 model (type=4) have been fitted
Adat$type <- factor(supp.labst[Adat$type],levels=supp.labst)
Adat2 <- Adat4 <- Adat
Adat2[Adat2$type!=supp.labst[2],] <- NA
Adat4[Adat4$type!=supp.labst[4],] <- NA

png(file="chap4/plots/fig18.png",width=10.57,height=9.35,units="cm",res=400)
ind <- which(Adat$type %in% supp.labst[c(2,4)] & Adat$parity>1)
ggplot(Adat[ind,], aes(x=cov, y=fit, color=type)) +
  geom_line(aes(x=Adat2$cov[ind],y=Adat2$fit[ind],color=Adat2$type[ind]),size=1) +
  geom_line(aes(x=Adat2$cov[ind],y=Adat2$lower[ind],color=Adat2$type[ind]),size=0.5) +
  geom_line(aes(x=Adat2$cov[ind],y=Adat2$upper[ind],color=Adat2$type[ind]),size=0.5) +
  geom_line(aes(x=Adat4$cov[ind],y=Adat4$fit[ind],color=Adat4$type[ind]),size=1) +
  geom_line(aes(x=Adat4$cov[ind],y=Adat4$lower[ind],color=Adat4$type[ind]),size=0.5) +
  geom_line(aes(x=Adat4$cov[ind],y=Adat4$upper[ind],color=Adat4$type[ind]),size=0.5) +
  labs(x = "Age (a)", y = expression(paste(f[A],"(a)",sep="")), color="Model")+
  scale_y_continuous(breaks=seq(-4,4,1)) +
  scale_x_continuous(limits=c(15,44), breaks=seq(15,44,5), minor_breaks=setdiff(15:44,seq(15,44,5)), expand=c(0.02,0.02)) +
  coord_cartesian(ylim=c(-3,4)) +
  theme_gray() + theme(text = element_text("Calibri"), legend.position = "bottom") +
  facet_wrap(~parity, nrow=1, labeller=labeller(parity=supp.labsp))
dev.off()

#Cohort main effects (Figure 4.19)
#Assumes the UKHLS-only model (type=2) and 33/67 model (type=4) have been fitted
Cdat$type <- factor(supp.labst[Cdat$type],levels=supp.labst)
Cdat2 <- Cdat4 <- Cdat
Cdat2[Cdat2$type!=supp.labst[2],] <- NA
Cdat4[Cdat4$type!=supp.labst[4],] <- NA

png(file="chap4/plots/fig19.png",width=20,height=9.35,units="cm",res=400)
ind <- which(Cdat$type %in% supp.labst[c(2,4)])
ggplot(Cdat[ind,], aes(x=cov, y=fit, color=type)) +
  geom_line(aes(x=Cdat2$cov[ind],y=Cdat2$fit[ind],color=Cdat2$type[ind]),size=1) +
  geom_line(aes(x=Cdat2$cov[ind],y=Cdat2$lower[ind],color=Cdat2$type[ind]),size=0.5) +
  geom_line(aes(x=Cdat2$cov[ind],y=Cdat2$upper[ind],color=Cdat2$type[ind]),size=0.5) +
  geom_line(aes(x=Cdat4$cov[ind],y=Cdat4$fit[ind],color=Cdat4$type[ind]),size=1) +
  geom_line(aes(x=Cdat4$cov[ind],y=Cdat4$lower[ind],color=Cdat4$type[ind]),size=0.5) +
  geom_line(aes(x=Cdat4$cov[ind],y=Cdat4$upper[ind],color=Cdat4$type[ind]),size=0.5) +
  labs(x = "Cohort (c)", y = expression(paste(f[C],"(c)",sep="")), color="Model")+
  scale_y_continuous(breaks=seq(-4,4,1)) +
  scale_x_continuous(limits=c(1945,2003), breaks=seq(1950,2000,10), minor_breaks=seq(1945,2005,10), expand=c(0.02,0.02)) +
  coord_cartesian(ylim=c(-3,4)) +
  theme_gray() + theme(text = element_text("Calibri"), legend.position = "bottom") +
  facet_wrap(~parity, nrow=1, labeller=labeller(parity=supp.labsp))
dev.off()

#AC interaction effects (Figure 4.20)
#Assumes the UKHLS-only model (type=2) and 33/67 model (type=4) have been fitted
png(file="chap4/plots/fig20.png",width=20,height=16,units="cm",res=400)
pind <- which(ACdat$type %in% c(2,4))
ggplot(ACdat[pind,], aes(x=age, y=fit, color=coh)) +
  geom_line(aes(x=ACdatp$age[pind], y=ACdatp$fit[pind], group=ACdatp$coh[pind])) +
  geom_line(aes(x=ACdatf$age[pind], y=ACdatf$fit[pind], group=ACdatf$coh[pind]), linetype=2) +
  labs(x = "Age (a)", y = expression(paste(f["AC"],"(a,c)",sep="")), color="Cohort (c)") +
  scale_color_gradientn(colours=rainbow(100, start=0.3, end=1), limits=c(1945,2003),guide = guide_colorbar(barheight = 20,frame.colour="black",ticks.colour="black"), breaks = seq(1945,2000,5)) +
  scale_y_continuous(breaks=seq(-1,1,0.5)) +
  scale_x_continuous(limits=c(15,44), breaks=seq(15,44,5), minor_breaks=setdiff(15:44,seq(15,44,5)), expand=c(0.02,0.02)) +
  coord_cartesian(ylim=c(-1,0.75)) +
  theme_bw() + theme(text = element_text("Calibri")) +
  facet_grid(type~parity,labeller=labeller(parity=supp.labsp,type=supp.labst))
dev.off()

#Plot marginalised probabilities with and without predictive uncertainty (Figures 4.21-4.24)
#Assumes the UKHLS-only model (type=2) and 33/67 model (type=4) have been fitted
fdatACall2 <- rbind(data.frame(type=as.numeric(fdatACall$type),fdatACall[,2:9]),
                    data.frame(type=9,
                               parity=fdatACall$parity[fdatACall$type==2],
                               age=fdatACall$age[fdatACall$type==2],
                               coh=fdatACall$coh[fdatACall$type==2],
                               mean=fdatACall$meanU[fdatACall$type==2],
                               lower=fdatACall$lowerU[fdatACall$type==2],
                               upper=fdatACall$upperU[fdatACall$type==2],
                               ONS=fdatACall$ONS[fdatACall$type==2],
                               UKHLS=fdatACall$UKHLS[fdatACall$type==2]),
                    data.frame(type=10,
                               parity=fdatACall$parity[fdatACall$type==4],
                               age=fdatACall$age[fdatACall$type==4],
                               coh=fdatACall$coh[fdatACall$type==4],
                               mean=fdatACall$meanO[fdatACall$type==4],
                               lower=fdatACall$lowerO[fdatACall$type==4],
                               upper=fdatACall$upperO[fdatACall$type==4],
                               ONS=fdatACall$ONS[fdatACall$type==4],
                               UKHLS=fdatACall$UKHLS[fdatACall$type==4]))
fdatACall2$type <- factor(fdatACall2$type,levels=c(1,2,9,3,4,10,5:8))
fdatACallp2 <- fdatACallf2 <- fdatACall2
fdatACallp2[which(fdatACallp2$age+fdatACallp2$coh>2018 & fdatACallp2$type %in% c(1,3:8,10)),] <- NA
fdatACallf2[which(fdatACallf2$age+fdatACallf2$coh<=2018 & fdatACallf2$type %in% c(1,3:8,10)),] <- NA
fdatACallp2[which(fdatACallp2$age+fdatACallp2$coh>2007 & fdatACallp2$type %in% c(2,9)),] <- NA
fdatACallf2[which(fdatACallf2$age+fdatACallf2$coh<=2007 & fdatACallf2$type %in% c(2,9)),] <- NA
fdatACall2$UKHLS[fdatACall2$type %in% c(2,4,10)] <- NA
fdatACall2$ONS[fdatACall2$type %in% c(2,9,4)] <- NA

plotfunc <- function(pind, filename, ylim) {
  png(file=filename,width=30,height=15,units="cm",res=400)
  print({ ggplot(fdatACall2[pind,], aes(x=age, y=mean, color=coh)) +
      geom_line(aes(x=fdatACallp2$age[pind],
                    y=fdatACallp2$mean[pind],
                    group=fdatACallp2$coh[pind]),
                size=0.8) +
      geom_line(aes(x=fdatACallf2$age[pind],
                    y=fdatACallf2$mean[pind],
                    group=fdatACallf2$coh[pind]),
                linetype=2,size=0.8) +
      geom_line(aes(x=fdatACallp2$age[pind],
                    y=fdatACallp2$lower[pind],
                    group=fdatACallp2$coh[pind]),
                size=0.3) +
      geom_line(aes(x=fdatACallf2$age[pind],
                    y=fdatACallf2$lower[pind],
                    group=fdatACallf2$coh[pind]),
                linetype=2,size=0.3) +  
      geom_line(aes(x=fdatACallp2$age[pind],
                    y=fdatACallp2$upper[pind],
                    group=fdatACallp2$coh[pind]),
                size=0.3) +
      geom_line(aes(x=fdatACallf2$age[pind],
                    y=fdatACallf2$upper[pind],
                    group=fdatACallf2$coh[pind]),
                linetype=2,size=0.3) +
      geom_ribbon(aes(ymin=lower,ymax=upper,fill=coh),color=NA,alpha=0.15,show.legend=F) + 
      geom_point(aes(y=UKHLS,shape="UKHLS")) +
      geom_point(aes(y=ONS,shape="ONS")) +
      scale_shape_manual(values=c("ONS" = 1, "UKHLS" = 4), guide_legend(title="Observed\nrates")) +
      labs(x = "Age", y = "Probability", color="Cohort (c)")+
      scale_fill_gradientn(colours=rainbow(100, start=0.3, end=1), guide = guide_colorbar(barheight = 20,frame.colour="black",ticks.colour="black"), breaks = seq(1945,2000,5), limits=c(1945,2003)) +
      scale_color_gradientn(colours=rainbow(100, start=0.3, end=1), guide = guide_colorbar(barheight = 20,frame.colour="black",ticks.colour="black"), breaks = seq(1945,2000,5), limits=c(1945,2003)) +
      scale_x_continuous(limits=c(15,44), breaks=seq(15,44,5), minor_breaks=setdiff(15:44,seq(15,44,5)), expand=c(0.02,0.02)) +
      coord_cartesian(ylim=c(0,ylim)) +
      theme_bw() + theme(text = element_text("Calibri")) +
      facet_grid(type~paste0("c = ",coh), labeller=labeller(type=supp.labst)) })
  dev.off()
}

seqc <- c(1945,1955,1965,1975,1980,1985,1990,1995,2000)
plotfunc(fdatACall2$p==0 & fdatACall2$type %in% c(2,9,4,10) & fdatACall2$coh %in% seqc,
         "chap4/plots/fig21.png", 0.25)
plotfunc(fdatACall2$p==1 & fdatACall2$type %in% c(2,9,4,10) & fdatACall2$coh %in% seqc,
         "chap4/plots/fig22.png", 0.5)
plotfunc(fdatACall2$p==2 & fdatACall2$type %in% c(2,9,4,10) & fdatACall2$coh %in% seqc,
         "chap4/plots/fig23.png", 0.5)
plotfunc(fdatACall2$p==3 & fdatACall2$type %in% c(2,9,4,10) & fdatACall2$coh %in% seqc,
         "chap4/plots/fig24.png", 0.5)

#Plot parity 0 ACQ posterior mean probabilities (Figure 4.25)
#Assumes the UKHLS-only model (type=2), 33/67 model (type=4) and 10/90 model (type=7) have been fitted
png(file=paste0("chap4/plots/fig25.png"),width=20,height=15,units="cm",res=400)
pind <- ACQdat0$type %in% c(2,4,7)
ggplot(ACQdat0[pind,], aes(x=age, y=fit, color=coh)) +
  geom_line(aes(x=ACQdat0p$age[pind], y=ACQdat0p$fit[pind], group=ACQdat0p$coh[pind]), size=0.3) +
  geom_line(aes(x=ACQdat0f$age[pind], y=ACQdat0f$fit[pind], group=ACQdat0f$coh[pind]), linetype=2, size=0.3) +
  labs(x = "Age", y = "Fitted probability", color="Cohort")+
  scale_color_gradientn(colours=rainbow(100, start=0.3, end=1), guide = guide_colorbar(barheight = 20,frame.colour="black",ticks.colour="black"), breaks = seq(1945,2000,5), limits=c(1945,2003)) +
  scale_x_continuous(limits=c(15,44), breaks=seq(15,44,5), minor_breaks=setdiff(15:44,seq(15,44,5)), expand=c(0.02,0.02)) +
  coord_cartesian(ylim=c(0,0.26)) +
  theme_bw() + theme(text = element_text("Calibri")) +
  facet_grid(type~qualf,labeller=labeller(qualf=supp.labsq4,type=supp.labst))
dev.off()

#Plot parity 0 ACQ probabilities without predictive uncertainty (Figure 4.26)
#Assumes the UKHLS-only model (type=2) and 33/67 model (type=4) have been fitted
type <- rep(c(3:7,2),4)
pind0 <-  ACQdat0$type %in% c(2,4) & ACQdat0$qualf %in% c(1,4) & ACQdat0$coh %in% c(1945,1955,1965,1975,1980,1985,1990,1995,2000)
pind <- pind0
png(file="chap4/plots/fig26.png",width=30,height=15,units="cm",res=400)
print({ ggplot(ACQdat0[pind,], aes(x=age, y=fit, color=coh)) +
   geom_line(aes(x=ACQdat0p$age[pind],
                  y=ACQdat0p$fit[pind],
                  group=ACQdat0p$coh[pind]),
              size=0.8) +
    geom_line(aes(x=ACQdat0f$age[pind],
                  y=ACQdat0f$fit[pind],
                  group=ACQdat0f$coh[pind]),
              linetype=2,size=0.8) +
    geom_line(aes(x=ACQdat0p$age[pind],
                  y=ACQdat0p$lower[pind],
                  group=ACQdat0p$coh[pind]),
              size=0.3) +
    geom_line(aes(x=ACQdat0f$age[pind],
                  y=ACQdat0f$lower[pind],
                  group=ACQdat0f$coh[pind]),
              linetype=2,size=0.3) +  
    geom_line(aes(x=ACQdat0p$age[pind],
                  y=ACQdat0p$upper[pind],
                  group=ACQdat0p$coh[pind]),
              size=0.3) +
    geom_line(aes(x=ACQdat0f$age[pind],
                  y=ACQdat0f$upper[pind],
                  group=ACQdat0f$coh[pind]),
              linetype=2,size=0.3) +
    geom_ribbon(aes(ymin=lower,ymax=upper,fill=coh),color=NA,alpha=0.15,show.legend=F) + 
    labs(x = "Age", y = "Probability", color="Cohort (c)")+
    scale_fill_gradientn(colours=rainbow(100, start=0.3, end=1), guide = guide_colorbar(barheight = 20,frame.colour="black",ticks.colour="black"), breaks = seq(1945,2000,5), limits=c(1945,2003)) +
    scale_color_gradientn(colours=rainbow(100, start=0.3, end=1), guide = guide_colorbar(barheight = 20,frame.colour="black",ticks.colour="black"), breaks = seq(1945,2000,5), limits=c(1945,2003)) +
    scale_x_continuous(limits=c(15,44), breaks=seq(15,44,5), minor_breaks=setdiff(15:44,seq(15,44,5)), expand=c(0.02,0.02)) +
    coord_cartesian(ylim=c(0,0.25)) +
    theme_bw() + theme(text = element_text("Calibri")) +
    facet_grid(qualf+type~paste0("c = ",coh), labeller=labeller(type=supp.labst,qualf=supp.labsq4)) })
dev.off()

#Plot parity 1 ACTQ posterior mean probabilities for an age at first birth of 19 years (Figure 4.27)
#Assumes the UKHLS-only model (type=2), 33/67 model (type=4) and 10/90 model (type=7) have been fitted
ACTQdat1$aafb <- ACTQdat1$age - ACTQdat1$gapc
ACTQdat1_19 <- ACTQdat1[ACTQdat1$aafb==19|ACTQdat1$age>30&ACTQdat1$gapc==11,]
ACTQdat1p_19 <- ACTQdat1f_19 <- ACTQdat1_19
ACTQdat1p_19[which(ACTQdat1p_19$age+ACTQdat1p_19$coh>2018&ACTQdat1p_19$type!=2),] <- NA
ACTQdat1f_19[which(ACTQdat1f_19$age+ACTQdat1f_19$coh<=2018&ACTQdat1f_19$type!=2),] <- NA
ACTQdat1p_19[which(ACTQdat1p_19$age+ACTQdat1p_19$coh>2007&ACTQdat1p_19$type==2),] <- NA
ACTQdat1f_19[which(ACTQdat1f_19$age+ACTQdat1f_19$coh<=2007&ACTQdat1f_19$type==2),] <- NA
supp.labsq2b <- c("< A Level","At least A Level")
names(supp.labsq2b) <- c(1,2)

png(file="chap4/plots/fig27.png",width=12,height=15,units="cm",res=400)
pind <- ACTQdat1_19$type %in% c(2,4,7)
ggplot(ACTQdat1_19[pind,], aes(x=age, y=fit, color=coh)) +
  geom_line(aes(x=ACTQdat1p_19$age[pind], y=ACTQdat1p_19$fit[pind], group=ACTQdat1p_19$coh[pind]), size=0.3) +
  geom_line(aes(x=ACTQdat1f_19$age[pind], y=ACTQdat1f_19$fit[pind], group=ACTQdat1f_19$coh[pind]), linetype=2, size=0.3) +
  labs(x = "Age", y = "Fitted probability", color="Cohort")+
  scale_color_gradientn(colours=rainbow(100, start=0.3, end=1), guide = guide_colorbar(barheight = 15,frame.colour="black",ticks.colour="black"), breaks = seq(1945,2000,5), limits=c(1945,2003)) +
  scale_x_continuous(limits=c(20,44), breaks=seq(15,44,5), minor_breaks=setdiff(15:44,seq(15,44,5)), expand=c(0.02,0.02)) +
  theme_bw() + theme(text = element_text("Calibri")) +
  facet_grid(type~qualf,labeller=labeller(qualf=supp.labsq2b,type=supp.labst))
dev.off()

#Plot parity 1 ACTQ probabilities without predictive uncertainty (Figure 4.28)
#Assumes the UKHLS-only model (type=2) and 33/67 model (type=4) have been fitted
pind <- ACTQdat1_19$type %in% c(2,4) & ACTQdat1_19$coh %in% c(1945,1955,1965,1975,1980,1985,1990,1995,2000)
png(file=paste0("chap4/plots/fig28.png"),width=30,height=15,units="cm",res=400)
print({ ggplot(ACTQdat1_19[pind,], aes(x=age, y=fit, color=coh)) +
    geom_line(aes(x=ACTQdat1p_19$age[pind],
                  y=ACTQdat1p_19$fit[pind],
                  group=ACTQdat1p_19$coh[pind]),
              size=0.8) +
    geom_line(aes(x=ACTQdat1f_19$age[pind],
                  y=ACTQdat1f_19$fit[pind],
                  group=ACTQdat1f_19$coh[pind]),
              linetype=2,size=0.8) +
    geom_line(aes(x=ACTQdat1p_19$age[pind],
                  y=ACTQdat1p_19$lower[pind],
                  group=ACTQdat1p_19$coh[pind]),
              size=0.3) +
    geom_line(aes(x=ACTQdat1f_19$age[pind],
                  y=ACTQdat1f_19$lower[pind],
                  group=ACTQdat1f_19$coh[pind]),
              linetype=2,size=0.3) +  
    geom_line(aes(x=ACTQdat1p_19$age[pind],
                  y=ACTQdat1p_19$upper[pind],
                  group=ACTQdat1p_19$coh[pind]),
              size=0.3) +
    geom_line(aes(x=ACTQdat1f_19$age[pind],
                  y=ACTQdat1f_19$upper[pind],
                  group=ACTQdat1f_19$coh[pind]),
              linetype=2,size=0.3) +
    geom_ribbon(aes(ymin=lower,ymax=upper,fill=coh),color=NA,alpha=0.15,show.legend=F) + 
    labs(x = "Age", y = "Probability", color="Cohort (c)")+
    scale_fill_gradientn(colours=rainbow(100, start=0.3, end=1), guide = guide_colorbar(barheight = 20,frame.colour="black",ticks.colour="black"), breaks = seq(1945,2000,5), limits=c(1945,2003)) +
    scale_color_gradientn(colours=rainbow(100, start=0.3, end=1), guide = guide_colorbar(barheight = 20,frame.colour="black",ticks.colour="black"), breaks = seq(1945,2000,5), limits=c(1945,2003)) +
    scale_x_continuous(limits=c(20,44), breaks=seq(15,44,5), minor_breaks=setdiff(15:44,seq(15,44,5)), expand=c(0.02,0.02)) +
    coord_cartesian(ylim=c(0,0.5)) +
    theme_bw() + theme(text = element_text("Calibri")) +
    facet_grid(qualf+type~paste0("c = ",coh), labeller=labeller(qualf=supp.labsq2b,type=supp.labst,qualf=supp.labsq4)) })
dev.off()
