##Modelling T|A,(Q)
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(rstan)
windowsFonts(Calibri=windowsFont("Calibri"))

#Set up
supp.labsq2b <- c("< A Level","At least A Level")
names(supp.labsq2b) <- c(1,2)
supp.labsq3b <- c("< GCSE","GCSE/A Level","Degree")
names(supp.labsq3b) <- c(1,2,3)
supp.labsp <- c("Parity 1","Parity 2","Parity 3+")
names(supp.labsp) <- c(1,2,3)
stepfunc <- function(start) {
  dat <- data.frame(y=start:11+0.5)
  dat$x <- seq(14,by=1,length=nrow(dat))+0.5
  dat
}
agerange <- 15:44

#Compute observed proportions
#Parity 1 function
AQTtab1func <- function(FUN,cut=T) {
  AQTtab1 <- expand.grid(age_1=15:44,gapc_1=1:11,qualf2b_1=1:2)
  AQTtab1 <- left_join(AQTtab1,aggregate(weights_1st~age_1+gapc_1+qualf2b_1, FUN = FUN, subset=coh_1<=1982), by = c("age_1","gapc_1","qualf2b_1"))
  if (cut) AQTtab1 <- AQTtab1[-which(AQTtab1$age_1-AQTtab1$gapc_1<12),]
  AQTtab1 <- left_join(AQTtab1, aggregate(weights_1st~age_1+qualf2b_1, FUN = FUN, subset=coh_1<=1982), by=c("age_1","qualf2b_1"))
  AQTtab1$weights_1st.x[which(is.na(AQTtab1$weights_1st.x) & !is.na(AQTtab1$weights_1st.y))] <- 0
  AQTtab1$x <- AQTtab1$weights_1st.x/AQTtab1$weights_1st.y
  AQTtab1$p <- 1
  AQTtab1$step <- 1
  names(AQTtab1) <- c("age","gapc","qualf","x.x","x.y","x","p","step")
  AQTtab1$qualft <- supp.labsq2b[AQTtab1$qualf]
  AQTtab1$qualf <- NULL  
  AQTtab1
}

#Parity 2 function
AQTtab2func <- function(FUN,cut=T,st=15) {
  AQTtab2 <- expand.grid(age_2=st:44,gapc_2=1:11,qualf3b_2=1:3)
  AQTtab2 <- left_join(AQTtab2,aggregate(weights_2st~age_2+gapc_2+qualf3b_2, FUN = FUN, subset=coh_2<=1982), by = c("age_2","gapc_2","qualf3b_2"))
  if(cut) AQTtab2 <- AQTtab2[-which(AQTtab2$age_2-AQTtab2$gapc_2<12),]
  AQTtab2 <- left_join(AQTtab2, aggregate(weights_2st~age_2+qualf3b_2, FUN = FUN, subset=coh_2<=1982), by=c("age_2","qualf3b_2"))
  AQTtab2$weights_2st.x[which(is.na(AQTtab2$weights_2st.x) & !is.na(AQTtab2$weights_2st.y))] <- 0
  AQTtab2$x <- AQTtab2$weights_2st.x/AQTtab2$weights_2st.y
  AQTtab2$p <- 2
  AQTtab2$step <- 0
  names(AQTtab2) <- c("age","gapc","qualf","x.x","x.y","x","p","step")
  AQTtab2$qualft <- supp.labsq3b[AQTtab2$qualf]
  AQTtab2$qualf <- NULL
  AQTtab2 
}

#Parity 3+ function
ATtab3func <- function(FUN,cut=T,st=15) {
  ATtab3 <- expand.grid(age_3=st:44,gapc_3=1:11)
  ATtab3 <- left_join(ATtab3,aggregate(weights_3st~age_3+gapc_3, FUN = FUN), by = c("age_3","gapc_3"))
  if (cut) ATtab3 <- ATtab3[-which(ATtab3$age_3-ATtab3$gapc_3<12),]
  ATtab3 <- left_join(ATtab3, aggregate(weights_3st~age_3, FUN = FUN), by=c("age_3"))
  ATtab3$weights_3st.x[which(is.na(ATtab3$weights_3st.x) & !is.na(ATtab3$weights_3st.y))] <- 0
  ATtab3$x <- ATtab3$weights_3st.x/ATtab3$weights_3st.y
  ATtab3$p <- 3
  ATtab3$step <- -2
  names(ATtab3) <- c("age","gapc","x.x","x.y","x","p","step")
  ATtab3$qualft <- "All"  
  ATtab3
}

AQTtab1 <- AQTtab1func(length)
AQTtab1w <- AQTtab1func(sum)
AQTtab2 <- AQTtab2func(length)
AQTtab2w <- AQTtab2func(sum)
ATtab3 <- ATtab3func(length)
ATtab3w <- ATtab3func(sum)

AQTtaball <- rbind(AQTtab1,AQTtab2,ATtab3)
AQTtaball$qualft <- factor(AQTtaball$qualft, levels=c("All","< GCSE","< A Level","GCSE","GCSE/A Level","A Level","< Degree","At least A Level","Degree"))
AQTtaballw <- rbind(AQTtab1w,AQTtab2w,ATtab3w)
AQTtaballw$qualft <- factor(AQTtaballw$qualft, levels=c("All","< GCSE","< A Level","GCSE","GCSE/A Level","A Level","< Degree","At least A Level","Degree"))

#Plot observed proportions (Figure 4.9)
plotfunc <- function(dat,filename) {
  png(file=filename,width=17.5,height=10,units="cm",res=400)
  print({ggplot(dat, aes(x=age,y=gapc,fill=x)) +
      geom_raster(hjust=0.5,vjust=0.5) +
      labs(x = "Age", y = "Time since last birth", fill="Proportion") +
      scale_fill_gradientn(colours=c("white",colorRampPalette(brewer.pal(9,"YlOrRd"))(10000),"black"), limits=c(0,1),guide = guide_colorbar(barheight = 15,frame.colour="black",ticks.colour="black"), breaks = seq(0,1,0.1)) +
      scale_y_continuous(expand=c(0,0),breaks=seq(1,11,2), minor_breaks=seq(2,10,2)) + 
      scale_x_continuous(expand=c(0,0),breaks=seq(15,44,5), minor_breaks=setdiff(15:44,seq(15,44,5))) +
      coord_cartesian(xlim=c(14.5,44.5),ylim=c(0.5,11.5)) +
      geom_hline(yintercept=1.5:10.5,color="lightgray",size=0.25) +
      geom_vline(xintercept=15.5:43.5,color="lightgray",size=0.25) +
      geom_step(data=stepfunc(3), mapping=aes(x=x,y=y),direction="hv") +
      theme_bw() + theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank(),text = element_text("Calibri")) +
      facet_wrap(~p+qualft,nrow=2,labeller=labeller(p=supp.labsp))
  })
  dev.off()
}

plotfunc(AQTtaball, "chap4/plots/fig9_uw.png")  # unweighted
plotfunc(AQTtaballw, "chap4/plots/fig9_w.png")  # weighted

#Stan modelling
#Parity 1
newdata1 <- aggregate(gapc_1 ~ age_1 + qualf2b_1, FUN = function(x) c(y=length(x), t1=length(x[x==1]), t2=length(x[x==2]), t3=length(x[x==3]), t4=length(x[x==4]), t5=length(x[x==5]), t6=length(x[x==6]), t7=length(x[x==7]), t8=length(x[x==8]), t9=length(x[x==9]), t10=length(x[x==10]), t11=length(x[x==11])), subset = coh_1 <= 1982)
newdata1 <- data.frame(newdata1$age_1,newdata1$qualf2b_1,newdata1$gapc_1)
newdata1$yw <- aggregate(weights_1st ~ age_1 + qualf2b_1, FUN = sum, subset = coh_1 <= 1982)$weights_1st
newdata1$wtmult <- newdata1$yw/newdata1$y
colnames(newdata1) <- c("a","q","y",paste0(1:11),"yw","wtmult")
newdata1.1 <- newdata1[newdata1$q==1,]
newdata1.2 <- newdata1[newdata1$q==2,]
rownames(newdata1.1) <- newdata1.1$a
rownames(newdata1.2) <- newdata1.2$a

y1 <- newdata1.1[,paste0(1:11)]
y2 <- newdata1.2[,paste0(1:11)]
Na <- nrow(y1)
Nab <- nrow(y1)+1
a <- as.numeric(rownames(y1))-median(15:44)
a2 <- a^2
ab <- y1
for (i in 3:11) ab[,i] <- c(rep(0,i-3),1:(Na-(i-3)))
ab[,2] <- 2:(Na+1)
ab <- ab[,-1]
ab[ab==0] <- 1
wt1 <- newdata1.1[rownames(y1),"wtmult"]
wt2 <- newdata1.2[rownames(y2),"wtmult"]

standata <- list(Na=Na, Nab=Nab, y1=y1, y2=y2, a=a, a2=a2, ab=ab, wt1=wt1, wt2=wt2)
stanout <- stan(file="chap4/stan/p1_TAQ_6f.2.stan",data=standata,chains=1,iter=2000)
save(stanout,file="chap4/results/p1_TAQ_6f.2.RData")

#Parity 2
newdata2 <- aggregate(gapc_2 ~ age_2 + qualf3b_2, FUN = function(x) c(y=length(x), t1=length(x[x==1]), t2=length(x[x==2]), t3=length(x[x==3]), t4=length(x[x==4]), t5=length(x[x==5]), t6=length(x[x==6]), t7=length(x[x==7]), t8=length(x[x==8]), t9=length(x[x==9]), t10=length(x[x==10]), t11=length(x[x==11])), subset = coh_2 <= 1982)
newdata2 <- data.frame(newdata2$age_2,newdata2$qualf3b_2,newdata2$gapc_2)
newdata2$yw <- aggregate(weights_2st ~ age_2 + qualf3b_2, FUN = sum, subset = coh_2 <= 1982)$weights_2st
newdata2$wtmult <- newdata2$yw/newdata2$y
colnames(newdata2) <- c("a","q","y",paste0(1:11),"yw","wtmult")
newdata2.1 <- newdata2[newdata2$q==1,]
newdata2.2 <- newdata2[newdata2$q==2,]
newdata2.3 <- newdata2[newdata2$q==3,]
newdata2.1 <- rbind(0,0,newdata2.1)
newdata2.2 <- rbind(0,0,newdata2.2)
newdata2.3 <- rbind(0,0,newdata2.3)
rownames(newdata2.1) <- agerange
rownames(newdata2.2) <- agerange
rownames(newdata2.3) <- agerange

y1 <- newdata2.1[,paste0(1:11)]
y2 <- newdata2.2[,paste0(1:11)]
y3 <- newdata2.3[,paste0(1:11)]
Na <- nrow(y1)
Nab <- nrow(y1)+1
a <- as.numeric(rownames(y1))-median(17:44)
ab <- y1
for (i in 3:11) ab[,i] <- c(rep(0,i-3),1:(Na-(i-3)))
ab[,2] <- 2:(Na+1)
ab <- ab[,-1]
ab[ab==0] <- 1
wt1 <- newdata2.1[rownames(y1),"wtmult"]
wt2 <- newdata2.2[rownames(y2),"wtmult"]
wt3 <- newdata2.3[rownames(y3),"wtmult"]

standata <- list(Na=Na, Nab=Nab, y1=y1, y2=y2, y3=y3, a=a, ab=ab, wt1=wt1, wt2=wt2, wt3=wt3)
stanout <- stan(file="chap4/stan/p2_TAQ_5d.2.stan",data=standata,chains=1,iter=2000)
save(stanout,file="chap4/results/p2_TAQ_5d.2.RData")

#Parity 3+
newdata3 <- aggregate(gapc_3 ~ age_3, FUN = function(x) c(y=length(x), t1=length(x[x==1]), t2=length(x[x==2]), t3=length(x[x==3]), t4=length(x[x==4]), t5=length(x[x==5]), t6=length(x[x==6]), t7=length(x[x==7]), t8=length(x[x==8]), t9=length(x[x==9]), t10=length(x[x==10]), t11=length(x[x==11])))
newdata3 <- data.frame(newdata3$age_3,newdata3$gapc_3)
newdata3$yw <- aggregate(weights_3st ~ age_3, FUN = sum)$weights_3st
newdata3$wtmult <- newdata3$yw/newdata3$y
colnames(newdata3) <- c("a","y",paste0(1:11),"yw","wtmult")
newdata3 <- rbind(0,0,0,newdata3)
rownames(newdata3) <- agerange

y <- newdata3[,paste0(1:11)]
Na <- nrow(y)
Nab <- nrow(y)+1
a <- as.numeric(rownames(y))-median(18:44)
a2 <- a^2
ab <- y
for (i in 3:11) ab[,i] <- c(rep(0,i-3),1:(Na-(i-3)))
ab[,2] <- 2:(Na+1)
ab <- ab[,-1]
ab[ab==0] <- 1
wt <- newdata3[paste(rownames(y)),"wtmult"]

standata <- list(Na=Na, Nab=Nab, y=y, a=a, a2=a2, ab=ab, wt=wt)
stanout <- stan(file="chap4/stan/p3_TA_5a.2.stan",data=standata,chains=1,iter=2000)
save(stanout,file="chap4/results/p3_TA_5a.2.RData")

#Extract posterior mean probabilities for full AT surface
#Parity 1
load("chap4/results/p1_TAQ_6f.2.RData")
Tprop1 <- extract(stanout,pars="Tprop1",permuted=F)
Tprop2 <- extract(stanout,pars="Tprop2",permuted=F)
Tpropm1 <- matrix(apply(Tprop1[,1,],2,mean),nrow=30,byrow=F)
Tpropm2 <- matrix(apply(Tprop2[,1,],2,mean),nrow=30,byrow=F)
Tpropdat1 <- data.frame(q=rep(1:2,each=30*11),a=rep(agerange,22),
                        gapc=rep(rep(1:11,each=30),2),
                        x=c(as.vector(Tpropm1),as.vector(Tpropm2)))
Tpropdat1$qualft <- factor(supp.labsq2b[Tpropdat1$q],levels=supp.labsq2b)
Tpropdat1$par <- 1

#Parity 2
load("chap4/results/p2_TAQ_5d.2.RData")
Tprop1 <- extract(stanout,pars="Tprop1",permuted=F)
Tprop2 <- extract(stanout,pars="Tprop2",permuted=F)
Tprop3 <- extract(stanout,pars="Tprop3",permuted=F)
Tpropm1 <- matrix(apply(Tprop1[,1,],2,mean),nrow=30,byrow=F)
Tpropm2 <- matrix(apply(Tprop2[,1,],2,mean),nrow=30,byrow=F)
Tpropm3 <- matrix(apply(Tprop3[,1,],2,mean),nrow=30,byrow=F)
Tpropdat2 <- data.frame(q=rep(1:3,each=30*11),a=rep(agerange,33),gapc=rep(rep(1:11,each=30),3),
                        x=c(as.vector(Tpropm1),as.vector(Tpropm2),as.vector(Tpropm3)))
Tpropdat2$qualft <- factor(supp.labsq3b[Tpropdat2$q],levels=supp.labsq3b)
Tpropdat2$par <- 2

#Parity 3+
load("chap4/results/p3_TA_5a.2.RData")
Tprop <- extract(stanout,pars="Tprop",permuted=F)
Tpropm <- matrix(apply(Tprop[,1,],2,mean),nrow=30,byrow=F)
Tpropdat3 <- data.frame(q=1,a=rep(agerange,11),gapc=rep(1:11,each=30),
                        x=as.vector(Tpropm))
Tpropdat3$qualft <- "All"
Tpropdat3$par <- 3

Tpropdat <- rbind(Tpropdat1,Tpropdat2,Tpropdat3)
Tpropdat$qualft <- factor(Tpropdat$qualft,levels=c("< GCSE","< A Level","GCSE","GCSE/A Level","A Level","< Degree","At least A Level","Degree","All"))

#Plot posterior mean probabilities (Figure 4.12)
png(file="chap4/plots/fig12.png",width=17.5,height=10,units="cm",res=400)
ggplot(Tpropdat, aes(x=a,y=gapc,fill=x)) +
  geom_raster(hjust=0.5,vjust=0.5) +
  labs(x = "Age", y = "Time since last birth", fill="Fitted\nprobability") +
  scale_fill_gradientn(colours=c("white",colorRampPalette(brewer.pal(9,"YlOrRd"))(1000000),"black"), limits=c(0,1),guide = guide_colorbar(barheight = 15,frame.colour="black",ticks.colour="black"), breaks = seq(0,1,0.1)) +
  scale_y_continuous(expand=c(0,0),breaks=seq(1,11,2), minor_breaks=seq(2,10,2)) + 
  scale_x_continuous(expand=c(0,0),breaks=seq(15,44,5), minor_breaks=setdiff(15:44,seq(15,44,5))) +
  coord_cartesian(xlim=c(14.5,44.5),ylim=c(0.5,11.5)) +
  geom_hline(yintercept=1.5:10.5,color="lightgray",size=0.25) +
  geom_vline(xintercept=15.5:43.5,color="lightgray",size=0.25) +
  geom_step(data=stepfunc(3), mapping=aes(x=x,y=y),direction="hv") +
  theme_bw() + theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank(),text = element_text("Calibri")) +
  facet_wrap(~par+qualft,nrow=2,labeller=labeller(par=supp.labsp))
dev.off()

#Create data frames with weighted counts
newdata1w2 <- newdata1
newdata1w2[,paste(1:11)] <- newdata1w2[,paste(1:11)] * newdata1w2$wtmult
newdata1w2$y <- newdata1w2$yw
newdata1w2$yw <- NULL

newdata2w2 <- newdata2
newdata2w2[,paste(1:11)] <- newdata2w2[,paste(1:11)] * newdata2w2$wtmult
newdata2w2$y <- newdata2w2$yw
newdata2w2$yw <- NULL

newdata3w2 <- newdata3
newdata3w2[,paste(1:11)] <- newdata3w2[,paste(1:11)] * newdata3w2$wtmult
newdata3w2$y <- newdata3w2$yw
newdata3w2$yw <- NULL

#Convert into a long dataset and compute Pearson residuals
library(tidyr)
newdata1w2 %<>% filter(a>0) %>% gather("gapc","yt",-wtmult,-y,-a,-q,convert=T)
newdata2w2 %<>% filter(a>0) %>% gather("gapc","yt",-wtmult,-y,-a,-q,convert=T)
newdata3w2 %<>% filter(a>0) %>% gather("gapc","yt",-wtmult,-y,-a,convert=T)

newdata1w2 <- newdata1w2 %>% full_join(Tpropdat1,by=c("q","a","gapc"))
newdata2w2 <- newdata2w2 %>% full_join(Tpropdat2,by=c("q","a","gapc"))
newdata3w2 <- newdata3w2 %>% full_join(Tpropdat3,by=c("a","gapc"))

newdataw <- rbind(newdata1w2,newdata2w2,newdata3w2)
newdataw <- newdataw %>% mutate(res=(yt-x*y)/sqrt(x*y))
newdataw$res <- ifelse(abs(newdataw$res)>3,sign(newdataw$res)*3,newdataw$res)
newdataw$res[newdataw$a-newdataw$gapc<12] <- 0

#Plot Pearson residuals (Figure 4.13)
png(file="chap4/plots/fig13.png",width=17.5,height=10,units="cm",res=400)
ggplot(newdataw,aes(x=a,y=gapc,fill=res)) +
  geom_raster(hjust=0.5,vjust=0.5) +
  labs(x = "Age", y = "Time since last birth", fill="Pearson\nresidual") +
  scale_fill_gradientn(colors=rev(c("red","orange","yellow","white","green","turquoise","blue")), limits=c(-3,3),guide = guide_colorbar(barheight = 15,frame.colour="black",ticks.colour="black"), breaks = seq(-3,3,0.5), labels=c(expression(""<="-3"),"-2.5","-2.0","-1.5","-1.0","-0.5","0.0","0.5","1.0","1.5","2.0","2.5",expression("">=3.0))) +
  scale_y_continuous(expand=c(0,0),breaks=seq(1,11,2), minor_breaks=seq(2,10,2)) + 
  scale_x_continuous(expand=c(0,0),breaks=seq(15,44,5), minor_breaks=setdiff(15:44,seq(15,44,5))) +
  coord_cartesian(xlim=c(14.5,44.5),ylim=c(0.5,11.5)) +
  geom_hline(yintercept=1.5:10.5,color="lightgray",size=0.25) +
  geom_vline(xintercept=15.5:43.5,color="lightgray",size=0.25) +
  geom_step(data=stepfunc(3), mapping=aes(x=x,y=y,fill=x),direction="hv") +
  theme_bw() + theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank(),text = element_text("Calibri")) +
  facet_wrap(~par+qualft,labeller=labeller(par=supp.labsp))
dev.off()
