##Fitting integrated GAMs - parity 2
library(rstan)
load("chap4/data/ONS2018_allc.RData")
load("chap4/results/Qimp.RData")

back <- FALSE # change to TRUE if peforming backtesting
imp <-  FALSE # change to TRUE if using one of the Q imputations instead of the mean imputation

if (imp) {
  N.imp <- 1 # choose a number between 1 and 10
  qualf3bimp_2 <- qualf3bMI_2[[N.imp]]
}

if (!imp) {
  load("chap3/results/Qmeanimp.RData")
}

#GAM set-up
source("chap4/scripts/p2_setup.r")
betadim <- ncol(B_A2)+ncol(B_C2)+ncol(B_T2)+ncol(B_AC4)
S1 <- S2 <- S3 <- S4 <- S5 <- matrix(0,betadim,betadim)
S1[1:ncol(B_A2),1:ncol(B_A2)] <- S_A2
S2[(1:ncol(B_C2)+ncol(B_A2)),(1:ncol(B_C2)+ncol(B_A2))] <- S_C2
S3[(1:ncol(B_T2)+ncol(B_A2)+ncol(B_C2)),(1:ncol(B_T2)+ncol(B_A2)+ncol(B_C2))] <- S_T2
S4[(1:ncol(B_AC4)+ncol(B_A2)+ncol(B_C2)+ncol(B_T2)),(1:ncol(B_AC4)+ncol(B_A2)+ncol(B_C2)+ncol(B_T2))] <- S_AF
S5[(1:ncol(B_AC4)+ncol(B_A2)+ncol(B_C2)+ncol(B_T2)),(1:ncol(B_AC4)+ncol(B_A2)+ncol(B_C2)+ncol(B_T2))] <- S_CF
X <- cbind(B_A2,B_C2,B_T2,B_AC4)
wt <- data2w$nw/data2w$n
Xq <- matrix(0,N,3)
for (q in 1:3) Xq[,q] <- ifelse(data2$qualf3bimp_2==q,1,0)

#ONS data set-up
data <- data.frame(age=ONS_births_dat[[3]]$Group.1,coh=ONS_births_dat[[3]]$Group.2,N=ONS_expos_dat[[3]]$x,n=ONS_births_dat[[3]]$x,x=ONS_rates_dat[[3]]$x)
data <- data[which(data$N>0 & data$n>=0 & data$n <= data$N & data$age %in% c(15:44)),]
if (back) data <- data[which(data$age + data$coh <= 2013),]
ONSN <- data$N
ONSn <- data$n
Nm <- nrow(data)
data2f <- expand.grid(age=agerange,coh=cohrange,gapc=1:11,qualf=1:3)
data2f <- right_join(data,data2f,by=c("age","coh"))
ACind <- which(!is.na(data2f$N))
ACind <- ACind[ACind <= nrow(data2f)/33]
data2f <- data2f[which(!is.na(data2f$N)),]
data2f <- data2f[,-c(3,4,5)]
Xf <- cbind(B_A2[A_ind[data2f$age-14],],
            B_C2all[C_indall[data2f$coh-1944],],
            B_T2[T_ind[data2f$gapc],],
            B_ACfull3[rep(ACind,33),])
Nf <- nrow(Xf)
Afind <- c(data2f$age-14)[1:Nm]
Xqf <- matrix(0,Nf,3)
for (q in 1:3) Xqf[,q] <- ifelse(data2f$qualf==q,1,0)

#ACQ model set-up
newdata2 <- aggregate(qualf3b_2 ~ age_2 + coh_2, FUN = function(x) c(y=length(x), q1=length(x[x==1]), q2=length(x[x==2]), q3=length(x[x==3])), subset=coh_2 <= 1982)
newdata2 <- data.frame(newdata2$age_2,newdata2$coh_2,newdata2$qualf3b_2)
colnames(newdata2) <- c("a","c","y","y1","y2","y3")
newdata2$a <- newdata2$a - median(15:44)
newdata2$yw <- aggregate(weights_2st ~ age_2 + coh_2, FUN = sum, subset = coh_2 <= 1982)$weights_2st
newdata2$wtmult <- newdata2$yw/newdata2$y
colnames(newdata2) <- c("a","c","y",paste0(1:3),"yw","wtmult")
qmax1 <- 3

newdata22 <- expand.grid(a=agerange,c=cohrange)
newdata22$cc <- newdata22$c-median(1945:1982)
newdata22$cind72 <- ifelse(newdata22$c >= 1972, 1972, newdata22$c)
newdata22$ccind72 <- newdata22$cind72-1944
newdata22$cc72 <- ifelse(newdata22$c<=1971,0,newdata22$cc)
newdata22$aind <- newdata22$a-14
newdata22$a <- newdata22$a-median(15:44)

newdata22 <- left_join(newdata22,newdata2,by=c("a","c"))
Nac <- nrow(newdata22)
y <- newdata22[,paste0(1:qmax1)]
yind <- which(!is.na(y[,1]))
Nobs <- length(yind)
y[is.na(y)] <- 0
cc72 <- newdata22$cc72
ccind72 <- newdata22$ccind72
Nc72 <- max(ccind72)
ac <- newdata22$a
aind <- newdata22$aind
Na <- max(aind)
wtc <- newdata22$wtmult
wtc[is.na(wtc)] <- 0

#ATQ model set-up
newdata2 <- aggregate(gapc_2 ~ age_2 + qualf3b_2, FUN = function(x) c(y=length(x), t1=length(x[x==1]), t2=length(x[x==2]), t3=length(x[x==3]), t4=length(x[x==4]), t5=length(x[x==5]), t6=length(x[x==6]), t7=length(x[x==7]), t8=length(x[x==8]), t9=length(x[x==9]), t10=length(x[x==10]), t11=length(x[x==11])), subset = coh_2 <= 1982)
newdata2 <- data.frame(newdata2$age_2,newdata2$qualf3b_2,newdata2$gapc_2)
newdata2$yw <- aggregate(weights_2st ~ age_2 + qualf3b_2, FUN = sum, subset = coh_2 <= 1982)$weights_2st
newdata2$wtmult <- newdata2$yw/newdata2$y
colnames(newdata2) <- c("a","q","y",paste0(1:11),"yw","wtmult")
newdata2.1 <- newdata2[newdata2$q==1,]
newdata2.2 <- newdata2[newdata2$q==2,]
newdata2.3 <- newdata2[newdata2$q==3,]
newdata2.1 <- rbind(0,0,newdata2.1)
newdata2.2 <- rbind(0,0,newdata2.2)
newdata2.3 <- rbind(0,0,newdata2.3)
rownames(newdata2.1) <- agerange
rownames(newdata2.2) <- agerange
rownames(newdata2.3) <- agerange

y1 <- newdata2.1[,paste0(1:11)]
y2 <- newdata2.2[,paste0(1:11)]
y3 <- newdata2.3[,paste0(1:11)]
Nab <- nrow(y1)+1
a <- as.numeric(rownames(y1))-median(as.numeric(rownames(y1)))
ab <- y1
for (i in 3:11) ab[,i] <- c(rep(0,i-3),1:(Na-(i-3)))
ab[,2] <- 2:(Na+1)
ab <- ab[,-1]
ab[ab==0] <- 1
wt1c <- newdata2.1[rownames(y1),"wtmult"]
wt2c <- newdata2.2[rownames(y2),"wtmult"]
wt3c <- newdata2.3[rownames(y3),"wtmult"]

#Fit Stan models
standata <- list(N=N,Nm=Nm,Nf=Nf,Nac=Nac,Na=Na,Nab=Nab,Nc72=Nc72,Nobs=Nobs,
                 betadim=betadim,succ=succ,tot=tot,ONSN=ONSN,ONSn=ONSn,
                 y=y,y1=y1,y2=y2,y3=y3,ccind72=ccind72,cc72=cc72,
                 aind=aind,a=a,ac=ac,ab=ab,Afind=Afind,ACind=ACind,yind=yind,
                 wtc=wtc,wt1c=wt1c,wt2c=wt2c,wt3c=wt3c,wt=wt,X=X,Xq=Xq,Xf=Xf,Xqf=Xqf,
                 S1=S1,S2=S2,S3=S3,S4=S4,S5=S5)

#50/50
stanout <- stan(file="chap4/stan/p2_5050.stan",data=standata,chains=1,iter=2000)
if (!imp & !back) save(stanout,file="chap4/results/p2_5050.RData")
if ( imp & !back) save(stanout,file=paste0("chap4/results/p2_5050_imp",N.imp,".RData"))
if (!imp &  back) save(stanout,file="chap4/results/p2_5050_2013.RData")
if ( imp &  back) save(stanout,file=paste0("chap4/results/p2_5050_imp",N.imp,"_2013.RData"))

# #33/67
# stanout <- stan(file="chap4/stan/p2_3367.stan",data=standata,chains=1,iter=2000)
# if (!imp & !back) save(stanout,file="chap4/results/p2_3367.RData")
# if ( imp & !back) save(stanout,file=paste0("chap4/results/p2_3367_imp",N.imp,".RData"))
# if (!imp &  back) save(stanout,file="chap4/results/p2_3367_2013.RData")
# if ( imp &  back) save(stanout,file=paste0("chap4/results/p2_3367_imp",N.imp,"_2013.RData"))
# 
# #25/75
# stanout <- stan(file="chap4/stan/p2_2575.stan",data=standata,chains=1,iter=2000)
# if (!imp & !back) save(stanout,file="chap4/results/p2_2575.RData")
# if ( imp & !back) save(stanout,file=paste0("chap4/results/p2_2575_imp",N.imp,".RData"))
# if (!imp &  back) save(stanout,file="chap4/results/p2_2575_2013.RData")
# if ( imp &  back) save(stanout,file=paste0("chap4/results/p2_2575_imp",N.imp,"_2013.RData"))
# 
# #20/80
# stanout <- stan(file="chap4/stan/p2_2080.stan",data=standata,chains=1,iter=2000)
# if (!imp & !back) save(stanout,file="chap4/results/p2_2080.RData")
# if ( imp & !back) save(stanout,file=paste0("chap4/results/p2_2080_imp",N.imp,".RData"))
# if (!imp &  back) save(stanout,file="chap4/results/p2_2080_2013.RData")
# if ( imp &  back) save(stanout,file=paste0("chap4/results/p2_2080_imp",N.imp,"_2013.RData"))
# 
# #10/90
# stanout <- stan(file="chap4/stan/p2_1090.stan",data=standata,chains=1,iter=2000)
# if (!imp & !back) save(stanout,file="chap4/results/p2_1090.RData")
# if ( imp & !back) save(stanout,file=paste0("chap4/results/p2_1090_imp",N.imp,".RData"))
# if (!imp &  back) save(stanout,file="chap4/results/p2_1090_2013.RData")
# if ( imp &  back) save(stanout,file=paste0("chap4/results/p2_1090_imp",N.imp,"_2013.RData"))
