##Exploratory Data Analysis
library(ggplot2)
library(dplyr)
windowsFonts(Calibri=windowsFont("Calibri"))

#Loading function
loading <- function(dataset){
  dat <- load(dataset)
  return(mget(dat))
}

#Labels
supp.labs1 <- c("All","Parity 0","Parity 1","Parity 2","Parity 3+")
names(supp.labs1) <- c(0,1,2,3,4)
supp.labs2 <- c("Single-year cohorts", "Six-year cohorts")
names(supp.labs2) <- c(1,5)
supp.labst <- c("UKHLS","ONS")
names(supp.labst) <- c(1,2)
supp.labsa <- c("15-19","20-24","25-29","30-34","35-39","40-44")
names(supp.labsa) <- seq(17,42,by=5)
supp.labsc <- c("1945-1950","1951-1956","1957-1962","1963-1968","1969-1974","1975-1980","1981-1986","1987-1992")
names(supp.labsc) <- seq(1947.5,1989.5,by=6)

#Observed fertility rates by parity (Figure 4.1)
ONS6yfunc <- function(i,seq,dat) {
  data <- aggregate(dat$ONS_expos_dat[[i]][,"x"], by=list(dat$ONS_expos_dat[[i]]$Group.1,seq[ceiling((dat$ONS_expos_dat[[i]]$Group.2-1944)/6)]), FUN=sum)
  colnames(data) <- c("age","coh","N")
  data$n <- aggregate(dat$ONS_births_dat[[i]][,"x"], by=list(dat$ONS_births_dat[[i]]$Group.1,seq[ceiling((dat$ONS_births_dat[[i]]$Group.2-1944)/6)]), FUN=sum)$x
  data$x <- data$n/data$N
  data
}

plotfunc <- function(type,filename) {
  if (type=="allc"){
    dat <- loading("chap4/data/ONS2018_allc.RData")
    seqc <- c(seq(1947.5,1995.5,by=6),2001)
    }
  if (type=="resc"){
    dat <- loading("chap4/data/ONS2018_resc.RData")
    seqc <- seq(1947.5,1989.5,by=6)
    }
  data <- rbind(cbind(par=0,yr=1,age=dat$ONS_births_dat[[5]]$Group.1,coh=dat$ONS_births_dat[[5]]$Group.2,N=dat$ONS_expos_dat[[5]]$x,n=dat$ONS_births_dat[[5]]$x,x=dat$ONS_rates_dat[[5]]$x),
                cbind(par=1,yr=1,age=dat$ONS_births_dat[[1]]$Group.1,coh=dat$ONS_births_dat[[1]]$Group.2,N=dat$ONS_expos_dat[[1]]$x,n=dat$ONS_births_dat[[1]]$x,x=dat$ONS_rates_dat[[1]]$x),
                cbind(par=2,yr=1,age=dat$ONS_births_dat[[2]]$Group.1,coh=dat$ONS_births_dat[[2]]$Group.2,N=dat$ONS_expos_dat[[2]]$x,n=dat$ONS_births_dat[[2]]$x,x=dat$ONS_rates_dat[[2]]$x),
                cbind(par=3,yr=1,age=dat$ONS_births_dat[[3]]$Group.1,coh=dat$ONS_births_dat[[3]]$Group.2,N=dat$ONS_expos_dat[[3]]$x,n=dat$ONS_births_dat[[3]]$x,x=dat$ONS_rates_dat[[3]]$x),
                cbind(par=4,yr=1,age=dat$ONS_births_dat[[4]]$Group.1,coh=dat$ONS_births_dat[[4]]$Group.2,N=dat$ONS_expos_dat[[4]]$x,n=dat$ONS_births_dat[[4]]$x,x=dat$ONS_rates_dat[[4]]$x),
                cbind(par=0,yr=5,ONS6yfunc(5,seqc,dat)),
                cbind(par=1,yr=5,ONS6yfunc(1,seqc,dat)),
                cbind(par=2,yr=5,ONS6yfunc(2,seqc,dat)),
                cbind(par=3,yr=5,ONS6yfunc(3,seqc,dat)),
                cbind(par=4,yr=5,ONS6yfunc(4,seqc,dat))) 
  data <- data[which(data$N>0 & data$n>=0 & data$n <= data$N),]
  png(file=filename,width=30,height=15,units="cm",res=400)
  print({ggplot(data, aes(x=age, y=x, color = coh)) +
      geom_point(size=1) +
      labs(x = "Age", y = "Observed rate", color = "Cohort") +
      scale_color_gradientn(colours=rainbow(100, start=0.3, end=1), guide = guide_colorbar(barheight = 20,frame.colour="black",ticks.colour="black"), breaks = seq(1945,2000,5)) +
      scale_y_continuous(breaks=seq(0,0.5,0.1)) +
      scale_x_continuous(limits=c(15,44), breaks=seq(15,40,5), minor_breaks=setdiff(15:44,seq(15,40,5)), expand=c(0.03,0.03)) +
      coord_cartesian(ylim=c(0,0.5)) +
      theme_bw() + theme(legend.position = "right", text = element_text("Calibri")) +
      facet_grid(cols=vars(par), rows=vars(yr), labeller = labeller(par=supp.labs1, yr=supp.labs2))
  })
  dev.off()
}

plotfunc("allc", "chap4/plots/fig1_allc.png")
plotfunc("resc", "chap4/plots/fig1_resc.png")

#Histograms of ratios of exposures (Figure 4.2)
load("chap4/data/ONS2018_resc.RData")

dat0w <- aggregate(weights_0st, by=list(age_0, coh_0), FUN = sum)
dat1w <- aggregate(weights_1st, by=list(age_1, coh_1), FUN = sum)
dat2w <- aggregate(weights_2st, by=list(age_2, coh_2), FUN = sum)
dat3w <- aggregate(weights_3st, by=list(age_3, coh_3), FUN = sum)

dat0w <- left_join(dat0w,ONS_expos_dat[[1]],by=c("Group.1","Group.2"))
dat1w <- left_join(dat1w,ONS_expos_dat[[2]],by=c("Group.1","Group.2"))
dat2w <- left_join(dat2w,ONS_expos_dat[[3]],by=c("Group.1","Group.2"))
dat3w <- left_join(dat3w,ONS_expos_dat[[4]],by=c("Group.1","Group.2"))

dat0w$wt <- dat0w$x.y/dat0w$x.x
dat1w$wt <- dat1w$x.y/dat1w$x.x
dat2w$wt <- dat2w$x.y/dat2w$x.x
dat3w$wt <- dat3w$x.y/dat3w$x.x

dat0w$Parity <- 0
dat1w$Parity <- 1
dat2w$Parity <- 2
dat3w$Parity <- "3+"

datw <- rbind(dat0w,dat1w,dat2w,dat3w)
datw$Parity <- factor(datw$Parity,levels=c("0","1","2","3+"))

png(file="chap4/plots/fig2.png",width=20,height=15,units="cm",res=400)
print({ggplot(datw, aes(x=wt,fill=Parity,color=Parity)) +
    geom_histogram(aes(y=..density..), position="identity", binwidth=50, alpha=0.5) +
    geom_density(alpha=0,size=0.8,n=2^10) +
    labs(x = "Ratio", y = "Density") + 
    scale_x_continuous(breaks=seq(0,2000,500), expand=c(0.02,0.02)) +
    coord_cartesian(xlim=c(0,2000)) +
    theme_gray() + theme(legend.position = "none", text = element_text("Calibri"),
                         axis.text.y=element_blank(), axis.ticks.y=element_blank()) +
    facet_wrap(~paste0("Parity ",Parity))
})
dev.off()

#Calibration plots (Figure 4.3)
load("chap4/data/ONS2018_resc.RData")
ONS6y5afunc <- function(i,seqa,seqc) {
  data <- aggregate(ONS_expos_dat[[i]][ONS_expos_dat[[i]]$Group.1 %in% 15:44,"x"], by=list(seqa[ceiling((ONS_expos_dat[[i]]$Group.1[ONS_expos_dat[[i]]$Group.1 %in% 15:44]-14)/5)],seqc[ceiling((ONS_expos_dat[[i]]$Group.2[ONS_expos_dat[[i]]$Group.1 %in% 15:44]-1944)/6)]), FUN=sum)
  colnames(data) <- c("age","coh","N")
  data$n <- aggregate(ONS_births_dat[[i]][ONS_births_dat[[i]]$Group.1 %in% 15:44,"x"], by=list(seqa[ceiling((ONS_births_dat[[i]]$Group.1[ONS_births_dat[[i]]$Group.1 %in% 15:44]-14)/5)],seqc[ceiling((ONS_births_dat[[i]]$Group.2[ONS_births_dat[[i]]$Group.1 %in% 15:44]-1944)/6)]), FUN=sum)$x
  data$x <- data$n/data$N
  data
}
wratesfunc <- function(w,c,a,b,sixyc=F,fiveya=F,FUN) {
  if (sixyc)  c <- seq(1947.5,1989.5,by=6)[ceiling((c-1944)/6)]
  if (fiveya) a <- seq(17,42,by=5)[ceiling((a-14)/5)]
  data <- aggregate(w, by=list(a,c), FUN = FUN)
  data2 <- aggregate(w[b==1], by=list(a[b==1],c[b==1]), FUN = FUN)
  data <- left_join(data,data2,by=c("Group.1","Group.2")) %>% mutate_each(function(x) (replace(x, which(is.na(x)), 0)))
  data$x <- data$x.y/data$x.x
  data
}

ONS6y5a <- rbind(cbind(par=0,ONS6y5afunc(5,seq(17,42,by=5),seq(1947.5,1989.5,by=6))),
                 cbind(par=1,ONS6y5afunc(1,seq(17,42,by=5),seq(1947.5,1989.5,by=6))),
                 cbind(par=2,ONS6y5afunc(2,seq(17,42,by=5),seq(1947.5,1989.5,by=6))),
                 cbind(par=3,ONS6y5afunc(3,seq(17,42,by=5),seq(1947.5,1989.5,by=6))),
                 cbind(par=4,ONS6y5afunc(4,seq(17,42,by=5),seq(1947.5,1989.5,by=6))))
UKHLS6y5a <- rbind(cbind(par=0,wratesfunc(weights,coh,age,birth.bin,T,T,sum)),
                   cbind(par=1,wratesfunc(weights_0,coh_0,age_0,birth.bin_0,T,T,sum)),
                   cbind(par=2,wratesfunc(weights_1,coh_1,age_1,birth.bin_1,T,T,sum)),
                   cbind(par=3,wratesfunc(weights_2,coh_2,age_2,birth.bin_2,T,T,sum)),
                   cbind(par=4,wratesfunc(weights_3,coh_3,age_3,birth.bin_3,T,T,sum)))
UKHLS6y5a <- UKHLS6y5a[which(UKHLS6y5a$x.x>0),]
names(UKHLS6y5a)[2:3] <- c("age","coh")

all6y5a <- rbind(data.frame(type="ONS",ONS6y5a[,c(1:3,6)]),
                 data.frame(type="UKHLS",UKHLS6y5a[,c(1:3,6)]))
all6y5a$age2 <- factor(supp.labsa[ceiling((all6y5a$age-16)/5)],levels=supp.labsa)

png(file="chap4/plots/fig3.png",width=30,height=17.5,units="cm",res=400)
print({ggplot(all6y5a, aes(x=age2, y=x, group=type)) +
  geom_point(size=1,aes(shape=type)) +
  geom_line(aes(linetype=type),show.legend = T) +
  labs(x = "Age group", y = "Observed rate", color = "Cohort") +
  scale_shape_manual(values=c("ONS" = 1,"UKHLS" = 4),guide_legend(title="Dataset")) +
  scale_linetype_manual(values=c("ONS" = 1,"UKHLS" = 2),guide_legend(title="Dataset")) +
  coord_cartesian(ylim=c(0,0.3)) +
  theme_bw() + theme(legend.position = "bottom", text = element_text("Calibri"),
                     axis.text.x = element_text(angle = 90, hjust=1, vjust=0.3)) +
  facet_grid(rows=vars(par), cols=vars(coh), labeller=labeller(par=supp.labs1, coh=supp.labsc))
})
dev.off()
