# This script is used to encode the 54 raw phenotypic variables used to develop the CAPE and CAPP models
# A summary of the encoded data can be found in IOWBC_data.xlsx, sheet: "Data dictionary"
# R version 3.6.1 is used

# Set working directory
# Every file name read or written later in this script is relative, so it is
# resolved against the directory selected here (three levels above the
# directory the R session starts in).
setwd("../../..")

# Load packages
library(dplyr)

#Import raw data for variables - found in IOWBC_data.xlsx, sheet: "Raw data"
# stringsAsFactors is set explicitly: later steps (e.g. levels(data$birthwt))
# expect text columns to be factors, which read.csv only does by default
# before R 4.0.0. TRUE is spelled out because T is an ordinary, reassignable
# variable.
data <- read.csv(file = "Original_candidate_variables.csv", header = TRUE,
	stringsAsFactors = TRUE)
#1536 samples

########################
### Encode Variables ###
########################
###	Study_ID ###
# Participant identifier. The object has to exist because the cbind() call in
# the "Construct dataset" section lists Study_ID as its first column; with the
# assignment commented out that call stops with "object 'Study_ID' not found".
Study_ID <- data$StudyID

###	Maternal Age ###
# Ages below 16 are set to missing. which() drops comparisons that evaluate to
# NA, so entries that are already missing are left as they are.
maternal_age_raw <- data$F0_Age_at_Delivery
Mat_age <- replace(maternal_age_raw, which(maternal_age_raw < 16), NA)
summary(Mat_age, useNA = "ifany")

							
### Maternal Asthma ###
# Shared coding for the parental-history items: "No" -> 0, "Yes" -> 1.
# Indexing a named vector by label returns NA for every other label and for
# missing answers, which is the same fallthrough as a nested ifelse() whose
# last branch is NA.
yes_no_code <- c("No" = 0, "Yes" = 1)
Mat_asthma <- unname(yes_no_code[as.character(data$asthmy1.0)])
table(Mat_asthma, useNA = "ifany")


### Maternal eczema ###
Mat_eczema <- unname(yes_no_code[as.character(data$eczemy2.0)])
table(Mat_eczema, useNA = "ifany")


### Maternal hayfever ###
Mat_hayfever <- unname(yes_no_code[as.character(data$hayy3.0)])
table(Mat_hayfever, useNA = "ifany")


### Paternal Asthma ###
Pat_asthma <- unname(yes_no_code[as.character(data$asthmp1.0)])
table(Pat_asthma, useNA = "ifany")


### Paternal eczema ###
Pat_eczema <- unname(yes_no_code[as.character(data$eczemp2.0)])
table(Pat_eczema, useNA = "ifany")


### Paternal hayfever ###
Pat_hayfever <- unname(yes_no_code[as.character(data$hayp3.0)])
table(Pat_hayfever, useNA = "ifany")


### Sex ###
# "Female" -> 0, "Male" -> 1, anything else -> NA.
sex_code <- c("Female" = 0, "Male" = 1)
Sex <- unname(sex_code[as.character(data$Sex)])
table(Sex, useNA = "ifany")
						

### Prematurity ###
# Gestational age (data$gestage) is grouped into three bands:
#   28-36 -> 0, 37-42 -> 1, 43-45 -> 2, any other value (or NA) -> NA.
# The bands are written as ranges so that 29, which the former hand-typed list
# skipped, is coded 0 like its neighbours 28 and 30 instead of NA.
gest_weeks <- as.character(data$gestage)
Prematurity <- ifelse(gest_weeks %in% as.character(28:36), 0,
	ifelse(gest_weeks %in% as.character(37:42), 1,
		ifelse(gest_weeks %in% as.character(43:45), 2, NA)))
table(Prematurity, useNA = "ifany")
							

### Delivery ###
# Codes "101", "-101" and "-100" are treated as missing; codes "4", "5" and "9"
# become 1 and every other recorded code becomes 0. The missing-code test is
# evaluated first with ==, so an NA in data$Delivery stays NA.
delivery_raw <- data$Delivery
delivery_is_missing_code <- delivery_raw == "101" | delivery_raw == "-101" |
	delivery_raw == "-100"
Delivery <- ifelse(delivery_is_missing_code, NA,
	ifelse(delivery_raw %in% c("4", "5", "9"), 1, 0))
table(Delivery, useNA = "ifany")
							

### Birth weight ###
# Convert through as.character() so the step works whether read.csv delivered
# the column as a factor or as character (levels(x)[x] only works for factors).
# Entries that are not numbers become NA, with a coercion warning.
data$birthwt <- as.numeric(as.character(data$birthwt))
# Values below 0.90 are treated as missing.
data$birthwt[data$birthwt < 0.90] <- NA
# The value 32.54 is replaced by 3.25 (presumably a data-entry error - confirm).
data$birthwt[data$birthwt == 32.54] <- 3.25
Birthweight <- round(data$birthwt, digits = 2)
# summary() already reports the number of NAs for numeric input. The former
# second argument, useNA=="ifany", was a comparison against an undefined object
# rather than a named argument, so it has been dropped.
summary(Birthweight)
							

### Parity ###
# NOTE(review): the first test reads data$BirthOrder.4 while the others read
# data$BirthOrder. If no column is literally named "BirthOrder", `$` partial
# matching on the data frame resolves it to BirthOrder.4 - confirm which column
# is intended.
birth_order <- data$BirthOrder
parity_is_zero <- data$BirthOrder.4 == "1" | birth_order == "Not Applicable"
# 0 for the cases above, NA for "Drop Out" / "Not Seen", 1 for everything else.
Parity <- ifelse(parity_is_zero, 0,
	ifelse(birth_order %in% c("Drop Out", "Not Seen"), NA, 1))
table(Parity, useNA = "ifany")
									

### Season of Birth ###
# The month abbreviation is whatever remains of the date string after removing
# its first three and last three characters, i.e. characters 4 .. (n - 3).
data$DateOfBirth <- as.character(data$DateOfBirth)
data$DOB <- substr(data$DateOfBirth, 4, nchar(data$DateOfBirth) - 3)
# Assign season of birth: Sep-Nov -> 0, Dec-Feb -> 1, Mar-May -> 2,
# Jun-Aug -> 3; any other text (or NA) -> NA.
season_code <- c(
	"Sep" = 0, "Oct" = 0, "Nov" = 0,
	"Dec" = 1, "Jan" = 1, "Feb" = 1,
	"Mar" = 2, "Apr" = 2, "May" = 2,
	"Jun" = 3, "Jul" = 3, "Aug" = 3
)
Birth_season <- unname(season_code[data$DOB])
table(Birth_season, useNA = "ifany")
														

### Solid food introduction ###
# Convert through as.character() so the step works for factor and character
# columns alike (levels(x)[x] only works when the column is a factor).
# Entries that are not numbers become NA, with a coercion warning.
Solid_food <- as.numeric(as.character(data$solids.1))
table(Solid_food, useNA = "ifany")

								
### Wheeze_2YR ###
# wh1 and wh2 code the wheeze-frequency answers in data$wheast1.1 and
# data$episo1c.2 (presumably the 1- and 2-year follow-ups - confirm) as
# 0 = not applicable / no wheezing, 1 = "<3", 2 = ">3" or "Frequent".
# Unrecognised labels become NaN, whereas answers that are NA stay NA.
# The NaN/NA distinction matters further down: in a comparison with a string
# NaN is coerced to the text "NaN" (so NaN=="0" is FALSE and NaN=="NaN" is
# TRUE), while any comparison involving NA is NA.
wh1 <- ifelse(data$wheast1.1=="Not applicable",0,
	ifelse(data$wheast1.1=="<3",1,
		ifelse(data$wheast1.1==">3"|data$wheast1.1=="Frequent",2,NaN)))

wh2 <- ifelse(data$episo1c.2=="Not applicable"|data$episo1c.2=="No wheezing",0,
	ifelse(data$episo1c.2=="<3",1,
		ifelse(data$episo1c.2==">3"|data$episo1c.2=="Frequent",2,NaN)))	

# Combined code: 0 when both codes are 0; otherwise 2 when either code is 2;
# otherwise 1 when either code is 1; NA in every remaining case.
Wheeze_2YR <- ifelse(wh1=="0" & wh2=="0",0, 
	ifelse(wh1=="2"|wh2=="2",2,
		ifelse(wh1=="1"|wh2=="1",1,NA)))
table(Wheeze_2YR, useNA="ifany")
													

### Wheeze without cold 2YR ###
# 1 when either code is 2; NA when either code is NaN (unrecognised label);
# otherwise 0. A test that itself evaluates to NA also gives NA.
Wheeze_without_cold_2YR <- ifelse(wh1=="2"|wh2=="2",1,
	ifelse((wh1=="NaN"|wh2=="NaN"),NA,0))
table(Wheeze_without_cold_2YR, useNA="ifany")
							

### Cough 2YR ###
# c1 / c2: 0 for "Not applicable" or "Never", 1 for "Frequent" or "Occasional",
# NaN for any other label; an NA answer stays NA.
# NaN is used for the fallthrough because NaN=="0" is FALSE in a string
# comparison, whereas NA=="0" is NA.
c1 <- ifelse(data$couast1.1=="Not applicable"|data$couast1.1=="Never",0,
	ifelse(data$couast1.1=="Frequent"|data$couast1.1=="Occasional",1,NaN))
	
c2 <- ifelse(data$cough1c.2=="Never"|data$cough1c.2=="Not applicable",0,
	ifelse(data$cough1c.2=="Frequent"|data$cough1c.2=="Occasional",1,NaN))

# 0 when both codes are 0; otherwise 1 when either code is 1; otherwise NA.
Cough_2YR <- ifelse(c1=="0" & c2=="0",0,
	ifelse(c1=="1"|c2=="1",1,NA))
table(Cough_2YR, useNA="ifany")
							
							
### Nasal symptoms 2YR ###
# nas1 / nas2: "No" -> 0, "Yes" -> 1, any other label -> NaN, NA stays NA.
nas1<- ifelse(data$nasal3.1=="No",0,
	ifelse(data$nasal3.1=="Yes",1,NaN))

nas2<- ifelse(data$nasal3.2=="No",0,
	ifelse(data$nasal3.2=="Yes",1,NaN))

# 0 when both codes are 0; otherwise 1 when either code is 1; otherwise NA.
Nasal_symp_2YR <- ifelse(nas1=="0" & nas2=="0",0,
	ifelse(nas1=="1"|nas2=="1",1,NA))
table(Nasal_symp_2YR, useNA="ifany")
										

### Recurrent chest infections 2YR ###
# inf1 / inf2: "No" or "Not appliable" -> 0, "Yes" -> 1, any other label -> NaN,
# NA stays NA. NaN is used because NaN=="0" is FALSE in a string comparison,
# whereas NA=="0" is NA.
# NOTE(review): "Not appliable" is matched literally; presumably that is how the
# label is spelled in the raw data - confirm.
inf1<- ifelse(data$chest1.1=="No"|data$chest1.1=="Not appliable",0,
	ifelse(data$chest1.1=="Yes",1,NaN))

inf2<- ifelse(data$cheast1.2=="No"|data$cheast1.2=="Not appliable",0,
	ifelse(data$cheast1.2=="Yes",1,NaN))
	
# 0 when both codes are 0; otherwise 1 when either code is 1; otherwise NA.
Chest_infection_2YR <- ifelse(inf1=="0" & inf2=="0",0,
	ifelse(inf1=="1"|inf2=="1",1,NA))
table(Chest_infection_2YR, useNA="ifany")
							

### Nocturnal symptoms 2YR ###
# noct1 / noct2: "Never" or "Not applicable" -> 0, "Frequent" or "Occasional"
# -> 1, any other label -> NaN, NA stays NA.
noct1<- ifelse(data$nocast1.1=="Never"|data$nocast1.1=="Not applicable",0,
	ifelse(data$nocast1.1=="Frequent"|data$nocast1.1=="Occasional",1,NaN))					

noct2<- ifelse(data$noct1c.2=="Never"|data$noct1c.2=="Not applicable",0,
	ifelse(data$noct1c.2=="Frequent"|data$noct1c.2=="Occasional",1,NaN))	

# 0 when both codes are 0; otherwise 1 when either code is 1; otherwise NA.
Noct_symp_2YR <- ifelse(noct1=="0" & noct2=="0",0,
	ifelse(noct1=="1"|noct2=="1",1,NA))
table(Noct_symp_2YR, useNA="ifany")
							
							
### Eczema 2YR ###
# eczema1 / eczema2: "No" -> 0, "Yes" -> 1, any other label -> NaN, NA stays NA.
# NaN is used because NaN=="0" is FALSE in a string comparison, whereas
# NA=="0" is NA.
eczema1<- ifelse(data$eczema2.1=="No",0,
	ifelse(data$eczema2.1=="Yes",1,NaN))	

eczema2<- ifelse(data$eczema2.2=="No",0,
	ifelse(data$eczema2.2=="Yes",1,NaN))

# 0 when both codes are 0; otherwise 1 when either code is 1; otherwise NA.
Eczema_2YR <- ifelse(eczema1=="0" & eczema2=="0",0,
	ifelse(eczema1=="1"|eczema2=="1",1,NA))
table(Eczema_2YR, useNA="ifany")
													

### Hayfever 2YR ###
# hay1 / hay2: "yes" -> 1, any label other than those listed -> NaN, NA stays NA.
# NOTE(review): the values coded 0 are the labels "no siblings" (rhcon.1) and
# "2" (rhcon.2); these look like mismatched value labels in the raw export -
# confirm against the data dictionary.
hay1<- ifelse(data$rhcon.1=="no siblings",0,
	ifelse(data$rhcon.1=="yes",1,NaN))							
		
hay2<- ifelse(data$rhcon.2=="2",0,
	ifelse(data$rhcon.2=="yes",1,NaN))	
			
# 0 when both codes are 0; otherwise 1 when either code is 1; otherwise NA.
Hayfever_2YR <- ifelse(hay1=="0" & hay2=="0",0,
	ifelse(hay1=="1"|hay2=="1",1,NA))
table(Hayfever_2YR, useNA="ifany")
								

### Wheeze at 4YR ###
# Frequency label -> code through a named lookup; labels that are not listed,
# and NA answers, come out as NA.
wheeze_4yr_raw <- as.character(data$WhezFreq.4)
wheeze_4yr_code <- c("Not Applicable" = 0, "<3" = 1, ">3" = 2, "Frequent" = 2)
Wheeze_4YR <- unname(wheeze_4yr_code[wheeze_4yr_raw])
table(Wheeze_4YR, useNA = "ifany")


### Wheeze without cold at 4YR ###
# Same source column, collapsed to two levels:
# "Not Applicable" / "<3" -> 0, ">3" / "Frequent" -> 1.
wheeze_nocold_4yr_code <- c("Not Applicable" = 0, "<3" = 0, ">3" = 1, "Frequent" = 1)
Wheeze_without_cold_4YR <- unname(wheeze_nocold_4yr_code[wheeze_4yr_raw])
table(Wheeze_without_cold_4YR, useNA = "ifany")
							
							
### Cough at 4YR ###
# "Not Applicable" -> 0, "Drop Out" / "Not Seen" -> NA, any other label -> 1.
# The first test uses ==, so an NA answer stays NA.
cough_4yr_raw <- data$CoughFreq.4
Cough_4YR <- ifelse(cough_4yr_raw == "Not Applicable", 0,
	ifelse(cough_4yr_raw %in% c("Drop Out", "Not Seen"), NA, 1))
table(Cough_4YR, useNA = "ifany")


### Nasal symptoms at 4YR ###
# The remaining items use a named lookup: labels that are not listed, and NA
# answers, come out as NA.
Nasal_symp_4YR <- unname(
	c("No" = 0, "Yes" = 1)[as.character(data$RecurNasConges.4)])
table(Nasal_symp_4YR, useNA = "ifany")


### Nocturnal symptoms at 4YR ###
noct_4yr_code <- c("Not Applicable" = 0, "<3" = 1, ">3" = 1, "Frequent" = 1)
Noct_Symp_4YR <- unname(noct_4yr_code[as.character(data$NocSympFreq.4)])
table(Noct_Symp_4YR, useNA = "ifany")


### Eczema at 4YR ###
eczema_4yr_code <- c("No" = 0, "Not Applicable" = 0, "Yes" = 1)
Eczema_4YR <- unname(eczema_4yr_code[as.character(data$Eccon.4)])
table(Eczema_4YR, useNA = "ifany")


### Hayfever at 4YR ###
hayfever_4yr_code <- c("2" = 0, "Not Applicable" = 0, "Yes" = 1)
Hayfever_4YR <- unname(hayfever_4yr_code[as.character(data$Rhcon.4)])
table(Hayfever_4YR, useNA = "ifany")
							

### Early life on farm ###
# "Yes" -> 1; "Drop out" / "Missing" / "Not seen" -> NA; any other label -> 0.
# The first test uses ==, so an NA answer stays NA.
farm_raw <- data$Spend1stYear.18
Farm_early_life <- ifelse(farm_raw == "Yes", 1,
	ifelse(farm_raw %in% c("Drop out", "Missing", "Not seen"), NA, 0))
table(Farm_early_life, useNA = "ifany")


### Socioeconomic status of parents ###
# Taken over unchanged from the raw column.
SES <- data$SES_cluster_1_10_g5numerical
table(SES, useNA = "ifany")
							

### Smoking variables ###
### Maternal smoking at birth ###
# "No" -> 0, "Yes" -> 1, anything else (or NA) -> NA.
Mat_smoking_birth <- unname(
	c("No" = 0, "Yes" = 1)[as.character(data$smokey5.0)])
table(Mat_smoking_birth, useNA = "ifany")


### Paternal smoking at birth ###
# "No" / "Not appliable" -> 0, "Yes" -> 1, anything else (or NA) -> NA.
Pat_smoking_birth <- unname(
	c("No" = 0, "Not appliable" = 0, "Yes" = 1)[as.character(data$smokep5.0)])
table(Pat_smoking_birth, useNA = "ifany")


### Any parental smoking at birth ###
# Both inputs are 0, 1 or NA, so an elementwise OR gives the combined rule:
# 1 if either parent is 1, 0 if both are 0, NA when a missing value leaves the
# answer undetermined.
Smoking_birth <- ifelse(Mat_smoking_birth == 1 | Pat_smoking_birth == 1, 1, 0)
table(Smoking_birth, useNA = "ifany")
							
							
### Parental smoking 1YR ###
#Parental smoking at 1YR
# "No" -> 0, "Yes" -> 1, anything else (or NA) -> NA.
parental_smoke_code <- c("No" = 0, "Yes" = 1)
s1 <- unname(parental_smoke_code[as.character(data$smoke5.1)])
table(s1, useNA = "ifany")


#Parental smoking at 2YR
s2 <- unname(parental_smoke_code[as.character(data$smoke6.2)])
table(s2, useNA = "ifany")


#Any parental smoking at either 1/2 YR
# s1 and s2 are 0, 1 or NA: 1 if either is 1, 0 if both are 0, otherwise NA.
s3 <- ifelse(s1 == 1 | s2 == 1, 1, 0)


## Final Smoking 2YR variable
# 0 = no smoking at birth and none at 1/2YR, 1 = smoking at birth only,
# 2 = smoking at 1/2YR; NA when none of the three tests is TRUE.
no_smoking_to_2yr <- Smoking_birth == 0 & s3 == 0
smoking_at_birth_only <- Smoking_birth == 1 & s3 == 0
Smoking_2YR <- ifelse(no_smoking_to_2yr, 0,
	ifelse(smoking_at_birth_only, 1,
		ifelse(s3 == 1, 2, NA)))
table(Smoking_2YR, useNA = "ifany")


### Parental smoking 4 YR ###
#Maternal smoking at 4
# "Not Applicable" -> 0, "Drop Out" / "Not Seen" -> NA, any other label -> 1.
# The first test uses ==, so an NA answer stays NA.
mother_4yr_raw <- data$MothSmokInHouse.4
m4 <- ifelse(mother_4yr_raw == "Not Applicable", 0,
	ifelse(mother_4yr_raw %in% c("Drop Out", "Not Seen"), NA, 1))

#paternal smoking at 4
father_4yr_raw <- data$FathSmokInHouse.4
p4 <- ifelse(father_4yr_raw == "Not Applicable", 0,
	ifelse(father_4yr_raw %in% c("Drop Out", "Not Seen"), NA, 1))

#Any parental smoking at 4YR
# m4 and p4 are 0, 1 or NA: 1 if either is 1, 0 if both are 0, otherwise NA.
s4 <- ifelse(m4 == 1 | p4 == 1, 1, 0)

## Final Smoking 4YR variable
# 0 = never, 1 = earlier smoking (Smoking_2YR of 1 or 2) but none at 4YR,
# 2 = smoking at 4YR; NA when none of the three tests is TRUE.
no_smoking_to_4yr <- Smoking_2YR == 0 & s4 == 0
earlier_smoking_only <- Smoking_2YR > 0 & s4 == 0
Smoking_4YR <- ifelse(no_smoking_to_4yr, 0,
	ifelse(earlier_smoking_only, 1,
		ifelse(s4 == 1, 2, NA)))
table(Smoking_4YR, useNA = "ifany")


### Pet variables ###
#furry pet at birth
# Columns 7 to 11 of data are selected by position; mutate_at() below addresses
# them by name, so these must be the five petsw6*.0 columns.
fp0<-data[,c(7:11)]
# Recode pet names to identify the furry ones
# Listed animal names become "Yes", -100 becomes "Not applicable" and
# -101/-102/-200 become "Missing". "Cat" and "Dog" are not in the list; the
# checks below compare against those labels directly.
fp0<- fp0 %>%
	mutate_at(c("petsw6a.0", "petsw6b.0", "petsw6c.0", "petsw6d.0", "petsw6e.0" ), funs(recode(.,  `-100`="Not applicable", `Guinea pig`="Yes", `Horse`="Yes", `Hamster`="Yes", `Gerbil`="Yes", `Rabbit`="Yes", `Rat`="Yes", `Goat`="Yes", `Chinchilla`="Yes", `Tarantula`="Yes", `House mice`="Yes", `-101`="Missing", `-102`="Missing", `-200`="Missing")))

#do they have a cat or dog stated as other pets 	- just for a check of the dog/cat variables 
# 1 when any of the five columns equals "Cat"; NaN when all five are "Missing";
# otherwise 0. NaN (not NA) is used so that the later test Cat_0=="1" evaluates
# to FALSE rather than NA for these rows.
Cat_0<- ifelse(fp0$petsw6a.0=="Cat" | fp0$petsw6b.0=="Cat"| fp0$petsw6c.0=="Cat" | fp0$petsw6d.0=="Cat" | fp0$petsw6e.0=="Cat",1,
			ifelse(fp0$petsw6a.0=="Missing" & fp0$petsw6b.0=="Missing" & fp0$petsw6c.0=="Missing" & fp0$petsw6d.0=="Missing" & fp0$petsw6e.0=="Missing",NaN,0))
table(Cat_0, useNA="ifany")
																	
# Same rule as Cat_0, for the label "Dog".
Dog_0<- ifelse(fp0$petsw6a.0=="Dog" | fp0$petsw6b.0=="Dog"| fp0$petsw6c.0=="Dog" | fp0$petsw6d.0=="Dog" | fp0$petsw6e.0=="Dog",1,	
			ifelse(fp0$petsw6a.0=="Missing" & fp0$petsw6b.0=="Missing" & fp0$petsw6c.0=="Missing" & fp0$petsw6d.0=="Missing" & fp0$petsw6e.0=="Missing",NaN,0))
table(Dog_0, useNA="ifany")										
										
### Dog at birth ###
# 1 when data$dog.0 is "yes" or a dog was listed among the other pets;
# otherwise 0 when data$dog.0 is "2"; otherwise NA.
Dog_birth <- ifelse(data$dog.0=="yes"|Dog_0=="1",1,
				ifelse(data$dog.0=="2",0,NA))
table(Dog_birth, useNA="ifany")
								
				
### Cat at birth ###
# 1 when data$cat.0 is "yes" or a cat was listed among the other pets;
# otherwise 0 when data$cat.0 is "2"; otherwise NA.
Cat_birth <- ifelse(data$cat.0=="yes"|Cat_0=="1",1,
				ifelse(data$cat.0=="2",0,NA))
table(Cat_birth, useNA="ifany")
								

#do they have a furry pet - yes, no, missing	
# 1 when a cat, a dog, or any other-pet column recoded to "Yes" is present.
# Missing cat/dog status is tested with is.na(): the former Cat_birth=="NA" and
# Dog_birth=="NA" compared against the literal text "NA", which is never TRUE
# for a missing value.
# A missing cat or dog value with no positive answer already makes the first
# test NA, so those rows are NA whatever the inner branch returns.
Furry_pet_birth<- ifelse(Cat_birth=="1"|Dog_birth=="1"|fp0$petsw6a.0=="Yes" | fp0$petsw6b.0=="Yes" | fp0$petsw6c.0=="Yes" | fp0$petsw6d.0=="Yes" | fp0$petsw6e.0=="Yes",1,
	ifelse(is.na(Cat_birth) & is.na(Dog_birth) & fp0$petsw6a.0=="Missing" & fp0$petsw6b.0=="Missing" & fp0$petsw6c.0=="Missing" & fp0$petsw6d.0=="Missing" & fp0$petsw6e.0=="Missing",NA,0))
table(Furry_pet_birth, useNA="ifany")
										

### furry pet at 1YR ###
# Columns 67 to 72 of data are selected by position; mutate_at() below addresses
# them by name, so these must be the six petsp6*.1 columns.
fp1<-data[,c(67:72)]
# Recode pet names to identify the furry ones
# Listed animal names become "Yes", -100 becomes "Not applicable" and
# -101/-102/-200 become "Missing".
fp1<- fp1 %>%
	mutate_at(c("petsp6a.1", "petsp6b.1", "petsp6c.1", "petsp6d.1", "petsp6e.1", "petsp6f.1" ), funs(recode(.,  `-100`="Not applicable", `Guinea pig`="Yes", `Horse`="Yes", `Hamster`="Yes", `Gerbil`="Yes", `Rabbit`="Yes", `Rat`="Yes", `Goat`="Yes", `Chinchilla`="Yes", `Tarantula`="Yes", `House mice`="Yes", `Chipmunk`="Yes",`-101`="Missing", `-102`="Missing", `-200`="Missing")))

#do they have a furry pet - yes, no, missing	
# 1 when any of the six columns is "Yes"; NaN when all six are "Missing";
# otherwise 0. NaN (not NA) is used so that later string comparisons such as
# Furry_1=="0" evaluate to FALSE rather than NA for these rows.
Furry_1<- ifelse(fp1$petsp6a.1=="Yes" | fp1$petsp6b.1=="Yes" | fp1$petsp6c.1=="Yes" | fp1$petsp6d.1=="Yes" | fp1$petsp6e.1=="Yes"| fp1$petsp6f.1=="Yes",1,
	ifelse(fp1$petsp6a.1=="Missing" & fp1$petsp6b.1=="Missing" & fp1$petsp6c.1=="Missing" & fp1$petsp6d.1=="Missing" & fp1$petsp6e.1=="Missing" & fp1$petsp6f.1=="Missing",NaN,0))

table(Furry_1, useNA="ifany")
										

#do they have a cat or dog stated as other pets 	- just for a check of the dog/cat variables 
# Same rule as Furry_1, for the label "Cat".
Other_cat_1<- ifelse(fp1$petsp6a.1=="Cat" | fp1$petsp6b.1=="Cat" | fp1$petsp6c.1=="Cat" | fp1$petsp6d.1=="Cat" | fp1$petsp6e.1=="Cat" | fp1$petsp6f.1=="Cat",1,
	ifelse(fp1$petsp6a.1=="Missing" & fp1$petsp6b.1=="Missing" & fp1$petsp6c.1=="Missing" & fp1$petsp6d.1=="Missing" & fp1$petsp6e.1=="Missing" & fp1$petsp6f.1=="Missing",NaN,0))

table(Other_cat_1, useNA="ifany")
										
	
# Same rule as Furry_1, for the label "Dog".
Other_dog_1 <- ifelse(fp1$petsp6a.1=="Dog" | fp1$petsp6b.1=="Dog" | fp1$petsp6c.1=="Dog" | fp1$petsp6d.1=="Dog" | fp1$petsp6e.1=="Dog"| fp1$petsp6f.1=="Dog",1,
	ifelse(fp1$petsp6a.1=="Missing" & fp1$petsp6b.1=="Missing" & fp1$petsp6c.1=="Missing" & fp1$petsp6d.1=="Missing" & fp1$petsp6e.1=="Missing" & fp1$petsp6f.1=="Missing",NaN,0))

table(Other_dog_1, useNA="ifany")									


### furry pet at 2YR ###
# Columns 73 to 78 of data are selected by position; mutate_at() below addresses
# them by name, so these must be the six pets7*.2 columns.
fp2<-data[,c(73:78)]

# Recode pet names to identify the furry ones
# Listed animal names become "Yes" (both "Chinchilla" and the spelling
# "Chinchila" are listed), -100 becomes "Not applicable" and -101/-102/-200
# become "Missing".
fp2<- fp2 %>%
	mutate_at(c("pets7a.2", "pets7b.2", "pets7c.2", "pets7d.2", "pets7e.2", "pets7f.2" ), funs(recode(.,  `-100`="Not applicable", `Guinea pig`="Yes", `Horse`="Yes", `Hamster`="Yes", `Gerbil`="Yes", `Rabbit`="Yes", `Rat`="Yes", `Goat`="Yes", `Chinchilla`="Yes", `Chinchila`="Yes",`Tarantula`="Yes", `House mice`="Yes", `Chipmunk`="Yes", `Mouse`="Yes", `Spider`="Yes", `-101`="Missing", `-102`="Missing", `-200`="Missing")))

#do they have a furry pet - yes, no, missing	
# 1 when any of the six columns is "Yes"; NaN when all six are "Missing";
# otherwise 0. NaN (not NA) is used so that later string comparisons such as
# Furry_2=="0" evaluate to FALSE rather than NA for these rows.
Furry_2<- ifelse(fp2$pets7a.2=="Yes" | fp2$pets7b.2=="Yes" | fp2$pets7c.2=="Yes" | fp2$pets7d.2=="Yes" | fp2$pets7e.2=="Yes"| fp2$pets7f.2=="Yes",1,
	ifelse(fp2$pets7a.2=="Missing" & fp2$pets7b.2=="Missing" & fp2$pets7c.2=="Missing" & fp2$pets7d.2=="Missing" & fp2$pets7e.2=="Missing" & fp2$pets7f.2=="Missing",NaN,0))

table(Furry_2, useNA="ifany")
										

#do they have a cat or dog stated as other pets 	- just for a check of the dog/cat variables 
# Same rule as Furry_2, for the label "Cat".
Other_cat_2<- ifelse(fp2$pets7a.2=="Cat" | fp2$pets7b.2=="Cat"| fp2$pets7c.2=="Cat" | fp2$pets7d.2=="Cat" | fp2$pets7e.2=="Cat"| fp2$pets7f.2=="Cat",1,
	ifelse(fp2$pets7a.2=="Missing" & fp2$pets7b.2=="Missing" & fp2$pets7c.2=="Missing" & fp2$pets7d.2=="Missing" & fp2$pets7e.2=="Missing" & fp2$pets7f.2=="Missing",NaN,0))

table(Other_cat_2, useNA="ifany")
										
# Same rule as Furry_2, for the label "Dog".
Other_dog_2 <- ifelse(fp2$pets7a.2=="Dog" | fp2$pets7b.2=="Dog"| fp2$pets7c.2=="Dog" | fp2$pets7d.2=="Dog" | fp2$pets7e.2=="Dog"| fp2$pets7f.2=="Dog",1,
		ifelse(fp2$pets7a.2=="Missing" & fp2$pets7b.2=="Missing" & fp2$pets7c.2=="Missing" & fp2$pets7d.2=="Missing" & fp2$pets7e.2=="Missing" & fp2$pets7f.2=="Missing",NaN,0))
table(Other_dog_2, useNA="ifany")
										

# Summarise furry pets at 1 or 2yrs
# Each *_by_2 flag combines the 1YR and 2YR codes: 0 when both are 0, otherwise
# 1 when either is 1, otherwise NA. The inputs may hold NaN (all pet columns
# "Missing"); compared with a string, NaN gives FALSE rather than NA, so a NaN
# in one year does not hide a 1 in the other year.
table(Furry_1, Furry_2, useNA="ifany")
Furry_by_2 <- ifelse(Furry_1=="0" & Furry_2=="0",0,
	ifelse(Furry_1=="1" | Furry_2=="1",1,NA))
table(Furry_by_2, useNA="ifany")

table(Other_cat_1, Other_cat_2, useNA="ifany")
Other_cat_by_2 <- ifelse(Other_cat_1=="0" & Other_cat_2=="0",0,
	ifelse(Other_cat_1=="1" | Other_cat_2=="1",1,NA))
table(Other_cat_by_2, useNA="ifany")
	
	
table(Other_dog_1, Other_dog_2, useNA="ifany")
Other_dog_by_2 <- ifelse(Other_dog_1=="0" & Other_dog_2=="0",0,
	ifelse(Other_dog_1=="1" | Other_dog_2=="1",1,NA))
table(Other_dog_by_2, useNA="ifany")
	

### Pet dog 2YR ###
# d1 / d2: "2" -> 0, "yes" -> 1, any other label -> NaN, NA stays NA.
d1<- ifelse(data$dog.1=="2",0,
	ifelse(data$dog.1=="yes",1,NaN))	

d2<- ifelse(data$dog.2=="2",0,
	ifelse(data$dog.2=="yes",1,NaN))							
	
# 1 when either year is 1; otherwise 0 when both years are 0; otherwise NA.
Dog_2 <- ifelse(d1=="1"|d2=="1",1,
	ifelse(d1=="0" & d2=="0",0,NA))
table(Dog_2, useNA="ifany")

# Combine the dedicated dog question with dogs listed among the other pets:
# 1 when either source is 1; otherwise 0 when both are 0; otherwise NA.
Dog_2YR <- ifelse(Dog_2=="1"|Other_dog_by_2=="1",1,
	ifelse(Dog_2=="0" & Other_dog_by_2=="0",0,NA))
							
						
### Pet cat 2YR ###
# c1 and c2 are reassigned here; the same names held the cough codes in the
# "Cough 2YR" section, which has already produced Cough_2YR by this point.
# Coding: "2" -> 0, "yes" -> 1, any other label -> NaN, NA stays NA.
c1<- ifelse(data$cat.1=="2",0,
	ifelse(data$cat.1=="yes",1,NaN))							

c2<- ifelse(data$cat.2=="2",0,
	ifelse(data$cat.2=="yes",1,NaN))
	
# 1 when either year is 1; otherwise 0 when both years are 0; otherwise NA.
Cat_2 <- ifelse(c1=="1" | c2=="1",1,
	ifelse(c1=="0" & c2=="0",0,NA))

# Combine the dedicated cat question with cats listed among the other pets.
Cat_2YR <- ifelse(Cat_2=="1"| Other_cat_by_2=="1",1,
	ifelse(Cat_2=="0" & Other_cat_by_2=="0",0,NA))
table(Cat_2YR, useNA="ifany")
														

### Furry Pet 2YR ###							
# 1 when dog, cat or other furry pet is 1; 0 only when all three are 0;
# otherwise NA.
Furry_pet_2YR <- ifelse(Dog_2YR=="1"| Cat_2YR=="1"|Furry_by_2=="1",1,
	ifelse(Dog_2YR=="0"& Cat_2YR=="0"& Furry_by_2=="0",0,NA))
table(Furry_pet_2YR, useNA="ifany")
										

### Pet dog at 4YR ###
# Label -> code through a named lookup; labels that are not listed, and NA
# answers, come out as NA.
# NOTE(review): the value coded 0 is the label "2" for data$dog.4 but "no" for
# data$cat.4 - presumably a labelling quirk of the raw export; confirm.
Dog_4YR <- unname(c("2" = 0, "yes" = 1)[as.character(data$dog.4)])
table(Dog_4YR, useNA = "ifany")


### Pet cat at 4YR ###
Cat_4YR <- unname(c("no" = 0, "yes" = 1)[as.character(data$cat.4)])
table(Cat_4YR, useNA = "ifany")
							
							
### Furry pet at 4YR ###
# 1 when a cat, a dog, or one of the listed furry animals is recorded;
# 0 when cat and dog are both 0 and the other-pet entry is "Not Applicable" or
# one of the listed birds; NA otherwise.
# The second test previously read `Dog_4YR==20"` (stray quote) and never closed
# the parenthesis around the bird list, which made the whole script unparsable.
# The animal names are matched exactly as they are spelled in the raw data.
other_pet_4yr <- data$OthPetExpoElseWh.4
furry_other_pets <- c("guinea pig", "horse", "rabbit", "rat", "hamster", "gerbal", "cinchilla")
non_furry_other_pets <- c("Not Applicable", "cockteal", "canary", "finches", "budgie", "parrot")
# %in% never returns NA; a missing other-pet entry therefore fails both tests
# and ends up NA unless a cat or dog is present, the same result the chained
# == comparisons give.
Furry_pet_4YR <- ifelse(Cat_4YR=="1" | Dog_4YR=="1" | other_pet_4yr %in% furry_other_pets, 1,
	ifelse(Cat_4YR=="0" & Dog_4YR=="0" & other_pet_4yr %in% non_furry_other_pets, 0, NA))
table(Furry_pet_4YR, useNA="ifany")
							

### Sensitisation ###
# For each follow-up the skin-test columns are recoded to numbers and summed
# per row. A positive test counts 1, a negative (or not applicable / not done)
# test counts 0 and every missing-type code counts -100, so the row sum equals
# (number of positives) - 100 * (number of missing results); e.g. with the 11
# columns used at 1YR, -999 means ten missing results and one positive.
# The derived variables further down decode these sums.
#At 1 years
sens1<-data[,c("hdm.1", "grass.1", "tree.1", "clad.1", "alt.1", "milk.1", "egg.1", "wheat.1", "peanut.1", "cod.1", "soya.1")]
sens1<- sens1%>%
	mutate_at(c("hdm.1","grass.1", "tree.1", "clad.1", "alt.1", "milk.1", "egg.1", "wheat.1", "peanut.1", "cod.1", "soya.1"), funs(recode(.,`Positive`=1, `Negative`=0, `Not appliable`=0, `Not done`=0,`-103`=-100, `Drop out`=-100, `Missing`=-100, `Not seen`=-100)))
#calculate polysens for each year 
nsens1<-rowSums(sens1)
polysens1<-cbind(sens1,nsens1)


#At 2 years
sens2<-data[,c("hdm.2","grass.2", "tree.2", "clad.2", "alterna.2", "milk.2", "egg.2", "wheat.2", "peanut.2", "cod.2", "soya.2")]
sens2<- sens2%>%
	mutate_at(c("hdm.2", "grass.2", "tree.2", "clad.2", "alterna.2", "milk.2", "egg.2", "wheat.2", "peanut.2", "cod.2", "soya.2"), funs(recode(.,`Positive`=1,`Negative`=0, `Not appliable`=0, `Not done`=0,`-103`=-100, `Drop out`=-100, `Missing`=-100, `Not seen`=-100)))
#calculate polysens for each year 
nsens2<-rowSums(sens2)
polysens2<-cbind(sens2,nsens2)


#At 4 years
# Twelve columns at this follow-up (cat and dog are included); only "Positive",
# "Negative" and "Missing" are recoded here.
sens4<-data[,c("HDMResult.4", "CatResult.4", "DogResult.4", "GrassResult.4", "CladosporiumResult.4", "AlternariaResult.4", "MilkResult.4", "EggResult.4", "WheatResult.4", "PeanutResult.4", "CodResult.4", "SoyaResult.4")]
sens4<- sens4%>%
	mutate_at(c("HDMResult.4","CatResult.4", "DogResult.4", "GrassResult.4", "CladosporiumResult.4", "AlternariaResult.4", "MilkResult.4", "EggResult.4", "WheatResult.4", "PeanutResult.4", "CodResult.4", "SoyaResult.4"), funs(recode(.,`Positive`=1,`Negative`=0, `Missing`=-100)))
#calculate polysens for each year 
nsens4<-rowSums(sens4)
polysens4<-cbind(sens4,nsens4)


### Monosensitisation (one positive SPT) 1YR ###
# nsens1 is a row sum of whole numbers, so it is compared numerically here.
# The outermost test of each ifelse() uses ==, so a missing row sum stays NA.
table(nsens1, useNA = "ifany")
# 1 for a sum of exactly 1; NA for the sums -1100, -1000 and -999; 0 otherwise.
mono_unknown_sums_1yr <- c(-1100, -1000, -999)
monosens_1YR <- ifelse(nsens1 == 1, 1,
	ifelse(nsens1 %in% mono_unknown_sums_1yr, NA, 0))
table(monosens_1YR, useNA = "ifany")


### Atopic (at least one positive SPT) 1YR ###
table(nsens1, useNA = "ifany")
# 0 for a sum of 0; 1 for the listed sums; NA for everything else.
atopy_positive_sums_1yr <- c(-999, -898, -797, 1, 2, 3)
atopy_1YR <- ifelse(nsens1 == 0, 0,
	ifelse(nsens1 %in% atopy_positive_sums_1yr, 1, NA))
table(atopy_1YR, useNA = "ifany")


### Polysensitisation (two or more positive SPT) 1YR ###
table(nsens1, useNA = "ifany")
# 0 for a sum of 0 or 1; 1 for the listed sums; NA for everything else.
poly_positive_sums_1yr <- c(-898, -797, 2, 3)
polysens_1YR <- ifelse(nsens1 == 0 | nsens1 == 1, 0,
	ifelse(nsens1 %in% poly_positive_sums_1yr, 1, NA))
table(polysens_1YR, useNA = "ifany")


### Monosensitisation (one positive SPT) 2YR ###
# nsens2 is a row sum of whole numbers, so it is compared numerically here.
# The outermost test of each ifelse() uses ==, so a missing row sum stays NA.
table(nsens2, useNA = "ifany")
# 1 for a sum of exactly 1; NA for the sums -1100 and -999; 0 otherwise.
mono_unknown_sums_2yr <- c(-1100, -999)
monosens_2YR <- ifelse(nsens2 == 1, 1,
	ifelse(nsens2 %in% mono_unknown_sums_2yr, NA, 0))
table(monosens_2YR, useNA = "ifany")


### Atopic (at least one positive SPT) 2YR ###
table(nsens2, useNA = "ifany")
# 0 for a sum of 0; 1 for the listed sums; NA for everything else.
atopy_positive_sums_2yr <- c(-999, -898, 1, 2, 3)
atopy_2YR <- ifelse(nsens2 == 0, 0,
	ifelse(nsens2 %in% atopy_positive_sums_2yr, 1, NA))
table(atopy_2YR, useNA = "ifany")


### Polysensitisation (two or more positive SPT) 2YR ###
table(nsens2, useNA = "ifany")
# 0 for a sum of 0 or 1; 1 for the listed sums; NA for everything else.
poly_positive_sums_2yr <- c(-898, 2, 3)
polysens_2YR <- ifelse(nsens2 == 0 | nsens2 == 1, 0,
	ifelse(nsens2 %in% poly_positive_sums_2yr, 1, NA))
table(polysens_2YR, useNA = "ifany")


### Monosensitisation (one positive SPT) at 1/2YR ###
# The yearly flags are 0, 1 or NA, so an elementwise OR expresses the rule:
# 1 if either year is 1, 0 if both years are 0, and NA when a missing year
# leaves the answer undetermined.
table(monosens_1YR, monosens_2YR, useNA = "ifany")
Monosensitisation_2YR <- ifelse(monosens_1YR == 1 | monosens_2YR == 1, 1, 0)
table(Monosensitisation_2YR, useNA = "ifany")


### Atopic (at least one positive SPT) 1/2YR ###
table(atopy_1YR, atopy_2YR, useNA = "ifany")
Atopy_2YR <- ifelse(atopy_1YR == 1 | atopy_2YR == 1, 1, 0)
table(Atopy_2YR, useNA = "ifany")


### Polysensitisation (two or more positive SPT) at 1/2YR ###
table(polysens_1YR, polysens_2YR, useNA = "ifany")
Polysensitisation_2YR <- ifelse(polysens_1YR == 1 | polysens_2YR == 1, 1, 0)
table(Polysensitisation_2YR, useNA = "ifany")


### Monosensitisation (one positive SPT) 4YR ###
# nsens4 is a row sum of whole numbers, so it is compared numerically here.
# The outermost test of each ifelse() uses ==, so a missing row sum stays NA.
table(nsens4, useNA = "ifany")
# 1 for a sum of exactly 1; NA for the listed sums; 0 otherwise.
mono_unknown_sums_4yr <- c(-1200, -800, -200, -100, -99)
Monosensitisation_4YR <- ifelse(nsens4 == 1, 1,
	ifelse(nsens4 %in% mono_unknown_sums_4yr, NA, 0))
table(Monosensitisation_4YR, useNA = "ifany")


### Atopic (at least one positive SPT) 4YR ###
table(nsens4, useNA = "ifany")
# 0 for a sum of 0; 1 for -99, -98 and 1 to 7; NA for everything else.
atopy_positive_sums_4yr <- c(-99, -98, 1:7)
Atopy_4YR <- ifelse(nsens4 == 0, 0,
	ifelse(nsens4 %in% atopy_positive_sums_4yr, 1, NA))
table(Atopy_4YR, useNA = "ifany")


### Polysensitisation (two or more positive SPT) at 4YR ###
table(nsens4, useNA="ifany")
# 1 for the row sums that contain at least two positives (-98, 2 to 7).
# The negative branch must test the 4-year sum: it previously read nsens2, the
# 2-year sum, so the 0 code depended on the wrong follow-up. A sum of 0 or 1
# (complete panel, fewer than two positives) is coded 0, matching the 1YR and
# 2YR polysensitisation rules in this script; every other sum is NA.
Polysensitisation_4YR <- ifelse(nsens4=="-98"|nsens4=="2"|nsens4=="3"|nsens4=="4"|nsens4=="5"|nsens4=="6"|nsens4=="7",1,
	ifelse(nsens4=="0"|nsens4=="1",0,NA))
table(Polysensitisation_4YR, useNA="ifany")


### Asthma outcome at 10YR ###
# "No" -> 0, "Yes" -> 1; any other label, and NA, come out as NA.
asthma_10yr_code <- c("No" = 0, "Yes" = 1)
Asthma_10YR <- unname(
	asthma_10yr_code[as.character(data$M_InvestigatorDiagnosedAsthma.10)])
table(Asthma_10YR, useNA = "ifany")
							
							
############################
### Additional variables ###	
############################
### Total Breastfeeding ###
#Import  breastfeeding data - found in IOWBC_data.xlsx, sheet: "Raw breastfeeding data"
bf_data <- read.csv(file="Raw_breastfeeding_data.csv", header=TRUE)

# Merge breastfeeding (BF) data with all 1536 IDs to identify missing samples
IDs <- data[1]
# left_join() keeps every row of IDs and fills unmatched rows with NA by itself.
# It has no `fill` argument: older dplyr versions silently ignored it, and newer
# versions are expected to raise an error for unused arguments, so it is dropped.
BF <- left_join(IDs, bf_data, by="StudyID")

# Recode total BF categories
# BF[2] is a one-column data frame, so the comparisons and ifelse() return a
# one-column matrix that carries the source column name.
# Categories 0, 1 and 2 are kept, 3 and 4 are merged into 3, anything else is NA.
total <- BF[2]
TBF <- ifelse(total=="0",0,
	ifelse(total=="1",1,
		ifelse(total=="2",2,
			ifelse(total=="3"|total=="4",3,NA))))
table(TBF, useNA="ifany")


# Recode exclusive BF categories
# Categories 0, 1 and 2 are kept; anything else is NA.
exclusive <- BF[3]
EBF <- ifelse(exclusive=="0",0,
	ifelse(exclusive=="1",1,
		ifelse(exclusive=="2",2,NA)))
table(EBF, useNA="ifany")


### BMI ###
#Import  BMI data - found in IOWBC_data.xlsx, sheet: "Standardised BMI data"
# To obtain this BMI data, height and weight data found in IOWBC_data.xlsx, sheet: "Raw BMI data" was standardised against the British 1990 Growth Reference.
bmi_data <- read.csv(file="Standardised_BMI_data.csv", header=TRUE)

# Merge BMI data with all 1536 IDs to identify missing samples
IDs <- data[1]
# left_join() keeps every row of IDs and fills unmatched rows with NA by itself.
# It has no `fill` argument: older dplyr versions silently ignored it, and newer
# versions are expected to raise an error for unused arguments, so it is dropped.
BMI <- left_join(IDs, bmi_data, by="StudyID")

# Recode BMI at 1YR categories
# BMI[2] is kept as a one-column data frame; values below -20 are set to
# missing.
BMI_1YR <- BMI[2]
summary(BMI_1YR, useNA="ifany")
BMI_1YR[BMI_1YR < -20] <- NA


# Recode BMI at 4YR categories
# Same treatment for the third column.
BMI_4YR <- BMI[3]
summary(BMI_4YR, useNA="ifany")
BMI_4YR[BMI_4YR < -20] <- NA
						

################################################
############## Construct dataset ###############
################################################
# Cleaned data for all 1536 individuals - found in IOWBC_data.xlsx, sheet: "Cleaned data (n=1536)"
# Study_ID is taken directly from data$StudyID (the assignment that would create
# a Study_ID object is commented out earlier in the script); naming the argument
# keeps the output column called Study_ID. Column order is unchanged.
dataset <- cbind(
	Study_ID = data$StudyID, Mat_age, Mat_smoking_birth, Pat_smoking_birth,
	Dog_birth, Cat_birth, Furry_pet_birth,
	Mat_asthma, Mat_eczema, Mat_hayfever, Pat_asthma, Pat_eczema, Pat_hayfever,
	Sex, Prematurity, Delivery, Birthweight, TBF, EBF, Solid_food, Parity,
	Birth_season, BMI_1YR,
	Wheeze_2YR, Wheeze_without_cold_2YR, Cough_2YR, Nasal_symp_2YR,
	Chest_infection_2YR, Noct_symp_2YR, Eczema_2YR, Hayfever_2YR,
	Atopy_2YR, Monosensitisation_2YR, Polysensitisation_2YR,
	Smoking_2YR, Dog_2YR, Cat_2YR, Furry_pet_2YR, BMI_4YR,
	Wheeze_4YR, Wheeze_without_cold_4YR, Cough_4YR, Nasal_symp_4YR,
	Noct_Symp_4YR, Eczema_4YR, Hayfever_4YR,
	Atopy_4YR, Monosensitisation_4YR, Polysensitisation_4YR,
	Smoking_4YR, Dog_4YR, Cat_4YR, Furry_pet_4YR,
	Farm_early_life, SES, Asthma_10YR
)
# FALSE is spelled out because F is an ordinary, reassignable variable.
write.csv(dataset, file="Asthma_Prediction_Model_Dataset_1536Ids.csv", row.names=FALSE)

# Subset dataset for only those 1368 individuals with a 10YR asthma outcome - found in IOWBC_data.xlsx, sheet: "Cleaned data (n=1368)"
# subset() drops rows where the condition is NA, i.e. rows with no outcome.
data_A <- subset(dataset, Asthma_10YR=="0"|Asthma_10YR=="1")
write.csv(data_A, file="Asthma_Prediction_Model_Dataset_1368Ids.csv", row.names=FALSE)









