Loading data

library(reshape)

#library(rjson)
setwd("/Users/lavm/luc-git/papers/prov-template/data/")

pdf("outputs/box-template.pdf")


smart_w <- read.table("smartshare/archive_28483/outputs/toscatter.csv", sep=",")
smart_w2 <- smart_w[order(smart_w$V4),]
smart_w2$V7 <- 1:nrow(smart_w2)
smart_w2$V8 <- 1
smart_w2$V9 <- "smart"



smart_ww <- read.table("smartshare/archive_28483/outputs/toscatter2.csv", sep=",")
smart_ww2 <- smart_ww[order(smart_ww$V4),]
smart_ww2$V7 <- 1:nrow(smart_ww2)
smart_ww2$V8 <- 2
smart_ww2$V9 <- "smart"


food_w <- read.table("foodprovenance/archive_20160303/outputs/toscatter.csv", sep=",")
food_w2 <- food_w[order(food_w$V4),]
food_w2$V7 <- 1:nrow(food_w2)
food_w2$V8 <- 1
food_w2$V9 <- "food"



food_ww <- read.table("foodprovenance/archive_20160303/outputs/toscatter2.csv", sep=",")
food_ww2 <- food_ww[order(food_ww$V4),]
food_ww2$V7 <- 1:nrow(food_ww2)
food_ww2$V8 <- 2
food_ww2$V9 <- "food"


ebook_w <- read.table("ebook/20160308_big/outputs/toscatter.csv", sep=",")
ebook_w2 <- ebook_w[order(ebook_w$V4),]
ebook_w2$V7 <- 1:nrow(ebook_w2)
ebook_w2$V8 <- 1
ebook_w2$V9 <- "ebook"



ebook_ww <- read.table("ebook/20160308_big/outputs/toscatter2.csv", sep=",")
ebook_ww2 <- ebook_ww[order(ebook_ww$V4),]
ebook_ww2$V7 <- 1:nrow(ebook_ww2)
ebook_ww2$V8 <- 2
ebook_ww2$V9 <- "ebook"


picaso_w <- read.table("picaso/20160211-reexpanded/outputs/toscatter.csv", sep=",")
picaso_w2 <- picaso_w[order(picaso_w$V4),]
picaso_w2$V7 <- 1:nrow(picaso_w2)
picaso_w2$V8 <- 1
picaso_w2$V9 <- "picaso"

picaso_w2$V1 <- as.character(picaso_w2$V1)
picaso_w2$V3 <- as.character(picaso_w2$V3)
picaso_w2$V5 <- as.character(picaso_w2$V5)


picaso_ww <- read.table("picaso/20160211-reexpanded/outputs/toscatter2.csv", sep=",")
picaso_ww2 <- picaso_ww[order(picaso_ww$V4),]
picaso_ww2$V7 <- 1:nrow(picaso_ww2)
picaso_ww2$V8 <- 2
picaso_ww2$V9 <- "picaso"

picaso_ww2$V1 <- as.character(picaso_ww2$V1)
picaso_ww2$V3 <- as.character(picaso_ww2$V3)
picaso_ww2$V5 <- as.character(picaso_ww2$V5)



picaso_names <- read.table("picaso/20160211-reexpanded/raw/template-names.txt",sep=",")
names(picaso_names)<-c("name","pretty")

for_picaso_name <- function(n) {
             return(picaso_names[picaso_names$name == n, "pretty"])
           }

Compaction Ratio with Repect to Bindings Size

######################################################################
###
###  Version 2 (bindings)
###

xx<-rbind(smart_ww2, 
          food_ww2,
          ebook_ww2,
          picaso_ww2)


xx$V10 <- xx$V4 / xx$V6
xx$V11 <- xx$V2 / xx$V6

yy <- xx[order(xx$V1),]
#yy$V11 <- 1:nrow(yy)


smart_mean =mean(smart_ww2$V4 / smart_ww2$V6)
food_mean  =mean(food_ww2$V4  / food_ww2$V6)
ebook_mean =mean(ebook_ww2$V4 / ebook_ww2$V6)
picaso_mean=mean(picaso_ww2$V4/ picaso_ww2$V6)
total_mean =mean(yy$V10)

smart_sd =sd(smart_ww2$V4 / smart_ww2$V6)
food_sd  =sd(food_ww2$V4  / food_ww2$V6)
ebook_sd =sd(ebook_ww2$V4 / ebook_ww2$V6)
picaso_sd=sd(picaso_ww2$V4/ picaso_ww2$V6)
total_sd =sd(yy$V10)


smart_median =median(smart_ww2$V4 / smart_ww2$V6)
food_median  =median(food_ww2$V4  / food_ww2$V6)
ebook_median =median(ebook_ww2$V4 / ebook_ww2$V6)
picaso_median=median(picaso_ww2$V4/ picaso_ww2$V6)
total_median =median(yy$V10)

Ratio Between Templates Size and Expanded Provenance

smart_mean2 =mean(smart_ww2$V2 / smart_ww2$V6)
food_mean2  =mean(food_ww2$V2  / food_ww2$V6)
ebook_mean2 =mean(ebook_ww2$V2 / ebook_ww2$V6)
picaso_mean2=mean(picaso_ww2$V2/ picaso_ww2$V6)
total_mean2 =mean(yy$V11)

smart_sd2 =sd(smart_ww2$V2 / smart_ww2$V6)
food_sd2  =sd(food_ww2$V2  / food_ww2$V6)
ebook_sd2 =sd(ebook_ww2$V2 / ebook_ww2$V6)
picaso_sd2=sd(picaso_ww2$V2/ picaso_ww2$V6)
total_sd2 =sd(yy$V11)


smart_median2 =median(smart_ww2$V2 / smart_ww2$V6)
food_median2  =median(food_ww2$V2  / food_ww2$V6)
ebook_median2 =median(ebook_ww2$V2 / ebook_ww2$V6)
picaso_median2=median(picaso_ww2$V2/ picaso_ww2$V6)
total_median2 =median(yy$V11)

######################################################################
# box plot (bindings)

Box Plot (compaction ratio bindings/expanded provenance)

mydots=c(1,2,3,4,5,6,7,8)

applications=c(1,2,3,4)
names(applications)=c("smart","food","ebook","picaso")
colors=c("red","blue","green4","brown")

tmpl <- unique(yy$V1)

print(length(tmpl))
## [1] 30
#pretty_templates <- c(1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,"tmpl_28",29,30,31)
pretty_templates <- c(1:length(tmpl))


pretty_templates <- c(
"Login",              #"template_2",
"Receive_API_Call",   #"template_25878",
"Change_Page",        #"template_3",
"Send_Request",       #"template_35836",
"Negotiation_Type_1", #"template_35838",
"Receive_Request",    #"template_35839",
"Receive_Request",    #"template_35840",
"Negotiation_Type_2", #"template_35843",
"Composition",        #"template_35844",
"Use_Response",       #"template_4",
"Init_Gen_Reputation", #"template_6",
"Gen_Reputation", #"template_9",
"analysis",
"foodspec",
"invoiceitems",
"block_run",
"Derivation_1_n",       #"tmpl_10",
"Derivation_n_1",       #"tmpl_11",
"Conference_Session",       #"tmpl_3",
"Citation",     #"tmpl_2",
"Tweet",        #"tmpl_7",
"Derived_Material",     #"tmpl_5",
"Presentation_v1",      #"tmpl_8",
"Presentation_v2",      #"tmpl_9",
"Work_Element",     #"tmpl_6",
"Dataset_Usage",        #"tmpl_4",
"Attribution_v1",       #"tmpl_1",
"Project",      #"tmpl_13",
"Attribution_v2",       #"tmpl_12",
"Attribution/Citation"      #"tmpl_1+2"
)



names(pretty_templates)=tmpl







par(mar = c(6,5,2.3,0.5)) 



boxplot(yy$V10 ~ yy$V1, data=tmpl, axes=FALSE, range=0, ylab="compaction ratio\n size of sets of bindings / size of expanded templates", cex.lab=0.7)

# Make y axis
axis(2, c(0,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2), cex.axis=0.7)

# Make x axis
axis(1, at=1:length(tmpl), labels=FALSE, cex.axis=0.7, las=3)

# Labels on x axis 
mtext(text=pretty_templates[tmpl], side=1,at=1:length(tmpl),adj=1,col=colors[c(1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4)],srt = 0, pos = 1, xpd = TRUE, cex=0.6,las=3,line=1)
## Warning in mtext(text = pretty_templates[tmpl], side = 1, at =
## 1:length(tmpl), : "pos" is not a graphical parameter
# add a legend
legend(1,0.35, names(applications) , cex=0.7, col=colors[applications],lty=c(1,1),title="Applications")  #,pch=mydots


segments(x0=0.5, x1=12.5, y0=smart_median, y1=smart_median, col=colors[1])        
segments(x0=12.5,x1=15.5, y0=food_median,  y1=food_median,  col=colors[2])        
segments(x0=15.5,x1=16.5, y0=ebook_median, y1=ebook_median, col=colors[3])        
segments(x0=16.5,x1=30.5, y0=picaso_median,y1=picaso_median,col=colors[4])

######################################################################
# box plot (templates)

Box Plot (compaction ratio templates/expanded provenance)

boxplot(yy$V11 ~ yy$V1, data=tmpl, axes=FALSE, range=0, ylab="ratio size of templates / size of expanded templates", cex.lab=0.7)

# Make y axis
axis(2, c(0,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.5,2.0,2.5), cex.axis=0.7)

# Make x axis
axis(1, at=1:length(tmpl), labels=FALSE, cex.axis=0.7, las=3)

# Labels on x axis 
mtext(text=pretty_templates[tmpl], side=1,at=1:length(tmpl),adj=1,col=colors[c(1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4)],srt = 0, pos = 1, xpd = TRUE, cex=0.6,las=3,line=1)
## Warning in mtext(text = pretty_templates[tmpl], side = 1, at =
## 1:length(tmpl), : "pos" is not a graphical parameter
# add a legend
legend(1,1.95, names(applications) , cex=0.7, col=colors[applications],lty=c(1,1),title="Applications")  #,pch=mydots


segments(x0=0.5, x1=12.5, y0=smart_median2, y1=smart_median2, col=colors[1])          
segments(x0=12.5,x1=15.5, y0=food_median2,  y1=food_median2,  col=colors[2])          
segments(x0=15.5,x1=16.5, y0=ebook_median2, y1=ebook_median2, col=colors[3])          
segments(x0=16.5,x1=30.5, y0=picaso_median2,y1=picaso_median2,col=colors[4])

######################################################################
###
###  Version 1
###
###
###xx<-rbind(smart_w2,
###   food_w2,
###   ebook_w2,
###   picaso_w2
###          )
###
###xx$V10 <- xx$V4 / xx$V6
###yy <- xx[order(xx$V1),]
####yy$V11 <- 1:nrow(yy)
###
###
#########################################################################
#### box plot
###
###
###tmpl <- unique(yy$V1)
###
###
###
###boxplot(yy$V10 ~ yy$V1, axes=FALSE, range=0)
###
#### Make y axis
###axis(2, c(0,0.2,0.4,0.5,0.6,0.7,0.8,0.9,1,1.1,1.2,1.3), cex.axis=0.7)
###
####axis(1,tmpl,cex.axis=0.7)
###
#### how to color axis labels?
####col.axis=colors[2*applications[tmpl]+1-2]
###axis(1, at=1:length(tmpl), cex.axis=0.7, las=3, labels=FALSE)
###
###mtext(text=tmpl, side=1,at=1:length(tmpl),adj=1,col=colors[c(1,1,1,1,1,1,1,1,1,1,2,2,2,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4)],srt = 0, pos = 1, xpd = TRUE, cex=0.7,las=3,line=1)
###
###
###legend(1,0.5, names(applications) , cex=0.7, col=colors[applications],lty=c(1,1),pch=mydots,title="Applications")
###
###
###
###
print(smart_median)
## [1] 0.4476684
print(food_median)
## [1] 0.5327581
print(ebook_median)
## [1] 0.6601266
print(picaso_median)
## [1] 0.342117
print(total_median)
## [1] 0.3972071
print(smart_mean)
## [1] 0.430907
print(food_mean)
## [1] 0.5260223
print(ebook_mean)
## [1] 0.6704058
print(picaso_mean)
## [1] 0.350379
print(total_mean)
## [1] 0.4063708
print(smart_sd)
## [1] 0.07320251
print(food_sd)
## [1] 0.08038991
print(ebook_sd)
## [1] 0.06227174
print(picaso_sd)
## [1] 0.0533793
print(total_sd)
## [1] 0.1024571

Summary (bindings)

summary = c(1,2,3,4)
names(summary)= names(applications)
summary["smart"]=smart_mean
summary["food"]=food_mean
summary["ebook"]=ebook_mean
summary["picaso"]=picaso_mean
summary["total"]=total_mean


summary = rbind(summary,c(smart_sd,food_sd,ebook_sd,picaso_sd,total_sd))
summary = rbind(summary,c(smart_median,food_median,ebook_median,picaso_median,total_median))

summary <- t(summary)
colnames(summary) <- c("mean","sd", "median")
print(summary)
##             mean         sd    median
## smart  0.4309070 0.07320251 0.4476684
## food   0.5260223 0.08038991 0.5327581
## ebook  0.6704058 0.06227174 0.6601266
## picaso 0.3503790 0.05337930 0.3421170
## total  0.4063708 0.10245710 0.3972071

Summary (templates)

summary2 = c(1,2,3,4)
names(summary2)= names(applications)
summary2["smart"]=smart_mean2
summary2["food"]=food_mean2
summary2["ebook"]=ebook_mean2
summary2["picaso"]=picaso_mean2
summary2["total"]=total_mean2


summary2 = rbind(summary2,c(smart_sd2,food_sd2,ebook_sd2,picaso_sd2,total_sd2))
summary2 = rbind(summary2,c(smart_median2,food_median2,ebook_median2,picaso_median2,total_median2))



summary2 <- t(summary2)
colnames(summary2) <- c("mean","sd", "median")
print(round(summary2,3))
##         mean    sd median
## smart  0.751 0.227  0.817
## food   0.797 0.419  0.857
## ebook  0.706 0.150  0.714
## picaso 0.580 0.218  0.547
## total  0.657 0.274  0.680
print(picaso_names)
##    name             pretty
## 1     1     Attribution v1
## 2     2           Citation
## 3     3 Conference-Session
## 4     4      Dataset-Usage
## 5     5   Derived-Material
## 6     6       Work-Element
## 7     7              Tweet
## 8     8    Presentation v1
## 9     9    Presentation v2
## 10   10     Derivation-1-n
## 11   11     Derivation-n-1
## 12   12     Attribution v2
## 13   13            Project
print(for_picaso_name("3"))
## [1] Conference-Session
## 13 Levels: Attribution v1 Attribution v2 Citation ... Work-Element
print(for_picaso_name("10"))
## [1] Derivation-1-n
## 13 Levels: Attribution v1 Attribution v2 Citation ... Work-Element