# This looks at the properties of the HVGs or DE genes that
# were missed between the original and one simulation iteration.

# For HVGs with the log-variance method.

pdf("comparison_log_HVG.pdf", width=4, height=6)
par(mar=c(5.1, 5.1, 4.1, 2.1))
for (x in c("Calero", "Liora")) {
    original <- sprintf("../variance/log_%s_0.tsv",x)
    modified <- sprintf("../variance/log_%s_1.tsv",x)
    odata <- read.table(original, header=TRUE)
    mdata <- read.table(modified, header=TRUE)
    
    kept <- odata$bio[rownames(odata) %in% rownames(mdata)]
    diff <- c(odata$bio[! rownames(odata) %in% rownames(mdata)], mdata$bio[! rownames(mdata) %in% rownames(odata)])
    dname <- ifelse(x=="calero", "416B", "TSC")

    out <- boxplot(list(Common=kept, "Lost/Gained"=diff), ylab="Biological component of variance",
                   col="grey80", cex.axis=1.2, cex.lab=1.4, cex.main=1.4, main=dname, range=0,
                   ylim=c(0, max(max(kept), max(diff))+1))
    text(1:2, out$stats[5,], c(length(kept), length(diff)), pos=3)
}
dev.off()   

# For HVGs with the CV2 method.

#pdf("comparison_CV2_HVG.pdf", width=4, height=6)
#par(mar=c(5.1, 5.1, 4.1, 2.1))
#for (x in c("calero", "liora")) {
#    original <- sprintf("../variance/cv2_%s_0.tsv",x)
#    modified <- sprintf("../variance/cv2_%s_1.tsv",x)
#    odata <- read.table(original, header=TRUE)
#    mdata <- read.table(modified, header=TRUE)
#    
#    dname <- ifelse(x=="calero", "416B", "TSC")
#    oratio <- odata$cv2/odata$trend
#    mratio <- mdata$cv2/mdata$trend
#    kept <- oratio[rownames(odata) %in% rownames(mdata)]
#    diff <- c(oratio[! rownames(odata) %in% rownames(mdata)], mratio[! rownames(mdata) %in% rownames(odata)])
#
#    out <- boxplot(list(Common=kept, "Lost/Gained"=diff), ylab="Variance ratio (Total:Technical)", 
#                   col="grey80", cex.axis=1.2, cex.lab=1.4, cex.main=1.4, main=dname, log='y', range=0,
#                   ylim=c(0, max(max(kept), max(diff))+1))
#    text(1:2, out$stats[5,], c(length(kept), length(diff)), pos=3)
#}
#dev.off()

# For DEGs with edgeR.

pdf("comparison_edgeR_DEG.pdf", width=4, height=6)
par(mar=c(5.1, 5.1, 4.1, 2.1))
for (x in c("Calero", "Buettner")) {
    original <- sprintf("../diffexp/edgeR_%s_0.tsv",x)
    modified <- sprintf("../diffexp/edgeR_%s_1.tsv",x)
    odata <- read.table(original, header=TRUE)
    odata <- odata[odata$FDR <= 0.05,]
    mdata <- read.table(modified, header=TRUE)
    mdata <- mdata[mdata$FDR <= 0.05,]
    
    dname <- ifelse(x=="Calero", "416B (induced/control)", "mESC (G2M/G1)")
    olfc <- abs(odata$logFC)
    mlfc <- abs(mdata$logFC)
    kept <- olfc[rownames(odata) %in% rownames(mdata)]
    diff <- c(olfc[! rownames(odata) %in% rownames(mdata)], mlfc[!rownames(mdata) %in% rownames(odata)])

    out <- boxplot(list(Common=kept, "Lost/Gained"=diff), ylab=expression("Absolute Log"[2]~"Fold Change"),
                   col="grey80", cex.axis=1.2, cex.lab=1.4, cex.main=1.4, main=dname, range=0,
                   ylim=c(0, max(max(kept), max(diff))+1))
    text(1:2, out$stats[5,], c(length(kept), length(diff)), pos=3)
}
dev.off()