########################################################################################
# Write the SDRF metadata table and the file-linking script for Fernando Calero's libraries.

# Per-trial metadata tables accumulate in this list, keyed by trial path.
collected <- list()

# Root of the mounted share holding the FASTQ files and their MD5 sums.
fpath <- "/run/user/1753941046/gvfs/smb-share:server=jmlab-data,share=jmlab/group_folders/lun01/Internal/SpikeIns"

# Shell script that recreates the upload directory as a set of symlinks.
# It is started afresh here: abort on errors (set -e) and on unset
# variables (set -u), then create and enter the target directory.
relink <- "make_links_Calero.sh"
script.preamble <- c(
    "set -e",
    "set -u",
    "mkdir fastq_Calero",
    "cd fastq_Calero"
)
write(script.preamble, file=relink, ncolumns=1)
library(edgeR)

# For each sequencing trial, gather the per-library metadata (spike-in
# addition order, treatment, FASTQ name, MD5 sum) and append the commands
# that symlink its FASTQ and count files to the relinking script.
for (sample in c("Calero/trial_20160113", "Calero/trial_20160325")) {
    # The first row of the count table lists the library names after two
    # leading columns, which are skipped.
    cpath <- file.path("..", sample, "analysis", "genic_counts.tsv")
    all.files <- read.table(cpath, nrows=1, stringsAsFactors=FALSE)
    prefixes <- as.character(all.files[-c(1:2)])
    batch <- sub(".*_", "", basename(sample))

    # Loading in the corresponding object, reordered to match the count table.
    full.obj <- readRDS(file.path("..", sample, "analysis", "full.rds"))
    m <- match(prefixes, colnames(full.obj))
    if (any(is.na(m))) {
        # Fail loudly rather than carry NA columns into the metadata.
        stop("libraries in '", cpath, "' are missing from full.rds: ",
             paste(prefixes[is.na(m)], collapse=", "), call.=FALSE)
    }
    full.obj <- full.obj[,m]

    # Adding in the metadata. Libraries for which none of the three flags
    # is set keep an empty addition mode.
    addition.mode <- character(length(prefixes))
    addition.mode[full.obj$samples$ercc.first] <- "ERCC+SIRV"
    addition.mode[full.obj$samples$sirv.first] <- "SIRV+ERCC"
    addition.mode[full.obj$samples$premixed] <- "Premixed"
    treatment <- ifelse(full.obj$samples$induced, "Induced", "Control")

    # Adding in the MD5 sums. FASTQ names use "SLX-" where the count table
    # header has "SLX."; every column is read as text so that a checksum
    # is never converted to another type.
    fnames <- paste0(sub("SLX\\.", "SLX-", prefixes), ".fq.gz")
    md5.path <- file.path(fpath, sample, "fastq", "md5.all")
    md5.sums <- read.table(md5.path, header=FALSE, colClasses="character")
    m <- match(fnames, md5.sums[,2])
    if (any(is.na(m))) {
        # A missing checksum would otherwise be written out as "NA".
        stop("no MD5 sum in '", md5.path, "' for: ",
             paste(fnames[is.na(m)], collapse=", "), call.=FALSE)
    }
    md5.sums <- md5.sums[m,1]

    # Creating links to files. An empty listing is skipped, because paste0()
    # would otherwise emit a malformed "ln -s  " line that aborts the
    # generated script under 'set -e'.
    curpath <- file.path(fpath, sample, "fastq")
    chosen <- list.files(curpath, pattern="fq\\.gz$")
    if (length(chosen) > 0) {
        write(file=relink, paste0("ln -s ", file.path(curpath, chosen), " ", chosen), append=TRUE, ncolumns=1)
    } else {
        warning("no FASTQ files found in '", curpath, "'", call.=FALSE)
    }

    # The count table is linked by absolute path, since the generated script
    # changes directory before creating the links.
    new.count.file <- paste0("counts_Calero_", batch, ".tsv")
    write(file=relink, paste0("ln -s ", normalizePath(cpath, mustWork=TRUE), " ", new.count.file), append=TRUE, ncolumns=1)

    collected[[sample]] <- data.frame(Sample=prefixes, Batch=batch, Addition=addition.mode, Treatment=treatment,
                                      File=fnames, MD5=md5.sums, Counts=new.count.file,
                                      stringsAsFactors=FALSE)
}

# Stack the per-trial tables into a single data frame, one row per library.
collected <- do.call(rbind, collected)


# Assemble the SDRF columns in order. Several columns share the header
# "Protocol REF\tPerformer", so every column is appended positionally:
# assigning with output[[header]] <- value would overwrite an earlier column
# that has the same header instead of adding a new one.
add.column <- function(columns, header, value) {
    c(columns, structure(list(value), names=header))
}

output <- list()
output <- add.column(output, "Source Name", collected$Sample)
output <- add.column(output, "Characteristics[organism]", "Mus musculus")
output <- add.column(output, "Characteristics[cell line]", "416B")
output <- add.column(output, "Characteristics[single cell well quality]", "single cell")
output <- add.column(output, "Material Type", "RNA")

# Five (protocol, performer) pairs packed into one tab-joined header and one
# tab-joined value; they expand into ten fields when the table is written
# with sep="\t" and quote=FALSE.
output <- add.column(output,
                     paste0(rep(c("Protocol REF", "Performer"), 5), collapse="\t"),
                     paste0(c("Obtaining 416B cells", "Fernando Calero",
                              "Culturing 416B cells", "Fernando Calero",
                              "Reverse transcription", "Fernando Calero",
                              "Extracting RNA", "Fernando Calero",
                              "Creating libraries","Fernando Calero"
                              ), collapse="\t"))

output <- add.column(output, "Extract Name", collected$Sample)
output <- add.column(output, "Comment[LIBRARY_LAYOUT]", "SINGLE")
output <- add.column(output, "Comment[LIBRARY_SELECTION]", "Oligo-dT")
output <- add.column(output, "Comment[LIBRARY_SOURCE]", "TRANSCRIPTOMIC")
output <- add.column(output, "Comment[LIBRARY_STRAND]", "not applicable")
output <- add.column(output, "Comment[LIBRARY_STRATEGY]", "RNA-seq")
output <- add.column(output, "Comment[NOMINAL_LENGTH]", "not applicable")
output <- add.column(output, "Comment[NOMINAL_SDEV]", "not applicable")
output <- add.column(output, "Comment[ORIENTATION]", "not applicable")
output <- add.column(output, "Protocol REF\tPerformer", "Sequencing libraries\tFernando Calero")
output <- add.column(output, "Assay Name", collected$Sample)
output <- add.column(output, "Technology Type", "sequencing assay")
output <- add.column(output, "Array Data File", collected$File)
output <- add.column(output, "Protocol REF\tPerformer", "Assigning reads to genes\tAaron Lun")
output <- add.column(output, "Derived Array Data File", collected$Counts)
output <- add.column(output, "Comment[MD5]", collected$MD5)
output <- add.column(output, "Factor Value[spike-in addition]", collected$Addition)
output <- add.column(output, "Factor Value[treatment]", collected$Treatment)
output <- add.column(output, "Factor Value[block]", collected$Batch)

# Constructing the sdrf.tsv file. check.names=FALSE keeps the headers
# verbatim (spaces, brackets, embedded tabs and duplicates included);
# single-value columns are recycled to one entry per library.
output$check.names <- FALSE
sdrf <- do.call(data.frame, output)
write.table(file="sdrf_Calero.tsv", sdrf, row.names=FALSE, sep="\t", quote=FALSE)