---
title: Identifying overlapping effects upon HUSH knockdown
author: Aaron Lun
date: 18 November 2016
output:
  html_document:
    fig_caption: false
---

```{r, echo=FALSE}
knitr::opts_chunk$set(error=FALSE, warning=FALSE, fig.path="figure-olap/")
```

# Overlaps in differentially expressed genes

## Defining a p-value threshold

We need to define a common p-value threshold, which we apply to all genes in all comparisons.
This common threshold is chosen by controlling the FDR across all comparisons.
First we load in the statistics for each comparison.

```{r}
# Load one table of statistics per knockdown, keyed by the name embedded in
# the file name. The pattern is anchored and the dot is escaped so that stray
# files (e.g. backups ending in ".tsv.bak" or ".tsv~") are not picked up and
# pooled into the multiple testing correction below.
res <- list()
for (f in list.files(pattern="^knockdown_.*_genes\\.tsv$")) {
    curprot <- sub("^knockdown_(.*)_genes\\.tsv$", "\\1", f)
    curres <- read.table(f, header=TRUE, stringsAsFactors=FALSE)
    # Sort by gene identifier so that rows can be matched up across knockdowns.
    curres <- curres[order(curres$ENSEMBL),]
    res[[curprot]] <- curres
}

# Every overlap below compares tables row-by-row and annotates the result with
# the TASOR table, so all tables must list the same genes in the same order.
for (other in names(res)[-1]) {
    if (!identical(res[[other]]$ENSEMBL, res[[1]]$ENSEMBL)) {
        stop("gene identifiers in '", other, "' do not match those in '",
            names(res)[1], "'")
    }
}
```

We then extract the p-values from each comparison and apply the Benjamini-Hochberg correction to the pooled set of p-values.
The p-value threshold is that corresponding to a FDR of 5%.

```{r}
all.p <- lapply(res, "[[", i="P.Value")
all.p <- unlist(all.p)
adj.p <- p.adjust(all.p, method="BH")
# Largest raw p-value that is still significant after the pooled correction.
threshold <- max(all.p[adj.p <= 0.05])
threshold
```

## Effect of HUSH core component knockdown

We look for the number of overlapping genes in the three core components of the HUSH complex -- TASOR, MPP8 and periphilin.

```{r}
# One column per knockdown; 1 marks a gene at or below the common threshold.
combo <- cbind(TASOR=res$TASOR$P.Value <= threshold,
    MPP8=res$MPP8$P.Value <= threshold,
    PHL=res$PHL$P.Value <= threshold)
storage.mode(combo) <- "integer"
library(limma)
vennDiagram(combo)
```

We can have a look at the genes common to all knockdowns.

```{r}
is.common <- rowSums(combo)==3L
data.frame(res$TASOR[is.common,c("ENSEMBL", "SYMBOL")])
```

We can also have a look at the genes shared between any two of the knockdowns.
```{r}
is.shared <- rowSums(combo)==2L
# drop=FALSE keeps 'combo' as a matrix when exactly one gene is selected;
# otherwise it collapses to a vector and data.frame() recycles the gene
# annotation into one row per knockdown.
data.frame(res$TASOR[is.shared,c("ENSEMBL", "SYMBOL")], combo[is.shared,,drop=FALSE])
```

## Relationship between HUSH and SETDB-1

We look for the number of overlapping genes in the HUSH components and SETDB-1.

```{r}
# One column per knockdown; 1 marks a gene at or below the common threshold.
combo <- cbind(TASOR=res$TASOR$P.Value <= threshold,
    MPP8=res$MPP8$P.Value <= threshold,
    PHL=res$PHL$P.Value <= threshold,
    SETDB=res$SETDB$P.Value <= threshold)
storage.mode(combo) <- "integer"
vennDiagram(combo)
```

We look at the genes common to all knockdowns.

```{r}
is.common <- rowSums(combo)==4L
data.frame(res$TASOR[is.common,c("ENSEMBL", "SYMBOL")])
```

We look at the genes shared between SETDB-1 and knockdown of one or two HUSH components.

```{r}
is.shared <- combo[,"SETDB"]==1L & rowSums(combo)>=2L & !is.common
# drop=FALSE keeps a single selected row as a one-row matrix.
data.frame(res$TASOR[is.shared,c("ENSEMBL", "SYMBOL")], combo[is.shared,,drop=FALSE])
```

## Relationship between HUSH and MORC2

We look for the number of overlapping genes in the HUSH components and MORC2.

```{r}
combo <- cbind(TASOR=res$TASOR$P.Value <= threshold,
    MPP8=res$MPP8$P.Value <= threshold,
    PHL=res$PHL$P.Value <= threshold,
    MORC=res$MORC$P.Value <= threshold)
storage.mode(combo) <- "integer"
vennDiagram(combo)
```

We look at the genes common to all knockdowns.

```{r}
is.common <- rowSums(combo)==4L
data.frame(res$TASOR[is.common,c("ENSEMBL", "SYMBOL")])
```

We look at the genes shared between MORC2 and knockdown of one or two HUSH components.

```{r}
is.shared <- combo[,"MORC"]==1L & rowSums(combo)>=2L & !is.common
# drop=FALSE keeps a single selected row as a one-row matrix.
data.frame(res$TASOR[is.shared,c("ENSEMBL", "SYMBOL")], combo[is.shared,,drop=FALSE])
```

## Relationship between HUSH and KAP-1

We look for the number of overlapping genes in the HUSH components and KAP-1.

```{r}
combo <- cbind(TASOR=res$TASOR$P.Value <= threshold,
    MPP8=res$MPP8$P.Value <= threshold,
    PHL=res$PHL$P.Value <= threshold,
    KAP=res$KAP$P.Value <= threshold)
storage.mode(combo) <- "integer"
vennDiagram(combo)
```

We look at the genes common to all knockdowns.
```{r}
is.common <- rowSums(combo)==4L
data.frame(res$TASOR[is.common,c("ENSEMBL", "SYMBOL")])
```

We look at the genes shared between KAP-1 and knockdown of one or two HUSH components.

```{r}
is.shared <- combo[,"KAP"]==1L & rowSums(combo)>=2L & !is.common
# drop=FALSE keeps 'combo' as a matrix when exactly one gene is selected;
# otherwise it collapses to a vector and data.frame() recycles the gene
# annotation into one row per knockdown.
data.frame(res$TASOR[is.shared,c("ENSEMBL", "SYMBOL")], combo[is.shared,,drop=FALSE])
```

## Relationship between HUSH and KAP-1, MORC2 and SETDB-1

```{r}
# One column per knockdown; 1 marks a gene at or below the common threshold.
combo <- cbind(TASOR=res$TASOR$P.Value <= threshold,
    MPP8=res$MPP8$P.Value <= threshold,
    PHL=res$PHL$P.Value <= threshold,
    KAP=res$KAP$P.Value <= threshold,
    MORC=res$MORC$P.Value <= threshold,
    SETDB=res$SETDB$P.Value <= threshold)
storage.mode(combo) <- "integer"
```

We look at the genes common to all knockdowns.

```{r}
is.common <- rowSums(combo)==6L
data.frame(res$TASOR[is.common,c("ENSEMBL", "SYMBOL")])
```

We look at the genes shared between KAP-1, SETDB-1, MORC2 and knockdown of one or two HUSH components.

```{r}
# All three of MORC2, KAP-1 and SETDB-1, plus at least one (but not all) of
# the HUSH components.
is.shared <- rowSums(combo[,c("MORC", "KAP", "SETDB")])==3L & rowSums(combo)>=4L & !is.common
data.frame(res$TASOR[is.shared,c("ENSEMBL", "SYMBOL")], combo[is.shared,,drop=FALSE])
```

## Relationship between SETDB-1 and ATF7IP

We look for the number of overlapping genes between SETDB-1 and ATF7IP.

```{r}
combo <- cbind(SETDB=res$SETDB$P.Value <= threshold,
    ATF7IP=res$ATF7IP$P.Value <= threshold)
storage.mode(combo) <- "integer"
vennDiagram(combo)
```

We look at the common set of genes.

```{r}
is.common <- rowSums(combo)==2L
# The TASOR table is only used for its gene annotation here.
data.frame(res$TASOR[is.common,c("ENSEMBL", "SYMBOL")])
```

## Relationship between SETDB-1 and KAP-1

We look for the number of overlapping genes between SETDB-1 and KAP-1.

```{r}
combo <- cbind(SETDB=res$SETDB$P.Value <= threshold,
    KAP=res$KAP$P.Value <= threshold)
storage.mode(combo) <- "integer"
vennDiagram(combo)
```

We look at the common set of genes.
```{r}
is.common <- rowSums(combo)==2L
# The TASOR table is only used for its gene annotation here.
data.frame(res$TASOR[is.common,c("ENSEMBL", "SYMBOL")])
```

# Overlaps in differentially expressed repeats

## Defining a p-value threshold

We need to define a common p-value threshold, which we apply to all repeats in all comparisons.
This common threshold is chosen by controlling the FDR across all comparisons.
First we load in the statistics for each comparison.

```{r}
# Load one table of statistics per knockdown, keyed by the name embedded in
# the file name. The pattern is anchored and the dot is escaped so that stray
# files (e.g. backups ending in ".tsv.bak" or ".tsv~") are not picked up and
# pooled into the multiple testing correction below.
res <- list()
for (f in list.files(pattern="^knockdown_.*_repeats\\.tsv$")) {
    curprot <- sub("^knockdown_(.*)_repeats\\.tsv$", "\\1", f)
    curres <- read.table(f, header=TRUE, stringsAsFactors=FALSE)
    # Sort by repeat name so that rows can be matched up across knockdowns.
    curres <- curres[order(curres$Repeat),]
    res[[curprot]] <- curres
}

# Every overlap below compares tables row-by-row and annotates the result with
# the TASOR table, so all tables must list the same repeats in the same order.
for (other in names(res)[-1]) {
    if (!identical(res[[other]]$Repeat, res[[1]]$Repeat)) {
        stop("repeat names in '", other, "' do not match those in '",
            names(res)[1], "'")
    }
}
```

We then extract the p-values from each comparison and apply the Benjamini-Hochberg correction to the pooled set of p-values.
The p-value threshold is that corresponding to a FDR of 5%.

```{r}
all.p <- lapply(res, "[[", i="PValue")
all.p <- unlist(all.p)
adj.p <- p.adjust(all.p, method="BH")
# Largest raw p-value that is still significant after the pooled correction.
threshold <- max(all.p[adj.p <= 0.05])
threshold
```

## Effect of HUSH core component knockdown

We look for the number of overlapping repeats in the three core components of the HUSH complex -- TASOR, MPP8 and periphilin.

```{r}
# One column per knockdown; 1 marks a repeat at or below the common threshold.
combo <- cbind(TASOR=res$TASOR$PValue <= threshold,
    MPP8=res$MPP8$PValue <= threshold,
    PHL=res$PHL$PValue <= threshold)
storage.mode(combo) <- "integer"
library(limma)
vennDiagram(combo)
```

We can have a look at the repeats common to all knockdowns.

```{r}
is.common <- rowSums(combo)==3L
data.frame(Repeat=res$TASOR[is.common,c("Repeat")])
```

We can also have a look at the repeats shared between any two of the knockdowns.

```{r}
is.shared <- rowSums(combo)==2L
# drop=FALSE keeps 'combo' as a matrix when exactly one repeat is selected;
# otherwise it collapses to a vector and data.frame() recycles the repeat
# name into one row per knockdown.
data.frame(Repeat=res$TASOR[is.shared,c("Repeat")], combo[is.shared,,drop=FALSE])
```

## Relationship between HUSH and SETDB-1

We look for the number of overlapping repeats in the HUSH components and SETDB-1.
```{r}
# One column per knockdown; 1 marks a repeat at or below the common threshold.
combo <- cbind(TASOR=res$TASOR$PValue <= threshold,
    MPP8=res$MPP8$PValue <= threshold,
    PHL=res$PHL$PValue <= threshold,
    SETDB=res$SETDB$PValue <= threshold)
storage.mode(combo) <- "integer"
vennDiagram(combo)
```

We look at the repeats common to all knockdowns.

```{r}
is.common <- rowSums(combo)==4L
data.frame(Repeat=res$TASOR[is.common,c("Repeat")])
```

We look at the repeats shared between SETDB-1 and knockdown of one or two HUSH components.

```{r}
is.shared <- combo[,"SETDB"]==1L & rowSums(combo)>=2L & !is.common
# drop=FALSE keeps 'combo' as a matrix when exactly one repeat is selected;
# otherwise it collapses to a vector and data.frame() recycles the repeat
# name into one row per knockdown.
data.frame(Repeat=res$TASOR[is.shared,c("Repeat")], combo[is.shared,,drop=FALSE])
```

## Relationship between HUSH and MORC2

We look for the number of overlapping repeats in the HUSH components and MORC2.

```{r}
combo <- cbind(TASOR=res$TASOR$PValue <= threshold,
    MPP8=res$MPP8$PValue <= threshold,
    PHL=res$PHL$PValue <= threshold,
    MORC=res$MORC$PValue <= threshold)
storage.mode(combo) <- "integer"
vennDiagram(combo)
```

We look at the repeats common to all knockdowns.

```{r}
is.common <- rowSums(combo)==4L
data.frame(Repeat=res$TASOR[is.common,c("Repeat")])
```

We look at the repeats shared between MORC2 and knockdown of one or two HUSH components.

```{r}
is.shared <- combo[,"MORC"]==1L & rowSums(combo)>=2L & !is.common
# drop=FALSE keeps a single selected row as a one-row matrix.
data.frame(Repeat=res$TASOR[is.shared,c("Repeat")], combo[is.shared,,drop=FALSE])
```

## Relationship between HUSH and KAP-1

We look for the number of overlapping repeats in the HUSH components and KAP-1.

```{r}
# The repeat tables store their p-values in 'PValue' (the column used to
# derive the threshold for repeats). 'P.Value' is the column name of the gene
# tables; on the repeat tables it yields NULL and hence an empty 'combo'.
combo <- cbind(TASOR=res$TASOR$PValue <= threshold,
    MPP8=res$MPP8$PValue <= threshold,
    PHL=res$PHL$PValue <= threshold,
    KAP=res$KAP$PValue <= threshold)
storage.mode(combo) <- "integer"
vennDiagram(combo)
```

We look at the repeats common to all knockdowns.

```{r}
is.common <- rowSums(combo)==4L
data.frame(Repeat=res$TASOR[is.common,c("Repeat")])
```

We look at the repeats shared between KAP-1 and knockdown of one or two HUSH components.
```{r}
is.shared <- combo[,"KAP"]==1L & rowSums(combo)>=2L & !is.common
# drop=FALSE keeps 'combo' as a matrix when exactly one repeat is selected;
# otherwise it collapses to a vector and data.frame() recycles the repeat
# name into one row per knockdown.
data.frame(Repeat=res$TASOR[is.shared,c("Repeat")], combo[is.shared,,drop=FALSE])
```

## Relationship between HUSH and KAP-1, MORC2 and SETDB-1

```{r}
# The repeat tables store their p-values in 'PValue' (the column used to
# derive the threshold for repeats). 'P.Value' is the column name of the gene
# tables; on the repeat tables it yields NULL and hence an empty 'combo'.
combo <- cbind(TASOR=res$TASOR$PValue <= threshold,
    MPP8=res$MPP8$PValue <= threshold,
    PHL=res$PHL$PValue <= threshold,
    KAP=res$KAP$PValue <= threshold,
    MORC=res$MORC$PValue <= threshold,
    SETDB=res$SETDB$PValue <= threshold)
storage.mode(combo) <- "integer"
```

We look at the repeats common to all knockdowns.

```{r}
is.common <- rowSums(combo)==6L
data.frame(Repeat=res$TASOR[is.common,c("Repeat")])
```

We look at the repeats shared between KAP-1, SETDB-1, MORC2 and knockdown of one or two HUSH components.

```{r}
# All three of MORC2, KAP-1 and SETDB-1, plus at least one (but not all) of
# the HUSH components.
is.shared <- rowSums(combo[,c("MORC", "KAP", "SETDB")])==3L & rowSums(combo)>=4L & !is.common
data.frame(Repeat=res$TASOR[is.shared,c("Repeat")], combo[is.shared,,drop=FALSE])
```

## Relationship between SETDB-1 and ATF7IP

We look for the number of overlapping repeats between SETDB-1 and ATF7IP.

```{r}
combo <- cbind(SETDB=res$SETDB$PValue <= threshold,
    ATF7IP=res$ATF7IP$PValue <= threshold)
storage.mode(combo) <- "integer"
vennDiagram(combo)
```

We look at the common set of repeats.

```{r}
is.common <- rowSums(combo)==2L
# The TASOR table is only used for its repeat names here.
data.frame(Repeat=res$TASOR[is.common,c("Repeat")])
```

## Relationship between SETDB-1 and KAP-1

We look for the number of overlapping repeats between SETDB-1 and KAP-1.

```{r}
combo <- cbind(SETDB=res$SETDB$PValue <= threshold,
    KAP=res$KAP$PValue <= threshold)
storage.mode(combo) <- "integer"
vennDiagram(combo)
```

We look at the common set of repeats.

```{r}
is.common <- rowSums(combo)==2L
data.frame(Repeat=res$TASOR[is.common,c("Repeat")])
```

# Session information

```{r}
sessionInfo()
```