# This tests that swappedDrops works correctly. # library(DropletUtils); library(testthat); source("test-swapping.R") ########################################## BITMASK <- function(idx, blen) { seqs <- vector("list", blen) for (i in seq_len(blen)) { seqs[[i]] <- c("A", "C", "G", "T")[idx %% 4 + 1L] idx <- floor(idx/4) } do.call(paste0, rev(seqs)) } test_that("barcode extraction is working correctly", { library(rhdf5) for (blen in c(4, 7, 10)) { all.barcodes <- sample(4^blen, 10000, replace=TRUE) - 1L out.file <- tempfile(fileext="h5") h5 <- h5createFile(out.file) h5write(all.barcodes, out.file, "barcode") out <- .Call(DropletUtils:::cxx_get_cell_barcodes, out.file, "barcode", blen) guess <- .Call(DropletUtils:::cxx_get_cell_barcodes, out.file, "barcode", NULL) expect_identical(out, guess) # Manually doing the bit masks. progressive <- BITMASK(all.barcodes, blen) expect_identical(out, progressive) } }) ########################################## tmpdir <- tempfile() dir.create(tmpdir) ngenes <- 20L barcode <- 4L ncells <- 4L^barcode # Defining a reference function to compare the results. REFFUN <- function(original, swapped, min.frac) { combined <- rbind(original, swapped) combined <- combined[combined$gene= min.frac) { s <- combined$sample[current[chosen]] cur.gene <- combined$gene[current[chosen]] + 1L cur.cell <- combined$cell[current[chosen]] + 1L all.counts[[s]][cur.gene, cur.cell] <- all.counts[[s]][cur.gene, cur.cell] + 1 is.swapped[current[chosen]] <- FALSE } } return(all.counts) } ########################################## library(Matrix) set.seed(5717) test_that("Removal of swapped drops works correctly", { for (nmolecules in c(10, 100, 1000, 10000)) { output <- DropletUtils:::sim10xMolInfo(tmpdir, return.tab=TRUE, barcode.length=barcode, nsamples=3, ngenes=ngenes, nmolecules=nmolecules) # Figuring out the correspondence between cell ID and the sorted barcode. was.mapped <- c(output$original$gene, output$swapped$gene)!=ngenes f <- factor(c(output$original$sample, output$swapped$sample), levels=seq_along(output$files)) retainer <- split(c(output$original$cell, output$swapped$cell)[was.mapped], f[was.mapped], drop=FALSE) retainer <- lapply(retainer, FUN=function(i) { i <- unique(i) collected <- BITMASK(i, barcode) stopifnot(!anyDuplicated(collected)) i[order(collected)] + 1L # get back to 1-based indexing. }) # Constructing total matrices: combined <- rbind(output$original, output$swapped) combined <- combined[combined$gene= min.frac)) } # Checking that the HDF5 and sparse results are the same. observed4 <- swappedDrops(output$files, barcode, get.swapped=FALSE, get.diagnostics=TRUE, min.frac=min.frac, hdf5.out=TRUE) expect_s4_class(observed4$diagnostics, "HDF5Array") expect_equivalent(as.matrix(observed3$diagnostics), as.matrix(observed4$diagnostics)) } }) test_that("swappedDrops functions correctly for silly inputs", { output <- DropletUtils:::sim10xMolInfo(tmpdir, barcode.length=barcode, nsamples=3, ngenes=ngenes, nmolecules=0) deswapped <- swappedDrops(output) for (ref in deswapped$cleaned) { expect_identical(dim(ref), c(ngenes, 0L)) } output <- DropletUtils:::sim10xMolInfo(tmpdir, barcode.length=barcode, nsamples=3, ngenes=0, nmolecules=0) deswapped <- swappedDrops(output) for (ref in deswapped$cleaned) { expect_identical(dim(ref), c(0L, 0L)) } # Fails if you give it samples with different gene sets. tmpdir2 <- tempfile() dir.create(tmpdir2) o1 <- DropletUtils:::sim10xMolInfo(tmpdir, barcode.length=barcode, nsamples=3, ngenes=100, nmolecules=0) o2 <- DropletUtils:::sim10xMolInfo(tmpdir2, barcode.length=barcode, nsamples=3, ngenes=10, nmolecules=0) expect_error(swappedDrops(c(o1, o2)), "gene information differs") # Spits out a warning if you have multiple GEM groups. o1 <- DropletUtils:::sim10xMolInfo(tmpdir, barcode.length=barcode, nsamples=1, ngenes=100, nmolecules=100) h5write(sample(3L, 100, replace=TRUE), o1, "gem_group") expect_warning(swappedDrops(o1), "contains multiple GEM groups") # Responds to names in the sample paths. new.names <- LETTERS[seq_along(output)] deswapped2 <- swappedDrops(setNames(output, new.names), get.swapped=TRUE) expect_identical(names(deswapped2$cleaned), new.names) expect_identical(names(deswapped2$swapped), new.names) # removeSwappedDrops is not happy if manually specified lists don't match up. expect_error(removeSwappedDrops(cells=list(), umis=list(1L), genes=list(1L), nreads=list(1L), ref.genes=1:10), "lists are not") expect_error(removeSwappedDrops(cells=list(c("A", "C")), umis=list(1L), genes=list(1L), nreads=list(1L), ref.genes=1:10), "list vectors are not") })