#' @export
#' @importFrom BiocParallel SerialParam
#' @importFrom S4Vectors DataFrame metadata<-
#' @importFrom edgeR goodTuringProportions
testEmptyDrops <- function(m, lower=100, niters=10000, test.ambient=FALSE, ignore=NULL, alpha=Inf, BPPARAM=SerialParam()) 
# A function to compute a non-ambient p-value for each barcode.
# 
# written by Aaron Lun
# created 1 August 2017
{
    discard <- rowSums(m) == 0
    m <- m[!discard,,drop=FALSE]
    ncells <- ncol(m)

    # Computing the average profile from the ambient cells.
    umi.sum <- as.integer(round(colSums(m)))
    ambient <- umi.sum <= lower # lower => "T" in the text.
    ambient.cells <- m[,ambient]
    ambient.prof <- rowSums(ambient.cells)
    ambient.prop <- goodTuringProportions(ambient.prof)

    # Removing supposed ambient cells from the matrix.
    # Also removing additional cells that don't pass some total count threshold, if required.
    if (!test.ambient) {
        keep <- !ambient
    } else {
        keep <- umi.sum > 0L
    }
    if (!is.null(ignore)) { 
        keep <- keep & umi.sum > ignore
    }
    obs <- m[,keep,drop=FALSE]
    obs.totals <- umi.sum[keep]

    # Estimating the alpha from the discarded ambient droplets, if desired.
    if (is.null(alpha)) {
        alpha <- .estimate_alpha(m[,ambient,drop=FALSE], ambient.prop, umi.sum[ambient]) 
    }

    # Calculating the log-multinomial probability for each cell.
    obs.P <- .compute_multinom_prob_data(obs, ambient.prop, alpha=alpha)
    rest.P <- .compute_multinom_prob_rest(obs.totals, alpha=alpha)

    # Computing the p-value for each observed probability.
    n.above <- .permute_counter(totals=obs.totals, probs=obs.P, ambient=ambient.prop, iter=niters, BPPARAM=BPPARAM, alpha=alpha)
    limited <- n.above==0L
    pval <- (n.above+1)/(niters+1)

    # Collating into some sensible output.
    all.p <- all.lr <- all.exp <- rep(NA_real_, ncells)
    all.lim <- rep(NA, ncells)
    all.p[keep] <- pval
    all.lr[keep] <- obs.P + rest.P 
    all.lim[keep] <- limited

    output <- DataFrame(Total=umi.sum, LogProb=all.lr, PValue=all.p, Limited=all.lim, row.names=colnames(m))
    metadata(output) <- list(lower=lower, niters=niters, ambient=ambient.prop, alpha=alpha)
    output
}

#' @importFrom BiocParallel bpnworkers SerialParam bplapply
.permute_counter <- function(totals, probs, ambient, iter, alpha=Inf, BPPARAM=SerialParam()) 
# Calculating the p-values using a Monte Carlo approach.
{
    o <- order(totals, probs)
    re.P <- probs[o]
    re.totals <- rle(totals[o]) # Ensure identity upon comparison.

    nworkers <- bpnworkers(BPPARAM)
    per.core <- rep(ceiling(iter/nworkers), nworkers)
    per.core[1] <- iter - sum(per.core[-1]) # Making sure that we get the exact number of iterations.

    out.values <- bplapply(per.core, FUN=.monte_carlo_pval, total.val=re.totals$values,
        total.len=re.totals$lengths, P=re.P, ambient=ambient, alpha=alpha, BPPARAM=BPPARAM)
    n.above <- Reduce("+", out.values)
    n.above[o] <- n.above
    return(n.above)
}

.monte_carlo_pval <- function(total.val, total.len, P, ambient, iterations, alpha) { 
    if (is.infinite(alpha)) {
        out <- .Call(cxx_montecarlo_pval, total.val, total.len, P, ambient, iterations) 
    } else {
        out <- .Call(cxx_montecarlo_pval_alpha, total.val, total.len, P, ambient, iterations, alpha) 
    }
    return(out)
}

#' @importFrom methods is
#' @importClassesFrom Matrix dgTMatrix
#' @importFrom stats aggregate
.compute_multinom_prob_data <- function(mat, prop, alpha=Inf)
# Efficiently calculates the data-dependent component of the log-multinomial probability.
# Also does so for the Dirichlet-multinomial log-probability for a given 'alpha'.
{
    if (is(mat, "dgTMatrix")) {
        i <- mat@i + 1L
        j <- mat@j + 1L
        x <- mat@x

        if (is.infinite(alpha)) {
            p.n0 <- x * log(prop[i]) - lfactorial(x)
        } else {
            p.n0 <- lgamma(alpha * prop[i] + x) - lfactorial(x) - lgamma(alpha * prop[i])
        }

        by.col <- aggregate(p.n0, list(Col=j), sum)
        obs.P <- numeric(ncol(mat))
        obs.P[by.col$Col] <- by.col$x
    } else {
        obs.P <- .Call(cxx_compute_multinom, mat, prop, alpha)
    }
    return(obs.P)
}

.compute_multinom_prob_rest <- function(totals, alpha=Inf) 
# Efficiently calculates the total-dependent component of the multinomial log-probability.
{
    if (is.infinite(alpha)) { 
        return(lfactorial(totals))
    } else {
        return(lfactorial(totals) + lgamma(alpha) - lgamma(totals + alpha))
    }
}

#' @importFrom methods is
#' @importClassesFrom Matrix dgTMatrix
#' @importMethodsFrom Matrix which
#' @importFrom stats optimize 
.estimate_alpha <- function(mat, prop, totals, interval=c(0.01, 10000))
# Efficiently finds the MLE for the overdispersion parameter of a Dirichlet-multinomial distribution.
{
    if (is(mat, "dgTMatrix")) {
        i <- mat@i + 1L
        j <- mat@j + 1L
        x <- mat@x
    } else {
        keep <- which(mat > 0, arr.ind=TRUE)
        i <- keep[,1]
        j <- keep[,2]
        x <- mat[keep]
    }

    per.prop <- prop[i] 
    LOGLIK <- function(alpha) {
        output <- numeric(length(alpha))
        for (adx in seq_along(alpha)) {
            cur.alpha <- alpha[adx]
            output[adx] <- lgamma(cur.alpha) * length(totals) - 
                sum(lgamma(totals + cur.alpha)) + 
                sum(lgamma(x + per.prop * cur.alpha)) - 
                sum(lgamma(per.prop * cur.alpha))
        }
        return(output)
    }

    optimize(LOGLIK, interval=interval, maximum=TRUE)$maximum
}

#' @export
#' @importFrom stats p.adjust
#' @importFrom S4Vectors metadata<- metadata
emptyDrops <- function(m, lower=100, retain=NULL, barcode.args=list(), ...) 
# Combined function that puts these all together, always keeping cells above the inflection
# point (they are given p-values of 0, as they are always rejected). 
# 
# written by Aaron Lun
# created 7 August 2017
{
    stats <- testEmptyDrops(m, lower=lower, ...)
    tmp <- stats$PValue
    
    if (is.null(retain)) {
        retain <- do.call(barcodeRanks, c(list(m, lower=lower), barcode.args))$knee
    }
    always <- stats$Total >= retain
    tmp[always] <- 0

    metadata(stats)$retain <- retain
    stats$FDR <- p.adjust(tmp, method="BH")
    return(stats)
}