#' Calling of amplifications in low purity samples
#'
#' Function to extract amplification from a
#' \code{\link{runAbsoluteCN}} return object in samples of too low purity
#' for the standard \code{\link{callAlterations}}.
#'
#'
#' @param res Return object of the \code{\link{runAbsoluteCN}} function.
#' @param normalDB Normal database, created with
#' \code{\link{createNormalDatabase}}.
#' @param pvalue.cutoff Copy numbers log-ratio cutoffs to call
#' amplifications as calculating using the log-ratios observed in
#' \code{normalDB}
#' @param percentile.cutoff Only report genes with log2-ratio mean
#' exceeding this sample-wise cutoff.
#' @param min.width Minimum number of targets
#' @param all.genes If \code{FALSE}, then only return amplifications
#' passing the thresholds.
#' @param purity If not \code{NULL}, then scale log2-ratios to the
#' corresponding integer copy number. Useful when accurate ctDNA
#' fractions (between 4-10 percent) are available.
#' @param BPPARAM \code{BiocParallelParam} object. If \code{NULL}, does not
#' use parallelization for fitting local optima.
#' @return A \code{data.frame} with gene-level amplification calls.
#' @author Markus Riester
#' @seealso \code{\link{runAbsoluteCN}} \code{\link{callAlterations}}
#' @examples
#'
#' data(purecn.example.output)
#' normal.coverage.file <- system.file("extdata", "example_normal.txt.gz",
#' package = "PureCN")
#' normal2.coverage.file <- system.file("extdata", "example_normal2.txt.gz",
#' package = "PureCN")
#' normal.coverage.files <- c(normal.coverage.file, normal2.coverage.file)
#' normalDB <- createNormalDatabase(normal.coverage.files)
#' callAmplificationsInLowPurity(purecn.example.output, normalDB)["EIF2A", ]
#'
#' @importFrom stats ecdf pnorm na.omit
#' @export callAmplificationsInLowPurity
callAmplificationsInLowPurity <- function(res, normalDB,
pvalue.cutoff = 0.001, percentile.cutoff = 90,
min.width = 3, all.genes = FALSE, purity = NULL, BPPARAM = NULL) {
if (percentile.cutoff < 0 || percentile.cutoff > 100) {
.stopUserError("percentile.cutoff not in expected range (0 to 100).")
}
.checkFraction(pvalue.cutoff, "pvalue.cutoff")
if (!is.null(purity)) .checkFraction(purity, "purity")
if (!is(res$results[[1]]$gene.calls, "data.frame")) {
.stopUserError("This function requires gene-level calls.\n",
"Please add a column 'Gene' containing gene symbols to the ",
"interval.file.")
}
if (is.null(normalDB$sd)) {
.stopUserError("This function requires a normalDB object generated by PureCN 1.16 or later.")
}
pon_lrs_gr <- normalDB$sd$log.ratios
mcols(pon_lrs_gr) <- apply(mcols(pon_lrs_gr), 2, .calibrate_log_ratio, normalDB$sd$log.ratios)
pon_lrs_gr <- subsetByOverlaps(pon_lrs_gr, res$input$log.ratio)
pon_w_gr <- subsetByOverlaps(normalDB$sd$weights, res$input$log.ratio)
tumor_normal_noise_ratio <- .calculate_tumor_normal_noise_ratio(res$input$log.ratio, pon_lrs_gr)
flog.info("Tumor/normal noise ratio: %.3f", tumor_normal_noise_ratio)
if (tumor_normal_noise_ratio > 5) {
flog.warn("Extensive noise in tumor compared to normals.")
tumor_normal_noise_ratio <- 5
}
tumor_normal_noise_ratio <- max(1, tumor_normal_noise_ratio)
calls <- res$results[[1]]$gene.calls
calls$chr <- as.character(calls$chr)
calls$p.value <- NA
calls$percentile.genome <- ecdf(calls$gene.mean)(calls$gene.mean) * 100
calls$percentile.chromosome <- NA
calls_gr <- GRanges(calls)
.get_gene_pv <- function(g) {
g_gr <- calls_gr[g]
# recalculate gene mean to make sure the same intervals are
# used in tumor and pon
pon_gene_w <- subsetByOverlaps(pon_w_gr, g_gr)$weights
tumor_gene <- weighted.mean(
subsetByOverlaps(res$input$log.ratio, g_gr)$log.ratio,
pon_gene_w)
# speed-up
if (tumor_gene < 0 || length(pon_gene_w) < min.width ||
(!all.genes && g_gr$percentile.genome < percentile.cutoff)) {
return(list(gene.mean = tumor_gene, p.value = 0.5))
}
pon_lrs <- mcols(subsetByOverlaps(pon_lrs_gr, g_gr))
pon_gene_lrs <- apply(pon_lrs, 2, weighted.mean, pon_gene_w, na.rm = TRUE)
list(gene.mean = tumor_gene,
p.value = pnorm(g_gr$gene.mean, mean = mean(pon_gene_lrs),
sd = sd(pon_gene_lrs) * tumor_normal_noise_ratio, lower.tail = FALSE),
pon.sd = sd(pon_gene_lrs))
}
if (!is.null(BPPARAM) && !requireNamespace("BiocParallel", quietly = TRUE)) {
flog.warn("Install BiocParallel for parallel optimization.")
BPPARAM <- NULL
} else if (!is.null(BPPARAM)) {
flog.info("Using BiocParallel for parallel optimization.")
}
if (is.null(BPPARAM)) {
gene_level_summary <- lapply(rownames(calls), .get_gene_pv)
} else {
gene_level_summary <- BiocParallel::bplapply(rownames(calls),
.get_gene_pv, BPPARAM = BPPARAM)
}
calls$p.value <- sapply(gene_level_summary, function(x) x$p.value)
# sanity check that there is no difference in tumor and pon gene.mean
# calculation due to overlapping probes etc.
calls$gene.mean <- sapply(gene_level_summary, function(x) x$gene.mean)
calls$percentile.genome <- ecdf(calls$gene.mean)(calls$gene.mean) * 100
ecdf_chr <- sapply(seqlevelsInUse(calls_gr), function(i)
ecdf(calls$gene.mean[calls$chr == i]))
idx <- seq(nrow(calls))
idx <- idx[calls$chr %in% names(ecdf_chr)]
if (length(idx) != nrow(calls)) {
flog.warn("Unable to calculate chromosome percentile for %s.",
paste(sort(unique(calls$chr[!calls$chr %in% names(ecdf_chr)])), collapse=","))
}
calls$percentile.chromosome[idx] <- sapply(idx, function(i)
ecdf_chr[[calls$chr[i]]](calls$gene.mean[i])) * 100
calls$type <- NA
amp.ids <- calls$p.value <= pvalue.cutoff & calls$percentile.genome >= percentile.cutoff
calls$type[amp.ids] <- "AMPLIFICATION"
# delete columns not relevant for this algorithm
calls <- calls[, !grepl("^\\.",colnames(calls))]
calls$focal <- NULL
calls$seg.mean <- NULL
calls$seg.id <- NULL
# support scaling of C in the future
calls$C <- NA
if (!is.null(purity)) {
calls$C <- (2^calls$gene.mean * 2) / purity - ((2 * (1 - purity)) / purity)
}
if (!all.genes) {
return(calls[!is.na(calls$type),])
}
calls
}
.calculate_tumor_normal_noise_ratio <- function(tumor, normals) {
tumor <- tumor[tumor$on.target]
normals <- subsetByOverlaps(normals, tumor)
noise_tumor <- sd(diff(na.omit(tumor$log.ratio)))
noise_normals <- apply(mcols(normals), 2, function(x) sd(diff(na.omit(x))))
flog.debug("Noise tumor: %f; Noise normals: %s",
noise_tumor, paste(noise_normals, collapse = ","))
ratio <- noise_tumor / mean(noise_normals)
if (is.infinite(ratio)) {
.stopRuntimeError("Tumor/normal noise infinite.")
}
return(ratio)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.