#' calcInteger()
#'
#' Calculates the integer copy number profile for each single cell.
#'
#' @param scCNA The CopyKit object.
#' @param assay String with the name of the assay to pull data from to calculate
#' integers.
#' @param method Method used to scale the ratio values to integer. One of
#' 'fixed', 'scquantum' or 'metadata'.
#' @param ploidy_value If method of choice is 'fixed' a numeric ploidy value
#' must be provided.
#' @param name String specifying the name of the assay used to store the
#' integer matrix in the output.
#' @param penalty An integer passed on to scquantum::ploidy.inference()
#' penalty argument. Only used with method 'scquantum' when the assay is
#' 'bincounts' or 'smoothed_bincounts'.
#' @param BPPARAM A \linkS4class{BiocParallelParam} specifying how the function
#' should be parallelized.
#'
#' @details
#'
#' CopyKit supports the following methods for calculating integer copy number
#' matrices
#' \itemize{
#' \item{fixed:} When method argument is set to 'fixed' copykit extracts the
#' segment means from the scCNA object and multiplies those means by the value
#' provided in the argument ploidy_value. The value is also stored in
#' \code{colData(scCNA)$ploidy} and the metadata element 'ploidy_method' is
#' set to 'fixed'.
#'
#' \item{scquantum:} When the method argument is set to 'scquantum', CopyKit
#' applies \code{\link[scquantum]{ploidy.inference}} function to perform a
#' sample wise calculation returning the estimated computational ploidy for
#' every single cell.
#'
#' \item{metadata:} When the method argument is set to 'metadata', the segment
#' ratios of each cell are multiplied by the value already stored for that cell
#' in \code{colData(scCNA)$ploidy}. An error is raised if that column is
#' missing.
#' }
#'
#' @return The CopyKit object with an assay, named after the argument
#' \code{name} ('integer' by default), that contains a data frame with cells as
#' columns and integerized segments as rows. And, in case of
#' method = 'scquantum' CopyKit adds three elements to \code{colData} named
#' 'ploidy', 'confidence_ratio' and 'ploidy_score' for each cell, where
#' 'ploidy_score' is \code{abs(1 - confidence_ratio)}.
#'
#' @export
#'
#' @importFrom S4Vectors metadata
#' @importFrom SummarizedExperiment assay assayNames colData rowRanges
#' @importFrom scquantum ploidy.inference timeseries.iod
#' @importFrom BiocParallel bplapply bpparam
#'
#' @examples
#' copykit_obj <- mock_bincounts(ncells_diploid = 0, ncells = 10)
#' copykit_obj <- calcInteger(copykit_obj, method = "scquantum")
calcInteger <- function(scCNA,
                        assay = c("bincounts",
                                  'smoothed_bincounts',
                                  "segment_ratios"),
                        method = "fixed",
                        ploidy_value = NULL,
                        name = "integer",
                        penalty = 25,
                        BPPARAM = bpparam()) {
  # args
  assay <- match.arg(assay)
  # reject unknown methods up front; otherwise a mistyped method would skip
  # every branch below and silently reuse whatever colData(scCNA)$ploidy holds
  method <- match.arg(method, c("fixed", "scquantum", "metadata"))

  if ('smoothed_bincounts' %in% SummarizedExperiment::assayNames(scCNA) &&
      assay == 'bincounts' && method == 'scquantum') {
    warning("CopyKit detected that knnSmooth() has been performed.")
    warning("If working with knnSmooth datasets we recommend using the assay 'smoothed_bincounts'")
  }

  # getting datasets
  # NOTE(review): both 'smoothed_bincounts' and 'segment_ratios' read the
  # 'segment_ratios' assay into `bin`; kept as in the original, confirm that
  # this is the intended source for the per-segment statistics below.
  if (assay == 'bincounts') {
    bin <- SummarizedExperiment::assay(scCNA, 'bincounts')
  }
  if (assay %in% c('smoothed_bincounts', 'segment_ratios')) {
    bin <- SummarizedExperiment::assay(scCNA, 'segment_ratios')
  }
  seg <- SummarizedExperiment::assay(scCNA, 'segment_ratios')

  if (method == "fixed") {
    # validate before use: a missing or non-numeric ploidy_value cannot be
    # used to scale the ratios
    if (is.null(ploidy_value) || !is.numeric(ploidy_value)) {
      stop("Method fixed requires a numeric value for ploidy_value.")
    }
    message("Scaling ratio values by ploidy value ",
            ploidy_value)
    # ploidy values are added to colData information
    SummarizedExperiment::colData(scCNA)$ploidy <- ploidy_value
    # saving ploidy scaling method
    S4Vectors::metadata(scCNA)$ploidy_method <- "fixed"
  }

  if (method == "metadata") {
    # method metadata just allows the segment ratios to be integerized based
    # on the values for each cell in the colData(scCNA)$ploidy information.
    if (is.null(SummarizedExperiment::colData(scCNA)$ploidy)) {
      stop("Method 'metadata' requires colData(scCNA)$ploidy information.")
    }
    message('Calculating integer values based on colData(scCNA)$ploidy info.')
  }

  # logic for scquantum method
  if (method == "scquantum") {
    rg <- as.data.frame(SummarizedExperiment::rowRanges(scCNA))

    # method scquantum with input bincounts: one ploidy.inference() call per
    # column of the chosen assay, letting scquantum do the segmentation
    if (assay %in% c('bincounts', 'smoothed_bincounts')) {
      sc_quants <-
        BiocParallel::bplapply(
          SummarizedExperiment::assay(scCNA, assay),
          scquantum::ploidy.inference,
          chrom = rg$seqnames,
          start = rg$start,
          end = rg$end,
          penalty = penalty,
          BPPARAM = BPPARAM
        )
    }

    # method scquantum with input segment ratios: segments are taken from the
    # existing segment_ratios assay, so scquantum segmentation is switched off
    if (assay == 'segment_ratios') {
      sc_quants <- BiocParallel::bplapply(
        seq_len(ncol(seg)),
        function(z) {
          seg_z <- seg[, z]
          bin_z <- bin[, z]

          # segment id of every bin; a new segment starts whenever the ratio
          # changes by more than the tolerance
          segnums <- cumsum(c(TRUE, abs(diff(seg_z)) > 0.00001))
          # lengths are derived from the same ids so they always line up
          # with the per-segment means computed below
          seg_length <- rle(segnums)$lengths

          # extracting segment-wise means and index of dispersion
          seg_bins_mean <- tapply(bin_z, segnums, mean)
          if (any(seg_length <= 3)) {
            # at least one very short segment: use a single estimate from
            # the whole profile instead of per-segment estimates
            iod.est <- scquantum::timeseries.iod(bin_z)
          } else {
            iod.est <- tapply(bin_z, segnums, scquantum::timeseries.iod)
          }

          # bincount mean estimate
          mean.est <- mean(bin_z)

          scquantum::ploidy.inference(
            x = seg_bins_mean,
            chrom = NULL,
            start = NULL,
            end = NULL,
            seg_length = seg_length,
            iod = iod.est,
            mean_bincount = mean.est,
            do_segmentation = FALSE
          )
        },
        # honour the backend requested by the caller
        BPPARAM = BPPARAM
      )
    }

    # extracting ploidies from sc_quantum object
    sc_ploidies <- vapply(sc_quants, function(x) x$ploidy, numeric(1))

    # extracting confidence ratios from sc_quantum object
    sc_confidence <-
      vapply(sc_quants, function(x) x$confidence_ratio, numeric(1))

    # calculating ploidy score from scquantum confidence ratio
    ploidy_score <- abs(1 - sc_confidence)

    SummarizedExperiment::colData(scCNA)$ploidy <- sc_ploidies
    SummarizedExperiment::colData(scCNA)$confidence_ratio <- sc_confidence
    SummarizedExperiment::colData(scCNA)$ploidy_score <- ploidy_score
  }

  # check to guarantee multiplication
  if (!identical(names(bin), SummarizedExperiment::colData(scCNA)$sample)) {
    stop("Order of cells in segment_ratios and colData() is not identical.")
  }

  # obtain the matrix of integer values by scaling every column (cell) of the
  # segment ratios by that cell's ploidy. sweep() is used instead of
  # `%*% diag(ploidy)` because diag() on a length-one vector builds an
  # identity matrix rather than a 1 x 1 diagonal, which breaks single-cell
  # objects, and because it avoids allocating a cells x cells matrix.
  ploidy <- SummarizedExperiment::colData(scCNA)$ploidy
  int_values <- as.data.frame(round(sweep(as.matrix(seg), 2, ploidy, `*`)))

  # recovering names
  names(int_values) <- names(seg)

  SummarizedExperiment::assay(scCNA, name) <- int_values

  scCNA
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.