#' @title A function to deconvolute of bulk samples to their origin proportions
#' using data from reference atlas (e.g. methylation signatures)
#' Results of model are returned in dataframe "results".
#' Summary of partial R-squared values of model (min, median, mean, max) are
#' printed upon completion.
#' @importFrom foreach %dopar% foreach
#' @importFrom stats na.omit
#' @importFrom dplyr select
#' @importFrom utils getFromNamespace
#' @details deconvolute checks if deconvolution brings advantages on top of
#' the basic bimodal profiles through partial R-squares. The reference matrix
#' usually follows a bimodal distribution in the case of methylation,and taking
#' the average of the rows of methylation matrix might give a pretty similar
#' profile to the bulk methylation profile you are trying to deconvolute.If the
#' deconvolution is advantageous, partial R-squared is expect to be high.
#' @param reference A dataframe containing signatures of different cell types
#' (e.g. methylation signature) used to train the model. The first column
#' should contain a unique ID (e.g. target ID) to match rows of the reference
#' to rows of the bulk. All subsequent columns are cell types. One row per unit
#' of the signature (e.g. CpG). Each cell contains the value for the cell type
#' of this unit (e.g. methylation value of the CpG). If not given, defaults to
#' a reference atlas which is included in this package.
#' This reference atlas comes from Moss et al. (2018)
#' @param vec The user may provide a vector with which partial R-squared of the
#' results will be calculated.
#' The length must match the number of rows of the reference and bulk tables
#' merged on the ID column (with NAs removed).
#' Defaults to row means of reference.
#' @param bulk A dataframe containing signatures of bulk samples used to test
#' to model.
#' Should be dataframe with first column with unique IDs (does not need to
#' exactly match list of IDs in reference,but should have significant overlap),
#' and rest of columns = samples. Should not have duplicate IDs. May use
#' simulateCellMix function to create this dataframe.
#' @param model A string indicating which model is used to deconvolute the
#' samples. Can be either "nnls" (for non-negative least squares) or "svr"
#' (support vector regression) or "qp" (quadratic programming) or "rlm" (robust
#' linear regression). If not given, defaults to "nnls".
#' @keywords deconvolution
#' @examples
#' data("HumanCellTypeMethAtlas")
#' bulk_data <- simulateCellMix(10, reference = HumanCellTypeMethAtlas)[[1]]
#' # non-least negative square regression
#' results_nnls <- deconvolute(
#' bulk = bulk_data,
#' reference = HumanCellTypeMethAtlas
#' )
#' # Quadric programming
#' results_qp <- deconvolute(
#' reference = HumanCellTypeMethAtlas,
#' bulk = bulk_data, model = "qp"
#' )
#' @return A list, first is a dataframe called proportions which contains
#' predicted cell-type proportions of bulk sample profiles in "bulk", second is
#' called rsq,containing partial-rsq values of results, one value per sample.
#' @references Moss, J. et al. (2018). Comprehensive human cell-type
#' methylation atlas reveals origins of circulating cell-free DNA in health
#' and disease. Nature communications, 9(1), 1-12.
#' \url{https://doi.org/10.1038/s41467-018-07466-6}
#' @export
deconvolute <- function(reference,
vec = NULL, bulk, model = "nnls") {
if (!model %in% c(
"nnls", "qp", "svr", "rlm"
)) {
stop("Please type one of the following model types: nnls,qp,svr or
rlm.")
} else {
message("DECONVOLUTION WITH ", toupper(model))
comb <- function(x, ...) {
lapply(seq_along(x), function(i) {
c(x[[i]], lapply(
list(...),
function(y) y[[i]]
))
})
}
## get rid of rows in both tables with na values
clean <- lapply(list(bulk, reference), na.omit)
colnum <- ncol(clean[[2]][, -1])
# save internal functions & variables within foreach,use parallelization
h <- NULL
findPartialRsquared <- getFromNamespace("findPartialRsquared", "deconvR")
decoModel <- getFromNamespace("decoModel", "deconvR")
operation <- foreach(
h = seq(2, ncol(clean[[1]])), .inorder = TRUE,
.combine = "comb", .multicombine = TRUE,
.export = c("findPartialRsquared", "decoModel"),
.init = list(c(), list())
) %dopar% {
thedata <- na.omit(merge(dplyr::select(clean[[1]], 1, h),
clean[[2]],
by = "IDs"
))[, -1]
if (NROW(thedata) == 0) {
stop("The bulk data and the reference you have provided doesn't
have any common probe ID. Thus, deconvolution is not
possible. Please try to provide an appropirate atlas.")
} else {
# merge each sample to reference table
mix <- as.matrix(thedata[, 1])
ref <- as.matrix(thedata[, -1])
coefficients <- decoModel(model, ref, mix, colnum)
if (model == "svr") {
coefficients <- as.numeric(coefficients / sum(coefficients))
} else {
coefficients <- coefficients / sum(coefficients)
}
return(list(
findPartialRsquared(mix, (ref %*% coefficients), ref, vec),
coefficients
))
}
}
results <- data.frame(t(
vapply(seq_along(operation[[2]]), function(i) {
res <- unlist(operation[[2]][[i]])
return(res)
}, FUN.VALUE = numeric(NCOL(clean[[2]]) - 1))
), row.names = colnames(clean[[1]])[-1])
colnames(results) <- colnames(clean[[2]])[-1]
message(
"SUMMARY OF PARTIAL R-SQUARED VALUES FOR ",
toupper(model), ": "
)
print(summary(unlist(operation[[1]])))
# results table will have coefficient predictions of each sample
return(list(proportions = results, rsq = unlist(operation[[1]])))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.