#' @title Deconvolution of heterogeneous tumor samples with two or three
#' components using expression data from RNAseq or microarray platforms
#'
#'@description DeMixT is a software that performs deconvolution on transcriptome
#'data from a mixture of two or three components.
#'
#' @param data.Y A SummarizedExperiment object of expression data from mixed
#' tumor samples. It is a \eqn{G} by \eqn{My} matrix where \eqn{G} is the number
#' of genes and \eqn{My} is the number of mixed samples. Samples with the same
#' tissue type should be placed together in columns.
#' @param data.N1 A SummarizedExperiment object of expression data
#' from reference component 1 (e.g., normal). It is a \eqn{G} by \eqn{M1} matrix
#' where \eqn{G} is the number of genes and \eqn{M1} is the number of samples
#' for component 1.
#' @param data.N2 A SummarizedExperiment object of expression data from
#' additional reference samples. It is a \eqn{G} by \eqn{M2} matrix where
#' \eqn{G} is the number of genes and \eqn{M2} is the number of samples for
#' component 2. Component 2 is needed only for running a three-component model.
#' @param niter The maximum number of iterations used in the algorithm of
#' iterated conditional modes. A larger value better guarantees
#' the convergence in estimation but increases the running time. The default is
#' 10.
#' @param nbin The number of bins used in numerical integration for computing
#' complete likelihood. A larger value increases accuracy in estimation but
#' increases the running time, especially in a three-component deconvolution
#' problem. The default is 50.
#' @param if.filter The logical flag indicating whether a predetermined filter
#' rule is used to select genes for proportion estimation. The default is TRUE.
#' @param filter.sd The cut-off for the standard deviation of lognormal
#' distribution. Genes whose log transferred standard deviation smaller than
#' the cut-off will be selected into the model. The default is 0.5.
#' @param ngene.selected.for.pi The percentage or the number of genes used for
#' proportion estimation. The difference between the expression levels from
#' mixed tumor samples and the known component(s) are evaluated, and the most
#' differential expressed genes are selected, which is called DE. It is enabled
#' when if.filter = TRUE. The default is \eqn{min(1500, 0.3*G)}, where
#' \eqn{G} is the number of genes. Users can also try using more genes,
#' ranging from \eqn{0.3*G} to \eqn{0.5*G}, and evaluate the outcome.
#' @param mean.diff.in.CM Threshold of expression difference for selecting genes
#' in the component merging strategy. We merge three-component to two-component
#' by selecting genes with similar expressions for the two known components.
#' Genes with the mean differences less than the threshold will be selected for
#' component merging. It is used in the three-component setting, and is enabled
#' when if.filter = TRUE. The default is 0.25.
#' @param nspikein The number of spikes in normal reference used for proportion
#' estimation. The default value is \eqn{ min(200, 0.3*My)}, where
#' \eqn{My} the number of mixed samples. If it is set to 0, proportion
#' estimation is performed without any spike in normal reference.
#' @param gene.selection.method The method of gene selection used for proportion
#' estimation. The default method is 'GS', which applies a profile likelihood based
#' method for gene selection. If it is set to 'DE', the most differential expressed
#' genes are selected.
#' @param ngene.Profile.selected The number of genes used for proportion
#' estimation ranked by profile likelihood. The default is
#' \eqn{min(1500,0.1*G)}, where \eqn{G} is the number of genes.
#' This is enabled only when gene.selection.method is set to 'GS'.
#' @param tol The convergence criterion. The default is 10^(-5).
#' @param output.more.info The logical flag indicating whether to show the
#' estimated proportions in each iteration in the output.
#' @param pi01 Initialized proportion for first kown component. The default is
#' \eqn{Null} and pi01 will be generated randomly from uniform distribution.
#' @param pi02 Initialized proportion for second kown component. pi02 is needed
#' only for running a three-component model. The default is \eqn{Null} and pi02
#' will be generated randomly from uniform distribution.
#' @param nthread The number of threads used for deconvolution when OpenMP is
#' available in the system. The default is the number of whole threads minus one.
#' In our no-OpenMP version, it is set to 1.
#'
#' @return
#' \item{pi}{A matrix of estimated proportion. First row and second row
#' corresponds to the proportion estimate for the known components and unkown
#' component respectively for two or three component settings, and each column
#' corresponds to one sample.}
#' \item{pi.iter}{Estimated proportions in each iteration. It is a \eqn{niter*
#' My*p} array, where \eqn{p} is the number of components. This is
#' enabled only when output.more.info = TRUE.}
#' \item{ExprT}{A matrix of deconvolved expression profiles corresponding to
#' T-component in mixed samples for a given subset of genes. Each row
#' corresponds to one gene and each column corresponds to one sample.}
#' \item{ExprN1}{A matrix of deconvolved expression profiles corresponding to
#' N1-component in mixed samples for a given subset of genes. Each row
#' corresponds to one gene and each column corresponds to one sample.}
#' \item{ExprN2}{A matrix of deconvolved expression profiles corresponding to
#' N2-component in mixed samples for a given subset of genes in a
#' three-component setting. Each row corresponds to one gene and each
#' column corresponds to one sample.}
#' \item{Mu}{A matrix of estimated \eqn{Mu} of log2-normal distribution for
#' both known (\eqn{MuN1, MuN2}) and unknown component (\eqn{MuT}). Each row
#' corresponds to one gene.}
#' \item{Sigma}{Estimated \eqn{Sigma} of log2-normal distribution for both
#' known (\eqn{SigmaN1, SigmaN2}) and unknown component (\eqn{SigmaT}). Each
#' row corresponds to one gene.}
#' \item{gene.name}{The names of genes used in estimating the proportions.
#' If no gene names are provided in the original data set, the genes will be
#' automatically indexed.}
#'
#' @author Zeya Wang, Wenyi Wang
#'
#' @seealso http://bioinformatics.mdanderson.org/main/DeMixT
#'
#' @examples
#' # Example 1: simulated two-component data by using GS(gene selection method)
#' data(test.data.2comp)
#' # res <- DeMixT(data.Y = test.data.2comp$data.Y,
#' # data.N1 = test.data.2comp$data.N1,
#' # data.N2 = NULL, nspikein = 50,
#' # gene.selection.method = 'GS',
#' # niter = 10, nbin = 50, if.filter = TRUE,
#' # ngene.selected.for.pi = 150,
#' # mean.diff.in.CM = 0.25, tol = 10^(-5))
#' # res$pi
#' # head(res$ExprT, 3)
#' # head(res$ExprN1, 3)
#' # head(res$Mu, 3)
#' # head(res$Sigma, 3)
#' #
#' # Example 2: simulated two-component data by using DE(gene selection method)
#' # data(test.data.2comp)
#' # res <- DeMixT(data.Y = test.data.2comp$data.Y,
#' # data.N1 = test.data.2comp$data.N1,
#' # data.N2 = NULL, nspikein = 50, g
#' # ene.selection.method = 'DE',
#' # niter = 10, nbin = 50, if.filter = TRUE,
#' # ngene.selected.for.pi = 150,
#' # mean.diff.in.CM = 0.25, tol = 10^(-5))
#' #
#' # Example 3: three-component mixed cell line data applying
#' # component merging strategy
#' # data(test.data.3comp)
#' # res <- DeMixT(data.Y = test.data.3comp$data.Y,
#' # data.N1 = test.data.3comp$data.N1,
#' # data.N2 = test.data.3comp$data.N2,
#' # if.filter = TRUE)
#' #
#' # Example: convert a matrix into the SummarizedExperiment format
#' # library(SummarizedExperiment)
#' # example <- matrix(c(1, 2, 3, 4, 5, 6), nrow = 2, ncol = 3, byrow = TRUE)
#' # example.se <- SummarizedExperiment(assays = list(counts = example))
#'
#' @references Wang Z, Cao S, Morris J S, et al. Transcriptome Deconvolution of
#' Heterogeneous Tumor Samples with Immune Infiltration. iScience, 2018, 9: 451-460.
#'
#' @keywords DeMixT
#'
#' @export
DeMixT <- function(
data.Y, data.N1, data.N2 = NULL,
niter = 10, nbin = 50, if.filter = TRUE,
filter.sd = 0.5, ngene.selected.for.pi = NA,
mean.diff.in.CM = 0.25, nspikein = NULL,
gene.selection.method = 'GS',
ngene.Profile.selected = NA,
tol = 10^(-5), output.more.info = FALSE,
pi01 = NULL, pi02 = NULL,
nthread = parallel::detectCores() - 1) {
message("Step 1: Estimation of Proportions\n")
if (!is.null(data.N2)) nspikein = 0
if (gene.selection.method == 'DE'){
res.pi <- DeMixT_DE(data.Y = data.Y, data.N1 = data.N1,
data.N2 = data.N2,
niter = niter, nbin = nbin,
if.filter = if.filter, filter.sd= filter.sd,
nspikein = nspikein,
ngene.selected.for.pi = ngene.selected.for.pi,
mean.diff.in.CM = mean.diff.in.CM,
tol = tol, pi01 = pi01, pi02 = pi02,
nthread = nthread)
}else{
res.pi <- DeMixT_GS(data.Y = data.Y, data.N1 = data.N1,
data.N2 = data.N2,
niter = niter, nbin = nbin,
if.filter = if.filter, filter.sd = filter.sd,
ngene.Profile.selected = ngene.Profile.selected,
ngene.selected.for.pi = ngene.selected.for.pi,
mean.diff.in.CM = mean.diff.in.CM, nspikein = nspikein,
tol = tol, pi01 = pi01, pi02 = pi02,
nthread = nthread)
}
message("Step 2: Deconvolution of Expressions\n")
res.S2 <- DeMixT_S2(data.Y = data.Y, data.N1 = data.N1,
data.N2 = data.N2,
givenpi = c(t(res.pi$pi[-nrow(res.pi$pi),])), nbin = nbin,
nthread = nthread)
message("Deconvolution is finished\n")
if (is.null(data.N2)) { # two-component
if (output.more.info) {
return(list(pi = res.pi$pi, ExprT = res.S2$decovExprT,
ExprN1 = res.S2$decovExprN1, Mu = res.S2$decovMu,
Sigma = res.S2$decovSigma, pi.iter = res.pi$pi.iter,
gene.name = res.pi$gene.name))
}
return(list(pi = res.pi$pi, ExprT = res.S2$decovExprT,
ExprN1 = res.S2$decovExprN1, Mu = res.S2$decovMu,
Sigma = res.S2$decovSigma,
gene.name = res.pi$gene.name))
} else { # three-component
if (output.more.info) {
return(list(pi = res.pi$pi, ExprT = res.S2$decovExprT,
ExprN1 = res.S2$decovExprN1, ExprN2 = res.S2$decovExprN2,
Mu = res.S2$decovMu, Sigma = res.S2$decovSigma,
pi.iter = res.pi$pi.iter, gene.name = res.pi$gene.name))
}
return(list(pi = res.pi$pi, ExprT = res.S2$decovExprT,
ExprN1 = res.S2$decovExprN1, ExprN2 = res.S2$decovExprN2,
Mu = res.S2$decovMu, Sigma = res.S2$decovSigma,
gene.name = res.pi$gene.name))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.