Nothing
#' Transformation an expression matrix to binary differential expression matrix
#'
#' Transform the RNA-seq counts or normalized expression matrix into binary differential expression matrix of -1, 0 and 1, which indicates the down-regulation, no change and up-regulation.
#'
#' @name bi.deg
#' @param exp a matrix or data frame for expression data. The expression value can be counts or normalized expression data
#' @param cl a vector of 0 and 1. It has equal length with the column number of exp. 1 indicates the corresponding samples are patients and 0 is control or normal
#' @param method defines the methods applied for DE analysis. The possible value is 'edger', 'deseq2', 'normalized'. 'edger' or 'deseq2' is used for RNA-seq count data; 'normalized' is used for normalized RNA-seq or microarray data
#' @param cutoff the p-value cutoff for DEGs
#' @param cores the thread number
#'
#' @author Guofeng Meng
#'
#' @importFrom edgeR calcNormFactors estimateDisp getDispersion equalizeLibSizes DGEList
#' @importFrom utils tail
#' @importFrom methods is
#' @importFrom DESeq2 DESeqDataSetFromMatrix estimateSizeFactors estimateDispersions dispersions varianceStabilizingTransformation
#' @importFrom SummarizedExperiment assay
#' @import parallel
#'
#' @details For each sample in 'exp', 'cl' defines the patients and normal. The normal samples are used to construct the expression references with negative binomial distribution (e.g. method='edger' or method='deseq2') or a normal distribution (method='normalized').
#'
#'
#' When counts data are used, the DEG analysis is performed using the functions implemented by `DESeq2` or `edgeR`. The dispersion and mu values are estimated.
#'
#' @return A deg class object with value of 1, 0 and -1.
#'
#' @examples
#' deg <- bi.deg(exp,cl=cl, method='edger', cutoff=0.05) # exp is the RNA-seq counts matrix
#' @export
bi.deg <- function(exp, cl, method = c("edger", "deseq2", "normalized")[1], cutoff = 0.05,
cores = 1) {
if (!is(exp, "matrix") & !is(exp, "data.frame"))
stop("Error: exp: should be matrix or data.frame")
exp = as.matrix(exp)
if (method != "edger" & method != "deseq2" & method != "normalized")
stop("Error:method: must be edger, deseq2 or normalized data. method is not recognized")
if (dim(exp)[2] != length(cl))
stop("Error:cl: has the wrong numbers!")
wh.ct = which(cl == 0)
if (length(wh.ct) == 0)
stop("Error: No control sample is found or control samples are not set as 0!")
if (length(wh.ct) <= 2)
stop("Error: No enough control sample is used!")
if (length(wh.ct) < 10)
print(paste("Warning: only ", length(wh.ct), " control samples are used!",
sep = ""))
wh.pa = which(cl == 1)
if (length(wh.pa) == 0)
stop("Error: No disease/treated sample is found or samples are not set as 1!")
genes = row.names(exp)
pas = colnames(exp[, wh.pa])
n.ct = length(wh.ct)
if (method == "edger") {
y = DGEList(counts = exp[, wh.ct])
y <- calcNormFactors(y)
y <- estimateDisp(y)
disp = as.vector(getDispersion(y))
mycutoff = rep(cutoff, length(disp))
mycutoff[disp > quantile(disp, probs = 0.97)] = cutoff + 0.05
rm(y)
deg.lst = bplapply(wh.pa, function(x) {
z = DGEList(counts = exp[, c(wh.ct, x)])
z2 = equalizeLibSizes(z, dispersion = disp)$pseudo.counts
p = pnbinom(z2[, n.ct + 1], size = 1/disp, mu = rowSums(z2[, seq_len(n.ct)])/n.ct,
lower.tail = FALSE)
bi = integer(length(p))
bi[p <= mycutoff] = 1
bi[1 - p <= mycutoff] = -1
return(bi)
}, BPPARAM= MulticoreParam( workers= min(cores, length(wh.pa))))
names(deg.lst) <- pas
deg = as.matrix(as.data.frame(deg.lst))
}
if (method == "deseq2") {
y = DESeqDataSetFromMatrix(countData = exp[, wh.ct], colData = data.frame(lab = rep("control",
length(wh.ct))), design = ~1)
y <- estimateSizeFactors(y)
y <- estimateDispersions(y, quiet = TRUE)
disp = dispersions(y)
mycutoff = rep(cutoff, length(disp))
mycutoff[disp > quantile(disp, probs = 0.97)] = cutoff + 0.05
rm(y)
deg.lst = bplapply(wh.pa, function(x) {
y = DESeqDataSetFromMatrix(countData = exp[, c(wh.ct, x)], colData = data.frame(lab =
c(rep("control", length(wh.ct)), "disease")), design = ~1)
y <- estimateSizeFactors(y)
y <- estimateDispersions(y, quiet = TRUE)
z2 = 2^assay(varianceStabilizingTransformation(y))
p = pnbinom(z2[, n.ct + 1], size = 1/disp, mu = rowSums(z2[, seq_len(n.ct)])/n.ct,
lower.tail = FALSE)
bi = integer(length(p))
bi[p <= mycutoff] = 1
bi[1 - p <= mycutoff] = -1
return(bi)
}, BPPARAM= MulticoreParam( workers= min(cores, length(wh.pa))))
names(deg.lst) <- pas
deg = as.matrix(as.data.frame(deg.lst))
}
if (method == "normalized") {
y = exp[, wh.ct]
mu = apply(y, 1, mean)
sd = apply(y, 1, sd)
rm(y)
deg.lst = bplapply(wh.pa, function(x) {
w = exp[, x]
z = (w - mu)/sd
p = pnorm(z, lower.tail = FALSE)
bi = integer(length(p))
bi[p <= mycutoff] = 1
bi[1 - p <= mycutoff] = -1
return(bi)
}, BPPARAM= MulticoreParam( workers= min(cores, length(wh.pa))))
names(deg.lst) <- pas
deg = as.matrix(as.data.frame(deg.lst))
}
row.names(deg) <- genes
colnames(deg) <- pas
attr(deg, "class") <- "deg"
return(deg)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.