Nothing
#' VIPER
#'
#' This function performs Virtual Inference of Protein-activity by Enriched Regulon analysis
#'
#' @param eset ExpressionSet object or Numeric matrix containing the expression data or gene expression signatures, with samples in columns and genes in rows
#' @param regulon Object of class regulon or list of objects of class regulon for metaVIPER analysis
#' @param dnull Numeric matrix for the null model, usually generated by \code{nullTtest}
#' @param pleiotropy Logical, whether correction for pleiotropic regulation should be performed
#' @param nes Logical, whether the enrichment score reported should be normalized
#' @param method Character string indicating the method for computing the single samples signature, either scale, rank, mad, ttest or none
#' @param bootstraps Integer indicating the number of bootstraps iterations to perform. Only the scale method is implemented with bootstraps.
#' @param minsize Integer indicating the minimum number of targets allowed per regulon
#' @param adaptive.size Logical, whether the weighting scores should be taken into account for computing the regulon size
#' @param eset.filter Logical, whether the dataset should be limited only to the genes represented in the interactome
#' #' @param mvws Number or vector indicating either the exponent score for the metaViper weights, or the inflection point and trend for the sigmoid function describing the weights in metaViper
#' @param pleiotropyArgs list of 5 numbers for the pleotropy correction indicating: regulators p-value threshold, pleiotropic interaction p-value threshold, minimum number of targets in the overlap between pleiotropic regulators, penalty for the pleiotropic interactions and the method for computing the pleiotropy, either absolute or adaptive
#' @param cores Integer indicating the number of cores to use (only 1 in Windows-based systems)
#' @param verbose Logical, whether progression messages should be printed in the terminal
#' @return A matrix of inferred activity for each regulator gene in the network across all samples
#' @seealso \code{\link{msviper}}
#' @examples
#' data(bcellViper, package="bcellViper")
#' d1 <- exprs(dset)
#' res <- viper(d1, regulon)
#' dim(d1)
#' d1[1:5, 1:5]
#' regulon
#' dim(res)
#' res[1:5, 1:5]
#' @export
viper <- function(eset, regulon, dnull=NULL, pleiotropy=FALSE, nes=TRUE, method=c("none", "scale", "rank", "mad", "ttest"), bootstraps=0, minsize=25, adaptive.size=FALSE, eset.filter=TRUE, mvws=1, pleiotropyArgs=list(regulators=.05, shadow=.05, targets=10, penalty=20, method="adaptive"), cores=1, verbose=TRUE) {
method <- match.arg(method)
pdata <- NULL
if (is(eset, "viperSignature")) {
dnull <- eset$nullmodel
eset <- eset$signature
method="none"
if (bootstraps>0) {
bootstraps <- 0
warning("Using a null model, bootstraps iterations are ignored.", call.=FALSE)
}
}
if (pleiotropy & bootstraps>0) {
bootstraps <- 0
warning("Using pleiotropic correction, bootstraps iterations are ignored.", call.=FALSE)
}
if (is(eset, "ExpressionSet")) {
pdata <- phenoData(eset)
eset <- exprs(eset)
} else if (is.data.frame(eset)) {
eset <- as.matrix(eset)
}
if (is.null(nrow(eset))) eset <- matrix(eset, length(eset), 1, dimnames=list(names(eset), NULL))
if (verbose) message("\nComputing the association scores")
if (names(regulon[[1]])[1]=="tfmode") regulon <- list(regulon=regulon)
if (bootstraps>0) {
return(bootstrapViper(eset=eset, regulon=regulon, nes=nes, bootstraps=bootstraps, eset.filter=eset.filter, adaptive.size=adaptive.size, minsize=minsize, cores=cores, verbose=verbose))
}
cores1 <- 1
if (length(regulon)>1) {
cores1 <- min(cores, length(regulon))
cores <- 1
nes <- TRUE # Only NES can be returned in metaViper
}
if (cores>1 | cores1>1) verbose <- FALSE # Set verbose to FALSE if running in multiple cores
switch(method,
scale={tt <- t(scale(t(eset)))},
rank={tt <- t(apply(eset, 1, rank))*punif(length(eset), -.1, .1)},
mad={tt <- t(apply(eset, 1, function(x) (x-median(x))/mad(x)))},
ttest={
tt <- sapply(1:ncol(eset), function(i, eset) rowTtest(eset[, i]-eset[, -i])$statistic, eset=eset)
colnames(tt) <- colnames(eset)
rownames(tt) <- rownames(eset)
},
none={tt <- eset}
)
if (verbose) message("Computing regulons enrichment with aREA")
res <- mclapply(regulon, function(regulon, dnull, pleiotropy, nes, tt, eset.filter, adaptive.size, cores, pleiotropyArgs, verbose) {
if (eset.filter) {
tmp <- c(names(regulon), unlist(lapply(regulon, function(x) names(x$tfmode)), use.names=FALSE))
tt <- tt[rownames(tt) %in% unique(tmp), ]
}
regulon <- lapply(regulon, function(x, genes) {
filtro <- names(x$tfmode) %in% genes
x$tfmode <- x$tfmode[filtro]
if (length(x$likelihood)==length(filtro)) x$likelihood <- x$likelihood[filtro]
return(x)
}, genes=rownames(tt))
if (adaptive.size) regulon <- regulon[sapply(regulon, function(x) {
sum((x$likelihood/max(x$likelihood))^2)
})>=minsize]
else regulon <- regulon[sapply(regulon, function(x) length(x$tfmode))>=minsize]
es <- aREA(tt, regulon, cores=cores, minsize=0, verbose=verbose)
if (!nes) {
if (pleiotropy) warning("No pleiotropy correction implemented when raw es is returned.", call.=FALSE)
return(es$es)
}
if (is.null(dnull)) nes <- es$nes
else {
if (verbose) message("\nEstimating NES with null model")
tmp <- aREA(dnull, regulon, cores=cores, minsize=0, verbose=verbose)$es
if (ncol(tmp)>499) {
nes <- t(sapply(1:nrow(tmp), function(i, tmp, es) {
#aecdf1(tmp[i, ], symmetric=TRUE, es[i, ])$nes
aecdf(tmp[i, ], symmetric=TRUE)(es[i, ])$nes
}, tmp=tmp, es=es$es))
rownames(nes) <- rownames(es$nes)
}
else {
nes <- es$es/sqrt(frvarna(tmp)[, 1])
}
}
if (pleiotropy) {
pb <- NULL
if (verbose) {
message("\nComputing pleiotropy for ", ncol(nes), " samples.")
message("\nProcess started at ", date())
}
if (cores>1) {
nes <- mclapply(1:ncol(nes), function(i, ss, nes, regulon, args, dnull) {
nes <- nes[, i]
sreg <- shadowRegulon(ss[, i], nes, regulon, regulators=args[[1]], shadow=args[[2]], targets=args[[3]], penalty=args[[4]], method=args[[5]])
if (!is.null(sreg)) {
if (is.null(dnull)) tmp <-aREA(ss[, i], sreg, minsize=5, cores=1)$nes[, 1]
else {
tmp <- aREA(cbind(ss[, i], dnull), sreg, minsize=5, cores=1)$es
#tmp <- apply(tmp, 1, function(x) aecdf1(x[-1], symmetric=TRUE, x[1])$nes)
tmp <- apply(tmp, 1, function(x) aecdf(x[-1], symmetric=TRUE)(x[1])$nes)
}
nes[match(names(tmp), names(nes))] <- tmp
}
return(nes)
}, ss=tt, nes=nes, regulon=regulon, args=pleiotropyArgs, dnull=dnull, mc.cores=cores)
nes <- sapply(nes, function(x) x)
}
else {
if (verbose) pb <- txtProgressBar(max=ncol(nes), style=3)
nes <- sapply(1:ncol(nes), function(i, ss, nes, regulon, args, dnull, pb) {
nes <- nes[, i]
sreg <- shadowRegulon(ss[, i], nes, regulon, regulators=args[[1]], shadow=args[[2]], targets=args[[3]], penalty=args[[4]], method=args[[5]])
if (!is.null(sreg)) {
if (is.null(dnull)) tmp <-aREA(ss[, i], sreg, minsize=5)$nes[, 1]
else {
tmp <- aREA(cbind(ss[, i], dnull), sreg, minsize=5)$es
#tmp <- apply(tmp, 1, function(x) aecdf1(x[-1], symmetric=TRUE, x[1])$nes)
tmp <- apply(tmp, 1, function(x) aecdf(x[-1], symmetric=TRUE)(x[1])$nes)
}
nes[match(names(tmp), names(nes))] <- tmp
}
if (is(pb, "txtProgressBar")) setTxtProgressBar(pb, i)
return(nes)
}, ss=tt, nes=nes, regulon=regulon, args=pleiotropyArgs, dnull=dnull, pb=pb)
}
if (verbose) message("\nProcess ended at ", date(), "\n")
if (is.null(nrow(nes))) nes <- matrix(nes, length(nes), 1, dimnames=list(names(nes), NULL))
colnames(nes) <- colnames(eset)
return(nes)
}
return(nes)
}, dnull=dnull, pleiotropy=pleiotropy, nes=nes, tt=tt, eset.filter=eset.filter, adaptive.size=adaptive.size, cores=cores, pleiotropyArgs=pleiotropyArgs, verbose=verbose, mc.cores=cores1)
if (length(res)==1) nes <- res[[1]]
else {
genes <- unique(unlist(lapply(res, rownames), use.names=FALSE))
nes <- sapply(res, function(x, genes) as.vector(x[match(genes, rownames(x)), ]), genes=genes)
nes[is.na(nes)] <- 0
if (length(mvws)==1) {
ws <- abs(nes)^mvws
}
else {
ws <- sigT(abs(nes), mvws[2], mvws[1])
}
nes <- matrix(rowSums(nes*ws)/rowSums(ws), length(genes), ncol(res[[1]]), dimnames=list(genes, colnames(res[[1]])))
}
if (is.null(pdata)) return(nes)
return(ExpressionSet(assayData=nes, phenoData=pdata))
}
##########
#' Generic S4 method for signature and sample-permutation null model for VIPER
#'
#' This function generates a viperSignature object from a test dataset based on a set of samples to use as reference
#'
#' @param eset ExpressionSet object or numeric matrix containing the test dataset, with genes in rows and samples in columns
#' @param ... Additional parameters added to keep compatibility
#' @return viperSignature S3 object containing the signature and null model
#' @export
#' @docType methods
#' @rdname viperSignature-methods
setGeneric("viperSignature", function(eset, ...) standardGeneric("viperSignature"))
#' @param pheno Character string indicating the phenotype data to use
#' @param refgroup Vector of character string indicatig the category of \code{pheno} to use as reference group
#' @param method Character string indicating how to compute the signature and null model, either ttest, zscore or mean
#' @param per Integer indicating the number of sample permutations
#' @param bootstrap Logical, whether null model should be estimated with bootstrap. In this case, only reference samples are used.
#' @param seed Integer indicating the seed for the random sample generation. The system default is used when set to zero
#' @param cores Integer indicating the number of cores to use (only 1 in Windows-based systems)
#' @param verbose Logical, whether progression messages should be printed in the terminal
#' @examples
#' data(bcellViper, package="bcellViper")
#' ss <- viperSignature(dset, "description", c("N", "CB", "CC"), per=100) # Only 100 permutations to reduce computation time, but it is recommended to perform at least 1000 permutations
#' res <- viper(ss, regulon)
#' dim(exprs(dset))
#' exprs(dset)[1:5, 1:5]
#' regulon
#' dim(res)
#' exprs(res)[1:5, 1:5]
#' @rdname viperSignature-methods
#' @aliases viperSignature,ExpressionSet-method
setMethod("viperSignature", "ExpressionSet", function(eset, pheno, refgroup, method=c("zscore", "ttest", "mean"), per=100, bootstrap=TRUE, seed=1, cores=1, verbose=TRUE) {
method <- match.arg(method)
pos <- pData(eset)[[pheno]] %in% refgroup
tmp <- viperSignature(exprs(eset)[, !pos], exprs(eset)[, pos], method=method, bootstrap=bootstrap, per=per, seed=seed, cores=cores, verbose=verbose)
pdata <- phenoData(eset)
pData(pdata) <- pData(pdata)[match(colnames(tmp$signature), rownames(pData(pdata))), ]
tmp$signature <- ExpressionSet(assayData=tmp$signature, phenoData=pdata)
return(tmp)
})
#' @param ref Numeric matrix containing the reference samples (columns) and genes in rows
#' @examples
#' data(bcellViper, package="bcellViper")
#' d1 <- exprs(dset)
#' pos <- pData(dset)[["description"]] %in% c("N", "CB", "CC")
#' ss <- viperSignature(d1[, !pos], d1[, pos], per=100) # Only 100 permutations to reduce computation time, but it is recommended to perform at least 1000 permutations
#' res <- viper(ss, regulon)
#' dim(d1)
#' d1[1:5, 1:5]
#' regulon
#' dim(res)
#' res[1:5, 1:5]
#' @rdname viperSignature-methods
#' @aliases viperSignature,matrix-method
setMethod("viperSignature", "matrix", function(eset, ref, method=c("zscore", "ttest", "mean"), per=100, bootstrap=TRUE, seed=1, cores=1, verbose=TRUE) {
method <- match.arg(method)
if (seed>0) set.seed(ceiling(seed))
switch(method,
ttest={
vpsig <- apply(eset, 2, function(x, ctrl) {
tmp <- rowTtest(x-ctrl)
(qnorm(tmp$p.value/2, lower.tail=FALSE)*sign(tmp$statistic))[, 1]
}, ctrl=ref)
rownames(vpsig) <- rownames(eset)
colnames(vpsig) <- colnames(eset)
},
zscore={
vpsig <- (eset-rowMeans(ref))/sqrt(frvarna(ref)[, 1])
},
mean={
vpsig <- eset-rowMeans(ref)
})
if (bootstrap) {
if (ncol(ref)<6) {
vpnull <- NULL
warning("Not enough samples to compute null model by sample permutation, gene permutation will be used instead", call.=FALSE)
}
else {
per <- ceiling(per/ncol(ref)) # Number of bootstraps
pb <- NULL
if (verbose) pb <- txtProgressBar(max=ncol(ref), style=3)
tmp <- lapply(1:ncol(ref), function(i, d22, per, method, pb) { # Leave one out vs. bootstrap set
tmp <- sapply(1:per, function(i, x, y, method) {
pos <- sample(ncol(y), replace=TRUE)
y <- y[, pos]
switch(method,
ttest={
dnull <- rowTtest(x-y)
dnull <- qnorm(dnull$p.value[, 1]/2, lower.tail=FALSE)*sign(dnull$statistic[, 1])
},
zscore={
dnull <- (x-rowMeans(y))/sqrt(frvarna(y)[, 1])
},
mean={
dnull <- x-rowMeans(y)
})
}, x=d22[, i], y=d22[, -i], method=method)
if (!is.null(pb)) setTxtProgressBar(pb, i)
return(tmp)
}, d22=ref, per=per, method=method, pb=pb)
vpnull <- do.call(cbind, tmp)
}
}
else {
if ((ncol(eset)+ncol(ref))<12) {
vpnull <- NULL
warning("Not enough samples to compute null model by sample permutation, gene permutation will be used instead", call.=FALSE)
}
else {
if (ncol(ref)<12) {
warning("Not enough reference samples to compute null model, all samples will be used", call.=FALSE)
ref <- cbind(ref, eset)
}
nco <- choose(ncol(ref), round(ncol(ref)/2))
if (nco<(50*per)) {
per1 <- combn(sample(ncol(ref)), round(ncol(ref)/2))[, 1:min(per, nco)]
}
else {
per1 <- sapply(1:(min(per, nco)), function(i, n1, n2) sample(n1, n2), n1=ncol(ref), n2=round(ncol(ref)/2))
}
pb <- NULL
if (cores>1) {
vpnull <- mclapply(1:ncol(per1), function(i, dset, ref, size, method, per1) {
switch(method,
ttest={
tmp <- NA
while(any(is.na(tmp))) {
pos <- sample(ncol(dset), size)
tmp <- rowTtest(dset[, pos[1]]-dset[, pos[-1]])
tmp <- (qnorm(tmp$p.value/2, lower.tail=FALSE)*sign(tmp$statistic))[, 1]
}
},
zscore={
pos <- per1[, i]
tmp <- (rowMeans(ref[, pos])-rowMeans(ref[, -pos]))/(sqrt(frvarna(ref[, pos])[, 1])+sqrt(frvarna(ref[, -pos])[, 1]))
},
mean={
pos <- per1[, i]
tmp <- rowMeans(ref[, pos])-rowMeans(ref[, -pos])
})
return(tmp)
}, dset=cbind(eset, ref), ref=ref, size=ncol(ref)+1, method=method, per1=per1, mc.cores=cores)
vpnull <- sapply(vpnull, function(x) x)
}
else {
if (verbose) pb <- txtProgressBar(max=ncol(per1), style=3)
vpnull <- sapply(1:ncol(per1), function(i, dset, ref, pb, size, verbose, method, per1) {
if (verbose) setTxtProgressBar(pb, i)
switch(method,
ttest={
tmp <- NA
while(any(is.na(tmp))) {
pos <- sample(ncol(dset), size)
tmp <- rowTtest(dset[, pos[1]]-dset[, pos[-1]])
tmp <- (qnorm(tmp$p.value/2, lower.tail=FALSE)*sign(tmp$statistic))[, 1]
}
},
zscore={
pos <- per1[, i]
tmp <- (rowMeans(ref[, pos])-rowMeans(ref[, -pos]))/(sqrt(frvarna(ref[, pos])[, 1])+sqrt(frvarna(ref[, -pos])[, 1]))
},
mean={
pos <- per1[, i]
tmp <- rowMeans(ref[, pos])-rowMeans(ref[, -pos])
})
return(tmp)
}, dset=cbind(eset, ref), ref=ref, size=ncol(ref)+1, pb=pb, verbose=verbose, method=method, per1=per1)
}
rownames(vpnull) <- rownames(eset)
}
}
tmp <- list(signature=vpsig, nullmodel=vpnull)
class(tmp) <- "viperSignature"
return(tmp)
})
#' bootstrapsViper
#'
#' This function performs a viper analysis with bootstraps
#'
#' @param eset ExpressionSet object or Numeric matrix containing the expression data, with samples in columns and genes in rows
#' @param regulon Object of class regulon
#' @param nes Logical, whether the enrichment score reported should be normalized
#' @param bootstraps Integer indicating the number of bootstraps iterations to perform. Only the scale method is implemented with bootstraps.
#' @param eset.filter Logical, whether the dataset should be limited only to the genes represented in the interactome
#' @param adaptive.size Logical, whether the weighting scores should be taken into account for computing the regulon size
#' @param minsize Integer indicating the minimum number of targets allowed per regulon
#' @param mvws Number or vector indicating either the exponent score for the metaViper weights, or the inflection point and trend for the sigmoid function describing the weights in metaViper
#' @param cores Integer indicating the number of cores to use (only 1 in Windows-based systems)
#' @param verbose Logical, whether progression messages should be printed in the terminal
#' @return A list containing a matrix of inferred activity for each regulator gene in the network across all samples and the corresponding standard deviation computed from the bootstrap iterations.
#' @seealso \code{\link{viper}}
#' @examples
#' data(bcellViper, package="bcellViper")
#' d1 <- exprs(dset)
#' res <- viper(d1[, 1:50], regulon, bootstraps=10) # Run only on 50 samples to reduce computation time
#' dim(d1)
#' d1[1:5, 1:5]
#' regulon
#' dim(res$nes)
#' res$nes[1:5, 1:5]
#' res$sd[1:5, 1:5]
bootstrapViper <- function(eset, regulon, nes=TRUE, bootstraps=10, eset.filter=FALSE, adaptive.size=TRUE, minsize=20, mvws=1, cores=1, verbose=TRUE) {
cores1 <- 1
if (length(regulon)>1) {
cores1 <- min(cores, length(regulon))
cores <- 1
nes <- TRUE # Only NES can be returned in metaViper
}
if (cores>1 | cores1>1) verbose <- FALSE # Set verbose to FALSE if running in multiple cores
if (verbose) {
message("\nComputing the parameters for ", bootstraps, " bootstraps.")
message("Process started at ", date())
}
tmp <- lapply(1:bootstraps, function(i, eset) {
tmp <- eset[, sample(ncol(eset), replace=TRUE)]
return(list(mean=rowMeans(tmp), sd=sqrt(frvarna(tmp)[, 1])))
}, eset=eset)
btmean <- sapply(tmp, function(x) x$mean)
btsd <- sapply(tmp, function(x) x$sd)
res <- mclapply(regulon, function(regulon, eset, btmean, btsd, cores, verbose, eset.filter, adaptive.size, minsize) {
if (eset.filter) {
tmp <- c(names(regulon), unlist(lapply(regulon, function(x) names(x$tfmode)), use.names=FALSE))
eset <- eset[rownames(eset) %in% unique(tmp), ]
}
regulon <- lapply(regulon, function(x, genes) {
filtro <- names(x$tfmode) %in% genes
x$tfmode <- x$tfmode[filtro]
if (length(x$likelihood)==length(filtro)) x$likelihood <- x$likelihood[filtro]
return(x)
}, genes=rownames(eset))
if (adaptive.size) regulon <- regulon[sapply(regulon, function(x) {
sum((x$likelihood/max(x$likelihood))^2)
})>=minsize]
else regulon <- regulon[sapply(regulon, function(x) length(x$tfmode))>=minsize]
targets <- unique(unlist(lapply(regulon, function(x) names(x$tfmode)), use.names=FALSE))
mor <- sapply(regulon, function(x, genes) {
return(x$tfmode[match(genes, names(x$tfmode))])
}, genes=targets)
wts <- sapply(regulon, function(x, genes) {
tmp <- x$likelihood[match(genes, names(x$tfmode))]
tmp[is.na(match(genes, names(x$tfmode)))] <- NA
return(tmp/max(tmp, na.rm=T))
}, genes=targets)
mor[is.na(mor)] <- 0
wts[is.na(wts)] <- 0
rownames(wts) <- targets
nes <- sqrt(colSums(wts^2))
wts <- scale(wts, center=FALSE, scale=colSums(wts))
postemp <- match(rownames(wts), rownames(btmean))
btmean <- btmean[postemp, ]
btsd <- btsd[postemp, ]
pb <- NULL
if (cores>1) {
res <- mclapply(1:ncol(eset), function(i, eset, btmean, btsd, mor, wts) {
tt <- (eset[, i]-btmean)/btsd
tt <- apply(tt, 2, function(x) rank(x))/(nrow(tt)+1)
t1 <- abs(tt-.5)*2
t1 <- t1+(1-max(t1))/2
t1 <- qnorm(t1)
t2 <- qnorm(tt)
sum1 <- t(mor * wts) %*% t2
sum2 <- t((1-abs(mor)) * wts) %*% t1
ss <- sign(sum1)
ss[ss==0] <- 1
tmp <- (abs(sum1) + sum2*(sum2>0))*ss
return(list(mean=rowMeans(tmp), sd=sqrt(frvarna(tmp)[, 1])))
}, eset=eset, btmean=btmean, btsd=btsd, mor=mor, wts=wts, mc.cores=cores)
}
else {
if (verbose) pb <- txtProgressBar(max=ncol(eset), style=3)
res <- lapply(1:ncol(eset), function(i, eset, btmean, btsd, mor, wts, pb) {
tt <- (eset[, i]-btmean)/btsd
tt <- apply(tt, 2, function(x) rank(x))/(nrow(tt)+1)
t1 <- abs(tt-.5)*2
t1 <- t1+(1-max(t1))/2
t1 <- qnorm(t1)
t2 <- qnorm(tt)
sum1 <- t(mor * wts) %*% t2
sum2 <- t((1-abs(mor)) * wts) %*% t1
ss <- sign(sum1)
ss[ss==0] <- 1
tmp <- (abs(sum1) + sum2*(sum2>0))*ss
if (is(pb, "txtProgressBar")) setTxtProgressBar(pb, i)
return(list(mean=rowMeans(tmp), sd=sqrt(frvarna(tmp)[, 1])))
}, eset=eset, btmean=btmean, btsd=btsd, mor=mor, wts=wts, pb=pb)
}
names(res) <- colnames(eset)
if (verbose) message("\nProcess ended at ", date())
return(list(nes=sapply(res, function(x) x$mean)*nes, sd=sapply(res, function(x) x$sd)*nes))
}, btmean=btmean, eset=eset, btsd=btsd, cores=cores, verbose=verbose, eset.filter=eset.filter, adaptive.size=adaptive.size, minsize=minsize, mc.cores=cores1)
if (length(res)==1) return(res[[1]])
genes <- unique(unlist(lapply(res, function(x) rownames(x$nes)), use.names=FALSE))
nes <- sapply(res, function(x, genes) as.vector(x$nes[match(genes, rownames(x$nes)), ]), genes=genes)
nessd <- sapply(res, function(x, genes) as.vector(x$sd[match(genes, rownames(x$sd)), ]), genes=genes)
nes[is.na(nes)] <- 0
nessd[is.na(nessd)] <- 0
if (length(mvws)==1) {
ws <- abs(nes)^mvws
}
else {
ws <- sigT(abs(nes), mvws[2], mvws[1])
}
nes <- matrix(rowSums(nes*ws)/rowSums(ws), length(genes), ncol(res[[1]]$nes), dimnames=list(genes, colnames(res[[1]]$nes)))
nessd <- matrix(rowSums(nessd*ws)/rowSums(ws), length(genes), ncol(res[[1]]$sd), dimnames=list(genes, colnames(res[[1]]$sd)))
return(list(nes=nes, sd=nessd))
}
#' analytic Rank-based Enrichment Analysis
#'
#' This function performs wREA enrichment analysis on a set of signatues
#'
#' @param eset Matrix containing a set of signatures, with samples in columns and traits in rows
#' @param regulon Regulon object
#' @param method Character string indicating the implementation, either auto, matrix or loop
#' @param minsize Interger indicating the minimum allowed size for the regulons
#' @param cores Integer indicating the number of cores to use (only 1 in Windows-based systems)
#' @param wm Optional numeric matrix of weights (0; 1) with same dimension as eset
#' @param verbose Logical, whether a progress bar should be shown
#' @return List of two elements, enrichment score and normalized enrichment score
#' @export
aREA <- function(eset, regulon, method=c("auto", "matrix", "loop"), minsize=20, cores=1, wm=NULL, verbose=FALSE) {
method <- match.arg(method)
if (is.null(ncol(eset))) eset <- matrix(eset, length(eset), 1, dimnames=list(names(eset), NULL))
if (minsize>0) {
regulon <- lapply(regulon, function(x, genes) {
pos <- names(x$tfmode) %in% genes
list(tfmode=x$tfmode[pos], likelihood=x$likelihood[pos])
}, genes=rownames(eset))
regulon <- regulon[sapply(regulon, function(x) length(x$tfmode))>=minsize]
if (length(regulon)==0) stop(paste("There are no regulons with size of at least ", minsize, " targets.", sep=""), call.=FALSE)
class(regulon) <- "regulon"
}
targets <- unique(unlist(lapply(regulon, function(x) names(x$tfmode)), use.names=FALSE))
if (length(which(is.na(eset)))>0) method <- "loop"
if (method=="auto") {
method <- "matrix"
if (length(targets)>1000) method <- "loop"
}
if (length(wm)>0 | length(which(is.na(eset)))>0) method <- "loop"
switch(method,
matrix={
mor <- sapply(regulon, function(x, genes) {
return(x$tfmode[match(genes, names(x$tfmode))])
}, genes=targets)
wts <- sapply(regulon, function(x, genes) {
tmp <- x$likelihood[match(genes, names(x$tfmode))]
tmp[is.na(match(genes, names(x$tfmode)))] <- NA
return(tmp/max(tmp, na.rm=T))
}, genes=targets)
mor[is.na(mor)] <- 0
wts[is.na(wts)] <- 0
nes <- sqrt(colSums(wts^2))
wts <- scale(wts, center=FALSE, scale=colSums(wts))
pos <- match(targets, rownames(eset))
t2 <- apply(eset, 2, rank)/(nrow(eset)+1)
t1 <- abs(t2-.5)*2
t1 <- t1+(1-max(t1))/2
t1 <- qnorm(filterRowMatrix(t1, pos))
t2 <- qnorm(filterRowMatrix(t2, pos))
sum1 <- t(mor * wts) %*% t2
sum2 <- t((1-abs(mor)) * wts) %*% t1
ss <- sign(sum1)
ss[ss==0] <- 1
tmp <- (abs(sum1) + sum2*(sum2>0))*ss
tmp <- list(es=tmp, nes=tmp*nes)
},
loop={
t2 <- t(t(apply(eset, 2, rank, na.last="keep"))/(colSums(!is.na(eset))+1))
t1 <- abs(t2-.5)*2
t1 <- t(t(t1)+(1-apply(t1, 2, max, na.rm=TRUE))/2)
t1 <- qnorm(t1)
t2 <- qnorm(t2)
if (is.null(wm)) wm <- matrix(1, nrow(eset), ncol(eset), dimnames=list(rownames(eset), colnames(eset)))
else {
if (is.null(ncol(wm))) wm <- matrix(wm, length(wm), ncol(eset), dimnames=list(names(wm), colnames(eset)))
wm <- filterRowMatrix(wm, match(rownames(eset), rownames(wm)))
rownames(wm) <- rownames(eset)
wm[is.na(wm)] <- 0
}
wm[is.na(t1)] <- 0
t1[is.na(t1)] <- 0
t2[is.na(t2)] <- 0
pb <- NULL
if (cores>1) {
temp <- mclapply(1:length(regulon), function(i, regulon, t1, t2, ws) {
x <- regulon[[i]]
pos <- match(names(x$tfmode), rownames(t1))
sum1 <- matrix(x$tfmode * x$likelihood, 1, length(x$tfmode)) %*% (filterRowMatrix(t2, pos) * filterRowMatrix(ws, pos))
ss <- sign(sum1)
ss[ss==0] <- 1
sum2 <- matrix((1-abs(x$tfmode)) * x$likelihood, 1, length(x$tfmode)) %*% (filterRowMatrix(t1, pos) * filterRowMatrix(ws, pos))
return(as.vector(abs(sum1) + sum2*(sum2>0)) / colSums(x$likelihood * filterRowMatrix(ws, pos)) * ss)
}, regulon=regulon, t1=t1, t2=t2, mc.cores=cores, ws=wm)
temp <- sapply(temp, function(x) x)
}
else {
if (verbose) {
pb <- txtProgressBar(max=length(regulon), style=3)
}
temp <- sapply(1:length(regulon), function(i, regulon, t1, t2, pb, ws) {
x <- regulon[[i]]
pos <- match(names(x$tfmode), rownames(t1))
sum1 <- matrix(x$tfmode * x$likelihood, 1, length(x$tfmode)) %*% (filterRowMatrix(t2, pos) * filterRowMatrix(ws, pos))
ss <- sign(sum1)
ss[ss==0] <- 1
sum2 <- matrix((1-abs(x$tfmode)) * x$likelihood, 1, length(x$tfmode)) %*% (filterRowMatrix(t1, pos) * filterRowMatrix(ws, pos))
if (is(pb, "txtProgressBar")) setTxtProgressBar(pb, i)
return(as.vector(abs(sum1) + sum2*(sum2>0)) / colSums(x$likelihood * filterRowMatrix(ws, pos)) * ss)
}, regulon=regulon, t1=t1, t2=t2, pb=pb, ws=wm)
}
if (is.null(ncol(temp))) temp <- matrix(temp, 1, length(temp))
colnames(temp) <- names(regulon)
rownames(temp) <- colnames(eset)
if (length(which(wm<1))>0) {
w <- sapply(regulon, function(x, ws) {
tmp <- x$likelihood*filterRowMatrix(ws, match(names(x$tfmode), rownames(ws)))
sqrt(colSums(apply(tmp, 2, function(x) x/max(x))^2))
}, ws=wm)
if (is.null(ncol(w))) w <- t(w)
w <- t(w)
}
else {
w <- sapply(regulon, function(x) sqrt(sum((x$likelihood/max(x$likelihood))^2)))
}
return(list(es=t(temp), nes=t(temp)*w))
})
return(tmp)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.