##' Compute and annotate the intersection or union between contributiong genes of components originating from different IcaSet objects.
##'
##' @title Union and intersection of contributing genes
##' @param icaSets List of \code{IcaSet} objects, the \code{geneNames} of the \code{IcaSet} objects must be from the same type (e.g, gene Symbols).
##' @param keepCompByIcaSet Indices of the components to be considered in each \code{IcaSet}.
##' @param lab The names of the icaSets (e.g the names of the datasets they originate from).
##' @param cutoff The cutoff (on the absolute centered and scaled projections) above which the genes have to be considered.
##' @param type \code{"intersection"} to restrict the list of genes to the ones that are common between all datasets, or \code{"union"} to consider all the union of genes available across the datasets.
##' @param annotate If TRUE (default) the genes are annotated using function \code{writeGenes}.
##' @param file The HTML file name where the genes and their annotations are written, default is \code{type}Genes_lab1-i_lab2-j_... where i and j are the component indices contained in \code{keepCompByIcaSet}.
##' @param mart The mart object (database and dataset) used for annotation, see function \code{useMart} of package \code{biomaRt}.
##' @return A data.frame containing \describe{
##' \item{\code{typeID(icaSets[[1]])['geneID_biomart']}:}{the gene IDs,}
##' \item{median_rank}{the median of the ranks of each gene across the \code{IcaSet} objects,}
##' \item{analyses}{the labels of the \code{IcaSet} objects in which each gene is above the given \code{cutoff}}
##' \item{min_rank}{the minimum of the ranks of each gene across the \code{IcaSet} objects,}
##' \item{ranks}{the ranks of each gene in each \code{IcaSet} where it is available,}
##' \item{scaled_proj}{the centered and reduced projection of each gene in each \code{IcaSet} where it is available.}}
##' @author Anne Biton
##' @export
##' @seealso \code{\link[MineICA]{writeGenes}}
##' @examples \dontrun{
##' data(icaSetCarbayo)
##' mart <- useMart("ensembl", "hsapiens_gene_ensembl")
##'
##' ## comparison of two components
##' ## here the components come from the same IcaSet for convenience
##' ## but they must come from different IcaSet in practice.
##' compareGenes(keepCompByIcaSet = c(9,4), icaSets = list(icaSetCarbayo, icaSetCarbayo),
##' lab=c("Carbayo", "Carbayo2"), cutoff=3, type="union", mart=mart)
##'
##' }
##'
compareGenes <- function(keepCompByIcaSet, icaSets, lab, cutoff=0, type = c("union","intersection"), annotate=TRUE, file, mart=useMart("ensembl", "hsapiens_gene_ensembl")) {
type <- match.arg(type)
if (missing(file) || is.null(file))
file <- paste(type,"Genes_", paste(lab,"-",unlist(keepCompByIcaSet),collapse="_",sep=""),sep="",collapse="")
paths <- lab
icaSet <- p <- NULL
comps <-
foreach(icaSet=icaSets, comp=keepCompByIcaSet) %dopar% {
comp <- SByGene(icaSet)[,comp]
names(comp) <- geneNames(icaSet)
return(comp)
}
names(comps) <- paths
comps <- llply(comps,function(x) (x-mean(x))/sd(x))
#### intersection of the available genes
inter <-names(comps[[1]])
for (comp in comps)
inter <- intersect(inter,names(comp))
compsinter <- llply(comps,function(x,inter) x[inter], inter = inter)
#plot(as.data.frame(compsinter))
if (!missing(cutoff) && !is.null(cutoff)) {
interCut <- names(comps[[1]][abs(comps[[1]])>cutoff])
for (comp in comps) interCut <- intersect(interCut,names(comp)[abs(comp)>cutoff])
}
### use the signs of the genes in the intersection to determine the signs of the genes in the union
## I use the signs in one analysis and assume that the signs are the same in the other analysis...
if (FALSE) {
intersigns <- sign(comps[[1]][inter])
if (type == "union") {
#### union of the available genes
union <-c()
union2signs <- c()
for (comp in comps) {
if (sum(sign(comp[names(intersigns[intersigns==1])]))<0) comp <- -comp
union <- unique(c(union,names(comp)))
union2signs <- c(union2signs,sign(comp))
}
print(length(union))
}
else if (type == "intersection") {
if (is.null(cutoff))
stop("When intersection is used, cutoff should be specified")
union2signs <- c()
for (comp in comps) {
if (sum(sign(comp[names(intersigns[intersigns==1])]))<0) comp <- -comp
union2signs <- c(union2signs,sign(comp[interCut]))
}
union <- interCut
}
}
switch(type,
"intersection"={union <- interCut},
"union"={union <- c();for (comp in comps) {union <- unique(c(union,names(comp)))}}
)
compRank <- foreach(comp=comps) %dopar% {
r <- rank(-abs(comp))
}
names(compRank) <- paths #if(!is.null(path2name)) path2name[paths] else basename(paths)
## add analysis where the gene is present
union2an <-
foreach (p=paths, comp=comps, .combine = cbind) %dopar% {
union2an <- match(union,names(comp))
union2an[!is.na(union2an)] <- p
union2an[is.na(union2an)] <- ""
names(union2an) <- union
return(union2an)
}
union2nban <- apply(union2an,1,function(x) {x=as.character(x);x=x[x != ""]; length(x)})
union2an <- (apply(union2an,1,paste,collapse=",",sep=""))
## add rank details where the gene is present
union2proj <-
foreach (p=paths, comp=comps, .combine = cbind) %dopar% {
union2p <- signif(comp[match(union,names(comp))],2)
names(union2p) <- union
#union2p[!is.na(union2p)] <- comp[names(union2p[!is.na(union2p)])]
union2p[is.na(union2p)] <- ""
#names(union2p) <- union
return(union2p)
}
union2proj <- (apply(union2proj,1,paste,collapse=",",sep=""))
## add rank details where the gene is present
union2rank <- as.data.frame(llply(compRank, function(comp,union) comp[union], union = union))
union2rankdetails <-apply(union2rank,1,paste,collapse=",",sep="")
union2rankdetails <-gsub(union2rankdetails,pattern="NA",replacement="")
union2rankmin <- apply(union2rank, 1,min, na.rm = TRUE)
union2rankmedian <- apply(union2rank, 1,median, na.rm = TRUE)
names(union2rankmin) <- union
names(union2rankmedian) <- union
if (type=="intersection")
d <- data.frame(min_rank = as.character(union2rankmin), median_rank = union2rankmedian, ranks = union2rankdetails, scaled_proj = union2proj, row.names = union)
else {
d <- data.frame(analyses = union2an, min_rank = as.character(union2rankmin), median_rank = union2rankmedian, ranks = union2rankdetails, scaled_proj = union2proj, row.names = union)
d$analyses <- gsub(d$analyses,pattern="^,,",replacement="", perl = TRUE)
d$analyses <- gsub(d$analyses,pattern="^,",replacement="", perl = TRUE)
d$analyses <- gsub(d$analyses,pattern=",,,$",replacement="", perl = TRUE)
d$analyses <- gsub(d$analyses,pattern=",,$",replacement="", perl = TRUE)
d$analyses <- gsub(d$analyses,pattern=",,",replacement=",")
d$nbAn <- union2nban
}
d <- d[order(abs(d$median_rank), decreasing = FALSE),]
if (nrow(d)>1000) d <- d[1:1000,]
d$ranks <- gsub(d$ranks,pattern="^,,",replacement="", perl = TRUE)
d$ranks <- gsub(d$ranks,pattern="^,",replacement="", perl = TRUE)
d$ranks <- gsub(d$ranks,pattern=",,,$",replacement="", perl = TRUE)
d$ranks <- gsub(d$ranks,pattern=",,$",replacement="", perl = TRUE)
d$ranks <- gsub(d$ranks,pattern=",,",replacement=",")
d$scaled_proj <- gsub(d$scaled_proj,pattern="^,",replacement="", perl = TRUE)
d$scaled_proj <- gsub(d$scaled_proj,pattern=",,,$",replacement="", perl = TRUE)
d$scaled_proj <- gsub(d$scaled_proj,pattern=",,$",replacement="", perl = TRUE)
d$scaled_proj <- gsub(d$scaled_proj,pattern=",,",replacement=",")
### annotate genes and order by mean rank value
#if (FALSE) {
if (annotate) {
writeGenes(data = d,
filename = file,
mart = mart,
typeId = typeID(icaSets[[1]])["geneID_biomart"],
typeRetrieved = NULL,
sortBy = "median_rank",
colAnnot = NULL,
decreasing = FALSE
)
}
## attribute meanRank to genes in the union
return(d)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.