#' Gene Categorization
#'
#' This function categorizes differential genes of each pair-wise comparison among e.g. initiating A, derived B and primary C
#' samples during a cellular engineering, into five categories named from \emph{Reversed},\emph{Inactive},\emph{Insufficient},
#' \emph{Successful} and \emph{Over} representing the gene reprogrammed states, and calculates the ratio of expression difference (ED)
#' between B and A to the ED between C and A.
#' @usage categorizeGene(expr, diffGene, from.sample,to.sample, target.sample)
#' @param expr a data frame with expression for all genes in \code{diffGene}, alternatively output from \code{\link{diffGene}}
#' function.
#' @param diffGene a list of differential genes in three comparisons, alternatively output by \code{\link{diffGene}} function.
#' @param from.sample,to.sample,target.sample character to specify the name of initiating sample, derived sample and primary
#' sample during a cellular engineering, must be consistent with sample names in the \code{expr} data frame.
#' @details Gene (g) categorization is achieved by considering the pair-wise comparisons (Expression Difference, ED eq. 1) among
#' the three types of samples and the ratio of such differences (EDg ratio, eq. 2).
#' \eqn{EDg(B, A) = average expression of g in B minus average expression of g in A} (1)
#' \eqn{EDg ratio = EDg(B, A)/EDg(C, A)} (2)
#'
#' \emph{Reversed}, EDg(B,A) and EDg(B,C) are significantly differential, while EDg(C,A) is not limited by differential or not,
#' EDg ratio is smaller than 0;
#' \emph{Inactive}, EDg(C,A) and EDg(B,C) are significantly differential, while EDg(B,A) is not differential;
#' \emph{Insufficient}, EDg(B,A) ,EDg(C,A) and EDg(B,C) are all significantly differential, EDg ratio is between 0 and 1;
#' \emph{Successful}, EDg(B,A) and EDg(C,A) are significantly differential, while EDg(B,C) is not differential;
#' \emph{Over}, EDg(B,A) and EDg(B,C) are significantly differential, while EDg(C,A) is not limited by differential or not,
#' EDg ratio is greater than 1.
#' For the \emph{Inactive} and \emph{Successful} categories, genes with bottom and top 5 percentage ED ratios are
#' removed to avoid the ambiguous categorization with \emph{Reversed}, \emph{Insufficient} or \emph{Over} categories.
#' @return A list with components:
#' a list of the five gene categories
#' a list of the ED ratios for the five gene categories.
#' @export
#' @examples
#' data(expr.filer)
#' data(diffgene.genes)
#'
#' category = categorizeGene(expr = expr.filter,diffGene = diffgene.genes,
#' from.sample="DMEC", to.sample="rEChMPP", target.sample="CB")
#' cate.gene = category[[1]]
#' cate.ratio = category[[2]]
categorizeGene = function(expr, diffGene, from.sample, to.sample, target.sample){
samples = c(from.sample, to.sample, target.sample)
expr = expr[,gsub("[^[:alpha:]]","",colnames(expr)) %in% samples]
expr = expr[rowSums(expr) >0,]
group= c(to.sample, target.sample)
comp = c(paste(group,from.sample,sep="_"), paste(group,collapse = "_"))
from.mean = rowMeans(expr[,gsub("[^[:alpha:]]","",colnames(expr)) %in% from.sample])
for(i in 1:2){
group.mean = rowMeans(expr[,gsub("[^[:alpha:]]","",colnames(expr)) %in% group[i]])
expr[[comp[i]]] = group.mean- from.mean
}
# calculate the ratio of expression difference based two comparisons
expr$ratio = expr[,comp[1]]/expr[,comp[2]]
diff.ratio = expr$ratio
# considering the differential genes between to.sample and target.sample
if(!all(names(diffGene) %in% comp) ){
stop ("Please correct the names of 'diffGene' list.")
}
#get the five gene categories
diff.to = diffGene[[comp[1]]]
diff.target = diffGene[[comp[2]]]
diff.totarget = diffGene[[comp[3]]]
suc.group = expr[rownames(expr) %in% diff.target & rownames(expr) %in% diff.to & !rownames(expr) %in% diff.totarget,]
inact.group = expr[rownames(expr) %in% diff.target & !rownames(expr) %in% diff.to & rownames(expr) %in% diff.totarget,]
insuff.group = expr[rownames(expr) %in% diff.target & rownames(expr) %in% diff.to & rownames(expr) %in% diff.totarget, ]
insuff.group = insuff.group[insuff.group$ratio <1 & insuff.group$ratio >0,]
amb.group = expr[rownames(expr) %in% diff.totarget & rownames(expr) %in% diff.to,]
rev.group = amb.group[amb.group$ratio<0, ]
over.group = amb.group[amb.group$ratio >1,]
fivelist = list(Reverse = rev.group,Inactive = inact.group, Insufficient = insuff.group,
Successful= suc.group, Over = over.group)
vali.gene = lapply(fivelist, function(x){rownames(x)})
vali.gene = Reduce(union, vali.gene)
diff.ratio = expr[rownames(expr) %in% vali.gene, ]
ratio = diff.ratio$ratio
ratio = data.frame(ratio)
colnames(ratio) = "ED_ratio"
rownames(ratio) = rownames(diff.ratio)
fivelist = lapply(fivelist, function(x){rownames(x)})
ratio.list = list()
for(i in 1:length(fivelist)){
ratio.list[[i]] = ratio[rownames(ratio) %in% fivelist[[i]],,drop=FALSE]
}
names(ratio.list) = names(fivelist)
#remove the bottom quantile 5% and top quantile 5% genes based on the ED ratio for the Successful and Inactive genes
ratio.list.filter = lapply(ratio.list[c(2,4)], function(x){
y = x[,1]
z= x[x[,1] >= quantile(y,0.05) & x[,1] <= quantile(y,0.95),,drop=FALSE]
z
})
ratio.listold = ratio.list
ratio.list[2] = ratio.list.filter[1]
ratio.list[4] = ratio.list.filter[2]
fivelist = lapply(ratio.list, rownames)
return(list(fivelist = fivelist, ratiolist = ratio.list, ratio.listold = ratio.listold))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.