#' Normalize gene beta scores
#'
#' Two normalization methods are available. \code{cell_cycle} method normalizes gene beta scores
#' based on positive control genes in CRISPR screening. \code{loess} method normalizes gene
#' beta scores using loess.
#'
#' @docType methods
#' @name NormalizeBeta
#' @rdname NormalizeBeta
#' @aliases normalizebeta
#'
#' @param beta Data frame.
#' @param id An integer specifying the column of gene.
#' @param method Character, one of 'cell_cycle'(default) and 'loess'.
#' or character string giving the name of the table column containing the gene names.
#' @param posControl A character vector, specifying a list of positive control genes.
#' @param samples Character vector, specifying the sample names in \emph{beta} columns.
#' If NULL (default), take all \emph{beta} columns as samples.
#' @param org "hsa", "mmu", "bta", "cfa", "ptr", "rno", or "ssc" indicating the organism.
#'
#' @return A data frame with same format as input data \emph{beta}.
#'
#' @details In CRISPR screens, cells treated with different conditions (e.g., with or without drug)
#' may have different proliferation rates. So it's necessary to normalize the proliferation rate
#' based on defined positive control genes among samples. After normalization, the beta scores are
#' comparable across samples. \code{loess} is another optional normalization method, which is used
#' to normalize array data before.
#'
#' @author Wubing Zhang
#'
#' @examples
#' file3 = file.path(system.file("extdata", package = "MAGeCKFlute"),
#' "testdata/mle.gene_summary.txt")
#' dd = ReadBeta(file3)
#' \dontrun{
#' #Cell Cycle normalization
#' dd_essential = NormalizeBeta(dd, method="cell_cycle", org = "mmu")
#' head(dd_essential)
#' }
#' #Optional loess normalization (not recommended)
#' dd_loess = NormalizeBeta(dd, method="loess")
#' head(dd_loess)
#'
#' @export
#'
NormalizeBeta <- function(beta, id = 1, method="cell_cycle",
posControl=NULL, samples=NULL, org = "hsa"){
normalized = beta[, colnames(beta)[setdiff(1:ncol(beta), id)]]
if(id==0) ids = rownames(beta) else ids = as.character(beta[,id])
if(is.null(samples)) samples = colnames(normalized)
normalized = normalized[, samples]
normalized = as.matrix(normalized)
if(method=="cell_cycle"){
if(is.null(posControl)){
# depmapDat = LoadDepmap()
# Depmap = depmapDat$Depmap
# posControl = Selector(Depmap, -0.5, select = 0.9)$sig
posControl = readRDS(file.path(system.file("extdata", package = "MAGeCKFlute"),
"Zuber_Essential.rds"))$GeneSymbol
if(org!="hsa"){
posControl = TransGeneID(posControl, fromType = "Symbol", toType = "Symbol",
fromOrg = "hsa", toOrg = org)
posControl = na.omit(posControl)
}
# Zuber_Essential = readRDS(file.path(system.file("extdata", package =
# "MAGeCKFlute"), "Zuber_Essential.rds"))
# posControl=Zuber_Essential$GeneSymbol
}
idx = which(toupper(ids) %in% toupper(posControl))
if(length(idx)>0){
mid = apply(normalized[idx,], 2, median, na.rm = TRUE)
# mad = apply(normalized[idx,], 2, mad, na.rm = TRUE)
mid = abs(mid - 0.1)
normalized = t(t(normalized) / mid)
}else{
warning("No positive control genes are mapped !!!", call. = FALSE)
}
}
if(method=="loess"){
normalized = normalize.loess(normalized, log.it = FALSE, verbose=FALSE)
}
beta[, samples] = normalized
return(beta)
}
#' normalize.loess
#'
#' Loess normalization method.
#'
#' @docType methods
#' @name normalize.loess
#' @rdname normalize.loess
#' @aliases loess.normalize
#'
#' @param mat A matrix with columns containing the values of the chips to normalize.
#' @param subset A subset of the data to fit a loess to.
#' @param epsilon A tolerance value (supposed to be a small value - used as a stopping criterion).
#' @param maxit Maximum number of iterations.
#' @param log.it Logical. If \code{TRUE} it takes the log2 of \code{mat}.
#' @param verbose Logical. If \code{TRUE} displays current pair of chip being worked on.
#' @param span Parameter to be passed the function \code{\link[stats]{loess}}
#' @param family.loess Parameter to be passed the function \code{\link[stats]{loess}}.
#' \code{"gaussian"} or \code{"symmetric"} are acceptable values for this parameter.
#' @param ... Any of the options of normalize.loess you would like to modify (described above).
#'
#' @return A matrix similar as \code{mat}.
#'
#' @author Wubing Zhang
#'
#' @seealso \code{\link{loess}}
#' @seealso \code{\link{NormalizeBeta}}
#'
#' @examples
#' file3 = file.path(system.file("extdata", package = "MAGeCKFlute"),
#' "testdata/mle.gene_summary.txt")
#' dd = ReadBeta(file3)
#' beta_loess = normalize.loess(dd[,-1])
#'
#' @export
#'
normalize.loess <- function(mat, subset=sample(1:(dim(mat)[1]), min(c(5000, nrow(mat)))),
epsilon=10^-2, maxit=1, log.it=FALSE, verbose=TRUE, span=2/3,
family.loess="symmetric", ...){
J <- dim(mat)[2]
II <- dim(mat)[1]
if(log.it){
mat <- log2(mat)
}
change <- epsilon +1
iter <- 0
w <- c(0, rep(1,length(subset)), 0) ##this way we give 0 weight to the
##extremes added so that we can interpolate
while(iter < maxit){
iter <- iter + 1
means <- matrix(0,II,J) ##contains temp of what we substract
for (j in 1:(J-1)){
for (k in (j+1):J){
y <- mat[,j] - mat[,k]
x <- (mat[,j] + mat[,k]) / 2
index <- c(order(x)[1], subset, order(-x)[1])
##put endpoints in so we can interpolate
xx <- x[index]
yy <- y[index]
aux <-loess(yy~xx, span=span, degree=1, weights=w, family=family.loess)
aux <- predict(aux, data.frame(xx=x)) / J
means[, j] <- means[, j] + aux
means[, k] <- means[, k] - aux
if (verbose)
cat("Done with",j,"vs",k,"in iteration",iter,"\n")
}
}
mat <- mat - means
change <- max(colMeans((means[subset,])^2))
if(verbose)
cat(iter, change,"\n")
}
if ((change > epsilon) & (maxit > 1))
warning(paste("No convergence after", maxit, "iterations.\n"))
if(log.it) {
return(2^mat)
} else
return(mat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.