#'Calculation of Effects Sizes and their variance
#'
#'This function uses the Hedges'g estimator to calulate the different Effects
#'size and their variances for each genes and for each dataset.
#'
#' @param objectMA A list of list. Each list contains two elements. The first
#' element is the expression matrix (genes in rows and sample in columns) and
#' the second element is a vector of zeros and ones that represents the state
#' of the diffenrent samples of the expression matrix. 0 represents one group
#' (controls) and 1 represents the other group (cases).
#' The result of the CreateobjectMA can be used too.
#'
#' @param missAllow a number that indicates the maximun proportion of missing
#' values allowed in a sample. If the sample has more proportion of missing
#' values the sample will be eliminated. In the other case the missing values
#' will be imputed using the K-NN algorithm.
#'
#'
#' @return A list formed by three elements:
#' \itemize{
#' \item{First element (ES) is a dataframe were columns are each of the studies
#' (datasets) and rows are the genes. Each element of the dataframe represents
#' the Effect Size.}
#' \item{Second element (Var) is a dataframe were columns are each of the studies
#' (datasets) and rows are the genes. Each element of the dataframe represents
#' the variance of the Effect size.}
#' \item{Third element (logFC) is a dataframe were columns are each of the studies
#' (datasets) and rows are the genes. Each element of the dataframe represents
#' the log Fold Changes.}
#' }
#' @author Juan Antonio Villatoro Garcia,
#' \email{juanantoniovillatorogarcia@@gmail.com}
#'
#' @seealso \code{\link{createObjectMA}}, \code{\link{metaAnalysisDE}}
#'
#' @examples
#'
#' data(DExMAExampleData)
#'
#' resultsEffects <- calculateES(objectMA = maObject, missAllow = 0.3)
#' resultsEffects
#'
#' @export
#Function that combines the others for calculating effects size
calculateES <- function(objectMA, missAllow = 0.3){
objectMA <- .metaImpute(objectMA, missAllow=missAllow)
K <- length(objectMA)
if(is.null(names(objectMA))){
names(objectMA)<- paste("Study",seq_len(K),sep="")
}
Effect <- list(0)
Variance <- list(0)
for (i in seq_len(K)) {
res <- .getES(objectMA[i])
colnames(res$ES) <- colnames(res$Var) <- names(objectMA[i])
Effect[[i]] <- res$ES
Variance[[i]] <- res$Var
}
names(Effect) <- names(objectMA)
names(Variance) <- names(objectMA)
Total.Effect <- .matrixmerge(Effect)
Total.Var <- .matrixmerge(Variance)
logFC <- pvalueIndAnalysis(objectMA, missAllow = missAllow)$logFC
Prop.dataset <- as.matrix(1-rowMeans(is.na(Total.Effect)))
result <- list(ES = Total.Effect, Var = Total.Var, logFC = logFC)
return(result)
}
#FUNCTION FOR MERGING MATRIX TAKING INTO ACCOUNT MISSING ROWS
.matrixmerge <- function(lista){
t.lista <- lapply(lista, t)
fused <- plyr::rbind.fill.matrix(t.lista)
fused <- t(fused)
colnames(fused) <- names(lista)
return(fused)
}
#Function for delete samples with missing values
.deleteNa <- function(df) {
no.miss <- colSums(is.na(df[[1]])) <= 0
df[[1]] = df[[1]][,no.miss]
df[[2]] = df[[2]][no.miss]
return(df)
}
#FUNCTION FOR FILTERING SAMPLES WITH MORE THAN % MISSING VALUES
.metaImpute <- function(objectMA,missAllow){
index.miss <- which(vapply(objectMA,
FUN = function(y)any(is.na(y[[1]])),
FUN.VALUE = TRUE))
if(length(index.miss)>0){
for(j in index.miss){
k<-nrow(objectMA[[j]][[1]])
rnum<-which(apply(objectMA[[j]][[1]],2,
function(y) sum(is.na(y))/k)<missAllow)
print(length(rnum))
if(length(rnum)>1){
objectMA[[j]][[1]][,rnum]<-impute.knn(objectMA[[j]][[1]][,rnum],
k=10)$data}
objectMA[[j]]<-.deleteNa(objectMA[[j]])
}
}
return(objectMA)
}
#Functions for calculating effects size
.indCalES<-function(y,l){
l <- unclass(factor(l))
n <- table(factor(l))
ind <- diag(rep(1,length(n)))[l,]
ym<-y%*%ind%*%diag(1/n)
ntilde <- 1/sum(1/n)
m <-sum(n)-2
cm <- cm<-1-(3/(4*sum(n)-9))
#s <- sqrt((1/(sum(n)-2)*((y^2%*%ind)%*%diag(1/(n-1))-
# ym^2%*%diag(n/(n-1)))%*%(n-1)))
numerator_s1 <- ((sum(l==2)-1) * as.matrix(apply(y[,l==2],1,var)))
numerator_s2 <- ((sum(l==1)-1) * as.matrix(apply(y[,l==1],1,var)))
numerator_s <- numerator_s1 + numerator_s2
denominator_s <- length(l) - 2
s <- sqrt(numerator_s/denominator_s)
d <- (ym[,2]-ym[,1])/s
dprime <- d*cm
terme1 <- 1/ntilde
vard <- (sum(l==1)^(-1)+sum(l==2)^(-1))+(d^2)/(2*(sum(l==1)+sum(l==2)))
#vardprime <- sum(1/n)+dprime^2/(2*sum(n))
vardprime = cm^2 * vard
result <- cbind( dprime, vardprime)
colnames(result) <- c( "dprime", "vardprime")
rownames(result) <- rownames(y)
return(result)
}
.getES <- function(x){
K <- length(x)
ES.m <- Var.m <- N <- n <- NULL
y <- x[[1]][[1]]
l <- x[[1]][[2]]
temp <- .indCalES(y,l)
ES.m <- as.matrix(temp[,"dprime"])
Var.m <- as.matrix(temp[,"vardprime"])
N <- c(N,length(l))
n <- c(n,table(l))
rownames(ES.m) <- rownames(y)
rownames(Var.m) <- rownames(y)
res <- list(ES = ES.m,Var = Var.m)
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.