#' This function check xxxxx
#'
#' @title Check if xxxxxx
#'
#' @param tab A data.frame which correspond to xxxxxx
#'
#' @return A list of two items
#'
#' @author Thomas Burger, Samuel Wieczorek
#'
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' test.design(Biobase::pData(Exp1_R25_pept)[,1:3])
#'
#' @export
#'
test.design <- function(tab){
valid <- TRUE
txt <- NULL
level <- NULL
level.a <- factor(tab[,1], ordered = TRUE)
level.b <- factor(tab[,2], ordered = TRUE)
name.level.a <- colnames(tab)[1]
name.level.b <- colnames(tab)[2]
level.c <- NULL
if(ncol(tab)==3) {
level.c <- factor(tab[,3], ordered = TRUE)
name.level.c <- colnames(tab)[3]
}
# verification intersection sur B
# verification de la non redondance'intersection
# vide entre les groupes
uniqueA <- unique(level.a)
ll <- lapply(uniqueA,
function(x){
as.character(level.b)[which(level.a==x)]
}
)
n <- NULL
for (i in seq_len(length(uniqueA)-1)){
for (j in (i+1):length(uniqueA)){
n <- c(n, intersect(ll[[i]], ll[[j]]))
}
}
if (length(n) > 0){
valid <- FALSE
txt <- c(txt,paste0("The value ",
n,
" in column '",
colnames(tab)[2],
"' is not correctly set.\n"))
}
#verification si niveau hierarchique inf
if (length(levels(level.a)) == length(levels(level.b))){
## c'est un design de niveau n-1 en fait
valid <- FALSE
txt <- c(txt,
paste0("The column ",
name.level.b,
" is not informative. ",
"Thus, the design is not of level (n-1).\n"))
}
else if (!is.null(level.c)){
if (length(levels(level.b)) == length(levels(level.c))){
## c'est un design de niveau n-1 en fait
valid <- FALSE
txt <- c(txt,
paste0("The column ",
name.level.c,
" is not informative. ",
"Thus, the design is of level (n-1).\n"))
}
}
#verification si niveau non informatif
return(list(valid = valid,
warn = txt))
}
#' This function check the validity of the conditions
#'
#' @title Check if the design is valid
#'
#' @param conds A vector
#'
#' @return A list
#'
#' @author Samuel Wieczorek
#'
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' check.conditions(Biobase::pData(Exp1_R25_pept)$Condition)
#'
#' @export
#'
check.conditions <- function(conds){
res <- list(valid=TRUE,warn=NULL)
if (("" %in% conds) || (NA %in% conds)){
res <- list(valid=FALSE,warn="The conditions are note full filled.")
return(res)
}
# Check if there is at least two conditions
if (length(unique(conds)) < 2){
res <- list(valid=FALSE,warn="The design must contain at least two conditions.")
return(res)
}
# check if each condition has at least two values
nValPerCond <- unlist(lapply(unique(conds), function(x){length(conds[which(conds==x)])}))
if (all(nValPerCond < 2)){
res <- list(valid=FALSE,warn="The design must contain at least two values per condition.")
return(res)
}
return(res)
}
#' This function check the validity of the experimental design
#'
#' @title Check if the design is valid
#'
#' @param sTab The data.frame which correspond to the pData function of MSnbase
#'
#' @return A boolean
#'
#' @author Thomas Burger, Samuel Wieczorek
#'
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' check.design(Biobase::pData(Exp1_R25_pept)[,1:3])
#'
#' @export
#'
check.design <- function(sTab){
res <- list(valid=FALSE,warn=NULL)
names <- colnames(sTab)
level.design <- ncol(sTab)-2
res <- check.conditions(sTab$Condition)
if (!res$valid){
return(res)
}
# Check if all the column are fullfilled
if (level.design == 1){
if (("" %in% sTab$Bio.Rep) || (NA %in% sTab$Bio.Rep)){
res <- list(valid=FALSE,warn="The Bio.Rep colmumn are not full filled.")
return(res)
}
}
else if (level.design == 2){
if (("" %in% sTab$Bio.Rep) || (NA %in% sTab$Bio.Rep)){
res <- list(valid=FALSE,warn="The Bio.Rep colmumn are not full filled.")
return(res)
}else if (("" %in% sTab$Tech.Rep) || (NA %in% sTab$Tech.Rep)){
res <- list(valid=FALSE,warn="The Tech.Rep colmumn are not full filled.")
return(res)
}
}
else if (level.design == 3){
if (("" %in% sTab$Bio.Rep) || (NA %in% sTab$Bio.Rep)){
res <- list(valid=FALSE,warn="The Bio.Rep colmumn are not full filled.")
return(res)
} else if (("" %in% sTab$Tech.Rep) || (NA %in% sTab$Tech.Rep)){
res <- list(valid=FALSE,warn="The Tech.Rep colmumn are not full filled.")
return(res)
} else if (("" %in% sTab$Analyt.Rep) || (NA %in% sTab$Analyt.Rep)){
res <- list(valid=FALSE,warn="The Analyt.Rep colmumn are not full filled.")
return(res)
}
}
# Check if the hierarchy of the design is correct
if (level.design == 1){
res <- test.design(sTab[,c("Condition", "Bio.Rep")])
} else if (level.design == 2){
res <- test.design(sTab[,c("Condition", "Bio.Rep","Tech.Rep")])
} else if (level.design == 3){
res <- test.design(sTab[,c("Condition", "Bio.Rep","Tech.Rep")])
if (res$valid)
res <- test.design(sTab[,c("Bio.Rep","Tech.Rep", "Analyt.Rep")])
}
return(res)
}
#' This function builds the design matrix
#'
#' @title Builds the design matrix
#'
#' @param sTab The data.frame which correspond to the pData function of MSnbase
#'
#' @return A design matrix
#'
#' @author Thomas Burger, Quentin Giai-Gianetto, Samuel Wieczorek
#'
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' make.design(Biobase::pData(Exp1_R25_pept))
#'
#' @export
make.design <- function(sTab){
if (!check.design(sTab)$valid){
warning("The design matrix is not correct.")
warning(check.design(sTab)$warn)
return(NULL)
}
n <- ncol(sTab)
if (n==1 || n==2){
stop("Error in design matrix dimensions which must have at least 3 columns.")
}
res <- do.call(paste0("make.design.", (n-2)),list(sTab))
return(res)
}
#' This function builds the design matrix for design of level 1
#'
#' @title Builds the design matrix for designs of level 1
#'
#' @param sTab The data.frame which correspond to the pData function of MSnbase
#'
#' @return A design matrix
#'
#' @author Thomas Burger, Quentin Giai-Gianetto, Samuel Wieczorek
#'
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' make.design.1(Biobase::pData(Exp1_R25_pept))
#'
#' @export
#'
# make.design.1 <- function(sTab){
#
# Conditions <- factor(sTab$Condition, levels=unique(sTab$Condition))
# nb_cond=length(unique(Conditions))
# nb_samples <- nrow(sTab)
#
# #CGet the number of replicates per condition
# # nb_Rep_decal=rep(0,nb_cond_decal)
# # for (i in 1:nb_cond_decal){
# # nb_Rep_decal[i]=sum((Conditions_decal==unique(Conditions_decal)[i]))
# # }
#
# design=matrix(0,nb_samples,nb_cond)
# #n0_decal=1
# coln=NULL
# for (j in 1:nb_cond){
# test <- rep
# coln=c(coln,paste("Condition",j,collapse=NULL,sep=""))
# design[,j]=as.integer(Conditions==unique(Conditions)[j])
# }
# colnames(design)=coln
#
# return(design)
# }
make.design.1 <- function(sTab){
Conditions <- factor(sTab$Condition, ordered = TRUE)
nb_cond=length(unique(Conditions))
nb_samples <- nrow(sTab)
#CGet the number of replicates per condition
nb_Rep=rep(0,nb_cond)
for (i in 1:nb_cond){
nb_Rep[i]=sum((Conditions==unique(Conditions)[i]))
}
design=matrix(0,nb_samples,nb_cond)
n0=1
coln=NULL
for (j in 1:nb_cond){
coln=c(coln,paste("Condition",j,collapse=NULL,sep=""))
design[(n0:(n0+nb_Rep[j]-1)),j]=rep(1,length((n0:(n0+nb_Rep[j]-1))))
n0=n0+nb_Rep[j]
}
colnames(design)=coln
return(design)
}
#' This function builds the design matrix for design of level 2
#'
#' @title Builds the design matrix for designs of level 2
#'
#' @param sTab The data.frame which correspond to the pData function of MSnbase
#'
#' @return A design matrix
#'
#' @author Thomas Burger, Quentin Giai-Gianetto, Samuel Wieczorek
#'
#' @examples
#' \donttest{
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' make.design.2(Biobase::pData(Exp1_R25_pept))
#' }
#'
#' @importFrom stats model.matrix rnorm
#'
#' @export
#'
make.design.2=function(sTab){
Condition <- factor(sTab$Condition, levels=unique(sTab$Condition))
RepBio <- factor(sTab$Bio.Rep, levels=unique(sTab$Bio.Rep))
#Renome the levels of factor
levels(Condition)=c(1:length(levels(Condition)))
levels(RepBio)=c(1:length(levels(RepBio)))
#Initial design matrix
df <- rep(0,nrow(sTab))
names(df) <- rownames(sTab)
design=model.matrix(df~0+Condition:RepBio)
#Remove empty columns in the design matrix
design=design[,(apply(design,2,sum)>0)]
#Remove identical columns in the design matrix
coldel=-1
for (i in 1:(length(design[1,])-1)){
d2=as.matrix(design[,(i+1):length(design[1,])]);
for (j in 1:length(d2[1,])){
d2[,j]=d2[,j]-design[,i];
}
e=as.matrix(rnorm(length(design[,1]),10,1));
sd2=t(e)%*%d2
liste=which(sd2==0)
coldel=c(coldel,liste+i)
}
design=design[,(1:length(design[1,]))!=coldel]
colnames(design)=make.names(colnames(design))
return(design)
}
#' This function builds the design matrix for design of level 3
#'
#' @title Builds the design matrix for designs of level 3
#'
#' @param sTab The data.frame which correspond to the pData function of MSnbase
#'
#' @return A design matrix
#'
#' @author Thomas Burger, Quentin Giai-Gianetto, Samuel Wieczorek
#'
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' sTab <-cbind(Biobase::pData(Exp1_R25_pept), Tech.Rep=1:6)
#' make.design.3(sTab)
#'
#' @importFrom stats model.matrix rnorm
#'
#' @export
#'
make.design.3 <- function(sTab){
Condition <- factor(sTab$Condition, levels=unique(sTab$Condition))
RepBio <- factor(sTab$Bio.Rep, levels=unique(sTab$Bio.Rep))
RepTech <- factor(sTab$Tech.Rep, levels=unique(sTab$Tech.Rep))
#Rename the levels of factor
levels(Condition)=c(1:length(levels(Condition)))
levels(RepBio)=c(1:length(levels(RepBio)))
levels(RepTech)=c(1:length(levels(RepTech)))
#Initial design matrix
df <- rep(0,nrow(sTab))
names(df) <- rownames(sTab)
design=model.matrix(df~0+Condition:RepBio:RepTech)
#Remove empty columns in the design matrix
design=design[,(apply(design,2,sum)>0)]
#Remove identical columns in the design matrix
coldel=-1
for (i in 1:(length(design[1,])-1)){
d2=as.matrix(design[,(i+1):length(design[1,])]);
for (j in 1:length(d2[1,])){
d2[,j]=d2[,j]-design[,i];
}
e=as.matrix(rnorm(length(design[,1]),10,1));
sd2=t(e)%*%d2
liste=which(sd2==0)
coldel=c(coldel,liste+i)
}
design=design[,(1:length(design[1,]))!=coldel]
colnames(design)=make.names(colnames(design))
return(design)
}
#' This function builds the contrast matrix
#'
#' @title Builds the contrast matrix
#'
#' @param design The data.frame which correspond to the pData function of
#' MSnbase
#'
#' @param condition xxxxx
#'
#' @param contrast An integer that Indicates if the test consists of the
#' comparison of each biological condition versus each of the other ones
#' (Contrast=1; for example H0:"C1=C2" vs H1:"C1!=C2", etc.)
#' or each condition versus all others (Contrast=2; e.g. H0:"C1=(C2+C3)/2" vs
#' H1:"C1!=(C2+C3)/2", etc. if there are three conditions).
#'
#' @return A constrat matrix
#'
#' @author Thomas Burger, Quentin Giai-Gianetto, Samuel Wieczorek
#'
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' design <- make.design(Biobase::pData(Exp1_R25_pept))
#' conds <- Biobase::pData(Exp1_R25_pept)$Condition
#' make.contrast(design, conds)
#'
#' @export
#'
make.contrast <- function(design, condition, contrast=1){
#######################################################
aggreg.column.design=function(design,Condition){
nb.cond=length(levels(Condition))
name.col=colnames(design)
name.cond=NULL
nb.col=NULL
for (i in 1:nb.cond){
col.select=NULL
col.name.begin=paste("Condition",i, sep = "")
nc=nchar(col.name.begin)
for (j in 1:length(design[1,])){
if (substr(name.col[j], 1, nc)==col.name.begin){
col.select=c(col.select,j)
}
}
name.aggreg=NULL
for (j in 1:length(col.select)){
name.aggreg=paste(name.aggreg,name.col[col.select[j]],sep="+")
}
name.aggreg=substr(name.aggreg, 2, nchar(name.aggreg))
name.cond=c(name.cond,name.aggreg)
nb.col=c(nb.col,length(col.select))
}
return(list(name.cond,nb.col))
}
nb.cond=length(levels(condition))
r=aggreg.column.design(design,condition)
label.agg=r[[1]]
nb.agg=r[[2]]
k=1
if (contrast == 1){
## Contrast for One vs One
contra=rep(0,sum(1:(nb.cond-1)))
for (i in 1:(nb.cond-1)){
for (j in (i+1):nb.cond){
contra[k]=c(paste("(",label.agg[i],")/",
nb.agg[i],"-(",label.agg[j],")/",
nb.agg[j]))
k=k+1
}
}
} else if (contrast==2){
## Contrast for One vs All
contra=rep(0,nb.cond)
for (i in 1:(nb.cond)){
contra[k]=c(paste("(",label.agg[i],")/",nb.agg[i]))
nb=sum(nb.agg[(1:nb.cond)[(1:nb.cond)!=i]])
for (j in (1:nb.cond)[(1:nb.cond)!=i]){
contra[k]=c(paste(contra[k],"-(",label.agg[j],")/",nb))
}
k=k+1
}
}
return(contra)
}
##############################################################
#Fonction realisant un test de contrastes entre conditions a l'aide du
#package LIMMA.
#
#En entree:
#qData: tableau de donnees sans valeurs manquantes avec chaque replicat en
#colonne
#Conditions: indique le numero/la lettre de la condition biologique auquel
#appartient chaque replicat
#
#Contrast: indique si l'on souhaite tester chaque condition biologique
#contre chacune (Contrast=1; par exemple H0:"C1=C2" vs H1:"C1!=C2", etc.)
#ou chaque condition contre toutes les autres (Contrast=2; par exemple
#H0:"C1=(C2+C3)/2" vs H1:"C1!=(C2+C3)/2", etc. si on a 3 conditions ).
#
#En sortie :
#Objet fit renvoye par la fonction eBayes de LIMMA.
##QGG, Aout 2015
##############################################################
#' This function is a limmaCompleteTest
#'
#' @title Computes a hierarchical differential analysis
#'
#' @param qData A matrix of quantitative data, without any missing values.
#'
#' @param sTab A dataframe of experimental design (Biobase::pData()).
#'
#' @param comp.type A string that corresponds to the type of comparison.
#' Values are: 'anova1way', 'OnevsOne' and 'OnevsAll'; default is 'OnevsOne'.
#'
#' @return A list of two dataframes : logFC and P_Value. The first one contains
#' the logFC values of all the comparisons (one column for one comparison),
#' the second one contains the pvalue of all the comparisons (one column for
#' one comparison). The names of the columns for those two dataframes
#' are identical and correspond to the description of the comparison.
#'
#' @author Hélène Borges, Thomas Burger, Quentin Giai-Gianetto, Samuel Wieczorek
#'
#' @examples
#' utils::data(Exp1_R25_pept, package='DAPARdata')
#' obj <- Exp1_R25_pept
#' qData <- Biobase::exprs(obj)
#' sTab <- Biobase::pData(obj)
#' limma <- limmaCompleteTest(qData, sTab, comp.type='anova1way')
#'
#' @export
#'
#' @import limma
#'
limmaCompleteTest <- function(qData, sTab, comp.type="OnevsOne"){
## save the current order of columns
# reoder columns to group conditions
switch(comp.type,
OnevsOne = contrast <- 1,
OnevsAll = contrast <- 2)
sTab.old <- sTab
conds <- factor(sTab$Condition, levels=unique(sTab$Condition))
sTab <- sTab[unlist(lapply(split(sTab, conds), function(x) {x['Sample.name']})),]
qData <- qData[,unlist(lapply(split(sTab.old, conds), function(x) {x['Sample.name']}))]
conds <- conds[order(conds)]
res.l <- NULL
design.matrix <- make.design(sTab)
if(!is.null(design.matrix)) {
if(comp.type == 'OnevsOne' || comp.type == "OnevsAll"){
contra <- make.contrast(design.matrix,condition=conds, contrast)
cmtx <- limma::makeContrasts(contrasts=contra,
levels=make.names(colnames(design.matrix)))
fit <- limma::eBayes(limma::contrasts.fit(limma::lmFit(qData, design.matrix), cmtx))
res.l <- formatLimmaResult(fit, conds, contrast)
}else if(comp.type == "anova1way"){
# make the orthogonal contrasts
contrasts <- tidyr::crossing(A = colnames(design.matrix), B = colnames(design.matrix), .name_repair = "minimal") %>%
dplyr::filter(A!=B)
orthogonal_contrasts <- dplyr::filter(contrasts, !duplicated(paste0(pmax(A, B), pmin(A, B)))) %>%
dplyr::mutate(contrasts = stringr::str_glue("{A}-{B}"))
# make the contrasts in a format adapted for limma functions
contrasts_limma_format <- limma::makeContrasts(contrasts = orthogonal_contrasts$contrasts,
levels = colnames(design.matrix))
ebayes_fit <- limma::eBayes(limma::contrasts.fit(limma::lmFit(qData, design.matrix), contrasts_limma_format))
fit_table <- limma::topTable(ebayes_fit, sort.by = "none", number = nrow(qData))
fit_pvalue <- dplyr::select(fit_table, "anova_1way_pval" = P.Value)
res.l <- list("logFC" = data.frame("anova_1way_logFC" = matrix(NA, nrow = nrow(fit_pvalue)),
row.names = rownames(fit_pvalue)),
"P_Value" = fit_pvalue)
}
}
return(res.l)
}
#' @title xxxx
#'
#' @param fit xxxx
#'
#' @param conds xxxx
#'
#' @param contrast xxxx
#'
#' @return A list of two dataframes : logFC and P_Value. The first one contains
#' the logFC values of all the comparisons (one column for one comparison),
#' the second one contains the pvalue of all the comparisons (one column for
#' one comparison). The names of the columns for those two dataframes are
#' identical and correspond to the description of the comparison.
#'
#' @author Samuel Wieczorek
#'
#' @examples
#' utils::data(Exp1_R25_prot, package='DAPARdata')
#' obj <- Exp1_R25_prot[1:1000]
#' level <- obj@experimentData@other$typeOfData
#' metacell.mask <- match.metacell(GetMetacell(obj), 'missing', level)
#' indices <- GetIndices_WholeMatrix(metacell.mask, op='>=', th=1)
#' obj <- MetaCellFiltering(obj, indices, cmd='delete')
#' qData <- Biobase::exprs(obj$new)
#' sTab <- Biobase::pData(obj$new)
#' limma <- limmaCompleteTest(qData, sTab)
#'
formatLimmaResult <- function(fit, conds, contrast){
#res.tmp <- topTable(fit,number=Inf, sort.by="none")
#res <- cbind(res.tmp[,1:Compa.Nb], fit$p.value)
#names(res) <- gsub(".", "_", names(res), fixed=TRUE)
res <- cbind(fit$coefficients, fit$p.value)
#how many comparisons have been done (and thus how many columns of pval)
Compa.Nb <- dim(fit$p.value)[2]
#empty colnames vector
cn<-c()
for (i in 1:Compa.Nb){
#not the same syntax to pars if Contast=1 or Contrast=2
if(contrast == 1){
compa <- stringr::str_match_all(colnames(fit$p.value)[i],"[[:space:]]Condition([[:digit:]]+)")[[1]]
cn[i] <- paste(unique(conds)[as.numeric(compa[1,2])], "_vs_",unique(conds)[as.numeric(compa[2,2])], sep="")
}
if(contrast == 2){
#hierarchic only
#compa <- stringr::str_match_all(colnames(fit$p.value)[i], "[[:space:]]Condition([[:digit:]]+)[[:space:]]")[[1]]
#cn[i]<-paste(levels(Conditions)[as.numeric(compa[1,2])], "vs(all-",levels(Conditions)[as.numeric(compa[1,2])], ")", sep="")
#hier and non hier
compa <- stringr::str_match_all(colnames(fit$p.value)[i], "[[:space:]]Condition([[:digit:]]+)")[[1]]
cn[i] <- paste(unique(conds)[as.numeric(compa[1,2])], "_vs_(all-",unique(conds)[as.numeric(compa[1,2])], ")", sep="")
}
}
res.l <- list(
logFC = as.data.frame(res[,1:Compa.Nb]),
P_Value = as.data.frame(res[,-(1:Compa.Nb)] )
)
colnames(res.l$logFC) <- gsub('[ ]', '', paste(cn, "logFC", sep="_"))
colnames(res.l$P_Value) <- gsub('[ ]', '', paste(cn, "pval", sep="_"))
## end colnames
return(res.l)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.