# Author: Simona Constantinescu; simona.constantinescu@bsse.ethz.ch
###############################################################################
#' @title Randomly generates a pathTiMEx model
#'
#' @description \code{generateRandomModel} generates randomly the structure
#' and the parameters of a pathTiMEx model, including the exponential rates
#' of the waiting times to mutations, the intensities of mutual exclusivity
#' of the pathways, the error rate, the poset and the pathway assignments.
#'
#' @param n number of genes
#' @param k number of pathways
#' @param lamobs exponential rate of the observation time (usually fixed
#' to 1)
#' @param minLam lower limit for simulating the exponential rates of the
#' waiting times to mutations of the genes
#' @param maxLam upper limit for simulating the exponential rates of the
#' waiting times to mutations of the genes
#' @param muInterval vector of values from which the intensities of mutual
#' exclusivity of the pathways are uniformly sampled
#' @param ldensesInterval vector of values from which the density of edges in
#' the poset is uniformly sampled
#' @param epsilon the error rate at the level of genes, real number between 0
#' and 1
#'
#' @details The exponential rate of the observation time is kept fixed (usually
#' to 1) when generating parameters for the model.
#'
#' @return lists of parameters and structure for a pathTiMEx model, as follows:
#' \itemize{
#' \item{\code{lams}} {vector of exponential rates (lambda) of the waiting
#' times to mutations of the genes}
#' \item{\code{lamobs}} {the exponential rate of the observation time, usually
#' fixed to 1}
#' \item{\code{mus}} {vector of intensities of mutual exclusivity (mu) for
#' each pathway}
#' \item{\code{ldense}} {the density of edges in the poset}
#' \item{\code{poset}} {the poset of the model}
#' \item{\code{assignment}} {the assignment of genes to pathways}
#' \item{\code{epsilon}} {the error rate at the level of genes, real number
#' between 0 and 1}
#' \item{\code{type}} {way in which the model was simulated, in this case
#' \code{random}}
#' }
#'
#' @author Simona Constantinescu, \email{simona.constantinescu@@bsse.ethz.ch}
#'
#' @aliases generateRandomModel
#'
#' @export
generateRandomModel<-function(n,k,lamobs,minLam,maxLam,muInterval,ldensesInterval,epsilon)
{
# exponential rates for the waiting times of the genes
lams<-runif(n,min=minLam,max=maxLam)
# mutual exclusivity intensities of the pathways
mus<-sample(muInterval,k,replace=TRUE)
# density of edges
ldense<-sample(ldensesInterval,1,replace=TRUE)
poset<-genRandomPoset(k, ldense)
# assignments of the n genes to the k pathways; repeat until each pathway has at least one member
assignment<-sample(c(1:k),n,replace=TRUE)
while (length(unique(assignment))!=k)
assignment<-sample(c(1:k),n,replace=TRUE)
names(assignment)<-c(1:n)
l<-list("lams" = lams, "lamobs" = lamobs, "mus" = mus, "ldense" = ldense,
"poset" = poset, "assignment" = assignment, "epsilon"=epsilon, "type" = "random")
return(l)
}
###############################################################################
#' @title Generates a pathTiMEx model for given parameters
#'
#' @description \code{generateStructureModel} randomly generates the
#' structure for a pathTiMEx model under given parameters, i.e. the
#' poset and the pathway assignments are generated, while the
#' exponential rates of the waiting times to mutations
#' and of the observation time, the intensities of mutual exclusivity and the
#' error rate are known.
#'
#' @param lams vector of exponential rates of the waiting times to mutations,
#' equal in size to the number of genes
#' @param lamobs the exponential rate of the observation time (usually fixed
#' to 1)
#' @param mus vector of intensities of mutual exclusivity, equal in size to
#' the number of pathways
#' @param ldensesInterval vector of values from which the density of edges in
#' the poset is uniformly sampled
#' @param epsilon the error rate at the level of genes, real number
#' between 0 and 1
#'
#' @details The exponential rate of the observation time is kept fixed (usually
#' to 1) when generating parameters for the model.
#'
#' @return lists of parameters and structure for a pathTiMEx model, as follows:
#' \itemize{
#' \item{\code{lams}} {input vector of exponential rates of the waiting times
#' to mutation of each gene}
#' \item{\code{lamobs}} {input exponential rate of the observation time}
#' \item{\code{mus}} {input vector of mutual exclusivity intensities of each
#' pathway}
#' \item{\code{ldense}} {the density of edges in the poset}
#' \item{\code{poset}} {the poset of the model}
#' \item{\code{assignment}} {the assignment of genes to pathways}
#' \item{\code{epsilon}} {the error rate at the level of genes, real number
#' between 0 and 1}
#' \item{\code{type}} {way in which the model was simulated, in this case
#' \code{structure}}
#' }
#'
#' @author Simona Constantinescu, \email{simona.constantinescu@@bsse.ethz.ch}
#'
#' @aliases generateStructureModel
#'
#' @export
generateStructureModel<-function(lams,lamobs,mus,epsilon)
{
# density of edges
ldense <- sample(ldensesInterval,1,replace=TRUE)
poset <- genRandomPoset(length(mus), ldense)
# assignments of the n genes to the k pathways; repeat until each pathway has at least one member
assignment <- sample(c(1:length(mus)),length(lams),replace=TRUE)
while (length(unique(assignment))!=length(mus))
assignment <- sample(c(1:length(mus)),length(lams),replace=TRUE)
names(assignment) <- c(1:length(lams))
l<-list("lams" = lams, "lamobs" = lamobs, "mus" = mus, "ldense" = ldense,
"poset" = poset, "assignment" = assignment, "epsilon"= epsilon,
"type" = "structure")
return(l)
}
###############################################################################
#' @title Generates a pathTiMEx model for given structure and parameters
#'
#' @description \code{generateFixedModel} generates a pathTiMEx model
#' for given structure (poset and pathway assignment) and parameters
#'
#' @param lams the exponential rates of the waiting times to mutations
#' @param lamobs the exponential rate of the observation time (usually fixed
#' to 1)
#' @param mus the intensities of mutual exclusivity
#' @param listEdges list of pairs [i,j] such that the element [i,j] in the
#' poset equals 1
#' @param assignment vector of assignments of genes to pathways
#' @param epsilon {the error rate at the level of genes, real number
#' between 0 and 1}
#'
#' @details The exponential rate of the observation time is kept fixed (usually
#' to 1) when generating parameters for the model
#'
#' @return lists of parameters and structure for a pathTiMEx model, as follows:
#' \itemize{
#' \item{\code{lams}} {input vector of exponential rates of the waiting times
#' to mutation of each gene}
#' \item{\code{mus}} {input vector of mutual exclusivity intensities of each
#' pathway}
#' \item{\code{poset}} {the poset of the model}
#' \item{\code{assignment}} {the assignment of genes to pathways}
#' \item{\code{epsilon}} {the error rate at the level of genes, real number
#' between 0 and 1}
#' \item{\code{type}} {way in which the model was simulated, in this case
#' \code{fixed}}
#' }
#'
#' @author Simona Constantinescu, \email{simona.constantinescu@@bsse.ethz.ch}
#'
#' @aliases generateFixedModel
#'
#' @export
generateFixedModel<-function(lams,lamobs,mus,listEdges,assignment,epsilon)
{
# all the elements in listEdges are an 1 in the poset input matrix
poset<-matrix(0,nrow=length(mus),ncol=length(mus))
mats<-lapply(listEdges,function(x){poset[cbind(x[1],x[2])]<-1; return(poset)})
poset<-(Reduce("+",mats)>0)+0
l<-list("lams" = lams, "lamobs" = lamobs, "mus" = mus, "poset" = poset,
"assignment" = assignment, "epsilon" = epsilon, "type" = "fixed")
return(l)
}
###############################################################################
#' @title Draws samples from a pathTiMEx model
#'
#' @description \code{drawSamples} generates samples form a pathTiMEx model
#'
#' @param m number of samples to be simulated
#' @param model the pathTiMEx model, as a list, as returned by the functions
#' which generate data: \code{generateRandomModel}, \code{generateFixedModel}
#' and \code{generateStructureModel}
#'
#' @details This function generates a binary matrix on the level of genes
#' from the pathTiMEx model, as well as various other features of the model,
#' such as the waiting times to alteration, the observation times etc.
#'
#' @return list consisting of
#' \itemize{
#' \item{\code{XGenes}} {binary matrix with the simulated observations on the
#' level of genes, including noise. Rows represent samples and columns
#' represent genes. }
#' \item{\code{XGenesClean}} {binary matrix with the simulated observations on
#' the level of genes, before noise was added. Rows represent samples and
#' columns represent genes.}
#' \item{\code{XPathwaysClean}} {binary matrix with the simulated observations
#' on the level of pathways, before noise was added. Rows represent samples
#' and columns represent pathways.}
#' \item{\code{TGenes}} {matrix with the waiting times of the genes. Rows
#' represent samples and columns represent genes.}
#' \item{\code{TPathways}} {matrix with the waiting times of the pathways. The
#' waiting time of each pathway equals the minimum waiting time of its gene
#' members. Rows represent samples and columns represent pathways.}
#' \item{\code{Tobs}} {vector with the observation times. For each sample,
#' an independent observation time is drawn exponentially.}
#' \item{\code{ZGenes}} {matrix with the independent exponential components
#' contributing to the waiting times of the genes. Rows represent samples
#' and columns represent genes.}
#' \item{\code{m}} {number of samples}
#' \item{\code{model}} {structure with details about the model, s.a. waiting
#' time rates, poset etc. This type of structure is returned by the functions
#' whcih generate pathTiMEx models: \code{generateRandomModel},
#' \code{generateFixedModel} and \code{generateStructureModel}.}
#' }
#'
#' @author Simona Constantinescu, \email{simona.constantinescu@@bsse.ethz.ch}
#'
#' @aliases drawSamples
#'
#' @export
drawSamples<-function(m,model)
{
poset <- model$poset
lams <- model$lams
lamobs <- model$lamobs
assignment <- model$assignment
mus <- model$mus
epsilon <- model$epsilon
n <- length(lams) # number of genes
k <- ncol(poset) # number of pathways
# Z is the independent exponential variable component specific to each gene
ZGenes <- matrix(NA, m, n)
# T is Z plus the waiting times of the parents
TGenes <- matrix(NA, m, n)
# T is the actual value of the waiting time of the pathway, as the minimum
# of the waiting times of its gene members
TPathways <- matrix(NA, m, k)
# sample observation times
Tobs <- rexp(m,rate=lamobs)
# generate Z of the genes (exponentially independent)
ZGenes<-t(replicate(m,rexp(n,rate=lams)))
# go over the poset of pathways iteratively, by its topological sort
topoPath <- topologicalSort(poset)
for (e in topoPath)
{
members <- which(assignment == e) # members of pathway e
parents <- which(poset[,e] == 1) # parents of pathway e
# if the pathway has no parents, then the waiting times of its gene members
# equal their exponential components
if (length(parents) == 0)
{
TGenes[,members] <- ZGenes[,members]
if (length(members)>1)
TPathways[,e] <- apply(TGenes[,members],1,min)
else
TPathways[,e] <- TGenes[,members]
}
# if the pathway has a single parent, then the waiting times of its gene members
# equal their exponential component plus the waiting time of its pathway parent
else if (length(parents) == 1)
{
TGenes[,members] <- ZGenes[,members] + TPathways[,parents]
if (length(members)>1)
TPathways[,e] <- apply(TGenes[,members],1,min)
else
TPathways[,e] <- TGenes[,members]
}
# if the pathway has more than one parent, then the waiting times of its gene members
# equal their exponential component plus the maximum waiting time of its pathway parents
else
{
TGenes[,members] <- ZGenes[,members] + apply(TPathways[,parents],1,max)
if (length(members)>1)
TPathways[,e] <- apply(TGenes[,members],1,min)
else
TPathways[,e] <- TGenes[,members]
}
}
XGenes <- matrix(NA,nrow=m,ncol=n)
# binary matrices of observation X for genes and pathways
XPathways <- (TPathways<Tobs)+0
for (e in topoPath)
{
# X for genes
members <- which(assignment == e) # members of pathway e
# simulate a mutually exclusive pathway without noise
XGenes[,members] <- sampleMEPathways(m,TGenes[,members],Tobs,mus[e],0)
}
colnames(XGenes) <- paste("gene",c(1:n),sep="")
colnames(XPathways) <- paste("pathway",c(1:k),sep="")
# the binary matrix before adding noise
XGenesClean<-XGenes
# add noise to the genes with probability epsilon
for (i in 1:m) # for each patient
{
# the genes to which noise is added, sampled binomially with success
# probability epsilon (1 turns into 0 and 0 turns into 1)
genesNoise<-which(rbinom(prob=epsilon,n=dim(XGenes)[2],size=1)==1)
if (length(genesNoise)>0) # if there is at least one gene for which noise is added
{
XGenes[i,genesNoise]<-(!XGenes[i,genesNoise])+0
}
}
# the pathway matrix is returned before adding noise
XPathwaysClean<-XPathways
l <- list("XGenes" = XGenes, "XGenesClean" = XGenesClean,
"XPathwaysClean" = XPathwaysClean, "TGenes" = TGenes,
"TPathways" = TPathways, "Tobs" = Tobs, "ZGenes" = ZGenes,
"m" = m, "model" = model)
return(l)
}
###############################################################################
#' @title Draws samples from a mutually exclusive pathway
#'
#' @description \code{sampleMEPathways} simulates a group of mutually exclusive
#' genes, for given waiting times of genes, observation time, intensity of
#' mutual exclusivity and noise level.
#'
#' @param m number of samples to simulate, as a positive integer
#' @param TGenesTrunc matrix with waiting times of the genes; columns represent
#' the genes, rows represent the samples
#' @param Tobs vector of observation times, with length the number of samples
#' @param muTrunc intensity of mutual exclusivity of the pathway to be
#' simulated, real number between 0 and 1
#' @param eps error rate at the level of genes, real number between 0 and 1
#'
#' @details In the case of perfect mutual exclusivity (\code{mu=1}), for each
#' sample, only the gene with the waiting time can be mutated, and only if its
#' waiting time is lower than the observation time.
#'
#' @return a binary matrix representing the simulated mutually exclusive
#' group, with columns as genes and rows as samples
#'
#' @author Simona Constantinescu, \email{simona.constantinescu@@bsse.ethz.ch}
#'
#' @aliases sampleMEPathways
#'
#' @export
sampleMEPathways <- function(m,TGenesTrunc,Tobs,muTrunc,eps)
{
# the number of genes in the pathway
nTrunc <- dim(TGenesTrunc)[2]
if (is.null(nTrunc))
{
nTrunc <- 1
TGenesTrunc <- matrix(TGenesTrunc)
}
# the matrix of observation corresponding to the pathway
xTrunc <- matrix(0,nrow = m,ncol = nTrunc)
for (i in 1:m)
{
smalleq <- which(TGenesTrunc[i,] <= Tobs[i])
minwt <- smalleq[which.min(TGenesTrunc[i,smalleq])]
# only necessary if not all waiting times are larger than the observation time
if (length(minwt)>0)
{
if (runif(1)<muTrunc) {
# with probability mu, only X_min = 1, and all the rest are 0
xTrunc[i,minwt] <- 1
xTrunc[i,setdiff(c(1:nTrunc),minwt)] <- 0
} else {
# with probability 1-mu, all X corresponding to times < obs are 1 and the rest are 0
xTrunc[i,smalleq] <- 1
xTrunc[i, setdiff(c(1:nTrunc), smalleq)] <- 0
}
}
}
return(xTrunc)
}
###############################################################################
#' @title Performs TiMEx iteratively on a simulated dataset
#'
#' @description \code{iterTiMExSims} performs TiMEx iteratively on a simulated
#' dataset, by each time removing the highest ranking group of largest size and
#' runnning TiMEx again.
#'
#' @param samplesModel structure as returned by the function \code{drawSamples},
#' containing the simulated pathTiMEx model, including the simulated binary
#' matrices, the waiting times etc
#' @param pairMu threshold on the pair-level mu (input to the function
#' \code{TiMEx}). Default to 0.5.
#' @param pairPvalue threshold on the pair-level pvalue (input to the
#' function \code{TiMEx}). Default to 0.01.
#' @param groupPvalue threshold on the group-level pvalue (input to the
#' function \code{TiMEx}). Default to 0.1.
#' @param corr type of correction employed by TiMEx. It has to either be "bonf"
#' or "fdr". Default to "bonf".
#' @param noRuns maximum number of times to run the iterative procedure.
#' Default to 10.
#'
#' @details The groups reported are always the most significant ones of largest
#' size. Once a group is reported, it is removed from the dataset and TiMEx is
#' ran again. The groups are reported decreasingly by size. This function is
#' designed to be applied on data simulated by the functions in this package.
#'
#' @return list consisting of
#' \itemize{
#' \item{\code{geneNames}} {list consisting of the names of the genes part
#' of the identified mutually exclusive groups. each element is a vector of
#' gene names corresponding to one group.}
#' \item{\code{geneIndices}} {list consisting of the indices in the input
#' binary matrix of the genes part of the identified mutually exclusive
#' groups. each element is a vector of gene indices corresponding to one
#' group.}
#' \item{\code{geneAssignments}} {list consisting of the initial assignment to
#' pathways of the genes in the identified groups. Each element is a vector of
#' pathway indices corresponding to an identified mutually exclusive group.
#' The names of each vector represent the indices of the genes in the binary
#' input matrix.}
#' \item{\code{samplesModel}} {input structure containing the simulated
#' pathTiMEx model, including the binary matrix used as input to the function
#' \code{TiMEx}.}
#' \item{\code{muEst}} {vector with the estimated mu for the identified
#' mutually exclusive groups, of length the number of groups identified.}
#' \item{\code{corr}} {the input method for multiple testing correction used
#' by TiMEx.}
#' }
#'
#' @author Simona Constantinescu, \email{simona.constantinescu@@bsse.ethz.ch}
#'
#' @aliases iterTiMExSims
#'
#' @export
iterTiMExSims <- function(samplesModel,pairMu,pairPvalue,groupPvalue,corr,noRuns)
{
mat<-samplesModel$XGenes
if (missing(pairMu))
pairMu<-0.5
if (missing(pairPvalue))
pairPvalue<-0.01
if (missing(groupPvalue))
groupPvalue<-0.1
if (missing(noRuns))
noRuns<-10
if (missing(corr))
corr<-"bonf"
realAssignment<-samplesModel$model$assignment
# record the initial matrix and gene names for reconstructing the indices afterwards
initialMat<-mat
initialNames<-colnames(mat)
geneNames<-list()
muEst<-c()
i<-1
maxSize<-1
while (i<=noRuns & !(is.null(dim(mat)) || ncol(mat)==0) & maxSize>0)
{
print(paste("TiMEx iteration number: ", i, sep=""))
resTiMEx<-TiMEx(mat, pairMu, pairPvalue, groupPvalue)
maxSize<-length(resTiMEx$idxSignif)
if (maxSize>0)
{
idxName<-which(names(resTiMEx$idxSignif[[maxSize]])==corr)
d<-dim(resTiMEx$idxSignif[[maxSize]][[idxName]])[1]
while (maxSize>0 && !is.null(d) && d==0)
{
maxSize<-maxSize-1
idxName<-which(names(resTiMEx$idxSignif[[maxSize]])==corr)
d<-dim(resTiMEx$idxSignif[[maxSize]][[idxName]])[1]
}
# if more than one group of maximal size were found
if (!is.null(dim(resTiMEx$idxSignif[[maxSize]][[idxName]])))
{
# keep the most significant group of largest size
geneNames[[i]]<-unlist(resTiMEx$genesSignif[[maxSize]][[idxName]][1,])
muEst[i]<-resTiMEx$MusGroup[[maxSize]][[idxName]][1]
toRemove<-unlist(resTiMEx$idxSignif[[maxSize]][[idxName]][1,])
} else
{
geneNames[[i]]<-unlist(resTiMEx$genesSignif[[maxSize]][[idxName]])
muEst[i]<-resTiMEx$MusGroup[[maxSize]][[idxName]]
toRemove<-unlist(resTiMEx$idxSignif[[maxSize]][[idxName]])
}
mat<-mat[,-toRemove] # remove the identified group from the data
}
i<-i+1
}
# arrange the groups by size decreasingly
muEst<-muEst[order(sapply(geneNames,length),decreasing = TRUE)]
geneNames<-geneNames[order(sapply(geneNames,length),decreasing = TRUE)]
# retrieve the initial indices
geneIndices<-lapply(geneNames,function(x){
match(x,initialNames)
})
# retrieve the assignments to groups
i<-1
geneAssignments <- list()
while (i <= length(geneIndices))
{
vecIdxs <- geneIndices[[i]]
geneAssignments[[i]] <- realAssignment[vecIdxs]
i <- i+1
}
# create return structure
l<-list("geneNames" = geneNames, "geneIndices" = geneIndices,
"geneAssignments" = geneAssignments, "samplesModel" = samplesModel,
"muEst" = muEst,"corr" = corr)
return(l)
}
###############################################################################
#' @title Assesses the performance of the inferred groups after running
#' TiMEx iteratively
#'
#' @description \code{clustMetrIterTiMEx} computes per-edge and per-pathway
#' metrics evaluating the performance of the pathway assignment inferred after
#' running TiMEx iteratively, based on data simulated from PathTiMEx
#'
#' @param resIterTiMEx structure containing the results from iteratively
#' running TiMEx on a binary dataset, as returned by the function
#' \code{iterTiMExSims}.
#'
#' @details This function assesses how well the real groups were recovered
#' after performing iterative inference with TiMEx.
#'
#' @return list consisting of
#' \itemize{
#' \item{\code{reconstruction}} {structure with metrics of the reconstuction of
#' the initial groups, as follows
#' \itemize{
#' \item{\code{groupsSpread}}{list containing as many elements as real
#' pathways. each element is a list with at most as many elements as the
#' number of inferred pathways (including potential NULL elements).
#' each element of this second list is a vector of genes part of the current
#' real pathway and also part of the inferred pathway. In othder words, the
#' first index represents the real group and the second index represents the
#' inferred group.}
#' \item{\code{groupsSpreadPercent}} {list containing as many elements as real
#' pathways. each element is a list with at most as many elements as the
#' number of inferred pathways (including potential NULL elements).
#' each element of this second list is the percentage of
#' reconstruction of the respective first pathway by the genes identified as
#' part of the second pathway. In othder words, the first index represents
#' the real group and the second index represents the inferred group.}
#' \item{\code{groupsSpreadOrd}} {list containing as many elements as real
#' pathways, ordered by the real mu values of the pathways. each element is a
#' list with at most as many elements as the number of inferred pathways
#' (including potential NULL elements). each element of this second list is a
#' vector of genes part of that particular real pathway
#' and also inferred as part of the current pathway. In othder words, the
#' first index represents the real group and the second index represents the
#' inferred group.}
#' \item{\code{groupsSpreadPercentOrd}} {list containing as many elements as
#' real pathways, orderde by the real mu values of the pathways.
#' each element is a list with at most as many elements as the
#' number of inferred pathways (including potential NULL elements). each
#' element of this second list is the percentage of reconstruction of the
#' respective first pathway by the genes identified as part of the second
#' pathway. In othder words, the first index represents the real group and the
#' second index represents the inferred group.}
#' \item{\code{groupsCompact}} {list containing as many elements as real
#' pathways. each element is a vector consisting of the genes members of
#' that particular pathway which were identified as part of a mutually
#' exclusive group, regardless of the group in which they were identified.}
#' \item{\code{groupsCompactPercent}} {list containing as many elements as real
#' pathways. each element is the percentage of reconstruction of that
#' particular pathway, i.e. the percentage of genes which were identified as
#' mutually exclusive, regardless of the group in which they were identified.}
#' \item{\code{groupsCompactOrd}} {list containing as many elements as real
#' pathways, ordered by the real mu values of the pathways. each element is a
#' vector consisting of the genes members of that particular pathway which
#' were identified as part of a mutually exclusive group, regardless of the
#' group in which they were identified.}
#' \item{\code{groupsCompactPercentOrd}} {list containing as many elements as
#' real pathways, ordered by the real mu values of the pathwayss. each
#' element is the percentage of reconstruction of that particular pathway,
#' i.e. the percentage of genes which were identified as mutually exclusive,
#' regardless of the group in which they were identified.}
#' \item{\code{groupsEmpty}} {list containing as many elements as
#' real pathways. each element contains the genes from that particular pathway
#' which were not identified as part of mutually exclusive groups.}
#' \item{\code{groupsEmptyPercent}} {list containing as many elements as
#' real pathways. each element is the percentage of the respective pathway
#' which was not identified as part of mutually exclusive groups.}
#' \item{\code{groupsEmptyOrd}} {list containing as many elements as
#' real pathways, ordered by the real mu values of the pathways. each element
#' contains the genes from that particular pathway which were not identified
#' as mutually exclusive.}
#' \item{\code{groupsEmptyPercentOrd}} {list containing as many elements as
#' real pathways, ordered by the real mu values of the pathways. each element
#' is the percentage of the respective pathway which was not identified as
#' part of mutually exclusive groups.}
#' \item{\code{booleanOthers}} {list containing as many elements as
#' real pathways. each element is a list with at most as many elements as the
#' number of inferred pathways (including potential NULL elements).
#' each element of this second list is either 0, if the
#' inferred group contained elements from more than one real pathway, or 1, if
#' the inferred group only contained elements from one real pathway. In othder
#' words, the first index represents the real group and the second index
#' represents the inferred group.}
#' \item{\code{booleanOthersOrd}}{list containing as many elements as
#' real pathways, orderde by the real mu values of the pathways. each element
#' is a list with at most as many elements as the number of inferred pathways
#' (including potential NULL elements). each element of this second list is
#' either 0, if the inferred group contained elements from more than one real
#' pathway, or 1, if the inferred group only contained elements from one real
#' pathway. In othder words, the first index represents the real group and the
#' second index represents the inferred group.}
#' }}
#' \item{\code{characteristics}} {structure with metrics per pathway
#' characterizing the identified groups, as follows
#' \itemize{
#' \item{\code{freqGenes}} {list containing as many elements as real pathways.
#' each element is a vector with frequencies of the genes inside that pathway.}
#' \item{\code{freqGenesAvg}} {list containing as many elements as real
#' pathways. each element is the average frequency of all genes inside that
#' pathway (averaged over all the genes in each pathway).}
#' \item{\code{freqGenesOrd}} {list containing as many elements as real
#' pathways. each element is a vector with the frequencies of the
#' genes inside that pathway, ordered decreasingly by the intensities of
#' mutual exclusivity of each pathway.}
#' \item{\code{realMus}} {vector containing the intensities of mutual
#' exclusivity of the real pathways}
#' \item{\code{realMusOrd}} {vector containing the intensities of mutual
#' exclusivity of the real pathways, ordered decreasingly}
#' }}
#' \item{\code{peredge}} {structure with metrics per edge characterizing the
#' reconstruction of groups, as follows
#' \itemize{
#' \item{\code{tp}} {true positive rate: the number of true mutual exclusivity
#' pairwise connections which were also identified, divided by the total
#' number of true mutual exclusivity pairwise connections.}
#' \item{\code{fp}} {false positive rate: the number of false mutual
#' exclusivity edges identified, divided by the total number of possible
#' pairwise connections.}
#' }}
#' \item{\code{resIterTiMEx}} {input structure containing the identified
#' mutually exclusive groups after running TiMEx itartively, together with
#' the simulated data on the basis of which the inference was done.}
#' }
#'
#' @author Simona Constantinescu, \email{simona.constantinescu@@bsse.ethz.ch}
#'
#' @aliases clustMetrIterTiMEx
#'
#' @export
clustMetrIterTiMEx <- function(resIterTiMEx)
{
# number of genes
n <- length(resIterTiMEx$samplesModel$model$lams)
# real assignment used for simulating the data
realAssignment <- resIterTiMEx$samplesModel$model$assignment
# indices of the inferred groups
detectedGroups <- resIterTiMEx$geneIndices
# mus of the real groups
realMus<-resIterTiMEx$samplesModel$model$mus
# binary matrix used as input (including noise)
Xmatrix<-resIterTiMEx$samplesModel$XGenes
# the real poset
poset<-resIterTiMEx$samplesModel$model$poset
R <- as.relation(poset)
RT <- transitive_closure(R)
res <- relation_incidence(RT)
# number of parents of all upstream parents of each pathway in the poset
noParents <- apply(res,2,sum)
## metrics per edge
# the real mutual exclusivity connections, as edges in a square binary
# matrix
edgesReal<-matrix(0,nrow=n,ncol=n)
for (i in 1:n)
{
currentAssignment<-realAssignment[i]
coMembers<-which(realAssignment==currentAssignment)
edgesReal[i,coMembers]<-1
}
# the entries in the diagonal are spurious, so remove them
diag(edgesReal)<-0
# desymmetrize the matrix
edgesReal[lower.tri(edgesReal)]<-0
# count how many times each edge between any two genes was detected as
# part of any inferred mutually exclusive group
edgesFound<-matrix(0, nrow = n, ncol = n)
# for each detected group
for (x in detectedGroups)
{
# if there was at least one group of the current size detected
if (!is.null(x))
{
# record each edge from each group
if (length(x)>1)
{
edgesToAdd<-combinations(n=length(x),r=2,v=x)
edgesFound[cbind(edgesToAdd[,1],edgesToAdd[,2])]<-edgesFound[cbind(edgesToAdd[,1],edgesToAdd[,2])] + 1
}
}
}
# compute the true positive and the false negative rates per identified edges
# the number of true mutual exclusivity edges which were also identified
tp<-length(intersect(which(edgesReal==1),which(edgesFound==1)))/length(which(edgesReal==1))
# the number of falsely identified mutually exclusive edges
fp<-length(setdiff(which(edgesFound==1),which(edgesReal==1)))/length(which(edgesReal==1))
peredge<-list()
peredge$tp<-tp
peredge$fp<-fp
## metrics per pathway
# the first index represents the real group, and the second index is
# the inferred group. the elements are the genes
groupsSpread<-list() # as part of which groups each pathway was inferred
freqsGenes<-list() # average frequencies of genes inside each real pathway
for (i in c(1:length(unique(realAssignment)))) # for each real pathway
{
# all the members part of the current real pathway
currentMembers<-which(realAssignment==i)
# average frequency of genes inside each pathway
if (length(currentMembers)>1)
freqsGenes[[i]]<-apply(Xmatrix[,currentMembers],2,mean)
else
freqsGenes[[i]]<-mean(Xmatrix[,currentMembers])
# the reconstruction of each pathway
groupsSpread[[i]]<-list()
if (length(detectedGroups)>0)
{
for (x in c(1:length(detectedGroups))) # for each detected group
{
# genes part of pathway i which were found in group with index x
newEls<-intersect(detectedGroups[[x]],currentMembers)
if (length(newEls)>0)
groupsSpread[[i]][[x]]<-newEls
}
}
}
# average the frequencies of all genes in each group
freqsGenesAvg<-lapply(freqsGenes,function(x){mean(x)})
# the elements identified for each group, regardless of the group in
# which they were identified
groupsCompact<-lapply(groupsSpread,function(x){unlist(x)})
# the elements of the groups which were not at all idnentified
groupsEmpty<-lapply(c(1:length(groupsCompact)),function(x){
return(setdiff(which(realAssignment==x),groupsCompact[[x]]))
})
# whether any of the groups in groupsSpread also contained other elements
booleanOthers<-lapply(groupsSpread,function(x){
if (length(x)>0)
{
lapply(c(1:length(x)),function(y){
if (!is.null(x[[y]]))
{
# all members in the identified group
allMemsInGroup<-resIterTiMEx$geneIndices[[y]]
if (length(setdiff(allMemsInGroup,x[[y]])>0))
return (1)
else
return (0)
}
})
}
})
# groupsSpread as percentage
groupsSpreadPercent<-lapply(c(1:length(groupsSpread)),function(x){
sizeOfGroup<-length(which(realAssignment==x))
lapply(groupsSpread[[x]],function(y){
return(length(y)/sizeOfGroup)
})
})
# groupsCompact as percentage
groupsCompactPercent<-lapply(c(1:length(groupsCompact)),function(x){
sizeOfGroup<-length(which(realAssignment==x))
return(length(groupsCompact[[x]])/sizeOfGroup)
})
# groupsEmpty as percentage
groupsEmptyPercent<-lapply(c(1:length(groupsEmpty)),function(x){
return(1-groupsCompactPercent[[x]])
})
orderedMus<-order(realMus,decreasing = TRUE)
realMusOrd<-realMus[orderedMus]
# order the return structures decreasingly by the real mus of the real pathways
groupsSpreadOrd<-groupsSpread[orderedMus]
groupsSpreadPercentOrd<-groupsSpreadPercent[orderedMus]
groupsCompactOrd<-groupsCompact[orderedMus]
groupsCompactPercentOrd<-groupsCompactPercent[orderedMus]
groupsEmptyOrd<-groupsEmpty[orderedMus]
groupsEmptyPercentOrd<-groupsEmptyPercent[orderedMus]
booleanOthersOrd<-booleanOthers[orderedMus]
freqsGenesOrd<-freqsGenes[orderedMus]
freqsGenesAvgOrd<-freqsGenesAvg[orderedMus]
noParentsOrd<-noParents[orderedMus]
# use lists as return structures
reconstruction<-list()
reconstruction$groupsSpread<-groupsSpread
reconstruction$groupsSpreadPercent<-groupsSpreadPercent
reconstruction$groupsSpreadOrd<-groupsSpreadOrd
reconstruction$groupsSpreadPercentOrd<-groupsSpreadPercentOrd
reconstruction$groupsCompact<-groupsCompact
reconstruction$groupsCompactPercent<-groupsCompactPercent
reconstruction$groupsCompactOrd<-groupsCompactOrd
reconstruction$groupsCompactPercentOrd<-groupsCompactPercentOrd
reconstruction$groupsEmpty<-groupsEmpty
reconstruction$groupsEmptyPercent<-groupsEmptyPercent
reconstruction$groupsEmptyOrd<-groupsEmptyOrd
reconstruction$groupsEmptyPercentOrd<-groupsEmptyPercentOrd
reconstruction$booleanOthers<-booleanOthers
reconstruction$booleanOthersOrd<-booleanOthersOrd
characteristics<-list()
characteristics$freqsGenes<-freqsGenes
characteristics$freqsGenesAvg<-freqsGenesAvg
characteristics$freqsGenesOrd<-freqsGenesOrd
characteristics$realMus<-realMus
characteristics$realMusOrd<-realMusOrd
l<-list("reconstruction"=reconstruction,"characteristics"=characteristics,"peredge"=peredge,
"resIterTiMEx"=resIterTiMEx)
return(l)
}
###############################################################################
#' @title Evaluates performance of inference of groups
#'
#' @description \code{returnMetricsClust} returns metrics evaluating the
#' performance in group recovery of running TiMEx iteratively
#'
#' @param metricsIterTiMEx structure with metrics of inference of the mutually
#' exclusive groups after running TiMEx iteratively, as returned by the
#' function \code{clustMetrIterTiMEx}
#'
#' @details
#'
#' @return list consisting of
#' \itemize{
#' \item{\code{percent}} {vector with length the number of real mutually
#' exclusive pathways. each element represents the maximal percentage of the
#' current group which was reconstructed by another group (real number between
#' 0 and 1).}
#' \item{\code{alone}} {vector with length the number of real mutually
#' exclusive pathways. each element is either 0, if the initial group has not
#' been recovered by multiple groups, NA if it was and 2 if it hasn't been
#' identified at all.}
#' }
#'
#' @author Simona Constantinescu, \email{simona.constantinescu@@bsse.ethz.ch}
#'
#' @aliases returnMetricsClust
#'
#' @export
returnMetricsClust<-function(metricsIterTiMEx)
{
percent<-sapply(metricsIterTiMEx$reconstruction$groupsSpreadPercent,function(x){
if (length(x)>0)
{
v<-unlist(x)
return(max(v))
}
else return(0)
})
alone<-sapply(metricsIterTiMEx$reconstruction$groupsSpreadPercent,function(x){
v<-unlist(x)
if (length(v)>0)
{
if (v[which.max(v)]==1)
{
v1<-v[-(which.max(v))]
if (length(v1)>0)
p<-max(v1)
else p<-0
}
else return(NA)
}
else return(2)
})
l<-list("percent" = percent, "alone" = alone)
return(l)
}
###############################################################################
#' @title Appends the genes as single clusters
#'
#' @description \code{addAllGenesNotInTiMEx} appends the genes not indentified
#' in any mutually exclusive groups as clusters of single genes.
#'
#' @param resIterTiMEx structure containing the results from iteratively
#' running TiMEx on a binary dataset, as returned by the function
#' \code{iterTiMExSims}.
#'
#' @details The function returns the updated clustering which also includes
#' the single genes.
#'
#' @return groupysL list with the identified clusters. each element is a cluster
#' with gene indices.
#'
#' @author Simona Constantinescu, \email{simona.constantinescu@@bsse.ethz.ch}
#'
#' @aliases addAllGenesNotInTiMEx
#'
#' @export
addAllGenesNotInTiMEx<-function(resIterTiMEx)
{
Datamat<-resIterTiMEx$samplesModel$XGenes
groupysL<-resIterTiMEx$geneIndices
# also add the genes which were not part of the groupings, as separate events
groupysL<-append(groupysL,as.list(setdiff(c(1:dim(Datamat)[2]),unlist(groupysL))))
return(groupysL)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.