################################################
## Functions to compare different MOFA models ##
################################################
#' @title Correlation of the latent factors across different trials
#' @name compareFactors
#' @description Different objects of \code{\link{MOFAmodel}} are compared in terms of correlation between
#' their latent factors. The correlation is calculated only on those samples which are present in all models.
#' Ideally, the output should look like a block diagonal matrix, suggesting that
#' all detected factors are robust under different initializations.
#' If not, it suggests that some factors are weak and not captured by all models.
#' @param models a list containing \code{\link{MOFAmodel}} objects.
#' @param comparison type of comparison, either 'pairwise', i.e. compare one model
#' with another one at a time, or 'all', i.e. calculate correlation between factors from all model.
#' By default, all models are compared.
#' @param show_rownames logical indicating whether to show rownames in heatmap (see also pheatmap documentation)
#' @param show_colnames logical indicating whether to show colnames in heatmap (see also pheatmap documentation)
#' @param ... extra arguments passed to pheatmap
#' @details This function can be helpful to evaluate the robustness of factors across
#' different random initilizations.
#' Large block of factors from different models in the correlation matrix show consistent factors,
#' while stand-alone factors that are only recovered in a single model instance are less reliable.
#' @return Plots a heatmap of correlation of Latent Factors in all models when 'comparison' is 'all'.
#' Otherwise, for each pair of models, a seperate heatmap is produced comparing one model againt the other.
#' The corresponding correlation matrix or list or pairwise correlation matrices is returned
#' @importFrom stats cor
#' @importFrom stats var
#' @importFrom pheatmap pheatmap
#' @importFrom grDevices colorRampPalette
#' @export
#' @examples
#' ### Example on simulated data
#' # Simulate Data
#' data <- makeExampleData()
#' # Create MOFA model
#' MOFAobject <- createMOFAobject(data)
#' # Prepare MOFA model
#' MOFAobject <- prepareMOFA(MOFAobject)
#' # Train several instances of MOFA models
#' n_inits <- 3
#' MOFAlist <- lapply(seq_len(n_inits), function(i) runMOFA(MOFAobject, outfile=tempfile()))
#' compareFactors(MOFAlist, comparison="all")
#' compareFactors(MOFAlist, comparison="pairwise")
compareFactors <- function(models, comparison = "all", show_rownames=FALSE, show_colnames=FALSE,...) {
# Sanity checks
if(!is.list(models))
stop("'models' has to be a list")
if (!all(vapply(models, function (l) is(l, "MOFAmodel"), logical(1))))
stop("Each element of the the list 'models' has to be an instance of MOFAmodel")
if (!comparison %in% c("all", "pairwise"))
stop("'comparison' has to be either 'all' or 'pairwise'")
# give generic names if no names present
if(is.null(names(models))) names(models) <- paste("model", seq_along(models), sep="")
# get latent factors
LFs <- lapply(seq_along(models), function(modelidx){
model <- models[[modelidx]]
Z <- getFactors(model)
Z
})
for (i in seq_along(LFs))
colnames(LFs[[i]]) <- paste(names(models)[i], colnames(LFs[[i]]), sep="_")
if (comparison=="all") {
#get common samples between models
commonSamples <- Reduce(intersect,lapply(LFs, rownames))
if(is.null(commonSamples))
stop("No common samples in all models for comparison")
#subset LFs to common samples
LFscommon <- Reduce(cbind, lapply(LFs, function(Z) {
Z <- Z[commonSamples,, drop=FALSE]
nonconst <- apply(Z,2,var, na.rm=TRUE) > 0
if(sum(nonconst) < ncol(Z)) message("Removing ", sum(!nonconst), " constant factors from the comparison.")
Z[, nonconst]
})
)
# calculate correlation
corLFs <- cor(LFscommon, use="complete.obs")
#annotation by model
# modelAnnot <- data.frame(model = rep(names(models), times=vapply(LFs, ncol, numeric(1))))
# rownames(modelAnnot) <- colnames(LFscommon)
#plot heatmap
# if(is.null(main)) main <- "Absolute correlation between latent factors"
if(length(unique(as.numeric(abs(corLFs))))>1){
pheatmap(abs(corLFs), show_rownames = show_rownames,show_colnames = show_colnames,
color = colorRampPalette(c("white",RColorBrewer::brewer.pal(9,name="YlOrRd")))(100),
# color=colorRampPalette(c("white", "orange" ,"red"))(100),
# annotation_col = modelAnnot, main= main , ...)
...)
} else warning("No plot produced as correlations consist of only one value")
# return(corLFs)
}
if(comparison=="pairwise"){
PairWiseCor <- lapply(seq_along(LFs[-length(LFs)]), function(i){
LFs1<-LFs[[i]]
sublist <- lapply((i+1):length(LFs), function(j){
LFs2<-LFs[[j]]
common_pairwise <- intersect(rownames(LFs1), rownames(LFs2))
if(is.null(common_pairwise)) {
warning(paste("No common samples between models",i,"and",j,"- No comparison possible"))
NA
}
else{
# if(is.null(main)) main <- paste("Absolute correlation between factors in model", i,"and",j)
nonconst1 <- apply(LFs1[common_pairwise,],2,var, na.rm=TRUE) > 0
nonconst2 <- apply(LFs2[common_pairwise,],2,var, na.rm=TRUE) > 0
if(sum(!nonconst1) + sum(!nonconst2)>0)
message("Removing ", sum(!nonconst1), " and " ,sum(!nonconst2),
" constant factors from the each model for the comparison, respectively.")
corLFs_pairs <- cor(LFs1[common_pairwise,nonconst1],
LFs2[common_pairwise,nonconst2],
use="complete.obs")
if(length(unique(abs(corLFs_pairs)))>1){
pheatmap(abs(corLFs_pairs),color=colorRampPalette(c("white", "orange" ,"red"))(100), ...)
} else warning("No plot produced as correlations consist of only one value")
corLFs_pairs
}
})
names(sublist) <- names(models)[(i+1):length(LFs)]
sublist
})
names(PairWiseCor) <- names(models[-length(models)])
return(NULL)
#return(PairWiseCor)
}
}
#' @title Compare different instances of trained \code{\link{MOFAmodel}}
#' @name compareModels
#' @description Different objects of \code{\link{MOFAmodel}} are compared in terms
#' of the final value of the ELBO statistics.
#' For model selection the model with the highest ELBO value is selected.
#' The height of the bar indicates the number of inferred factors and
#' the color of the bar the value of the ELBO statistic.
#' @param models a list containing \code{\link{MOFAmodel}} objects.
#' @param show_modelnames boolean, whether to indicate the name of each model instance
#' (names of the list in models) or not
#' @return a ggplot showing the number of factors and
#' the ELBO statistics of the given models as a barplot
#' @export
#' @examples
#' ### Example on simulated data
#' # Simulate Data
#' data <- makeExampleData()
#' # Create MOFA model
#' MOFAobject <- createMOFAobject(data)
#' # Prepare MOFA model
#' MOFAobject <- prepareMOFA(MOFAobject)
#' # Train several instances of MOFA models
#' n_inits <- 3
#' MOFAlist <- lapply(seq_len(n_inits), function(i) runMOFA(MOFAobject, outfile=tempfile()))
#' compareModels(MOFAlist)
compareModels <- function(models, show_modelnames = FALSE) {
# Sanity checks
if(!is.list(models))
stop("'models' has to be a list")
if (!all(vapply(models, function (l) is(l,"MOFAmodel"), logical(1))))
stop("Each element of the the list 'models' has to be an instance of MOFAmodel")
elbo_vals <- vapply(models, getELBO, numeric(1))
n_factors <- vapply(models, function(m) {
n_fac <- getDimensions(m)$K
n_fac
}, numeric(1))
if(is.null(names(models))) names(models) <- paste0("model_", seq_along(models))
df <- data.frame(ELBO=elbo_vals, model = names(models), n_factors=n_factors)
gg <- ggplot(df, aes_string(x="model",y="n_factors", fill="ELBO")) + geom_bar(stat="identity")+
ylab("Number of inferred factors")
if(show_modelnames) gg <- gg + theme(axis.text.x=element_text(angle=60, vjust=1, hjust=1))
else gg <- gg + theme(axis.text.x=element_blank())
return(gg)
}
#' @title Select the best model from a list of trained \code{\link{MOFAmodel}} objects
#' @name selectModel
#' @description Different trained objects of \code{\link{MOFAmodel}} are compared
#' in terms of the final value of the ELBO statistics
#' and the model with the highest ELBO value is selected.
#' @param models a list containing \code{\link{MOFAmodel}} objects.
#' @param plotit show a plot of the characteristics of the compared
#' \code{\link{MOFAmodel}} objects (ELBO value and number of inferred factors)?
#' @return a single \code{\link{MOFAmodel}} with the best ELBO statistics from the provided list
#' @export
#' @examples
#' # Simulate Data
#' data <- makeExampleData()
#' # Create MOFA model
#' MOFAobject <- createMOFAobject(data)
#' # Prepare MOFA model
#' MOFAobject <- prepareMOFA(MOFAobject)
#' # Train several instances of MOFA models
#' n_inits <- 3
#' MOFAlist <- lapply(seq_len(n_inits), function(i) runMOFA(MOFAobject, outfile=tempfile()))
#' selectModel(MOFAlist)
selectModel <- function(models, plotit =TRUE) {
# Sanity checks
if(!is.list(models))
stop("'models' has to be a list")
if (!all(vapply(models, function (l) is(l, "MOFAmodel"), logical(1))))
stop("Each element of the the list 'models' has to be an instance of MOFAmodel")
elbo_vals <- vapply(models, getELBO, numeric(1))
if(plotit) print(compareModels(models))
models[[which.max(elbo_vals)]]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.