###########################################
## Functions to visualise latent factors ##
###########################################
#' @title Plot histogram of latent factor values
#' @name plotFactorHist
#' @description Plot a histogram of latent factor values.
#' @param object a trained \code{\link{MOFAmodel}} object.
#' @param factor character vector with the factor name or numeric vector with the index of the factor.
#' @param group_by specifies groups used to color the samples of the histogram.
#' This can be either:
#' a character giving the name of a feature,
#' the name of a covariate (only if using a \code{\link{MultiAssayExperiment}} as input),
#' or a vector of the same length as the number of samples.
#' @param group_names names for the groups.
#' @param alpha transparency parameter.
#' Default is 0.5
#' @param binwidth binwidth for histogram. Default is \code{NULL},
#' which uses \code{ggplot2} default calculation.
#' @param showMissing boolean indicating whether to remove sample
#' for which \code{group_by} is missing (default is FALSE)
#' @details One of the first steps for the annotation of factors
#' is to visualise and color them using known covariates such as phenotypic or clinical data. \cr
#' This method generates a histogram of the sample values in a given latent factor. \cr
#' Similar functions are \code{\link{plotFactorScatter}} for doing scatter plots between pairs of factors
#' and \code{\link{plotFactorBeeswarm}} for doing Beeswarm plots of single factors.
#' @return Returns a \code{ggplot2} object
#' @import ggplot2
#' @export
#' @examples
#' # Example on the CLL data
#' filepath <- system.file("extdata", "CLL_model.hdf5", package = "MOFAdata")
#' MOFA_CLL <- loadModel(filepath)
#' plotFactorHist(MOFA_CLL, factor=1)
#' plotFactorHist(MOFA_CLL, factor=1, group_by= "IGHV")
#'
#' # Example on the scMT data
#' filepath <- system.file("extdata", "scMT_model.hdf5", package = "MOFAdata")
#' MOFA_scMT <- loadModel(filepath)
#' plotFactorHist(MOFA_scMT, factor=2)
plotFactorHist <- function(object, factor, group_by = NULL, group_names = "",
alpha = 0.5, binwidth = NULL, showMissing = FALSE) {
# Sanity checks
if (!is(object, "MOFAmodel")) stop("'object' has to be an instance of MOFAmodel")
if(length(factor)>1) stop("Please specify a single factor!")
# Collect relevant data
Z <- getFactors(object, factors = factor, as.data.frame = TRUE)
# Get factors
if (is.numeric(factor)) {
factor <- factorNames(object)[factor]
} else {
stopifnot(factor %in% factorNames(object))
}
# get groups
N <- getDimensions(object)[["N"]]
groupLegend <- TRUE
if (!is.null(group_by)) {
# It is the name of a covariate or a feature in the TrainData
if (length(group_by) == 1 & is.character(group_by)) {
if (group_names=="") group_names <- group_by
TrainData <- getTrainData(object)
featureNames <- lapply(TrainData, rownames)
if (group_by %in% Reduce(union,featureNames)) {
viewidx <- which(vapply(featureNames, function(vnm) group_by %in% vnm, logical(1)))
group_by <- TrainData[[viewidx]][group_by,]
} else if (is(InputData(object), "MultiAssayExperiment")) {
group_by <- getCovariates(object, group_by)
} else {
stop("'group_by' was specified but it was not recognised, please read the documentation")
}
# It is a vector of length N
} else if (length(group_by) > 1) {
stopifnot(length(group_by) == N)
# It is not recognised
} else {
stop("'group_by' was specified but it was not recognised, please read the documentation")
}
} else {
group_by <- rep(TRUE,N)
groupLegend <- FALSE
}
names(group_by) <- sampleNames(object)
Z$group_by <- group_by[Z$sample]
# Remove missing samples
if(!showMissing) Z <- Z[!is.na(group_by) & !is.nan(group_by),]
Z$group_by <- as.factor(Z$group_by)
xlabel <- factor
# Generate plot
p <- ggplot(Z, aes_string(x="value", group="group_by")) +
geom_histogram(aes(fill=group_by), alpha=alpha, binwidth=binwidth, position="identity") +
scale_y_continuous(expand=c(0,0)) +
xlab(xlabel) +
guides(fill=guide_legend(title=group_names)) +
theme(plot.margin = margin(40,40,20,20),
axis.text = element_text(size=rel(1.3), color = "black"),
axis.title.y = element_text(size=rel(1.5), margin=margin(0,15,0,0)),
axis.title.x = element_text(size=rel(1.5), margin=margin(15,0,0,0)),
axis.line = element_line(color="black", size=rel(1.0)),
# axis.ticks = element_line(color="black", size=0.5),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
legend.key = element_rect(fill = "white")
)
if (!groupLegend) { p <- p + guides(fill = FALSE) }
return(p)
}
#' @title Beeswarm plot of latent factors
#' @name plotFactorBeeswarm
#' @description Beeswarm plot of the latent factor values.
#' @param object a trained \code{\link{MOFAmodel}} object.
#' @param factors character vector with the factor name(s),
#' or numeric vector with the index of the factor(s) to use.
#' Default is 'all'
#' @param color_by specifies groups or values used to color the samples.
#' This can be either:
#' a character giving the name of a feature,
#' a character giving the same of a covariate (only if using \code{\link{MultiAssayExperiment}} as input),
#' or a vector of the same length as the number of samples
#' specifying discrete groups or continuous numeric values.
#' @param shape_by specifies groups or values used for the shape of samples.
#' See color_by for how this can be specified. A maximum of 6 different values can be specified.
#' @param name_color name for color legend (usually only used if color_by is not a character itself)
#' @param name_shape name for shape legend (usually only used if shape_by is not a character itself)
#' @param showMissing logical indicating whether to remove samples
#' for which \code{shape_by} or \code{color_by} is missing.
#' @details One of the main steps for the annotation of factors is
#' to visualise and color them using known covariates or phenotypic data. \cr
#' This function generates a Beeswarm plot of the sample values in a given latent factor. \cr
#' Similar functions are \code{\link{plotFactorScatter}} for doing scatter plots and
#' \code{\link{plotFactorHist}} for doing histogram plots
#' @return Returns a \code{ggplot2} object
#' @import ggplot2 ggbeeswarm RColorBrewer grDevices
#' @export
#' @examples
#' # Example on the CLL data
#' filepath <- system.file("extdata", "CLL_model.hdf5", package = "MOFAdata")
#' MOFA_CLL <- loadModel(filepath)
#' plotFactorBeeswarm(MOFA_CLL, factors=1:3)
#' plotFactorBeeswarm(MOFA_CLL, factors=1:2, color_by= "IGHV")
#'
#' # Example on the scMT data
#' filepath <- system.file("extdata", "scMT_model.hdf5", package = "MOFAdata")
#' MOFA_scMT <- loadModel(filepath)
#' plotFactorBeeswarm(MOFA_scMT)
plotFactorBeeswarm <- function(object, factors = "all", color_by = NULL,
shape_by = NULL, name_color = "", name_shape = "", showMissing = FALSE) {
# Sanity checks
if (!is(object, "MOFAmodel")) stop("'object' has to be an instance of MOFAmodel")
# Get factors
if (is.numeric(factors)) {
factors <- factorNames(object)[factors]
} else {
if (paste0(factors,collapse="") == "all") {
factors <- factorNames(object)
} else {
stopifnot(all(factors %in% factorNames(object)))
}
}
Z <- getFactors(object, factors=factors, as.data.frame=TRUE)
Z$factor <- as.factor(Z$factor)
# Set color
N <- getDimensions(object)[["N"]]
colorLegend <- TRUE
if (!is.null(color_by)) {
# It is the name of a covariate or a feature in the TrainData
if (length(color_by) == 1 & is.character(color_by)) {
if(name_color=="") name_color <- color_by
TrainData <- getTrainData(object)
featureNames <- lapply(TrainData, rownames)
if(color_by %in% Reduce(union,featureNames)) {
viewidx <- which(vapply(featureNames, function(vnm) color_by %in% vnm, logical(1)))
color_by <- TrainData[[viewidx]][color_by,]
} else if(is(InputData(object), "MultiAssayExperiment")) {
color_by <- getCovariates(object, color_by)
} else {
stop("'color_by' was specified but it was not recognised, please read the documentation")
}
# It is a vector of length N
} else if (length(color_by) > 1) {
stopifnot(length(color_by) == N)
names(color_by) <- sampleNames(object)
} else {
stop("'color_by' was specified but it was not recognised, please read the documentation")
}
} else {
color_by <- rep(TRUE,N)
names(color_by) <- sampleNames(object)
colorLegend <- FALSE
}
if(length(unique(color_by)) < 5) color_by <- as.factor(color_by)
Z$color_by <- color_by[Z$sample]
# Set shape
shapeLegend <- TRUE
if (!is.null(shape_by)) {
# It is the name of a covariate or a feature in the TrainData
if (length(shape_by) == 1 & is.character(shape_by)) {
if(name_shape=="") name_shape <- shape_by
TrainData <- getTrainData(object)
featureNames <- lapply(TrainData, rownames)
if(shape_by %in% Reduce(union,featureNames)) {
viewidx <- which(vapply(featureNames, function(vnm) shape_by %in% vnm, logical(1)))
shape_by <- TrainData[[viewidx]][shape_by,]
} else if(is(InputData(object), "MultiAssayExperiment")) {
shape_by <- getCovariates(object, shape_by)
} else {
stop("'shape_by' was specified but it was not recognised, please read the documentation")
}
# It is a vector of length N
} else if (length(shape_by) > 1) {
stopifnot(length(shape_by) == N)
names(shape_by) <- sampleNames(object)
} else {
stop("'color_by' was specified but it was not recognised, please read the documentation")
}
} else {
shape_by <- rep(TRUE,N)
names(shape_by) <- sampleNames(object)
shapeLegend <- FALSE
}
if(length(unique(shape_by)) < 7) {
shape_by <- as.factor(shape_by)
} else {
stop("'shape_by' was specified but has too many values.\n The shape argument can take a maximum of 6 values")
}
Z$shape_by <- shape_by[Z$sample]
# Remove samples with missing values
if (showMissing==FALSE) {
Z <- Z[!(is.na(color_by) | is.nan(color_by) | color_by=="NaN" | is.na(shape_by) | is.nan(shape_by) | shape_by=="NaN"),]
# Z <- Z[!(is.na(color_by) | is.nan(color_by) | color_by=="NaN"),]
}
# Generate plot
p <- ggplot(Z, aes_string(x=0, y="value")) +
ggbeeswarm::geom_quasirandom(aes(color=color_by, shape=shape_by)) +
ylab("Factor value") + xlab("") +
scale_x_continuous(breaks=NULL) +
facet_wrap(~factor, scales="free") +
theme(
axis.text.y = element_text(size = rel(1.5), color = "black"),
axis.title.y = element_text(size = rel(1.5), color = "black"),
axis.line = element_line(color = "black", size = 0.4),
axis.ticks.length = unit(0.25,"cm"),
axis.ticks = element_line(color = "black"),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
legend.title=element_text(size=20, hjust=0.5, color="black"),
legend.text=element_text(size=18, hjust=0.5, color="black"),
legend.position = "right",
legend.direction = "vertical",
legend.key = element_blank()
)
# If color_by is numeric, define the default gradient
# if (is.numeric(color_by)) { p <- p + scale_color_gradientn(colors=terrain.colors(10)) }
if (is.numeric(color_by)) {
p <- p + scale_color_gradientn(colors = colorRampPalette(rev(brewer.pal(n=5, name="RdYlBu")))(10))
}
# Add legend
if (colorLegend) {
p <- p + labs(color=name_color)
} else {
p <- p + guides(color = FALSE)
}
if (shapeLegend) {
p <- p + labs(shape=name_shape)
} else {
p <- p + guides(shape = FALSE)
}
return(p)
}
#' @title Scatterplot of two latent factors
#' @name plotFactorScatter
#' @description Scatterplot of the values of two latent factors.
#' @param object a trained \code{\link{MOFAmodel}} object.
#' @param factors a vector of length two with the factors to plot.
#' Factors can be specified either as a characters using the factor names,
#' or as numeric with the index of the factors
#' @param color_by specifies groups or values used to color the samples.
#' This can be either
#' a character giving the name of a feature present in the training data,
#' a character giving the same of a covariate (only if using \code{\link{MultiAssayExperiment}} as input),
#' or a vector of the same length as the number of samples specifying
#' discrete groups or continuous numeric values.
#' @param shape_by specifies groups or values used to shape the samples.
#' This can be either
#' a character giving the name of a feature present in the training data,
#' a character giving the same of a covariate (only if using \code{\link{MultiAssayExperiment}} as input),
#' or a vector of the same length as the number of samples specifying discrete groups.
#' @param name_color name for color legend.
#' @param name_shape name for shape legend.
#' @param dot_size dot size (default is 1.5).
#' @param dot_alpha dot transparency (default is 1.0, no transparency).
#' @param showMissing logical indicating whether to include samples for which
#' \code{shape_by} or \code{color_by} is missing.
#' @details One of the first steps for the annotation of factors is to
#' visualise and group/color them using known covariates such as phenotypic or clinical data.
#' This method generates a single scatterplot for the combination of two latent factors.
#' Similar functions are \code{\link{plotFactorScatters}} for doing multiple scatter plots and
#' \code{\link{plotFactorBeeswarm}} for doing Beeswarm plots for single factors.
#' @return Returns a \code{ggplot2} object
#' @import ggplot2
#' @export
#' @examples
#' # Example on the CLL data
#' filepath <- system.file("extdata", "CLL_model.hdf5", package = "MOFAdata")
#' MOFA_CLL <- loadModel(filepath)
#' plotFactorScatter(MOFA_CLL, factors=1:2)
#' plotFactorScatter(MOFA_CLL, factors=1:2, color_by= "IGHV", shape_by="trisomy12", showMissing=FALSE)
#'
#' # Example on the scMT data
#' filepath <- system.file("extdata", "scMT_model.hdf5", package = "MOFAdata")
#' MOFA_scMT <- loadModel(filepath)
#' plotFactorScatter(MOFA_scMT, factors=c(1,3))
plotFactorScatter <- function (object, factors, color_by = NULL, shape_by = NULL, name_color="",
name_shape="", dot_size=1.5, dot_alpha=1.0, showMissing = TRUE) {
# Sanity checks
if (!is(object, "MOFAmodel")) {
stop("'object' has to be an instance of MOFAmodel")
}
stopifnot(length(factors)==2)
# Get factors
if (is.numeric(factors)) {
factors <- factorNames(object)[factors]
} else {
if (paste0(factors,collapse="") == "all") {
factors <- factorNames(object)
} else {
stopifnot(all(factors %in% factorNames(object)))
}
}
Z <- getFactors(object, factors = factors)
factors <- colnames(Z)
# Get samples
samples <- sampleNames(object)
N <- getDimensions(object)[["N"]]
# Set color
colorLegend <- TRUE
if (!is.null(color_by)) {
# It is the name of a covariate or a feature in the TrainData
if (length(color_by) == 1 & is.character(color_by)) {
if(name_color=="") name_color <- color_by
TrainData <- getTrainData(object)
featureNames <- lapply(TrainData, rownames)
if(color_by %in% Reduce(union,featureNames)) {
viewidx <- which(vapply(featureNames, function(vnm) color_by %in% vnm, logical(1)))
color_by <- TrainData[[viewidx]][color_by,]
} else if(is(InputData(object), "MultiAssayExperiment")) {
color_by <- getCovariates(object, color_by)
} else {
stop("'color_by' was specified but it was not recognised, please read the documentation")
}
# It is a vector of length N
} else if (length(color_by) > 1) {
stopifnot(length(color_by) == N)
# color_by <- as.factor(color_by)
} else {
stop("'color_by' was specified but it was not recognised, please read the documentation")
}
} else {
color_by <- rep(TRUE,N)
names(color_by) <- sampleNames(object)
colorLegend <- FALSE
}
# Set shape
shapeLegend <- TRUE
if (!is.null(shape_by)) {
# It is the name of a covariate
if (length(shape_by) == 1 & is.character(shape_by)) {
if(name_shape=="") name_shape <- shape_by
TrainData <- getTrainData(object)
featureNames <- lapply(TrainData, rownames)
if(shape_by %in% Reduce(union,featureNames)) {
viewidx <- which(vapply(featureNames, function(vnm) shape_by %in% vnm, logical(1)))
shape_by <- TrainData[[viewidx]][shape_by,]
} else if(is(InputData(object), "MultiAssayExperiment")){
shape_by <- getCovariates(object, shape_by)
}
else stop("'shape_by' was specified but it was not recognised, please read the documentation")
# It is a vector of length N
} else if (length(shape_by) > 1) {
stopifnot(length(shape_by) == N)
} else {
stop("'shape_by' was specified but it was not recognised, please read the documentation")
}
} else {
shape_by <- rep(TRUE,N)
names(shape_by) <- sampleNames(object)
shapeLegend <- FALSE
}
# Create data frame to plot
df = data.frame(x = Z[, factors[1]], y = Z[, factors[2]], shape_by = shape_by, color_by = color_by)
# remove values missing color or shape annotation
if (!showMissing) {
df <- df[!(is.na(df$shape_by) | is.na(df$color_by)),]
}
# turn shape_by into factors
if (any(is.na(df$shape_by))) {
df$shape_by <- factor(df$shape_by, levels=c(as.character(unique(df$shape_by)),"NA"))
df$shape_by[is.na(df$shape_by)] <- "NA"
} else {
df$shape_by <- as.factor(df$shape_by)
}
# turn color_by into factors
if (length(unique(df$color_by)) < 5) df$color_by <- as.factor(df$color_by)
# Remove missing values
df <- df[!(is.na(df$x) | is.na(df$y)),]
xlabel <- factors[1]
ylabel <- factors[2]
p <- ggplot(df, aes_string(x = "x", y = "y")) +
geom_point(aes_string(color = "color_by", shape = "shape_by"), size=dot_size, alpha=dot_alpha) +
xlab(xlabel) + ylab(ylabel) +
# scale_shape_manual(values=c(19,1,2:18)[seq_along(unique(shape_by))]) +
theme(plot.margin = margin(20, 20, 10, 10),
axis.text = element_text(size = rel(1), color = "black"),
axis.title = element_text(size = 16),
axis.title.y = element_text(size = rel(1.1), margin = margin(0, 10, 0, 0)),
axis.title.x = element_text(size = rel(1.1), margin = margin(10, 0, 0, 0)),
axis.line = element_line(color = "black", size = 0.5),
axis.ticks = element_line(color = "black", size = 0.5),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
legend.key = element_rect(fill = "white"),
legend.text = element_text(size = 16),
legend.title = element_text(size =16)
)
if (colorLegend) {
p <- p + labs(color = name_color)
} else {
p <- p + guides(color = FALSE)
}
if (shapeLegend) {
p <- p + labs(shape = name_shape)
} else {
p <- p + guides(shape = FALSE)
}
return(p)
}
#' @title Pairwise scatterplots of multiple latent factors
#' @name plotFactorScatters
#' @description Scatterplots of the sample values for pair-wise
#' combinations of multiple latent factors.
#' @param object a \code{\link{MOFAmodel}} object.
#' @param factors character vector with the factor name(s), or
#' numeric vector with the index of the factor(s) to use.
#' Default is 'all'
#' @param color_by specifies groups or values used to color the samples.
#' This can be either:
#' a character giving the name of a feature present in the training data,
#' a character giving the same of a covariate (only if using \code{\link{MultiAssayExperiment}} as input),
#' or a vector of the same length as the number of samples specifying discrete groups or
#' continuous numeric values.
#' @param shape_by specifies groups or values used to shape the samples.
#' This can be either:
#' a character giving the name of a feature present in the training data,
#' a character giving the same of a covariate (only if using \code{\link{MultiAssayExperiment}} as input),
#' or a vector of the same length as the number of samples specifying discrete groups.
#' @param name_color name for color legend (usually only used if color_by is not a character itself)
#' @param name_shape name for shape legend (usually only used if shape_by is not a character itself)
#' @param showMissing logical indicating whether to include samples for which
#' \code{shape_by} or \code{color_by} is missing
#' @details One of the first steps for the annotation of factors is to visualise and
#' overlap them with known covariates such as phenotypic or clinical data.
#' This method generates multiple scatterplots for pairwise combinations of several latent factors.
#' Similar functions are \code{\link{plotFactorScatter}} for doing single scatter plots and
#' \code{\link{plotFactorBeeswarm}} for doing Beeswarm plots for single factors.
#' @return \code{ggplot2} object
#' @import ggplot2
#' @importFrom stats var
#' @export
#' @examples
#' # Example on the CLL data
#' filepath <- system.file("extdata", "CLL_model.hdf5", package = "MOFAdata")
#' MOFA_CLL <- loadModel(filepath)
#' plotFactorScatters(MOFA_CLL, factors=1:3)
#' plotFactorScatters(MOFA_CLL, factors=1:3, color_by= "IGHV")
#'
#' # Example on the scMT data
#' filepath <- system.file("extdata", "scMT_model.hdf5", package = "MOFAdata")
#' MOFA_scMT <- loadModel(filepath)
#' plotFactorScatters(MOFA_scMT)
plotFactorScatters <- function(object, factors = "all", showMissing=TRUE,
color_by=NULL, name_color="",
shape_by=NULL, name_shape="") {
# Sanity checks
if (!is(object, "MOFAmodel")) stop("'object' has to be an instance of MOFAmodel")
# Collect relevant data
N <- getDimensions(object)[["N"]]
Z <- getFactors(object, factors = factors)
factors <- colnames(Z)
# Get factors
if (is.numeric(factors)) {
factors <- factorNames(object)[factors]
} else {
if (paste0(factors,collapse="") == "all") {
factors <- factorNames(object)
} else {
stopifnot(all(factors %in% factorNames(object)))
}
}
Z <- getFactors(object, factors = factors)
# Remove constant factors
tmp <- apply(Z,2,var,na.rm=TRUE)
if (any(tmp==0)) {
# message(paste0("Removing constant factors: ", paste(which(tmp==0), collapse="")))
Z <- Z[,!tmp==0]
factors <- factors[!tmp==0]
}
# Set color
colorLegend <- TRUE
if (!is.null(color_by)) {
# It is the name of a covariate or a feature in the TrainData
if (length(color_by) == 1 & is.character(color_by)) {
if(name_color=="") name_color <- color_by
TrainData <- getTrainData(object)
featureNames <- lapply(TrainData, rownames)
if(color_by %in% Reduce(union,featureNames)) {
viewidx <- which(vapply(featureNames, function(vnm) color_by %in% vnm, logical(1)))
color_by <- TrainData[[viewidx]][color_by,]
} else if(is(InputData(object), "MultiAssayExperiment")){
color_by <- getCovariates(object, color_by)
}
else stop("'color_by' was specified but it was not recognised, please read the documentation")
# It is a vector of length N
} else if (length(color_by) > 1) {
stopifnot(length(color_by) == N)
# color_by <- as.factor(color_by)
} else {
stop("'color_by' was specified but it was not recognised, please read the documentation")
}
} else {
color_by <- rep(TRUE,N)
names(color_by) <- sampleNames(object)
colorLegend <- FALSE
}
# Set shape
shapeLegend <- TRUE
if (!is.null(shape_by)) {
# It is the name of a covariate
if (length(shape_by) == 1 & is.character(shape_by)) {
if(name_shape=="") name_shape <- shape_by
TrainData <- getTrainData(object)
featureNames <- lapply(TrainData, rownames)
if (shape_by %in% Reduce(union,featureNames)) {
viewidx <- which(vapply(featureNames, function(vnm) shape_by %in% vnm, logical(1)))
shape_by <- TrainData[[viewidx]][shape_by,]
} else if(is(InputData(object), "MultiAssayExperiment")){
shape_by <- getCovariates(object, shape_by)
}
else stop("'shape_by' was specified but it was not recognised, please read the documentation")
# It is a vector of length N
# It is a vector of length N
} else if (length(shape_by) > 1) {
stopifnot(length(shape_by) == N)
} else {
stop("'shape_by' was specified but it was not recognised, please read the documentation")
}
} else {
shape_by <- rep(TRUE,N)
names(shape_by) <- sampleNames(object)
shapeLegend <- FALSE
}
# Remove missing values
if(!showMissing) {
Z <- Z[!(is.na(color_by) | is.nan(color_by)),]
color_by <- color_by[!is.na(color_by)]
shape_by <- shape_by[!is.na(shape_by)]
}
# Crete data.frame
df <- as.data.frame(Z); colnames(df) <- paste0("LF",colnames(df))
df <- cbind(df, color_by=color_by, shape_by=shape_by)
#turn into factors
df$shape_by[is.na(df$shape_by)] <- "NA"
df$shape_by <- as.factor(df$shape_by)
if(length(unique(df$color_by)) < 5) df$color_by <- as.factor(df$color_by)
# Define title and legend of the plot
main <- ""
p <- ggplot(df, aes_string(x=colnames(df)[1], y=colnames(df)[2], color="color_by", shape="shape_by")) +
geom_point()
if (colorLegend | shapeLegend) {
p <- p +
theme(
legend.title=element_text(size=15, hjust=0.5, color="black"),
legend.position = "right",
legend.direction = "vertical",
legend.key = element_blank()
)
# If color_by is numeric, define the default gradient
if (is.numeric(df$color_by)) { p <- p + scale_color_gradientn(colors=terrain.colors(10)) }
if (colorLegend) { p <- p + labs(color = name_color) } else { p <- p + guides(color = FALSE) }
if (shapeLegend) { p <- p + labs(shape = name_shape) } else { p <- p + guides(shape = FALSE) }
# Extract the legend
legend <- GGally::grab_legend(p)
} else {
legend <- NULL
}
# Generate plot
p <- GGally::ggpairs(df, columns = colnames(df[,!colnames(df) %in% c("color_by","shape_by")]),
lower=list(continuous="points"),
diag=list(continuous='blankDiag'),
upper=list(continuous='points'),
mapping=aes(color=color_by, shape=shape_by), title=main, legend=legend) +
theme_bw() +
theme(plot.title = element_text(size = 16, hjust=0.5, color="black"),
axis.title = element_text(size = 10, color="black"),
axis.text = element_text(size = 9, color="black"),
legend.position = "right",
legend.direction = "vertical"
)
# If color_by is numeric, define the default gradient
if (is.numeric(df$color_by)) {
for(i in seq_len(p$nrow)) {
for(j in seq_len(p$ncol)){
p[i,j] <- p[i,j] + scale_color_gradientn(colors=terrain.colors(10))
}
}
}
return(p)
}
#' @title Plot correlation matrix between latent factors
#' @name plotFactorCor
#' @description Function to plot the correlation matrix between the latent factors.
#' @param object a trained \code{\link{MOFAmodel}} object.
#' @param method a character indicating the type of correlation coefficient to be computed:
#' pearson (default), kendall, or spearman.
#' @param ... arguments passed to \code{\link[corrplot]{corrplot}}
#' @details This method plots the correlation matrix between the latent factors. \cr
#' The model encourages the factors to be uncorrelated, so this function usually
#' yields a diagonal correlation matrix. \cr
#' However, it is not a hard constraint such as in Principal Component Analysis and
#' correlations between factors can occur, particularly with large number factors. \cr
#' Generally, correlated factors are redundant and should be avoided, as they make interpretation harder.
#' Therefore, if you have too many correlated factors we suggest you try reducing the number of factors.
#' @return Returns a symmetric matrix with the correlation coefficient between every pair of factors.
#' @importFrom corrplot corrplot
#' @export
#' @examples
#' # Example on the CLL data
#' filepath <- system.file("extdata", "CLL_model.hdf5", package = "MOFAdata")
#' MOFA_CLL <- loadModel(filepath)
#' plotFactorCor(MOFA_CLL)
#'
#' # Example on the scMT data
#' filepath <- system.file("extdata", "scMT_model.hdf5", package = "MOFAdata")
#' MOFA_scMT <- loadModel(filepath)
#' plotFactorCor(MOFA_scMT)
plotFactorCor <- function(object, method = "pearson", ...) {
# Sanity checks
if (!is(object, "MOFAmodel")) stop("'object' has to be an instance of MOFAmodel")
# Fetch factors
Z <- getFactors(object)
# Compute and plot correlation
rownames(Z) <- paste0("LF", seq_len(nrow(Z)))
colnames(Z) <- paste0("LF", seq_len(ncol(Z)))
r <- abs(cor(x=Z, y=Z, method=method, use = "complete.obs"))
p <- corrplot(r, tl.col="black", ...)
return(r)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.