# SPDX-License-Identifier: AGPL-3.0-or-later
# Copyright (C) 2021 Kevin Lu
# Parts modified from EpigenCentral
#' Generate principal component analysis plots from normalized methylation beta values.
#' By default, this will do this for all CpGs, however, you can pass in a data frame
#' of specific ones of interest to only consider differentially methylated CpGs.
#'
#' @param grset minfi GenomicRatioSet containing beta values
#' @param df_dmps optional data frame of CpGs of interest
#'
#' @examples
#' \dontrun{
#' grset <- read_geo_tsv("extdata/GSE55491/GSE55491_series_matrix.txt")
#' pca_plot(grset)
#' }
#' @references
#' H. Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag
#' New York, 2016.
#'
#' @return ggplot2 PCA plot
#' @export
pca_plot <- function(grset, df_dmps = NULL) {
if (is.null(df_dmps)) {
betas <- minfi::getBeta(grset)
} else {
betas <- minfi::getBeta(grset)[df_dmps$CpG.Site,]
}
# Perform the principal component analysis
epsilon = 0.0001
betas[betas < epsilon] <- epsilon
betas[betas > 1 - epsilon] <- 1 - epsilon
x <- scale(t(betas))
x[is.nan(x)] <- 0
pca <- stats::prcomp(x[, matrixStats::colSds(x, na.rm = T) > 0])
plot_df <- as.data.frame(pca$x)
# Centres the plot on the data points while keeping the plot square
limits_x <- c(min(plot_df$PC1), max(plot_df$PC1))
limits_y <- c(min(plot_df$PC2), max(plot_df$PC2))
width_x <- limits_x[2] - limits_x[1]
width_y <- limits_y[2] - limits_y[1]
padding <- abs(width_y - width_x) / 2
if (width_y > width_x) {
limits_x <- limits_x + c(-padding, padding)
} else {
limits_y <- limits_y + c(-padding, padding)
}
# Create the centred PCA plot
if (is.null(grset$Sample_Group)) {
# No sample sheet to discriminate
aesthetic_map <- ggplot2::aes(plot_df$PC1, plot_df$PC2)
} else {
plot_df$Groups <- grset$Sample_Group
aesthetic_map <- ggplot2::aes(plot_df$PC1, plot_df$PC2, colour = plot_df$Groups)
}
ggplot2::ggplot(plot_df, x = plot_df$PC1, y = plot_df$PC2, aesthetic_map) +
ggplot2::geom_point() +
ggplot2::coord_fixed(ratio = 1, xlim = limits_x, ylim = limits_y)
# Show sample labels if there aren't too many
if (length(Biobase::pData(grset)$Sample_Name) < 50) {
ggplot2::last_plot() +
ggplot2::geom_text(
label = rownames(plot_df),
size = 1.9,
nudge_y = (limits_y[2] - limits_y[1]) / 100)
}
ggplot2::last_plot()
}
# [END]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.