#' Plot PCA
#'
#' \code{plot_pca} generates a PCA plot using the top variable proteins.
#'
#' @param dep SummarizedExperiment,
#' Data object for which differentially enriched proteins are annotated
#' (output from \code{\link{test_diff}()} and \code{\link{add_rejections}()}).
#' @param x Integer(1),
#' Sets the principle component to plot on the x-axis.
#' @param y Integer(1),
#' Sets the principle component to plot on the y-axis.
#' @param indicate Character,
#' Sets the color, shape and facet_wrap of the plot
#' based on columns from the experimental design (colData).
#' @param label Logical,
#' Whether or not to add sample labels.
#' @param n Integer(1),
#' Sets the number of top variable proteins to consider.
#' @param point_size Integer(1),
#' Sets the size of the points.
#' @param label_size Integer(1),
#' Sets the size of the labels.
#' @param plot Logical(1),
#' If \code{TRUE} (default) the PCA plot is produced.
#' Otherwise (if \code{FALSE}), the data which the
#' PCA plot is based on are returned.
#' @return A scatter plot (generated by \code{\link[ggplot2]{ggplot}}).
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Test for differentially expressed proteins
#' diff <- test_diff(imputed, "control", "Ctrl")
#' dep <- add_rejections(diff, alpha = 0.05, lfc = 1)
#'
#' # Plot PCA
#' plot_pca(dep)
#' plot_pca(dep, indicate = "condition")
#' @export
plot_pca <- function(dep, x = 1, y = 2, indicate = c("condition", "replicate"),
label = FALSE, n = 500, point_size = 4, label_size = 3, plot = TRUE) {
if(is.integer(x)) x <- as.numeric(x)
if(is.integer(y)) y <- as.numeric(y)
if(is.integer(n)) n <- as.numeric(n)
if(is.integer(point_size)) point_size <- as.numeric(point_size)
if(is.integer(label_size)) label_size <- as.numeric(label_size)
# Show error if inputs are not the required classes
assertthat::assert_that(inherits(dep, "SummarizedExperiment"),
is.numeric(x),
length(x) == 1,
is.numeric(y),
length(y) == 1,
is.numeric(n),
length(n) == 1,
is.character(indicate),
is.logical(label),
is.numeric(point_size),
length(point_size) == 1,
is.numeric(label_size),
length(label_size) == 1,
is.logical(plot),
length(plot) == 1)
# Check for valid x and y values
if(x > ncol(dep) | y > ncol(dep)) {
stop(paste0("'x' and/or 'y' arguments are not valid\n",
"Run plot_pca() with 'x' and 'y' <= ",
ncol(dep), "."),
call. = FALSE)
}
# Check for valid 'n' value
if(n > nrow(dep)) {
stop(paste0("'n' argument is not valid.\n",
"Run plot_pca() with 'n' <= ",
nrow(dep),
"."),
call. = FALSE)
}
# Check for valid 'indicate'
columns <- colnames(colData(dep))
if(!is.null(indicate)) {
if(length(indicate) > 3) {
stop("Too many features in 'indicate'
Run plot_pca() with a maximum of 3 indicate features")
}
if(any(!indicate %in% columns)) {
stop(paste0("'",
paste0(indicate, collapse = "' and/or '"),
"' column(s) is/are not present in ",
deparse(substitute(dep)),
".\nValid columns are: '",
paste(columns, collapse = "', '"),
"'."),
call. = FALSE)
}
}
# Get the variance per protein and take the top n variable proteins
var <- apply(assay(dep), 1, sd)
df <- assay(dep)[order(var, decreasing = TRUE)[seq_len(n)],]
# Calculate PCA
pca <- prcomp(t(df), scale = FALSE)
pca_df <- pca$x %>%
data.frame() %>%
rownames_to_column() %>%
left_join(., data.frame(colData(dep)), by = c("rowname" = "ID"))
# Calculate the percentage of variance explained
percent <- round(100 * pca$sdev^2 / sum(pca$sdev^2), 1)
# Make factors of indicate features
for(feat in indicate) {
pca_df[[feat]] <- as.factor(pca_df[[feat]])
}
# Plot the PCA plot
p <- ggplot(pca_df, aes(get(paste0("PC", x)), get(paste0("PC", y)))) +
labs(title = paste0("PCA plot - top ", n, " variable proteins"),
x = paste0("PC", x, ": ", percent[x], "%"),
y = paste0("PC", y, ": ", percent[y], "%")) +
coord_fixed() +
theme_DEP1()
if(length(indicate) == 0) {
p <- p + geom_point(size = point_size)
}
if(length(indicate) == 1) {
p <- p + geom_point(aes(col = pca_df[[indicate[1]]]),
size = point_size) +
labs(col = indicate[1])
}
if(length(indicate) == 2) {
p <- p + geom_point(aes(col = pca_df[[indicate[1]]],
shape = pca_df[[indicate[2]]]),
size = point_size) +
labs(col = indicate[1],
shape = indicate[2])
}
if(length(indicate) == 3) {
p <- p + geom_point(aes(col = pca_df[[indicate[1]]],
shape = pca_df[[indicate[2]]]),
size = point_size) +
facet_wrap(~pca_df[[indicate[3]]])
labs(col = indicate[1],
shape = indicate[2])
}
if(label) {
p <- p + geom_text(aes(label = rowname), size = label_size)
}
if(plot) {
return(p)
} else {
df <- pca_df %>%
select(rowname, paste0("PC", c(x, y)), match(indicate, colnames(pca_df)))
colnames(df)[1] <- "sample"
return(df)
}
}
#' Plot correlation matrix
#'
#' \code{plot_cor} generates a Pearson correlation matrix.
#'
#' @param dep SummarizedExperiment,
#' Data object for which differentially enriched proteins are annotated
#' (output from \code{\link{test_diff}()} and \code{\link{add_rejections}()}).
#' @param significant Logical(1),
#' Whether or not to filter for significant proteins.
#' @param lower Integer(1),
#' Sets the lower limit of the color scale.
#' @param upper Integer(1),
#' Sets the upper limit of the color scale.
#' @param pal Character(1),
#' Sets the color panel (from \pkg{RColorBrewer}).
#' @param pal_rev Logical(1),
#' Whether or not to invert the color palette.
#' @param indicate Character,
#' Sets additional annotation on the top of the heatmap
#' based on columns from the experimental design (colData).
#' @param font_size Integer(1),
#' Sets the size of the labels.
#' @param plot Logical(1),
#' If \code{TRUE} (default) the correlation matrix plot is produced.
#' Otherwise (if \code{FALSE}), the data which the
#' correlation matrix plot is based on are returned.
#' @param ... Additional arguments for Heatmap function as depicted in
#' \code{\link[ComplexHeatmap]{Heatmap}}
#' @return A heatmap plot (generated by \code{\link[ComplexHeatmap]{Heatmap}})
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Test for differentially expressed proteins
#' diff <- test_diff(imputed, "control", "Ctrl")
#' dep <- add_rejections(diff, alpha = 0.05, lfc = 1)
#'
#' # Plot correlation matrix
#' plot_cor(dep)
#' @export
plot_cor <- function(dep, significant = TRUE, lower = -1, upper = 1,
pal = "PRGn", pal_rev = FALSE, indicate = NULL,
font_size = 12, plot = TRUE, ...) {
# Show error if inputs are not the required classes
assertthat::assert_that(inherits(dep, "SummarizedExperiment"),
is.logical(significant),
length(significant) == 1,
is.numeric(lower),
length(lower) == 1,
is.numeric(upper),
length(upper) == 1,
is.character(pal),
length(pal) == 1,
is.logical(pal_rev),
length(pal_rev) == 1,
is.numeric(font_size),
length(font_size) == 1,
is.logical(plot),
length(plot) == 1)
# Check for valid lower and upper values
if(!(lower >= -1 & upper >= -1 & lower <= 1 & upper <= 1)) {
stop("'lower' and/or 'upper' arguments are not valid
Run plot_pca() with 'lower' and 'upper' between -1 and 1",
call. = FALSE)
}
# Check for valid pal
pals <- RColorBrewer::brewer.pal.info %>%
rownames_to_column() %>%
filter(category != "qual")
if(!pal %in% pals$rowname) {
stop("'", pal,"' is not a valid color panel",
" (qualitative panels also not allowed)\n",
"Run plot_pca() with one of the following 'pal' options: ",
paste(pals$rowname, collapse = "', '"), "'",
call. = FALSE)
}
if(any(is.na(assay(dep)))) {
stop("Missing values in '", deparse(substitute(dep)), "'. Use plot_dist() instead")
}
# Heatmap annotation
if(!is.null(indicate)) {
assertthat::assert_that(is.character(indicate))
col_data <- colData(dep) %>%
as.data.frame()
columns <- colnames(col_data)
if(any(!indicate %in% columns)) {
stop("'",
paste0(indicate, collapse = "' and/or '"),
"' column(s) is/are not present in ",
deparse(substitute(dep)),
".\nValid columns are: '",
paste(columns, collapse = "', '"),
"'.",
call. = FALSE)
}
# Get annotation
anno <- colData(dep) %>%
data.frame() %>%
select(indicate)
# Annotation color
names <- colnames(anno)
anno_col <- vector(mode="list", length=length(names))
names(anno_col) <- names
for(i in names) {
var = anno[[i]] %>% unique() %>% sort()
if(length(var) == 1)
cols <- c("black")
if(length(var) == 2)
cols <- c("orangered", "cornflowerblue")
if(length(var) < 7 & length(var) > 2)
cols <- RColorBrewer::brewer.pal(length(var), "Pastel1")
if(length(var) >= 7)
cols <- RColorBrewer::brewer.pal(length(var), "Set3")
names(cols) <- var
anno_col[[i]] <- cols
}
# HeatmapAnnotation object
ha1 = HeatmapAnnotation(df = anno,
col = anno_col,
show_annotation_name = TRUE)
} else {
ha1 <- NULL
}
# Filter for significant proteins
if(significant) {
# Check for significant column
if(!"significant" %in% colnames(rowData(dep, use.names = FALSE))) {
stop("'significant' column is not present in '",
deparse(substitute(dep)),
"'\nRun add_rejections() to obtain the required column",
call. = FALSE)
}
dep <- dep[rowData(dep, use.names = FALSE)$significant, ]
}
# Calculate correlation matrix
cor_mat <- cor(assay(dep))
# Plot heatmap
ht1 = Heatmap(cor_mat,
col = circlize::colorRamp2(
seq(lower, upper, ((upper-lower)/7)),
if(pal_rev) {
rev(RColorBrewer::brewer.pal(8, pal))
} else {
RColorBrewer::brewer.pal(8, pal)
}),
heatmap_legend_param = list(
color_bar = "continuous",
legend_direction = "horizontal",
legend_width = unit(5, "cm"),
title_position = "topcenter"),
name = "Pearson correlation",
column_names_gp = gpar(fontsize = font_size),
row_names_gp = gpar(fontsize = font_size),
top_annotation = ha1,
...)
if(plot) {
draw(ht1, heatmap_legend_side = "top")
} else {
df <- as.data.frame(cor_mat)
return(df)
}
}
#' Plot Gower's distance matrix
#'
#' \code{plot_dist} generates a distance matrix heatmap using the Gower's distance.
#'
#' @param dep SummarizedExperiment,
#' Data object for which differentially enriched proteins are annotated
#' (output from \code{\link{test_diff}()} and \code{\link{add_rejections}()}).
#' @param significant Logical(1),
#' Whether or not to filter for significant proteins.
#' @param pal Character(1),
#' Sets the color panel (from \pkg{RColorBrewer}).
#' @param pal_rev Logical(1),
#' Whether or not to invert the color palette.
#' @param indicate Character,
#' Sets additional annotation on the top of the heatmap
#' based on columns from the experimental design (colData).
#' @param font_size Integer(1),
#' Sets the size of the labels.
#' @param plot Logical(1),
#' If \code{TRUE} (default) the distance matrix plot is produced.
#' Otherwise (if \code{FALSE}), the data which the
#' distance matrix plot is based on are returned.
#' @param ... Additional arguments for Heatmap function as depicted in
#' \code{\link[ComplexHeatmap]{Heatmap}}
#' @return A heatmap plot (generated by \code{\link[ComplexHeatmap]{Heatmap}})
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Test for differentially expressed proteins
#' diff <- test_diff(imputed, "control", "Ctrl")
#' dep <- add_rejections(diff, alpha = 0.05, lfc = 1)
#'
#' # Plot correlation matrix
#' plot_dist(dep)
#' @export
plot_dist <- function(
dep, significant = TRUE, pal = "YlOrRd",
pal_rev = TRUE, indicate = NULL,
font_size = 12, plot = TRUE, ...) {
# Show error if inputs are not the required classes
assertthat::assert_that(inherits(dep, "SummarizedExperiment"),
is.logical(significant),
length(significant) == 1,
is.character(pal),
length(pal) == 1,
is.logical(pal_rev),
length(pal_rev) == 1,
is.numeric(font_size),
length(font_size) == 1,
is.logical(plot),
length(plot) == 1)
# Check for valid pal
pals <- RColorBrewer::brewer.pal.info %>%
rownames_to_column() %>%
filter(category != "qual")
if(!pal %in% pals$rowname) {
stop("'", pal,"' is not a valid color panel",
" (qualitative panels also not allowed)\n",
"Run plot_pca() with one of the following 'pal' options: ",
paste(pals$rowname, collapse = "', '"), "'",
call. = FALSE)
}
# Heatmap annotation
if(!is.null(indicate)) {
assertthat::assert_that(is.character(indicate))
col_data <- colData(dep) %>%
as.data.frame()
columns <- colnames(col_data)
if(any(!indicate %in% columns)) {
stop("'",
paste0(indicate, collapse = "' and/or '"),
"' column(s) is/are not present in ",
deparse(substitute(dep)),
".\nValid columns are: '",
paste(columns, collapse = "', '"),
"'.",
call. = FALSE)
}
# Get annotation
anno <- colData(dep) %>%
data.frame() %>%
select(indicate)
# Annotation color
names <- colnames(anno)
anno_col <- vector(mode="list", length=length(names))
names(anno_col) <- names
for(i in names) {
var = anno[[i]] %>% unique() %>% sort()
if(length(var) == 1)
cols <- c("black")
if(length(var) == 2)
cols <- c("orangered", "cornflowerblue")
if(length(var) < 7 & length(var) > 2)
cols <- RColorBrewer::brewer.pal(length(var), "Pastel1")
if(length(var) >= 7)
cols <- RColorBrewer::brewer.pal(length(var), "Set3")
names(cols) <- var
anno_col[[i]] <- cols
}
# HeatmapAnnotation object
ha1 = HeatmapAnnotation(df = anno,
col = anno_col,
show_annotation_name = TRUE)
} else {
ha1 <- NULL
}
# Filter for significant proteins
if(significant) {
# Check for significant column
if(!"significant" %in% colnames(rowData(dep, use.names = FALSE))) {
stop("'significant' column is not present in '",
deparse(substitute(dep)),
"'\nRun add_rejections() to obtain the required column",
call. = FALSE)
}
dep <- dep[rowData(dep, use.names = FALSE)$significant, ]
}
# Calculate distance matrix
dist_mat <- cluster::daisy(t(assay(dep)), metric = "gower") %>%
as.matrix()
max <- max(dist_mat)
# Plot heatmap
ht1 = Heatmap(dist_mat,
col = circlize::colorRamp2(
seq(0, max, ((max)/7)),
if(pal_rev) {
rev(RColorBrewer::brewer.pal(8, pal))
} else {
RColorBrewer::brewer.pal(8, pal)
}),
heatmap_legend_param = list(color_bar = "continuous",
legend_direction = "horizontal",
legend_width = unit(5, "cm"),
title_position = "topcenter"),
name = "Gower's distance",
column_names_gp = gpar(fontsize = font_size),
row_names_gp = gpar(fontsize = font_size),
top_annotation = ha1,
...)
if(plot) {
draw(ht1, heatmap_legend_side = "top")
} else {
df <- as.data.frame(dist_mat)
return(df)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.