#' Plot row standard deviations versus row means
#'
#' \code{meanSdPlot} generates a hexagonal heatmap
#' of the row standard deviations versus row means
#' from SummarizedExperiment objects.
#' See \code{\link[vsn]{meanSdPlot}}.
#'
#' @param x SummarizedExperiment,
#' Data object.
#' @param ranks Logical,
#' Whether or not to plot the row means on the rank scale.
#' @param xlab Character,
#' x-axis label.
#' @param ylab Character,
#' y-axis label.
#' @param pch Ignored -
#' exists for backward compatibility.
#' @param plot Logical,
#' Whether or not to produce the plot.
#' @param bins Numeric vector,
#' Data object before normalization.
#' @param ... Other arguments,
#' Passed to \code{\link[ggplot2:geom_hex]{stat_binhex}}.
#' @return A scatter plot of row standard deviations
#' versus row means(generated by \code{\link[ggplot2:geom_hex]{stat_binhex}})
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter and normalize
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#'
#' # Plot meanSdPlot
#' meanSdPlot(norm)
#' @export
meanSdPlot <- function(x,
ranks = TRUE,
xlab = ifelse(ranks, "rank(mean)", "mean"),
ylab = "sd",
pch,
plot = TRUE,
bins = 50,
...) {
vsn::meanSdPlot(assay(x),
ranks = ranks,
xlab = xlab,
ylab = ylab,
pch = pch,
plot = plot,
...)
}
#' Visualize normalization
#'
#' \code{plot_normalization} generates boxplots
#' of all conditions for input objects, e.g. before and after normalization.
#'
#' @param se SummarizedExperiment,
#' Data object, e.g. before normalization (output from \code{\link{make_se}()}
#' or \code{\link{make_se_parse}()}).
#' @param ... Additional SummarizedExperiment object(s),
#' E.g. data object after normalization
#' (output from \code{\link{normalize_vsn}}).
#' @return Boxplots of all conditions
#' for input objects, e.g. before and after normalization
#' (generated by \code{\link[ggplot2]{ggplot}}).
#' Adding components and other plot adjustments can be easily done
#' using the ggplot2 syntax (i.e. using '+')
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter and normalize
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#'
#' # Plot normalization
#' plot_normalization(se, filt, norm)
#' @export
plot_normalization <- function(se, ...) {
# Get arguments from call
call <- match.call()
arglist <- lapply(call[-1], function(x) x)
var.names <- vapply(arglist, deparse, character(1))
arglist <- lapply(arglist, eval.parent, n = 2)
names(arglist) <- var.names
# Show error if inputs are not the required classes
lapply(arglist, function(x) {
assertthat::assert_that(inherits(x,
"SummarizedExperiment"),
msg = "input objects need to be of class 'SummarizedExperiment'")
if (any(!c("label", "ID", "condition", "replicate") %in% colnames(colData(x)))) {
stop("'label', 'ID', 'condition' and/or 'replicate' ",
"columns are not present in (one of) the input object(s)",
"\nRun make_se() or make_se_parse() to obtain the required columns",
call. = FALSE)
}
})
# Function to get a long data.frame of the assay data
# annotated with sample info
gather_join <- function(se) {
assay(se) %>%
data.frame() %>%
gather(ID, val) %>%
left_join(., data.frame(colData(se)), by = "ID")
}
df <- map_df(arglist, gather_join, .id = "var") %>%
mutate(var = factor(var, levels = names(arglist)))
# Boxplots for conditions with facet_wrap
# for the original and normalized values
ggplot(df, aes(x = ID, y = val, fill = condition)) +
geom_boxplot(notch = TRUE, na.rm = TRUE) +
coord_flip() +
facet_wrap(~var, ncol = 1) +
labs(x = "", y = expression(log[2]~"Intensity")) +
theme_DEP1()
}
#' Visualize imputation
#'
#' \code{plot_imputation} generates density plots
#' of all conditions for input objects, e.g. before and after imputation.
#'
#' @param se SummarizedExperiment,
#' Data object, e.g. before imputation
#' (output from \code{\link{normalize_vsn}()}).
#' @param ... Other SummarizedExperiment object(s),
#' E.g. data object after imputation
#' (output from \code{\link{impute}()}).
#' @return Density plots of all conditions
#' of all conditions for input objects, e.g. before and
#' after imputation (generated by \code{\link[ggplot2]{ggplot}}).
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Plot imputation
#' plot_imputation(filt, norm, imputed)
#' @export
plot_imputation <- function(se, ...) {
# Get arguments from call
call <- match.call()
arglist <- lapply(call[-1], function(x) x)
var.names <- vapply(arglist, deparse, character(1))
arglist <- lapply(arglist, eval.parent, n = 2)
names(arglist) <- var.names
# Show error if inputs are not the required classes
lapply(arglist, function(x) {
assertthat::assert_that(inherits(x,
"SummarizedExperiment"),
msg = "input objects need to be of class 'SummarizedExperiment'")
if (any(!c("label", "ID", "condition", "replicate") %in% colnames(colData(x)))) {
stop("'label', 'ID', 'condition' and/or 'replicate' ",
"columns are not present in (one of) the input object(s)",
"\nRun make_se() or make_se_parse() to obtain the required columns",
call. = FALSE)
}
})
# Function to get a long data.frame of the assay data
# annotated with sample info
gather_join <- function(se) {
assay(se) %>%
data.frame() %>%
gather(ID, val) %>%
left_join(., data.frame(colData(se)), by = "ID")
}
df <- map_df(arglist, gather_join, .id = "var") %>%
mutate(var = factor(var, levels = names(arglist)))
# Density plots for different conditions with facet_wrap
# for original and imputed samles
ggplot(df, aes(val, col = condition)) +
geom_density(na.rm = TRUE) +
facet_wrap(~var, ncol = 1) +
labs(x = expression(log[2]~"Intensity"), y = "Density") +
theme_DEP1()
}
#' Visualize intensities of proteins with missing values
#'
#' \code{plot_detect} generates density and CumSum plots
#' of protein intensities with and without missing values
#'
#' @param se SummarizedExperiment,
#' Data object with missing values.
#' @return Density and CumSum plots of intensities of
#' proteins with and without missing values
#' (generated by \code{\link[ggplot2]{ggplot}}).
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter
#' filt <- filter_missval(se, thr = 0)
#'
#' # Plot intensities of proteins with missing values
#' plot_detect(filt)
#' @export
plot_detect <- function(se) {
# Show error if inputs are not the required classes
assertthat::assert_that(inherits(se, "SummarizedExperiment"))
se_assay <- assay(se)
# Show error if there are no missing values
if(!any(is.na(se_assay))) {
stop("No missing values in '", deparse(substitute(se)), "'",
call. = FALSE)
}
# Get a long data.frame of the assay data annotated with sample info
df <- se_assay %>%
data.frame() %>%
rownames_to_column() %>%
gather(ID, val, -rowname)
# Get a summarized table with mean protein intensities and
# indication whether the protein has missing values
stat <- df %>%
group_by(rowname) %>%
summarize(mean = mean(val, na.rm = TRUE), missval = any(is.na(val)))
# Calculate cumulative fraction
cumsum <- stat %>%
group_by(missval) %>%
arrange(mean) %>%
mutate(num = 1, cs = cumsum(num), cs_frac = cs/n())
# Plot the densities and cumalitive fractions for
# proteins with and without missing values
p1 <- ggplot(stat, aes(mean, col = missval)) +
geom_density(na.rm = TRUE) +
labs(x = expression(log[2]~"Intensity"), y = "Density") +
guides(col = guide_legend(title = "Missing values")) +
theme_DEP1()
p2 <- ggplot(cumsum, aes(mean, cs_frac, col = missval)) +
geom_line() +
labs(x = expression(log[2]~"Intensity"), y = "Cumulative fraction") +
guides(col = guide_legend(title = "Missing values")) +
theme_DEP1()
gridExtra::grid.arrange(p1, p2, ncol = 1)
}
#' Plot a heatmap of proteins with missing values
#'
#' \code{plot_missval} generates a heatmap of proteins
#' with missing values to discover whether values are missing by random or not.
#'
#' @param se SummarizedExperiment,
#' Data object with missing values.
#' @return A heatmap indicating whether values are missing (0) or not (1)
#' (generated by \code{\link[ComplexHeatmap]{Heatmap}}).
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#'
#' # Plot missing values heatmap
#' plot_missval(filt)
#' @export
plot_missval <- function(se) {
# Show error if input is not the required classes
assertthat::assert_that(inherits(se, "SummarizedExperiment"))
se_assay <- assay(se)
# Show error if there are no missing values
if(!any(is.na(se_assay))) {
stop("No missing values in '", deparse(substitute(se)), "'",
call. = FALSE)
}
# Make assay data binary (1 = valid value, 0 = missing value)
df <- se_assay %>% data.frame(.)
missval <- df[apply(df, 1, function(x) any(is.na(x))), ]
missval <- ifelse(is.na(missval), 0, 1)
# Plot binary heatmap
ht2 = Heatmap(missval,
col = c("white", "black"),
column_names_side = "top",
show_row_names = FALSE,
show_column_names = TRUE,
name = "Missing values pattern",
column_names_gp = gpar(fontsize = 16),
heatmap_legend_param = list(at = c(0, 1),
labels = c("Missing value", "Valid value")))
draw(ht2, heatmap_legend_side = "top")
}
#' Plot a P value histogram
#'
#' \code{plot_p_hist} generates a p value histogram.
#'
#' @param dep SummarizedExperiment,
#' Data object for which differentially enriched proteins are annotated
#' (output from \code{\link{test_diff}()} and \code{\link{add_rejections}()}).
#' @param adjusted Logical(1),
#' Whether or not to use adjusted p values.
#' @param wrap Logical(1),
#' Whether or not to display different histograms for the different contrasts.
#' @return A histogram (generated by \code{\link[ggplot2]{ggplot}}).
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Test for differentially expressed proteins
#' diff <- test_diff(imputed, "control", "Ctrl")
#' dep <- add_rejections(diff, alpha = 0.05, lfc = 1)
#'
#' # Plot p value histogram
#' plot_p_hist(dep)
#' plot_p_hist(dep, wrap = TRUE)
#' @export
plot_p_hist <- function(dep, adjusted = FALSE, wrap = FALSE) {
assert_that(inherits(dep, "SummarizedExperiment"),
is.logical(adjusted),
length(adjusted) == 1,
is.logical(wrap),
length(wrap) == 1)
# Get rowData
row_data <- rowData(dep, use.names = FALSE) %>%
data.frame()
if(length(grep("_p.adj", colnames(row_data))) < 1) {
stop("'[contrast]_p.adj' columns are not present in '",
deparse(substitute(dep)),
"'\nRun test_diff() to obtain the required columns",
call. = FALSE)
}
if(length(grep("_p.val", colnames(row_data))) < 1) {
stop("'[contrast]_p.val' columns are not present in '",
deparse(substitute(dep)),
"'\nRun test_diff() to obtain the required columns",
call. = FALSE)
}
if(!"name" %in% colnames(row_data)) {
stop("'name' column not present in '",
deparse(substitute(dep)),
"'\nRun make_se() or make_se_parse() to obtain the required columns",
call. = FALSE)
}
# Grep P value columns
if(adjusted) {
p_val <- row_data %>%
select(name, ends_with("_p.adj")) %>%
gather(var, val, -name)
} else {
p_val <- row_data %>%
select(name, ends_with("_p.val")) %>%
gather(var, val, -name)
}
# Plot histogram
p <- ggplot(p_val, aes(val)) +
geom_histogram(bins = 100, center = 0.005) +
labs(title = "P value histogram",
x = "P-value") +
theme_DEP1()
if(wrap) {
p <- p + facet_wrap(~ var)
}
p
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.