#' @title Plot percent increase in detection of DE genes across replicate levels
#'
#' @description
#' \code{ggplot2_marginPlot} function plots the percent change in number of DE
#' genes identified at each step-wise increase in replicate level.
#'
#' @details
#' The percent change is calculated as (margin*100%)/(num. of DE genes at the
#' lower replicate level). The results are visualized as bar plots. Either mean
#' or median can be used for the calculation.
#'
#' The elements of \code{deg} are processed in the order in which they appear
#' in the list; each consecutive pair of elements forms one step. At least two
#' elements are therefore required. If the lower level of a step has a mean or
#' median of zero DE genes, the percent change for that step is not finite
#' (\code{Inf} or \code{NaN}).
#'
#' @param deg The list of DE genes generated by one of ERSSA::DE_*.R scripts.
#' Each element holds the DE gene sets found at one replicate level.
#' @param stat The statistic used for plotting. Options include 'mean',
#' 'median'. Default='median'. Any other value raises an error.
#' @param path Path to which the plot will be saved. Default to current working
#' directory. The directory is created (including missing parent directories)
#' if it does not exist.
#' @param save_plot Boolean. Whether to save plot to drive. Default to TRUE.
#'
#' @return A list is returned containing:
#' \describe{
#'  \item{gg_object}{the ggplot2 object, which can then be further
#'  customized.}
#'  \item{marg_diff.dataframe}{the tidy table version of percent changes for
#'  plotting.}
#' }
#' @author Zixuan Shao, \email{Zixuanshao.zach@@gmail.com}
#'
#' @examples
#' # load edgeR deg object generated by erssa_edger using example dataset
#' # example dataset containing 1000 genes, 4 replicates and 5 comb. per rep.
#' # level
#' data(deg.partial, package = "ERSSA")
#'
#' gg_margin = ggplot2_marginPlot(deg.partial)
#'
#' @references
#' H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
#' Springer-Verlag New York, 2009.
#'
#' @export
#'
#' @import ggplot2
#' @importFrom stats median
ggplot2_marginPlot <- function(deg = NULL, stat = 'median', path = '.',
                               save_plot = TRUE) {
  if (is.null(deg)) {
    stop('Missing required deg argument in ggplot2_marginPlot function')
  }

  # Validate the statistic once, up front, rather than once per step.
  if (!is.character(stat) || length(stat) != 1L ||
      !stat %in% c('mean', 'median')) {
    stop('Only mean or median currently supported for plotting marginal ',
         'difference in Num. of DE genes.')
  }

  # A step needs a lower and an upper level; seq(1, 0) would silently
  # iterate over c(1, 0), so reject inputs that cannot form a step.
  if (length(deg) < 2L) {
    stop('ggplot2_marginPlot requires deg to contain at least two ',
         'replicate levels.')
  }

  center_fun <- if (stat == 'mean') mean else median

  # Fall back to positional labels when the list carries no names.
  level_names <- names(deg)
  if (is.null(level_names)) {
    level_names <- as.character(seq_along(deg))
  }

  # create name for the step-change in replicate level
  step_index <- seq_len(length(deg) - 1L)
  step_labels <- paste0(level_names[step_index], ' \u2192 ',
                        level_names[step_index + 1L])

  # Central number of DE genes per level, computed once for every level.
  # Levels are indexed by position so duplicated names cannot alias.
  level_center <- vapply(deg, function(level) {
    as.numeric(center_fun(lengths(level)))
  }, FUN.VALUE = numeric(1), USE.NAMES = FALSE)

  # calculate percent difference relative to the lower level of each step
  lower <- level_center[step_index]
  upper <- level_center[step_index + 1L]
  percent_diff <- (upper - lower) * 100 / lower

  # round the diff for the bar labels
  rounded_per_diff <- paste0(round(percent_diff, 1), '%')

  # dataframe for plotting; factor levels keep the steps in input order
  per_diff_df <- data.frame(
    replicate = factor(step_labels, levels = unique(step_labels)),
    per_diff = percent_diff,
    rounded_per_diff = rounded_per_diff,
    stringsAsFactors = FALSE
  )

  # Labels sit above positive bars and below negative ones.
  label_vjust <- ifelse(per_diff_df$per_diff >= 0, -0.2, 1.2)

  # plot
  gg <- ggplot(per_diff_df,
               aes(x = .data[['replicate']], y = .data[['per_diff']])) +
    geom_col(width = 0.7) +
    theme_bw(base_size = 14) +
    labs(x = '', y = paste0('Percent change in ', stat,
                            ' number of DE genes')) +
    geom_text(aes(label = .data[['rounded_per_diff']]),
              vjust = label_vjust) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

  if (isTRUE(save_plot)) {
    # create dir to save results; recursive so nested paths also work
    dir.create(path, showWarnings = FALSE, recursive = TRUE)
    # save plot
    ggsave(filename = file.path(path,
                                'ERSSA_plot_2_MarginalNumOfDEGenes.png'),
           plot = gg, dpi = 300, width = 20,
           height = 15, units = 'cm')
  }

  list(gg_object = gg, marg_diff.dataframe = per_diff_df)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.