#' Differential gene expression of multi-normalised NanoString mRNA data
#'
#' @description Provides a wrapper around limma to performs differential
#' gene expression analysis of all possible pairwise comparisons on all
#' normalised assays within a count_set object.
#'
#' @param count_set a normalised, count_set summarized experiment.
#' Default = NULL
#' @param adj_method method for multiple test corrections. Options are:
#' "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr" or "none".
#' See ?p.adjust.methods for a details description and references.
#' Default = "fdr".
#' @param p_cut_off p value threshold for DGE.
#' @param logFC_cut_off log2 fold change threshold for DGE
#' @param pairing if data is paired "paired", if unpaired "unpaired" (Default)
#'
#' @examples
#' # biological groups
#' rnf5_group <- c(rep("WT", 5), rep("KO", 5))
#'
#' # sample ids
#' rnf5_sampleid <- c("GSM3638131", "GSM3638132", "GSM3638133", "GSM3638134",
#' "GSM3638135", "GSM3638136", "GSM3638137", "GSM3638138",
#' "GSM3638139", "GSM3638140")
#'
#' # build count_set
#' rnf5_count_set <- count_set(count_data = Rnf5,
#' group = rnf5_group,
#' samp_id = rnf5_sampleid)
#' # normalize
#' rnf5_count_set_norm <- multi_norm(count_set = rnf5_count_set,
#' positive_control_scaling = TRUE,
#' background_correct = "mean2sd",
#' #plot_dir = "~/Dropbox/git/NanoStringClustR/plot_test/"
#' )
#' # rank normalizations
#' rnf5_eval <- norm_rank(rnf5_count_set_norm)
#'
#' # differential gene expression
#' rnf5_multi_diff <- multi_diff(count_set = rnf5_count_set_norm,
#' adj_method = "fdr",
#' p_cut_off = 0.05,
#' logFC_cut_off = 0)
#'
#' @return Returns a list containing number of DGE for each normalization
#' method, upset plots and full DEG results and fits.
#'
#' @export multi_diff
#'
#' @importFrom limma lmFit contrasts.fit eBayes topTable makeContrasts
#' @importFrom SummarizedExperiment assays
#' @importFrom ggplot2 ggplot aes geom_bar geom_point theme_classic scale_fill_brewer element_text xlab ylab theme vars facet_wrap
#' @importFrom Biobase subListExtract
#' @importFrom UpSetR upset fromList
#'
#' @references
#'
#' Ritchie, M.E., Phipson, B., Wu, D., Hu, Y., Law, C.W., Shi, W., and Smyth, G.K. (2015). limma powers differential
#' expression analyses for RNA-sequencing and microarray studies. Nucleic Acids Research 43(7), e47.
#'
#' Nils Gehlenborg (2019). UpSetR: A More Scalable Alternative to Venn and Euler Diagrams for Visualizing Intersecting Sets.
#' R package version 1.4.0. https://CRAN.R-project.org/package=UpSetR
#'
#'
multi_diff <- function(count_set = NULL,
adj_method = "fdr",
p_cut_off = 0.1,
logFC_cut_off = 0,
pairing = "unpaired") {
####### Input checks #######---------------------------------------------------
# check count_set is provided
if(is.null(count_set)) stop("A count_set list generated by count_set must be provided.")
if(!is.null(count_set)) {
# check the file format
if(count_set@class != "SummarizedExperiment"){
stop(paste("summarized file provided is not a SummarizedExperiment,
Please produce a SummarizedExperiment using count_set.", "\n",
sep=""))
}
}
# check that the count_set has been normalised
if(length(names(assays(count_set))) == 1) stop("The count_set input must be normalised
with multi_norm")
# check the p-value adjustment method is correct
if((adj_method %in% c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY",
"fdr", "none")) == FALSE){
stop("adjust_method must be one of the following methods: \"holm\",
\"hochberg\", \"hommel\", \"bonferroni\", \"BH\", \"BY\", \"fdr\" or
\"none\"\n")
}
####### Input checks finished #######---------------------------------------
### 1. Design matrix ###
design <- build_design_mat(count_set = count_set, pairing = pairing)
design_table <-design$table
design <- design$design
### 2. Contrast matrix ###
cont_mat <- build_cont_mat(design = design, design_table = design_table)
####### DGE ######----------------------------------------------------------
assays_all <- unique(names(assays(count_set)))
#drop <- c("counts", "background_corrected", "positive_control_scaled")
#assays_all <- assays_all[!(assays_all %in% drop)]
### 3. Limma ###
full_results <- sapply(seq_along(assays_all), function(i){
limma_wrap(count_set = count_set,
norm_method = assays_all[i],
design = design,
cont_mat = cont_mat,
adj_method = adj_method)
})
colnames(full_results) <- assays_all
full_fits <- full_results[2, ]
full_results <- full_results[1, ]
###### no. of DEG ######----------------------------------------------------------
nDEG <- lapply(seq_along(full_results), function(x)
sapply(seq_along(full_results[[x]]), function(i)
length(which((full_results[[x]][[i]]$adj.P.Val < p_cut_off) &
(abs(full_results[[x]][[i]]$logFC) > logFC_cut_off)))))
names(nDEG) <- names(full_results)
nDEG_table <- as.data.frame(nDEG)
rownames(nDEG_table) <- names(full_results$housekeeping_scaled)
###### plot no. of DEGs ######---------------------------------------------------------
for_plot <- nDEG_table
for_plot$contrast <- rownames(for_plot)
for_plot <- reshape::melt(for_plot, id = "contrast")
colnames(for_plot) <- c("contrast", "norm_method", "No_DEG")
#plot
DEG_plot <- ggplot2::ggplot(data = for_plot, aes(x = norm_method, y = No_DEG))+
geom_bar(stat="identity", aes(fill = norm_method), colour = "black")+
ggplot2::facet_wrap(ggplot2::vars(contrast))+
ggplot2::scale_fill_brewer()+
theme(axis.text.x = element_text(angle = 90, hjust = 1),
legend.position = "none")+
xlab("Contrast")+
ylab(ifelse(adj_method == "none", paste("No. DEG (p <", p_cut_off, ")", sep=""),
paste("No. DEG (", adj_method, " adj.p <", p_cut_off, ")", sep="")))
###### upset diagrams ######----------------------------------------------------
contrasts <- names(full_results[1:length(full_results)][[1]])
#remove contrasts that have no DEG or UpSet won't work
contrasts <- contrasts[ ! contrasts %in% (rownames(nDEG_table[rowSums(nDEG_table) == 0,])) ]
length_contrasts <- vector("list", length(contrasts))
#if there are no DEGs
if(length(length_contrasts) == 0){
warning(paste("No overlap in DEG at p = ", p_cut_off," & logFC > ", logFC_cut_off,
" with adj_method = ", adj_method, ". UpSets not made.", sep=""))
} else {
upsets <- lapply(seq_along(contrasts), function(i){
upsets_from_contrasts(cont = contrasts[i],
full_results = full_results,
p_cut_off = p_cut_off,
logFC_cut_off = logFC_cut_off)
})
}
return(list("plot_DEG" = DEG_plot,
"overlap_DEG" = upsets,
"summary_DEG" = nDEG_table,
"results_DEG" = full_results,
"fits" = full_fits))
}
###### function to build design matrix #######--------------------------------
build_design_mat <- function(count_set = NULL,
pairing = "unpaired") {
group <- factor(count_set$group)
if(pairing == "unpaired"){
design <- stats::model.matrix(~ 0 + group)
design_table <- data.frame("sample" = colnames(count_set),
"group" = group)
}
if(pairing == "paired"){
pairs <- factor(count_set$pair)
design <- stats::model.matrix(~ 0 + group + pairs)
design_table <- data.frame("sample" = colnames(count_set),
"group" = group,
"pairs" = pairs)
}
rownames(design) <- colnames(count_set)
colnames(design) <- gsub("group", "", colnames(design))
colnames(design) <- gsub("pairs", "", colnames(design))
return(list("design" = design, "table" = design_table))
}
###### function to build contrast matrix ####### ------------------------------
build_cont_mat <- function(design = design, design_table = design_table){
all_c <- levels(design_table$group)
all_com <- utils::combn(all_c, 2)
all_combs <- c(sapply(seq_len(ncol(all_com)), function(i)
paste(all_com[1,i], all_com[2,i], sep="-")))
all_combinations <- paste(all_combs, collapse=",")
contrast_cmd <- paste("limma::makeContrasts(", all_combinations, ",levels = design)",
sep="")
contrast_matrix <- eval(parse(text=contrast_cmd))
return(contrast_matrix)
}
###### function to wrap limma ##### ------------------------------------
limma_wrap <- function(count_set = NULL,
norm_method = "housekeeping_scaled",
design = design,
cont_mat = cont_mat,
adj_method = adj_method){
get_counts <- paste0("assays(count_set)$", norm_method)
data_in <- na.omit(eval(parse(text=get_counts)))
fit<- limma::lmFit(data_in, design = design)
all_fit <- lapply(seq_len(ncol(cont_mat)), function(i)
limma::contrasts.fit(fit = fit,
contrasts = t(data.frame(t(cont_mat[,i]),
row.names = colnames(cont_mat)[i]))))
all_fit <- lapply(seq_len(length(all_fit)), function(i)
limma::eBayes(all_fit[[i]], robust = TRUE))
all_results <- lapply(seq_len(length(all_fit)), function(i)
limma::topTable(all_fit[[i]], number="inf", lfc = 0, adjust.method = adj_method))
names(all_results) <- colnames(cont_mat)
return(list("all_results" = all_results, "all_fit" = all_fit))
}
###########function to make upsets ####### -----------------------------------------
upsets_from_contrasts <- function(cont = contrast,
full_results = full_results,
p_cut_off = p_cut_off,
logFC_cut_off = logFC_cut_off){
topTable <- Biobase::subListExtract(full_results, cont)
DEG <- lapply(seq_along(topTable), function(j)
topTable[[j]][which((topTable[[j]]$adj.P.Val < p_cut_off) &
(abs(topTable[[j]]$logFC) > logFC_cut_off)) ,])
names(DEG) <- names(full_results)
DEG_rownames <- lapply(seq_along(DEG), function(j)
rownames(DEG[[j]]))
names(DEG_rownames) <- names(DEG)
#check there is an overlap to plot
check_overlaps <- fromList(DEG_rownames)
if (length(check_overlaps[,which(colSums(check_overlaps) > 0)])>1){
upset_i <- UpSetR::upset(fromList(DEG_rownames),
nsets = length(fromList(DEG_rownames)),
number.angles = 0,
point.size = 4,
line.size = 2,
mainbar.y.label = "norm_method intersections",
sets.x.label = "DEG per norm_method",
text.scale = c(2, 2, 2, 2, 2, 2))
return(upset_i)
} else {
warning("There is no overlap of DEG between normalization methods in contrast ", contrast_i, ".", sep = "")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.