#' Check the right value of sizeFactors
#'
#' Check the right value of sizeFactors
#'
#' @param dds a \code{DESeqDataSet} object
#' @param group vector of the condition from which each sample belongs
#' @param col colors of the points
#' @param out \code{TRUE} to export the figure
#' @param versionName versionName of the project
#' @return Histograms
#' @author Marie-Agnes Dillies and Hugo Varet
# created April 2nd, 2013
# modified Sept 24th, 2013 (diagDir > graphDir)
# modified Sept 26th, 2013 (option out)
# modified Oct 29th, 2013 (deleted target argument)
# modified Feb 6th, 2014 (fix x-axis)
# modified Mar 21st, 2014 (removed outputfile argument)
# modified July 30th, 2014 (added size factors vs total # of counts diagnostic plot)
# modified Aug 5th, 2014 (removed graphDir argument)
# modified July 19th, 2016 (total number of reads in millions)
# modified May 2nd, 2017 (add labels and colors)
# modified August 26th, 2019 (ggplot2)
diagSizeFactors <- function(dds, group, col=c("lightblue","orange","MediumVioletRed","SpringGreen"),
out = TRUE, versionName="."){
group <- data.frame(group=apply(group, 1, paste, collapse="-"))
group$group <- factor(group$group, levels=unique(group$group))
if (out) pdf(file=paste0("figures/", versionName, "-diagSizeFactors.pdf"))
geomeans <- exp(rowMeans(log(counts(dds))))
samples <- colnames(counts(dds))
counts.trans <- log2(counts(dds)/geomeans)
counts.trans <- counts.trans[which(!is.na(apply(counts.trans, 1, sum))),]
for (j in 1:ncol(dds)){
d <- data.frame(x=counts.trans[,j])
print(ggplot(data=d, aes(x=.data$x)) +
geom_histogram(bins=100) +
scale_y_continuous(expand=expansion(mult=c(0.01, 0.05))) +
xlab(expression(log[2]~(counts/geometric~mean))) +
ylab("Frequency") +
ggtitle(paste0(versionName," - Size factors diagnostic - Sample ", samples[j])) +
geom_vline(xintercept=log2(sizeFactors(dds)[j]), linetype="dashed", color="red", size=1))
}
if (out) dev.off()
# total read counts vs size factors
if (out) pdf(file=paste0("figures/", versionName, "-diagSizeFactorsTC.pdf"), width=8)
d <- data.frame(sf=sizeFactors(dds),
libsize=colSums(counts(dds))/1e6,
group,
sample=factor(colnames(dds), levels=colnames(dds)))
print(ggplot(data=d, aes(x=.data$sf, y=.data$libsize, color=.data$group, label=.data$sample)) +
geom_point(show.legend=TRUE, size=3) +
scale_colour_manual(values=col) +
labs(color="") +
geom_text_repel(show.legend=FALSE, size=5, point.padding=0.2) +
xlab("Size factors") +
ylab("Total number of reads (millions)") +
ggtitle("Diagnostic: size factors vs total number of reads") +
geom_abline(slope=coefficients(lm(libsize ~ sf + 0, data=d)), intercept=0, show.legend=FALSE, linetype="dashed", color="grey"))
if (out) dev.off()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.