#' Generate some plots to investigate the gene length effect
#'
#' Generate some plots to investigate the gene length effect
#'
#' @param counts matrix of counts
#' @param complete list of results from \code{exportComplete.DESeq2()} or \code{exportComplete.edgeR()}
#' @param glength \code{data.frame} of 2 columns: features's id and length
#' @param out \code{TRUE} to export the figure
#' @param versionName versionName of the project
#' @author Hugo Varet
# created January 18th, 2017
# modified February 10th, 2017
# modified August 26th, 2019 (ggplot2)
geneLengthEffect <- function(counts, complete, glength, out=TRUE, versionName="."){
if (nrow(counts) != nrow(glength) || any(sort(rownames(counts)) != sort(glength[,1]))){
stop("Different features in the counts matrix and in the glength table, please check them.")
}
names(glength) <- c("Id", "length")
# effect on the statistical tests
if (out) pdf(paste0("figures/", versionName, "-geneLengthEffectPvalue.pdf"), width=7, height=7)
for (name in names(complete)){
complete.name <- complete[[name]][,c("Id", "pvalue")]
complete.name <- merge(complete.name, glength, by="Id")
complete.name$length_class <- cut(complete.name$length,
breaks=quantile(complete.name$length, probs=seq(0, 1, 0.10), na.rm=TRUE),
include.lowest=TRUE, labels=1:10)
complete.name <- complete.name[which(!is.na(complete.name$pvalue)),]
reverselog_trans <- function(base = exp(1)) {
trans <- function(x) -log(x, base)
inv <- function(x) base^(-x)
trans_new(paste0("reverselog-", format(base)), trans, inv,
log_breaks(base = base),
domain = c(.Machine$double.xmin, Inf))
}
print(ggplot(complete.name, aes(x=.data$length_class, y=.data$pvalue, fill=.data$length_class)) +
geom_boxplot(show.legend=FALSE) +
xlab("Gene length (by decile)") +
ylab("P-value") +
ggtitle(paste0(versionName, " - gene length effect on raw P values ", name)))
}
if (out) dev.off()
# effect on the number of reads per gene
counts <- data.frame(Id=rownames(counts), counts+1)
counts <- merge(glength, counts, by="Id", sort=FALSE, all=FALSE)
# plot gene length vs counts
if (out) pdf(paste0("figures/", versionName, "-geneLengthEffectCounts.pdf"), width=7, height=7)
for (i in 3:ncol(counts)){
print(ggplot(data=counts, aes_string(x="length", y=names(counts)[i])) +
geom_point(show.legend = FALSE, size=0.2, alpha=0.2) +
scale_x_continuous(trans = log10_trans(),
breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(~10^.x))) +
scale_y_continuous(trans = log10_trans(),
breaks = trans_breaks("log10", function(x) 10^x),
labels = trans_format("log10", math_format(~10^.x))) +
xlab("Gene length") +
ylab("Raw counts") +
ggtitle(paste0(versionName, " - sample ", names(counts)[i])))
}
if (out) dev.off()
# export table
write.table(counts[order(counts$length),], file=paste0("tables/", versionName, ".geneLengthEffect.xls"), sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.