#' Proportion of reads associated with the three most expressed sequences per sample
#'
#' Proportion of reads associated with the three most expressed sequences per sample
#'
#' @param counts matrix of counts
#' @param n number of most expressed sequences to return
#' @param group vector of the condition from which each sample belongs
#' @param out \code{TRUE} to export the figure
#' @param col colors of the bars
#' @param versionName versionName of the project
#' @return A \code{data.frame} with the percentage of reads of the three most expressed sequences
#' @author Marie-Agnes Dillies and Hugo Varet
# created Oct 29th, 2013
# modified Feb 17th, 2014 (added versionName and tabDir to export the table)
# modified July 30th, 2014 (added the barplot from majSequence())
# modified Aug 5th, 2014 (removed graphDir and tabDir arguments)
# modified Oct 19th, 2017 (percentage instead of proportion)
# modified August 26th, 2019 (ggplot2)
majSequences <- function(counts, n=3, group, out=TRUE, col=c("lightblue","orange","MediumVioletRed","SpringGreen"), versionName="."){
seqnames <- apply(counts, 2, function(x){x <- sort(x, decreasing=TRUE); names(x)[1:n]})
seqnames <- unique(unlist(as.character(seqnames)))
sum <- apply(counts,2,sum)
counts <- counts[seqnames,]
sum <- matrix(sum, nrow(counts), ncol(counts), byrow=TRUE)
p <- round(100*counts/sum,digits=3)
write.table(p, file=paste0("tables/", versionName, ".majSequences.xls"),
row.names=TRUE, col.names=NA, sep="\t", quote=FALSE)
maj <- apply(p, 2, max)
seqname <- rownames(p)[apply(p, 2, which.max)]
if (out) pdf(file=paste0("figures/", versionName, "-majSeq.pdf"), width=min(14,7+3*ncol(counts)/10), height=7)
group <- data.frame(group=apply(group, 1, paste, collapse="-"))
group$group <- factor(group$group, levels=unique(group$group))
d <- data.frame(maj=maj, sample=factor(names(maj), levels=names(maj)), group, seqname=seqname)
print(ggplot(d, aes(x=.data$sample, y=.data$maj, fill=.data$group)) +
geom_bar(stat="identity", show.legend=TRUE) +
labs(fill="") +
scale_fill_manual(values=col) +
xlab("Samples") +
ylab("Percentage of reads") +
scale_y_continuous(expand=expansion(mult=c(0.01, 0.05))) +
ggtitle(paste(versionName, "Percentage of reads from\nmost expressed sequence", sep=" - ")) +
theme(axis.text.x=element_text(angle=90, hjust=1, vjust=0.5)) +
geom_text(aes(y=0.8*maj, label=seqname), color="black", size=2.5, angle=90, fontface="bold"))
if (out) dev.off()
return(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.