#' geneBinHeatmap
#'
#' A wrapper around `ComplexHeatmap`.
#'
#' @param se A bin-wise SummarizedExperiment as produced by
#' \code{\link{countFeatures}}
#' @param gene The gene of interest
#' @param what Type of values (i.e. assay) to plot
#' @param minDensityRatio Minimum ratio of read density (with respect to the
#' gene's average) for a bin to be plotted.
#' @param anno_rows Row annotation columns (i.e. columns of `rowData(se)`) to
#' plot
#' @param anno_columns Column annotation columns (i.e. columns of
#' `colData(se)`) to plot
#' @param anno_colors Annotation colors, as a list named with the row/column
#' annotations, see `\code{\link[ComplexHeatmap]{SingleAnnotation}}` for
#' details. Ignored if `left_annotation` and/or `top_annotation` are given
#' directly.
#' @param removeAmbiguous Logical; whether to remove bins that are
#' gene-ambiguous (i.e. overlap multiple genes).
#' @param merge_legends Logical; whether to merge legends. This effectively
#' calls `draw(..., merge_legends=TRUE)` around the heatmap.
#' @param cluster_columns Logical; whether to cluster columns (passed to
#' \code{\link[ComplexHeatmap]{Heatmap}})
#' @param left_annotation Passed to \code{\link[ComplexHeatmap]{Heatmap}},
#' overrides `anno_rows`.
#' @param top_annotation Passed to \code{\link[ComplexHeatmap]{Heatmap}},
#' overrides `anno_columns`.
#' @param ... Passed to `ComplexHeatmap` (see
#' \code{\link[ComplexHeatmap]{Heatmap}})
#'
#' @return A \code{\link[ComplexHeatmap]{Heatmap}}
#' @export
#' @import SummarizedExperiment
#' @importFrom ComplexHeatmap draw Heatmap HeatmapAnnotation rowAnnotation
#' @importFrom viridisLite viridis
#'
#' @examples
#' data(example_bin_se)
#' se <- diffSpliceWrapper(example_bin_se, ~condition)
#' geneBinHeatmap(se, "Jund")
geneBinHeatmap <- function(se, gene,
what=NULL,
anno_rows=c("type","logWidth","meanLogDensity",
"log10PValue","geneAmbiguous"),
anno_columns=c(), anno_colors=list(),
removeAmbiguous=FALSE, merge_legends=TRUE,
cluster_columns=FALSE, minDensityRatio=0.1,
left_annotation=NULL, top_annotation=NULL, ...){
se <- .checkSE(se, checkNorm=TRUE)
if(length(w <- .matchGene(se, gene))==0) stop("Gene not found!")
se <- sort(se[w,])
if(removeAmbiguous) se <- se[!rowData(se)$geneAmbiguous,]
se <- se[rowData(se)$logDensityRatio >= log(minDensityRatio),]
if(is.null(what)){
h1 <- geneBinHeatmap(se, gene, "logNormDensity", anno_rows=anno_rows,
anno_columns=anno_columns, anno_colors=anno_colors,
removeAmbiguous=removeAmbiguous, merge_legends=FALSE,
left_annotation=left_annotation, col=viridis(20),
column_title="logNormDensity",
top_annotation=top_annotation, ...)
h2 <- geneBinHeatmap(se, gene, "scaledLogCPM", anno_rows=c(),
anno_columns=anno_columns, anno_colors=anno_colors,
removeAmbiguous=removeAmbiguous, merge_legends=FALSE,
column_title="scaledLogCPM",
top_annotation=top_annotation, ...)
if(merge_legends) return(draw(h1+h2, merge_legends=TRUE))
return(h1+h2)
}else{
what <- match.arg(what, c("logNormDensity", "logCPM", "scaledLogCPM"))
}
if("log10PValue" %in% anno_rows &&
!("log10PValue" %in% colnames(rowData(se))))
rowData(se)[["log10PValue"]] <- -log10(rowData(se)$bin.p.value)
if("type" %in% anno_rows && is.null(anno_colors$type))
anno_colors$type <- .typeColors()
if(removeAmbiguous | !any(rowData(se)$geneAmbiguous))
anno_rows <- intersect(setdiff(anno_rows, "geneAmbiguous"),
colnames(rowData(se)))
if(is.null(left_annotation) && length(anno_rows)>0){
left_annotation <- rowAnnotation(
df=as.data.frame(rowData(se)[,anno_rows,drop=FALSE]),
col=anno_colors[intersect(names(anno_colors),anno_rows)])
}
if(is.null(top_annotation) && !is.null(anno_columns)){
top_annotation <- HeatmapAnnotation(
df=as.data.frame(colData(se)[,anno_columns,drop=FALSE]),
col=anno_colors[intersect(names(anno_colors),anno_rows)])
}
x <- assays(se)[[ifelse(what=="logNormDensity",what,"logcpm")]]
if(what=="scaledLogCPM") x <- t(scale(t(x)))
h <- Heatmap(x, name=what, show_row_names=FALSE, cluster_rows=FALSE,
cluster_columns=cluster_columns, row_title=paste(gene, "bins"),
top_annotation=top_annotation, left_annotation=left_annotation, ...)
if(merge_legends) return(draw(h, merge_legends=TRUE))
h
}
#' deuBinPlot
#'
#' @param se A bin-wise SummarizedExperiment as produced by
#' \code{\link{countFeatures}} and including bin-level tests (i.e. having been
#' passed through one of the DEU wrappers such as
#' \code{\link{diffSpliceWrapper}} or \code{\link{DEXSeqWrapper}})
#' @param gene The gene of interest
#' @param type Either 'summary' (plot DEU summary), 'sample' (plot sample-wise
#' data), or 'condition' (plot data aggregate by condition)
#' @param intronSize Intron plot size. If <=3, intron size will be this
#' fraction of the mean exon size. If >3, each intron will have the given size.
#' @param exonSize Scaling for exon sizes, either 'sqrt', 'log', or 'linear'.
#' @param y Value to plot on the y-axis. If `type="summary"`, this should be a
#' column of `rowData(se)`, otherwise should be an assay name of `se`.
#' @param condition The colData column containing the samples' condition.
#' @param size rowData variable to use to determine the thickness of the bins.
#' @param lineSize Size of the line connecting the bins. Use `lineSize=0` to
#' omit the line.
#' @param colour rowData variable to use to determine the colour of the bins.
#' If `type="condition"`, can also be "condition"; if `type="sample"` can be
#' any colData column.
#' @param alpha Alpha level, passed to ggplot.
#' @param removeAmbiguous Logical; whether to remove bins that are
#' gene-ambiguous (i.e. overlap multiple genes).
#' @param minDensityRatio Minimum ratio of read density (with respect to the
#' gene's average) for a bin to be plotted.
#'
#' @return A ggplot object
#' @importFrom matrixStats colMedians
#' @importFrom dplyr bind_rows
#' @export
#' @import ggplot2
#'
#' @examples
#' data(example_bin_se)
#' se <- diffSpliceWrapper(example_bin_se, ~condition)
#' deuBinPlot(se, "Jund")
deuBinPlot <- function(se, gene, type=c("summary","condition","sample"),
intronSize=2, exonSize=c("sqrt","linear","log"), y=NULL,
condition=NULL, size="type", lineSize=1,
colour=NULL, alpha=NULL, removeAmbiguous=TRUE,
minDensityRatio=0.1){
se <- .checkSE(se)
type <- match.arg(type)
exonSize <- match.arg(exonSize)
stopifnot(length(gene)==1)
if(is.null(colour)){
if(type!="summary" && !is.null(condition)){
colour <- "condition"
}else{
colour <- "log10PValue"
}
}
if(is.null(alpha)) alpha <- ifelse(type=="sample", 0.6, 1)
w <- .matchGene(se, gene)
if(length(w)==0) stop("Gene not found in the data!")
if(type=="condition"){
if(is.null(condition)){
condition <- ifelse("condition" %in% colnames(colData(se)), "condition", "group)
}
stopifnot(length(condition)==1 && condition %in% colnames(colData(se)))
}
if(is.null(y)){
if(type=="summary"){
ff <- colnames(rowData(se))[grep("bin.p.value",colnames(rowData(se)))-1]
for(f in c("coefficient","coefficients","log2fc","logFC",ff))
if(is.null(y) && f %in% colnames(rowData(se))) y <- f
if(is.null(y)) stop("Please specify `y` among the rowData columns.")
}else{
if("logNormDensity" %in% assayNames(se)){
y <- "logNormDensity"
}else{
y <- assayNames(se)[1]
message("Using assay '",y, "' (use `y` to specify another)")
}
}
}
se <- sort(se[w,])
if(removeAmbiguous) se <- se[!rowData(se)$geneAmbiguous,]
se <- se[rowData(se)$logDensityRatio >= log(minDensityRatio),]
de <- rowData(se)
if(y=="geneNormDensity" && !("geneNormDensity" %in% assayNames(se))){
si <- vapply(split(seq_len(ncol(se)), se[[condition]]),
FUN.VALUE=numeric(1), FUN=function(i){
mean(matrixStats::colMedians(assays(se)$logNormDensity[,i]))
})
assays(se)$geneNormDensity <-
t(t(assays(se)$logNormDensity)-si[se[[condition]]])
}
if(size=="type"){
levels(de$type)[grepl("CDS/.*UTR", levels(de$type))] <- "CDS/UTR"
levels(de$type)[grep("CDS/.*UTR", levels(de$type))] <- "CDS/UTR"
levels(de$type)[grep("CDS|non-coding", levels(de$type), invert=TRUE)] <-
"UTR"
de$type <- factor(de$type, c("non-coding","UTR","CDS/UTR","CDS"))
}
gr <- ranges(se)
de$log10PValue <- -log10(de$bin.p.value)
de$order <- seq_len(nrow(de))
de$width <- width(gr)
de$width2 <- switch(exonSize, sqrt=sqrt(de$width), log=log(de$width),
de$width)
de$nextIsIntron <- c(start(gr)[-1],rev(end(gr))[1]) > end(gr)+1
if(intronSize <= 3) intronSize <- intronSize*mean(de$width2)
de$x_start <- c(0,cumsum(de$width2)+
cumsum(de$nextIsIntron*intronSize))[-nrow(de)]
de$x_end <- de$x_start + de$width2
getd2 <- function(de)
data.frame( x_start=de$x_end[-nrow(de)], x_end=de$x_start[-1],
y_start=de[[y]][-nrow(de)], y_end=de[[y]][-1],
type=de$nextIsIntron[-nrow(de)])
p <- ggplot(as.data.frame(de))
if(type=="summary"){
if(lineSize>0){
d2 <- getd2(de)
p <- p + geom_segment(data=d2,
aes(x=x_start, xend=x_end, y=y_start, yend=y_end,
linetype=type),
colour="grey", size=lineSize, alpha=alpha)
}
}else{
y <- match.arg(y, assayNames(se))
if(type=="condition"){
e <- vapply(split(seq_len(ncol(se)), colData(se)[[condition]]),
FUN.VALUE=numeric(nrow(de)),
FUN=function(i) rowMeans(assays(se)[[y]][,i,drop=FALSE]))
}else{
e <- assays(se)[[y]][,]
}
de <- lapply(setNames(colnames(e),colnames(e)), FUN=function(x){
de2 <- as.data.frame(de)
de2[[y]] <- as.numeric(e[,x])
de2
})
if(lineSize>0) d2 <- dplyr::bind_rows(lapply(de, FUN=getd2), .id=type)
de <- dplyr::bind_rows(de, .id=type)
if(type=="sample"){
de <- cbind(de, colData(se)[de$sample,,drop=FALSE])
if(lineSize>0) d2 <- cbind(d2, colData(se)[d2$sample,,drop=FALSE])
}
if(type=="sample" && lineSize>0 && colour %in% colnames(d2)){
p <- p + geom_segment(data=d2,
aes_string(x="x_start", xend="x_end", y="y_start",
yend="y_end", linetype="type",
colour=colour),
size=lineSize, alpha=min(0.6,alpha))
}else{
p <- p + geom_segment(data=d2,
aes_string(x="x_start", xend="x_end", y="y_start",
yend="y_end", linetype="type", group=type),
colour="grey", size=lineSize, alpha=min(0.6,alpha))
}
}
if(colour=="type" && type!="sample")
p <- p + scale_colour_manual(values=.typeColors())
if(size=="type")
p <- p + scale_size_manual(values=c("non-coding"=2, "UTR"=3, "CDS/UTR"=4,
CDS=6))
xlab <- switch(exonSize,
linear="Genomic location",
paste0(exonSize,"-scaled genomic location"))
p + scale_linetype_manual(values=c("TRUE"="dotted", "FALSE"="solid"),
guide=FALSE) +
geom_segment(data=as.data.frame(de), alpha=alpha,
aes_string(x="x_start", xend="x_end", y=y, yend=y, size=size,
colour=colour)) +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
ggtitle(paste0(gene, " (", as.character(strand(se)[1]),")")) + xlab(xlab) +
ylab(ifelse(y=="geneNormDensity","Gene-normalized logDensity",y))
}
#' plotTopGenes
#'
#' @param se A bin-wise SummarizedExperiment as produced by
#' \code{\link{countFeatures}} and including bin-level tests (i.e. having been
#' passed through one of the DEU wrappers such as
#' \code{\link{diffSpliceWrapper}} or \code{\link{DEXSeqWrapper}})
#' @param n The maximum number of genes for which to plot labels
#' @param FDR The FDR threshold above which to plot labels
#' @param diffUTR Logical; if FALSE, uses absolute coefficients (appropriate
#' for normal differential exon usage); if TRUE, uses non-absolute (ie changes
#' should be in the same direction across significant bins) and width-weighted
#' scores (i.e. larger bins have more weight) -- this is relevant only when
#' testing UTR usage.
#' @param alpha Points transparency
#' @param ... Passed to \link[ggrepel]{geom_label_repel}; this can for instance
#' be used to increase `max.overlaps` when not all desired gene labels are
#' displayed)
#'
#' @return A ggplot
#' @export
#' @importFrom S4Vectors metadata
#' @import ggplot2
#' @importFrom ggrepel geom_text_repel
#'
#' @examples
#' data(example_bin_se)
#' se <- diffSpliceWrapper(example_bin_se, ~condition)
#' plotTopGenes(se)
plotTopGenes <- function(se, n=10, FDR=0.05, diffUTR=FALSE, alpha=1, ...){
if(is(se, "SummarizedExperiment")){
se <- .checkSE(se, requireStats=TRUE)
se <- metadata(se)$geneLevel
}
stopifnot(!is.null(se) && (is.data.frame(se) || is(se,"DFrame")))
se <- as.data.frame(se[!is.na(se$q.value),])
if(length(w <- which(se$q.value<=0))>0){
minnzq <- -log10(min(se$q.value[-w],na.rm=TRUE))
score <- abs(se[[ifelse(diffUTR,"sizeScore","w.abs.coef")]])
o <- order(order(score[w]))
se$q.value[w] <- 10^-(minnzq+o*minnzq/(4*length(w)))
}
if(diffUTR){
if(is.null(alpha)) alpha <- 1
se$col <- sign(se$sizeScore)
se$col[is.na(se$col)] <- 0
p <- ggplot(se, aes(sizeScore, -log10(q.value))) +
geom_vline(xintercept=0, linetype="dashed", col="grey") +
geom_point(aes(colour=log(density.ratio), size=geneMeanDensity), alpha=alpha) +
xlab("Weighted size score") + scale_colour_viridis_c(direction = -1)
de <- se[se$q.value<FDR,,drop=FALSE]
de$tmp <- abs(-log10(de$q.value)*de$sizeScore)
de$tmp <- -log10(de$q.value)*de$sizeScore/rank(de$q.value)
}else{
se$tmp <- -log10(se$q.value)*se$w.abs.coef/rank(se$q.value)
if(is.null(alpha)){
p <- ggplot(se, aes(w.abs.coef, -log10(q.value))) +
geom_point(aes(colour=log(density.ratio), size=geneMeanDensity,
alpha=sqrt(sqrt(tmp)))) + scale_alpha(guide=FALSE) +
scale_colour_viridis_c(direction = -1)
}else{
p <- ggplot(se, aes(w.abs.coef, -log10(q.value))) +
geom_point(aes(colour=log(density.ratio), size=geneMeanDensity),
alpha=alpha) + scale_colour_viridis_c(direction = -1)
}
p <- p + xlab("Weighted absolute coefficient")
de <- se[se$q.value<FDR,,drop=FALSE]
}
if(n>0){
de <- de[head(order(-de$tmp),n),,drop=FALSE]
if(is.null(de$name)) de$name <- row.names(de)
p <- p + geom_text_repel(data=de, aes(label=name), ...)
}
p
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.