#'
#' Visualization functions for FRASER
#'
#' The FRASER package provides mutliple functions to visualize
#' the data and the results of a full data set analysis.
#'
#' This is the list of all plotting function provided by FRASER:
#' \itemize{
#' \item plotAberrantPerSample()
#' \item plotVolcano()
#' \item plotExpression()
#' \item plotQQ()
#' \item plotExpectedVsObservedPsi()
#' \item plotCountCorHeatmap()
#' \item plotFilterExpression()
#' \item plotFilterVariability()
#' \item plotEncDimSearch()
#' \item plotBamCoverage()
#' \item plotBamCoverageFromResultTable()
#' \item plotManhattan()
#' \item plotSpliceMetricRank()
#' }
#'
#' For a detailed description of each plot function please see the details.
#' Most of the functions share the same parameters.
#'
#### Data specific parameters
#' @param object,fds An \code{\link{FraserDataSet}} object.
#' @param type The psi type: either psi5, psi3 or theta (for SE).
#' @param sampleID A sample ID which should be plotted. Can also be a vector.
#' Integers are treated as indices.
#' @param idx A junction site ID or gene ID or one of both, which
#' should be plotted. Can also be a vector. Integers are treated
#' as indices.
#' @param padjCutoff,deltaPsiCutoff Significance or delta
#' psi cutoff to mark outliers
#' @param global Flag to plot a global Q-Q plot, default FALSE
#' @param normalized If TRUE, the normalized psi values are used, the default,
#' otherwise the raw psi values
#' @param aggregate If TRUE, the pvalues are aggregated by gene (default),
#' otherwise junction level pvalues are used (default for Q-Q plot).
#' @param result The result table to be used by the method.
#' @param label Indicates the genes or samples that will be labelled in the
#' plot (only for \code{basePlot=TRUE}). Setting
#' \code{label="aberrant"} will label all aberrant genes or
#' samples. Labelling can be turned off by setting
#' \code{label=NULL}. The user can also provide a custom
#' list of gene symbols or sampleIDs.
#' @param subsetName The name of a subset of genes of interest for which FDR
#' corrected pvalues were previously computed. Those FDR values
#' on the subset will then be used to determine aberrant status.
#' Default is NULL (using transcriptome-wide FDR corrected pvalues).
#' @param BPPARAM BiocParallel parameter to use.
#' @param Ncpus Number of cores to use.
#' @param plotType The type of plot that should be shown as character string.
#' For plotEncDimSearch, it has to be either \code{"auc"}
#' for a plot of the area under the curve (AUC) or
#' \code{"loss"} for the model loss. For the correlation heatmap,
#' it can be either \code{"sampleCorrelation"} for a
#' sample-sample correlation heatmap or \code{"junctionSample"}
#' for a junction-sample correlation heatmap.
#' @param onlyVariableIntrons Logical value indicating whether to show only
#' introns that also pass the variability filter. Defaults to
#' FALSE.
#' @param onlyExpressedIntrons Logical value indicating whether to show only
#' introns that also pass the expression filter. Defaults to
#' FALSE.
#' @param gr A GRanges object indicating the genomic range that should be shown
#' in \code{plotBamCoverage}.
#' @param control_samples The sampleIDs of the samples used as control in
#' \code{plotBamCoverage}.
#' @param min_junction_count The minimal junction count across samples required
#' for a junction to appear in the splicegraph and coverage tracks
#' of \code{plotBamCoverage}.
#' @param txdb A TxDb object giving the gene/transcript annotation to use.
#' @param orgDb A OrgDb object giving the mapping of gene ids and symbols.
#' @param show_full_gene Should the full genomic range of the gene be shown in
#' \code{plotBamCoverageFromResultTable} (default: FALSE)?
#' If FALSE, only a certain region (see parameters left_extension
#' and right_extension) around the outlier junction is shown.
#' @param left_extension Indicating how far the plotted range around the outlier
#' junction should be extended to the left in
#' \code{plotBamCoverageFromResultTable}.
#' @param right_extension Indicating how far the plotted range around the
#' outlier junction should be extended to the right in
#' \code{plotBamCoverageFromResultTable}.
#' @param res_gene_col The column name in the given results table that
#' contains the gene annotation.
#' @param res_geneid_type The type of gene annotation in the results table in
#' \code{res_gene_col} (e.g. SYMBOL or ENTREZID etc.). This
#' information is needed for mapping between the results table and
#' the provided annotation in the txdb object.
#' @param txdb_geneid_type The type of gene_id present in \code{genes(txdb)}
#' (e.g. ENTREZID). This information is needed for
#' mapping between the results table and the provided annotation
#' in the txdb object.
#' @param highlight_range A \code{GenomicRanges} or \code{GenomicRangesList}
#' object of ranges to be highlighted in the splicegraph of
#' \code{plotBamCoverage}.
#' @param highlight_range_color The color of highlighted ranges in
#' the splicegraph of \code{plotBamCoverage}.
#' @param toscale In \code{plotBamCoverage}, indicates which part of the
#' plotted region should be drawn to scale. Possible values are
#' 'exon' (exonic regions are drawn to scale),
#' 'gene' (both exonic and intronic regions are drawn to scale) or
#' 'none' (exonic and intronic regions have constant length)
#' (see SGSeq package).
#' @param splicegraph_labels Indicated the format of exon/splice junction
#' labels in the splicegraph of \code{plotBamCoverage}.
#' Possible values are 'genomic_range' (gives the start position
#' of the first exon and the end position of the last exon that
#' are shown), 'id' (format E1,... J1,...), 'name' (format
#' type:chromosome:start-end:strand for each feature),
#' 'none' for no labels (see SGSeq package).
#' @param splicegraph_position The position of the splicegraph relative to the
#' coverage tracks in \code{plotBamCoverage}. Possible values
#' are 'top' (default) and 'bottom'.
#'
#### Graphical parameters
#' @param main Title for the plot, if missing a default title will be used.
#' @param colGroup Group of samples that should be colored.
#' @param basePlot if \code{TRUE} (default), use the R base plot version, else
#' use the plotly framework.
#' @param conf.alpha If set, a confidence interval is plotted, defaults to 0.05
#' @param samplingPrecision Plot only non overlapping points in Q-Q plot to
#' reduce number of points to plot. Defines the digits to round to.
#' @param logit If TRUE, the default, psi values are plotted in logit space.
#' @param nClust Number of clusters to show in the row and
#' column dendrograms.
#' @param sampleClustering A clustering of the samples that should be used as an
#' annotation of the heatmap.
#' @param show_rownames,show_colnames Logical value indicating whether to show
#' row or column names on the heatmap axes.
#' @param annotation_col,annotation_row Row or column annotations that should be
#' plotted on the heatmap.
#' @param topN Top x most variable junctions that should be used for the
#' calculation of sample x sample correlations.
#' @param topJ Top x most variable junctions that should be displayed in the
#' junction-sample correlation heatmap. Only applies if plotType
#' is "junctionSample".
#' @param minMedian,minCount,minDeltaPsi Minimal median (\eqn{m \ge 1}),
#' delta psi (\eqn{|\Delta\psi| > 0.1}), read count (\eqn{n \ge 10})
#' value of a junction to be considered for the correlation heatmap.
#' @param border_color Sets the border color of the heatmap
#' @param plotMeanPsi,plotCov If \code{TRUE}, then the heatmap is annotated with
#' the mean psi values or the junction coverage.
#' @param bins Set the number of bins to be used in the histogram.
#' @param legend.position Set legend position (x and y coordinate), defaults to
#' the top right corner.
#' @param color_annotated The color for exons and junctions present in
#' the given annotation (in the splicegraph of
#' \code{plotBamCoverage}).
#' @param color_novel The color for novel exons and junctions not present in
#' the given annotation (in the splicegraph of
#' \code{plotBamCoverage}).
#' @param color_sample_interest The color in \code{plotBamCoverage} for the
#' sample of interest.
#' @param color_control_samples The color in \code{plotBamCoverage} for the
#' samples used as controls.
#' @param curvature_splicegraph The curvature of the junction arcs in the
#' splicegraph in \code{plotBamCoverage}. Decrease this value
#' for flatter arcs and increase it for steeper arcs.
#' @param curvature_coverage The curvature of the junction arcs in the
#' coverage tracks of \code{plotBamCoverage}. Decrease this
#' value for flatter arcs and increase it for steeper arcs.
#' @param mar The margin of the plot area for \code{plotBamCoverage}
#' (b,l,t,r).
#' @param cex For controlling the size of text and numbers in
#' \code{plotBamCoverage}.
#' @param chr Vector of chromosome names to show in \code{plotManhattan}. The
#' default is to show all chromosomes.
#' @param value Indicates which assay is shown in the manhattan plot. Defaults
#' to 'pvalue'. Other options are 'deltaPsi' and 'zScore'.
#' @param chrColor Interchanging colors by chromosome for \code{plotManhattan}.
#'
#### Additional ... parameter
#' @param ... Additional parameters passed to plot() or plot_ly() if not stated
#' otherwise in the details for each plot function
#'
#' @details
#'
#' \code{plotAberrantPerSample}: The number of aberrant events per sample are
#' plotted sorted by rank. The ... parameters are passed on to the
#' \code{\link{aberrant}} function.
#'
#' \code{plotVolcano}: the volcano plot is sample-centric. It plots for a given
#' sample and psi type the negative log10 nominal P-values against the delta psi
#' values for all splice sites or aggregates by gene if requested.
#'
#' \code{plotExpression}: This function plots for a given site the
#' read count at this site (i.e. K) against the total coverage (i.e. N) for the
#' given psi type (\eqn{\psi_5, \psi_3, or \theta}{\psi5, \psi3, or \theta}
#' (SE)) for all samples.
#'
#' \code{plotQQ}: the quantile-quantile plot for a given gene or if
#' \code{global} is set to \code{TRUE} over the full data set. Here the
#' observed P-values are plotted against the expected ones in the negative
#' log10 space.
#'
#' \code{plotExpectedVsObservedPsi}: A scatter plot of the observed psi
#' against the predicted psi for a given site.
#'
#' \code{plotSpliceMetricRank}: This function plots for a given intron the
#' observed values of the selected splice metrix against the sample rank.
#'
#' \code{plotCountCorHeatmap}: The correlation heatmap of the count data either
#' of the full data set (i.e. sample-sample correlations) or of the top x most
#' variable junctions (i.e. junction-sample correlations). By default the values
#' are log transformed and row centered. The ... arguments are passed to the
#' \code{\link[pheatmap]{pheatmap}} function.
#'
#' \code{plotFilterExpression}: The distribution of FPKM values. If the
#' FraserDataSet object contains the \code{passedFilter} column, it will plot
#' both FPKM distributions for the expressed introns and for the filtered
#' introns.
#'
#' \code{plotFilterVariability}: The distribution of maximal delta Psi values.
#' If the FraserDataSet object contains the \code{passedFilter} column,
#' it will plot both maximal delta Psi distributions for the variable
#' introns and for the filtered (i.e. non-variable) introns.
#'
#' \code{plotEncDimSearch}: Visualization of the hyperparameter optimization.
#' It plots the encoding dimension against the achieved loss (area under the
#' precision-recall curve). From this plot the optimum should be choosen for
#' the \code{q} in fitting process.
#'
#' \code{plotManhattan}: A Manhattan plot showing the junction pvalues by
#' genomic position. Useful to identify if outliers cluster by genomic position.
#'
#' \code{plotBamCoverage}: A sashimi plot showing the read coverage from
#' the underlying bam files for a given genomic range and sampleIDs.
#'
#' \code{plotBamCoverageFromResultTable}: A sashimi plot showing the read
#' coverage from the underlying bam files for a row in the results table. Can
#' either show the full range of the gene with the outlier junction or only a
#' certain region around the outlier.
#'
#' @return If base R graphics are used nothing is returned else the plotly or
#' the gplot object is returned.
#'
#' @name plotFunctions
#' @rdname plotFunctions
#' @aliases plotFunctions plotAberrantPerSample plotVolcano plotQQ
#' plotExpression plotCountCorHeatmap plotFilterExpression
#' plotExpectedVsObservedPsi plotEncDimSearch plotManhattan
#' plotBamCoverage plotBamCoverageFromResultTable
#' @examples
#' \dontshow{set.seed(42)}
#' # create full FRASER object
#' fds <- makeSimulatedFraserDataSet(m=40, j=200)
#' fds <- calculatePSIValues(fds)
#' fds <- filterExpressionAndVariability(fds, filter=FALSE)
#' # this step should be done for more dimensions in practice
#' fds <- optimHyperParams(fds, "jaccard", q_param=c(2,5,10,25))
#'
#' # assign gene names to show functionality on test dataset
#' # use fds <- annotateRanges(fds) on real data
#' mcols(fds, type="j")$hgnc_symbol <-
#' paste0("gene", sample(1:25, nrow(fds), replace=TRUE))
#'
#' # fit and calculate pvalues
#' genesOfInterest <- rep(list(paste0("gene", sample(1:25, 10))), 4)
#' names(genesOfInterest) <- c("sample1", "sample6", "sample15", "sample23")
#' fds <- FRASER(fds, subsets=list("testSet"=genesOfInterest))
#'
#' # QC plotting
#' plotFilterExpression(fds)
#' plotFilterVariability(fds)
#' plotCountCorHeatmap(fds, "jaccard")
#' plotCountCorHeatmap(fds, "jaccard", normalized=TRUE)
#' plotEncDimSearch(fds, type="jaccard")
#'
#' # extract results
#' plotAberrantPerSample(fds, aggregate=FALSE)
#' plotAberrantPerSample(fds, aggregate=TRUE, subsetName="testSet")
#' plotVolcano(fds, "sample2", "jaccard", label="aberrant")
#' plotVolcano(fds, "sample1", "jaccard", aggregate=TRUE, subsetName="testSet")
#'
#' # dive into gene/sample level results
#' res <- as.data.table(results(fds))
#' res
#' plotExpression(fds, result=res[1])
#' plotQQ(fds, result=res[1])
#' plotExpectedVsObservedPsi(fds, res=res[1])
#' plotSpliceMetricRank(fds, res=res[1])
#'
#' # other ways to call these plotting functions
#' plotExpression(fds, idx=10, sampleID="sample1", type="jaccard")
#' plotExpression(fds, result=res[1], subsetName="testSet")
#' plotQQ(fds, idx=10, sampleID="sample1", type="jaccard")
#' plotQQ(fds, result=res[1], subsetName="testSet")
#' plotExpectedVsObservedPsi(fds, idx=10, sampleID="sample1", type="jaccard")
#' plotExpectedVsObservedPsi(fds, result=res[1], subsetName="testSet")
#' plotSpliceMetricRank(fds, idx=10, sampleID="sample1", type="jaccard")
#' plotSpliceMetricRank(fds, result=res[1], subsetName="testSet")
#'
#' # create manhattan plot of pvalues by genomic position
#' if(require(ggbio)){
#' plotManhattan(fds, type="jaccard", sampleID="sample10")
#' }
#'
#' # plot splice graph and coverage from bam files in a given region
#' if(require(SGSeq)){
#' fds <- createTestFraserSettings()
#' gr <- GRanges(seqnames="chr19",
#' IRanges(start=7587496, end=7598895),
#' strand="+")
#' plotBamCoverage(fds, gr=gr, sampleID="sample3",
#' control_samples="sample2", min_junction_count=5,
#' curvature_splicegraph=1, curvature_coverage=1,
#' mar=c(1, 7, 0.1, 3))
#'
#' # plot coverage from bam file for a row in the result table
#' fds <- createTestFraserDataSet()
#' require(TxDb.Hsapiens.UCSC.hg19.knownGene)
#' txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
#' require(org.Hs.eg.db)
#' orgDb <- org.Hs.eg.db
#'
#' res <- results(fds, padjCutoff=NA, deltaPsiCutoff=NA)
#' res_dt <- as.data.table(res)
#' res_dt <- res_dt[sampleID == "sample2",]
#'
#' # plot full range of gene containing outlier junction
#' plotBamCoverageFromResultTable(fds, result=res_dt[1,], show_full_gene=TRUE,
#' txdb=txdb, orgDb=orgDb, control_samples="sample3")
#'
#' # plot only certain range around outlier junction
#' plotBamCoverageFromResultTable(fds, result=res_dt[1,], show_full_gene=FALSE,
#' control_samples="sample3", curvature_splicegraph=0.5, txdb=txdb,
#' curvature_coverage=0.5, right_extension=5000, left_extension=5000,
#' splicegraph_labels="id")
#' }
#'
NULL
plotVolcano.FRASER <- function(object, sampleID,
type=fitMetrics(object), basePlot=TRUE,
aggregate=FALSE, main=NULL, label=NULL,
deltaPsiCutoff=0.1, padjCutoff=0.1, subsetName=NULL, ...){
type <- match.arg(type)
dt <- getPlottingDT(object, axis="col", type=type, idx=sampleID,
aggregate=aggregate, deltaPsiCutoff=deltaPsiCutoff,
padjCutoff=padjCutoff, subsetName=subsetName, ...)
dt[is.na(padj), aberrant:=NA]
dt[aberrant == TRUE, aberrantLabel:="aberrant"]
dt[aberrant == FALSE, aberrantLabel:="not aberrant"]
dt[is.na(aberrant), aberrantLabel:="not in tested group"]
g <- ggplot(dt, aes(x=deltaPsi, y=-log10(pval), color=aberrantLabel,
label=featureID, text=paste0(
"SampleID: ", sampleID, "<br>",
"featureID: ", featureID, "<br>",
"Raw count (K): ", k, "<br>",
"Raw total count (N): ", n, "<br>",
"p value: ", signif(pval, 5), "<br>",
"delta Psi: ", round(deltaPsi, 2), "<br>",
"Type: ", type))) +
geom_point(aes(alpha=ifelse(aberrantLabel == "aberrant", 1, 0.8))) +
xlab(as.expression(
bquote(paste(Delta, .(ggplotLabelPsi(type)[[1]]) ))
)) +
ylab(expression(paste(-log[10], "(P value)"))) +
scale_color_manual(values=c("not aberrant"="black",
"aberrant"="firebrick",
"not in tested group"="lightsteelblue")) +
theme_bw() +
theme(legend.position="bottom") +
guides(alpha="none", color=guide_legend(title=""))
if(isFALSE(basePlot)){
g <- g + xlab(paste("delta",
ggplotLabelPsi(type, asCharacter=TRUE)[[1]])) +
ylab("-log[10](P value)")
if(is.null(main)){
main <- paste0("Volcano plot: ", sampleID, ", ",
ggplotLabelPsi(type, asCharacter=TRUE)[[1]])
}
} else{
if(!is.null(label)){
if(isScalarCharacter(label) && label == "aberrant"){
if(nrow(dt[aberrant == TRUE,]) > 0){
g <- g + geom_text_repel(data=dt[aberrant == TRUE,],
fontface='bold', hjust=-.2, vjust=-.2)
}
}
else{
if(nrow(dt[featureID %in% label]) > 0){
g <- g + geom_text_repel(data=
subset(dt, featureID %in% label),
fontface='bold', hjust=-.2, vjust=-.2)
}
if(any(!(label %in% dt[,featureID]))){
warning("Did not find gene(s) ",
paste(label[!(label %in% dt[,featureID])],
collapse=", "), " to label.")
}
}
}
if(is.null(main)){
main <- as.expression(bquote(paste(
bold("Volcano plot: "), .(sampleID), ", ",
.(ggplotLabelPsi(type)[[1]]))))
}
}
if(is.null(subsetName)){
subtitle <- NULL
} else{
subtitle <- paste0("FDR across ",
ifelse(isTRUE(aggregate), "genes", "introns"),
" in the ", subsetName,
" group (N=", dt[!is.na(padj), .N], ")")
}
g <- g + ggtitle(main, subtitle=subtitle)
plotBasePlot(g, basePlot)
}
#'
#' Volcano plot
#'
#' Plots the p values over the delta psi values, known as volcano plot.
#' Visualizes per sample the outliers. By type and aggregate by
#' gene if requested.
#'
#' @rdname plotFunctions
#' @export
setMethod("plotVolcano", signature="FraserDataSet", plotVolcano.FRASER)
plotAberrantPerSample.FRASER <- function(object, main,
type=fitMetrics(object),
padjCutoff=0.1, deltaPsiCutoff=0.1,
aggregate=TRUE, subsetName=NULL, BPPARAM=bpparam(), ...){
type <- match.arg(type, several.ok=TRUE)
if(missing(main)){
main <- paste('Aberrant events per sample')
if(isTRUE(aggregate)){
main <- paste(main, "by gene")
}
}
if(is.null(subsetName)){
subtitle <- NULL
} else{
subtitle <- paste0("FDR across genes in the ", subsetName, " group")
}
# extract outliers
outliers <- bplapply(type, aberrant, object=object, by="sample",
padjCutoff=padjCutoff,
deltaPsiCutoff=deltaPsiCutoff, aggregate=aggregate, ...,
subsetName=subsetName, BPPARAM=BPPARAM)
dt2p <- rbindlist(lapply(seq_along(outliers), function(idx){
vals <- outliers[[idx]]
padj_assay <- padjVals(object, type=type[idx], subsetName=subsetName,
level=ifelse(isTRUE(aggregate), "gene", "site"))
testedSamples <- names(
which(colSums(is.na(padj_assay)) != nrow(padj_assay)))
if(length(testedSamples) == 0){
stop("No non-NA padj values found for the tested group.")
}
vals <- vals[testedSamples]
data.table(type=type[idx], value=sort(vals), median=median(vals),
rank=seq_along(vals))
}))
# plot them
g <- ggplot(dt2p, aes(x=rank, y=value, color=type)) +
geom_line() +
geom_hline(aes(yintercept=median, color=type, lty="Median")) +
theme_bw() +
theme_cowplot() + background_grid(major="xy", minor="xy") +
ggtitle(main, subtitle=subtitle) +
xlab("Sample rank") +
ylab("Number of outliers") +
scale_color_brewer(palette="Dark2", name=element_blank(),
labels=ggplotLabelPsi(dt2p[,unique(type)])) +
scale_linetype_manual(name="", values=2, labels="Median")
if(!all(dt2p[,value] == 0)){
g <- g + scale_y_log10(limits=c(1, max(unlist(outliers)))) +
annotation_logticks(sides="l")
}
g
}
#'
#' Number of aberrant events per sample
#'
#' Plot the number of aberrant events per samples
#'
#' @rdname plotFunctions
#' @export
setMethod("plotAberrantPerSample", signature="FraserDataSet",
plotAberrantPerSample.FRASER)
#'
#' Junction expression plot
#'
#' Plots the observed split reads of the junction of interest over all reads
#' coming from the given donor/acceptor.
#'
#' @rdname plotFunctions
#' @export
plotExpression <- function(fds, type=fitMetrics(fds),
idx=NULL, result=NULL, colGroup=NULL,
basePlot=TRUE, main=NULL, label="aberrant",
subsetName=NULL, ...){
if(!is.null(result)){
type <- as.character(result$type)
idx <- getIndexFromResultTable(fds, result)
} else {
type <- match.arg(type)
}
dt <- getPlottingDT(fds, axis="row", type=type, idx=idx,
subsetName=subsetName, ...)
dt[,featureID:=limitGeneNamesList(featureID, maxLength=3)]
if(!is.null(colGroup)){
if(all(colGroup %in% samples(fds))){
colGroup <- samples(fds) %in% colGroup
}
dt[colGroup,aberrant:=TRUE]
}
dt[,aberrant:=factor(aberrant, levels=c("TRUE", "FALSE"))]
gr <- granges(rowRanges(fds,type=type)[idx,])
genomic_pos_label <- paste0(seqnames(gr), ":", start(gr), "-", end(gr),
":", strand(gr))
if(is.null(main)){
if(isTRUE(basePlot)){
main <- as.expression(bquote(bold(paste(
.(ggplotLabelPsi(type)[[1]]), " expression plot: ",
.(genomic_pos_label),
" (", bolditalic(.(as.character(dt[,unique(featureID)]))),
"; row index: ", .(as.character(dt[,unique(idx)])), ")"))))
} else{
main <- paste0(ggplotLabelPsi(type, asCharacter=TRUE)[[1]],
" expression plot: ", genomic_pos_label,
" (", dt[,unique(featureID)],
"; row index: ", dt[,unique(idx)], ")")
}
}
if(is.null(subsetName)){
subtitle <- NULL
} else{
subtitle <- paste0("FDR across genes in the ", subsetName, " group")
}
g <- ggplot(dt, aes(x=n + 2, y=k + 1, color=aberrant, label=sampleID,
text=paste0(
"Sample: ", sampleID, "<br>",
"Counts (K): ", k, "<br>",
"Total counts (N): ", n, "<br>",
"p value: ", signif(pval, 5), "<br>",
"padjust: ", signif(padj, 5), "<br>",
"Observed Psi: ", round(obsPsi, 2), "<br>",
"Predicted mu: ", round(predPsi, 2), "<br>"))) +
geom_point(alpha=ifelse(as.character(dt$aberrant) == "TRUE", 1, 0.7)) +
scale_x_log10() +
scale_y_log10() +
theme_bw() +
theme(legend.position="none", title=) +
xlab("Total junction coverage + 2 (N)") +
ylab("Junction count + 1 (K)") +
ggtitle(main, subtitle=subtitle) +
annotation_logticks(sides='bl')
if(isTRUE(basePlot) && !is.null(label)){
if(isScalarCharacter(label) && label == "aberrant"){
if(nrow(dt[aberrant == TRUE,]) > 0){
g <- g + geom_text_repel(data=dt[aberrant == TRUE,],
aes(col=aberrant),
fontface='bold', hjust=-.2, vjust=-.2)
}
}
else{
if(nrow(dt[sampleID %in% label]) > 0){
g <- g + geom_text_repel(data=subset(dt, sampleID %in% label),
aes(col=aberrant),
fontface='bold', hjust=-.2, vjust=-.2)
}
if(any(!(label %in% dt[,sampleID]))){
warning("Did not find sample(s) ",
paste(label[!(label %in% dt[,sampleID])],
collapse=", "), " to label.")
}
}
}
if(is.null(colGroup)){
g <- g + scale_colour_manual(
values=c("FALSE"="gray70", "TRUE"="firebrick"))
}
plotBasePlot(g, basePlot)
}
#'
#' Junction splice metric plot
#'
#' Plots the observed values of the splice metric across samples for a
#' junction of interest.
#'
#' @rdname plotFunctions
#' @export
plotSpliceMetricRank <- function(fds, type=fitMetrics(fds),
idx=NULL, result=NULL, colGroup=NULL,
basePlot=TRUE, main=NULL, label="aberrant",
subsetName=NULL, ...){
if(!is.null(result)){
type <- as.character(result$type)
idx <- getIndexFromResultTable(fds, result)
} else {
type <- match.arg(type)
}
dt <- getPlottingDT(fds, axis="row", type=type, idx=idx,
subsetName=subsetName, ...)
dt[,featureID:=limitGeneNamesList(featureID, maxLength=3)]
# rank on observed value of splice metric of interest
dt[, rank := rank(obsPsi, ties.method="random", na.last=FALSE)]
if(!is.null(colGroup)){
if(all(colGroup %in% samples(fds))){
colGroup <- samples(fds) %in% colGroup
}
dt[colGroup,aberrant:=TRUE]
}
dt[,aberrant:=factor(aberrant, levels=c("TRUE", "FALSE"))]
gr <- granges(rowRanges(fds,type=type)[idx,])
genomic_pos_label <- paste0(seqnames(gr), ":", start(gr), "-", end(gr),
":", strand(gr))
if(is.null(main)){
if(isTRUE(basePlot)){
main <- as.expression(bquote(bold(paste(
.(genomic_pos_label),
" (", bolditalic(.(as.character(dt[,unique(featureID)]))),
"; row index: ", .(as.character(dt[,unique(idx)])), ")"))))
} else{
main <- paste0(genomic_pos_label,
" (", dt[,unique(featureID)],
"; row index: ", dt[,unique(idx)], ")")
}
}
if(is.null(subsetName)){
subtitle <- NULL
} else{
subtitle <- paste0("FDR across genes in the ", subsetName, " group")
}
if(isTRUE(basePlot)){
ylab <- bquote("Observed " ~ .(ggplotLabelPsi(type)[[1]]))
} else{
ylab <- paste("Observed", ggplotLabelPsi(type, asCharacter=TRUE)[[1]])
}
g <- ggplot(dt, aes(x=rank, y=obsPsi, color=aberrant, label=sampleID,
text=paste0(
"Sample: ", sampleID, "<br>",
"Counts (K): ", k, "<br>",
"Total counts (N): ", n, "<br>",
"p value: ", signif(pval, 5), "<br>",
"padjust: ", signif(padj, 5), "<br>",
"Observed Psi: ", round(obsPsi, 2), "<br>",
"Predicted mu: ", round(predPsi, 2), "<br>"))) +
geom_point(alpha=ifelse(as.character(dt$aberrant) == "TRUE", 1, 0.7)) +
theme_bw() +
theme(legend.position="none", title=) +
xlab("Sample rank") +
ylab(ylab) +
ggtitle(main, subtitle=subtitle) +
ylim(0,1)
if(isTRUE(basePlot) && !is.null(label)){
if(isScalarCharacter(label) && label == "aberrant"){
if(nrow(dt[aberrant == TRUE,]) > 0){
g <- g + geom_text_repel(data=dt[aberrant == TRUE,],
aes(col=aberrant),
fontface='bold', hjust=-.2, vjust=-.2)
}
}
else{
if(nrow(dt[sampleID %in% label]) > 0){
g <- g + geom_text_repel(data=subset(dt, sampleID %in% label),
aes(col=aberrant),
fontface='bold', hjust=-.2, vjust=-.2)
}
if(any(!(label %in% dt[,sampleID]))){
warning("Did not find sample(s) ",
paste(label[!(label %in% dt[,sampleID])],
collapse=", "), " to label.")
}
}
}
if(is.null(colGroup)){
g <- g + scale_colour_manual(
values=c("FALSE"="gray70", "TRUE"="firebrick"))
}
plotBasePlot(g, basePlot)
}
#'
#' Expected over Overserved plot
#'
#' Plots the expected psi value over the observed psi value of the given
#' junction.
#'
#' @rdname plotFunctions
#' @export
plotExpectedVsObservedPsi <- function(fds, type=fitMetrics(fds),
idx=NULL, result=NULL, colGroup=NULL, main=NULL,
basePlot=TRUE, label="aberrant", subsetName=NULL, ...){
type <- match.arg(type)
# get plotting data
dt <- getPlottingDT(fds, axis="row", type=type, result=result,
idx=idx, subsetName=subsetName, ...)
type <- as.character(unique(dt$type))
idx <- unique(dt$idx)
dt[,featureID:=limitGeneNamesList(featureID, maxLength=3)]
gr <- granges(rowRanges(fds,type=type)[idx,])
genomic_pos_label <- paste0(seqnames(gr), ":", start(gr), "-", end(gr),
":", strand(gr))
if(is.null(main)){
if(isTRUE(basePlot)){
main <- as.expression(bquote(bold(paste(
.(genomic_pos_label),
" (", bolditalic(.(as.character(dt[,unique(featureID)]))),
"; row index: ", .(as.character(dt[,unique(idx)])), ")"))))
} else{
main <- paste0(genomic_pos_label,
" (", dt[,unique(featureID)],
"; row index: ", dt[,unique(idx)], ")")
}
}
if(is.null(subsetName)){
subtitle <- NULL
} else{
subtitle <- paste0("FDR across genes in the ", subsetName, " group")
}
if(!is.null(colGroup)){
if(is.logical(colGroup)){
dt[colGroup, aberrant:=TRUE]
} else {
warning("not implemented yet!")
}
}
if(isTRUE(basePlot)){
ylab <- bquote("Observed " ~ .(ggplotLabelPsi(type)[[1]]))
xlab <- bquote("Predicted " ~ .(ggplotLabelPsi(type)[[1]]))
} else{
ylab <- paste("Observed", ggplotLabelPsi(type, asCharacter=TRUE)[[1]])
xlab <- paste("Predicted", ggplotLabelPsi(type, asCharacter=TRUE)[[1]])
}
g <- ggplot(dt, aes(y=obsPsi, x=predPsi, label=sampleID, color=aberrant,
text=paste0(
"Sample: ", sampleID, "<br>",
"Counts (K): ", k, "<br>",
"Total counts (N): ", n, "<br>",
"p value: ", signif(pval, 5), "<br>",
"padjust: ", signif(padj, 5), "<br>",
"Observed Psi: ", round(obsPsi, 2), "<br>",
"Predicted mu: ", round(predPsi, 2), "<br>"))) +
geom_point(alpha=ifelse(dt$aberrant, 1, 0.5),
color=c("gray70", "firebrick")[dt$aberrant + 1]) +
geom_abline(intercept = 0, slope=1) +
xlim(c(0,1)) + ylim(c(0,1)) +
theme_bw() +
theme(legend.position="none") +
xlab(xlab) +
ylab(ylab) +
ggtitle(main, subtitle=subtitle)
if(isTRUE(basePlot) && !is.null(label)){
if(isScalarCharacter(label) && label == "aberrant"){
if(nrow(dt[aberrant == TRUE,]) > 0){
g <- g + geom_text_repel(data=dt[aberrant == TRUE,],
aes(col=aberrant),
fontface='bold', hjust=-.2, vjust=-.2)
}
}
else{
if(nrow(dt[sampleID %in% label]) > 0){
g <- g + geom_text_repel(data=subset(dt, sampleID %in% label),
aes(col=aberrant),
fontface='bold', hjust=-.2, vjust=-.2)
}
if(any(!(label %in% dt[,sampleID]))){
warning("Did not find sample(s) ",
paste(label[!(label %in% dt[,sampleID])],
collapse=", "), " to label.")
}
}
}
if(is.null(colGroup)){
g <- g + scale_colour_manual(
values=c("gray70", "firebrick"))
}
plotBasePlot(g, basePlot)
}
plotQQ.FRASER <- function(object, type=NULL, idx=NULL, result=NULL,
aggregate=FALSE, global=FALSE, main=NULL, conf.alpha=0.05,
samplingPrecision=3, basePlot=TRUE, label="aberrant",
Ncpus=min(3, getDTthreads()), subsetName=NULL, ...){
# check parameters
if(is.null(aggregate)){
aggregate <- isTRUE(global)
} else if(!(is.logical(aggregate) |
all(aggregate %in% colnames(mcols(object))))){
stop("Please provide TRUE/FALSE or a ",
"charactor matching a column name in mcols.")
}
if(isTRUE(global)){
if(is.null(type)){
type <- fitMetrics(object)
}
dt <- rbindlist(bplapply(type, getPlottingDT, fds=object, axis="col",
idx=TRUE, aggregate=aggregate, subsetName=subsetName,
Ncpus=Ncpus, ...))
# remove duplicated entries donor/acceptor sites if not aggregated
# by a feature
if(isFALSE(aggregate)){
dt <- dt[!duplicated(dt, by=c("type", "spliceID", "sampleID"))]
}
} else {
dots <- list(...)
if(!"pvalLevel" %in% names(dots)){
dots[["pvalLevel"]] <- "junction"
}
dots <- append(list(fds=object, axis="row", type=type, idx=idx,
result=result, aggregate=aggregate,
subsetName=subsetName),
dots)
dt <- do.call(getPlottingDT, args=dots)
}
if(is.null(main)){
if(isTRUE(global)){
main <- "Global QQ plot"
} else {
type <- as.character(dt[,unique(type)])
featureID <- as.character(dt[,unique(featureID)])
featureID <- limitGeneNamesList(featureID, maxLength=3)
idx <- dt[, unique(idx)]
gr <- granges(rowRanges(object,type=type)[idx,])
genomic_pos_label <- paste0(seqnames(gr), ":",
start(gr), "-", end(gr), ":", strand(gr))
if(isTRUE(basePlot)){
main <- as.expression(bquote(bold(paste(
.(ggplotLabelPsi(type)[[1]]), " Q-Q plot: ",
.(genomic_pos_label),
" (", bolditalic(.(featureID)),
"; row index: ", .(as.character(idx)), ")"))))
} else{
main <- paste0(ggplotLabelPsi(type, asCharacter=TRUE)[[1]],
" Q-Q plot: ", genomic_pos_label,
" (", featureID,
"; row index: ", idx, ")")
}
}
}
if(is.null(subsetName) || isTRUE(global)){
subtitle <- NULL
} else{
subtitle <- paste0("FDR across genes in the ", subsetName, " group")
}
# points
dt2p <- dt[order(type, pval)]
dt2p[,obs:=-log10(pval)]
dt2p[is.na(obs), obs:=0]
dt2p[is.infinite(obs), obs:=dt2p[is.finite(obs),max(obs)]]
if(dt2p[,length(unique(obs))] < 2 | nrow(dt2p) < 2){
warning("There are no pvalues or all are NA or 1!")
return(FALSE)
}
dt2p[,exp:=-log10(ppoints(.N)),by=type]
# down sample if requested
dt2p[,plotPoint:=TRUE]
if(isTRUE(samplingPrecision) | isScalarNumeric(samplingPrecision)){
if(is.logical(samplingPrecision)){
samplingPrecision <- 3
}
mypoints <- !duplicated(dt2p[,.(obs=round(obs, samplingPrecision),
exp=round(exp, samplingPrecision), type)],
by=c("obs", "exp", "type"))
dt2p[,plotPoint:=mypoints]
}
# create qq-plot
g <- ggplot(dt2p[plotPoint == TRUE,], aes(x=exp, y=obs, col=aberrant,
label=sampleID,
text=paste(
"<br>SampleID: ", sampleID, "<br>K: ", k, "<br>N: ", n))) +
geom_point() +
theme_bw() +
ggtitle(main, subtitle=subtitle)
if(isTRUE(basePlot)){
g <- g +
xlab(expression(-log[10]~"(expected P)")) +
ylab(expression(-log[10]~"(observed P)"))
} else{
g <- g +
xlab("-log[10] (expected P)") +
ylab("-log[10] (observed P)")
}
# Set color scale for global/local
if(isFALSE(global)){
g <- g + scale_color_manual(values=c("black", "firebrick"),
name="Aberrant") +
theme(legend.position="none")
} else {
g$mapping$colour <- quote(type)
g <- g + scale_color_brewer(palette="Dark2", name="Splice metric",
labels=ggplotLabelPsi(unique(dt2p$type)))
}
# add confidence band if requesded
# http://genome.sph.umich.edu/wiki/Code_Sample:_Generating_QQ_Plots_in_R
if(is.numeric(conf.alpha)){
dt2p[,rank:=seq_len(.N), by=type]
dt2p[plotPoint == TRUE,upper:=-log10(
qbeta(conf.alpha/2, rank, max(rank) - rank)), by=type]
dt2p[plotPoint == TRUE,lower:=-log10(
qbeta(1-conf.alpha/2, rank, max(rank) - rank)), by=type]
# only plot one psiType if multiple are existing
if(length(unique(dt2p$type)) > 1){
typeOrder <- c("theta", "psi5", "psi3")
type2take <- min(which(typeOrder %in% unique(dt2p$type)))
dt2p[type != typeOrder[type2take],
c("upper", "lower"):=list(NA, NA)]
}
g <- g + geom_ribbon(data=dt2p[plotPoint == TRUE & !is.na(upper),],
aes(x=exp, ymin=lower, ymax=upper, text=NULL),
alpha=0.2, color="gray")
}
# add labels if requested
if(isFALSE(global) && isTRUE(basePlot) && !is.null(label)){
if(isScalarCharacter(label) && label == "aberrant"){
if(nrow(dt2p[aberrant == TRUE,]) > 0){
g <- g + geom_text_repel(data=dt2p[aberrant == TRUE,],
aes(col=aberrant),
fontface='bold', hjust=-.2, vjust=-.2)
}
}
else{
if(nrow(dt2p[sampleID %in% label]) > 0){
g <- g + geom_text_repel(data=subset(dt2p, sampleID %in%
label),
aes(col=aberrant),
fontface='bold', hjust=-.2, vjust=-.2)
}
if(any(!(label %in% dt2p[,sampleID]))){
warning("Did not find sample(s) ",
paste(label[!(label %in% dt2p[,sampleID])],
collapse=", "), " to label.")
}
}
}
# add abline in the end
g <- g + geom_abline(intercept=0, slope=1, col="firebrick")
if(isFALSE(global)){
return(plotBasePlot(g, basePlot))
}
g
}
#'
#' Q-Q plot
#'
#' Plots the quantile-quantile plot
#'
#' @rdname plotFunctions
#' @export
setMethod("plotQQ", signature="FraserDataSet", plotQQ.FRASER)
plotEncDimSearch.FRASER <- function(object, type=psiTypes,
plotType=c("auc", "loss")){
type <- match.arg(type)
plotType <- match.arg(plotType)
data <- hyperParams(object, type=type, all=TRUE)
if (is.null(data)) {
warning(paste("no hyperparameters were estimated for", type,
"\nPlease use `optimHyperParams` to compute them."))
return(NULL)
}
if(!"nsubset" %in% colnames(data)){
data[,nsubset:="NA"]
}
data[,noise:=as.factor(noise)]
data[,nsubset:=as.factor(nsubset)]
data[,isOptimalQ:= q == .SD[aroc == max(aroc), q], by="nsubset,noise"]
if(plotType == "auc"){
g1 <- ggplot(data, aes(q, aroc, col=nsubset, linetype=noise)) +
geom_point() +
geom_smooth(method="loess", formula=y~x) +
geom_vline(data=data[isOptimalQ == TRUE,],
mapping=aes(xintercept=q, col=nsubset, linetype=noise)) +
geom_text(data=data[isOptimalQ == TRUE,],
aes(y=0.0, q+1, label=q)) +
ggtitle(as.expression(bquote(bold(paste(
"Q estimation for ", .(ggplotLabelPsi(type)[[1]])))))) +
xlab("Estimated q") +
ylab("Area under the PR curve") +
theme_bw(base_size=16)
if(data[,uniqueN(nsubset)] == 1 & data[,uniqueN(noise)] == 1){
g1 <- g1 + theme(legend.position='none')
}
g1
}
else{
g2 <- ggplot(data, aes(q, eval, col=nsubset, linetype=noise)) +
geom_point() +
geom_smooth() +
geom_vline(data=data[isOptimalQ == TRUE,],
mapping=aes(xintercept=q, col=nsubset, linetype=noise)) +
ggtitle(as.expression(bquote(bold(paste(
"Q estimation for ", .(ggplotLabelPsi(type)[[1]])))))) +
xlab("Estimated q") +
ylab("Model loss") +
theme_bw(base_size=16)
if(data[,uniqueN(nsubset)] == 1 & data[,uniqueN(noise)] == 1){
g2 <- g2 + theme(legend.position='none')
}
g2
}
}
#'
#' Plots the results from the hyperparamter optimization.
#'
#' @rdname plotFunctions
#' @export
setMethod("plotEncDimSearch", signature="FraserDataSet",
plotEncDimSearch.FRASER)
#'
#' Plot filter expression
#'
#' Histogram of the geometric mean per junction based on the filter status
#'
#' @rdname plotFunctions
#' @export
plotFilterExpression <- function(fds, bins=200, legend.position=c(0.8, 0.8),
onlyVariableIntrons=FALSE){
# check that expression filter has been calculated
if(!("passedExpression" %in% colnames(mcols(fds, type="j")))){
stop("Please calculate the expression filter values first with the ",
"filterExpression function.")
}
# get mean count for all junctions in the fds object
cts <- K(fds, "psi5")
rowlgm <- exp(rowMeans(log(cts + 1)))
dt <- data.table(
value=rowlgm,
passed=mcols(fds, type="j")[['passedExpression']])
if(isTRUE(onlyVariableIntrons)){
dt[,passed:=mcols(fds, type="j")[['passed']]]
}
dt[,passed:=factor(passed, levels=c(TRUE, FALSE))]
colors <- brewer.pal(3, "Dark2")[seq_len(2)]
ggplot(dt, aes(value, fill=passed)) +
geom_histogram(bins=bins) +
scale_x_log10() +
scale_y_log10() +
scale_fill_manual(values=colors, name="Passed",
labels=c("True", "False")) +
xlab("Mean Intron Expression") +
ylab("Introns") +
ggtitle("Expression filtering") +
theme_bw() +
theme(legend.position=legend.position)
}
#'
#' Plot filter variability
#'
#' Histogram of minimal delta psi per junction
#'
#' @rdname plotFunctions
#' @export
plotFilterVariability <- function(fds, bins=200, legend.position=c(0.8, 0.8),
onlyExpressedIntrons=FALSE){
# check that expression filter has been calculated
mcolNames <- colnames(mcols(fds, type="j"))
if(!("passedVariability" %in% mcolNames)){
stop("Please calculate the expression filter values first with the ",
"filterVariability function.")
}
# get plotting data
delta_cols <- mcolNames[grepl("maxD", mcolNames)]
if(any(delta_cols == "maxDJaccard")){
delta_cols <- "maxDJaccard"
}
dt <- data.table(
value=apply(mcols(fds, type="j")[, delta_cols, drop=FALSE], 1, max),
passed=mcols(fds, type="j")[['passedVariability']])
if(isTRUE(onlyExpressedIntrons)){
dt[,passed:=mcols(fds, type="j")[['passed']]]
}
# check if file with removed counts exists and add them when it exists
nonVarDir <- file.path(workingDir(fds), "savedObjects", nameNoSpace(fds),
"nonVariableJunctions")
if(dir.exists(nonVarDir)){
nV_stored <- loadHDF5SummarizedExperiment(dir=nonVarDir)
mcolNames_stored <- colnames(mcols(nV_stored))
delta_cols_stored <- mcolNames_stored[grepl("maxD", mcolNames_stored)]
if(any(delta_cols_stored == "maxDJaccard")){
delta_cols_stored <- "maxDJaccard"
}
nonVar_dt <- data.table(
value=apply(mcols(nV_stored)[,delta_cols_stored, drop=FALSE], 1, max),
passed=FALSE)
dt <- rbind(dt, nonVar_dt)
}
dt[,passed:=factor(passed, levels=c(TRUE, FALSE))]
colors <- brewer.pal(3, "Dark2")[seq_len(2)]
if(any(delta_cols == "maxDJaccard")){
xlab <- bquote("Maximal" ~ Delta ~ "to mean J per intron")
} else{
xlab <- bquote("Maximal" ~ Delta ~ "to mean" ~Psi[5] ~ "," ~ Psi[3] ~
"or" ~ theta ~ "per intron")
}
ggplot(dt, aes(value, fill=passed)) +
geom_histogram(bins=bins) +
scale_y_log10() +
scale_fill_manual(values=colors, name="Passed",
labels=c("True", "False")) +
xlab(xlab) +
ylab("Introns") +
ggtitle("Variability filtering") +
theme_bw() +
theme(legend.position=legend.position)
}
plotCountCorHeatmap.FRASER <- function(object,
type=psiTypes, logit=FALSE,
topN=50000, topJ=5000, minMedian=1, minCount=10,
main=NULL, normalized=FALSE, show_rownames=FALSE,
show_colnames=FALSE, minDeltaPsi=0.1, annotation_col=NA,
annotation_row=NA, border_color=NA, nClust=5,
plotType=c("sampleCorrelation", "junctionSample"),
sampleClustering=NULL, plotMeanPsi=TRUE, plotCov=TRUE, ...){
type <- match.arg(type)
plotType <- match.arg(plotType)
# use counts as matrix, otherwise x(fds,...) does not work later on
counts(object, type=type, side="other", HDF5=FALSE) <-
as.matrix(counts(object, type=type, side="other"))
counts(object, type=type, side="ofInterest", HDF5=FALSE) <-
as.matrix(counts(object, type=type, side="ofInterest"))
kmat <- K(object, type=type)
nmat <- N(object, type=type)
expRowsMedian <- rowMedians(kmat) >= minMedian
expRowsMax <- rowMax(kmat) >= minCount
table(expRowsMax & expRowsMedian)
skmat <- kmat[expRowsMax & expRowsMedian,]
snmat <- nmat[expRowsMax & expRowsMedian,]
# check that we have at least 1 read for each sample
minCovColSums <- colSums(snmat > minCount)
if(any(minCovColSums < 2)){
message("Warning:",
" The following samples do not have at least 2 junctions",
" with the minimum read coverage after filtering!",
" They will be disregarded from the plot. ",
"\nAffected IDs are: \n\t", paste(collapse=", ",
names(minCovColSums)[minCovColSums < 2]))
ids2plot <- logical(length(minCovColSums))
ids2plot[minCovColSums >= 2] <- TRUE
skmat <- skmat[,ids2plot]
snmat <- snmat[,ids2plot]
object <- object[,ids2plot]
}
xmat <- (skmat + 1*pseudocount())/(snmat + 2*pseudocount())
if(isTRUE(logit)){
xmat <- qlogisWithCap(xmat)
}
xmat[snmat < minCount] <- NA
xmat_rc <- xmat - rowMeans(xmat, na.rm=TRUE)
xmat_rc_sd <- rowSds(xmat_rc, na.rm=TRUE)
nrNonNA <- rowSums(!is.na(xmat_rc))
xmat_rc_sd[nrNonNA > 0.5*ncol(xmat_rc)] <- min(xmat_rc_sd)
plotIdx <- rank(xmat_rc_sd) >= length(xmat_rc_sd) - topN
xmat_rc_2_plot <- xmat_rc[plotIdx,]
cormatS <- cor(xmat_rc_2_plot, use="pairwise", method="spearman")
if(isTRUE(normalized)){
pred_mu <- as.matrix(predictedMeans(object, type=type)[
expRowsMax & expRowsMedian,][plotIdx,])
if(isTRUE(logit)){
pred_mu <- qlogisWithCap(pred_mu)
}
pred_mu[(snmat < minCount)[plotIdx,]] <- NA
lpred_mu_rc <- pred_mu - rowMeans(pred_mu, na.rm=TRUE)
xmat_rc_2_plot <- xmat_rc_2_plot - lpred_mu_rc
}
cormat <- cor(xmat_rc_2_plot, use="pairwise", method="spearman")
breaks <- seq(-1, 1, length.out=50)
if(plotType == "junctionSample"){
if(isTRUE(normalized)){
pred_mu <- as.matrix(predictedMeans(object, type=type)[
expRowsMax & expRowsMedian,])
if(isTRUE(logit)){
pred_mu <- qlogisWithCap(pred_mu)
breaks <- seq(-5, 5, length.out=50)
}
lpred_mu_rc <- pred_mu - rowMeans(pred_mu)
xmat_rc <- xmat_rc - lpred_mu_rc
}
object <- object[expRowsMax & expRowsMedian,,by=type]
j2keepVa <- variableJunctions(object, type, minDeltaPsi)
j2keepDP <- rowQuantiles(kmat[expRowsMax & expRowsMedian,],
probs=0.75) >= 10
j2keep <- j2keepDP & j2keepVa
xmat_rc_2_plot <- xmat_rc[j2keep,]
mostVarKeep <- subsetKMostVariableJunctions(
object[j2keep,,by=type], type, topJ)
xmat_rc_2_plot <- xmat_rc_2_plot[mostVarKeep,]
rownames(xmat_rc_2_plot) <- seq_len(nrow(xmat_rc_2_plot))
}
if(is.character(annotation_col)){
annotation_col <- getColDataAsDFFactors(object, annotation_col)
}
if(is.character(annotation_row)){
annotation_row <- getColDataAsDFFactors(object, annotation_row)
}
# annotate with sample clusters
if(is.null(sampleClustering)){
# annotate samples with clusters from sample correlation heatmap
nClust <- min(nClust, nrow(cormatS))
clusters <- as.factor(cutree(hclust(dist(cormatS)), k=nClust))
} else if(!is.na(sampleClustering)){
clusters <- sampleClustering
}
if(!isTRUE(is.na(sampleClustering))){
if(!is.null(nrow(annotation_col))){
annotation_col$sampleCluster <- clusters
} else {
annotation_col <- data.frame(sampleCluster=clusters)
}
}
if(plotType == "junctionSample"){
# annotate junctions with meanPsi and meanCoverage
xmat <- xmat[j2keep,]
xmat <- xmat[mostVarKeep,]
meanPsi <- if(isTRUE(logit)){
rowMeans(plogis(xmat), na.rm=TRUE)
} else{
rowMeans(xmat, na.rm=TRUE)
}
meanPsiBins <- cut(meanPsi, breaks = c(0, 0.33, 0.66, 1),
include.lowest=TRUE)
if(isTRUE(plotMeanPsi)){
if(!is.null(nrow(annotation_row))){
annotation_row$meanPsi <- meanPsiBins
} else{
annotation_row <- data.frame(meanPsi=meanPsiBins)
}
}
snmat <- snmat[j2keep,]
snmat <- snmat[mostVarKeep,]
meanCoverage <- rowMeans(snmat)
cutpoints <- sort(unique(round(log10(meanCoverage))))
if(max(cutpoints) < ceiling(log10(max(meanCoverage)))){
cutpoints <- c(cutpoints, ceiling(log10(max(meanCoverage))))
}
meanCoverage <- cut(meanCoverage, breaks=10^(cutpoints),
include.lowest=TRUE)
if(isTRUE(plotCov)){
annotation_row$meanCoverage <- meanCoverage
}
if(isTRUE(nrow(annotation_row) > 0)){
rownames(annotation_row) <- rownames(xmat_rc_2_plot)
}
cormat <- xmat_rc_2_plot
}
if(is.null(main)){
main <- ifelse(normalized, "Normalized intron-centered ",
"Raw intron-centered ")
if(plotType == "sampleCorrelation"){
if(isTRUE(logit)){
main <- paste0(main, "Logit(PSI) correlation (", type, ")")
} else {
main <- paste0(main, "PSI correlation (", type, ")")
}
} else {
if(isTRUE(logit)){
main <- paste0(main, "Logit(PSI) data (", type, ", top ", topJ,
")")
} else {
main <- paste0(main, "PSI data (", type, ", top ", topJ, ")")
}
}
}
pheatmap(cormat, show_rownames=show_rownames, show_colnames=show_colnames,
main=main, annotation_col=annotation_col, breaks=breaks,
annotation_row=annotation_row, ..., border_color=border_color,
color=colorRampPalette(colors=rev(brewer.pal(11, "RdBu")))(50)
)
}
#'
#' Plot count correlation
#'
#' Count correlation heatmap function
#'
#' @rdname plotFunctions
#' @export
setMethod("plotCountCorHeatmap", signature="FraserDataSet",
plotCountCorHeatmap.FRASER)
#'
#' Plot coverage from bam files for given genomic range and sample ids
#'
#' @rdname plotFunctions
#' @export
plotBamCoverage <- function(fds, gr, sampleID,
control_samples=sample(
samples(fds[, which(samples(fds) != sampleID)]),
min(3, ncol(fds)-length(sampleID))),
txdb=NULL, min_junction_count=20,
highlight_range=NULL, highlight_range_color="firebrick",
color_annotated="gray", color_novel="goldenrod3",
color_sample_interest="firebrick", color_control_samples="dodgerblue4",
toscale=c("exon", "gene", "none"), mar=c(2, 10, 0.1, 5),
curvature_splicegraph=1, curvature_coverage=1, cex=1,
splicegraph_labels=c("genomic_range", "id", "name", "none"),
splicegraph_position=c("top", "bottom"), ...){
if(missing(fds)){
stop("Missing input: fds (FraserDataSet object)")
} else{
stopifnot(is(fds, "FraserDataSet"))
}
if(missing(gr)){
stop("Missing input gr (genomic range to plot).")
} else{
stopifnot(is(gr, "GenomicRanges"))
stopifnot(length(gr) > 0)
}
if(missing(sampleID)){
stop("Missing input: sample_of_interest")
}
toscale <- match.arg(toscale)
splicegraph_labels <- match.arg(splicegraph_labels)
splicegraph_position <- match.arg(splicegraph_position)
# extract bam info for sample ids to plot
all_sids <- c(sampleID, control_samples)
si_out <- getSGSeqSI(fds, all_sids)
sgseq_si <- si_out[[1]]
fds <- si_out[[2]]
# collapse input ranges if several
if(any(strand(gr) == "*")){
# seems to throw an error with * strand so guessing strand instead
if(all(strand(gr) == "*")){
guessStrand <- "+"
} else{
guessStrand <- strand(gr[strand(gr) != "*"])[1]
}
strand(gr)[strand(gr) == "*"] <- guessStrand
warning("Input genomic ranges contained unstranded ranges.\n",
"This function needs strand information, guessing strand to ",
"be ", guessStrand, ".")
}
if(!all(strand(gr) == strand(gr[1,]))){
warning("Input genomic ranges contained ranges on different strands,\n",
"only showing coverage for the ", strand(gr[1,]), " strand.")
strand(gr) <- rep(strand(gr[1,]), length(gr))
}
gr <- range(gr)
gr <- keepSeqlevels(gr, unique(as.character(seqnames(gr))))
# convert highlight_range to GRangesList if not
if(!is.null(highlight_range) && !is(highlight_range, "GRangesList")){
stopifnot(is(highlight_range, "GRanges"))
highlight_range <- GRangesList(highlight_range)
}
# extract splice graph
sgfc_pred <- SGSeq::analyzeFeatures(sgseq_si, which = gr,
min_junction_count=min_junction_count, psi=0,
...)
# overlap detected junctions with annotation
if(!is.null(txdb)){
# subset to chr of interest
seqlevels(txdb) <- unique(as.character(seqnames(gr)))
# extract transcript features with SGSeq package
txf <- SGSeq::convertToTxFeatures(txdb)
txf <- txf[txf %over% gr]
# restore seqlevels of txdb object
seqlevels(txdb) <- seqlevels0(txdb)
# annotate splice junctions with annotation features
sgfc_pred <- SGSeq::annotate(sgfc_pred, txf)
} else{
# when no annotation is given, show everything in the same color
color_novel <- color_annotated
}
# get genomic positions for first and last exon in given range
if(splicegraph_labels == "genomic_range"){
# tell plotSpliceGraph function to use custom labels
splicegraph_labels <- "label"
# create custom labels (only for first and last exon for readability)
mcols(sgfc_pred)$label <- ""
exons <- which(SGSeq::type(sgfc_pred) == "E" &
rowRanges(sgfc_pred) %over% gr)
exons <- unique(c(exons[1], tail(exons, n=1)))
if(length(exons) == 1){
mcols(sgfc_pred)$label[exons] <-
paste(seqnames(sgfc_pred),
paste(start(sgfc_pred), end(sgfc_pred), sep="-"),
strand(sgfc_pred), sep=":")[exons]
}
if(length(exons) == 2){
mcols(sgfc_pred)$label[exons[1]] <-
paste(seqnames(sgfc_pred),
start(sgfc_pred),
strand(sgfc_pred), sep=":")[exons[1]]
mcols(sgfc_pred)$label[exons[2]] <-
paste(seqnames(sgfc_pred),
end(sgfc_pred),
strand(sgfc_pred), sep=":")[exons[2]]
}
}
# plot splice graph and coverage of junctions from bam
nr_sa2p <- length(all_sids)
par(mfrow = c(nr_sa2p+1, 1), mar=mar, cex=cex)
if(splicegraph_position == "top"){
SGSeq::plotSpliceGraph(rowRanges(sgfc_pred),
which=gr,
toscale=toscale,
color=color_annotated,
color_novel=color_novel,
ypos=c(0.25, 0.1),
ranges=highlight_range,
ranges_color=highlight_range_color,
ranges_ypos=c(0.01, 0.02),
curvature=curvature_splicegraph,
label=splicegraph_labels)
}
for (j in seq_along(sampleID)) {
SGSeq::plotCoverage(
sgfc_pred[, which(colnames(sgfc_pred) == sampleID[j])],
which = gr,
toscale = toscale,
label=sampleID[j],
color=color_sample_interest,
curvature=curvature_coverage)
}
for (j in seq_along(control_samples)) {
SGSeq::plotCoverage(
sgfc_pred[, which(colnames(sgfc_pred) == control_samples[j])],
which = gr,
toscale = toscale,
label=control_samples[j],
color=color_control_samples,
curvature=curvature_coverage)
}
if(splicegraph_position == "bottom"){
SGSeq::plotSpliceGraph(rowRanges(sgfc_pred),
which=gr,
toscale=toscale,
color_novel=color_novel,
ypos=c(0.25, 0.1),
ranges=highlight_range,
ranges_color=highlight_range_color,
ranges_ypos=c(0.01, 0.02),
curvature=curvature_splicegraph,
label=splicegraph_labels)
}
return(invisible(fds))
}
#'
#' Plot coverage from bam files for given row of results table
#'
#' @rdname plotFunctions
#' @export
plotBamCoverageFromResultTable <- function(fds, result, show_full_gene=FALSE,
txdb=NULL, orgDb=NULL, res_gene_col="hgncSymbol",
res_geneid_type="SYMBOL", txdb_geneid_type="ENTREZID",
left_extension=1000, right_extension=1000, ...){
stopifnot(is(fds, "FraserDataSet"))
if(is(result, "GenomicRanges")){
result <- as.data.table(result)
}
stopifnot(is.data.table(result))
stopifnot(result[,.N] == 1)
sid <- result[,sampleID]
jidx <- getIndexFromResultTable(fds, result)
outlier_range <- rowRanges(fds, type=result[,type])[jidx,]
# showing either full range of the gene in which the outlier occured
if(show_full_gene == TRUE){
if(missing(txdb)){
stop("Missing input: txdb (for extracting gene range)")
}
if(missing(orgDb)){
stop("Missing input: orgDb (for mapping of IDs to txdb)")
}
result_gene <- result[,get(res_gene_col)]
result_gene <- strsplit(result_gene, ";", fixed=TRUE)[[1]]
if(is.data.table(orgDb)){
tmp <- merge(x=as.data.table(genes(txdb))[,.(gene_id)], y=orgDb,
by.y=txdb_geneid_type, by.x="gene_id", all.x=TRUE,
sort=FALSE)[,.(gene_id, feature=get(res_geneid_type))]
setnames(tmp, "feature", res_geneid_type)
txdb_geneid <- tmp[get(res_geneid_type) %in% result_gene, gene_id]
} else {
tmp <- as.data.table(
select(orgDb,
keys=result_gene,
columns=txdb_geneid_type,
keytype=res_geneid_type)
)
txdb_geneid <- tmp[, get(txdb_geneid_type)]
}
gr <- genes(txdb, filter=list("gene_id"=txdb_geneid))
if(length(gr) == 0){
stop("Could not extract genomic coordinates for input gene.")
}
} else{
# or just showing a certain region around the outlier junction
gr <- outlier_range
start(gr) <- start(gr) - left_extension
end(gr) <- end(gr) + right_extension
}
# if several genes overlap, only show those on same strand as outlier
if(as.character(strand(outlier_range)) != "*" &
length(gr[strand(gr) == strand(outlier_range),]) > 0){
gr <- gr[strand(gr) == strand(outlier_range),]
}
# create the coverage plot for the given outlier
fds <- plotBamCoverage(fds,
gr=gr,
sampleID=sid,
txdb=txdb,
highlight_range=outlier_range,
...)
return(invisible(fds))
}
plotManhattan.FRASER <- function(object, sampleID, value="pvalue",
type=fitMetrics(object), chr=NULL,
main=paste0("sample: ", sampleID),
chrColor=c("black", "darkgrey"),
subsetName=NULL, ...){
# check necessary packages
if (!requireNamespace('ggbio')){
stop("For this function, the ggbio package is required.")
}
if (!requireNamespace('biovizBase')){
stop("For this function, the biovizBase package is required.")
}
# check arguments
stopifnot(is(object, "FraserDataSet"))
stopifnot(sampleID %in% samples(object))
type <- match.arg(type)
additional_args <- list(...)
padjCutoff <- 0.1
if("padjCutoff" %in% names(additional_args)){
padjCutoff <- additional_args$padjCutoff
}
deltaPsiCutoff <- ifelse(type == "jaccard", 0.1, 0.3)
if("deltaPsiCutoff" %in% names(additional_args)){
deltaPsiCutoff <- additional_args$deltaPsiCutoff
}
# extract neccessary informations
gr_sample <- rowRanges(object, type=type)
seqlevelsStyle(gr_sample) <- seqlevelsStyle(object)
mcols(gr_sample)[,"pvalue"] <- -log10(
pVals(object, type=type, level="junction")[,sampleID])
mcols(gr_sample)[,"padjust"] <- -log10(padjVals(object, type=type,
level="site", subsetName=subsetName)[,sampleID])
mcols(gr_sample)[,"delta"] <- deltaPsiValue(object, type=type)[,sampleID]
# Add values to granges
if(value %in% c('pvalue', 'pValue', 'pv')){
gr_sample$value <- mcols(gr_sample)[, "pvalue"]
ylabel <- expression(paste(-log[10], "(P-value)"))
}
if(value %in% c('zscore', 'zScore')){
gr_sample$value <- zScores(object, type=type)[, sampleID]
ylabel <- value
}
if(value %in% c('delta', 'deltaPsi', 'deltaJaccard')){
gr_sample$value <- mcols(gr_sample)[, "delta"]
ylabel <- bquote(Delta ~ .(ggplotLabelPsi(type)[[1]]))
}
# only one point per donor/acceptor site (relevant only for psi5 and psi3)
index <- getSiteIndex(object, type=type)
nonDup <- !duplicated(index)
gr_sample <- gr_sample[nonDup,]
# Sort granges for plot
gr_sample <- sortSeqlevels(gr_sample)
gr_sample <- sort(gr_sample)
# subset to chromosomes in chrSubset if requested
if(!is.null(chr)){
# check input
if(!all(chr %in% unique(seqnames(gr_sample)))){
stop("Not all chromosomes selected for subsetting are present ",
"in the GRanges object.")
}
# subset
gr_sample <- gr_sample[as.character(seqnames(gr_sample)) %in% chr]
# add chr to plot title if only one chr given
if(length(chr) == 1){
main <- paste0(main, "; ",
paste(chr, collapse=", ", sep=""))
}
}
# find outlier indices
if(!type %in% c("psi3", "psi5")){
outlier_idx <- which(gr_sample$padjust >= -log10(padjCutoff) &
abs(gr_sample$delta) >= deltaPsiCutoff)
} else{
outlier_idx <- which(gr_sample$padjust >= -log10(padjCutoff))
}
message("highlighting ", length(gr_sample[outlier_idx,]), " outliers ...")
# plot manhattan plot
plotGrandLinear.adapted(gr_sample, aes(y=value),
color=chrColor,
highlight.gr=gr_sample[outlier_idx,],
highlight.overlap="equal",
use.genome.coords=is.null(chr)) +
labs(title=main, x="", y=ylabel)
}
#'
#' Plot manhattan plot of junction pvalues
#'
#' @rdname plotFunctions
#' @export
setMethod("plotManhattan", signature="FraserDataSet",
plotManhattan.FRASER)
#'
#' helper function to get the annotation as data frame from the col data object
#'
#' @noRd
getColDataAsDFFactors <- function(fds, names){
tmpDF <- data.frame(colData(fds)[,names])
colnames(tmpDF) <- names
for(i in names){
if(any(is.na(tmpDF[, i]))){
tmpDF[,i] <- as.factor(paste0("", tmpDF[,i]))
}
if(is.integer(tmpDF[,i]) && length(levels(as.factor(tmpDF[,i]))) <= 10){
tmpDF[,i] <- as.factor(paste0("", tmpDF[,i]))
}
}
rownames(tmpDF) <- rownames(colData(fds))
return(tmpDF)
}
#'
#' used to cap the qlogis for the correlation heatmap
#'
#' @noRd
qlogisWithCap <- function(x, digits=2){
x <- round(x, digits)
x <- pmin(pmax(x, 10^-digits), 1-10^-digits)
ans <- qlogis(x)
ans[is.infinite(ans)] <- NA
rowm <- rowMaxs(ans, na.rm=TRUE)
idx <- which(is.na(ans), arr.ind=TRUE)
ans[idx] <- rowm[idx[,"row"]]
return(ans)
}
#'
#' Helper to get nice Splice metric labels in ggplot
#'
#' @noRd
ggplotLabelPsi <- function(type, asCharacter=FALSE){
type <- as.character(type)
if(isFALSE(asCharacter)){
vapply(type, FUN=function(x)
switch (x,
jaccard = c(bquote(Intron~Jaccard~Index)),
psi5 = c(bquote(psi[5])),
psi3 = c(bquote(psi[3])),
theta = c(bquote(theta))),
FUN.VALUE=c(bquote(psi[3])))
} else{
vapply(type, FUN=function(x)
switch (x,
jaccard = "Intron-Jaccard-Index",
psi5 = "psi[5]",
psi3 = "psi[3]",
theta = "theta"),
FUN.VALUE=character(1))
}
}
#'
#' Extract info from bam files needed for SGSeq functions to work
#'
#' @noRd
getSGSeqSI <- function(fds, sample_ids){
# check if bam info is already stored in fds for given samples
if("SGSeq_sampleinfo" %in% names(metadata(fds))){
si <- metadata(fds)[["SGSeq_sampleinfo"]]
si <- si[si$sample_name %in% sample_ids,]
if(nrow(si) != length(sample_ids)){
# add bam info for missing sample_ids
missing_ids <- sample_ids[!sample_ids %in% si$sample_name]
message("Extracting SGSeq sample info from BAM files for samples ",
paste(missing_ids, collapse=", "), " ...")
df_missing <- data.frame(
sample_name=samples(fds)[samples(fds) %in% missing_ids],
file_bam=bamFile(fds)[samples(fds) %in% missing_ids])
si_new <- SGSeq::getBamInfo(df_missing, yieldSize=1e6)
si_new$lib_size <- 50e6 # dummy value to speed up this part
si <- rbind(si, si_new)
metadata(fds)[["SGSeq_sampleinfo"]] <-
rbind(metadata(fds)[["SGSeq_sampleinfo"]], si_new)
}
return(list(si, fds))
} else{
message("Extracting SGSeq sample info from BAM files for samples ",
paste(sample_ids, collapse=", "), " ...")
df <- data.frame(
sample_name=samples(fds)[samples(fds) %in% sample_ids],
file_bam=bamFile(fds)[samples(fds) %in% sample_ids])
si <- SGSeq::getBamInfo(df, yieldSize=1e6)
si$lib_size <- 50e6 # dummy value to speed up this part
metadata(fds)[["SGSeq_sampleinfo"]] <- si
return(list(si, fds))
}
}
#'
#' Adapted function from ggbio package to create manhattan plot.
#' Adapted to allow highlighting only ranges that exactly match. Uses functions
#' from package biovizBase.
#'
#' @noRd
plotGrandLinear.adapted <- function (obj, ..., facets, space.skip = 0.01,
geom = NULL, cutoff = NULL, cutoff.color = "red", cutoff.size = 1,
legend = FALSE, xlim, ylim, xlab, ylab, main, highlight.gr = NULL,
highlight.name = NULL, highlight.col = "red", highlight.label = TRUE,
highlight.label.size = 5, highlight.label.offset = 0.05,
highlight.label.col = "black",
highlight.overlap = c("any", "start", "end", "within", "equal"),
spaceline = FALSE, use.genome.coords=TRUE){
if (is.null(geom))
geom <- "point"
args <- list(...)
args.aes <- biovizBase::parseArgsForAes(args)
args.non <- biovizBase::parseArgsForNonAes(args)
two.color <- c("#0080FF", "#4CC4FF")
.is.seq <- FALSE
if (!"colour" %in% names(args.aes)) {
if (!any(c("color", "colour") %in% names(args.non))) {
.color <- two.color
args.aes$color <- as.name("seqnames")
.is.seq <- TRUE
}
else {
if (length(args.non$color) > 1) {
.color <- args.non$color
args.aes$color <- as.name("seqnames")
.is.seq <- TRUE
args.non <- args.non[!names(args.non) %in% c("colour",
"color")]
}
}
}
else {
if (quo_name(args.aes$colour) == "seqnames")
args.aes$colour <- as.name("seqnames")
}
if (!"y" %in% names(args.aes))
stop("need to provide y")
if(isTRUE(use.genome.coords)){
args.non$coord <- "genome"
}
args.non$space.skip <- space.skip
args.non$geom <- geom
args.non$object <- obj
aes.res <- do.call(aes, args.aes)
p <- do.call(ggbio::autoplot, c(list(aes.res), args.non))
if (!legend)
p <- p + theme(legend.position = "none")
if (!missing(ylab))
p <- p + ylab(ylab)
if (!is.null(cutoff))
p <- p + geom_hline(yintercept = cutoff, color = cutoff.color,
size = cutoff.size)
chrs <- names(seqlengths(obj))
if (.is.seq) {
N <- length(chrs)
cols <- rep(.color, round(N/length(.color)) + 1)[1:N]
names(cols) <- chrs
p <- p + scale_color_manual(values = cols)
}
if (!missing(facets)) {
args$facets <- facets
args.facets <- biovizBase::subsetArgsByFormals(args, facet_grid,
facet_wrap)
facet <- ggbio:::.buildFacetsFromArgs(obj, args.facets)
p <- p + facet
}
p <- p + theme(panel.grid.minor = element_blank())
if (!is.null(highlight.gr)) {
highlight.overlap <- match.arg(highlight.overlap)
idx <- findOverlaps(obj, highlight.gr, type=highlight.overlap)
.h.pos <- lapply(split(queryHits(idx), subjectHits(idx)),
function(id) {
gr <- GRanges(as.character(seqnames(p@data))[id][1],
IRanges(start = min(start(p@data[id])), end = max(end(p@data[id]))))
val <- max(as.numeric(values(p@data[id])[, quo_name(args.aes$y)]))
val <- val * (1 + highlight.label.offset)
values(gr)$val <- val
gr
})
.h.pos <- suppressWarnings(do.call("c", unname(.h.pos)))
if (length(.h.pos)) {
if (is.null(highlight.name)) {
highlight.name <- names(highlight.gr)
}
else {
highlight.name <- values(highlight.gr)[, highlight.name]
}
p <- p + geom_point(data = biovizBase::mold(p@data[queryHits(idx)]),
do.call(aes, list(x = substitute(midpoint), y = args.aes$y)),
color = highlight.col)
if (!is.null(highlight.name)) {
seqlevels(.h.pos, pruning.mode = "coarse") <- seqlevels(obj)
suppressWarnings(seqinfo(.h.pos) <- seqinfo(obj))
.trans <- biovizBase::transformToGenome(.h.pos, space.skip = space.skip)
values(.trans)$mean <- (start(.trans) + end(.trans))/2
values(.trans)$names <- highlight.name
p <- p + geom_text(data = biovizBase::mold(.trans),
size = highlight.label.size,
vjust = 0, color = highlight.label.col, do.call(aes,
list(x = substitute(mean), y = as.name("val"),
label = as.name("names"))))
}
}
}
if (spaceline) {
vline.df <- p@ggplot$data
vline.df <- do.call(rbind, by(vline.df, vline.df$seqnames,
function(dd) {
data.frame(start = min(dd$start), end = max(dd$end))
}))
gap <- (vline.df$start[-1] + vline.df$end[-nrow(vline.df)])/2
p <- p + geom_vline(xintercept = gap, alpha = 0.5, color = "gray70") +
theme(panel.grid = element_blank())
}
if (!missing(main))
p <- p + labs(title = main)
if (!missing(xlim))
p <- p + xlim(xlim)
if (!missing(ylim))
p <- p + ylim(ylim)
if (missing(xlab))
xlab <- ""
p <- p + ggplot2::xlab(xlab)
p
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.