plotManhattan | R Documentation |
The FRASER package provides multiple functions to visualize the data and the results of a full data set analysis.
Plots the p values over the delta psi values, known as a volcano plot. Visualizes the outliers per sample, by type and aggregated by gene if requested.
Plots the number of aberrant events per sample.
Plots the observed split reads of the junction of interest over all reads coming from the given donor/acceptor.
Plots the observed values of the splice metric across samples for a junction of interest.
Plots the expected psi value over the observed psi value of the given junction.
Plots the quantile-quantile plot
Histogram of the geometric mean per junction based on the filter status
Histogram of minimal delta psi per junction
Count correlation heatmap function
plotManhattan(object, ...)
## S4 method for signature 'FraserDataSet'
plotVolcano(
object,
sampleID,
type = fitMetrics(object),
basePlot = TRUE,
aggregate = FALSE,
main = NULL,
label = NULL,
deltaPsiCutoff = 0.1,
padjCutoff = 0.1,
subsetName = NULL,
...
)
## S4 method for signature 'FraserDataSet'
plotAberrantPerSample(
object,
main,
type = fitMetrics(object),
padjCutoff = 0.1,
deltaPsiCutoff = 0.1,
aggregate = TRUE,
subsetName = NULL,
BPPARAM = bpparam(),
...
)
plotExpression(
fds,
type = fitMetrics(fds),
idx = NULL,
result = NULL,
colGroup = NULL,
basePlot = TRUE,
main = NULL,
label = "aberrant",
subsetName = NULL,
...
)
plotSpliceMetricRank(
fds,
type = fitMetrics(fds),
idx = NULL,
result = NULL,
colGroup = NULL,
basePlot = TRUE,
main = NULL,
label = "aberrant",
subsetName = NULL,
...
)
plotExpectedVsObservedPsi(
fds,
type = fitMetrics(fds),
idx = NULL,
result = NULL,
colGroup = NULL,
main = NULL,
basePlot = TRUE,
label = "aberrant",
subsetName = NULL,
...
)
## S4 method for signature 'FraserDataSet'
plotQQ(
object,
type = NULL,
idx = NULL,
result = NULL,
aggregate = FALSE,
global = FALSE,
main = NULL,
conf.alpha = 0.05,
samplingPrecision = 3,
basePlot = TRUE,
label = "aberrant",
Ncpus = min(3, getDTthreads()),
subsetName = NULL,
...
)
## S4 method for signature 'FraserDataSet'
plotEncDimSearch(object, type = psiTypes, plotType = c("auc", "loss"))
plotFilterExpression(
fds,
bins = 200,
legend.position = c(0.8, 0.8),
onlyVariableIntrons = FALSE
)
plotFilterVariability(
fds,
bins = 200,
legend.position = c(0.8, 0.8),
onlyExpressedIntrons = FALSE
)
## S4 method for signature 'FraserDataSet'
plotCountCorHeatmap(
object,
type = psiTypes,
logit = FALSE,
topN = 50000,
topJ = 5000,
minMedian = 1,
minCount = 10,
main = NULL,
normalized = FALSE,
show_rownames = FALSE,
show_colnames = FALSE,
minDeltaPsi = 0.1,
annotation_col = NA,
annotation_row = NA,
border_color = NA,
nClust = 5,
plotType = c("sampleCorrelation", "junctionSample"),
sampleClustering = NULL,
plotMeanPsi = TRUE,
plotCov = TRUE,
...
)
plotBamCoverage(
fds,
gr,
sampleID,
control_samples = sample(samples(fds[, which(samples(fds) != sampleID)]), min(3,
ncol(fds) - length(sampleID))),
txdb = NULL,
min_junction_count = 20,
highlight_range = NULL,
highlight_range_color = "firebrick",
color_annotated = "gray",
color_novel = "goldenrod3",
color_sample_interest = "firebrick",
color_control_samples = "dodgerblue4",
toscale = c("exon", "gene", "none"),
mar = c(2, 10, 0.1, 5),
curvature_splicegraph = 1,
curvature_coverage = 1,
cex = 1,
splicegraph_labels = c("genomic_range", "id", "name", "none"),
splicegraph_position = c("top", "bottom"),
...
)
plotBamCoverageFromResultTable(
fds,
result,
show_full_gene = FALSE,
txdb = NULL,
orgDb = NULL,
res_gene_col = "hgncSymbol",
res_geneid_type = "SYMBOL",
txdb_geneid_type = "ENTREZID",
left_extension = 1000,
right_extension = 1000,
...
)
## S4 method for signature 'FraserDataSet'
plotManhattan(
object,
sampleID,
value = "pvalue",
type = fitMetrics(object),
chr = NULL,
main = paste0("sample: ", sampleID),
chrColor = c("black", "darkgrey"),
subsetName = NULL,
...
)
object , fds |
A FraserDataSet object. |
... |
Additional parameters passed to plot() or plot_ly() if not stated otherwise in the details for each plot function |
sampleID |
A sample ID which should be plotted. Can also be a vector. Integers are treated as indices. |
type |
The psi type: either psi5, psi3 or theta (for SE). |
basePlot |
if |
aggregate |
If TRUE, the pvalues are aggregated by gene (default), otherwise junction level pvalues are used (default for Q-Q plot). |
main |
Title for the plot, if missing a default title will be used. |
label |
Indicates the genes or samples that will be labelled in the
plot (only for |
padjCutoff , deltaPsiCutoff |
Significance or delta psi cutoff to mark outliers |
subsetName |
The name of a subset of genes of interest for which FDR corrected pvalues were previously computed. Those FDR values on the subset will then be used to determine aberrant status. Default is NULL (using transcriptome-wide FDR corrected pvalues). |
BPPARAM |
BiocParallel parameter to use. |
idx |
A junction site ID or gene ID or one of both, which should be plotted. Can also be a vector. Integers are treated as indices. |
result |
The result table to be used by the method. |
colGroup |
Group of samples that should be colored. |
global |
Flag to plot a global Q-Q plot, default FALSE |
conf.alpha |
If set, a confidence interval is plotted, defaults to 0.05 |
samplingPrecision |
Plot only non overlapping points in Q-Q plot to reduce number of points to plot. Defines the digits to round to. |
Ncpus |
Number of cores to use. |
plotType |
The type of plot that should be shown as character string.
For plotEncDimSearch, it has to be either |
bins |
Set the number of bins to be used in the histogram. |
legend.position |
Set legend position (x and y coordinate), defaults to the top right corner. |
onlyVariableIntrons |
Logical value indicating whether to show only introns that also pass the variability filter. Defaults to FALSE. |
onlyExpressedIntrons |
Logical value indicating whether to show only introns that also pass the expression filter. Defaults to FALSE. |
logit |
If TRUE, psi values are plotted in logit space. Defaults to FALSE. |
topN |
Top x most variable junctions that should be used for the calculation of sample x sample correlations. |
topJ |
Top x most variable junctions that should be displayed in the junction-sample correlation heatmap. Only applies if plotType is "junctionSample". |
minMedian , minCount , minDeltaPsi |
Minimal median ( |
normalized |
If TRUE, the normalized psi values are used; otherwise the raw psi values are used. Defaults to FALSE. |
show_rownames , show_colnames |
Logical value indicating whether to show row or column names on the heatmap axes. |
annotation_col , annotation_row |
Row or column annotations that should be plotted on the heatmap. |
border_color |
Sets the border color of the heatmap |
nClust |
Number of clusters to show in the row and column dendrograms. |
sampleClustering |
A clustering of the samples that should be used as an annotation of the heatmap. |
plotMeanPsi , plotCov |
If |
gr |
A GRanges object indicating the genomic range that should be shown
in |
control_samples |
The sampleIDs of the samples used as control in
|
txdb |
A TxDb object giving the gene/transcript annotation to use. |
min_junction_count |
The minimal junction count across samples required
for a junction to appear in the splicegraph and coverage tracks
of |
highlight_range |
A |
highlight_range_color |
The color of highlighted ranges in
the splicegraph of |
color_annotated |
The color for exons and junctions present in
the given annotation (in the splicegraph of
|
color_novel |
The color for novel exons and junctions not present in
the given annotation (in the splicegraph of
|
color_sample_interest |
The color in |
color_control_samples |
The color in |
toscale |
In |
mar |
The margin of the plot area for |
curvature_splicegraph |
The curvature of the junction arcs in the
splicegraph in |
curvature_coverage |
The curvature of the junction arcs in the
coverage tracks of |
cex |
For controlling the size of text and numbers in
|
splicegraph_labels |
Indicates the format of exon/splice junction
labels in the splicegraph of |
splicegraph_position |
The position of the splicegraph relative to the
coverage tracks in |
show_full_gene |
Should the full genomic range of the gene be shown in
|
orgDb |
A OrgDb object giving the mapping of gene ids and symbols. |
res_gene_col |
The column name in the given results table that contains the gene annotation. |
res_geneid_type |
The type of gene annotation in the results table in
|
txdb_geneid_type |
The type of gene_id present in |
left_extension |
Indicating how far the plotted range around the outlier
junction should be extended to the left in
|
right_extension |
Indicating how far the plotted range around the
outlier junction should be extended to the right in
|
value |
Indicates which assay is shown in the manhattan plot. Defaults to 'pvalue'. Other options are 'deltaPsi' and 'zScore'. |
chr |
Vector of chromosome names to show in |
chrColor |
Interchanging colors by chromosome for |
This is the list of all plotting functions provided by FRASER:
plotAberrantPerSample()
plotVolcano()
plotExpression()
plotQQ()
plotExpectedVsObservedPsi()
plotCountCorHeatmap()
plotFilterExpression()
plotFilterVariability()
plotEncDimSearch()
plotBamCoverage()
plotBamCoverageFromResultTable()
plotManhattan()
plotSpliceMetricRank()
For a detailed description of each plot function please see the details. Most of the functions share the same parameters.
plotAberrantPerSample
: The number of aberrant events per sample are
plotted sorted by rank. The ... parameters are passed on to the
aberrant
function.
plotVolcano
: the volcano plot is sample-centric. It plots for a given
sample and psi type the negative log10 nominal P-values against the delta psi
values for all splice sites or aggregates by gene if requested.
plotExpression
: This function plots for a given site the
read count at this site (i.e. K) against the total coverage (i.e. N) for the
given psi type (\psi_5, \psi_3, or \theta
(SE)) for all samples.
plotQQ
: the quantile-quantile plot for a given gene or if
global
is set to TRUE
over the full data set. Here the
observed P-values are plotted against the expected ones in the negative
log10 space.
plotExpectedVsObservedPsi
: A scatter plot of the observed psi
against the predicted psi for a given site.
plotSpliceMetricRank
: This function plots for a given intron the
observed values of the selected splice metric against the sample rank.
plotCountCorHeatmap
: The correlation heatmap of the count data either
of the full data set (i.e. sample-sample correlations) or of the top x most
variable junctions (i.e. junction-sample correlations). By default the values
are log transformed and row centered. The ... arguments are passed to the
pheatmap
function.
plotFilterExpression
: The distribution of FPKM values. If the
FraserDataSet object contains the passedFilter
column, it will plot
both FPKM distributions for the expressed introns and for the filtered
introns.
plotFilterVariability
: The distribution of maximal delta Psi values.
If the FraserDataSet object contains the passedFilter
column,
it will plot both maximal delta Psi distributions for the variable
introns and for the filtered (i.e. non-variable) introns.
plotEncDimSearch
: Visualization of the hyperparameter optimization.
It plots the encoding dimension against the achieved loss (area under the
precision-recall curve). From this plot the optimum should be chosen for
the q
in the fitting process.
plotManhattan
: A Manhattan plot showing the junction pvalues by
genomic position. Useful to identify if outliers cluster by genomic position.
plotBamCoverage
: A sashimi plot showing the read coverage from
the underlying bam files for a given genomic range and sampleIDs.
plotBamCoverageFromResultTable
: A sashimi plot showing the read
coverage from the underlying bam files for a row in the results table. Can
either show the full range of the gene with the outlier junction or only a
certain region around the outlier.
If base R graphics are used, nothing is returned; otherwise the plotly or the ggplot object is returned.
# Example walkthrough of the FRASER plotting functions: build a simulated
# data set, fit it, then call each plot function on the fitted object.

# create full FRASER object
fds <- makeSimulatedFraserDataSet(m=40, j=200)
fds <- calculatePSIValues(fds)
fds <- filterExpressionAndVariability(fds, filter=FALSE)
# this step should be done for more dimensions in practice
fds <- optimHyperParams(fds, "jaccard", q_param=c(2,5,10,25))

# assign gene names to show functionality on test dataset
# use fds <- annotateRanges(fds) on real data
mcols(fds, type="j")$hgnc_symbol <-
    paste0("gene", sample(1:25, nrow(fds), replace=TRUE))

# fit and calculate pvalues
# the same list of 10 randomly drawn gene names is used for each of the
# four named samples and registered as the subset "testSet"
genesOfInterest <- rep(list(paste0("gene", sample(1:25, 10))), 4)
names(genesOfInterest) <- c("sample1", "sample6", "sample15", "sample23")
fds <- FRASER(fds, subsets=list("testSet"=genesOfInterest))

# QC plotting
plotFilterExpression(fds)
plotFilterVariability(fds)
plotCountCorHeatmap(fds, "jaccard")
plotCountCorHeatmap(fds, "jaccard", normalized=TRUE)
plotEncDimSearch(fds, type="jaccard")

# extract results
plotAberrantPerSample(fds, aggregate=FALSE)
plotAberrantPerSample(fds, aggregate=TRUE, subsetName="testSet")
plotVolcano(fds, "sample2", "jaccard", label="aberrant")
plotVolcano(fds, "sample1", "jaccard", aggregate=TRUE, subsetName="testSet")

# dive into gene/sample level results
res <- as.data.table(results(fds))
res
plotExpression(fds, result=res[1])
plotQQ(fds, result=res[1])
# the 'result' argument is spelled out in full (instead of relying on
# partial matching of 'res=') to stay consistent with the calls around it
plotExpectedVsObservedPsi(fds, result=res[1])
plotSpliceMetricRank(fds, result=res[1])

# other ways to call these plotting functions
plotExpression(fds, idx=10, sampleID="sample1", type="jaccard")
plotExpression(fds, result=res[1], subsetName="testSet")
plotQQ(fds, idx=10, sampleID="sample1", type="jaccard")
plotQQ(fds, result=res[1], subsetName="testSet")
plotExpectedVsObservedPsi(fds, idx=10, sampleID="sample1", type="jaccard")
plotExpectedVsObservedPsi(fds, result=res[1], subsetName="testSet")
plotSpliceMetricRank(fds, idx=10, sampleID="sample1", type="jaccard")
plotSpliceMetricRank(fds, result=res[1], subsetName="testSet")

# create manhattan plot of pvalues by genomic position
# (only run when the optional ggbio package can be loaded)
if(require(ggbio)){
    plotManhattan(fds, type="jaccard", sampleID="sample10")
}

# plot splice graph and coverage from bam files in a given region
# (only run when the optional SGSeq package can be loaded)
if(require(SGSeq)){
    fds <- createTestFraserSettings()
    gr <- GRanges(seqnames="chr19",
        IRanges(start=7587496, end=7598895),
        strand="+")
    plotBamCoverage(fds, gr=gr, sampleID="sample3",
        control_samples="sample2", min_junction_count=5,
        curvature_splicegraph=1, curvature_coverage=1,
        mar=c(1, 7, 0.1, 3))

    # plot coverage from bam file for a row in the result table
    fds <- createTestFraserDataSet()
    # library() instead of require(): the annotation objects are used
    # unconditionally right below, so a missing package should fail here
    # with a clear error rather than later with "object not found"
    library(TxDb.Hsapiens.UCSC.hg19.knownGene)
    txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
    library(org.Hs.eg.db)
    orgDb <- org.Hs.eg.db

    res <- results(fds, padjCutoff=NA, deltaPsiCutoff=NA)
    res_dt <- as.data.table(res)
    res_dt <- res_dt[sampleID == "sample2",]

    # plot full range of gene containing outlier junction
    plotBamCoverageFromResultTable(fds, result=res_dt[1,], show_full_gene=TRUE,
        txdb=txdb, orgDb=orgDb, control_samples="sample3")

    # plot only certain range around outlier junction
    plotBamCoverageFromResultTable(fds, result=res_dt[1,], show_full_gene=FALSE,
        control_samples="sample3", curvature_splicegraph=0.5, txdb=txdb,
        curvature_coverage=0.5, right_extension=5000, left_extension=5000,
        splicegraph_labels="id")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.