#' Construct a region of interest coverage plot
#'
#' Given a list of data frames construct a sequencing coverage view over a
#' region of interest.
#' @name genCov
#' @param x Named list with list elements containing data frames
#' representing samples. Data frame rows should represent read pileups observed
#' in sequencing data. Data frame column names must include "end" and "cov"
#' corresponding to the base end position and coverage of a pileup respectively.
#' Data within data frames must be on the same chromosome as the region of
#' interest, see details!
#' @param txdb Object of class TxDb giving transcription meta data for a genome
#' assembly. See Bioconductor annotation packages.
#' @param gr Object of class GRanges specifying the region of interest and
#' corresponding to a single gene. See Bioconductor package GRanges.
#' @param genome Object of class BSgenome specifying the genome sequence of
#' interest. See Bioconductor annotation packages.
#' @param reduce Boolean specifying whether to collapse gene isoforms within the
#' region of interest into one representative transcript. Experimental use with
#' caution!
#' @param gene_colour Character string specifying the colour of the gene to be
#' plotted in the gene track.
#' @param gene_plotLayer Valid ggplot2 layer to be added to the gene sub-plot.
#' @param gene_name Character string specifying the name of the gene or region
#' of interest.
#' @param label_bgFill Character string specifying the desired background colour
#' of the track labels.
#' @param label_txtFill Character string specifying the desired text colour of
#' the track labels.
#' @param label_borderFill Character string specifying the desired border colour
#' of the track labels.
#' @param label_txtSize Integer specifying the size of the text within the track
#' labels.
#' @param lab2plot_ratio Numeric vector of length 2 specifying the ratio of
#' track labels to plot space.
#' @param cov_colour Character string specifying the colour of the data in the
#' coverage plots.
#' @param cov_plotType Character string specifying one of "line",
#' "bar" or "point". Changes the ggplot2 geom which constructs the data display.
#' @param cov_plotLayer Valid ggplot2 layer to be added to the coverage
#' sub-plots.
#' @param base Numeric vector of log bases to transform the data
#' corresponding to the elements supplied to the variable transform See details.
#' @param transform Character vector specifying what objects to log transform,
#' accepts "Intron", "CDS", and "UTR" See details.
#' @param gene_labelTranscript Boolean specifying whether to plot the transcript
#' names in the gene plot.
#' @param gene_labelTranscriptSize Integer specifying the size of the transcript
#' name text in the gene plot.
#' @param gene_isoformSel Character vector specifying the names
#' (from the txdb object) of isoforms within the region of interest to display.
#' @param out Character vector specifying the object to output, one of
#' "data", "grob", or "plot", defaults to "plot" (see returns).
#' @param subsample Boolean value specifying whether to reduce the provided
#' coverage data to a subset of approximately 1000 points. Used to generate
#' sparse plots that use less disk space and are faster to render.
#' @details genCov is a function designed construct a series of tracks based on
#' a TxDb object giving transcript features, and coverage data supplied to
#' parameter `x`. The function will look at a region of interest specified by
#' the argument supplied to gr and plot transcript features and the
#' corresponding coverage information. The argument supplied to `genome` enables
#' gc content within genomic features to be calculated and displayed. The
#' argument supplied to x must contain data on the same chromosome as the region
#' of interest specified in the parameter `gr`!
#'
#' Typically, introns of a transcript are much larger than exons, while exons are
#' sometimes of greater interest. To address this, genCov will by default
#' scale the x-axis to expand track information according to region type: coding
#' sequence (CDS), untranslated region (UTR), or intron / intergenic (Intron).
#' The amount by which each region is scaled is controlled by the `base` and
#' `transform` arguments. `transform` specifies which regions to scale, and `base`
#' corresponds to the log base transform to apply to those regions. To keep one or
#' more region types from being scaled, omit the corresponding entries from the
#' `base` and `transform` vectors.
#'
#' @return One of the following, a list of dataframes containing data to be
#' plotted, a grob object, or a plot.
#' @importFrom GenomicRanges start
#' @importFrom GenomicRanges end
#' @importFrom plyr adply
#' @examples
#' # Load transcript meta data
#' library(TxDb.Hsapiens.UCSC.hg19.knownGene)
#' txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
#'
#' # Load BSgenome
#' library(BSgenome.Hsapiens.UCSC.hg19)
#' genome <- BSgenome.Hsapiens.UCSC.hg19
#'
#' # Define a region of interest
#' gr <- GRanges(seqnames=c("chr10"),
#'ranges=IRanges(start=c(89622195), end=c(89729532)), strand=strand(c("+")))
#'
#' # Create Data for input
#' start <- c(89622194:89729524)
#' end <- c(89622195:89729525)
#' chr <- 10
#' cov <- c(rnorm(100000, mean=40), rnorm(7331, mean=10))
#' cov_input_A <- as.data.frame(cbind(chr, start, end, cov))
#'
#' start <- c(89622194:89729524)
#' end <- c(89622195:89729525)
#' chr <- 10
#' cov <- c(rnorm(50000, mean=40), rnorm(7331, mean=10), rnorm(50000, mean=40))
#' cov_input_A <- as.data.frame(cbind(chr, start, end, cov))
#'
#' # Define the data as a list
#' data <- list("Sample A"=cov_input_A)
#'
#' # Call genCov
#' genCov(data, txdb, gr, genome, gene_labelTranscriptSize=3)
#' @export
genCov <- function(x, txdb, gr, genome, reduce=FALSE, gene_colour=NULL,
gene_name='Gene', gene_plotLayer=NULL, label_bgFill="black",
label_txtFill="white", label_borderFill="black",
label_txtSize=10, lab2plot_ratio=c(1, 10),
cov_colour="blue", cov_plotType="point", cov_plotLayer=NULL,
base=c(10, 2, 2),
transform=c("Intron", "CDS", "UTR"),
gene_labelTranscript=TRUE,
gene_labelTranscriptSize=4, gene_isoformSel=NULL, out="plot",
subsample=FALSE)
{
# Perform data quality checks
data <- genCov_qual(x=x, txdb=txdb, gr=gr, genome=genome)
x <- data$x
txdb <- data$txdb
gr <- data$gr
genome <- data$genome
# Obtain a plot for the gene overlapping the Granges object and information
# Used to make the plot
gp_result <- geneViz(txdb, gr, genome, reduce=reduce,
gene_colour=gene_colour, base=base,
transform=transform, isoformSel=gene_isoformSel,
labelTranscriptSize=gene_labelTranscriptSize,
labelTranscript=gene_labelTranscript,
plotLayer=gene_plotLayer)
gene <- gp_result$plot
gene_list <- list()
gene_list[[gene_name]] <- gene
# Remove entries in coverage data file that are not within the GRanges
# object specified
test2 <- function(x, min, max)
{
idx <- -which(x$end <= min | x$end >= max)
if(length(idx)){
x <- x[idx,]
}
return(x)
}
coverage_data <- lapply(x, test2, start(gr), end(gr))
# obtain xlimits for gene plot, this is overwritten if transforms
xlimits <- c(start(gr), end(gr))
# set flag to display x axis labels, overwritten if transforms
display_x_axis <- TRUE
# perform the intronic transform on the coverage data
if(!is.null(transform) && length(transform) > 0)
{
# Obtain a copy of the master gene file
master <- gp_result$master
# Format coverage file so that there is a start column, then map coord
# into transformed intronic space
test <- function(x)
{
x$start <- x$end
return(x)
}
coverage_data <- lapply(coverage_data, test)
message("Mapping coverage data onto transformed gene-space")
coverage_data <- lapply(coverage_data,
geneViz_mapCovCoordSpace,
master=master)
# Replace original coordinates with transformed coordinates
for(i in 1:length(coverage_data))
{
coverage_data[[i]] <- coverage_data[[i]][,c('trans_start',
'trans_end', 'cov')]
colnames(coverage_data[[i]]) <- c('start', 'end', 'cov')
}
# Obtain x limits for gene plot based on granges object
start <- cbind(GenomicRanges::start(gr), GenomicRanges::start(gr))
end <- cbind(GenomicRanges::end(gr), GenomicRanges::end(gr))
temp <- as.data.frame(rbind(start, end))
colnames(temp) <- c('start', 'end')
temp <- plyr::adply(temp, 1, geneViz_mapCoordSpace, master=master)
xlimits <- c(min(temp$trans_start), max(temp$trans_end))
# set flag to not display x axis labels
display_x_axis <- FALSE
}
if(subsample){
transform.coordinates <- function(master_row, x, tx.graph.width){
lt <- x$start <= master_row$trans_end
gt <- x$end >= master_row$trans_start
idx = which(lt & gt)
x.sub <- x[idx,]
width <- dim(x.sub)[1]
adjusted.width <- round(1000 * (master_row$width / tx.graph.width))
if( width > adjusted.width){
s <- seq.int(from = 1, to = width, length.out = adjusted.width)
x.sub <- x.sub[s,]
}
return(x.sub)
}
perform_subsample <- function(x, master){
tx.graph.width <- sum(master$width)
y <- plyr::adply(master, 1, transform.coordinates, x=x, tx.graph.width=tx.graph.width)
y
}
coverage_data <- lapply(coverage_data, perform_subsample, master=master)
}
# obtain coverage plots for the data input as a list
coverage_plot <- lapply(coverage_data, genCov_buildCov,
colour=cov_colour, plot_type=cov_plotType,
x_limits=xlimits, display_x_axis=display_x_axis,
layers=cov_plotLayer)
# Combine both gene and coverage plot lists
merged_data <- c(gene_list, coverage_plot)
# Plot the data on a track
track_coverage_plot <- genCov_trackViz(merged_data, gene_name=gene_name,
bgFill=label_bgFill,
textFill=label_txtFill,
border=label_borderFill,
size=label_txtSize,
axis_align='width',
widthRatio=lab2plot_ratio,
list=TRUE)
# Decide what to output
output <- multi_selectOut(data=merged_data, plot=track_coverage_plot,
draw=TRUE, out=out)
return(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.