Project: r project
.
This report is meant to help explore a set of genomic regions and was generated using the regionReport
r Citep(bib[['regionReport']])
package. While the report is rich, it is meant to just start the exploration of the results and exemplify some of the code used to do so. If you need a more in-depth analysis for your specific data set you might want to use the customCode
argument.
Most plots were made with using ggplot2
r Citep(bib[['ggplot2']])
.
## knitrBoostrap and device chunk options library('knitr') opts_chunk$set(bootstrap.show.code = FALSE, dev = device, crop = NULL) if(!outputIsHTML) opts_chunk$set(bootstrap.show.code = FALSE, dev = device, echo = FALSE)
#### Libraries needed ## Bioconductor library('bumphunter') library('derfinder') library('derfinderPlot') library('GenomeInfoDb') library('GenomicRanges') library('ggbio') ## Transcription database to use by default if(is.null(txdb)) { library('TxDb.Hsapiens.UCSC.hg19.knownGene') txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene } ## CRAN library('ggplot2') if(!is.null(theme)) theme_set(theme) library('grid') library('gridExtra') library('knitr') library('RColorBrewer') library('mgcv') library('whisker') library('DT') library('sessioninfo') #### Code setup ## For ggplot tmp <- regions names(tmp) <- seq_len(length(tmp)) regions.df <- as.data.frame(tmp) regions.df$width <- width(tmp) rm(tmp) ## Special subsets: need at least 3 points for a density plot keepChr <- table(regions.df$seqnames) > 2 regions.df.plot <- subset(regions.df, seqnames %in% names(keepChr[keepChr])) if(hasSignificant) { ## Keep only those sig regions.df.sig <- regions.df[significantVar, ] keepChr <- table(regions.df.sig$seqnames) > 2 regions.df.sig <- subset(regions.df.sig, seqnames %in% names(keepChr[keepChr])) } ## Find which chrs are present in the data set chrs <- levels(seqnames(regions)) ## areaVar initialize areaVar <- NULL
for(i in seq_len(length(pvalueVars))) { densityVarName <- names(pvalueVars[i]) densityVarName <- ifelse(is.null(densityVarName), pvalueVars[i], densityVarName) cat(knit_child(text = whisker.render(templatePvalueDensityInUse, list(varName = pvalueVars[i], densityVarName = densityVarName)), quiet = TRUE), sep = '\n') }
xrange <- range(log10(regions.df.plot$width)) * c(0.95, 1.05) p2a <- ggplot(regions.df.plot, aes(x=log10(width), colour=seqnames)) + geom_line(stat='density') + labs(title='Density of region lengths') + xlab('Region width (log10)') + scale_colour_discrete(limits=chrs) + xlim(xrange) + theme(legend.title=element_blank()) p2b <- ggplot(regions.df.sig, aes(x=log10(width), colour=seqnames)) + geom_line(stat='density') + labs(title='Density of region lengths (significant only)') + xlab('Region width (log10)') + scale_colour_discrete(limits=chrs) + xlim(xrange) + theme(legend.title=element_blank()) grid.arrange(p2a, p2b)
p2a <- ggplot(regions.df.plot, aes(x=log10(width), colour=seqnames)) + geom_line(stat='density') + labs(title='Density of region lengths') + xlab('Region width (log10)') + scale_colour_discrete(limits=chrs) + theme(legend.title=element_blank()) p2a
This plot shows the density of the region lengths for all regions. r ifelse(hasSignificant, 'The bottom panel is restricted to significant regions.', '')
for(i in seq_len(length(densityVars))) { densityVarName <- names(densityVars[i]) densityVarName <- ifelse(is.null(densityVarName), densityVars[i], densityVarName) cat(knit_child(text = whisker.render(templateDensityInUse, list(varName = densityVars[i], densityVarName = densityVarName)), quiet = TRUE), sep = '\n') }
The following plots were made using ggbio
r Citep(bib[['ggbio']])
which in turn uses ggplot2
r Citep(bib[['ggplot2']])
. For more details check plotOverview
in derfinderPlot
r Citep(bib[['derfinderPlot']])
.
## Choose what variable to show on the top tmp <- regions tmp$significant <- factor(significantVar, levels = c('TRUE', 'FALSE')) if(!'area' %in% colnames(mcols(tmp))) { if(hasDensityVars) { tmp$area <- mcols(tmp)[[densityVars[1]]] areaVar <- densityVars[1] areaVar <- ifelse(is.null(names(areaVar)), densityVars[1], names(areaVar)) } else { tmp$area <- 0 areaVar <- NULL } } else { areaVar <- 'area' } plotOverview(regions=tmp, type='pval', base_size=overviewParams$base_size, areaRel=overviewParams$areaRel, legend.position=c(0.97, 0.12)) rm(tmp)
This plot shows the genomic locations of the regions found in the analysis. The significant regions are highlighted and the r areaVar
of the regions is shown on top of each chromosome r ifelse(is.null(areaVar), '(skipped because there was no applicable variable)', '(shown in a relative scale)')
.
for(i in seq_len(length(pvalueVars))) { densityVarName <- names(pvalueVars[i]) densityVarName <- ifelse(is.null(densityVarName), pvalueVars[i], densityVarName) cat(knit_child(text = whisker.render(templateManhattanInUse, list(varName = pvalueVars[i], densityVarName = densityVarName)), quiet = TRUE), sep = '\n') }
## Annotate regions with bumphunter if(is.null(annotation)) { genes <- annotateTranscripts(txdb = txdb) annotation <- matchGenes(x = regions, subject = genes) } ## Make the plot plotOverview(regions=regions, annotation=annotation, type='annotation', base_size=overviewParams$base_size, areaRel=overviewParams$areaRel, legend.position=c(0.97, 0.12))
This genomic overview plot shows the annotation region type for the regions as determined using bumphunter
r Citep(bib[['bumphunter']])
. Note that the regions are shown only if the annotation information is available. Below is a table of the actual number of results per annotation region type.
annoReg <- table(annotation$region, useNA='always') annoReg.df <- data.frame(Region=names(annoReg), Count=as.vector(annoReg)) if(outputIsHTML) { kable(annoReg.df, format = 'markdown', align=rep('c', 3)) } else { kable(annoReg.df) }
plotOverview(regions=regions[significantVar, ], annotation=annotation[significantVar, ], type='annotation', base_size=overviewParams$base_size, areaRel=overviewParams$areaRel, legend.position=c(0.97, 0.12))
This genomic overview plot shows the annotation region type for the statistically significant regions. Note that the regions are shown only if the annotation information is available. r ifelse(hasSignificant, '', 'Plot skipped because there are no significant regions.')
Below is a table summarizing the number of genomic states per region as determined using derfinder
r Citep(bib[['derfinder']])
.
## Construct genomic state object genomicState <- makeGenomicState(txdb = txdb, chrs = chrs, verbose = FALSE) ## Annotate regions by genomic state annotatedRegions <- annotateRegions(regions, genomicState$fullGenome, verbose = FALSE) ## Genomic states table info <- do.call(rbind, lapply(annotatedRegions$countTable, function(x) { data.frame(table(x)) })) colnames(info) <- c('Number of Overlapping States', 'Frequency') info$State <- gsub('\\..*', '', rownames(info)) rownames(info) <- NULL if(outputIsHTML) { kable(info, format = 'markdown', align=rep('c', 4)) } else { kable(info) }
The following is a venn diagram showing how many regions overlap known exons, introns, and intergenic segments, none of them, or multiple of these groups.
## Venn diagram for all regions venn <- vennRegions(annotatedRegions, counts.col = 'blue', main = 'Regions overlapping genomic states')
r ifelse(hasSignificant, 'The following plot is the genomic states venn diagram only for the significant regions.', '')
## Venn diagram for all regions vennSig <- vennRegions(annotatedRegions, counts.col = 'blue', main = 'Significant regions overlapping genomic states', subsetIndex = significantVar)
Below is an interactive table with the top r min(nrow(regions.df), nBestRegions)
regions (out of r nrow(regions.df)
) as ranked by p-value r ifelse(hasPvalueVars, '', 'without ranking because no p-value information was provided')
. Inf and -Inf are shown as 1e100 and -1e100 respectively. r ifelse(outputIsHTML, 'Use the search function to find your region of interest or sort by one of the columns.', 'Since the report is in PDF format, only the top 20 regions are shown.')
## Add annotation information regions.df <- cbind(regions.df, annotation) ## Rank by p-value (first pvalue variable supplied) if(hasPvalueVars){ topRegions <- head(regions.df[order(regions.df[, pvalueVars[1]], decreasing = FALSE), ], nBestRegions) topRegions <- cbind(data.frame('pvalueRank' = seq_len(nrow(topRegions))), topRegions) } else { topRegions <- head(regions.df, nBestRegions) } ## Clean up -Inf, Inf if present ## More details at https://github.com/ramnathv/rCharts/issues/259 replaceInf <- function(df, colsubset=seq_len(ncol(df))) { for(i in colsubset) { inf.idx <- !is.finite(df[, i]) if(any(inf.idx)) { inf.sign <- sign(df[inf.idx, i]) df[inf.idx, i] <- inf.sign * 1e100 } } return(df) } topRegions <- replaceInf(topRegions, which(sapply(topRegions, function(x) { class(x) %in% c('numeric', 'integer')}))) ## Make the table greptext <- 'value$|area$|mean|log2FoldChange' greppval <- 'pvalues$|qvalues$|fwer$' if(hasPvalueVars) { greppval <- paste0(paste(pvalueVars, collapse = '$|'), '$|', greppval) } if(hasDensityVars) { greptext <- paste0(paste(densityVars, collapse = '$|'), '$|', greptext) } for(i in which(grepl(greppval, colnames(topRegions)))) topRegions[, i] <- format(topRegions[, i], scientific = TRUE) if(outputIsHTML) { datatable(topRegions, options = list(pagingType='full_numbers', pageLength=10, scrollX='100%'), rownames = FALSE) %>% formatRound(which(grepl(greptext, colnames(topRegions))), digits) } else { ## Only print the top part if your output is a PDF file df_top <- head(topRegions, 20) for(i in which(grepl(greptext, colnames(topRegions)))) df_top[, i] <- round(df_top[, i], digits) kable(df_top) }
This report was generated in path r tmpdir
using the following call to renderReport()
:
theCall
Date the report was generated.
## Date the report was generated Sys.time()
Wallclock time spent generating the report.
## Processing time in seconds totalTime <- diff(c(startTime, Sys.time())) round(totalTime, digits=3)
R
session information.
## Session info options(width = 120) session_info()
Pandoc version used: r rmarkdown::pandoc_version()
.
This report was created with regionReport
r Citep(bib[['regionReport']])
using rmarkdown
r Citep(bib[['rmarkdown']])
while knitr
r Citep(bib[['knitr']])
and DT
r Citep(bib[['DT']])
were running behind the scenes. whisker
r Citep(bib[['whisker']])
was used for creating templates for the pvalueVars
and densityVars
.
Citations made with r CRANpkg('RefManageR')
r Citep(bib[['RefManageR']])
. The BibTeX file can be found here.
## Print bibliography PrintBibliography(bib, .opts = list(hyperlink = "to.doc", style = "html"))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.