Nothing
#' Bin CpG or CpH coverage to simplify and improve CNA "sketching"
#'
#' Example usage for E-M
#'
#' NOTE: As of early Sept 2019, QDNAseq did not have hg38 capabilities. If you
#' desire to use the hg38 genome, biscuiteer suggests you use a GRanges object
#' to define your bins.
#'
#' @param bsseq A bsseq object - supplied to getCoverage()
#' @param bins Bins to summarize over - from tileGenome or QDNAseq.xxYY
#' @param which Limit to specific regions? - functions as an import()
#' (DEFAULT: NULL)
#' @param QDNAseq Return a QDNAseqReadCounts? - if FALSE, returns a GRanges
#' (DEFAULT: TRUE)
#' @param readLen Correction factor for coverage - read length in bp
#' (DEFAULT: 100)
#'
#' @return Binned read counts
#'
#' @importFrom methods as is new
#' @importFrom Biobase featureNames
#' @importFrom utils packageVersion
#' @import BiocGenerics
#' @import GenomeInfoDb
#' @import GenomicRanges
#' @import Mus.musculus
#' @import Homo.sapiens
#' @import QDNAseq
#'
#' @examples
#'
#' bins <- GRanges(seqnames = rep("chr11",10),
#' strand = rep("*",10),
#' ranges = IRanges(start=100000*0:9, width=100000)
#' )
#'
#' reg <- GRanges(seqnames = rep("chr11",5),
#' strand = rep("*",5),
#' ranges = IRanges(start = c(0,2.8e6,1.17e7,1.38e7,1.69e7),
#' end= c(2.8e6,1.17e7,1.38e7,1.69e7,2.2e7))
#' )
#'
#' orig_bed <- system.file("extdata", "MCF7_Cunha_chr11p15.bed.gz",
#' package="biscuiteer")
#' orig_vcf <- system.file("extdata", "MCF7_Cunha_header_only.vcf.gz",
#' package="biscuiteer")
#' bisc <- readBiscuit(BEDfile = orig_bed, VCFfile = orig_vcf,
#' merged = FALSE)
#'
#' bc <- binCoverage(bsseq = bisc, bins = bins, which = reg, QDNAseq = FALSE)
#'
#' @export
#'
binCoverage <- function(bsseq,
bins,
which = NULL,
QDNAseq = TRUE,
readLen = 100) {
# TODO: turn this into a generic for bsseq objects, for bigWigs, for [...]
# TODO: figure out how to estimate the most likely GCbias ~ DNAme linkage
# Check input types
stopifnot(is.logical(QDNAseq))
stopifnot(is.numeric(readLen))
# turn QDNAseq bins into an annotated GRanges object
if (is(bins, "AnnotatedDataFrame") & # QDNAseq bins
!is.null(attr(bins, "QDNAseq")$build)) {
message("Converting QDNAseq bins to GRanges for coverage")
gr <- makeGRangesFromDataFrame(pData(bins), keep.extra.columns=TRUE)
genome(gr) <- attr(bins, "QDNAseq")$build
} else if (is(bins, "GenomicRanges")) {
message("You really should use QDNAseq bins IF you can.")
message("Output will be a GRanges object.")
gr <- bins
} else {
stop("Don't know what to do with bins of class ", class(bins), "!")
}
origStyle <- seqlevelsStyle(gr)
if (!is.null(which)) {
stopifnot(is(which, "GenomicRanges"))
seqlevelsStyle(gr) <- seqlevelsStyle(which)
gr <- subsetByOverlaps(gr, which)
}
seqlevelsStyle(gr) <- seqlevelsStyle(bsseq)
names(gr) <- as(granges(gr), "character")
summed <- getCoverage(bsseq, gr, what="perRegionTotal", withDimnames=TRUE)
summed <- round(summed/readLen) # mostly so plot titles don't look insane
gc(,TRUE) # cautious
gr$score <- summed
attr(gr, "binned") <- TRUE
if (QDNAseq & is(bins, "AnnotatedDataFrame")) {
seqlevelsStyle(gr) <- origStyle
names(gr) <- as(granges(gr), "character")
phenodata <- data.frame(name=colnames(gr$score),
row.names=colnames(gr$score),
stringsAsFactors=FALSE)
phenodata$total.reads <- colSums(score(gr), na.rm=TRUE)
if (!exists("use")) use <- seq_len(length(gr))
phenodata$used.reads <- colSums(score(subset(gr, use)), na.rm=TRUE)
object <- new("QDNAseqReadCounts",
bins=subset(bins, featureNames(bins) %in% names(gr)),
counts=gr$score, phenodata=phenodata)
object$expected.variance <- QDNAseq:::expectedVariance(object)
annotation(object) <- paste("generated by biscuiteer",
packageVersion("biscuiteer"))
return(object)
} else {
if (!exists("use")) use <- seq_len(length(gr))
return(subset(gr, rowSums(is.na(gr$score)) < ncol(bsseq) & use))
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.