inst/doc/shearwater.R

## ----setup, include=FALSE, cache=FALSE-----------------------------------
require(knitr)
# set global chunk options
opts_chunk$set(fig.path='tmp/deepSNV-', fig.align='center', fig.show='hold', fig.width=4, fig.height=4, out.width='.4\\linewidth', dpi=150)
options(replace.assign=TRUE,width=75)
knit_hooks$set(nice = function(before, options, envir) {
			if (before) par(mar = c(4, 4, .1, .1), mgp=c(2.5,1,0), bty="n")
		})

## ----echo=FALSE, results='asis'------------------------------------------
	print(citation("deepSNV")[2], style="LaTeX")

## ----fig.width=5, fig.height=5, out.width='.6\\linewidth'----------------
library(deepSNV)
library(RColorBrewer)
n <-  100 ## Coverage
n_samples <- 1000 ## Assume 1000 samples
x <-  0:20 ## Nucleotide counts
X <-  cbind(rep(x, each = length(x)), rep(x, length(x))) ## All combinations forward and reverse
par(bty="n", mgp = c(2,.5,0), mar=c(3,3,2,2)+.1, las=1, tcl=-.33, mfrow=c(2,2))
for(nu in 10^c(-4,-2)){ ## Loop over error rates
	## Create counts array with errors
	counts = aperm(array(c(rep(round(n_samples*n* c(nu,1-nu,nu,1-nu)), each=nrow(X)), cbind(n - X, X)[,c(3,1,4,2)]), 
					dim=c(nrow(X) ,4,2)), c(3,1,2))
	for(rho in c(1e-4, 1e-2)){ ## Loop over dispersion factors
		## Compute Bayes factors
		BF = bbb(counts, rho=rho, model="OR", return="BF")
		## Plot
		image(z=log10(matrix(BF[2,,1], nrow=length(x))), 
				x=x, 
				y=x, 
				breaks=c(-100,-8:0), 
				col=rev(brewer.pal(9,"Reds")), 
				xlab = "Forward allele count",
				ylab="Backward allele count", 
				main = paste("rho =", format(rho, digits=2), "nu = ", format(nu, digits=2)), 
				font.main=1)
		text(X[,1],X[,2],ceiling(log10(matrix(BF[2,,1], nrow=length(x)))), cex=0.5)
	}
}

## ----fig.width=5,fig.height=5, out.width='.6\\linewidth'-----------------
par(bty="n", mgp = c(2,.5,0), mar=c(3,3,2,2)+.1, las=1, tcl=-.33, mfrow=c(2,2))
for(nu in 10^c(-4,-2)){ ## Loop over error rates
	## Create counts array with errors
	counts = aperm(array(c(rep(round(n_samples*n* c(nu,1-nu,nu,1-nu)), each=nrow(X)), cbind(n - X, X)[,c(3,1,4,2)]), 
					dim=c(nrow(X) ,4,2)), c(3,1,2))
	for(rho in c(1e-4, 1e-2)){ ## Loop over dispersion factors
		## Compute Bayes factors, mode = "AND"
		BF = bbb(counts, rho=rho, model="AND", return="BF")
		## Plot
		image(z=log10(matrix(BF[2,,1], nrow=length(x))), 
				x=x, 
				y=x, 
				breaks=c(-100,-8:0), 
				col=rev(brewer.pal(9,"Reds")), 
				xlab = "Forward allele count",
				ylab="Backward allele count", 
				main = paste("rho =", format(rho, digits=2), "nu = ", format(nu, digits=2)), 
				font.main=1)
		text(X[,1],X[,2],ceiling(log10(matrix(BF[2,,1], nrow=length(x)))), cex=0.5)
	}
}

## ----rho, fig.width=4,fig.height=4, out.width="6cm", out.height="6cm"----
rho = 10^seq(-6,-1)
rhoHat <- sapply(rho, function(r){
			sapply(1:100, function(i){
						n = 100
						X = rbetabinom(1000, n, 0.01, rho=r)
						X = cbind(X, n-X)
						Y = array(X, dim=c(1000,1,2))
						deepSNV:::estimateRho(Y, Y/n, Y < 1000)[1,1]})
		})
par(bty="n", mgp = c(2,.5,0), mar=c(3,4,1,1)+.1,  tcl=-.33)
plot(rho, type="l", log="y", xaxt="n", xlab="rho", ylab="rhoHat", xlim=c(0.5,6.5), lty=3)
boxplot(t(rhoHat+ 1e-7) ~ rho, add=TRUE, col="#FFFFFFAA", pch=16, cex=.5, lty=1, staplewex=0)
points(colMeans(rhoHat), pch="*", col="red", cex=2)

## ----eval=FALSE----------------------------------------------------------
#  ## Not run..
#  ## Load TxDb
#  library(TxDb.Hsapiens.UCSC.hg19.knownGene)
#  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
#  seqlevels(txdb) <- sub("chr","",seqlevels(txdb))
#  
#  ## Make prior
#  regions <- reduce(exons(txdb, filter=list(gene_id='7157'))) ## TP53 exons
#  cosmic <- readVcf("CosmicCodingMuts_v63_300113.vcf.gz", "hg19", param=ScanVcfParam(which=regions))
#  pi <- makePrior(cosmic, regions, pi.gene = 1)

## ----prior, fig.width=8,fig.height=4, out.width="12cm", out.height="6cm"----
## Load pi
data(pi, package="deepSNV")

## Plot
par(bty="n", mgp = c(2,.5,0), mar=c(3,3,2,2)+.1, tcl=-.33)
plot(pi[,1], type="h", xlab="Position", ylab="Prior", col=brewer.pal(5,"Set1")[1], ylim=c(0,0.075))
for(j in 2:5)
	lines(pi[,j], type="h", col=brewer.pal(5,"Set1")[j])
legend("topleft", col=brewer.pal(5,"Set1"), lty=1, bty="n", c("A","T","C","G","del"))

## ------------------------------------------------------------------------
## Load data from deepSNV example
regions <- GRanges("B.FR.83.HXB2_LAI_IIIB_BRU_K034", IRanges(start = 3120, end=3140))
files <- c(system.file("extdata", "test.bam", package="deepSNV"), system.file("extdata", "control.bam", package="deepSNV"))
counts <- loadAllData(files, regions, q=10)
dim(counts)

## ------------------------------------------------------------------------
## Run (bbb) computes the Bayes factor
bf <- bbb(counts, model = "OR", rho=1e-4)
dim(bf)
vcf <- bf2Vcf(bf, counts, regions, cutoff = 0.5, samples = files, prior = 0.5, mvcf = TRUE) 
show(vcf)

## ----fig.width=4,fig.height=4--------------------------------------------
## Shearwater Bayes factor under AND model
bf <- bbb(counts, model = "AND", rho=1e-4)
## deepSNV P-value with combine.method="fisher" (product)
dpSNV <- deepSNV(test = files[1], control = files[2], regions=regions, q=10, combine.method="fisher")
## Plot
par(bty="n", mgp = c(2,.5,0), mar=c(3,3,2,2)+.1, tcl=-.33)
plot(p.val(dpSNV), bf[1,,]/(1+bf[1,,]), log="xy",
		xlab = "P-value deepSNV",
		ylab = "Posterior odds shearwater"
		)

## ----eval=FALSE----------------------------------------------------------
#  ## Not run
#  files <- dir("bam", pattern="*.bam$", full.names=TRUE)
#  MC_CORES <- getOption("mc.cores", 2L)
#  vcfList <- list()
#  for(gene in levels(mcols(regions)$Gene)){
#  	rgn <-  regions[mcols(regions)$Gene==gene]
#  	counts <-  loadAllData(files, rgn, mc.cores=MC_CORES)
#  	## Split into
#  	BF <-  mcChunk("bbb", split = 200, counts, mc.cores=MC_CORES)
#  	COSMIC <-  readVcf("CosmicCodingMuts_v63_300113.vcf.gz", "GRCh37", param=ScanVcfParam(which=rgn) )
#  	prior <- makePrior(COSMIC, rgn, pi.mut = 0.5)
#  	vcfList[[gene]] <- bf2Vcf(BF = BF, counts=counts, regions=rgn, samples = files, cutoff = 0.5, prior = prior)
#  }
#  ## Collapse vcfList
#  vcf <- do.call(rbind, vcfList)

## ----echo=FALSE, results='asis'------------------------------------------
    toLatex(sessionInfo())

Try the deepSNV package in your browser

Any scripts or data that you put into this service are public.

deepSNV documentation built on Nov. 8, 2020, 8:01 p.m.