Nothing
## ----setup, echo=FALSE, results="hide"----------------------------------------
knitr::opts_chunk$set(tidy=FALSE, cache=TRUE,
dev="png",
message=FALSE, error=FALSE, warning=FALSE)
## -----------------------------------------------------------------------------
library(GenoGAM)
## A.
## specify folder and experiment design path
wd <- system.file("extdata/Set1", package='GenoGAM')
folder <- file.path(wd, "bam")
expDesign <- file.path(wd, "experimentDesign.txt")
## B.
## register parallel backend (default is "the number of cores" - 2)
BiocParallel::register(BiocParallel::MulticoreParam(worker = 2))
## C.
## specify chunk and overhang size.
chunkSize <- 50000
overhangSize <- 1000
## D.
## the experiment design reflecting the underlying GAM
## x = position
design <- ~ s(x) + s(x, by = genotype)
## -----------------------------------------------------------------------------
## build the GenoGAMDataSet
ggd <- GenoGAMDataSet(
expDesign, directory = folder,
chunkSize = chunkSize, overhangSize = overhangSize,
design = design)
ggd
## -----------------------------------------------------------------------------
## compute size factors
ggd <- computeSizeFactors(ggd)
ggd
## the data
assay(ggd)
## -----------------------------------------------------------------------------
## compute model without parameter estimation to save time in vignette
result <- genogam(ggd, lambda = 4601, theta = 4.51)
result
## the fit and standard error
fits(result)
se(result)
## -----------------------------------------------------------------------------
result <- computeSignificance(result)
pvalue(result)
## -----------------------------------------------------------------------------
ranges = GRanges("chrI", IRanges(45000, 55000))
plotGenoGAM(result, ranges = ranges)
## -----------------------------------------------------------------------------
library(GenoGAM)
## On multicore machines by default the number of available cores - 2 are registered on the default backend 'Multicore'
BiocParallel::registered()[1]
## -----------------------------------------------------------------------------
BiocParallel::register(BiocParallel::SnowParam(workers = 3))
## -----------------------------------------------------------------------------
BiocParallel::registered()[1]
## -----------------------------------------------------------------------------
folder <- system.file("extdata/Set1", package='GenoGAM')
expDesign <- read.delim(
file.path(folder, "experimentDesign.txt")
)
expDesign
## -----------------------------------------------------------------------------
## build the GenoGAMDataSet
wd_folder <- file.path(folder, "bam")
ggd <- GenoGAMDataSet(
expDesign, directory = wd_folder,
design = ~ s(x) + s(x, by = genotype)
)
assay(ggd)
## overhang size
getOverhangSize(ggd)
## chunk size
getChunkSize(ggd)
## -----------------------------------------------------------------------------
ggd <- GenoGAMDataSet(
expDesign, directory = wd_folder,
design = ~ s(x) + s(x, by = genotype), hdf5 = TRUE
)
## Note the difference in data type compared to the DataFrame above
assay(ggd)
## -----------------------------------------------------------------------------
GenoGAMSettings()
## -----------------------------------------------------------------------------
range <- GRanges("chrI", IRanges(30000, 50000))
params <- Rsamtools::ScanBamParam(which = range)
settings <- GenoGAMSettings(center = FALSE,
bamParams = params)
ggd <- GenoGAMDataSet(
expDesign, directory = wd_folder,
design = ~ s(x) + s(x, by = genotype),
settings = settings)
## Note the higher counts
assay(ggd)
rowRanges(ggd)
## Now with chromosome specification. As only chrI is present
## in the example file, the read in will log an error and generate
## an empty GenoGAMDataSet object
settings <- GenoGAMSettings(chromosomeList = c("chrII", "chrIII"))
ggd <- GenoGAMDataSet(
expDesign, directory = wd_folder,
design = ~ s(x) + s(x, by = genotype),
settings = settings)
## Empty GenoGAMDataSet due to lack of data in the example BAM file
length(ggd)
ggd
## -----------------------------------------------------------------------------
## Note, optimControl and estimControl is a list of more parameters. However none of them besides the shown are important
settings <- GenoGAMSettings(optimMethod = "L-BFGS-B", optimControl = list(maxit = 100L),
estimControl = list(eps = 1e-10, maxiter = 500L))
settings
## -----------------------------------------------------------------------------
## set HDF5 folder to 'myFolder'
tmp <- tempdir()
settings <- GenoGAMSettings(hdf5Control = list(dir = file.path(tmp, "myFolder"), level = 0))
HDF5Array::getHDF5DumpDir()
HDF5Array::getHDF5DumpCompressionLevel()
## Another way to set this would be through the HDF5Array package
HDF5Array::setHDF5DumpDir(file.path(tmp, "myOtherFolder"))
settings
## -----------------------------------------------------------------------------
## For example, we choose a twice as long region but also two times less knots (double the knot spacing),
## which results in the same number of knots per tile as before.
settings <- GenoGAMSettings(dataControl = list(regionSize = 8000, bpknots = 40))
settings
## -----------------------------------------------------------------------------
## read in data again, since the last read-in was an empty GenoGAM
ggd <- GenoGAMDataSet(
expDesign, directory = wd_folder,
design = ~ s(x) + s(x, by = genotype)
)
ggd <- computeSizeFactors(ggd)
sizeFactors(ggd)
## -----------------------------------------------------------------------------
## fit model without parameters estimation
fit <- genogam(ggd, lambda = 4601, theta = 4.51)
fit
## ---- eval = FALSE------------------------------------------------------------
# ## Does not evaluate
# fit_CV <- genogam(ggd, intervalSize = 200)
## -----------------------------------------------------------------------------
ranges = GRanges("chrI", IRanges(45000, 55000))
plotGenoGAM(fit, ranges = ranges)
## -----------------------------------------------------------------------------
plotGenoGAM(fit, ranges = ranges, scale = FALSE)
## -----------------------------------------------------------------------------
fit <- computeSignificance(fit)
pvalue(fit)
## -----------------------------------------------------------------------------
gr <- GRanges("chrI", IRanges(c(1000, 7000), c(4000, 9000)))
db <- computeRegionSignificance(fit, regions = gr, smooth = "s(x):genotype")
db
## -----------------------------------------------------------------------------
peaks <- callPeaks(fit, threshold = 1, smooth = "s(x):genotype")
peaks
peaks <- callPeaks(fit, threshold = 1, peakType = "broad",
cutoff = 0.75, smooth = "s(x):genotype")
peaks
## ---- eval = FALSE------------------------------------------------------------
# ## Not evaluated
# writeToBEDFile(peaks, file = 'myPeaks')
## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.