Nothing
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE)
## ----eval=FALSE---------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly=TRUE))
# install.packages("BiocManager")
# BiocManager::install("pipeFrame")
#
## -----------------------------------------------------------------------------
library(pipeFrame)
## ----eval=TRUE----------------------------------------------------------------
initPipeFrame(availableGenome = c("hg19", "hg38", "mm9", "mm10",
"danRer10", "galGal5", "galGal4",
"rheMac3", "rheMac8", "panTro4",
"rn5", "rn6", "sacCer2","sacCer3",
"susScr3", "testgenome"),
defaultJobName = "test-pipeline"
)
## -----------------------------------------------------------------------------
# display current temporary directory
getTmpDir()
## -----------------------------------------------------------------------------
dir.create("./testdir")
# set a new temporary directory
setTmpDir("./testdir")
# display the new temporary directory
getTmpDir()
## -----------------------------------------------------------------------------
# display current reference directory
getRefDir()
## -----------------------------------------------------------------------------
# set a new reference directory
setRefDir("./refdir")
# display the new reference directory
getRefDir()
## -----------------------------------------------------------------------------
getValidGenome()
## -----------------------------------------------------------------------------
setGenome("hg19")
#display the current configured genome
getGenome()
## ----echo=FALSE---------------------------------------------------------------
checkAndInstall <- function(){
runWithFinishCheck(func = checkAndInstallBSgenome,refName = "bsgenome")
}
## ----eval=FALSE---------------------------------------------------------------
# checkAndInstall <- function(){
# runWithFinishCheck(func = checkAndInstallBSgenome,refName = "bsgenome")
# runWithFinishCheck(func = checkAndInstallGenomeFa,refName = "fasta",
# refFilePath = "genome.fa")
# }
#
## -----------------------------------------------------------------------------
library(BSgenome)
checkAndInstallBSgenome <- function(refFilePath){
genome <- getGenome()
bsgenomename<- BSgenome::available.genomes()[
grepl(paste0(genome,"$"),BSgenome::available.genomes())]
if(length(bsgenomename)==0){
stop("BSgenome does not support this genome")
}
bsgenomeinstall <- BSgenome::installed.genomes()[
grepl(paste0(genome,"$"),BSgenome::installed.genomes())]
if(length(bsgenomeinstall)==0){
message(paste("BSgenome for ",genome,"has not been installed,"))
message("begin to install ...")
BiocManager::install(bsgenomename)
}
return(getBSgenome(bsgenomename))
}
## -----------------------------------------------------------------------------
checkAndInstallGenomeFa <- function(refFilePath){
outFile <- refFilePath
bsgenome<-getRefRc("bsgenome")
if(!is(bsgenome, "BSgenome")){
stop("The variable 'bsgenome' is not a BSgenome")
}
append <- FALSE
for(chrT in seqnames(bsgenome)){
if(is.null(masks(bsgenome[[chrT]])))
chrSeq <- DNAStringSet(bsgenome[[chrT]])
else
chrSeq <- DNAStringSet(injectHardMask(bsgenome[[chrT]],
letter="N"))
names(chrSeq) <- chrT
writeXStringSet(chrSeq, filepath=outFile, format="fasta",
append=append)
append <- TRUE
}
return(NULL)
}
## -----------------------------------------------------------------------------
initPipeFrame(availableGenome = c("hg19", "hg38", "mm9", "mm10"),
defaultJobName = "test-pipeline",
defaultCheckAndInstallFunc = checkAndInstall
)
## -----------------------------------------------------------------------------
# check the max user thread limit
getThreads()
## -----------------------------------------------------------------------------
# customize the max threads number
setThreads(4)
# check the max user thread limit
getThreads()
## -----------------------------------------------------------------------------
# display the current job name
getJobName()
## -----------------------------------------------------------------------------
# set a new job name
setJobName("testJobName")
# display the new job name
getJobName()
getJobDir()
## -----------------------------------------------------------------------------
addEdges(edges = c("RandomRegionOnGenome","OverlappedRandomRegion"),argOrder = 1)
## ----eval=TRUE----------------------------------------------------------------
printMap()
## -----------------------------------------------------------------------------
randomRegionOnGenome <- function(sampleNumb, regionLen = 1000,
genome = NULL, outputBed = NULL, ...){
allpara <- c(list(Class = "RandomRegionOnGenome"),
as.list(environment()),list(...))
step <- do.call(new,allpara)
invisible(step)
}
## -----------------------------------------------------------------------------
overlappedRandomRegion <- function(inputBed, randomBed, outputBed = NULL, ...){
allpara <- c(list(Class = "OverlappedRandomRegion"),
as.list(environment()),list(...))
step <- do.call(new,allpara)
invisible(step)
}
## -----------------------------------------------------------------------------
setGeneric("runOverlappedRandomRegion",function(prevStep,
inputBed,
randomBed = NULL,
outputBed = NULL,
...)
standardGeneric("runOverlappedRandomRegion"))
## -----------------------------------------------------------------------------
setMethod(
f = "runOverlappedRandomRegion",
signature = "Step",
definition = function(prevStep,
inputBed,
randomBed = NULL,
outputBed = NULL,
...){
allpara <- c(list(Class = "OverlappedRandomRegion",
prevSteps = list(prevStep)),
as.list(environment()),list(...))
step <- do.call(new,allpara)
invisible(step)
}
)
## -----------------------------------------------------------------------------
# generate new Step : RandomRegionOnGenome
setClass(Class = "RandomRegionOnGenome",
contains = "Step"
)
# generate another new Step : OverlappedRandomRegion
setClass(Class = "OverlappedRandomRegion",
contains = "Step"
)
## -----------------------------------------------------------------------------
setMethod(
f = "init",
signature = "RandomRegionOnGenome",
definition = function(.Object,prevSteps = list(),...){
# All arguments in function randomRegionOnGenome
# will be passed from "..."
# so get the arguments from "..." first.
allparam <- list(...)
sampleNumb <- allparam[["sampleNumb"]]
regionLen <- allparam[["regionLen"]]
genome <- allparam[["genome"]]
outputBed <- allparam[["outputBed"]]
# no previous steps for this step so ingnore the "prevSteps"
# begin to set input parameters
# no input for this step
# begin to set output parameters
if(is.null(outputBed)){
output(.Object)$outputBed <-
getStepWorkDir(.Object,"random.bed")
}else{
output(.Object)$outputBed <- outputBed
}
# begin to set other parameters
param(.Object)$regionLen <- regionLen
param(.Object)$sampleNumb <- sampleNumb
if(is.null(genome)){
param(.Object)$bsgenome <- getBSgenome(getGenome())
}else{
param(.Object)$bsgenome <- getBSgenome(genome)
}
# don't forget to return .Object
.Object
}
)
setMethod(
f = "init",
signature = "OverlappedRandomRegion",
definition = function(.Object,prevSteps = list(),...){
# All arguments in function overlappedRandomRegion and
# runOerlappedRandomRegion will be passed from "..."
# so get the arguments from "..." first.
allparam <- list(...)
inputBed <- allparam[["inputBed"]]
randomBed <- allparam[["randomBed"]]
outputBed <- allparam[["outputBed"]]
# inputBed can obtain from previous step object when running
# runOerlappedRandomRegion
if(length(prevSteps)>0){
prevStep <- prevSteps[[1]]
input(.Object)$randomBed <- getParam(prevStep,"outputBed")
}
# begin to set input parameters
if(!is.null(inputBed)){
input(.Object)$inputBed <- inputBed
}
if(!is.null(randomBed)){
input(.Object)$randomBed <- randomBed
}
# begin to set output parameters
# the output is recemended to set under the step work directory
if(!is.null(outputBed)){
output(.Object)$outputBed <- outputBed
}else{
output(.Object)$outputBed <-
getAutoPath(.Object, getParam(.Object, "inputBed"),
"bed", suffix = "bed")
# the path can also be generate in this way
# ib <- getParam(.Object,"inputBed")
# output(.Object)$outputBed <-
# file.path(getStepWorkDir(.Object),
# paste0(substring(ib,1,nchar(ib)-3), "bed"))
}
# begin to set other parameters
# no other parameters
# don't forget to return .Object
.Object
}
)
## -----------------------------------------------------------------------------
setMethod(
f = "processing",
signature = "RandomRegionOnGenome",
definition = function(.Object,...){
# All arguments are set in .Object
# so we can get them from .Object
sampleNumb <- getParam(.Object,"sampleNumb")
regionLen <- getParam(.Object,"regionLen")
bsgenome <- getParam(.Object,"bsgenome")
outputBed <- getParam(.Object,"outputBed")
# begin the calculation
chrlens <-seqlengths(bsgenome)
selchr <- grep("_|M",names(chrlens),invert=TRUE)
chrlens <- chrlens[selchr]
startchrlens <- chrlens - regionLen
spchrs <- sample(x = names(startchrlens),
size = sampleNumb, replace = TRUE,
prob = startchrlens / sum(startchrlens))
gr <- GRanges()
for(chr in names(startchrlens)){
startpt <- sample(x = 1:startchrlens[chr],
size = sum(spchrs == chr),replace = FALSE)
gr <- c(gr,
GRanges(seqnames = chr,
ranges = IRanges(start = startpt, width = 1000)))
}
result <- sort(gr,ignore.strand=TRUE)
rtracklayer::export.bed(object = result, con = outputBed)
# don't forget to return .Object
.Object
}
)
setMethod(
f = "genReport",
signature = "RandomRegionOnGenome",
definition = function(.Object, ...){
.Object
}
)
setMethod(
f = "processing",
signature = "OverlappedRandomRegion",
definition = function(.Object,...){
# All arguments are set in .Object
# so we can get them from .Object
allparam <- list(...)
inputBed <- getParam(.Object,"inputBed")
randomBed <- getParam(.Object,"randomBed")
outputBed <- getParam(.Object,"outputBed")
# begin the calculation
gr1 <- import.bed(con = inputBed)
gr2 <- import.bed(con = randomBed)
gr <- second(findOverlapPairs(gr1,gr2))
export.bed(gr,con = outputBed)
# don't forget to return .Object
.Object
}
)
setMethod(
f = "genReport",
signature = "OverlappedRandomRegion",
definition = function(.Object, ...){
.Object
}
)
## ----eval=TRUE----------------------------------------------------------------
library(magrittr)
testInputBedFilePath <- file.path(tempdir(),"test.bed")
library(rtracklayer)
export.bed(GRanges("chr7:1-127473000"),testInputBedFilePath)
result <- randomRegionOnGenome(1000) %>%
runOverlappedRandomRegion(inputBed = testInputBedFilePath)
## ----eval=TRUE----------------------------------------------------------------
result1 <- randomRegionOnGenome(1000)
randombed <- getParam(result1,"outputBed")
randombed
result2 <- overlappedRandomRegion(inputBed = testInputBedFilePath,
randomBed = randombed)
## -----------------------------------------------------------------------------
library(magrittr)
examplePipe <- function(sampleNumb, inputBed, genome, threads = 2,...){
setThreads(threads = threads)
setGenome(genome = genome)
result <- randomRegionOnGenome(sampleNumb = sampleNumb, ...) %>%
runOverlappedRandomRegion(inputBed = inputBed,...)
return(result)
}
## -----------------------------------------------------------------------------
examplePipe(sampleNumb = 1000,inputBed = testInputBedFilePath,genome = "hg19",
RandomRegionOnGenome_pipe.regionLen = 10000)
## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.