Nothing
# Helper Functions ---------------------------------------------------
#' @noRd
## function checks wether a tabix file already exists and
## appends number if file already exists
.checkTabixFileExists <- function(tabixfile) {
message("\nchecking wether tabix file already exists:")
tabixfile <- paste0(tabixfile,".bgz")
message(tabixfile)
if(file.exists(tabixfile) ) {
message("tabix file already exists, renaming output file:")
i = 1
filename2 = tabixfile
while(file.exists(filename2) ) {
filename2 = gsub(".txt.bgz",paste0("_",i,".txt.bgz"),tabixfile)
i = i + 1
}
message(filename2)
tabixfile <- filename2
message(paste("HINT: consider using 'suffix' argument to write",
"different function calls to different files"))
} else message("tabix file is new.")
tabixfile <- gsub(".bgz","",tabixfile)
message("continuing now ...\n")
return(tabixfile)
}
# MethylRawDB and MethylRawListDB -----------------------------------------
# filterByCoverage --------------------------------------------------------
#' @aliases filterByCoverage,methylRawDB-method
#' @rdname filterByCoverage-methods
setMethod("filterByCoverage", signature(methylObj="methylRawDB"),
function(methylObj,lo.count,lo.perc,hi.count,hi.perc,chunk.size,
save.db=TRUE,...){
if( is.null(lo.count) & is.null(lo.perc) & is.null(hi.count) &
is.null(hi.perc) ){return(methylObj)}
if(save.db) {
filter <- function(data,lo.count,lo.perc,hi.count,hi.perc) {
.setMethylDBNames(data,"methylRawDB")
#figure out which cut-offs to use, maybe there is more elagent
# ways, quick&dirty works for now
if(is.numeric(lo.count) ){lo.count=lo.count}
if(is.numeric(lo.perc)){lo.count=quantile(data$coverage,lo.perc/100)}
if(is.numeric(hi.count)){hi.count=hi.count}
if(is.numeric(hi.perc)){hi.count=quantile(data$coverage,hi.perc/100)}
if(is.numeric(lo.count)){data=data[data$coverage>=lo.count,]}
if(is.numeric(hi.count)){data=data[data$coverage<hi.count,]}
return(data)
}
# catch additional args
args <- list(...)
dir <- dirname(methylObj@dbpath)
if( "dbdir" %in% names(args) ){
if( !(is.null(args$dbdir)) ){
dir <- .check.dbdir(args$dbdir)
}
}
if(!( "suffix" %in% names(args) ) ){
suffix <- paste0("_","filtered")
} else {
suffix <- paste0("_",args$suffix)
}
# filename <- paste0(paste(methylObj@sample.id,collapse = "_"),suffix,".txt")
# tabixfile
filename <- paste0(gsub(".txt.bgz",replacement = "",methylObj@dbpath),
suffix,".txt")
filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
## creating the tabix header
slotList <- list(dbtype = "tabix",
sample.id = methylObj@sample.id,
assembly = methylObj@assembly, context = methylObj@context,
resolution = methylObj@resolution)
tabixHead <- makeTabixHeader(slotList)
tabixHeadString <- .formatTabixHeader(class = "methylRawDB",
tabixHead = tabixHead)
newdbpath <- applyTbxByChunk(methylObj@dbpath,chunk.size = chunk.size,
dir=dir,filename = filename,
return.type = "tabix", FUN = filter,
lo.count=lo.count, lo.perc=lo.perc,
hi.count=hi.count, hi.perc=hi.perc,
tabixHead = tabixHeadString)
readMethylRawDB(dbpath = newdbpath,dbtype = "tabix",
sample.id = methylObj@sample.id,
assembly = methylObj@assembly, context = methylObj@context,
resolution = methylObj@resolution)
} else {
methylObj <- methylObj[]
#print(class(methylObj))
filterByCoverage(methylObj,lo.count,lo.perc,hi.count,hi.perc,
chunk.size,save.db=FALSE,...)
}
})
#' @aliases filterByCoverage,methylRawListDB-method
#' @rdname filterByCoverage-methods
setMethod("filterByCoverage", signature(methylObj="methylRawListDB"),
function(methylObj,lo.count,lo.perc,hi.count,hi.perc,chunk.size,
save.db=TRUE,...){
if(save.db){
args <- list(...)
if( !( "dbdir" %in% names(args)) ){
dbdir <- NULL
} else { dbdir <- basename(.check.dbdir(args$dbdir)) }
new.list=lapply(methylObj,filterByCoverage,lo.count,lo.perc,hi.count,
hi.perc,chunk.size,save.db,dbdir=dbdir,...)
new("methylRawListDB", new.list,treatment=methylObj@treatment)
} else {
new.list=lapply(methylObj,filterByCoverage,lo.count,lo.perc,hi.count,
hi.perc,chunk.size,save.db=FALSE,...)
new("methylRawList", new.list,treatment=methylObj@treatment)
}
})
# getCoverageStats --------------------------------------------------------
#' @rdname getCoverageStats-methods
#' @aliases getCoverageStats,methylRawDB-method
setMethod("getCoverageStats", "methylRawDB",
function(object,plot,both.strands,labels,...,chunk.size){
tmp = applyTbxByChunk(object@dbpath,chunk.size = chunk.size,
return.type = "data.table",
FUN = function(x) {
.setMethylDBNames(x,"methylRawDB");
return(x[,c("strand","coverage"),with=FALSE])} )
if(!plot){
qts=seq(0,0.9,0.1) # get quantiles
qts=c(qts,0.95,0.99,0.995,0.999,1)
if(both.strands){
plus.cov=tmp[strand=="+",coverage]
mnus.cov=tmp[strand=="-",coverage]
cat("read coverage statistics per base\n\n")
cat("FORWARD STRAND:\n")
cat("summary:\n")
print( summary( plus.cov ) )
cat("percentiles:\n")
print(quantile( plus.cov,p=qts ))
cat("\n\n")
cat("REVERSE STRAND:\n")
cat("summary:\n")
print( summary( mnus.cov ) )
cat("percentiles:\n")
print(quantile( mnus.cov,p=qts ))
cat("\n")
}else{
all.cov=tmp[,coverage]
cat("read coverage statistics per base\n")
cat("summary:\n")
print( summary( all.cov ) )
cat("percentiles:\n")
print(quantile( all.cov,p=qts ))
cat("\n")
}
}else{
if(both.strands){
plus.cov=tmp[strand=="+",coverage]
mnus.cov=tmp[strand=="-",coverage]
par(mfrow=c(1,2))
if(labels){
a=hist(log10(plus.cov),plot=FALSE)
my.labs=as.character(round(100*a$counts/length(plus.cov),1))
}else{my.labs=FALSE}
hist(log10(plus.cov),col="chartreuse4",
xlab=paste("log10 of read coverage per",object@resolution),
main=paste("Histogram of", object@context,
"coverage: Forward strand"),
labels=my.labs,...)
mtext(object@sample.id, side = 3)
if(labels){
a=hist(log10(mnus.cov),plot=FALSE)
my.labs=as.character(round(100*a$counts/length(mnus.cov),1))
}else{my.labs=FALSE}
a=hist(log10(mnus.cov),plot=FALSE)
hist(log10(mnus.cov),col="chartreuse4",
xlab=paste("log10 of read coverage per",object@resolution),
main=paste("Histogram of", object@context,
"coverage: Reverse strand"),
labels=my.labs,...)
mtext(object@sample.id, side = 3)
}else{
all.cov=tmp[,coverage]
if(labels){
a=hist(log10(all.cov),plot=FALSE)
my.labs=as.character(round(100*a$counts/length(all.cov),1))
}else{my.labs=FALSE}
hist(log10(all.cov),col="chartreuse4",
xlab=paste("log10 of read coverage per",object@resolution),
main=paste("Histogram of", object@context, "coverage"),
labels=my.labs,...)
mtext(object@sample.id, side = 3)
}
}
})
# getMethylationStats -----------------------------------------------------
#' @rdname getMethylationStats-methods
#' @aliases getMethylationStats,methylRawDB-method
setMethod("getMethylationStats", "methylRawDB",
function(object,plot,both.strands,labels,...,chunk.size){
numCs=coverage=strand=.=NULL
tmp = applyTbxByChunk(object@dbpath,chunk.size = chunk.size,
return.type = "data.table",
FUN = function(x) {
.setMethylDBNames(x,"methylRawDB");
return(x[,.(strand,coverage,numCs)])} )
plus.met=100* tmp[strand=="+",numCs/coverage]
mnus.met=100* tmp[strand=="-",numCs/coverage]
all.met =100* tmp[,numCs/coverage]
if(!plot){
qts=seq(0,0.9,0.1) # get quantiles
qts=c(qts,0.95,0.99,0.995,0.999,1)
if(both.strands){
cat("methylation statistics per base\n\n")
cat("FORWARD STRAND:\n")
cat("summary:\n")
print( summary( plus.met ) )
cat("percentiles:\n")
print(quantile( plus.met,p=qts ))
cat("\n\n")
cat("REVERSE STRAND:\n")
cat("summary:\n")
print( summary( mnus.met ) )
cat("percentiles:\n")
print(quantile( mnus.met,p=qts ))
cat("\n")
}else{
cat("methylation statistics per base\n")
cat("summary:\n")
print( summary( all.met ) )
cat("percentiles:\n")
print(quantile( all.met,p=qts ))
cat("\n")
}
}else{
if(both.strands){
par(mfrow=c(1,2))
if(labels){
a=hist((plus.met),plot=FALSE,...)
my.labs=as.character(round(100*a$counts/length(plus.met),1))
}else{my.labs=FALSE}
hist((plus.met),col="cornflowerblue",
xlab=paste("% methylation per",object@resolution),
main=paste("Histogram of %", object@context,
"methylation: Forward strand"),
labels=my.labs,...)
mtext(object@sample.id, side = 3)
if(labels){
a=hist((mnus.met),plot=FALSE,...)
my.labs=as.character(round(100*a$counts/length(mnus.met),1))
}
else{my.labs=FALSE}
hist((mnus.met),col="cornflowerblue",
xlab=paste("% methylation per",object@resolution),
main=paste("Histogram of %", object@context,
"methylation: Reverse strand"),
labels=my.labs,...)
mtext(object@sample.id, side = 3)
}else{
if(labels){
a=hist((all.met),plot=FALSE,...)
my.labs=as.character(round(100*a$counts/length(all.met),1))
}else{my.labs=FALSE}
hist((all.met),col="cornflowerblue",
xlab=paste("% methylation per",object@resolution),
main=paste("Histogram of %", object@context,"methylation"),
labels=my.labs,...)
mtext(object@sample.id, side = 3)
}
}
})
# adjustMethylC -----------------------------------------------------------
#' @rdname adjustMethylC
#' @aliases adjustMethylC,methylRawDB,methylRawDB-method
setMethod("adjustMethylC", c("methylRawDB","methylRawDB"),
function(mc,hmc,save.db=TRUE,...,chunk.size){
if(save.db) {
lst=new("methylRawListDB",list(mc,hmc),treatment=c(1,0))
base=unite(lst)
adjust <- function(data) {
.setMethylDBNames(data,"methylBaseDB")
diff=(data$numCs1)-round(data$coverage1*(data$numCs2/data$coverage2))
diff[diff<0]=0
data$numCs1=diff
data$numTs1=data$coverage1-data$numCs1
return(data[1:7])
}
# catch additional args
args <- list(...)
dir <- dirname(mc@dbpath)
if( "dbdir" %in% names(args) ){
if( !(is.null(args$dbdir)) ){
dir <- .check.dbdir(args$dbdir)
}
}
if(!( "suffix" %in% names(args) ) ){
suffix <- paste0("_","adjusted")
} else {
suffix <- paste0("_",args$suffix)
}
# filename <- paste0(paste(mc@sample.id,collapse = "_"),suffix,".txt")
filename <- paste0(gsub(".txt.bgz",replacement = "",mc@dbpath),suffix,".txt")
filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
## creating the tabix header
slotList <- list(sample.id=mc@sample.id,
assembly=mc@assembly, context =mc@context,
resolution=mc@resolution,
dbtype = mc@dbtype)
tabixHead <- makeTabixHeader(slotList)
tabixHeadString <- .formatTabixHeader(class = "methylRawDB",
tabixHead = tabixHead)
newdbpath <- applyTbxByChunk(base@dbpath,chunk.size = chunk.size,
dir=dir,filename = filename,
return.type = "tabix", FUN = adjust,
tabixHead = tabixHeadString)
unlink(list.files(dirname(base@dbpath),
pattern = basename(gsub(".txt.bgz","",base@dbpath)),
full.names = TRUE))
readMethylRawDB(dbpath = newdbpath,sample.id=mc@sample.id,
assembly=mc@assembly, context =mc@context,
resolution=mc@resolution,
dbtype = mc@dbtype)
} else {
mc.tmp <- mc[]
hmc.tmp <- hmc[]
adjustMethylC(mc.tmp,hmc.tmp,save.db=FALSE,...)
}
})
#' @rdname adjustMethylC
#' @aliases adjustMethylC,methylRawListDB,methylRawListDB-method
setMethod("adjustMethylC", c("methylRawListDB","methylRawListDB"),
function(mc,hmc,save.db=TRUE,...,chunk.size){
# check lengths equal if not give error
if(length(mc) != length(hmc)){stop("lengths of methylRawList objects should be same\n")}
if(save.db){
args <- list(...)
if( !( "dbdir" %in% names(args)) ){
dbdir <- NULL
} else { dbdir <- basename(.check.dbdir(args$dbdir)) }
my.list=list()
for(i in 1:length(mc)){
my.list[[i]]=adjustMethylC(mc[[i]],hmc[[i]],save.db,
dbdir=dbdir,...,chunk.size)
}
new("methylRawListDB",my.list,treatment=mc@treatment )
} else {
my.list=list()
for(i in 1:length(mc)){
my.list[[i]]=adjustMethylC(mc[[i]],hmc[[i]],save.db=FALSE,...)
}
new("methylRawList",my.list,treatment=mc@treatment )
}
})
# normalizeCoverage -------------------------------------------------------
#' @rdname normalizeCoverage-methods
#' @aliases normalizeCoverage,methylRawListDB-method
setMethod("normalizeCoverage", "methylRawListDB",
function(obj,method,chunk.size,save.db=TRUE,...){
if( !(method %in% c("median","mean") ) )
{
stop("method option should be either 'mean' or 'median'\n")
}
if(save.db) {
normCov <- function(data,method) {
.setMethylDBNames(data)
if(method=="median"){
x=median(data$coverage)
}else {
x=mean(data$coverage)
}
sc.fac=max(x)/x #get scaling factor
all.cov=data$coverage
fCs =data$numCs/all.cov
fTs =data$numT/all.cov
data$coverage=round(sc.fac*data$coverage)
data$numCs =round(data$coverage*fCs)
data$numTs =round(data$coverage*fTs)
return(data)
}
# catch additional args
args <- list(...)
if( ( "dbdir" %in% names(args)) ){
# if(!(is.null(args$dbdir))) {
dir <- .check.dbdir(args$dbdir)
# }
} else {
dir <- dirname(obj[[1]]@dbpath)
}
if(!( "suffix" %in% names(args) ) ){
suffix <- paste0("_","normed")
} else {
suffix <- paste0("_",args$suffix)
}
outList <- list()
for(i in 1:length(obj)){
# filename <- paste0(paste(obj[[i]]@sample.id,collapse = "_"),
# suffix,".txt")
filename <- paste0(gsub(".txt.bgz",replacement = "",
obj[[i]]@dbpath),suffix,".txt")
filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
## creating the tabix header
slotList <- list(sample.id=obj[[i]]@sample.id,
assembly=obj[[i]]@assembly,
context =obj[[i]]@context,
resolution=obj[[i]]@resolution,
dbtype = obj[[i]]@dbtype)
tabixHead <- makeTabixHeader(slotList)
tabixHeadString <- .formatTabixHeader(class = "methylRawDB",
tabixHead = tabixHead)
newdbpath <- applyTbxByChunk(obj[[i]]@dbpath,
chunk.size = chunk.size,
dir=dir,filename = filename,
return.type = "tabix",
FUN = normCov,method = method,
tabixHead = tabixHeadString)
outList[[i]] <- readMethylRawDB(dbpath = newdbpath,
sample.id=obj[[i]]@sample.id,
assembly=obj[[i]]@assembly,
context =obj[[i]]@context,
resolution=obj[[i]]@resolution,
dbtype = obj[[i]]@dbtype)
}
new("methylRawListDB",outList,treatment=obj@treatment)
} else {
# coerce methylRawDB to methylRaw
tmp.list <- lapply(obj,function(x) x[])
tmp <- new("methylRawList",tmp.list,treatment=obj@treatment)
normalizeCoverage(tmp,method,save.db=FALSE)
}
})
# reorganize --------------------------------------------------------------
#' @rdname reorganize-methods
#' @aliases reorganize,methylRawListDB-method
setMethod("reorganize", signature(methylObj="methylRawListDB"),
function(methylObj,sample.ids,treatment,chunk.size,save.db=TRUE,...){
#sample.ids length and treatment length should be equal
if(length(sample.ids) != length(treatment) ){
stop("length of sample.ids should be equal to treatment")
}
# get ids from the list of methylRaw
orig.ids=sapply(methylObj,function(x) x@sample.id)
if( ! all(sample.ids %in% orig.ids) ){
stop("provided sample.ids is not a subset of the sample ids of the object")
}
# get the column order in the original matrix
col.ord=order(match(orig.ids,sample.ids))[1:length(sample.ids)]
if(save.db) {
# catch additional args
args <- list(...)
outList=list()
# do not rename per default
suffix <- NULL
if( ("dbdir" %in% names(args)) | ( "suffix" %in% names(args) ) ) {
if( "suffix" %in% names(args) ) { suffix <- paste0("_",args$suffix) }
if( ( "dbdir" %in% names(args)) ){
dir <- .check.dbdir(args$dbdir)
for(i in 1:length(sample.ids)){
obj <- methylObj[[ col.ord[i] ]]
# filename <- paste0(dir,"/",paste(obj@sample.id,collapse = "_"),
# suffix,".txt.bgz")
filename <- paste0(dir,"/",gsub(".txt.bgz",replacement =
"",obj@dbpath),suffix,".txt.bgz")
filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
file.copy(obj@dbpath,filename)
outList[[i]]=readMethylRawDB(dbpath = filename,
dbtype = obj@dbtype,
sample.id = obj@sample.id,
assembly = obj@assembly,
context = obj@context,
resolution = obj@resolution)
}
} else {
for(i in 1:length(sample.ids)){
obj <- methylObj[[ col.ord[i] ]]
# filename <- paste0(dirname(obj@dbpath),paste(obj@sample.id,collapse = "_")
# ,suffix,".txt.bgz")
filename <- paste0(gsub(".txt.bgz",replacement = "",obj@dbpath),
suffix,".txt.bgz")
filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
file.copy(obj@dbpath,filename)
outList[[i]]=readMethylRawDB(dbpath = filename,
dbtype = obj@dbtype,
sample.id = obj@sample.id,
assembly = obj@assembly,
context = obj@context,
resolution = obj@resolution)
}
}
} else {
for(i in 1:length(sample.ids)){
outList[[i]]=methylObj[[ col.ord[i] ]]
}
}
new("methylRawListDB",outList,treatment=treatment)
} else {
outList=list()
for(i in 1:length(sample.ids)){
outList[[i]]=methylObj[[ col.ord[i] ]][]
}
new("methylRawList",outList,treatment=treatment)
}
})
# MethylBaseDB ------------------------------------------------------------
# unite -------------------------------------------------------------------
unite.methylRawListDB <- function(object,destrand=FALSE,min.per.group=NULL,
chunk.size=1e6,mc.cores=1,save.db=TRUE,...){
if(save.db) {
#check if assemblies,contexts and resolutions are same type NOT IMPLEMENTED
if( length(unique(vapply(object,function(x) x@context,FUN.VALUE="character"))) > 1)
{
stop("supplied methylRawList object have different methylation contexts:not all methylation events from the same bases")
}
if( length(unique(vapply(object,function(x) x@assembly,FUN.VALUE="character"))) > 1)
{
stop("supplied methylRawList object have different genome assemblies")
}
if( length(unique(vapply(object,function(x) x@resolution,FUN.VALUE="character"))) > 1)
{
stop("supplied methylRawList object have different methylation resolutions:some base-pair some regional")
}
if( (!is.null(min.per.group)) & ( ! is.integer( min.per.group ) ) )
{
stop("min.per.group should be an integer\ntry providing integers as 1L, 2L,3L etc.\n")
}
if( any(min.per.group > min(table(object@treatment))) )
{
stop("min.per.group can not be higher than number of samples in smallest group\n")
}
if(Sys.info()['sysname']=="Windows") {mc.cores = 1}
# destrand single objects contained in methylRawListDB
if(destrand) {
message("destranding...")
destrandFun <- function(obj){
if(obj@resolution == "base") {
dir <- dirname(obj@dbpath)
filename <- paste(gsub(".txt.bgz","",obj@dbpath),
"destrand.txt",sep="_")
# filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
## creating the tabix header
slotList <- list(dbtype = "tabix",
sample.id = obj@sample.id,
assembly = obj@assembly,
context = obj@context,
resolution = obj@resolution)
tabixHead <- makeTabixHeader(slotList)
tabixHeadString <- .formatTabixHeader(class = "methylRawDB",
tabixHead = tabixHead)
# need to use .CpG.dinuc.unifyOld because output needs to be ordered
newdbpath <- applyTbxByChunk(obj@dbpath,
chunk.size = chunk.size,
dir=dir,filename = filename,
return.type = "tabix",
FUN = function(x) {
.CpG.dinuc.unify(.setMethylDBNames(x,
"methylRawDB") )},
tabixHead = tabixHeadString)
readMethylRawDB(dbpath = newdbpath,dbtype = "tabix",
sample.id = obj@sample.id,
assembly = obj@assembly, context = obj@context,
resolution = obj@resolution)
}else {obj}
}
new.list=suppressMessages(lapply(object,destrandFun))
object <- new("methylRawListDB", new.list,treatment=object@treatment)
on.exit(unlink(c(getDBPath(object),paste0(getDBPath(object),".tbi"))),add = TRUE)
}
#merge raw methylation calls together
message("uniting...")
objList <- getDBPath(object)
args <- list(...)
#print(args)
if( ( "dbdir" %in% names(args)) )
{
dir <- .check.dbdir(args$dbdir)
} else {
dir <- dirname(object[[1]]@dbpath)
}
# if(!( "dbtype" %in% names(args) ) ){
# dbtype <- "tabix"
# } else { dbtype <- args$dbtype }
if(!( "suffix" %in% names(args) ) ){
suffix <- NULL
} else {
suffix <- args$suffix
suffix <- paste0("_",suffix)
}
# filename will be "methylBase" concatenated with either 13-char random string or suffix
filename <- paste0( dir, "/", ifelse(is.null(suffix),
yes = tempfile(pattern = "methylBase_",tmpdir = "."),
no = paste0("methylBase",suffix)),
".txt")
filename <- .checkTabixFileExists(filename)
# remove the "./" that tempfile prepends
filename <- basename(filename)
# filename <- tempfile(pattern = "methylBase",tmpdir = ".",fileext = ".txt")
#filename <- paste0(paste(getSampleID(object),collapse = "_"),".txt")
# get indices of coverage in the data frame
coverage.ind=seq(5,by=3,length.out=length(object))
numCs.ind =coverage.ind+1
numTs.ind =coverage.ind+2
slotList <- list(dbtype = object[[1]]@dbtype,
sample.ids = getSampleID(object),assembly = object[[1]]@assembly,
context = object[[1]]@context,resolution = object[[1]]@resolution,
coverage.index=coverage.ind,numCs.index=numCs.ind,numTs.index=numTs.ind,
treatment = object@treatment,destranded = destrand)
tabixHead <- makeTabixHeader(slotList)
tabixHeadString <- .formatTabixHeader(class = "methylBaseDB",
tabixHead = tabixHead)
if(is.null(min.per.group)) {
message("merging tabix files ...")
dbpath <- mergeTabix(tabixList = objList ,dir = dir,
filename = filename,mc.cores = mc.cores,
tabixHead = tabixHeadString)
dbpath <- gsub(".tbi","",dbpath)
} else {
# if the the min.per.group argument is supplied, remove the rows
# that doesn't have enough coverage
message("merging tabix files ...")
# keep rows with no matching in all samples
tmpPath <- mergeTabix(tabixList = objList ,dir = dir,
filename = paste0(filename,".tmp"),
mc.cores = mc.cores,all=TRUE,
tabixHead = tabixHeadString)
tmpPath <- gsub(".tbi","",tmpPath)
# get indices of coverage in the data frame
coverage.ind=seq(5,by=3,length.out=length(object))
filter <- function(df, coverage.ind, treatment,min.per.group){
df <- as.data.table(df)
for(i in unique(treatment) ){
my.ind=coverage.ind[treatment==i]
ldat = !is.na(df[,my.ind,with=FALSE])
if( is.null(dim(ldat)) ){ # if there is only one dimension
df=df[ldat>=min.per.group,]
}else{
df=df[rowSums(ldat)>=min.per.group,]
}
}
return(as.data.frame(df))
}
message("filter mincov per group ...")
dbpath <- applyTbxByChunk(tbxFile = tmpPath, chunk.size = chunk.size,
dir = dir, filename = filename,
return.type = "tabix", FUN = filter,
treatment=object@treatment, coverage.ind=coverage.ind,
min.per.group=min.per.group, tabixHead = tabixHeadString)
unlink(list.files(dirname(tmpPath),pattern = basename(gsub(".txt.bgz","",tmpPath)),full.names = TRUE))
}
obj <- tryCatch(expr = {
readMethylBaseDB(dbpath = dbpath,dbtype = object[[1]]@dbtype,
sample.ids = getSampleID(object),assembly = object[[1]]@assembly,
context = object[[1]]@context,resolution = object[[1]]@resolution,
treatment = object@treatment,destranded = destrand)
},
error = function(e) {
stop(sprintf("no %s were united. try adjusting 'min.per.group'.",
object[[1]]@resolution))
}
)
message(paste0("flatfile located at: ",obj@dbpath))
obj
} else {
obj <- object
class(obj) <- "methylRawList"
unite(obj,destrand,min.per.group,chunk.size,mc.cores,save.db=FALSE,...)
}
}
#' @rdname unite-methods
#' @aliases unite,methylRawListDB-method
setMethod("unite", "methylRawListDB",unite.methylRawListDB)
# getCorrelation ----------------------------------------------------------
#' @rdname getCorrelation-methods
#' @aliases getCorrelation,methylBaseDB-method
setMethod("getCorrelation", "methylBaseDB",
function(object,method,plot,nrow=2e6){
if(is.null(nrow)){
meth.fun <- function(data, numCs.index, numTs.index){
data[, numCs.index]/( data[,numCs.index] + data[,numTs.index] )
}
meth.mat = applyTbxByChunk(object@dbpath,return.type = "data.frame",
FUN = meth.fun, numCs.index = object@numCs.index,
numTs.index = object@numTs.index)
}else{
data = headTabix(object@dbpath,nrow=nrow,return.type = "data.frame")
meth.mat = data[, object@numCs.index]/( data[,object@numCs.index] +
data[,object@numTs.index] )
}
names(meth.mat)=object@sample.ids
method <- match.arg(method)
print( cor(meth.mat,method=method) )
if(plot) {
.plotCorrelation(meth.mat = meth.mat,
title = paste(object@context, object@resolution ,method,"cor."),
method = method)
}
}
)
# reconstruct -------------------------------------------------------------
#' @rdname reconstruct-methods
#' @aliases reconstruct,methylBaseDB-method
setMethod("reconstruct",signature(mBase="methylBaseDB"),
function(methMat,mBase,chunk.size,save.db=TRUE,...){
if(save.db){
if(is.matrix(methMat) ) {
# check if indeed methMat is percent methylation matrix
if(min(methMat)<0 | max(methMat)>100 ){
stop("\nmake sure 'methMat' is percent methylation matrix ",
"(values between 0-100) \n")
}
# check if methMat is percent methylation matrix fitting to mBase
if(nrow(methMat) != mBase@num.records |
ncol(methMat) != length(mBase@numCs.index) ){
stop("\nmethMat dimensions do not match number of samples\n",
"and number of bases in methylBase object\n")
}
rndFile=paste(sample(c(0:9, letters, LETTERS),9, replace=TRUE),collapse="")
matFile=paste(rndFile,"methMat.txt",sep="_")
.write.bed(methMat,matFile,quote=FALSE,col.names=FALSE,row.names=FALSE,
sep="\t")
methMat = matFile
} else {
# methMat can also be a file containing a percent methylation matrix
mat <- fread(methMat,header = FALSE)
# check if indeed methMat is percent methylation matrix
if(min(methMat)<0 | max(methMat)>100 ){
stop("\nmake sure 'methMat' is percent methylation matrix ",
"(values between 0-100) \n")
}
# check if methMat is percent methylation matrix fitting to mBase
if(nrow(mat) != mBase@num.records | ncol(mat) != length(mBase@numCs.index) ){
stop("\nmethMat dimensions do not match number of samples\n",
"and number of bases in methylBase object\n")
}
rm(mat)
}
reconstr <- function(data, methMat, chunk, numCs.index, numTs.index) {
mat=data[,numCs.index]+data[,numTs.index]
methMat = read.table(methMat,header = FALSE, nrows = chunk, sep="\t")
# get new unmethylated and methylated counts
numCs=round(methMat*mat/100)
numTs=round((100-methMat)*mat/100)
data[,numCs.index]=numCs
data[,numTs.index]=numTs
return(data)
}
# catch additional args
args <- list(...)
if( !( "dbdir" %in% names(args)) ){
dir <- dirname(mBase@dbpath)
} else { dir <- .check.dbdir(args$dbdir) }
if(!( "suffix" %in% names(args) ) ){
suffix <- "_reconstructed"
} else {
suffix <- paste0("_",args$suffix)
}
# filename <- paste0(paste(mBase@sample.ids,collapse = "_"),suffix,".txt")
filename <- paste0(gsub(".txt.bgz","",mBase@dbpath),suffix,".txt")
filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
con <- file(methMat,open = "r")
## creating the tabix header
slotList <- list(dbtype = mBase@dbtype,
sample.ids = mBase@sample.ids,assembly = mBase@assembly,
context = mBase@context,resolution = mBase@resolution,
treatment = mBase@treatment,destranded = mBase@destranded,
coverage.index = mBase@coverage.index,
numCs.index = mBase@numCs.index,
numTs.index = mBase@numTs.index)
tabixHead <- makeTabixHeader(slotList)
tabixHeadString <- .formatTabixHeader(class = "methylBaseDB",
tabixHead = tabixHead)
newdbpath <- applyTbxByChunk(tbxFile = mBase@dbpath,chunk.size = chunk.size,
dir=dir,filename = filename,
return.type = "tabix", FUN = reconstr,
con,chunk.size,mBase@numCs.index,
mBase@numTs.index, tabixHead = tabixHeadString)
close(con)
if(file.exists(matFile)) {unlink(matFile)}
readMethylBaseDB(dbpath = newdbpath,dbtype = mBase@dbtype,
sample.ids = mBase@sample.ids,assembly = mBase@assembly,
context = mBase@context,resolution = mBase@resolution,
treatment = mBase@treatment,destranded = mBase@destranded)
} else {
tmp <- mBase[]
reconstruct(methMat,tmp,save.db=FALSE)
}
}
)
# removeComp --------------------------------------------------------------
#' @rdname removeComp-methods
#' @aliases removeComp,methylBaseDB-method
setMethod("removeComp",signature(mBase="methylBaseDB"),
function(mBase,comp,chunk.size,save.db=TRUE,...){
if(anyNA(comp) || is.null(comp)){
stop("no component to remove\n")
}
if(any(comp > length(mBase@sample.ids) )){
stop("'comp' elements can only take values between 1 and number of samples\n")
}
scale=TRUE
center=TRUE
mat=percMethylation(mBase,chunk.size=chunk.size)
mat=scale(mat,scale=scale,center=center)
centers <- attr(mat,'scaled:center')
scales <- attr(mat,'scaled:scale')
pr=prcomp(mat,scale.=FALSE,center=FALSE)
pr$rotation[,comp]=0
res=pr$x %*% t(pr$rotation)
res=(scale(res,center=(-centers/scales),scale=1/scales))
attr(res,"scaled:center")<-NULL
attr(res,"scaled:scale")<-NULL
res[res>100]=100
res[res<0]=0
reconstruct(res,mBase,chunk.size,save.db = save.db,...=...)
}
)
# percMethylation ---------------------------------------------------------
#' @rdname percMethylation-methods
#' @aliases percMethylation,methylBaseDB-method
setMethod("percMethylation", "methylBaseDB",
function(methylBase.obj,
rowids = FALSE,
save.txt = FALSE,
chunk.size = 1e6) {
meth.fun <- function(data, numCs.index, numTs.index, rowids) {
dat = 100 * data[, numCs.index] / (data[, numCs.index] +
data[, numTs.index])
if(rowids) {
dat = cbind(pos =
paste(as.character(data[, 1]),
data[, 2], data[, 3],
sep = "."),
dat)
}
return(dat)
}
if (save.txt) {
filename <- paste0(basename(gsub(".txt.bgz","",
methylBase.obj@dbpath)),"_methMath.txt")
textHeader = methylBase.obj@sample.ids
if(rowids) textHeader = c("pos",textHeader)
outfile = applyTbxByChunk(methylBase.obj@dbpath,
return.type = "text",
textHeader = textHeader,
chunk.size = chunk.size,
dir = dirname(methylBase.obj@dbpath),
filename = filename,
FUN = meth.fun,
numCs.index = methylBase.obj@numCs.index,
numTs.index = methylBase.obj@numTs.index,
rowids = rowids)
return(outfile)
} else {
meth.mat = applyTbxByChunk(methylBase.obj@dbpath,
return.type = "data.frame",
chunk.size = chunk.size,
FUN = meth.fun,
numCs.index = methylBase.obj@numCs.index,
numTs.index = methylBase.obj@numTs.index,
rowids = rowids)
if(rowids){
rownames(meth.mat) = meth.mat$pos
meth.mat$pos = NULL
}
names(meth.mat)=methylBase.obj@sample.ids
return(as.matrix(meth.mat))
}
})
# clusterSamples ----------------------------------------------------------
#' @rdname clusterSamples-methods
#' @aliases clusterSamples,methylBaseDB-method
setMethod("clusterSamples", "methylBaseDB",
function(.Object, dist, method ,sd.filter, sd.threshold,
filterByQuantile, plot,chunk.size)
{
getMethMat <- function(mat,numCs.index,numTs.index,sd.filter,
sd.threshold, filterByQuantile){
# remove rows containing NA values, they might be
# introduced at unite step
mat =mat[ rowSums(is.na(mat))==0, ]
meth.mat = mat[, numCs.index]/
(mat[,numCs.index] + mat[,numTs.index] )
# if Std. Dev. filter is on remove rows with low variation
if(sd.filter){
if(filterByQuantile){
sds=rowSds(as.matrix(meth.mat))
cutoff=quantile(sds,sd.threshold)
meth.mat=meth.mat[sds>cutoff,]
}else{
meth.mat=meth.mat[rowSds(as.matrix(meth.mat))>sd.threshold,]
}
}
}
meth.mat <- applyTbxByChunk(.Object@dbpath,chunk.size = chunk.size,
return.type = "data.frame",
FUN=getMethMat,
numCs.ind=.Object@numCs.index,
numTs.ind=.Object@numTs.index,
sd.filter=sd.filter,
sd.threshold=sd.threshold,
filterByQuantile=filterByQuantile)
names(meth.mat)=.Object@sample.ids
.cluster(meth.mat, dist.method=dist, hclust.method=method,
plot=plot, treatment=.Object@treatment,
sample.ids=.Object@sample.ids,
context=.Object@context)
}
)
# PCASamples --------------------------------------------------------------
#' @rdname PCASamples-methods
#' @aliases PCASamples,methylBaseDB-method
setMethod("PCASamples", "methylBaseDB",
function(.Object, screeplot, adj.lim,scale,center,comp,
transpose,sd.filter, sd.threshold,
filterByQuantile,obj.return,chunk.size)
{
getMethMat <- function(mat,numCs.index,numTs.index,sd.filter,
sd.threshold, filterByQuantile){
# remove rows containing NA values, they might be introduced at unite step
mat =mat[ rowSums(is.na(mat))==0, ]
meth.mat = mat[, numCs.index]/
(mat[,numCs.index] + mat[,numTs.index] )
# if Std. Dev. filter is on remove rows with low variation
if(sd.filter){
if(filterByQuantile){
sds=rowSds(as.matrix(meth.mat))
cutoff=quantile(sds,sd.threshold)
meth.mat=meth.mat[sds>cutoff,]
}else{
meth.mat=meth.mat[rowSds(as.matrix(meth.mat))>sd.threshold,]
}
}
}
meth.mat <- applyTbxByChunk(.Object@dbpath,chunk.size = chunk.size,
return.type = "data.frame",FUN=getMethMat,
numCs.ind=.Object@numCs.index,
numTs.ind=.Object@numTs.index,
sd.filter=sd.filter, sd.threshold=sd.threshold,
filterByQuantile=filterByQuantile)
names(meth.mat)=.Object@sample.ids
if(transpose){
.pcaPlotT(meth.mat,comp1=comp[1],comp2=comp[2],screeplot=screeplot,
adj.lim=adj.lim,
treatment=.Object@treatment,sample.ids=.Object@sample.ids,
context=.Object@context
,scale=scale,center=center,obj.return=obj.return)
}else{
.pcaPlot(meth.mat,comp1=comp[1],comp2=comp[2],screeplot=screeplot,
adj.lim=adj.lim,
treatment=.Object@treatment,sample.ids=.Object@sample.ids,
context=.Object@context,
scale=scale,center=center, obj.return=obj.return)
}
}
)
# reorganize --------------------------------------------------------------
#' @rdname reorganize-methods
#' @aliases reorganize,methylBaseDB-method
setMethod("reorganize", signature(methylObj="methylBaseDB"),
function(methylObj,sample.ids,treatment,chunk.size,save.db=TRUE,...){
#sample.ids length and treatment length should be equal
if(length(sample.ids) != length(treatment) ){
stop("length of sample.ids should be equal to treatment")
}
if( ! all(sample.ids %in% methylObj@sample.ids) ){
stop("provided sample.ids is not a subset of the sample ids of the object")
}
if(save.db) {
temp.id = methylObj@sample.ids # get the subset of ids
# get the column order in the original matrix
col.ord = order(match(temp.id,sample.ids))[1:length(sample.ids)]
# make a matrix indices for easy access
ind.mat=rbind(methylObj@coverage.index[col.ord],
methylObj@numCs.index[col.ord],
methylObj@numTs.index[col.ord])
# get indices of coverage,numCs and numTs in the data frame
coverage.ind=seq(5,by=3,length.out=length(sample.ids))
numCs.ind =coverage.ind+1
numTs.ind =coverage.ind+2
getSub <- function(data,ind.mat) {
newdat =data[,1:4]
for(i in 1:ncol(ind.mat))
{
newdat=cbind(newdat,data[,ind.mat[,i]])
}
return(newdat)
}
# catch additional args
args <- list(...)
if( ( "dbdir" %in% names(args)) ){
dir <- .check.dbdir(args$dbdir)
} else { dir <- dirname(methylObj@dbpath) }
if(!( "suffix" %in% names(args) ) ){
suffix <- "_reorganized"
} else {
suffix <- paste0("_",args$suffix)
}
# filename <- paste0(paste(sample.ids,collapse = "_"),suffix,".txt")
filename <- paste0(gsub(".txt.bgz","",methylObj@dbpath)
,suffix,".txt")
filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
## creating the tabix header
slotList <- list(
dbtype = methylObj@dbtype,
sample.ids = sample.ids,
assembly = methylObj@assembly,
context = methylObj@context,
treatment = treatment,
destranded = methylObj@destranded,
resolution = methylObj@resolution,
coverage.index = coverage.ind,
numCs.index = numCs.ind,
numTs.index = numTs.ind
)
tabixHead <- makeTabixHeader(slotList)
tabixHeadString <- .formatTabixHeader(class = "methylBaseDB",
tabixHead = tabixHead)
newdbpath <- applyTbxByChunk(tbxFile = methylObj@dbpath,
chunk.size = chunk.size, dir=dir,
filename = filename,
return.type = "tabix", FUN = getSub,
ind.mat=ind.mat,tabixHead = tabixHeadString)
readMethylBaseDB(dbpath = newdbpath,dbtype = methylObj@dbtype,
sample.ids=sample.ids,
assembly=methylObj@assembly,context=methylObj@context,
treatment=treatment,destranded=methylObj@destranded,
resolution=methylObj@resolution )
} else {
obj <- methylObj[]
reorganize(obj,sample.ids,treatment,save.db=FALSE,...)
}
})
# pool --------------------------------------------------------------------
#' @rdname pool-methods
#' @aliases pool,methylBaseDB-method
setMethod("pool", "methylBaseDB",
function(obj,sample.ids,chunk.size,save.db=TRUE,...){
treat = unique(obj@treatment)
if(length(treat) != length(sample.ids)) {
stop("Length of sample.ids should be equal to \n",
"length of the unique treatment vector")}
if(save.db) {
mypool <- function(df,treatment,numCs.index){
treat=unique(treatment)
res=df[,1:4]
for(i in 1:length(treat) ){
# get indices
setCs=numCs.index[treatment==treat[i]]
setTs=setCs+1
set.cov=setCs-1
if(length(setCs)>1){
Cs=rowSums(df[,setCs],na.rm=TRUE)
Ts=rowSums(df[,setTs],na.rm=TRUE)
covs=rowSums(df[,set.cov],na.rm=TRUE)
}else{
Cs =df[,setCs]
Ts =df[,setTs]
covs=df[,set.cov]
}
res=cbind(res,covs,Cs,Ts) # bind new data
}
return(res)
}
coverage.ind=3*(1:length(treat)) + 2
# catch additional args
args <- list(...)
if( ( "dbdir" %in% names(args)) ){
if( !(is.null(args$dbdir)) ) {
dir <- .check.dbdir(args$dbdir) }
} else { dir <- dirname(obj@dbpath) }
if(!( "suffix" %in% names(args) ) ){
suffix <- "_pooled"
} else {
suffix <- paste0("_",args$suffix)
}
# filename <- paste0(paste(sample.ids,collapse = "_"),suffix,".txt")
filename <- paste0(gsub(".txt.bgz","",obj@dbpath)
,suffix,".txt")
filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
# if(file.exists(paste0(filename,".bgz"))) message("overwriting file.")
# get indices of coverage in the data frame
coverage.ind=seq(5,by=3,length.out=length(sample.ids))
numCs.ind =coverage.ind+1
numTs.ind =coverage.ind+2
## creating the tabix header
slotList <- list(dbtype = obj@dbtype,
sample.ids=sample.ids,
assembly=obj@assembly,context=obj@context,
treatment=treat,destranded=obj@destranded,
coverage.index=coverage.ind,numCs.index=numCs.ind,
numTs.index=numTs.ind,
resolution=obj@resolution)
tabixHead <- makeTabixHeader(slotList)
tabixHeadString <- .formatTabixHeader(class = "methylBaseDB",
tabixHead = tabixHead)
newdbpath <- applyTbxByChunk(tbxFile = obj@dbpath,chunk.size = chunk.size,
dir=dir,filename = filename,
return.type = "tabix", FUN = mypool,
treatment = obj@treatment,
numCs.index = obj@numCs.index,
tabixHead = tabixHeadString)
objdb <- readMethylBaseDB(dbpath = newdbpath,dbtype = obj@dbtype,
sample.ids=sample.ids,
assembly=obj@assembly,context=obj@context,
treatment=treat,destranded=obj@destranded,
resolution=obj@resolution )
message(paste0("flatfile located at: ",getDBPath(objdb)))
objdb
} else {
tmp <- obj[]
pool(tmp,sample.ids,save.db=FALSE)
}
})
# MethylDiffDB ------------------------------------------------------------
# calculateDiffMeth -------------------------------------------------------
#' @aliases calculateDiffMeth,methylBaseDB-method
#' @rdname calculateDiffMeth-methods
setMethod("calculateDiffMeth", "methylBaseDB",
function(.Object, covariates, overdispersion, adjust,
effect, parShrinkMN,
test ,mc.cores , slim , weighted.mean,
chunk.size, save.db=TRUE, ...){
if(save.db) {
# check object before starting calculations
.checksCalculateDiffMeth(treatment = .Object@treatment,
covariates = covariates,
Tcols = .Object@numTs.index)
# catch additional args
args <- list(...)
dir <- dirname(.Object@dbpath)
if( ( "dbdir" %in% names(args)) ){
if( !(is.null(args$dbdir)) ) {
dir <- .check.dbdir(args$dbdir)
}
}
if(!( "suffix" %in% names(args) ) ){
suffix <- NULL
} else {
suffix <- paste0("_",args$suffix)
}
# filename <- paste0(paste(.Object@sample.ids,collapse = "_"),suffix,".txt")
filename <- paste0(gsub("methylBase","methylDiff",
gsub(".txt.bgz","",.Object@dbpath)
),suffix,".txt")
filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
## creating the tabix header
slotList <- list(dbtype = .Object@dbtype,
sample.ids=.Object@sample.ids,
assembly=.Object@assembly,
context=.Object@context,
destranded=.Object@destranded,
treatment=.Object@treatment,
resolution=.Object@resolution)
tabixHead <- makeTabixHeader(slotList)
tabixHeadString <- .formatTabixHeader(class = "methylDiffDB",
tabixHead = tabixHead)
dbpath <- applyTbxByChunk(.Object@dbpath,dir =
dir,chunk.size = chunk.size,
filename = filename,
return.type = "tabix",
FUN = .calculateDiffMeth,
treatment=.Object@treatment,
overdispersion=overdispersion,
effect=effect,
parShrinkMN=parShrinkMN,
test=test,
adjust=adjust,
mc.cores=mc.cores,
tabixHead = tabixHeadString)
obj=readMethylDiffDB(dbpath = dbpath,dbtype = .Object@dbtype,
sample.ids=.Object@sample.ids,
assembly=.Object@assembly,
context=.Object@context,
destranded=.Object@destranded,
treatment=.Object@treatment,
resolution=.Object@resolution)
message(paste0("flatfile located at: ",obj@dbpath))
obj
} else {
tmp <- .Object[]
calculateDiffMeth(tmp,covariates,overdispersion=overdispersion,
adjust=adjust,
effect=effect,parShrinkMN=parShrinkMN,
test=test,mc.cores=mc.cores,
slim=slim,weighted.mean=weighted.mean,
save.db=FALSE)
}
}
)
# getMethylDiff -----------------------------------------------------------
#' @aliases getMethylDiff,methylDiffDB-method
#' @rdname getMethylDiff-methods
setMethod(f="getMethylDiff", signature="methylDiffDB",
definition=function(.Object,difference,qvalue,type,
chunk.size,save.db=TRUE,...) {
if(!( type %in% c("all","hyper","hypo") )){
stop("Wrong 'type' argument supplied for the function, ",
"it can be 'hypo', 'hyper' or 'all' ")
}
meth.diff=NULL
if(save.db) {
#function applied to data
f <- function(data,difference,qv,type){
data <- data.table(data)
.setMethylDBNames(data,methylDBclass = "methylDiffDB")
if(type=="all"){
data <- data[(qvalue < qv) & (abs(meth.diff) > difference)]
}else if(type=="hyper"){
data <- data[(qvalue < qv) & (meth.diff > difference)]
}else if(type=="hypo"){
data <- data[(qvalue < qv) & (meth.diff < -1*difference)]
}
return(data)
}
# catch additional args
args <- list(...)
dir <- dirname(.Object@dbpath)
if( ( "dbdir" %in% names(args)) ){
if( !(is.null(args$dbdir)) ) {
dir <- .check.dbdir(args$dbdir)
}
}
if(!( "suffix" %in% names(args) ) ){
suffix <- paste0("_",type)
} else {
suffix <- paste0("_",args$suffix)
}
# filename <- paste0(paste(.Object@sample.ids,collapse = "_"),suffix,".txt")
filename <- paste0(gsub(".txt.bgz","",.Object@dbpath),suffix,".txt")
filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
## creating the tabix header
slotList <- list(dbtype = .Object@dbtype,
sample.ids=.Object@sample.ids,
assembly=.Object@assembly,context=.Object@context,
destranded=.Object@destranded,
treatment=.Object@treatment,resolution=.Object@resolution)
tabixHead <- makeTabixHeader(slotList)
tabixHeadString <- .formatTabixHeader(class = "methylDiffDB",
tabixHead = tabixHead)
dbpath <- applyTbxByChunk(.Object@dbpath,chunk.size = chunk.size,
dir = dir, filename = filename,
return.type = "tabix", FUN = f,
difference = difference, qv = qvalue, type = type,
tabixHead = tabixHeadString)
obj=readMethylDiffDB(dbpath = dbpath,dbtype = .Object@dbtype,
sample.ids=.Object@sample.ids,
assembly=.Object@assembly,context=.Object@context,
destranded=.Object@destranded,
treatment=.Object@treatment,resolution=.Object@resolution)
return(obj)
} else {
tmp <- .Object[]
getMethylDiff(tmp,difference,qvalue,type,save.db=FALSE)
}
})
# diffMethPerChr ----------------------------------------------------------
#' @aliases diffMethPerChr,methylDiffDB-method
#' @rdname diffMethPerChr-methods
setMethod("diffMethPerChr", signature(x = "methylDiffDB"),
function(x,plot,qvalue.cutoff, meth.cutoff,exclude,keep.empty.chrom,...){
res <-
applyTbxByChr(
x@dbpath,
return.type = "data.frame",
FUN = .diffMethPerChr,
qvalue.cutoff = qvalue.cutoff,
meth.cutoff = meth.cutoff,
keep.empty.chrom = keep.empty.chrom
)
# order the chromosomes
dmc.hyper.hypo = res[order(as.numeric(sub("chr", "", res$chr))),]
# get percentages of hypo/ hyper
dmc.hyper = 100 * sum(res$number.of.hypermethylated) / x@num.records
dmc.hypo = 100 * sum(res$number.of.hypomethylated) / x@num.records
all.hyper.hypo = data.frame(
number.of.hypermethylated = sum(res$number.of.hypermethylated),
percentage.of.hypermethylated = dmc.hyper,
number.of.hypomethylated = sum(res$number.of.hypomethylated),
percentage.of.hypomethylated = dmc.hypo
)
if (all(dmc.hyper.hypo$chr %in% exclude)) {
warning("Cannot plot figure, excluded all available chromosomes.")
plot <- FALSE
}
if (plot) {
.plotDiffMethPerChr(dmc.hyper.hypo, exclude, qvalue.cutoff, meth.cutoff,...)
} else {
return(list(diffMeth.per.chr = dmc.hyper.hypo,
diffMeth.all = all.hyper.hypo))
}
})
# Regionalize methods ----------------------------------------------------
# regionCounts ------------------------------------------------------------
#' @rdname regionCounts
#' @aliases regionCounts,methylRawDB,GRanges-method
setMethod("regionCounts", signature(object="methylRawDB",regions="GRanges"),
function(object,regions,cov.bases,
strand.aware,chunk.size,save.db=TRUE,...){
if(save.db) {
# sort regions
regions <- sortSeqlevels(regions)
regions <- sort(regions,ignore.strand=TRUE)
getCounts <- function(data,regions,cov.bases,strand.aware){
.setMethylDBNames(data)
# overlap data with regions
# convert data to GRanges without metacolumns
g.meth=with(data, GRanges(chr, IRanges(start=start, end=end),strand =strand))
if(!strand.aware){
strand(g.meth)="*"
mat=IRanges::as.matrix( findOverlaps(regions,g.meth ) )
#mat=matchMatrix( findOverlaps(regions,g.meth ) )
}else{
mat=IRanges::as.matrix( findOverlaps(regions,g.meth) )
#mat=matchMatrix( findOverlaps(regions,as(object,"GRanges")) )
}
# create a temporary data.table row ids from regions and counts from object
temp.dt=data.table(id = mat[, 1], data[mat[, 2], c(5, 6, 7)])
coverage=NULL
numCs=NULL
numTs=NULL
id=covered=NULL
# use data.table to sum up counts per region
sum.dt=temp.dt[,list(coverage=sum(coverage),
numCs =sum(numCs),
numTs =sum(numTs),covered=length(numTs)),by=id]
sum.dt=sum.dt[covered>=cov.bases,]
temp.df=as.data.frame(regions) # get regions to a dataframe
#create a new methylRaw object to return
new.data=data.frame(#id =new.ids,
chr =temp.df[sum.dt$id,"seqnames"],
start =temp.df[sum.dt$id,"start"],
end =temp.df[sum.dt$id,"end"],
strand =temp.df[sum.dt$id,"strand"],
coverage=sum.dt$coverage,
numCs =sum.dt$numCs,
numTs =sum.dt$numTs)
}
# catch additional args
args <- list(...)
dir <- dirname(object@dbpath)
if( "dbdir" %in% names(args) ){
if( !(is.null(args$dbdir)) ){
dir <- .check.dbdir(args$dbdir)
}
}
if(!( "suffix" %in% names(args) ) ){
suffix <- "_regions"
} else {
suffix <- paste0("_",args$suffix)
}
filename <- paste0(gsub(".txt.bgz","",object@dbpath)
,suffix,".txt")
filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
## creating the tabix header
slotList <- list(sample.id=object@sample.id,
assembly=object@assembly, context =object@context,
resolution="region",
dbtype = object@dbtype)
tabixHead <- makeTabixHeader(slotList)
tabixHeadString <- .formatTabixHeader(class = "methylRawDB",
tabixHead = tabixHead)
newdbpath <- applyTbxByOverlap(object@dbpath,chunk.size = chunk.size,
ranges=regions, dir=dir,filename = filename,
return.type = "tabix", FUN = getCounts,
regions = regions ,cov.bases = cov.bases,
strand.aware = strand.aware,
tabixHead = tabixHeadString)
readMethylRawDB(dbpath = newdbpath,sample.id=object@sample.id,
assembly=object@assembly, context =object@context,
resolution="region",
dbtype = object@dbtype)
} else {
tmp <- object[]
regionCounts(tmp,regions,cov.bases,strand.aware,save.db=FALSE)
}
}
)
#' @rdname regionCounts
#' @aliases regionCounts,methylRawDB,GRangesList-method
# assume that each name of the element in the GRangesList is unique and
setMethod("regionCounts", signature(object="methylRawDB",regions="GRangesList"),
function(object,regions,cov.bases,strand.aware,chunk.size,save.db=TRUE,...){
# combine and sort GRanges from List
regions <- unlist(regions)
regions <- sortSeqlevels(regions)
regions <- sort(regions,ignore.strand=TRUE)
regions <- unique(regions)
if(save.db) {
getCounts <- function(data,regions,cov.bases,strand.aware){
.setMethylDBNames(data)
# overlap data with regions
# convert data to GRanges without metacolumns
g.meth=with(data, GRanges(chr, IRanges(start=start, end=end),strand =strand))
if(!strand.aware){
strand(g.meth)="*"
mat=IRanges::as.matrix( findOverlaps(regions,g.meth ) )
#mat=matchMatrix( findOverlaps(regions,g.meth ) )
}else{
mat=IRanges::as.matrix( findOverlaps(regions,g.meth) )
#mat=matchMatrix( findOverlaps(regions,as(object,"GRanges")) )
}
# create a temporary data.table row ids from regions and counts from object
temp.dt=data.table(id = mat[, 1], data[mat[, 2], c(5, 6, 7)])
coverage=NULL
numCs=NULL
numTs=NULL
id=covered=NULL
# use data.table to sum up counts per region
sum.dt=temp.dt[,list(coverage=sum(coverage),
numCs =sum(numCs),
numTs =sum(numTs),covered=length(numTs)),by=id]
sum.dt=sum.dt[covered>=cov.bases,]
temp.df=as.data.frame(regions) # get regions to a dataframe
#create a new methylRaw object to return
new.data=data.frame(#id =new.ids,
chr =temp.df[sum.dt$id,"seqnames"],
start =temp.df[sum.dt$id,"start"],
end =temp.df[sum.dt$id,"end"],
strand =temp.df[sum.dt$id,"strand"],
coverage=sum.dt$coverage,
numCs =sum.dt$numCs,
numTs =sum.dt$numTs)
}
# catch additional args
args <- list(...)
dir <- dirname(object@dbpath)
if( "dbdir" %in% names(args) ){
if( !(is.null(args$dbdir)) ){
dir <- .check.dbdir(args$dbdir)
}
}
if(!( "suffix" %in% names(args) ) ){
suffix <- "_regions"
} else {
suffix <- paste0("_",args$suffix)
}
filename <- paste0(gsub(".txt.bgz","",object@dbpath)
,suffix,".txt")
filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
## creating the tabix header
slotList <- list(sample.id=object@sample.id,
assembly=object@assembly,
context =object@context,
resolution="region",
dbtype = object@dbtype)
tabixHead <- makeTabixHeader(slotList)
tabixHeadString <- .formatTabixHeader(class = "methylRawDB",
tabixHead = tabixHead)
newdbpath <- applyTbxByOverlap(object@dbpath,
chunk.size = chunk.size,
ranges=regions, dir=dir,filename = filename,
return.type = "tabix",
FUN = getCounts,regions = regions ,
cov.bases = cov.bases,
strand.aware = strand.aware,
tabixHead = tabixHeadString)
readMethylRawDB(dbpath = newdbpath,sample.id=object@sample.id,
assembly=object@assembly, context =object@context,resolution="region",
dbtype = object@dbtype)
} else {
tmp <- object[]
regionCounts(tmp,regions,cov.bases,strand.aware,save.db=FALSE)
}
}
)
#' @rdname regionCounts
#' @aliases regionCounts,methylRawListDB,GRanges-method
setMethod("regionCounts", signature(object="methylRawListDB",regions="GRanges"),
function(object,regions,cov.bases,strand.aware,chunk.size,save.db=TRUE,...){
if(save.db){
args <- list(...)
if( !( "dbdir" %in% names(args)) ){
dbdir <- NULL
} else { dbdir <- basename(.check.dbdir(args$dbdir)) }
outList = lapply(object,regionCounts,regions,cov.bases, strand.aware,
chunk.size,save.db,dbdir=dbdir,...)
new("methylRawListDB", outList,treatment=object@treatment)
} else {
outList = lapply(object,regionCounts,regions,cov.bases,strand.aware,
chunk.size,save.db=FALSE)
new("methylRawList", outList,treatment=object@treatment)
}
}
)
#' @rdname regionCounts
#' @aliases regionCounts,methylRawListDB,GRangesList-method
setMethod("regionCounts", signature(object="methylRawListDB",
regions="GRangesList"),
function(object,regions,cov.bases,strand.aware,chunk.size,save.db=TRUE,...){
if(save.db){
args <- list(...)
if( !( "dbdir" %in% names(args)) ){
dbdir <- NULL
} else { dbdir <- basename(.check.dbdir(args$dbdir)) }
outList = lapply(object,regionCounts,regions,cov.bases, strand.aware,
chunk.size,save.db,dbdir=dbdir,...)
new("methylRawListDB", outList,treatment=object@treatment)
} else {
outList = lapply(object,regionCounts,regions,cov.bases,strand.aware,
chunk.size,save.db=FALSE)
new("methylRawList", outList,treatment=object@treatment)
}
}
)
#' @rdname regionCounts
#' @aliases regionCounts,methylBaseDB,GRanges-method
setMethod("regionCounts", signature(object="methylBaseDB",regions="GRanges"),
function(object,regions,cov.bases,strand.aware,chunk.size,
save.db=TRUE,...){
if(save.db) {
# sort regions
regions <- sortSeqlevels(regions)
regions <- sort(regions,ignore.strand=TRUE)
getCounts <- function(data,regions,cov.bases,strand.aware){
.setMethylDBNames(data)
# overlap data with regions
# convert data to GRanges without metacolumns
g.meth=with(data, GRanges(chr, IRanges(start=start, end=end),
strand =strand))
if(!strand.aware){
strand(g.meth)="*"
mat=IRanges::as.matrix( findOverlaps(regions,g.meth ) )
#mat=matchMatrix( findOverlaps(regions,g.meth ) )
}else{
mat=IRanges::as.matrix( findOverlaps(regions,g.meth) )
#mat=matchMatrix( findOverlaps(regions,as(object,"GRanges")) )
}
#require(data.table)
# create a temporary data.table row ids from regions and counts
#from object
temp.dt=data.table(id = mat[, 1], data[mat[, 2], 5:ncol(data)])
coverage=NULL
numCs=NULL
numTs=NULL
id=NULL
.SD=NULL
numTs1=covered=NULL
# use data.table to sum up counts per region
sum.dt=temp.dt[,c(lapply(.SD,sum),covered=length(numTs1)),by=id]
sum.dt=sum.dt[covered>=cov.bases,]
temp.df=as.data.frame(regions) # get regions to a dataframe
#create a new methylRaw object to return
new.data=data.frame(#id =new.ids,
chr =temp.df[sum.dt$id,"seqnames"],
start =temp.df[sum.dt$id,"start"],
end =temp.df[sum.dt$id,"end"],
strand =temp.df[sum.dt$id,"strand"],
as.data.frame(sum.dt[,c(2:(ncol(sum.dt)-1)),with=FALSE]),
stringsAsFactors=FALSE)
}
# catch additional args
args <- list(...)
dir <- dirname(object@dbpath)
if( "dbdir" %in% names(args) ){
dir <- .check.dbdir(args$dbdir)
}
if(!( "suffix" %in% names(args) ) ){
suffix <- "_regions"
} else {
suffix <- paste0("_",args$suffix)
}
# filename <- paste0(paste(object@sample.ids,collapse = "_"),suffix,".txt")
filename <- paste0(gsub(".txt.bgz","",object@dbpath),suffix,".txt")
filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
destranded = ifelse( test = (strand.aware & !(object@destranded)),
FALSE, TRUE)
## creating the tabix header
slotList <- list(dbtype = object@dbtype,
sample.ids=object@sample.ids,
assembly=object@assembly,
context=object@context,
treatment=object@treatment,
destranded=destranded,
resolution="region")
tabixHead <- makeTabixHeader(slotList)
tabixHeadString <- .formatTabixHeader(class = "methylBaseDB",
tabixHead = tabixHead)
newdbpath <- applyTbxByOverlap(object@dbpath,chunk.size = chunk.size,
ranges=regions, dir=dir,
filename = filename,
return.type = "tabix",
FUN = getCounts,regions = regions ,
cov.bases = cov.bases,
strand.aware = strand.aware,
tabixHead = tabixHeadString)
readMethylBaseDB(dbpath = newdbpath,dbtype = object@dbtype,
sample.ids=object@sample.ids,
assembly=object@assembly,
context=object@context,treatment=object@treatment,
destranded=destranded,resolution="region")
} else {
tmp <- object[]
regionCounts(tmp,regions,cov.bases,strand.aware,save.db=FALSE)
}
}
)
#' @rdname regionCounts
#' @aliases regionCounts,methylBaseDB,GRangesList-method
setMethod("regionCounts", signature(object="methylBaseDB",regions="GRangesList"),
function(object,regions,cov.bases,strand.aware,chunk.size,
save.db=TRUE,...){
# combine and sort GRanges from List
regions <- unlist(regions)
regions <- sortSeqlevels(regions)
regions <- sort(regions,ignore.strand=TRUE)
regions <- unique(regions)
if(save.db) {
getCounts <- function(data,regions,cov.bases,strand.aware){
.setMethylDBNames(data)
# overlap data with regions
# convert data to GRanges without metacolumns
g.meth=with(data, GRanges(chr, IRanges(start=start, end=end),
strand =strand))
if(!strand.aware){
strand(g.meth)="*"
mat=IRanges::as.matrix( findOverlaps(regions,g.meth ) )
#mat=matchMatrix( findOverlaps(regions,g.meth ) )
}else{
mat=IRanges::as.matrix( findOverlaps(regions,g.meth) )
#mat=matchMatrix( findOverlaps(regions,as(object,"GRanges")) )
}
#require(data.table)
# create a temporary data.table row ids from regions and counts
#from object
temp.dt=data.table(id = mat[, 1], data[mat[, 2], 5:ncol(data)])
#dt=data.table::data.table(dt)
#dt=data.table(id=mat[,1],data[mat[,2],c(5,6,7)] )
#worked with data.table 1.7.7
coverage=NULL
numCs=NULL
numTs=NULL
id=NULL
.SD=NULL
numTs1=covered=NULL
# use data.table to sum up counts per region
sum.dt=temp.dt[,c(lapply(.SD,sum),covered=length(numTs1)),by=id]
sum.dt=sum.dt[covered>=cov.bases,]
temp.df=as.data.frame(regions) # get regions to a dataframe
#create a new methylRaw object to return
new.data=data.frame(#id =new.ids,
chr =temp.df[sum.dt$id,"seqnames"],
start =temp.df[sum.dt$id,"start"],
end =temp.df[sum.dt$id,"end"],
strand =temp.df[sum.dt$id,"strand"],
as.data.frame(sum.dt[,c(2:(ncol(sum.dt)-1)),with=FALSE]),
stringsAsFactors=FALSE)
}
# catch additional args
args <- list(...)
dir <- dirname(object@dbpath)
if( "dbdir" %in% names(args) ){
dir <- .check.dbdir(args$dbdir)
}
if(!( "suffix" %in% names(args) ) ){
suffix <- paste0("_","regions")
} else {
suffix <- paste0("_",args$suffix)
}
# filename <- paste0(paste(object@sample.ids,collapse = "_"),suffix,".txt")
filename <- paste0(gsub(".txt.bgz","",object@dbpath),suffix,".txt")
filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
destranded = ifelse( test = (strand.aware & !(object@destranded)),
FALSE, TRUE)
## creating the tabix header
slotList <- list(dbtype = object@dbtype,
sample.ids=object@sample.ids,
assembly=object@assembly,
context=object@context,
treatment=object@treatment,
destranded=destranded,
resolution="region")
tabixHead <- makeTabixHeader(slotList)
tabixHeadString <- .formatTabixHeader(class = "methylBaseDB",
tabixHead = tabixHead)
newdbpath <- applyTbxByOverlap(object@dbpath,chunk.size = chunk.size,
ranges=regions, dir=dir,
filename = filename,
return.type = "tabix",
FUN = getCounts,regions = regions ,
cov.bases = cov.bases,
strand.aware = strand.aware,
tabixHead = tabixHeadString)
readMethylBaseDB(dbpath = newdbpath,dbtype = object@dbtype,
sample.ids=object@sample.ids,
assembly=object@assembly,
context=object@context,treatment=object@treatment,
destranded=destranded,resolution="region")
} else {
tmp <- object[]
regionCounts(tmp,regions,cov.bases,strand.aware,save.db=FALSE)
}
}
)
# tileMethylCounts --------------------------------------------------------
#' @aliases tileMethylCounts,methylRawDB-method
#' @rdname tileMethylCounts-methods
setMethod("tileMethylCounts", signature(object="methylRawDB"),
function(object,win.size,step.size,cov.bases,mc.cores,
save.db=TRUE,...){
if(save.db) {
tileCount <- function(data,win.size,step.size,cov.bases,resolution)
{
.setMethylDBNames(data,"methylRawDB")
# overlap data with regions
# convert data to GRanges without metacolumns
g.meth=with(data, GRanges(chr, IRanges(start=start, end=end),
strand =strand))
chrs =as.character(unique(seqnames(g.meth)))
# get max length of feature covered chromosome
max.length=max(IRanges::end(g.meth))
# get start of sliding window
min.length=min(IRanges::end(g.meth))
win.start = floor(min.length/win.size)*win.size
#get sliding windows with covered CpGs
numTiles=floor( (max.length-(win.size-step.size)- win.start)/step.size )+1
all.wins=GRanges(seqnames=rep(chrs,numTiles),
ranges=IRanges(start=win.start + 1 + 0:(numTiles-1)*step.size,
width=rep(win.size,numTiles)) )
# clean up
rm(g.meth)
tmp <- new("methylRaw",data,sample.id="temp",resolution=object@resolution)
# clean up
rm(data)
res <- regionCounts(tmp,all.wins,cov.bases,strand.aware=FALSE)
# clean up
rm(tmp)
return(getData(res))
}
# catch additional args
args <- list(...)
dir <- dirname(object@dbpath)
if( "dbdir" %in% names(args) ){
if( !(is.null(args$dbdir)) ){
dir <- .check.dbdir(args$dbdir)
}
}
if(!( "suffix" %in% names(args) ) ){
suffix <- "_tiled"
} else {
suffix <- paste0("_",args$suffix)
}
# filename <- paste0(paste(object@sample.id,collapse = "_"),
# suffix,".txt")
filename <- paste0(gsub(".txt.bgz","",object@dbpath),suffix,".txt")
filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
## creating the tabix header
slotList <- list(sample.id=object@sample.id,
assembly=object@assembly,
context =object@context,
resolution="region",
dbtype = object@dbtype)
tabixHead <- makeTabixHeader(slotList)
tabixHeadString <- .formatTabixHeader(class = "methylRawDB",
tabixHead = tabixHead)
newdbpath <- applyTbxByChr(object@dbpath, return.type = "tabix",
dir = dir,filename = filename,
FUN = tileCount, win.size = win.size,
step.size = step.size,
cov.bases = cov.bases,
resolution=object@resolution,
mc.cores = mc.cores,
tabixHead = tabixHeadString)
readMethylRawDB(dbpath = newdbpath,sample.id=object@sample.id,
assembly=object@assembly, context =object@context,
resolution="region",
dbtype = object@dbtype)
} else {
tmp <- object[]
tileMethylCounts(tmp,win.size,step.size,cov.bases,mc.cores)
}
}
)
#' @aliases tileMethylCounts,methylRawListDB-method
#' @rdname tileMethylCounts-methods
setMethod("tileMethylCounts", signature(object="methylRawListDB"),
function(object,win.size,step.size,cov.bases,mc.cores,save.db=TRUE,...){
if(save.db){
args <- list(...)
if( !( "dbdir" %in% names(args)) ){
dbdir <- NULL
} else { dbdir <- basename(.check.dbdir(args$dbdir)) }
new.list=lapply(object,tileMethylCounts,win.size,step.size,
cov.bases,mc.cores,save.db,dbdir=dbdir,...)
new("methylRawListDB", new.list,treatment=object@treatment)
} else {
new.list=lapply(object,tileMethylCounts,win.size,step.size,
cov.bases,save.db=FALSE)
new("methylRawList", new.list,treatment=object@treatment)
}
})
#' @aliases tileMethylCounts,methylBaseDB-method
#' @rdname tileMethylCounts-methods
setMethod("tileMethylCounts", signature(object="methylBaseDB"),
function(object,win.size,step.size,cov.bases,
mc.cores,save.db=TRUE,...){
if(save.db) {
tileCount <- function(data,win.size,step.size,cov.bases,
resolution,destranded) {
.setMethylDBNames(data,"methylBaseDB")
# overlap data with regions
# convert data to GRanges without metacolumns
g.meth=with(data, GRanges(chr, IRanges(start=start, end=end),
strand =strand))
chrs =as.character(unique(seqnames(g.meth)))
# get max length of feature covered chromosome
max.length=max(IRanges::end(g.meth))
# get start of sliding window
min.length=min(IRanges::end(g.meth))
win.start = floor(min.length/win.size)*win.size
#get sliding windows with covered CpGs
numTiles=floor( (max.length-(win.size-step.size)- win.start)/step.size )+1
all.wins=GRanges(seqnames=rep(chrs,numTiles),
ranges=IRanges(start=win.start + 1 + 0:(numTiles-1)*step.size,
width=rep(win.size,numTiles)) )
# clean up
rm(g.meth)
tmp <- new("methylBase",data,
resolution=object@resolution,
destranded=object@destranded)
# clean up
rm(data)
res <- regionCounts(tmp,all.wins,cov.bases,strand.aware=FALSE)
# clean up
rm(tmp)
getData(res)
}
# catch additional args
args <- list(...)
dir <- dirname(object@dbpath)
if( "dbdir" %in% names(args) ){
dir <- .check.dbdir(args$dbdir)
}
if(!( "suffix" %in% names(args) ) ){
suffix <- paste0("_","tiled")
} else {
suffix <- paste0("_",args$suffix)
}
# filename <- paste0(paste(object@sample.ids,collapse = "_"),suffix,".txt")
filename <- paste0(gsub(".txt.bgz","",object@dbpath),suffix,".txt")
filename <- .checkTabixFileExists(filename)
filename <- basename(filename)
## creating the tabix header
slotList <- list(dbtype = object@dbtype,
sample.ids=object@sample.ids,
assembly=object@assembly,context=object@context,
treatment=object@treatment,
destranded=TRUE,resolution="region")
tabixHead <- makeTabixHeader(slotList)
tabixHeadString <- .formatTabixHeader(class = "methylBaseDB",
tabixHead = tabixHead)
newdbpath <- applyTbxByChr(object@dbpath, return.type = "tabix",
dir = dir,filename = filename,
FUN = tileCount, win.size = win.size,
step.size = step.size,
cov.bases = cov.bases,
resolution=object@resolution,
mc.cores = mc.cores,
tabixHead = tabixHeadString)
readMethylBaseDB(dbpath = newdbpath,dbtype = object@dbtype,
sample.ids=object@sample.ids,
assembly=object@assembly,context=object@context,
treatment=object@treatment,
destranded=TRUE,resolution="region")
} else {
tmp <- object[]
tileMethylCounts(tmp,win.size,step.size,cov.bases,mc.cores)
}
}
)
# BedGraph methods --------------------------------------------------------
# to be able to write w/o scientific notation, there might be better methods
.write.bed<-function(...){
defsci=options()$scipen
options(scipen = 999)
write.table(...)
options(scipen = defsci)
}
#' @rdname bedgraph-methods
#' @aliases bedgraph,methylRawDB-method
setMethod("bedgraph", signature(methylObj="methylRawDB"),
function(methylObj,file.name,col.name,unmeth,log.transform,negative
,add.on,chunk.size){
if(!col.name %in% c('coverage', 'numCs','numTs','perc.meth') ){
stop("col.name argument is not one of 'coverage', 'numCs','numTs','perc.meth'")
}
bedgr <- function(data,col.name,file.name,unmeth,log.transform,
negative,add.on,sample.id){
data <- as.data.table(data)
.setMethylDBNames(df = data,methylDBclass = "methylRawDB")
chr=start=end=score=numCs=coverage=.=NULL
if(col.name=="perc.meth"){
df= data[,.(chr,start=start-1,end,score=100*numCs/coverage )]
}else{
df=data[,.(chr,start=start-1,end,get(col.name) )]
df <- as.data.frame(df)
names(df)[4] <- col.name
if(log.transform){
df[,4]=log10(df[,4])
}
if(negative){
df[,4]=-1*(df[,4])
}
}
if(is.null(file.name)){
return(df)
}else{
if(unmeth & col.name=="perc.meth")
{
# write meth data to single file
.write.bed(df,file=paste0(file.name,"_meth"),quote=FALSE,
col.names=FALSE,row.names=FALSE,sep="\t",
append=TRUE)
# write unmeth data to single file
df[,4]=100-df[,4]
.write.bed(df,file=paste0(file.name,"_unmeth"),quote=FALSE,
col.names=FALSE,row.names=FALSE,sep="\t",
append=TRUE)
}else{
.write.bed(df,file=file.name,quote=FALSE,col.names=FALSE,
row.names=FALSE,sep="\t",append=TRUE)
}
}
}
if(is.null(file.name)){
applyTbxByChunk(methylObj@dbpath,chunk.size = chunk.size,
return.type = "data.frame", FUN = bedgr,
col.name = col.name, file.name = file.name,
unmeth = unmeth, log.transform=log.transform,
negative= negative,
add.on = add.on, sample.id = methylObj@sample.id)
} else {
rndFile=paste(sample(c(0:9, letters, LETTERS),9, replace=TRUE),collapse="")
filename2=paste(file.name,rndFile,sep="_")
applyTbxByChunk(methylObj@dbpath,chunk.size = chunk.size,
return.type = "data.frame", FUN = bedgr,
col.name = col.name, file.name= filename2,
unmeth = unmeth, log.transform=log.transform,
negative= negative,
add.on = add.on, sample.id = methylObj@sample.id)
if(unmeth & col.name=="perc.meth")
{
# combine single files produced by bedgr function
track.line=paste(
"track type=bedGraph name='",methylObj@sample.id," METH Cs",
"' description='",methylObj@sample.id," METH Cs",
"' visibility=full color=255,0,0 maxHeightPixels=80:80:11 ",
add.on,sep="")
cat(track.line,"\n",file=file.name)
file.append(file.name,paste0(filename2,"_meth"))
track.line2=paste(
"track type=bedGraph name='",methylObj@sample.id," UNMETH Cs",
"' description='",methylObj@sample.id," UNMETH Cs",
"' visibility=full color=0,0,255 maxHeightPixels=80:80:11 ",
add.on,sep="")
cat(track.line2,"\n",file=file.name,append=TRUE)
file.append(file.name,paste0(filename2,"_unmeth"))
}else{
track.line=paste(
"track type=bedGraph name='",methylObj@sample.id," ",col.name,
"' description='",methylObj@sample.id," ",col.name,
"' visibility=full color=255,0,0 maxHeightPixels=80:80:11 ",
add.on,sep="")
cat(track.line,"\n",file=file.name)
file.append(file.name,filename2)
}
# tidy up
unlink(list.files(path = dirname(file.name),pattern = rndFile,
full.names = TRUE))
}
}
)
#' @rdname bedgraph-methods
#' @aliases bedgraph,methylRawListDB-method
setMethod("bedgraph", signature(methylObj="methylRawListDB"),
function(methylObj,file.name,col.name,unmeth,log.transform,negative,add.on,chunk.size){
if(!col.name %in% c('coverage', 'numCs','numTs','perc.meth') ){
stop("col.name argument is not one of 'coverage', 'numCs','numTs','perc.meth' options")
}
if( is.null(file.name) ){
result=list()
for(i in 1:length(methylObj))
{
result[[ methylObj[[i]]@sample.id ]]=bedgraph(methylObj[[i]],
file.name=NULL,
col.name=col.name,
unmeth=FALSE,
log.transform=log.transform,
negative=negative,
chunk.size=chunk.size)
}
return(result)
}else{
bedgraph(methylObj[[1]],file.name=file.name,
col.name=col.name,unmeth=unmeth,log.transform=log.transform,
negative=negative,chunk.size=chunk.size)
for(i in 2:length(methylObj))
{
bedgraph(methylObj[[i]],file.name=paste(file.name,i,sep = "_"),
col.name=col.name,unmeth=unmeth,log.transform=log.transform,
negative=negative,chunk.size=chunk.size)
file.append(file.name,paste(file.name,i,sep = "_"))
}
unlink(list.files(path = dirname(file.name),
pattern = paste0(basename(file.name),
"_[[:digit:]]+"),full.names = TRUE))
}
})
#' @rdname bedgraph-methods
#' @aliases bedgraph,methylDiffDB-method
setMethod("bedgraph", signature(methylObj="methylDiffDB"),
function(methylObj,file.name,col.name,log.transform,
negative,add.on,chunk.size){
if(! col.name %in% c('pvalue','qvalue', 'meth.diff') )
{
stop("col.name argument is not one of 'pvalue','qvalue', 'meth.diff'")
}
bedgr <- function(data,col.name,file.name,log.transform,negative,add.on,
sample.id){
data <- as.data.table(data)
.setMethylDBNames(df = data,methylDBclass = "methylDiffDB")
chr=start=end=.=NULL
df=data[,.(chr,start=start-1,end,get(col.name) )]
df <- as.data.frame(df)
names(df)[4] <- col.name
if(log.transform){
df[,4]=log10(df[,4])
}
if(negative){
df[,4]=-1*(df[,4])
}
if(is.null(file.name)){
return(df)
}else{
.write.bed(df,file=file.name,quote=FALSE,col.names=FALSE,
row.names=FALSE,sep="\t",append=TRUE)
}
}
if(is.null(file.name)){
applyTbxByChunk(methylObj@dbpath,chunk.size = chunk.size,
return.type = "data.frame", FUN = bedgr,
col.name = col.name, file.name= file.name,
log.transform=log.transform, negative= negative,
add.on = add.on, sample.id = methylObj@sample.id)
} else {
rndFile=paste(sample(c(0:9, letters, LETTERS),9, replace=TRUE),
collapse="")
filename2=paste(file.name,rndFile,sep="_")
txtPath <- applyTbxByChunk(methylObj@dbpath,
chunk.size = chunk.size,
return.type = "data.frame",
FUN = bedgr,
col.name = col.name,
file.name= filename2,
log.transform=log.transform,
negative= negative,
add.on = add.on,
sample.id = methylObj@sample.id)
track.line=paste(
"track type=bedGraph name='",file.name,"' description='",col.name,
"' visibility=full color=255,0,255 altColor=102,205,170 maxHeightPixels=80:80:11 ",
add.on,sep="")
cat(track.line,"\n",file=file.name)
file.append(file.name,filename2)
unlink(filename2)
}
}
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.