#' This function takes as input a CX report file produced by Bismark
#' and returns a \code{\link{GRanges}} object with four metadata columns
#' The file represents the bisulfite sequencing methylation data.
#'
#' @title Read Bismark
#' @param file The filename (including path) of the methylation
#' (CX report generated by Bismark) to be read.
#' @return the methylation data stored as a \code{\link{GRanges}}
#' object with four metadata columns (see \code{\link{methylationDataList}}).
#' @examples
#'
#' # load methylation data object
#' data(methylationDataList)
#'
#' # save the one datasets into a file
#' saveBismark(methylationDataList[["WT"]], "chr3test_a_thaliana_wt.CX_report")
#'
#' # load the data
#' methylationDataWT <- readBismark("chr3test_a_thaliana_wt.CX_report")
#'
#' #check that the loading worked
#' all(methylationDataWT == methylationDataList[["WT"]])
#'
#' @author Nicolae Radu Zabet and Jonathan Michael Foonlan Tsang
#'
#' @export
readBismark <- function(file) {
.stopIfNotAll(c(!is.null(file),
is.character(file),
file.exists(file)),
" file does not exist")
cat("Reading file: ",file,"\n",sep="")
cx <- scan(
file = file,
what = list(character(), integer(), character(), integer(), integer(), character(), character()),
sep = "\t",
skip = 0);
chrs <- unique(cx[[1]])
chrs <- chrs[order(chrs)]
dat <- data.frame(
chr = factor(cx[[1]], levels = chrs),
pos = as.integer(cx[[2]]),
strand = factor(cx[[3]], levels =c("+","-")),
context = factor(cx[[6]], levels = c("CG","CHG","CHH")),
trinucleotide_context = cx[[7]],
readsM = as.integer(cx[[4]]),
readsN = as.integer((cx[[4]]+cx[[5]])));
#dat <- dat[with(dat, order(chr, pos, strand)), ];
dat <- GRanges(seqnames = dat$chr,
ranges = IRanges(start = dat$pos, end = dat$pos),
strand = dat$strand,
context = dat$context,
readsM = dat$readsM,
readsN = dat$readsN,
trinucleotide_context = dat$trinucleotide_context);
cat("Finished reading file: ",file,"\n",sep="")
return(dat)
}
#' This function takes as input a vector of CX report file produced by Bismark
#' and returns a \code{\link{GRanges}} object with four metadata columns
#' (see \code{\link{methylationDataList}}). The file represents the pooled
#' bisulfite sequencing data.
#'
#' @title Read Bismark pool
#' @param files The filenames (including path) of the methylation
#' (CX report generated with Bismark) to be read
#' @return the methylation data stored as a \code{\link{GRanges}}
#' object with four metadata columns (see \code{\link{methylationDataList}}).
#' @examples
#'
#' # load methylation data object
#' data(methylationDataList)
#'
#' # save the two datasets
#' saveBismark(methylationDataList[["WT"]],
#' "chr3test_a_thaliana_wt.CX_report")
#' saveBismark(methylationDataList[["met1-3"]],
#' "chr3test_a_thaliana_met13.CX_report")
#'
#' # reload the two datasets and pool them
#' filenames <- c("chr3test_a_thaliana_wt.CX_report",
#' "chr3test_a_thaliana_met13.CX_report")
#' methylationDataPool <- readBismarkPool(filenames)
#'
#' @author Nicolae Radu Zabet and Jonathan Michael Foonlan Tsang
#'
#' @export
readBismarkPool <- function(files) {
.stopIfNotAll(c(!is.null(files),
length(files) > 0),
" files is a vector containing filenames")
#read the data
data <- GRangesList()
for(i in 1:length(files)){
data <- c(data, GRangesList(readBismark(files[i])))
}
#pool the data
pooledData <- poolMethylationDatasets(data)
return(pooledData)
}
#' This function takes as input a \code{\link{GRanges}} object generated with
#' \code{\link{readBismark}} and saves the output to a file using
#' Bismark CX report format.
#'
#' @title Save Bismark
#' @param methylationData the methylation data stored as a \code{\link{GRanges}}
#' object with four metadata columns (see \code{\link{methylationDataList}}).
#' @param filename the filename where the data will be saved.
#' @return Invisibly returns \code{NULL}
#' @examples
#'
#' # load methylation data object
#' data(methylationDataList)
#'
#' # save one dataset to a file
#' saveBismark(methylationDataList[["WT"]], "chr3test_a_thaliana_wt.CX_report")
#'
#' @author Nicolae Radu Zabet
#'
#' @export
saveBismark <- function(methylationData, filename){
.stopIfNotAll(c(!is.null(filename),
is.character(filename),
length(filename)==1),
" filename should specify the filename, where the methylation data will be saved")
.validateMethylationData(methylationData)
chrs <- as.character(seqnames(methylationData))
start <- start(methylationData)
strand <- as.character(strand(methylationData))
readsM <- methylationData$readsM
readsU <- methylationData$readsN - methylationData$readsM
context <- methylationData$context
trinucleotide_context <- methylationData$trinucleotide_context
dataFile <- data.frame("chr"=chrs, "position"=start, "strand"=strand, "count methylated"=readsM, "count unmethylated"=readsU, "context"=context, "trinucleotide context"=trinucleotide_context)
write.table(dataFile, file=filename, quote=FALSE, row.names = FALSE, col.names = FALSE, sep = "\t")
invisible(NULL)
}
#' This function takes as input two \code{\link{GRanges}} objects of CX reports
#' (Bismark) for two conditions and returns a \code{\link{GRanges}} object with
#' five metadata columns
#'
#' @title Join methylation data
#' @param cx1 a \code{\link{GRanges}} object of CX reports (Bismark) with three
#' metadata columns the cytosine context (CG, CHG or CHH), the number of
#' methylated reads and the total number of reads at that position in condition
#' 1
#' @param cx2 a \code{\link{GRanges}} object of CX reports (Bismark) with three
#' metadata columns the cytosine context (CG, CHG or CHH), the number of
#' methylated reads and the total number of reads at that position in condition
#' 2
#' @return a the methylation data stored as a \code{\link{GRanges}}
#' object with six metadata columns (see \code{\link{methylationDataList}}).
.joinMethylationData <- function(cx1, cx2){
overlaps <- findOverlaps(cx1, cx2)
indexes <- which(!duplicated(queryHits(overlaps)))
methylData <- GRanges(seqnames = seqnames(cx1[queryHits(overlaps)[indexes]]),
ranges = ranges(cx1[queryHits(overlaps)[indexes]]),
strand = strand(cx1[queryHits(overlaps)[indexes]]),
context = cx1$context[queryHits(overlaps)[indexes]],
trinucleotide_context = cx1$trinucleotide_context[queryHits(overlaps)[indexes]],
readsM1 = cx1$readsM[queryHits(overlaps)[indexes]],
readsN1 = cx1$readsN[queryHits(overlaps)[indexes]],
readsM2 = cx2$readsM[subjectHits(overlaps)[indexes]],
readsN2 = cx2$readsN[subjectHits(overlaps)[indexes]])
return(methylData)
}
#' This function pools together multiple methylation datasets.
#'
#' @title Pool methylation data
#' @param methylationDataList a \code{\link{GRangesList}} object where each
#' element of the list is a \code{\link{GRanges}} object with the methylation
#' data in the corresponding condition (see \code{\link{methylationDataList}}).
#' @return the methylation data stored as a \code{\link{GRanges}}
#' object with four metadata columns (see \code{\link{methylationDataList}}).
#'
#' @examples
#' # load methylation data object
#' data(methylationDataList)
#'
#' # pools the two datasets together
#' pooledMethylationData <- poolMethylationDatasets(methylationDataList)
#'
#' @author Nicolae Radu Zabet
#'
#' @export
poolMethylationDatasets <- function(methylationDataList){
.validateMethylationDataList(methylationDataList)
pooledMethylationData <- methylationDataList[[1]]
if(length(methylationDataList) > 1){
for(i in 2:length(methylationDataList)){
cat("joining two datasets ...\n")
buffer <- .joinMethylationData(pooledMethylationData, methylationDataList[[i]])
pooledMethylationData <- GRanges(seqnames = seqnames(buffer),
ranges = ranges(buffer),
strand = strand(buffer),
context = buffer$context,
readsM = (buffer$readsM1 + buffer$readsM2),
readsN = (buffer$readsN1 + buffer$readsN2),
trinucleotide_context = buffer$trinucleotide_context)
cat("the sum was performed ...\n")
}
}
return(pooledMethylationData)
}
#' This function pools together two methylation datasets.
#'
#' @title Pool two methylation datasets
#' @param methylationData1 a \code{\link{GRanges}} object with the methylation
#' data (see \code{\link{methylationDataList}}).
#' @param methylationData2 a \code{\link{GRanges}} object with the methylation
#' data (see \code{\link{methylationDataList}}).
#' @return the methylation data stored as a \code{\link{GRanges}}
#' object with four metadata columns (see \code{\link{methylationDataList}}).
#'
#' @examples
#' # load methylation data object
#' data(methylationDataList)
#'
#' # save the two datasets together
#' pooledMethylationData <- poolTwoMethylationDatasets(methylationDataList[[1]],
#' methylationDataList[[2]])
#'
#' @author Nicolae Radu Zabet
#'
#' @export
poolTwoMethylationDatasets <- function(methylationData1, methylationData2){
.validateMethylationData(methylationData1)
.validateMethylationData(methylationData2)
cat("joining two datasets ...\n")
buffer <- .joinMethylationData(methylationData1, methylationData2)
pooledMethylationData <- GRanges(seqnames = seqnames(buffer),
ranges = ranges(buffer),
strand = strand(buffer),
context = buffer$context,
readsM = (buffer$readsM1 + buffer$readsM2),
readsN = (buffer$readsN1 + buffer$readsN2),
trinucleotide_context = buffer$trinucleotide_context)
cat("the sum was performed ...\n")
return(pooledMethylationData)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.