#' Create BSSeq object from methylation tabix file
#'
#' @param methy the NanoMethResult object or path to the methylation tabix file.
#' @param out_folder the folder to store intermediate files. One file is created
#' for each sample and contains columns "chr", "pos", "total" and
#' "methylated".
#' @param verbose TRUE if progress messages are to be printed
#'
#' @return a BSSeq object.
#' @export
#'
#' @examples
#' nmr <- load_example_nanomethresult()
#' bsseq <- methy_to_bsseq(nmr)
methy_to_bsseq <- function(
methy,
out_folder = tempdir(),
verbose = TRUE
) {
if (is(methy, "NanoMethResult")) {
methy_path <- NanoMethViz::methy(methy)
} else {
methy_path <- methy
assert_that(fs::file_exists(methy_path))
}
timed_log("creating intermediate files...", verbose = verbose)
files <- convert_methy_to_dss(methy_path, out_folder)
if (is(methy, "NanoMethResult")) {
sample_anno <- NanoMethViz::samples(methy)
} else {
sample_anno <- tibble::tibble(sample = files$sample)
}
if (verbose) {
timed_log("creating bsseq object...")
}
out <- create_bsseq_from_files(files$file_path, sample_anno, verbose = verbose)
if (verbose) {
timed_log("done")
}
out
}
convert_methy_to_dss <- function(
methy,
out_folder
) {
assert_that(
fs::file_exists(methy)
)
if (!fs::dir_exists(out_folder)) {
fs::dir_create(out_folder, recurse = TRUE)
}
samples <- convert_methy_to_dss_cpp(
fs::path_expand(methy),
fs::path_expand(out_folder)
)
data.frame(
sample = samples,
file_path = path(out_folder, paste0(samples, ".txt"))
)
}
#' @importFrom S4Vectors DataFrame
#' @importFrom SummarizedExperiment colData
#' @importFrom purrr map
#' @importFrom dplyr select distinct arrange mutate
#' @importFrom bsseq BSseq
create_bsseq_from_files <- function(paths, sample_anno, verbose = TRUE) {
readr::local_edition(1) # temporary fix for vroom bad value
read_dss <- purrr::partial(
read_tsv,
col_types = cols(
chr = col_character(),
pos = col_double(),
total = col_double(),
methylated = col_double()
)
)
samples <- sample_anno$sample
if (verbose) {
timed_log("reading in parsed data...")
}
# read in data
dat <- purrr::map(paths, read_dss)
if (verbose) {
timed_log("constructing matrices...")
}
# get unique positions
combine_distinct_gpos <- function(x, y) {
rbind(
dplyr::select(x, "chr", "pos"),
dplyr::select(y, "chr", "pos")
) %>%
dplyr::distinct()
}
unique_pos_df <- purrr::reduce(
dat,
combine_distinct_gpos) %>%
dplyr::arrange(.data$chr, .data$pos) %>%
dplyr::mutate(id = paste(.data$chr, .data$pos))
# create methylation matrix
methylation_mat <- matrix(
0,
nrow = nrow(unique_pos_df),
ncol = length(dat),
dimnames = list(NULL, samples))
for (i in seq_along(dat)) {
row_inds <- paste(dat[[i]]$chr, dat[[i]]$pos) %>%
factor(levels = unique_pos_df$id)
methylation_mat[row_inds, i] <- dat[[i]]$methylated
}
# create coverage matrix
cov_mat <- matrix(
0,
nrow = nrow(unique_pos_df),
ncol = length(dat),
dimnames = list(NULL, samples))
for (i in seq_along(dat)) {
row_inds <- paste(dat[[i]]$chr, dat[[i]]$pos) %>%
factor(levels = unique_pos_df$id)
cov_mat[row_inds, i] <- dat[[i]]$total
}
# create BSseq object
result <- bsseq::BSseq(
chr = unique_pos_df$chr,
pos = unique_pos_df$pos,
M = methylation_mat,
Cov = cov_mat,
sampleNames = samples
)
SummarizedExperiment::colData(result) <- S4Vectors::DataFrame(sample_anno)
rownames(SummarizedExperiment::colData(result)) <- samples
result
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.