#### readEPIC
# Author: Tyler Gorrie-Stone
# LS add in support for epicv2 01-09-2023
# Last Modified: 19-01-2016
# Creation Date: 20-11-2015
# Based heavily on 'methylumIDAT' and modified to read Epic arrays.
# Functionality for 450k and 27k arrays remains (relatively) unchanged.
# Although certain stops have been removed/moved to a different
# place due to the implausibility of determining sample based on name
# As differing between the three chips requires reading in .idats.
###
# Usage:
# readEPIC : Instead of inputting the barcodes, it is possible to input
# path to a directory (containing idats or folders containing idats).
# This constructs a vector of the barcodes which is passed to
# methylumIDAT.
#
# methylumIDATepic: Usage remains the same as methylumIDAT. Although
# might be slightly changed due to the introduction of recursive=T
# argument in the list.files()
###
# Leaving 27k and 450k array functionality in function for the purpose
# of convenience, however may be removed in the future when such arrays
# are not used anymore.
###
# {{{ # Utility function for dealing with single samples (still not 100% perfect...)
columnMatrix <- function(x, row.names = NULL) {
if (is.null(dim(x)[2]))
dim(x) = c(length(x), 1)
if (!is.null(row.names))
rownames(x) = row.names
return(x)
}
# }}}
# {{{ getMethylationBeadMappers2
## require()s the appropriate package for annotating a chip & sets up mappings
## Potentially may temporarily require more indepth ordering file for 450k
## arrays until the 450k is completely replaced by epic chips.
getMethylationBeadMappers2 <- function(chipType = c("450k", "27k", "Epic", "Epicv2", ".manifest"),
genome = c("hg19", "hg18", "hg38")) {
#### genomic annotation package
genome <- match.arg(genome) ## default to FDb.InfiniumMethylation.hg19
pkg <- paste0("FDb.InfiniumMethylation.", genome)
require(pkg, character.only = TRUE)
#### I think we can now do away with this
chipType <- sub("^IlluminaHumanMethylation", "", chipType)
#### this appears to be dispensable cruft. chipType is a string
if (class(chipType) %in% c("NChannelSet", "MethyLumiSet", "MethyLumiM")) {
chipType <- sub("^IlluminaHumanMethylation", "", annotation(chipType))
chipType <- sub(".db$", "", annotation(chipType))
}
chipType <- match.arg(chipType) # backwards compatibility purposes
#### have added .manifest chiptype to work with readPepo
## addressA=U, addressB=M
getProbes <- switch(chipType, `27k` = function(color = NULL, ...) {
data(hm27.ordering)
what <- c("Probe_ID", "M", "U")
r <- split(hm27.ordering[, what], hm27.ordering$col)
if (is.null(color)) return(r) else return(r[[substr(color, 1, 1)]])
}, `450k` = function(design = NULL, color = NULL, ...) {
data(hm450.ordering)
what <- c("Probe_ID", "M", "U")
r <- split(hm450.ordering[, c(what, "col")], hm450.ordering$DESIGN)
r$I <- split(r$I[, what], r$I$col)
r$II <- r$II[, what]
if (is.null(design)) {
return(r)
} else if (design == "I" && !is.null(color)) {
return(r[[design]][[substr(color, 1, 1)]])
} else {
return(r[[design]])
}
}, Epic = { function(design = NULL, color = NULL, ...) {
# data(epic.ordering)
epic.ordering <<- generateManifest("EPIC")
what <- c("Probe_ID", "M", "U")
r <- split(epic.ordering[, c(what, "col")], epic.ordering$DESIGN)
r$I <- split(r$I[, what], r$I$col)
r$II <- r$II[, what]
if (is.null(design)) {
r <- return(r)
} else if (design == "I" && !is.null(color)) {
r <- return(r[[design]][[substr(color, 1, 1)]])
} else {
r <- return(r[[design]])
}}
}, Epicv2 = function(design = NULL, color = NULL, ...) {
# data(epic.ordering)
epicV2.ordering <<- generateManifest("EPICv2")
what <- c("Probe_ID", "M", "U")
r <- split(epicV2.ordering[, c(what, "col")], epicV2.ordering$DESIGN)
r$I <- split(r$I[, what], r$I$col)
r$II <- r$II[, what]
if (is.null(design)) {
r <- return(r)
} else if (design == "I" && !is.null(color)) {
r <- return(r[[design]][[substr(color, 1, 1)]])
} else {
r <- return(r[[design]])
}
} , .manifest = function(design = NULL, color = NULL, ...) {
# data(epic.ordering)
# epicV2.ordering <<- generateManifest("EPICv2")
what <- c("Probe_ID", "M", "U")
#r <- split(epicV2.ordering[, c(what, "col")], epicV2.ordering$DESIGN)
r <- list()
#r$I <- split(r$I[, what], r$I$col)
I <- data.frame(.manifest@data$TypeI)
nI <- colnames(I)
nI[1:3] <- c("Probe_ID", "U", "M")
colnames(I) <- nI
r$I <- split(I[,what], I$Color)
names(r$I)<- c('G','R')
r$II <- data.frame(.manifest@data$TypeII)
colnames(r$II)[c(1,2)] <- nI[c(1,2)]
if (is.null(design)) {
r <- return(r)
} else if (design == "I" && !is.null(color)) {
r <- return(r[[design]][[substr(color, 1, 1)]])
} else {
r <- return(r[[design]])
}
}
)
getControls <- switch(chipType, `27k` = function() {
data(hm27.controls)
return(hm27.controls)
}, `450k` = function() {
data(hm450.controls)
return(hm450.controls)
}, Epic = function() {
data(epic.controls)
return(epic.controls)
}, Epicv2 = function() {
data(epicV2.controls)
return(epicV2.controls)
}, .manifest = function(){
kont <- data.frame(.manifest@data$TypeControl)
colnames(kont) <- c("Address", "Type", "Color_Channel", "Name")
kont
}
)
## we may be able to eliminate ordering <<no, this is the fData
getOrdering <- switch(chipType, `27k` = function() {
ord <- c("Probe_ID", "DESIGN", "COLOR_CHANNEL")
return(hm27.ordering[, ord])
}, `450k` = function() {
ord <- c("Probe_ID", "DESIGN", "COLOR_CHANNEL")
return(hm450.ordering[, ord])
}, Epic = function() {
ord <- c("Probe_ID", "DESIGN", "COLOR_CHANNEL")
return(epic.ordering[, ord])
}, Epicv2 = function() {
ord <- c("Probe_ID", "DESIGN", "COLOR_CHANNEL")
return(epicV2.ordering[, ord])
}, .manifest = function(){
ord <- c("Probe_ID", "DESIGN", "COLOR_CHANNEL")
man1 <- data.frame(
Probe_ID = .manifest@data$TypeI$Name,
DESIGN = 'I',
COLOR_CHANNEL = .manifest@data$TypeI$Color
)
man2 <- data.frame(
Probe_ID = .manifest@data$TypeII$Name,
DESIGN = 'II',
COLOR_CHANNEL = ''
)
rbind(man1,man2)
}
)
mapper <- list(probes = getProbes, controls = getControls, ordering = getOrdering)
return(mapper)
} # }}}
#{{{ IDATtoMatrix2: process a single IDAT (just the mean intensities) {{{
IDATtoMatrix2 <- function(x, fileExts = list(Cy3 = "Grn", Cy5 = "Red"), idatPath = ".") {
chs = names(fileExts)
names(chs) = fileExts
processed = lapply(fileExts, function(ch) {
ext = paste(ch, "idat", sep = ".")
message(sprintf("Reading in: %s", paste(x, ext, sep = "_")))
dat = readIDAT(file.path(idatPath, paste(x, ext, sep = "_")))
Quants = data.matrix(dat$Quants)
colnames(Quants) = paste(chs[ch], colnames(Quants), sep = ".")
return(list(Quants = Quants, RunInfo = dat$RunInfo, ChipType = dat$ChipType))
})
probe.data = do.call(cbind, lapply(processed, function(x) x[["Quants"]]))
probe.data <- probe.data[, c("Cy3.Mean", "Cy5.Mean", "Cy3.NBeads", "Cy5.NBeads")]
# Nbeads for beadcount switch TGS, at this point add something that takes
# the beadcount with the lower number of beads.
attr(probe.data, "RunInfo") = processed[[1]][["RunInfo"]]
attr(probe.data, "ChipType") = processed[[1]][["ChipType"]]
return(probe.data)
} # }}}
# {{{ IDATsToMatrices2
IDATsToMatrices2 <- function(barcodes, fileExts = list(Cy3 = "Grn", Cy5 = "Red"),
parallel = F, idatPath = ".") {
names(barcodes) = as.character(barcodes)
if (parallel) {
mats = .mclapply(barcodes, IDATtoMatrix2, fileExts = fileExts, idatPath = idatPath)
} else {
mats = lapply(barcodes, IDATtoMatrix2, fileExts = fileExts, idatPath = idatPath)
}
names(mats) = as.character(barcodes)
return(mats)
} # }}}
#{{{ extractAssayDataFromList2
## might consider doing the following in C++ via Rcpp to avoid copying data
extractAssayDataFromList2 <- function(assay, mats, fnames) {
d <- do.call("cbind", lapply(mats, function(x) x[fnames, assay])) # Select Common Probes
if (!is.matrix(d))
d <- data.matrix(d)
colnames(d) <- names(mats)
rownames(d) <- fnames
return(d)
} # }}}
#{{{ DataToNChannelSet2
## a faster rewrite of DFsToNChannelSet() so that I can decommission it...
## Stop-check to ensure different platforms are read simulatenously.
## note EPIC and EPICv2 have the same chip type and there are several
## different EPIC manifests
DataToNChannelSet2 <- function(
mats,
chans = c(Cy3 = "GRN", Cy5 = "RED"),
parallel = F,
protocol.data = F,
IDAT = TRUE,
force = F
) {
epic = hm27 = hm450 = 0
qw <- unlist(lapply(mats, function(x) attr(x, "ChipType")))
epic = sum(grepl("BeadChip 8x5", qw))
message(paste(epic, "HumanMethylationEpic / Epicv2 samples found"))
hm450 = sum(grepl("BeadChip 12x8", qw))
message(paste(hm450, "HumanMethylation450 samples found"))
hm27 = sum(grepl("BeadChip 12x1", qw))
message(paste(hm27, "HumanMethylation27 samples found"))
if (
hm27 > 0 && hm450 > 0 |
hm27 > 0 && epic > 0 |
hm450 > 0 && epic > 0
) {
stop(
"Cannot process multiple platforms simultaneously;
please run separately."
)
}
stopifnot(is(mats, "list"))
assayNames = paste0(names(chans), c(".Mean"))
assayNames = c(assayNames, paste0(names(chans), c(".NBeads")))
names(assayNames) = assayNames
assaylengths <- sapply(mats, nrow)
names(assaylengths) <- names(mats)
# Minfi Method for handling early version epic IDATS. from read.meth.R by
# Kaspar Hansen.
if (length(unique(assaylengths)) > 1) {
commonAddresses <- as.character(
Reduce(
"intersect",
lapply(mats, function(x) rownames(x))
)
)
if (!force) {
if (!all(assaylengths == length(commonAddresses))) {
print(as.matrix(assaylengths))
stop(
"Cannot combine IDATs of differing lengths,
possibly try force = T")
}
}
fnames <- commonAddresses
} else {
fnames <- rownames(mats[[1]])
}
extract <- function(assay){
extractAssayDataFromList2(assay, mats, fnames)
}
assays = lapply(assayNames, extract)
nb <- pmin(assays[["Cy3.NBeads"]], assays[["Cy5.NBeads"]])
obj = new(
"NChannelSet",
assayData = assayDataNew(
R = assays[["Cy5.Mean"]],
G = assays[["Cy3.Mean"]],
N = nb)
)
featureNames(obj) = fnames
if (IDAT){
message("Attempting to extract protocolData() from list...")
ChipType = attr(mats[[1]], "ChipType")
RunInfo = lapply(mats, function(d) attr(d, "RunInfo"))
if (protocol.data){
scanDates <- data.frame(
DecodeDate = rep(NA, length(mats)),
ScanDate = rep(NA, length(mats))
)
rownames(scanDates) <- names(mats)
for (i in seq_along(mats)) {
cat(
"decoding protocolData for",
names(mats)[i],
"...\n"
)
if (nrow(RunInfo[[i]]) >= 2) {
scanDates$DecodeDate[i] <- RunInfo[[i]][1, 1]
scanDates$ScanDate[i] <- RunInfo[[i]][2, 1]
}
}
protocoldata = new("AnnotatedDataFrame",
data = scanDates,
varMetadata = data.frame(
labelDescription = colnames(scanDates),
row.names = colnames(scanDates)
)
)
protocolData(obj) = protocoldata
}
message("Determining chip type from IDAT protocolData...")
if (ChipType == "BeadChip 12x1") {
annotation(obj) = "IlluminaHumanMethylation27k"
} else if (ChipType == "BeadChip 12x8") {
annotation(obj) = "IlluminaHumanMethylation450k"
} else if (ChipType == "BeadChip 8x5") {
annotation(obj) = "IlluminaHumanMethylationEpic"
}
if (ChipType == "BeadChip 8x5" && dim(obj)[1] > 1.1e+06) {
annotation(obj) = "IlluminaHumanMethylationEpicv2"
}
}
return(obj)
} # }}}
# {{{ getControlProbes2
getControlProbes2 <- function(NChannelSet) {
fD <- getMethylationBeadMappers2(annotation(NChannelSet))$controls()
ctls <- match(fD[["Address"]], featureNames(NChannelSet))
## FIXME: make this happen in the annotations, to avoid redundancy in
## names!
rownames(fD) <- ctlnames <- make.names(fD[, "Name"], unique = T)
fD <- data.frame(fD)
fvD <- data.frame(labelDescription = c("Address of this control bead", "Purpose of this control bead",
"Color channel for this bead", "Reporter group ID for this bead"))
fDat <- new("AnnotatedDataFrame", data = fD, varMetadata = fvD)
methylated <- assayDataElement(NChannelSet, "G")[ctls, , drop = FALSE] # Cy3
unmethylated <- assayDataElement(NChannelSet, "R")[ctls, , drop = FALSE] # Cy5
beadcount <- assayDataElement(NChannelSet, "N")[ctls, , drop = FALSE] ## Testing TGS
rownames(beadcount) <- rownames(methylated) <- rownames(unmethylated) <- ctlnames
aDat <- assayDataNew(methylated = methylated, unmethylated = unmethylated, beadcount = beadcount) #TGS
new("MethyLumiQC", assayData = aDat, featureData = fDat, annotation = annotation(NChannelSet))
} # }}}
## {{{ designItoMandU2
## 27k design, both probes same channel; ~100,000 of the 450k probes as well
## epic and epic II too
designItoMandU2 <- function(NChannelSet, parallel = F, n = F, n.sd = F, oob = T) {
mapper <- getMethylationBeadMappers2(annotation(NChannelSet))
probes <- mapper$probes(design = "I") # as list(G=..., R=...)
channels <- c("G", "R")
names(channels) <- channels
getIntCh <- function(NChannelSet, ch, al) {
newprobes <- lapply(probes, function(y) {
lapply(y, function(x) {
x[probes[[ch]][[al]] %in% rownames(assayDataElement(NChannelSet, ch))]
})
})
a = assayDataElement(NChannelSet, ch)[as.character(newprobes[[ch]][[al]]),
, drop = FALSE]
rownames(a) = as.character(newprobes[[ch]][["Probe_ID"]])
return(a)
}
getOOBCh <- function(NChannelSet, ch, al) {
ch.oob <- ifelse(ch == "R", "G", "R")
newprobes <- lapply(probes, function(y) {
lapply(y, function(x) {
x[probes[[ch]][[al]] %in% rownames(assayDataElement(NChannelSet,
ch))]
})
})
a = assayDataElement(NChannelSet, ch.oob)[as.character(newprobes[[ch]][[al]]),
, drop = FALSE]
rownames(a) = as.character(newprobes[[ch]][["Probe_ID"]])
return(a)
}
getNbeadCh <- function(NChannelSet, ch, al) {
#tgs
newprobes <- lapply(probes, function(y) {
lapply(y, function(x) {
x[probes[[ch]][[al]] %in% rownames(assayDataElement(NChannelSet,
ch))]
})
})
n = assayDataElement(NChannelSet, "N")[as.character(newprobes[[ch]][[al]]),
, drop = FALSE]
rownames(n) = as.character(newprobes[[ch]][["Probe_ID"]])
return(n)
}
getAllele <- function(NChannelSet, al, parallel = F, n = n, n.sd = T, oob = T) {
fluor = lapply(channels, function(ch) getIntCh(NChannelSet, ch, al))
fluor.oob = lapply(channels, function(ch) getOOBCh(NChannelSet, ch, al))
bc = lapply(channels, function(ch) getNbeadCh(NChannelSet, ch, al)) #tgs
res = list()
res[["I"]] = fluor
if (n)
res[["BC"]] = bc
if (oob)
res[["OOB"]] = fluor.oob
lapply(res, function(r) {
names(r) = channels
return(r)
})
}
signal <- lapply(c(M = "M", U = "U"), function(al) {
getAllele(NChannelSet, al, parallel = F, n = n, n.sd = n.sd, oob = oob)
})
retval = list(methylated = rbind(signal$M$I$R, signal$M$I$G), unmethylated = rbind(signal$U$I$R,
signal$U$I$G))
if (n) {
# tgs
retval[["m.beadcount"]] = rbind(signal$M$BC$R, signal$M$BC$G)
retval[["u.beadcount"]] = rbind(signal$U$BC$R, signal$U$BC$G)
# NBeads takes the lowest bead count for each type I probe.
retval[["NBeads"]] = pmin(retval[["m.beadcount"]], retval[["u.beadcount"]])
}
if (oob) {
retval[["methylated.OOB"]] = rbind(signal$M$OOB$R, signal$M$OOB$G)
retval[["unmethylated.OOB"]] = rbind(signal$U$OOB$R, signal$U$OOB$G)
}
return(retval)
} # }}}
## {{{ designIItoMandU2
## 450k/GoldenGate design (green=methylated, red=unmethylated, single address)
# loads the annotation DB so we can run SQL queries
designIItoMandU2 <- function(NChannelSet, parallel = F, n = F, n.sd = F, oob = T) {
mapper <- getMethylationBeadMappers2(annotation(NChannelSet))
probes2 <- mapper$probes(design = "II")
probes2$M <- probes2$U ## horrid kludge
#readPepo assigns $U to match
getIntCh <- function(NChannelSet, ch = NULL, al) {
ch <- ifelse(al == "M", "G", "R")
newprobes <- lapply(probes2, function(x) {
x[probes2[[al]] %in% rownames(assayDataElement(NChannelSet, ch))]
})
a <- assayDataElement(NChannelSet, ch)[as.character(newprobes[[al]]), , drop = F]
rownames(a) <- as.character(newprobes[["Probe_ID"]])
return(a)
}
getNbeadCh <- function(NChannelSet, ch = NULL, al) {
# tgs
ch <- ifelse(al == "M", "G", "R")
newprobes <- lapply(probes2, function(x) {
x[probes2[[al]] %in% rownames(assayDataElement(NChannelSet, ch))]
})
n <- assayDataElement(NChannelSet, "N")[as.character(newprobes[[al]]), ,
drop = F]
rownames(n) <- as.character(newprobes[["Probe_ID"]])
return(n)
}
getAllele <- function(NChannelSet, al, n = F, n.sd = F, oob = F) {
ch <- ifelse(al == "M", "G", "R")
res <- list()
res[["I"]] <- getIntCh(NChannelSet, ch, al)
if (n)
res[["BC"]] <- getNbeadCh(NChannelSet, ch, al)
if (oob) {
res[["OOB"]] <- res[["I"]]
is.na(res[["OOB"]]) <- TRUE
}
return(res)
} #
## M == Grn/Cy3 and U == Red/Cy5, same address
alleles = c(M = "M", U = "U")
signal = lapply(alleles, function(a) getAllele(NChannelSet, a, n, n.sd, oob))
retval = list(methylated = signal$M$I, unmethylated = signal$U$I)
if (n) {
# tgs
retval[["m.beadcount"]] = signal$M$BC
retval[["u.beadcount"]] = signal$U$BC
retval[["NBeads"]] = signal$M$BC #TGSbc
}
if (oob) {
retval[["methylated.OOB"]] = signal$M$OOB
retval[["unmethylated.OOB"]] = signal$U$OOB
}
return(retval)
} # }}}
#{{{ mergeProbeDesigns2
## TGS: possible to do this better?
mergeProbeDesigns2 <- function(
NChannelSet,
parallel = F,
n = F, n.sd = F,
oob = T, too=TRUE
) {
if(length(annotation(NChannelSet))==0) annotation(NChannelSet) <- '.manifest'
if( !too | annotation(NChannelSet)== "IlluminaHumanMethylation27k") {
res <- designItoMandU2(
NChannelSet, parallel = parallel, n = n, n.sd = n.sd, oob = oob
)
} else { # all subsequent human arrays
design1 <- designItoMandU2(
NChannelSet, parallel = parallel, n = n, n.sd = n.sd, oob = oob
)
design2 <- designIItoMandU2(
NChannelSet, parallel = parallel, n = n, n.sd = n.sd, oob = oob
)
res <- list()
for (i in names(design1)) {
res[[i]] <- rbind(design1[[i]], design2[[i]])
rownames(res[[i]]) <- c(rownames(design1[[i]]), rownames(design2[[i]]))
}
}
return(res) # reorder on the way out...
} # }}}
# {{{ NChannelSetToMethyLumiSet2
#' For internal use, is read using minfi-like machinery and then
#' preprocessed into the more flexible and convenient methylumi
#' object used by wateRmelon/bigmelon
#'
#' @param NChannelSet an NChannelSet (raw red and green values not yet
#' mapped to Illumina IDs/CpG names
#' @param parallel no effect, included for future parallelisation
#' @param pval detection pval threshold for filtering. Inactivated.
#' @param n keep nbeads data (min of m & u)
#' @param n.sd process SD of U and M (not currently implemented)
#' @param oob keep out-of-band signals
#' @param to does chip have type I and II probes?
#'
#' @return A methylumi object with betas, U and M, optionally
#' additional data
NChannelSetToMethyLumiSet2 <- function(
NChannelSet,
parallel = F,
pval = 0.05,
n = F,
n.sd = F,
oob = T,
to=TRUE
){
history.submitted = as.character(Sys.time())
results = mergeProbeDesigns2(
NChannelSet,
parallel = parallel,
n = n,
n.sd = n.sd,
oob = oob,
too=to
)
#The next part is somewhat messy, will think of an better way to do this
# ----especially since these are non-options
if (oob && n) {
aDat <- with(
results,
assayDataNew(
methylated = methylated,
unmethylated = unmethylated,
methylated.N = m.beadcount,
unmethylated.N = u.beadcount,
methylated.OOB = methylated.OOB,
unmethylated.OOB = unmethylated.OOB,
betas = methylated/(methylated + unmethylated),
# should this not be +100 in denom?
pvals = methylated/(methylated + unmethylated),
# is there really not a less shit alternative?
NBeads = NBeads)
)
} else if (oob) {
aDat <- with(
results,
assayDataNew(
methylated = methylated,
unmethylated = unmethylated,
methylated.OOB = methylated.OOB,
unmethylated.OOB = unmethylated.OOB,
betas = methylated/(methylated + unmethylated),
pvals = methylated/(methylated + unmethylated)
)
)
} else if (n) {
aDat <- with(
results,
assayDataNew(
methylated = methylated,
unmethylated = unmethylated,
methylated.N = m.beadcount,
unmethylated.N = u.beadcount,
betas = methylated/(methylated + unmethylated),
pvals = methylated/(methylated + unmethylated),
NBeads = NBeads
)
)
} else {
aDat <- with(
results,
assayDataNew(
methylated = methylated,
unmethylated = unmethylated,
betas = methylated/(methylated + unmethylated),
pvals = methylated/(methylated + unmethylated)
)
)
}
rm(results)
gc()
## now return the MethyLumiSet (which can be directly coerced to
## MethyLumiM)
x.lumi = new("MethyLumiSet", assayData = aDat)
x.lumi@QC <- getControlProbes2(NChannelSet)
x.lumi@protocolData <- protocolData(NChannelSet)
x.lumi@annotation <- annotation(NChannelSet)
x.lumi@QC@annotation <- annotation(NChannelSet)
pdat <- data.frame(barcode = sampleNames(NChannelSet))
rownames(pdat) <- sampleNames(NChannelSet)
pData(x.lumi) <- pdat
varLabels(x.lumi) <- c("barcode")
varMetadata(x.lumi)[, 1] <- c("Illumina BeadChip barcode")
mapper <- getMethylationBeadMappers2(annotation(NChannelSet))
fdat <- mapper$ordering()
rownames(fdat) <- fdat$Probe_ID
x.fnames <- rownames(betas(x.lumi))
fdat <- fdat[x.fnames, ]
## Regression tests: fail noisily if there is an ordering issue
stopifnot(identical(rownames(methylated(x.lumi)), rownames(unmethylated(x.lumi))))
stopifnot(identical(rownames(betas(x.lumi)), rownames(unmethylated(x.lumi))))
stopifnot(identical(rownames(fdat), rownames(betas(x.lumi))))
# The only way to feasibly clean up this section would be to outsource the
# possible metadatas to another script.
fData(x.lumi) <- fdat
possibleLabels <- c("Probe_ID", "DESIGN", "COLOR_CHANNEL", "PROBE_TYPE", "SNP10",
"SYMBOL", "CHR36", "CPG36", "CPGS")
fvarLabels(x.lumi) <- possibleLabels[1:ncol(fdat)]
possibleMetadata <- c("Illumina probe ID from manifest", "Infinium design type (I or II)",
"Color channel (for type I probes)", "Probe locus type (CpG, CpH, or SNP)",
"SNP (dbSNP build 128) within 10bp of target?", "Gene symbol (if probe is annotated to a gene)",
"Chromosome mapping for probe in hg18 assembly", "Coordinates of interrogated cytosine in hg18",
"Number of CpG dinucleotides in probe sequence")
# something happens to readPepo betas after this point
fvarMetadata(x.lumi)[, 1] <- possibleMetadata[1:ncol(fdat)]
#pval.detect(x.lumi) <- pval # culprit!
pv <- detectP(x.lumi)
pvals(x.lumi) <- pv
history.finished <- as.character(Sys.time())
history.command <- deparse(match.call()) # can't be it
x.lumi@history <- rbind(
x.lumi@history,
data.frame(
submitted = history.submitted,
finished = history.finished,
command = history.command)
) # cbi
# if(normalize) x.lumi = normalizeMethyLumiSet(x.lumi)
return(x.lumi)
} # }}}
# {{{ methylumIDATepic
methylumIDATepic <- function(barcodes = NULL, pdat = NULL, parallel = F, n = F, n.sd = F,
oob = T, idatPath = getwd(), force = F, tw=TRUE, ...) {
if (is(barcodes, "data.frame"))
pdat = barcodes
if ((is.null(barcodes)) & (is.null(pdat) | (!("barcode" %in% names(pdat)))))
{
stop("\"barcodes\" or \"pdat\" (with pdat$barcode defined) must be supplied.")
}
if (!is.null(pdat) && "barcode" %in% tolower(names(pdat))) {
names(pdat)[which(tolower(names(pdat)) == "barcode")] = "barcode"
barcodes = pdat$barcode
if (any(grepl("idat", ignore.case = TRUE, barcodes))) {
message("Warning: filtering out raw filenames")
barcodes = gsub("_(Red|Grn)", "", barcodes, ignore.case = TRUE)
barcodes = gsub(".idat", "", barcodes, ignore.case = TRUE)
}
if (any(duplicated(barcodes)))
{
message("Warning: filtering out duplicates")
pdat = pdat[-which(duplicated(barcodes)), ]
barcodes = pdat$barcode
}
} else {
if (any(grepl("idat", ignore.case = TRUE, barcodes))) {
message("Warning: filtering out raw filenames")
barcodes = unique(gsub("_(Red|Grn)", "", barcodes, ignore.case = TRUE))
barcodes = unique(gsub(".idat", "", barcodes, ignore.case = TRUE))
}
if (any(duplicated(barcodes))) {
message("Warning: filtering out duplicate barcodes")
barcodes = barcodes[which(!duplicated(barcodes))]
}
}
files.present = rep(TRUE, length(barcodes))
idats = sapply(barcodes, function(b) paste(b, c("_Red", "_Grn"), ".idat", sep = ""))
for (i in colnames(idats)) for (j in idats[, i]) if (!j %in% list.files(idatPath,
recursive = TRUE)) {
message(paste("Error: file", j, "is missing for sample", i))
files.present = FALSE
}
stopifnot(all(files.present))
mats <- IDATsToMatrices2(barcodes, parallel = parallel, idatPath = idatPath)
dats <- DataToNChannelSet2(mats, IDAT = T, parallel = parallel, force = force)
mlumi <- NChannelSetToMethyLumiSet2(dats, parallel = parallel, oob = oob, n = n, to=tw)
if (is.null(pdat)) {
pdat = data.frame(barcode = as.character(barcodes))
rownames(pdat) = pdat$barcode
pData(mlumi) = pdat
} else {
pData(mlumi) = pdat
}
if (!is.null(mlumi@QC))
{
sampleNames(mlumi@QC) = sampleNames(mlumi)
}
# finally
colnames(mlumi) <- as.character(colnames(mlumi)) # Fixing _that_ one bug.
return(mlumi[sort(featureNames(mlumi)), ])
} # }}}
#{{{ readEPIC
readEPIC <- function(idatPath, barcodes = NULL, pdat = NULL, parallel = F, n = T,
oob = F, force = F, two=TRUE, ...) {
# path: file path to folder containing idat files, if working directory
# is folder containing idats, use path <- './' the gsub will catch all
# types of arrays as it is technically impossible to distinguish chiptype
# through barcode.
if (is.null(barcodes)) {
barcodes <- idatPath
methylumIDATepic(bfp(barcodes), pdat = pdat, parallel = parallel, n = n,
n.sd = n.sd, oob = oob, idatPath = idatPath, force = force, tw=two, ...)
} else {
methylumIDATepic(barcodes, pdat = pdat, parallel = parallel, n = n, n.sd = n.sd,
oob = oob, idatPath = idatPath, force = force, tw=two, ...)
}
} # }}}
bfp <- function(path) { #{{{
# Barcodes from path, incase one wishes to use MethylumIDATepic manually.
# bar <- dir(path, recursive=T)[grepl('R0[12345678]C0[12]_(Red|Grn).idat',
# dir(path, recursive=T))]
bar <- dir(path, recursive = TRUE)[grepl("_(Red|Grn).idat", dir(path, recursive = TRUE))]
bar <- gsub("_(Red|Grn).idat", "", bar, ignore.case = TRUE)
# Automatically sift out duplicated filenames!
bar <- bar[!duplicated(basename(bar))]
return(bar)
}#}}}
generateManifest <- function(anno = c("450k", "EPIC", "EPICv2", ".manifest")) { #{{{
anno <- match.arg(anno)
# Possible to add more manifests here!
anno <- switch(anno,
`450k` = "IlluminaHumanMethylation450kanno.ilmn12.hg19",
EPIC = "IlluminaHumanMethylationEPICanno.ilm10b4.hg19",
EPICv2 = "IlluminaHumanMethylationEPICv2anno.20a1.hg38",
.manifest = "alreadyCooked"
)
if (anno == "alreadyCooked") {man <- .manifest} else
{man <- getAnnotationObject(anno)}
x <- getAnnotation(man)[, c("Name", "AddressB", "AddressA", "Type", "Color")]
# Name = Name AddressB = M AddressA = U Type = DESIGN Color = COLOR_CHANNEL
# Generate manifest.
snpI <- getProbeInfo(man, type = "SnpI" )[, c(1, 2, 3, 4)]
snpII <- getProbeInfo(man, type = "SnpII")[, c(1, 2)]
snpI <- cbind(snpI[, c("Name", "AddressB", "AddressA")], rep("I", nrow(snpI)),
snpI[, "Color"])
snpII <- cbind(snpII[, "Name"], rep("", nrow(snpII)), snpII[, "AddressA"], rep("II",
nrow(snpII)), rep("", nrow(snpII)))
colnames(snpI) <- colnames(snpII) <- colnames(x)
x1 <- rbind(data.frame(x, stringsAsFactors = F), data.frame(snpI, stringsAsFactors = F),
data.frame(snpII, stringsAsFactors = F))
x1$col <- x1$Color
x1$Color[x1$Color == "" ] <- "Both"
x1$col [x1$Color == "Red"] <- "R"
x1$col [x1$Color == "Grn"] <- "G"
is.na(x1$col) <- x1$Color == "Both"
colnames(x1) <- c("Probe_ID", "M", "U", "DESIGN", "COLOR_CHANNEL", "col")
x1$COLOR_CHANNEL <- factor(x1$COLOR_CHANNEL)
x1$col <- factor(x1$col)
return(x1)
} #}}}
# memory hole {{{
## for HM27k, all probes are of "design I": single-channel, two-address pairings
## for HM450k, probes are either "design I" or "design II" as noted in manifest!
## for epic, probes are either "design I" or "design II" as noted in manifest
## IDATs from GoldenGate methylation arrays are not supported at this time.
#cy3 <- function(object) { #
# if(is.element('Color_Channel', fvarLabels(object)) &&
# !is.element('COLOR_CHANNEL', fvarLabels(object))) {
# fvarLabels(object)<-gsub('Color_Channel','COLOR_CHANNEL',fvarLabels(object))
# }
# if(!is.element('COLOR_CHANNEL', fvarLabels(object))) {
# annotChip <- paste(annotation(object),'db',sep='.')
# object <- addColorChannelInfo(object, annotChip)
# }
# return(which(fData(object)$COLOR_CHANNEL=='Grn'))
#} #
#cy5 <- function(object) { #
# if(is.element('Color_Channel', fvarLabels(object)) &&
# !is.element('COLOR_CHANNEL', fvarLabels(object))) {
# fvarLabels(object)<-gsub('Color_Channel','COLOR_CHANNEL',fvarLabels(object))
# }
# if(!is.element('COLOR_CHANNEL', fvarLabels(object))) {
# annotChip <- paste(annotation(object),'db',sep='.')
# object <- addColorChannelInfo(object, annotChip)
# }
# return(which(fData(object)$COLOR_CHANNEL=='Red'))
#} # }}}
#.onLoad = function(libname, pkgname){
# anchorenv <- new.env(parent = getNamespace("wateRmelon"))
# assign("anchorenv", anchorenv, envir = getNamespace("wateRmelon"))
# assign("types", character(0), envir=anchorenv)
# assign("index", list(), envir=anchorenv)
# assign("history", list(), envir=anchorenv)
#} method of creading a hidey hole for packge variables see
#https://hydroecology.net/implementing-session-cache-r-packages/
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.