xcmsSet <- function(files = NULL, snames = NULL, sclass = NULL, phenoData = NULL,
profmethod = "bin", profparam = list(),
polarity = NULL, lockMassFreq=FALSE,
mslevel=NULL, nSlaves=0, progressCallback=NULL,
scanrange=NULL, ...) {
object <- new("xcmsSet")
## initialise progress information
xcms.options <- getOption("BioC")$xcms
xcms.methods <- c(paste("group", xcms.options$group.methods,sep="."), paste("findPeaks", xcms.options$findPeaks.methods,sep="."),
paste("retcor", xcms.options$retcor.methods,sep="."), paste("fillPeaks", xcms.options$fillPeaks.methods,sep="."))
eval(parse(text=paste("object@progressInfo <- list(",paste(xcms.methods,"=0",sep="",collapse=","),")") ))
if (is.function(progressCallback))
object@progressCallback <- progressCallback
filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]",
"[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
if (is.null(files))
files <- getwd()
info <- file.info(files)
listed <- list.files(files[info$isdir], pattern = filepattern,
recursive = TRUE, full.names = TRUE)
files <- c(files[!info$isdir], listed)
## try making paths absolute
files_abs <- file.path(getwd(), files)
exists <- file.exists(files_abs)
files[exists] <- files_abs[exists]
if(lockMassFreq==TRUE){
## remove the 02 files if there here
lockMass.files<-grep("02.CDF", files)
if(length(lockMass.files) > 0){
files<-files[-lockMass.files]
}
}
filepaths(object) <- files
if (length(files) == 0)
stop("No NetCDF/mzXML/mzData/mzML files were found.\n")
## determine experimental design
fromPaths <- phenoDataFromPaths(files)
if (is.null(snames)) {
snames <- rownames(fromPaths)
} else {
rownames(fromPaths) <- snames
}
pdata <- phenoData
if (is.null(pdata)) {
pdata <- sclass
if (is.null(pdata))
pdata <- fromPaths
}
phenoData(object) <- pdata
if (is.null(phenoData))
rownames(phenoData(object)) <- snames
rtlist <- list(raw = vector("list", length(snames)),
corrected = vector("list", length(snames)))
if ("step" %in% names(profparam)) {
if ("step" %in% names(list(...)) && profparam$step != list(...)$step) {
stop("different step values defined in profparam and step arguments")
}
profstep <- profparam$step
profparam <- profparam[names(profparam) != "step"]
} else if ("step" %in% names(list(...))) {
profstep <- list(...)$step
} else {
profstep <- 0.1
}
if ("method" %in% names(profparam)) {
if (profparam$method != profmethod) {
stop("different method values defined in profparam and profmethod arguments")
}
profmethod <- profparam$method
profparam <- profparam[names(profparam) != "method"]
}
profinfo(object) <- c(list(method = profmethod, step = profstep), profparam)
object@polarity <- as.character(polarity)
includeMSn=FALSE
## implicitely TRUE if selecting MSn
includeMSn <- !is.null(mslevel) && mslevel>1
## implicitely TRUE if MS1 parent peak picking
xcmsSetArgs <- as.list(match.call())
if (!is.null(xcmsSetArgs$method)) {
if (xcmsSetArgs$method=="MS1") {
includeMSn=TRUE
}
}
parmode <- xcmsParallelSetup(nSlaves=nSlaves)
runParallel <- parmode$runParallel
parMode <- parmode$parMode
snowclust <- parmode$snowclust
params <- list(...);
params$profmethod <- profmethod;
params$profparam <- profparam;
params$includeMSn <- includeMSn;
params$scanrange <- scanrange;
params$mslevel <- mslevel; ## Actually, this is
params$lockMassFreq <- lockMassFreq;
ft <- cbind(file=files,id=1:length(files))
argList <- apply(ft,1,function(x) list(file=x["file"],id=as.numeric(x["id"]),params=params))
if (parMode == "MPI") {
res <- xcmsPapply(argList, findPeaksPar)
mpi.close.Rslaves()
} else if (parMode == "SOCK") {
res <- xcmsClusterApply(cl=snowclust, x=argList, fun=findPeaksPar, msgfun=msgfun.featureDetection)
stopCluster(snowclust)
} else {
## serial mode
res <- lapply(argList, findPeaksPar)
}
peaklist <- lapply(res, function(x) x$peaks)
rtlist$raw <- rtlist$corrected <- lapply(res, function(x) x$scantime)
if(lockMassFreq){
object@dataCorrection[1:length(files)]<-1
}
lapply(1:length(peaklist), function(i) {
if (is.null(peaklist[[i]]))
warning("No peaks found in sample ", snames[i], call. = FALSE)
else if (nrow(peaklist[[i]]) == 0)
warning("No peaks found in sample ", snames[i], call. = FALSE)
else if (nrow(peaklist[[i]]) == 1)
warning("Only 1 peak found in sample ", snames[i], call. = FALSE)
else if (nrow(peaklist[[i]]) < 10)
warning("Only ", nrow(peaklist[[i]]), " peaks found in sample",
snames[i], call. = FALSE)
})
peaks(object) <- do.call(rbind, peaklist)
object@rt <- rtlist
object
}
setMethod("show", "xcmsSet", function(object) {
cat("An \"xcmsSet\" object with", nrow(object@phenoData), "samples\n\n")
cat("Time range: ", paste(round(range(object@peaks[,"rt"]), 1), collapse = "-"),
" seconds (", paste(round(range(object@peaks[,"rt"])/60, 1), collapse = "-"),
" minutes)\n", sep = "")
cat("Mass range:", paste(round(range(object@peaks[,"mz"], na.rm = TRUE), 4), collapse = "-"),
"m/z\n")
cat("Peaks:", nrow(object@peaks), "(about",
round(nrow(object@peaks)/nrow(object@phenoData)), "per sample)\n")
cat("Peak Groups:", nrow(object@groups), "\n")
cat("Sample classes:", paste(levels(sampclass(object)), collapse = ", "), "\n\n")
if (length(object@profinfo)) {
cat("Profile settings: ")
for (i in seq(along = object@profinfo)) {
if (i != 1) cat(" ")
cat(names(object@profinfo)[i], " = ", object@profinfo[[i]], "\n", sep = "")
}
cat("\n")
}
memsize <- object.size(object)
cat("Memory usage:", signif(memsize/2^20, 3), "MB\n")
})
c.xcmsSet <- function(...) {
lcsets <- list(...)
object <- new("xcmsSet")
peaklist <- vector("list", length(lcsets))
namelist <- vector("list", length(lcsets))
if (any(duplicated(unlist(namelist)))) {
stop("Duplicated sample names\n")
}
classlist <- vector("list", length(lcsets))
cdflist <- vector("list", length(lcsets))
rtraw <- vector("list", 0)
rtcor <- vector("list", 0)
nsamp <- 0
for (i in seq(along = lcsets)) {
peaklist[[i]] <- peaks(lcsets[[i]])
namelist[[i]] <- sampnames(lcsets[[i]])
classlist[[i]] <- sampclass(lcsets[[i]])
classlist[[i]] <- levels(classlist[[i]])[classlist[[i]]]
cdflist[[i]] <- filepaths(lcsets[[i]])
rtraw <- c(rtraw, lcsets[[i]]@rt$raw)
rtcor <- c(rtcor, lcsets[[i]]@rt$corrected)
sampidx <- seq(along = namelist[[i]]) + nsamp
peaklist[[i]][,"sample"] <- sampidx[peaklist[[i]][,"sample"]]
nsamp <- nsamp + length(namelist[[i]])
}
peaks(object) <- do.call(rbind, peaklist)
sampnames(object) <- unlist(namelist)
classlist <- unlist(classlist)
sampclass(object) <- factor(classlist, unique(classlist))
filepaths(object) <- unlist(cdflist)
profinfo(object) <- profinfo(lcsets[[1]])
object@rt <- list(raw = rtraw, corrected = rtcor)
invisible(object)
}
split.xcmsSet <- function(x, f, drop = TRUE, ...) {
if (!is.factor(f))
f <- factor(f)
sampidx <- unclass(f)
peakmat <- peaks(x)
samples <- sampnames(x)
classlabel <- sampclass(x)
cdffiles <- filepaths(x)
prof <- profinfo(x)
rtraw <- x@rt$raw
rtcor <- x@rt$corrected
lcsets <- vector("list", length(levels(f)))
names(lcsets) <- levels(f)
for (i in unique(sampidx)) {
lcsets[[i]] <- new("xcmsSet")
samptrans <- numeric(length(f))
samptrans[sampidx == i] <- rank(which(sampidx == i))
samp <- samptrans[peakmat[,"sample"]]
sidx <- which(samp != 0)
cpeaks <- peakmat[sidx,, drop=FALSE]
cpeaks[,"sample"] <- samp[sidx]
peaks(lcsets[[i]]) <- cpeaks
sampnames(lcsets[[i]]) <- samples[sampidx == i]
sampclass(lcsets[[i]]) <- classlabel[sampidx == i, drop = TRUE]
filepaths(lcsets[[i]]) <- cdffiles[sampidx == i]
profinfo(lcsets[[i]]) <- prof
lcsets[[i]]@rt$raw <- rtraw[sampidx == i]
lcsets[[i]]@rt$corrected <- rtcor[sampidx == i]
}
if (drop)
lcsets <- lcsets[seq(along = lcsets) %in% sampidx]
lcsets
}
setMethod("peaks", c("xcmsSet","missing"), function(object) object@peaks)
setGeneric("peaks<-", function(object, value) standardGeneric("peaks<-"))
setReplaceMethod("peaks", "xcmsSet", function(object, value) {
object@peaks <- value
object
})
setGeneric("groups", function(object) standardGeneric("groups"))
setMethod("groups", "xcmsSet", function(object) object@groups)
setGeneric("groups<-", function(object, value) standardGeneric("groups<-"))
setReplaceMethod("groups", "xcmsSet", function(object, value) {
object@groups <- value
object
})
setGeneric("groupidx", function(object) standardGeneric("groupidx"))
setMethod("groupidx", "xcmsSet", function(object) object@groupidx)
setGeneric("groupidx<-", function(object, value) standardGeneric("groupidx<-"))
setReplaceMethod("groupidx", "xcmsSet", function(object, value) {
object@groupidx <- value
object
})
setMethod("sampnames", "xcmsSet", function(object) rownames(object@phenoData))
setGeneric("sampnames<-", function(object, value) standardGeneric("sampnames<-"))
setReplaceMethod("sampnames", "xcmsSet", function(object, value) {
if (length(object@phenoData)==0) {
object@phenoData <- data.frame(class=rep("dummy", length(value)))
}
rownames(object@phenoData) <- value
object
})
setGeneric("sampclass", function(object) standardGeneric("sampclass"))
setMethod("sampclass", "xcmsSet", function(object) {
if (ncol(object@phenoData) >0) {
interaction(object@phenoData)
} else {
factor()
}
})
setGeneric("sampclass<-", function(object, value) standardGeneric("sampclass<-"))
setReplaceMethod("sampclass", "xcmsSet", function(object, value) {
if (!is.factor(value))
value <- factor(value, unique(value))
object@phenoData$class <- value
object
})
setGeneric("phenoData", function(object) standardGeneric("phenoData"))
setMethod("phenoData", "xcmsSet", function(object) object@phenoData)
setGeneric("phenoData<-", function(object, value) standardGeneric("phenoData<-"))
setReplaceMethod("phenoData", "xcmsSet", function(object, value) {
if (is.matrix(value))
value <- as.data.frame(value)
## if (is.data.frame(value) && !("class" %in% colnames(value)))
## value[,"class"] <- interaction(value)
## else
if (!is.data.frame(value))
value <- data.frame(class = value)
object@phenoData <- value
object
})
setGeneric("filepaths", function(object) standardGeneric("filepaths"))
setMethod("filepaths", "xcmsSet", function(object) object@filepaths)
setGeneric("filepaths<-", function(object, value) standardGeneric("filepaths<-"))
setReplaceMethod("filepaths", "xcmsSet", function(object, value) {
object@filepaths <- value
object
})
setGeneric("profinfo", function(object) standardGeneric("profinfo"))
setMethod("profinfo", "xcmsSet", function(object) object@profinfo)
setGeneric("profinfo<-", function(object, value) standardGeneric("profinfo<-"))
setReplaceMethod("profinfo", "xcmsSet", function(object, value) {
object@profinfo <- value
object
})
setGeneric("calibrate", function(object, ...) standardGeneric("calibrate"))
setMethod("calibrate", "xcmsSet", function(object,calibrants,method="linear",
mzabs=0.0001, mzppm=5,
neighbours=3, plotres=FALSE) {
nsamp = length(unique(object@peaks[,"sample"]))
if (!sum(method == c("shift","linear","edgeshift")))
stop("unknown calibration method!")
if (is.list(calibrants))
if (length(calibrants) != nsamp)
stop("Error: Number of masslists differs with number of samples")
for (s in 1:nsamp){
peaklist = object@peaks[which(object@peaks[,"sample"]==s),]
if (is.list(calibrants)) {
masslist <- calibrants[s]
}else{
masslist <- calibrants
}
masses <- matchpeaks(peaklist,masslist,mzabs,mzppm,neighbours)
if (length(masses)==0){
warning("No masses close enough!\n")
next
}
if (nrow(masses)==1 & method!="shift") {
cat("Warning: only one peak found, fallback to shift.")
method="shift"
}
params <- estimate (masses, method)
mzu <- peaklist[,"mz"]
mposs <- masses[,"pos"]
mdiffs <- masses[,"dif"]
a <- params[1]
b <- params[2]
## cat("a=",a,"b=",b)
if (method != "edgeshift"){
mzu <- mzu - (a * mzu + b)
} else {
mzu[c(1:(min(mposs)-1))] <- mzu[c(1:(min(mposs)-1))] - (a * mzu[min(mposs)] + b)
mzu[c((min(mposs)):(max(mposs)))] <-
mzu[c((min(mposs)):(max(mposs)))] - (a * mzu[c((min(mposs)):(max(mposs)))] + b)
mzu[c((max(mposs)+1):length(mzu))] <-
mzu[c((max(mposs)+1):length(mzu))] - (a * mzu[max(mposs)] + b)
}
peaklist[,"mz"] <- mzu
object@peaks[which(object@peaks[,"sample"]==s),] <- peaklist
}
if (plotres) {
plot(mzu[mposs],mdiffs, xlim=c(min(mzu),max(mzu)))
if (method!="edgeshift") {abline(b,a)}else{
lines(c(min(mzu),mzu[min(mposs)]),c(a * mzu[min(mposs)] + b,a * mzu[min(mposs)] + b))
lines(c(mzu[min(mposs)],mzu[max(mposs)]),c(a * mzu[min(mposs)] + b,a * mzu[max(mposs)] + b))
lines(c(mzu[max(mposs)],max(mzu)),c(a * mzu[max(mposs)] + b,a * mzu[max(mposs)] + b))
}
}
invisible(object)
})
setMethod("groupnames", "xcmsSet", function(object, mzdec = 0, rtdec = 0,
template = NULL) {
if ( nrow(object@groups)<1 || length(object@groupidx) <1) {
stop("No group information. Use group().")
}
if (!missing(template)) {
tempsplit <- strsplit(template[1], "[T_]")
tempsplit <- strsplit(unlist(tempsplit), "\\.")
if (length(tempsplit[[1]]) > 1)
mzdec <- nchar(tempsplit[[1]][2])
else
mzdec <- 0
if (length(tempsplit[[2]]) > 1)
rtdec <- nchar(tempsplit[[2]][2])
else
rtdec <- 0
}
mzfmt <- paste("%.", mzdec, "f", sep = "")
rtfmt <- paste("%.", rtdec, "f", sep = "")
gnames <- paste("M", sprintf(mzfmt, groups(object)[,"mzmed"]), "T",
sprintf(rtfmt, groups(object)[,"rtmed"]), sep = "")
if (any(dup <- duplicated(gnames)))
for (dupname in unique(gnames[dup])) {
dupidx <- which(gnames == dupname)
gnames[dupidx] <- paste(gnames[dupidx], seq(along = dupidx), sep = "_")
}
gnames
})
## derive experimental design from set of file paths
phenoDataFromPaths <- function(paths) {
## create factors from filesystem hierarchy
sclass <- gsub("^\\.$", "sample", dirname(paths))
lev <- strsplit(sclass, "/")
levlen <- sapply(lev, length)
if(length(lev) > 1 && !all(levlen[1] == levlen))
stop("Directory tree must be level")
pdata <- as.data.frame(matrix(unlist(lev), nrow=length(lev), byrow=TRUE))
redundant <- apply(pdata, 2, function(col) length(unique(col)) == 1)
if (!any(!redundant)) {
redundant[length(redundant)] <- FALSE
}
pdata <- pdata[,!redundant,drop=FALSE]
if (ncol(pdata) == 1) { ## if not multiple factors, behave as before
## Make the default group names less redundant
scomp <- strsplit(substr(sclass, 1, min(nchar(sclass))), "")
scomp <- matrix(c(scomp, recursive = TRUE), ncol = length(scomp))
i <- 1
while(all(scomp[i,1] == scomp[i,-1]) && i < nrow(scomp))
i <- i + 1
i <- min(i, tail(c(0, which(scomp[1:i,1] == .Platform$file.sep)), n = 1) + 1)
if (i > 1 && i <= nrow(scomp))
sclass <- substr(sclass, i, max(nchar(sclass)))
pdata <- data.frame(class = sclass)
}
rownames(pdata) <- gsub("\\.[^.]*$", "", basename(paths))
pdata
}
setGeneric("group.density", function(object, ...) standardGeneric("group.density"))
setMethod("group.density", "xcmsSet", function(object, bw = 30, minfrac = 0.5, minsamp = 1,
mzwid = 0.25, max = 50, sleep = 0) {
samples <- sampnames(object)
classlabel <- sampclass(object)
classnames <- as.character(unique(sampclass(object)))
classlabel <- as.vector(unclass(classlabel))
classnum <- table(classlabel)
peakmat <- peaks(object)
porder <- order(peakmat[,"mz"])
peakmat <- peakmat[porder,, drop=FALSE]
rownames(peakmat) <- NULL
retrange <- range(peakmat[,"rt"])
mass <- seq(peakmat[1,"mz"], peakmat[nrow(peakmat),"mz"] + mzwid, by = mzwid/2)
masspos <- findEqualGreaterM(peakmat[,"mz"], mass)
groupmat <- matrix(nrow = 512, ncol = 7 + length(classnum))
groupindex <- vector("list", 512)
endidx <- 0
num <- 0
gcount <- integer(length(classnum))
for (i in seq(length = length(mass)-2)) {
if (i %% 500 == 0) {
cat(round(mass[i]), "")
flush.console()
}
startidx <- masspos[i]
endidx <- masspos[i+2]-1
if (endidx - startidx < 0)
next
speakmat <- peakmat[startidx:endidx,,drop=FALSE]
den <- density(speakmat[,"rt"], bw, from = retrange[1]-3*bw, to = retrange[2]+3*bw)
maxden <- max(den$y)
deny <- den$y
gmat <- matrix(nrow = 5, ncol = 2+length(classnum))
snum <- 0
while (deny[maxy <- which.max(deny)] > maxden/20 && snum < max) {
grange <- descendMin(deny, maxy)
deny[grange[1]:grange[2]] <- 0
gidx <- which(speakmat[,"rt"] >= den$x[grange[1]] & speakmat[,"rt"] <= den$x[grange[2]])
gnum <- classlabel[unique(speakmat[gidx,"sample"])]
for (j in seq(along = gcount))
gcount[j] <- sum(gnum == j)
if (! any(gcount >= classnum*minfrac & gcount >= minsamp))
next
snum <- snum + 1
num <- num + 1
### Double the size of the output containers if they're full
if (num > nrow(groupmat)) {
groupmat <- rbind(groupmat, matrix(nrow = nrow(groupmat), ncol = ncol(groupmat)))
groupindex <- c(groupindex, vector("list", length(groupindex)))
}
groupmat[num, 1] <- median(speakmat[gidx, "mz"])
groupmat[num, 2:3] <- range(speakmat[gidx, "mz"])
groupmat[num, 4] <- median(speakmat[gidx, "rt"])
groupmat[num, 5:6] <- range(speakmat[gidx, "rt"])
groupmat[num, 7] <- length(gidx)
groupmat[num, 7+seq(along = gcount)] <- gcount
groupindex[[num]] <- sort(porder[(startidx:endidx)[gidx]])
}
if (sleep > 0) {
plot(den, main = paste(round(min(speakmat[,"mz"]), 2), "-", round(max(speakmat[,"mz"]), 2)))
for (i in seq(along = classnum)) {
idx <- classlabel[speakmat[,"sample"]] == i
points(speakmat[idx,"rt"], speakmat[idx,"into"]/max(speakmat[,"into"])*maxden, col = i, pch=20)
}
for (i in seq(length = snum))
abline(v = groupmat[num-snum+i, 5:6], lty = "dashed", col = i)
Sys.sleep(sleep)
}
}
cat("\n")
colnames(groupmat) <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax",
"npeaks", classnames)
groupmat <- groupmat[seq(length = num),,drop=FALSE]
groupindex <- groupindex[seq(length = num)]
## Remove groups that overlap with more "well-behaved" groups
numsamp <- rowSums(groupmat[,(match("npeaks", colnames(groupmat))+1):ncol(groupmat),drop=FALSE])
uorder <- order(-numsamp, groupmat[,"npeaks"])
uindex <- rectUnique(groupmat[,c("mzmin","mzmax","rtmin","rtmax"),drop=FALSE],
uorder)
groups(object) <- groupmat[uindex,,drop=FALSE]
groupidx(object) <- groupindex[uindex]
object
})
setGeneric("group.mzClust", function(object, ...) standardGeneric("group.mzClust"))
setMethod("group.mzClust", "xcmsSet", function(object,
mzppm = 20,
mzabs = 0,
minsamp = 1,
minfrac=0.5)
{
samples <- sampnames(object)
classlabel <- sampclass(object)
peaks <- peaks(object)
groups <- mzClustGeneric(peaks[,c("mz","sample")],
sampclass=classlabel,
mzppm=mzppm,mzabs=mzabs,
minsamp=minsamp,
minfrac=minfrac)
if(is.null(nrow(groups$mat))) {
matColNames <- names(groups$mat)
groups$mat <- matrix(groups$mat,
ncol=length(groups$mat),byrow=F);
colnames(groups$mat) <- matColNames
}
rt <- c(rep(-1,nrow(groups$mat)))
groups(object) <- cbind(groups$mat[,(1:3),drop=FALSE],rt,rt,rt,groups$mat[,(4:ncol(groups$mat)),drop=FALSE])
colnames(groups(object)) <- c(colnames(groups$mat[,(1:3),drop=FALSE]), "rtmed", "rtmin", "rtmax", colnames(groups$mat[,(4:ncol(groups$mat)),drop=FALSE]))
groupidx(object) <- groups$idx
object
})
setGeneric("group.nearest", function(object, ...) standardGeneric("group.nearest"))
setMethod("group.nearest", "xcmsSet", function(object, mzVsRTbalance=10,
mzCheck=0.2, rtCheck=15, kNN=10) {
## If ANN is available ...
RANN = "RANN"
if (!require(RANN)) {
stop("RANN is not installed")
}
## classlabel <- sampclass(object)
classlabel <- as.vector(unclass(sampclass(object)))
samples <- sampnames(object)
gcount <- integer(length(unique(sampclass(object))))
peakmat <- peaks(object)
parameters <- list(mzVsRTBalance = mzVsRTbalance, mzcheck = mzCheck, rtcheck = rtCheck, knn = kNN)
ptable <- table(peaks(object)[,"sample"])
pord <- ptable[order(ptable, decreasing = TRUE)]
sid <- as.numeric(names(pord))
pn <- as.numeric(pord)
samples <- sampnames(object)
cat("sample:", basename(samples[sid[1]]), " ")
mplenv <- new.env(parent = .GlobalEnv)
mplenv$mplist <- matrix(0, pn[1], length(sid))
mplenv$mplist[, sid[1]] <- which(peakmat[,"sample"] == sid[1])
mplenv$mplistmean <- data.frame(peakmat[which(peakmat[,"sample"] == sid[1]),c("mz","rt")])
mplenv$peakmat <- peakmat
assign("peakmat", peakmat, env = mplenv)
sapply(sid[2:length(sid)], function(sample, mplenv, object){
## require(parallel)
## cl <- makeCluster(getOption("cl.cores", nSlaves))
## clusterEvalQ(cl, library(RANN))
## parSapply(cl, 2:length(samples), function(sample,mplenv, object){
for(mml in seq(mplenv$mplist[,1])){
mplenv$mplistmean[mml,"mz"] <- mean(mplenv$peakmat[mplenv$mplist[mml,],"mz"])
mplenv$mplistmean[mml,"rt"] <- mean(mplenv$peakmat[mplenv$mplist[mml,],"rt"])
}
cat("sample:",basename(samples[sample])," ")
mplenv$peakIdxList <- data.frame(peakidx=which(mplenv$peakmat[,"sample"]==sample),
isJoinedPeak=FALSE)
if(length(mplenv$peakIdxList$peakidx)==0){
cat("Warning: No peaks in sample",s,"\n")
}
## scoreList <- data.frame(score=numeric(0),peak=integer(0),mpListRow=integer(0),
## isJoinedPeak=logical(0), isJoinedRow=logical(0))
##
## for(currPeak in mplenv$peakIdxList$peakidx){
## pvrScore <- patternVsRowScore(currPeak,parameters,mplenv) ## does the actual NN
## scoreList <- rbind(scoreList,pvrScore)
## }
## this really doesn't take a long time not worth parallel version here.
## but make an apply loop now faster even with rearranging the data :D : PB
scoreList <- sapply(mplenv$peakIdxList$peakidx, function(currPeak, para, mplenv){
patternVsRowScore(currPeak,para,mplenv)
}, parameters, mplenv)
if(is.list(scoreList)){
idx<-which(scoreList != "NULL")
scoreList<-matrix(unlist(scoreList[idx]), ncol=5, nrow=length(idx), byrow=T)
colnames(scoreList)<-c("score", "peak", "mpListRow", "isJoinedPeak", "isJoinedRow")
} else {
scoreList <- data.frame(score=unlist(scoreList["score",]), peak=unlist(scoreList["peak",]), mpListRow=
unlist(coreList["mpListRow",]), isJoinedPeak=unlist(scoreList["isJoinedPeak",]),
isJoinedRow=unlist(scoreList["isJoinedRow",]))
}
## Browse scores in order of descending goodness-of-fit
scoreListcurr <- scoreList[order(scoreList[,"score"]),]
if (nrow(scoreListcurr) > 0)
for (scoreIter in 1:nrow(scoreListcurr)) {
iterPeak <-scoreListcurr[scoreIter, "peak"]
iterRow <- scoreListcurr[scoreIter, "mpListRow"]
## Check if master list row is already assigned with peak
if (scoreListcurr[scoreIter, "isJoinedRow"]==TRUE) {
next
}
## Check if peak is already assigned to some master list row
if (scoreListcurr[scoreIter, "isJoinedPeak"]==TRUE) { next }
## Check if score good enough
## Assign peak to master peak list row
mplenv$mplist[iterRow,sample] <- iterPeak
## Mark peak as joined
setTrue <- which(scoreListcurr[,"mpListRow"] == iterRow)
scoreListcurr[setTrue,"isJoinedRow"] <- TRUE
setTrue <- which(scoreListcurr[,"peak"] == iterPeak)
scoreListcurr[setTrue, "isJoinedPeak"] <- TRUE
mplenv$peakIdxList[which(mplenv$peakIdxList$peakidx==iterPeak),]$isJoinedPeak <- TRUE
}
notJoinedPeaks <- mplenv$peakIdxList[which(mplenv$peakIdxList$isJoinedPeak==FALSE),]$peakidx
for(notJoinedPeak in notJoinedPeaks) {
mplenv$mplist <- rbind(mplenv$mplist,matrix(0,1,dim(mplenv$mplist)[2]))
mplenv$mplist[length(mplenv$mplist[,1]),sample] <- notJoinedPeak
}
## Clear "Joined" information from all master peaklist rows
rm(peakIdxList,envir=mplenv)
## updateProgressInfo
object@progressInfo$group.nearest <- (sample - 1) / (length(samples) - 1)
progressInfoUpdate(object)
}, mplenv, object)
## stopCluster(cl)
gc()
groupmat <- matrix(0,nrow(mplenv$mplist), 7+length(levels(sampclass(object))))
colnames(groupmat) <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax",
"npeaks", levels(sampclass(object)))
groupindex <- vector("list", nrow(mplenv$mplist))
for (i in 1:nrow(mplenv$mplist)) {
groupmat[i, "mzmed"] <- median(peakmat[mplenv$mplist[i,],"mz"])
groupmat[i, c("mzmin", "mzmax")] <- range(peakmat[mplenv$mplist[i,],"mz"])
groupmat[i, "rtmed"] <- median(peakmat[mplenv$mplist[i,],"rt"])
groupmat[i, c("rtmin", "rtmax")] <- range(peakmat[mplenv$mplist[i,],"rt"])
groupmat[i, "npeaks"] <- length(which(peakmat[mplenv$mplist[i,]]>0))
gnum <- classlabel[unique(peakmat[mplenv$mplist[i,],"sample"])]
for (j in seq(along = gcount))
gcount[j] <- sum(gnum == j)
groupmat[i, 7+seq(along = gcount)] <- gcount
groupindex[[i]] <- mplenv$mplist[i, (which(mplenv$mplist[i,]>0))]
}
groups(object) <- groupmat
groupidx(object) <- groupindex
invisible(object)
})
patternVsRowScore <- function(currPeak, parameters, mplenv){
mplistmeanCurr <- mplenv$mplistmean[,c("mz","rt")]
mplistmeanCurr[,"mz"] <- mplistmeanCurr[,"mz"] * parameters$mzVsRTBalance
peakmatCurr <- mplenv$peakmat[currPeak,c("mz","rt"),drop=FALSE]
peakmatCurr[,"mz"] <- peakmatCurr[,"mz"] * parameters$mzVsRTBalance
nnDist <- nn2(mplistmeanCurr,peakmatCurr[,c("mz","rt"),drop=FALSE],
k=min(length(mplistmeanCurr[,1]),parameters$knn))
for(mplRow in 1:length(nnDist$nn.idx)){
mplistMZ <- mplenv$mplistmean[nnDist$nn.idx[mplRow],"mz"]
mplistRT <- mplenv$mplistmean[nnDist$nn.idx[mplRow],"rt"]
## Calculate differences between M/Z and RT values of current peak and median of the row
diffMZ = abs(mplistMZ-mplenv$peakmat[[currPeak,"mz"]])
diffRT = abs(mplistRT-mplenv$peakmat[[currPeak,"rt"]])
## What type of RT tolerance is used?
rtTolerance = parameters$rtcheck
## Calculate if differences within tolerancdiffRT < rtTolerance)es
if ((diffMZ < parameters$mzcheck) & (diffRT < rtTolerance)) {
return(data.frame(score=nnDist$nn.dists[mplRow],
peak=currPeak, mpListRow=nnDist$nn.idx[mplRow],
isJoinedPeak=FALSE, isJoinedRow=FALSE))
}
}
return(data.frame(score=NA, peak=currPeak, mpListRow=nnDist$nn.idx[mplRow],
isJoinedPeak=FALSE, isJoinedRow=FALSE))
}
setGeneric("group", function(object, ...) standardGeneric("group"))
setMethod("group", "xcmsSet", function(object, method=getOption("BioC")$xcms$group.method,
...) {
method <- match.arg(method, getOption("BioC")$xcms$group.methods)
if (is.na(method))
stop("unknown method : ", method)
method <- paste("group", method, sep=".")
invisible(do.call(method, alist(object, ...)))
})
setGeneric("groupval", function(object, ...) standardGeneric("groupval"))
setMethod("groupval", "xcmsSet", function(object, method = c("medret", "maxint"),
value = "index", intensity = "into") {
if ( nrow(object@groups)<1 || length(object@groupidx) <1) {
stop("xcmsSet is not been group()ed.")
}
method <- match.arg(method)
peakmat <- peaks(object)
groupmat <- groups(object)
groupindex <- groupidx(object)
sampnum <- seq(length = length(sampnames(object)))
retcol <- match("rt", colnames(peakmat))
intcol <- match(intensity, colnames(peakmat))
sampcol <- match("sample", colnames(peakmat))
values <- matrix(nrow = length(groupindex), ncol = length(sampnum))
if (method == "medret") {
for (i in seq(along = groupindex)) {
gidx <- groupindex[[i]][order(abs(peakmat[groupindex[[i]],retcol] - median(peakmat[groupindex[[i]],retcol])))]
values[i,] <- gidx[match(sampnum, peakmat[gidx,sampcol])]
}
} else {
for (i in seq(along = groupindex)) {
gidx <- groupindex[[i]][order(peakmat[groupindex[[i]],intcol], decreasing = TRUE)]
values[i,] <- gidx[match(sampnum, peakmat[gidx,sampcol])]
}
}
if (value != "index") {
values <- peakmat[values,value]
dim(values) <- c(length(groupindex), length(sampnum))
}
colnames(values) <- sampnames(object)
rownames(values) <- paste(round(groupmat[,"mzmed"],1), round(groupmat[,"rtmed"]), sep = "/")
values
})
setGeneric("retcor", function(object, ...) standardGeneric("retcor"))
setMethod("retcor", "xcmsSet", function(object, method=getOption("BioC")$xcms$retcor.method,
...) {
## Backward compatibility for old "methods"
if (method == "linear" || method == "loess") {
return(invisible(do.call(retcor.peakgroups, alist(object, smooth=method, ...))))
}
method <- match.arg(method, getOption("BioC")$xcms$retcor.methods)
if (is.na(method))
stop("unknown method : ", method)
method <- paste("retcor", method, sep=".")
invisible(do.call(method, alist(object, ...)))
})
setGeneric("retcor.peakgroups", function(object, ...) standardGeneric("retcor.peakgroups"))
setMethod("retcor.peakgroups", "xcmsSet", function(object, missing = 1, extra = 1,
smooth = c("loess", "linear"), span = .2,
family = c("gaussian", "symmetric"),
plottype = c("none", "deviation", "mdevden"),
col = NULL, ty = NULL) {
peakmat <- peaks(object)
groupmat <- groups(object)
if (length(groupmat) == 0)
stop("No group information found")
samples <- sampnames(object)
classlabel <- as.vector(unclass(sampclass(object)))
n <- length(samples)
corpeaks <- peakmat
smooth <- match.arg(smooth)
plottype <- match.arg(plottype)
family <- match.arg(family)
if (length(object@rt) == 2)
rtcor <- object@rt$corrected
else {
fnames <- filepaths(object)
rtcor <- vector("list", length(fnames))
for (i in seq(along = fnames)) {
xraw <- xcmsRaw(fnames[i])
rtcor[[i]] <- xraw@scantime
}
object@rt <- list(raw = rtcor, corrected = rtcor)
}
nsamp <- rowSums(groupmat[,match("npeaks", colnames(groupmat))+unique(classlabel),drop=FALSE])
idx <- which(nsamp >= n-missing & groupmat[,"npeaks"] <= nsamp + extra)
if (length(idx) == 0)
stop("No peak groups found for retention time correction")
idx <- idx[order(groupmat[idx,"rtmed"])]
rt <- groupval(object, "maxint", "rt")[idx,, drop=FALSE]
cat("Retention Time Correction Groups:", nrow(rt), "\n")
rtdev <- rt - apply(rt, 1, median, na.rm = TRUE)
if (smooth == "loess") {
mingroups <- min(colSums(!is.na(rt)))
if (mingroups < 4) {
smooth <- "linear"
warning("Too few peak groups, reverting to linear method")
} else if (mingroups*span < 4) {
span <- 4/mingroups
warning("Span too small, resetting to ", round(span, 2))
}
}
rtdevsmo <- vector("list", n)
## Code for checking to see if retention time correction is overcorrecting
rtdevrange <- range(rtdev, na.rm = TRUE)
warn.overcorrect <- FALSE
for (i in 1:n) {
pts <- na.omit(data.frame(rt = rt[,i], rtdev = rtdev[,i]))
if (smooth == "loess") {
lo <- suppressWarnings(loess(rtdev ~ rt, pts, span = span, degree = 1, family = family))
rtdevsmo[[i]] <- xcms:::na.flatfill(predict(lo, data.frame(rt = rtcor[[i]])))
### Remove singularities from the loess function
rtdevsmo[[i]][abs(rtdevsmo[[i]]) > quantile(abs(rtdevsmo[[i]]), 0.9)*2] <- NA
if (length(naidx <- which(is.na(rtdevsmo[[i]]))))
rtdevsmo[[i]][naidx] <- suppressWarnings(approx(na.omit(data.frame(rtcor[[i]], rtdevsmo[[i]])),
xout = rtcor[[i]][naidx], rule = 2)$y)
while (length(decidx <- which(diff(rtcor[[i]] - rtdevsmo[[i]]) < 0))) {
d <- diff(rtcor[[i]] - rtdevsmo[[i]])[tail(decidx, 1)]
rtdevsmo[[i]][tail(decidx, 1)] <- rtdevsmo[[i]][tail(decidx, 1)] - d
if (abs(d) <= 1e-06)
break;
}
rtdevsmorange <- range(rtdevsmo[[i]])
if (any(rtdevsmorange/rtdevrange > 2)) warn.overcorrect <- TRUE
} else {
if (nrow(pts) < 2) {
stop("Not enough ``well behaved'' peak groups even for linear smoothing of retention times")
}
fit <- lsfit(pts$rt, pts$rtdev)
rtdevsmo[[i]] <- rtcor[[i]] * fit$coef[2] + fit$coef[1]
ptsrange <- range(pts$rt)
minidx <- rtcor[[i]] < ptsrange[1]
maxidx <- rtcor[[i]] > ptsrange[2]
rtdevsmo[[i]][minidx] <- rtdevsmo[[i]][head(which(!minidx), n = 1)]
rtdevsmo[[i]][maxidx] <- rtdevsmo[[i]][tail(which(!maxidx), n = 1)]
}
}
if (warn.overcorrect) {
warning(paste("Fitted retention time deviation curves exceed points by more than 2x.",
"This is dangerous and the algorithm is probably overcorrecting your data.",
"Consider increasing the span parameter or switching to the linear smoothing method.",
sep = "\n"))
}
if (plottype == "mdevden") {
split.screen(matrix(c(0, 1, .3, 1, 0, 1, 0, .3), ncol = 4, byrow = TRUE))
screen(1)
par(mar = c(0, 4.1, 4.1, 2), xaxt = "n")
}
if (plottype %in% c("deviation", "mdevden")) {
### Set up the colors and line type
if (missing(col) || is.null(col)) {
col <- integer(n)
for (i in 1:max(classlabel))
col[classlabel == i] <- 1:sum(classlabel == i)
}
if (missing(ty) || is.null(ty)) {
ty <- integer(n)
for (i in 1:max(col))
ty[col == i] <- 1:sum(col == i)
}
if (length(palette()) < max(col))
mypal <- rainbow(max(col), end = 0.85)
else
mypal <- palette()[1:max(col)]
rtrange <- range(do.call(c, rtcor))
devrange <- range(do.call(c, rtdevsmo))
plot(0, 0, type="n", xlim = rtrange, ylim = devrange, main = "Retention Time Deviation vs. Retention Time", xlab = "Retention Time", ylab = "Retention Time Deviation")
legend(rtrange[2], devrange[2], samples, col = mypal[col], lty = ty, pch = ceiling(1:n/length(mypal)), xjust = 1)
for (i in 1:n) {
points(data.frame(rt = rt[,i], rtdev = rtdev[,i]), col = mypal[col[i]], pch = ty[i], type="p")
points(rtcor[[i]], rtdevsmo[[i]], type="l", col = mypal[col[i]], lty = ty[i])
}
}
if (plottype == "mdevden") {
screen(2)
par(mar = c(5.1, 4.1, 0, 2), yaxt = "n")
allden <- density(peakmat[,"rt"], bw = diff(rtrange)/200, from = rtrange[1], to = rtrange[2])[c("x","y")]
corden <- density(rt, bw = diff(rtrange)/200, from = rtrange[1], to = rtrange[2], na.rm = TRUE)[c("x","y")]
allden$y <- allden$y / sum(allden$y)
corden$y <- corden$y / sum(corden$y)
maxden <- max(allden$y, corden$y)
plot(c(0,0), xlim = rtrange, ylim = c(0, maxden), type = "n", main = "", xlab = "Retention Time", ylab = "Peak Density")
points(allden, type = "l", col = 1)
points(corden, type = "l", col = 2)
abline(h = 0, col = "grey")
legend(rtrange[2], maxden, c("All", "Correction"), col = 1:2, lty = c(1,1), xjust = 1)
close.screen(all = TRUE)
}
for (i in 1:n) {
cfun <- stepfun(rtcor[[i]][-1] - diff(rtcor[[i]])/2, rtcor[[i]] - rtdevsmo[[i]])
rtcor[[i]] <- rtcor[[i]] - rtdevsmo[[i]]
sidx <- which(corpeaks[,"sample"] == i)
corpeaks[sidx, c("rt", "rtmin", "rtmax")] <- cfun(corpeaks[sidx, c("rt", "rtmin", "rtmax")])
}
object@rt$corrected <- rtcor
peaks(object) <- corpeaks
groups(object) <- matrix(nrow = 0, ncol = 0)
groupidx(object) <- list()
invisible(object)
})
setGeneric("retcor.obiwarp", function(object, ...) standardGeneric("retcor.obiwarp"))
setMethod("retcor.obiwarp", "xcmsSet", function(object, plottype = c("none", "deviation"),
profStep=1, center=NULL,
col = NULL, ty = NULL,
response=1, distFunc="cor_opt",
gapInit=NULL, gapExtend=NULL,
factorDiag=2, factorGap=1,
localAlignment=0, initPenalty=0) {
if (is.null(gapInit)) {
if (distFunc=="cor") {gapInit=0.3}
if (distFunc=="cor_opt") {gapInit=0.3}
if (distFunc=="cov") {gapInit=0.0}
if (distFunc=="euc") {gapInit=0.9}
if (distFunc=="prd") {gapInit=0.0}
}
if (is.null(gapExtend)) {
if (distFunc=="cor") {gapExtend=2.4}
if (distFunc=="cor_opt") {gapExtend=2.4}
if (distFunc=="cov") {gapExtend= 11.7}
if (distFunc=="euc") {gapExtend= 1.8}
if (distFunc=="prd") {gapExtend= 7.8}
}
peakmat <- peaks(object)
samples <- sampnames(object)
classlabel <- as.vector(unclass(sampclass(object)))
N <- length(samples)
corpeaks <- peakmat
plottype <- match.arg(plottype)
if (length(object@rt) == 2) {
rtcor <- object@rt$corrected
} else {
fnames <- filepaths(object)
rtcor <- vector("list", length(fnames))
for (i in seq(along = fnames)) {
xraw <- xcmsRaw(fnames[i])
rtcor[[i]] <- xraw@scantime
}
object@rt <- list(raw = rtcor, corrected = rtcor)
}
rtimecor <- vector("list", N)
rtdevsmo <- vector("list", N)
plength <- rep(0, N)
if (missing(center)) {
for(i in 1:N){
plength[i] <- length(which(peakmat[,"sample"]==i))
}
center <- which.max(plength)
}
cat("center sample: ", samples[center], "\nProcessing: ")
idx <- which(seq(1,N) != center)
obj1 <- xcmsRaw(object@filepaths[center], profmethod="bin", profstep=0)
## added t automatically find the correct scan range from the xcmsSet object
if(length(obj1@scantime) != length(object@rt$raw[[1]])){
##figure out the scan time range
scantime.start <-object@rt$raw[[1]][1]
scantime.end <-object@rt$raw[[1]][length(object@rt$raw[[1]])]
scanrange.start <-which.min(abs(obj1@scantime - scantime.start))
scanrange.end <-which.min(abs(obj1@scantime - scantime.end))
scanrange<-c(scanrange.start, scanrange.end)
obj1 <- xcmsRaw(object@filepaths[center], profmethod="bin", profstep=0, scanrange=scanrange)
} else{
scanrange<-NULL
}
for (si in 1:length(idx)) {
s <- idx[si]
cat(samples[s], " ")
profStepPad(obj1) <- profStep ## (re-)generate profile matrix, since it might have been modified during previous iteration
if(is.null(scanrange)){
obj2 <- xcmsRaw(object@filepaths[s], profmethod="bin", profstep=0)
} else{
obj2 <- xcmsRaw(object@filepaths[s], profmethod="bin", profstep=0, scanrange=scanrange)
}
profStepPad(obj2) <- profStep ## generate profile matrix
mzmin <- min(obj1@mzrange[1], obj2@mzrange[1])
mzmax <- max(obj1@mzrange[2], obj2@mzrange[2])
mz <- seq(mzmin,mzmax, by=profStep)
mz <- as.double(mz)
mzval <- length(mz)
scantime1 <- obj1@scantime
scantime2 <- obj2@scantime
mstdiff <- median(c(diff(scantime1), diff(scantime2)))
rtup1 <- c(1:length(scantime1))
rtup2 <- c(1:length(scantime2))
mst1 <- which(diff(scantime1)>5*mstdiff)[1]
if(!is.na(mst1)) {
rtup1 <- which(rtup1<=mst1)
cat("Found gaps: cut scantime-vector at ", scantime1[mst1],"seconds", "\n")
}
mst2 <- which(diff(scantime2)>5*mstdiff)[1]
if(!is.na(mst2)) {
rtup2 <- which(rtup2<=mst2)
cat("Found gaps: cut scantime-vector at ", scantime2[mst2],"seconds", "\n")
}
scantime1 <- scantime1[rtup1]
scantime2 <- scantime2[rtup2]
rtmaxdiff <- abs(diff(c(scantime1[length(scantime1)],
scantime2[length(scantime2)])))
if(rtmaxdiff>(5*mstdiff)){
rtmax <- min(scantime1[length(scantime1)],
scantime2[length(scantime2)])
rtup1 <- which(scantime1<=rtmax)
rtup2 <- which(scantime2<=rtmax)
}
scantime1 <- scantime1[rtup1]
scantime2 <- scantime2[rtup2]
valscantime1 <- length(scantime1)
valscantime2 <- length(scantime2)
if(length(obj1@scantime)>valscantime1) {
obj1@env$profile <- obj1@env$profile[,-c((valscantime1+1):length(obj1@scantime))]
}
if(length(obj2@scantime)>valscantime2) {
obj2@env$profile <- obj2@env$profile[,-c((valscantime2+1):length(obj2@scantime))]
}
if(mzmin < obj1@mzrange[1]) {
seqlen <- length(seq(mzmin, obj1@mzrange[1], profStep))-1
x <- matrix(0, seqlen,dim(obj1@env$profile)[2])
obj1@env$profile <- rbind(x, obj1@env$profile)
}
if(mzmax > obj1@mzrange[2]){
seqlen <- length(seq(obj1@mzrange[2], mzmax, profStep))-1
x <- matrix(0, seqlen, dim(obj1@env$profile)[2])
obj1@env$profile <- rbind(obj1@env$profile, x)
}
if(mzmin < obj2@mzrange[1]){
seqlen <- length(seq(mzmin, obj2@mzrange[1], profStep))-1
x <- matrix(0, seqlen, dim(obj2@env$profile)[2])
obj2@env$profile <- rbind(x, obj2@env$profile)
}
if(mzmax > obj2@mzrange[2]){
seqlen <- length(seq(obj2@mzrange[2], mzmax, profStep))-1
x <- matrix(0, seqlen, dim(obj2@env$profile)[2])
obj2@env$profile <- rbind(obj2@env$profile, x)
}
intensity1 <- obj1@env$profile
intensity2 <- obj2@env$profile
if ((mzval * valscantime1 != length(intensity1)) || (mzval * valscantime2 != length(intensity2)))
stop("Dimensions of profile matrices do not match !\n")
rtimecor[[s]] <-.Call("R_set_from_xcms",
valscantime1,scantime1,mzval,mz,intensity1,
valscantime2,scantime2,mzval,mz,intensity2,
response, distFunc,
gapInit, gapExtend,
factorDiag, factorGap,
localAlignment, initPenalty)
if(length(obj2@scantime) > valscantime2) {
object@rt$corrected[[s]] <- c(rtimecor[[s]],
obj2@scantime[(max(rtup2)+1):length(obj2@scantime)])
} else {
object@rt$corrected[[s]] <- rtimecor[[s]]
}
rtdevsmo[[s]] <- round(rtcor[[s]]-object@rt$corrected[[s]],2)
rm(obj2)
gc()
## updateProgressInfo
object@progressInfo$retcor.obiwarp <- si / length(idx)
xcms:::progressInfoUpdate(object)
}
cat("\n")
rtdevsmo[[center]] <- round(rtcor[[center]] - object@rt$corrected[[center]], 2)
if (plottype == "deviation") {
## Set up the colors and line type
if (missing(col)) {
col <- integer(N)
for (i in 1:max(classlabel))
col[classlabel == i] <- 1:sum(classlabel == i)
}
if (missing(ty)) {
ty <- integer(N)
for (i in 1:max(col))
ty[col == i] <- 1:sum(col == i)
}
if (length(palette()) < max(col))
mypal <- rainbow(max(col), end = 0.85)
else
mypal <- palette()[1:max(col)]
rtrange <- range(do.call("c", rtcor))
devrange <- range(do.call("c", rtdevsmo))
layout(matrix(c(1, 2),ncol=2, byrow=F),widths=c(1,0.3))
par(mar=c(4,4,2,0))
plot(0, 0, type="n", xlim = rtrange, ylim = devrange,
main = "Retention Time Deviation vs. Retention Time",
xlab = "Retention Time", ylab = "Retention Time Deviation")
for (i in 1:N) {
points(rtcor[[i]], rtdevsmo[[i]], type="l", col = mypal[col[i]], lty = ty[i])
}
plot.new() ; par(mar= c(2, 0, 2, 0))
plot.window(c(0,1), c(0,1))
legend(0,1.04, basename(samples), col = mypal[col], lty = ty)
}
for (i in 1:N) {
cfun <- stepfun(rtcor[[i]][-1] - diff(rtcor[[i]])/2, rtcor[[i]] - rtdevsmo[[i]])
rtcor[[i]] <- rtcor[[i]] - rtdevsmo[[i]]
sidx <- which(corpeaks[,"sample"] == i)
corpeaks[sidx, c("rt", "rtmin", "rtmax")] <- cfun(corpeaks[sidx, c("rt", "rtmin", "rtmax")])
}
peaks(object) <- corpeaks
groups(object) <- matrix(nrow = 0, ncol = 0)
groupidx(object) <- list()
invisible(object)
})
setGeneric("plotrt", function(object, ...) standardGeneric("plotrt"))
setMethod("plotrt", "xcmsSet", function(object, col = NULL, ty = NULL, leg = TRUE, densplit = FALSE) {
samples <- sampnames(object)
classlabel <- as.vector(unclass(sampclass(object)))
n <- length(samples)
rtuncor <- object@rt$raw
rtcor <- object@rt$corrected
if (missing(col)) {
col <- integer(n)
for (i in 1:max(classlabel))
col[classlabel == i] <- 1:sum(classlabel == i)
}
if (missing(ty)) {
ty <- integer(n)
for (i in 1:max(col))
ty[col == i] <- 1:sum(col == i)
}
if (length(palette()) < max(col))
mypal <- rainbow(max(col), end = 0.85)
else
mypal <- palette()[1:max(col)]
rtdevsmo <- vector("list", n)
for (i in 1:n)
rtdevsmo[[i]] <- rtuncor[[i]] - rtcor[[i]]
rtrange <- range(do.call(c, rtuncor))
devrange <- range(do.call(c, rtdevsmo))
if (densplit) {
split.screen(matrix(c(0, 1, .3, 1, 0, 1, 0, .3), ncol = 4, byrow = TRUE))
screen(1)
par(mar = c(0, 4.1, 4.1, 2), xaxt = "n")
}
plot(0, 0, type="n", xlim = rtrange, ylim = devrange, main = "Retention Time Deviation vs. Retention Time", xlab = "Retention Time", ylab = "Retention Time Deviation")
if (leg)
legend(rtrange[2], devrange[2], samples, col = mypal[col], lty = ty, pch = ceiling(1:n/length(mypal)), xjust = 1)
for (i in 1:n)
points(rtuncor[[i]], rtdevsmo[[i]], type="l", col = mypal[col[i]], lty = ty[i])
if (densplit) {
screen(2)
par(mar = c(5.1, 4.1, 0, 2), yaxt = "n")
allden <- density(object@peaks[,"rt"], bw = diff(rtrange)/200, from = rtrange[1], to = rtrange[2])[c("x","y")]
plot(allden, xlim = rtrange, type = "l", main = "", xlab = "Retention Time", ylab = "Peak Density")
abline(h = 0, col = "grey")
close.screen(all = TRUE)
}
})
setGeneric("fillPeaks.chrom", function(object, ...) standardGeneric("fillPeaks.chrom"))
setMethod("fillPeaks.chrom", "xcmsSet", function(object, nSlaves=NULL) {
## development mockup:
if (FALSE) {
library(xcms)
library(faahKO)
object <- group(faahko)
gf <- fillPeaks(object)
pkgEnv = getNamespace("xcms")
attach(pkgEnv)
}
peakmat <- peaks(object)
groupmat <- groups(object)
if (length(groupmat) == 0)
stop("No group information found")
files <- filepaths(object)
samp <- sampnames(object)
classlabel <- as.vector(unclass(sampclass(object)))
prof <- profinfo(object)
rtcor <- object@rt$corrected
## Remove groups that overlap with more "well-behaved" groups
numsamp <- rowSums(groupmat[,(match("npeaks", colnames(groupmat))+1):ncol(groupmat),drop=FALSE])
uorder <- order(-numsamp, groupmat[,"npeaks"])
uindex <- rectUnique(groupmat[,c("mzmin","mzmax","rtmin","rtmax"),drop=FALSE],
uorder)
groupmat <- groupmat[uindex,]
groupindex <- groupidx(object)[uindex]
gvals <- groupval(object)[uindex,]
peakrange <- matrix(nrow = nrow(gvals), ncol = 4)
colnames(peakrange) <- c("mzmin","mzmax","rtmin","rtmax")
mzmin <- peakmat[gvals,"mzmin"]
dim(mzmin) <- c(nrow(gvals), ncol(gvals))
peakrange[,"mzmin"] <- apply(mzmin, 1, median, na.rm = TRUE)
mzmax <- peakmat[gvals,"mzmax"]
dim(mzmax) <- c(nrow(gvals), ncol(gvals))
peakrange[,"mzmax"] <- apply(mzmax, 1, median, na.rm = TRUE)
retmin <- peakmat[gvals,"rtmin"]
dim(retmin) <- c(nrow(gvals), ncol(gvals))
peakrange[,"rtmin"] <- apply(retmin, 1, median, na.rm = TRUE)
retmax <- peakmat[gvals,"rtmax"]
dim(retmax) <- c(nrow(gvals), ncol(gvals))
peakrange[,"rtmax"] <- apply(retmax, 1, median, na.rm = TRUE)
lastpeak <- nrow(peakmat)
lastpeakOrig <- lastpeak
## peakmat <- rbind(peakmat, matrix(nrow = sum(is.na(gvals)), ncol = ncol(peakmat)))
cnames <- colnames(object@peaks)
ft <- cbind(file=files,id=1:length(files))
argList <- apply(ft,1,function(x) {
## Add only those samples which actually have NA in them
if (!any(is.na(gvals[,as.numeric(x["id"])]))) {
## nothing to do.
list()
} else {
list(file=x["file"],id=as.numeric(x["id"]),
params=list(method="chrom",
gvals=gvals,
prof=prof,
dataCorrection=object@dataCorrection,
polarity=object@polarity,
rtcor=object@rt$corrected[[as.numeric(x["id"])]],
peakrange=peakrange))
}
})
nonemptyIdx <- (sapply(argList, length) > 0)
if (!any(nonemptyIdx)) {
## Nothing to do
return(invisible(object))
}
argList <- argList[nonemptyIdx]
parmode <- xcmsParallelSetup(nSlaves=nSlaves)
runParallel <- parmode$runParallel
parMode <- parmode$parMode
snowclust <- parmode$snowclust
if (parMode == "MPI") {
newpeakslist <- xcmsPapply(argList, fillPeaksChromPar)
mpi.close.Rslaves()
} else if (parMode == "SOCK") {
newpeakslist <- xcmsClusterApply(cl=snowclust, x=argList,
fun=fillPeaksChromPar,
msgfun=msgfunGeneric)
stopCluster(snowclust)
} else {
## serial mode
newpeakslist <- lapply(argList, fillPeaksChromPar)
}
o <- order(sapply(newpeakslist, function(x) x$myID))
newpeaks <- do.call(rbind, lapply(newpeakslist[o], function(x) x$newpeaks))
## Make sure colnames are compatible
newpeaks <- newpeaks[, match(cnames, colnames(newpeaks)), drop = FALSE]
colnames(newpeaks) <- cnames
peakmat <- rbind(peakmat, newpeaks)
for (i in seq(along = files)) {
naidx <- which(is.na(gvals[,i]))
for (j in seq(along = naidx))
groupindex[[naidx[j]]] <- c(groupindex[[naidx[j]]], lastpeak+j)
lastpeak <- lastpeak + length(naidx)
}
peaks(object) <- peakmat
object@filled <- seq((lastpeakOrig+1),nrow(peakmat))
groups(object) <- groupmat
groupidx(object) <- groupindex
invisible(object)
})
setGeneric("fillPeaks.MSW", function(object, ...) standardGeneric("fillPeaks.MSW"))
setMethod("fillPeaks.MSW", "xcmsSet", function(object, mrange=c(0,0), sample=NULL) {
peakmat <- peaks(object)
groupmat <- groups(object)
if (length(groupmat) == 0)
stop("No group information found")
files <- filepaths(object)
samp <- sampnames(object)
classlabel <- as.vector(unclass(sampclass(object)))
prof <- profinfo(object)
rtcor <- object@rt$corrected
## Remove groups that overlap with more "well-behaved" groups
numsamp <- rowSums(groupmat[,(match("npeaks", colnames(groupmat))+1):ncol(groupmat),drop=FALSE])
uorder <- order(-numsamp, groupmat[,"npeaks"])
uindex <- rectUnique(groupmat[,c("mzmin","mzmax","rtmin","rtmax"),drop=FALSE],
uorder)
groupmat <- groupmat[uindex,]
groupindex <- groupidx(object)[uindex]
gvals <- groupval(object)[uindex,]
peakrange <- matrix(nrow = nrow(gvals), ncol = 4)
colnames(peakrange) <- c("mzmin","mzmax","rtmin","rtmax")
mzmin <- peakmat[gvals,"mzmin"]
dim(mzmin) <- c(nrow(gvals), ncol(gvals))
peakrange[,"mzmin"] <- apply(mzmin, 1, min, na.rm = TRUE)
mzmax <- peakmat[gvals,"mzmax"]
dim(mzmax) <- c(nrow(gvals), ncol(gvals))
peakrange[,"mzmax"] <- apply(mzmax, 1, min, na.rm = TRUE)
retmin <- peakmat[gvals,"rtmin"]
dim(retmin) <- c(nrow(gvals), ncol(gvals))
peakrange[,"rtmin"] <- apply(retmin, 1, median, na.rm = TRUE)
retmax <- peakmat[gvals,"rtmax"]
dim(retmax) <- c(nrow(gvals), ncol(gvals))
peakrange[,"rtmax"] <- apply(retmax, 1, median, na.rm = TRUE)
lastpeak <- nrow(peakmat)
peakmat <- rbind(peakmat, matrix(nrow = sum(is.na(gvals)), ncol = ncol(peakmat)))
cnames <- colnames(object@peaks)
for (i in seq(along = files)) {
cat(samp[i], "")
flush.console()
naidx <- which(is.na(gvals[,i]))
newpeaks <- matrix(nrow=length(naidx), ncol=9)
nppos<-0 ## line position in the newpeaks matrix
if (length(naidx)) {
lcraw <- xcmsRaw(files[i], profmethod = prof$method, profstep = 0)
ngs <- as.vector(naidx)
for (g in ngs)
{
nppos <- nppos+1
mzpos <- which(abs(lcraw@env$mz - groupmat[g,"mzmed"]) == min(abs(lcraw@env$mz - groupmat[g,"mzmed"])))
mmzpos <- mzpos[which(lcraw@env$intensity[mzpos] == max(lcraw@env$intensity[mzpos]))]
mmzr <- seq((mmzpos-mrange[1]),(mmzpos+mrange[2]))
maxo <- max(lcraw@env$intensity[mmzr])
## this is the new one, summing the scale-range
## calculating scale, adding intensitiesin this scale
medMZmin <- median(peakmat[groupindex[[g]],"mzmin"])
medMZmax <- median(peakmat[groupindex[[g]],"mzmax"])
minMzpos <- min(which(abs(lcraw@env$mz - medMZmin) == min(abs(lcraw@env$mz - medMZmin))))
maxMzpos <- max(which(abs(lcraw@env$mz - medMZmax) == min(abs(lcraw@env$mz - medMZmax))))
into = sum(lcraw@env$intensity[minMzpos:maxMzpos])
newpeaks[nppos,] <- c(groupmat[g,"mzmed"],medMZmin,medMZmax,-1,-1,-1,into,maxo,i)
}
colnames(newpeaks) <- c("mz","mzmin","mzmax","rt","rtmin","rtmax","into","maxo","sample")
rm(lcraw)
gc()
newcols <-colnames(newpeaks)[colnames(newpeaks) %in% cnames]
peakmat[lastpeak+seq(along = naidx),newcols] <- newpeaks[,newcols]
for (i in seq(along = naidx))
groupindex[[naidx[i]]] <- c(groupindex[[naidx[i]]], lastpeak+i)
lastpeak <- lastpeak + length(naidx)
}
}
cat("\n")
peaks(object) <- peakmat
object@filled <- seq((lastpeak+1),nrow(peakmat))
groups(object) <- groupmat
groupidx(object) <- groupindex
invisible(object)
})
setGeneric("fillPeaks", function(object, ...) standardGeneric("fillPeaks"))
setMethod("fillPeaks", "xcmsSet", function(object, method=getOption("BioC")$xcms$fillPeaks.method,...) {
method <- match.arg(method, getOption("BioC")$xcms$fillPeaks.methods)
if (is.na(method))
stop("unknown method : ", method)
method <- paste("fillPeaks", method, sep=".")
invisible(do.call(method, alist(object, ...)))
})
setMethod("getEIC", "xcmsSet", function(object, mzrange, rtrange = 200,
groupidx, sampleidx = sampnames(object),
rt = c("corrected", "raw")) {
files <- filepaths(object)
grp <- groups(object)
samp <- sampnames(object)
prof <- profinfo(object)
rt <- match.arg(rt)
if (is.numeric(sampleidx))
sampleidx <- sampnames(object)[sampleidx]
sampidx <- match(sampleidx, sampnames(object))
if (!missing(groupidx)) {
if (is.numeric(groupidx))
groupidx <- groupnames(object)[unique(as.integer(groupidx))]
grpidx <- match(groupidx, groupnames(object, template = groupidx))
}
if (missing(mzrange)) {
if (missing(groupidx))
stop("No m/z range or groups specified")
if (any(is.na(groupval(object, value = "mz"))))
stop('Please use fillPeaks() to fill up NA values !')
mzmin <- -rowMax(-groupval(object, value = "mzmin"))
mzmax <- rowMax(groupval(object, value = "mzmax"))
mzrange <- matrix(c(mzmin[grpidx], mzmax[grpidx]), ncol = 2)
} else if (all(c("mzmin","mzmax") %in% colnames(mzrange)))
mzrange <- mzrange[,c("mzmin", "mzmax"),drop=FALSE]
else if (is.null(dim(mzrange)))
stop("mzrange must be a matrix")
colnames(mzrange) <- c("mzmin", "mzmax")
if (length(rtrange) == 1) {
if (missing(groupidx))
rtrange <- matrix(rep(range(object@rt[[rt]][sampidx]), nrow(mzrange)),
ncol = 2, byrow = TRUE)
else {
rtrange <- retexp(grp[grpidx,c("rtmin","rtmax"),drop=FALSE], rtrange)
}
} else if (is.null(dim(rtrange)))
stop("rtrange must be a matrix or single number")
colnames(rtrange) <- c("rtmin", "rtmax")
if (missing(groupidx))
gnames <- character(0)
else
gnames <- groupidx
eic <- vector("list", length(sampleidx))
names(eic) <- sampleidx
for (i in seq(along = sampidx)) {
cat(sampleidx[i], "")
flush.console()
lcraw <- xcmsRaw(files[sampidx[i]], profmethod = prof$method, profstep = 0)
if(length(object@dataCorrection) > 1){
if(object@dataCorrection[i] == 1)
lcraw<-stitch(lcraw, AutoLockMass(lcraw))
}
if (rt == "corrected")
lcraw@scantime <- object@rt$corrected[[sampidx[i]]]
if (length(prof) > 2)
lcraw@profparam <- prof[seq(3, length(prof))]
currenteic <- getEIC(lcraw, mzrange, rtrange, step = prof$step)
eic[[i]] <- currenteic@eic[[1]]
rm(lcraw)
gc()
}
cat("\n")
invisible(new("xcmsEIC", eic = eic, mzrange = mzrange, rtrange = rtrange,
rt = rt, groupnames = gnames))
})
getSpecWindow <- function(xs, gidxs, borderwidth=1){
groupidx <- groupidx(xs)
if (length(groupidx)==0) {
stop("no groups found in xcmsSet object.")
}
minmaxs <- matrix(ncol=2, nrow=length(gidxs))
cat("Processing data from sample: ")
for (a in 1:length(gidxs)){
## 1st step: getting boundaries
mzmin <- min(peaks(xs)[groupidx[[gidxs[a]]],"mzmin"]) ## lower bound
mzmax <- max(peaks(xs)[groupidx[[gidxs[a]]],"mzmax"]) ## upper bound
mzw <- mzmax-mzmin
minmaxs[a,1] <- mzmin - borderwidth*mzw
minmaxs[a,2] <- mzmax + borderwidth*mzw ## a peakwidth left and right
}
mzlistlist=list()
for (s in 1:length(gidxs)) mzlistlist[[s]] <- list()
mzlistlist$minmax <- minmaxs
for (s in 1:length(sampnames(xs))){
## 2nd Step: getting each sample
cat(" ",s)
xr <- xcmsRaw(xs@filepaths[s])
for (a in 1:length(gidxs)){
pmin <- min(which(abs(xr@env$mz - minmaxs[a,1]) == min(abs(xr@env$mz - minmaxs[a,1]))))
pmax <- min(which(abs(xr@env$mz - minmaxs[a,2]) == min(abs(xr@env$mz - minmaxs[a,2]))))
mzlistlist[[a]][[s]] <- matrix(ncol=2, nrow=(pmax-pmin+1),
data=c(xr@env$mz[pmin:pmax],xr@env$intensity[pmin:pmax]))
}
}
cat("\n")
invisible(mzlistlist)
}
plotSpecWindow <- function(xs, gidxs, borderwidth=1){
if (length(groupidx(xs))==0) {
stop("no groups found in xcmsSet object.")
}
mzll <- getSpecWindow(xs, gidxs, borderwidth)
minmax <- mzll$minmax
groupidx <- groupidx(xs)
pcolors <- c("black","darkred", "green","blue")
ecolors <- c("grey","red","lightgreen","lightblue")
cat("\ngroup: ")
for (a in 1:length(gidxs)){
cat(" ",gidxs[a])
nsa <- length(levels(sampclass(xs)))
pcol <- pcolors[which(levels(sampclass(xs)) == sampclass(xs)[1])]
ecol <- ecolors[which(levels(sampclass(xs)) == sampclass(xs)[1])]
mzmin <- minmax[a,1]
mzmax <- minmax[a,2]
maxints <- NA
for (s in 1:length(mzll[[a]])) {
maxints[s] <- max(mzll[[a]][[s]][,2])
}
maxo <- max(maxints)
for (n in 1:length(mzll[[a]])){
par(lty=1)
pcol <- pcolors[which(levels(sampclass(xs)) == sampclass(xs)[n])]
ecol <- ecolors[which(levels(sampclass(xs)) == sampclass(xs)[n])]
if(n==1) {
plot(mzll[[a]][[n]][,1],
mzll[[a]][[n]][,2],
xlim=c(mzmin,mzmax), ylim=c(0,(maxo+10000)),
type='l', col=ecol, xlab="", ylab="") ## complete raw-range
} else {
lines(mzll[[a]][[n]][,1],
mzll[[a]][[n]][,2],
xlim=c(mzmin,mzmax), ylim=c(0,(maxo+10000)),
type='l', col=ecol, xlab="", ylab="") ## complete raw-range}
}
if (length(which(peaks(xs)[groupidx[[gidxs[a]]],"sample"] == n))>0){
## peak entry of the first sample
apeak <- groupidx[[gidxs[a]]][min(which(peaks(xs)[groupidx[[gidxs[a]]],"sample"] == n))]
if (apeak %in% xs@filled) {
par(lty=2)
}else{
par(lty=1)
}
ppmin <- min(which(abs(mzll[[a]][[n]][,1] - peaks(xs)[apeak,"mzmin"]) ==
min( abs(mzll[[a]][[n]][,1] - peaks(xs)[apeak,"mzmin"]))))
ppmax <- min(which(abs(mzll[[a]][[n]][,1] - peaks(xs)[apeak,"mzmax"]) ==
min( abs(mzll[[a]][[n]][,1] - peaks(xs)[apeak,"mzmax"]))))
lines(mzll[[a]][[n]][ppmin:ppmax,1],mzll[[a]][[n]][ppmin:ppmax,2],
xlim=c(mzmin,mzmax), col=pcol, type='l')
}
}
title(main=paste("m/z vs. intensity for group",gidxs[a]), xlab="m/z", ylab="intensity")
legend("topright", as.vector(levels(sampclass(xs))),col=pcolors[1:nsa],lty=rep(1,nsa))
}
cat("\n")
}
setGeneric("peakTable", function(object, ...) standardGeneric("peakTable"))
setMethod("peakTable", "xcmsSet", function(object, filebase = character(), ...) {
if (length(sampnames(object)) == 1) {
return(object@peaks)
}
if (nrow(object@groups) < 1) {
stop ('First argument must be an xcmsSet with group information or contain only one sample.')
}
groupmat <- groups(object)
if (! "value" %in% names(list(...))) {
ts <- data.frame(cbind(groupmat,groupval(object, value="into", ...)), row.names = NULL)
} else {
ts <- data.frame(cbind(groupmat,groupval(object, ...)), row.names = NULL)
}
cnames <- colnames(ts)
if (cnames[1] == 'mzmed') {
cnames[1] <- 'mz'
} else {
stop ('mzmed column missing')
}
if (cnames[4] == 'rtmed') {
cnames[4] <- 'rt'
} else {
stop ('mzmed column missing')
}
colnames(ts) <- cnames
if (length(filebase))
write.table(ts, paste(filebase, ".tsv", sep = ""), quote = FALSE, sep = "\t", col.names = NA)
ts
})
setGeneric("diffreport", function(object, ...) standardGeneric("diffreport"))
setMethod("diffreport", "xcmsSet", function(object, class1 = levels(sampclass(object))[1],
class2 = levels(sampclass(object))[2],
filebase = character(), eicmax = 0, eicwidth = 200,
sortpval = TRUE, classeic = c(class1,class2),
value = c("into","maxo","intb"), metlin = FALSE,
h = 480, w = 640, mzdec=2, ...) {
if ( nrow(object@groups)<1 || length(object@groupidx) <1) {
stop("No group information. Use group().")
}
require(multtest) || stop("Couldn't load multtest")
value <- match.arg(value)
groupmat <- groups(object)
if (length(groupmat) == 0)
stop("No group information found")
samples <- sampnames(object)
n <- length(samples)
classlabel <- sampclass(object)
classlabel <- levels(classlabel)[as.vector(unclass(classlabel))]
values <- groupval(object, "medret", value=value)
indecies <- groupval(object, "medret", value = "index")
if (!all(c(class1,class2) %in% classlabel))
stop("Incorrect Class Labels")
## c1 and c2 are column indices of class1 and class2 resp.
c1 <- which(classlabel %in% class1)
c2 <- which(classlabel %in% class2)
ceic <- which(classlabel %in% classeic)
if (length(intersect(c1, c2)) > 0)
stop("Intersecting Classes")
## Check against missing Values
if (any(is.na(values[,c(c1,c2)]))) {
stop("NA values in xcmsSet. Use fillPeaks()")
}
mean1 <- rowMeans(values[,c1,drop=FALSE], na.rm = TRUE)
mean2 <- rowMeans(values[,c2,drop=FALSE], na.rm = TRUE)
## Calculate fold change.
## For foldchange <1 set fold to 1/fold
## See tstat to check which was higher
fold <- mean2 / mean1
fold[!is.na(fold) & fold < 1] <- 1/fold[!is.na(fold) & fold < 1]
testval <- values[,c(c1,c2)]
testclab <- c(rep(0,length(c1)),rep(1,length(c2)))
if (ncol(testval) > 2) {
tstat <- mt.teststat(testval, testclab, ...)
pvalue <- pval(testval, testclab, tstat)
} else {
tstat <- pvalue <- rep(NA,nrow(testval))
}
stat <- data.frame(fold = fold, tstat = tstat, pvalue = pvalue)
if (length(levels(sampclass(object))) >2) {
pvalAnova<-c()
for(i in 1:nrow(values)){
var<-as.numeric(values[i,])
ano<-summary(aov(var ~ sampclass(object)) )
pvalAnova<-append(pvalAnova, unlist(ano)["Pr(>F)1"])
}
stat<-cbind(stat, anova= pvalAnova)
}
if (metlin) {
neutralmass <- groupmat[,"mzmed"] + ifelse(metlin < 0, 1, -1)
metlin <- abs(metlin)
digits <- ceiling(-log10(metlin))+1
metlinurl <- paste("http://metlin.scripps.edu/metabo_list.php?mass_min=",
round(neutralmass - metlin, digits), "&mass_max=",
round(neutralmass + metlin, digits), sep="")
values <- cbind(metlin = metlinurl, values)
}
twosamp <- cbind(name = groupnames(object), stat, groupmat, values)
if (sortpval) {
tsidx <- order(twosamp[,"pvalue"])
twosamp <- twosamp[tsidx,]
rownames(twosamp) <- 1:nrow(twosamp)
values<-values[tsidx,]
} else
tsidx <- 1:nrow(values)
if (length(filebase))
write.table(twosamp, paste(filebase, ".tsv", sep = ""), quote = FALSE, sep = "\t", col.names = NA)
if (eicmax > 0) {
if (length(unique(peaks(object)[,"rt"])) > 1) {
## This looks like "normal" LC data
eicmax <- min(eicmax, length(tsidx))
eics <- getEIC(object, rtrange = eicwidth*1.1, sampleidx = ceic,
groupidx = tsidx[seq(length = eicmax)])
if (length(filebase)) {
eicdir <- paste(filebase, "_eic", sep="")
boxdir <- paste(filebase, "_box", sep="")
dir.create(eicdir)
dir.create(boxdir)
if (capabilities("png")){
xcmsBoxPlot(values[seq(length = eicmax),],
sampclass(object), dirpath=boxdir, pic="png", width=w, height=h)
png(file.path(eicdir, "%003d.png"), width = w, height = h)
} else {
xcmsBoxPlot(values[seq(length = eicmax),],
sampclass(object), dirpath=boxdir, pic="pdf", width=w, height=h)
pdf(file.path(eicdir, "%003d.pdf"), width = w/72,
height = h/72, onefile = FALSE)
}
}
plot(eics, object, rtrange = eicwidth, mzdec=mzdec)
if (length(filebase))
dev.off()
} else {
## This looks like a direct-infusion single spectrum
if (length(filebase)) {
specdir <- paste(filebase, "_spec", sep="")
dir.create(specdir)
if (capabilities("png")){
png(file.path(specdir, "%003d.png"), width = w, height = h)
}else{
pdf(file.path(eicdir, "%003d.pdf"), width = w/72,
height = h/72, onefile = FALSE)
}
}
plotSpecWindow(object, gidxs = tsidx[seq(length = eicmax)], borderwidth=1)
if (length(filebase))
dev.off()
}
}
invisible(twosamp)
})
setGeneric("progressCallback", function(object) standardGeneric("progressCallback"))
setMethod("progressCallback", "xcmsSet", function(object) object@progressCallback)
setGeneric("progressCallback<-", function(object, value) standardGeneric("progressCallback<-"))
setReplaceMethod("progressCallback", "xcmsSet", function(object, value) {
object@progressCallback <- value
object
})
setGeneric("progressInfoUpdate", function(object) standardGeneric("progressInfoUpdate"))
setMethod("progressInfoUpdate", "xcmsSet", function(object)
do.call(object@progressCallback, list(progress=object@progressInfo))
)
xcmsBoxPlot<-function(values, className, dirpath, pic, width=640, height=480){
if (pic == "png"){
png(file.path(dirpath, "%003d.png"), width, height)
} else{
pdf(file.path(dirpath, "%003d.pdf"), width = width/72, height = height/72, onefile = FALSE)
}
ind<-which(colnames(values) != "metlin")
for (i in 1:nrow(values)){
boxplot(as.numeric(values[i,ind]) ~ className, col="blue",
outline=FALSE, main=paste("Feature ", row.names(values)[i] ))
}
if (length(values) > 0) {
dev.off()
}
}
retexp <- function(peakrange, width = 200) {
retmean <- rowMeans(peakrange[,c("rtmin", "rtmax"),drop=FALSE])
peakrange[,"rtmin"] <- retmean-width/2
peakrange[,"rtmax"] <- retmean+width/2
peakrange
}
#### Fill in NA values with the minimum and maximum value
na.flatfill <- function(x) {
realloc <- which(!is.na(x))
if (realloc[1] > 1)
x[1:(realloc[1]-1)] <- x[realloc[1]]
if (realloc[length(realloc)] < length(x))
x[(realloc[length(realloc)]+1):length(x)] <- x[realloc[length(realloc)]]
x
}
pval <- function(X, classlabel, teststat) {
n1 <- rowSums(!is.na(X[,classlabel == 0]))
n2 <- rowSums(!is.na(X[,classlabel == 1]))
A <- apply(X[,classlabel == 0], 1, sd, na.rm=TRUE)^2/n1 ## sd(t(X[,classlabel == 0]), na.rm = TRUE)^2/n1
B <- apply(X[,classlabel == 1], 1, sd, na.rm=TRUE)^2/n2 ## sd(t(X[,classlabel == 1]), na.rm = TRUE)^2/n2
df <- (A+B)^2/(A^2/(n1-1)+B^2/(n2-1))
pvalue <- 2 * (1 - pt(abs(teststat), df))
invisible(pvalue)
}
panel.cor <- function(x, y, digits=2, prefix="", cex.cor)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- abs(cor(x, y, use = "pairwise.complete.obs"))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex <- 0.8/strwidth(txt)
text(0.5, 0.5, txt, cex = cex)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.