#' @rdname recMissComp
#' @title Missing compound recovery
#' @description Missing compounds recovery: fits a general model (all the compounds above a certain minimum number of samples) to all the samples.
#' @usage recMissComp(Experiment, min.samples, free.model = F)
#' @param Experiment A 'MetaboSet' S4 object containing the experiment data previously created by newExp, deconvolved by deconvolveComp and aligned by alignComp.
#' @param min.samples The minimum number of samples in which a compound has to appear to be considered for searching into the rest of the samples where this compound missing.
#' @param free.model If TRUE, the spectra found in the samples where the compound is missing is used to get the final average spectra. (See details)
#' @details
#' WARNING: If compounds were previously identified, they have to be identified again after applying the "recMissComp" function. This means that "identifyComp" function has to be executed always after "recMissComp" for identification of compounds, even if "identifyComp" has been previously applied before.
#'
#'The free.model parameter is recomended to be always FALSE (except for carbon tracking applications). This is because the spectra of the samples where the compound is missing is usually affected by noise, and this could decrease the matching score for a certain compound.
#' @return The function returns an updated S4 'MetaboSet' class, where the GC-MS samples have been now aligned.
#' @references
#' [1] Domingo-Almenara X, et al. Compound deconvolution in GC-MS-based metabolomics by blind source separation. Journal of Chromatography A (2015). Vol. 1409: 226-233. DOI: 10.1016/j.chroma.2015.07.044
#' @author Xavier Domingo-Almenara. xavier.domingo@urv.cat
#' @seealso \code{\link{newExp}} \code{\link{alignComp}} \code{\link{setAlPar}} \code{\link{setDecPar}}
#' @export
setGeneric('recMissComp',function(Experiment, min.samples, free.model=F){
standardGeneric('recMissComp')
})
#' @rdname recMissComp
setMethod('recMissComp',signature = 'MetaboSet',
function(Experiment, min.samples, free.model=F){
alParameters <- Experiment@Results@Parameters@Alignment
al.par <- list(alignment.algorithm=alParameters$alignment.algorithm, min.spectra.cor=alParameters$min.spectra.cor, max.time.dist=alParameters$max.time.dist, mz.range=alParameters$mz.range, min.samples=min.samples)
Experiment@Results@Parameters@Alignment <- al.par
align.list <- as.matrix(alignList(Experiment))
data.list <- as.matrix(align.list[,-c(1:4)])
## Delete the peaks below the minimum peak threshold
delete.indexes <- which(as.numeric(as.vector(apply(data.list,1,function(x) max(as.numeric(as.vector(x))))))<Experiment@Data@Parameters$min.peak.height)
if(length(delete.indexes)!=0) {data.list <- data.list[-delete.indexes,]; align.list <- align.list[-delete.indexes,]}
meta.data <- metaData(Experiment)
sample.filenames <- as.vector(apply(as.matrix(colnames(data.list)),1,function(x) meta.data[which(x==meta.data),"filename"]))
model.indexes <- which(as.numeric(as.vector(align.list[,"FoundIn"]))>=min.samples)
#if(length(re.analyze.indexes)==0) stop("Model already fitted for the min.samples value selected")
times.list <- align.list[model.indexes,c("AlignID","tmean")]
total.iterations <- 0
for(sample in 1:length(sample.filenames)) total.iterations <- total.iterations + length(data.list[model.indexes,sample])
pb <- progress_bar$new(
format = " recovering [:bar] :percent eta: :eta",
total = length(sample.filenames), clear = FALSE)
pb$tick(0)
k.it <- 0
for(sample in 1:length(sample.filenames)){
## Error on sample 23
re.analyze.in <- which(as.numeric(data.list[model.indexes,sample])==0)
if(length(re.analyze.in)!=0)
{
re.analyze <- model.indexes[re.analyze.in]
if(Experiment@MetaData@DataDirectory=="") {filename <- as.character(Experiment@MetaData@Instrumental$filename[sample])
}else{filename <- paste(Experiment@MetaData@DataDirectory,"/",Experiment@MetaData@Instrumental$filename[sample], sep="")}
sampleRD <- load.file(filename)
sampleRD@scans.per.second <- Experiment@Data@Parameters$scans.per.second
sampleRD@compression.coef <- Experiment@Data@Parameters$compression.coef
sampleRD@avoid.processing.mz <- Experiment@Data@Parameters$avoid.processing.mz
sampleRD@min.peak.width <- Experiment@Data@Parameters$min.peak.width
sampleRD@noise.threshold <- Experiment@Data@Parameters$noise.threshold
sampleRD@min.peak.height <- Experiment@Data@Parameters$min.peak.height
#sampleRD@factor.minimum.sd <- Experiment@Data@Parameters$factor.minimum.sd
#avoid.mz <- sampleRD@avoid.processing.mz - (sampleRD@min.mz - 1)
#avoid.mz <- avoid.mz[-which(avoid.mz<1)]
#sampleRD@data[,avoid.mz] <- 0
sampleRD <- avoid.processing(sampleRD)
moving.maximas <- apply(sampleRD@data,2,function(x){max(x)})
sampleRD@data[,moving.maximas<sampleRD@noise.threshold] <- 0
#sigma.object <- get.SigmaModel(sampleRD)
#sigma.model <- sigma.object$sigma
#sigma.scans <- sigma.object$sigma.scans
sigma.scans <-Experiment@Data@Parameters$min.peak.width*Experiment@Data@Parameters$scans.per.second*60
sampleRD@data <- pre.process(sampleRD@data, sigma.scans)
#sampleRD@min.peak.width <- sigma.object$sigma
analyzed.vector <- vector()
for(j in 1:length(re.analyze))
{
k.it <- k.it+1
lost.FactorID <- as.numeric(as.vector(align.list[re.analyze[j],"AlignID"]))
lost.factor <- tryCatch({
fit.model.in.data.tosd(sampleRD, Experiment,lost.FactorID, free.model)
}, error = function(e) {
cat("Compound recovery could not be done in ", sample)
lost.factor <- NULL
})
if(is.null(lost.factor)) next
free.factor.index <- which(Experiment@Data@FactorList[colnames(data.list)[sample]][[1]]$"AlignID"==lost.FactorID)
if(length(free.factor.index)!=0) {Experiment@Data@FactorList[colnames(data.list)[sample]][[1]]$"AlignID"[free.factor.index] <- 0}
new.list <- as.data.frame(matrix(0,ncol=7,nrow=1))
colnames(new.list) <- c("ID","RT","Area","Peak Height","Spectra","Profile","AlignID")
new.list$"ID" <- length(Experiment@Data@FactorList[colnames(data.list)[sample]][[1]]$"ID")+1
new.list$"Peak Height" <- lost.factor$height
new.list$"RT" <- as.numeric(lost.factor$rt)
new.list$"Area" <- lost.factor$area
new.list$"Spectra" <- lost.factor$lost.spectra
new.list$"Profile" <- lost.factor$lost.profile
new.list$"AlignID" <- lost.FactorID
Experiment@Data@FactorList[colnames(data.list)[sample]][[1]] <- data.frame(rbind(as.matrix(Experiment@Data@FactorList[colnames(data.list)[sample]][[1]]),as.matrix(new.list)))
Experiment@Data@FactorList[colnames(data.list)[sample]][[1]]$"ID" <- as.integer(Experiment@Data@FactorList[colnames(data.list)[sample]][[1]]$"ID")
Experiment@Data@FactorList[colnames(data.list)[sample]][[1]]$"RT" <- as.numeric(as.vector(Experiment@Data@FactorList[colnames(data.list)[sample]][[1]]$"RT"))
Experiment@Data@FactorList[colnames(data.list)[sample]][[1]]$"AlignID" <- as.numeric(as.vector(Experiment@Data@FactorList[colnames(data.list)[sample]][[1]]$"AlignID"))
colnames(Experiment@Data@FactorList[colnames(data.list)[sample]][[1]]) <- c("ID", "RT", "Area", "Peak Height", "Spectra", "Profile", "AlignID")
}
}
pb$tick()
}
cat("\n Updating alignment table... \n")
Experiment@Results@Alignment <- create.factorlist.table(Experiment)
cat("Model fitted! \n")
Experiment
}
)
fit.model.in.data.tosd <- function(sampleRD, Experiment, lost.factor.alignId, free.model)
{
maxMZ <- sampleRD@max.mz
minMZ <- sampleRD@min.mz
lost.factor.index <- which(Experiment@Results@Alignment[,"AlignID"]==lost.factor.alignId)
lost.factor.position <- Experiment@Results@Alignment[lost.factor.index,"tmean"]*sampleRD@scans.per.second*60-sampleRD@start.time*sampleRD@scans.per.second
min.peak.width <- sampleRD@min.peak.width
max.time.dist <- Experiment@Results@Parameters@Alignment$max.time.dist*sampleRD@scans.per.second*60
window.start <- as.integer(lost.factor.position - max.time.dist - min.peak.width*4)
if(window.start<=0) window.start <- 1
if(window.start>nrow(sampleRD@data)) window.start <- as.integer(nrow(sampleRD@data) - max.time.dist - min.peak.width*4)
window.end <- as.integer(lost.factor.position + max.time.dist + min.peak.width*4)
if(window.end>nrow(sampleRD@data)) window.end <- nrow(sampleRD@data)
analysis.window <- sampleRD@data[window.start:window.end,]
if(max(analysis.window)<Experiment@Data@Parameters$noise.threshold) return(NULL)
## Lost Spectra:
lost.spectra <- rep(0,(maxMZ-minMZ)+1)
lost.spectra <- normalize(convertMSPspectra(Experiment@Results@Alignment[lost.factor.index,"Spectra"], maxMZ)[minMZ:maxMZ])
## Lost Profile:
#C.model <- getC.tOSD(analysis.window, lost.spectra)
#C.model <- getC.tP(analysis.window, lost.spectra)
#C.model <- getC.rq(analysis.window, lost.spectra)
C.model <- try(getC.rq(analysis.window, lost.spectra), silent=T)
if(inherits(C.model,"try-error")) C.model <- rep(0, nrow(analysis.window))
C.model <- chrom.isoreg(C.model)
Smod <- getS.OSD(C.model, analysis.window, ref.response=lost.spectra)
# if(is.na(suppressWarnings(cor(Smod, lost.spectra))<0.75))
# {
# max.mz <- apply(analysis.window,2,max)
# max.mz[max.mz==0] <- max(max.mz)
# C.model <- analysis.window[,which.min(max.mz)]
# }else{
# if(suppressWarnings(cor(Smod, lost.spectra))<0.75)
# {
# max.mz <- apply(analysis.window,2,max)
# max.mz[max.mz==0] <- max(max.mz)
# C.model <- analysis.window[,which.min(max.mz)]
# }
# }
if(free.model==T) lost.spectra <- Smod
lost.profile <- C.model
## Results Formatting
#t.vect <- (window.start:window.end)/sampleRD@scans.per.second/60 + sampleRD@start.time/60
#matplot(t.vect, analysis.window, type="l", col="gray", lty=1)
#matplot(t.vect, lost.profile, type="l", add=T, lty=1)
return.list <- list()
lost.rt <- ((window.start-1) + which.max(lost.profile))/sampleRD@scans.per.second/60 + sampleRD@start.time/60
#lost.area <- sum(as.matrix(lost.profile) %*% t(as.matrix(lost.spectra)))
lost.area <- sum(as.matrix(lost.profile))
lost.spectra[normalize(lost.spectra)<0.005] <- 0
spectra.index <- which(lost.spectra!=0)
spectra.pos <- spectra.index + (sampleRD@min.mz - 1)
spectra.int <- round(lost.spectra[spectra.index]*1000)
del.pos <- which(spectra.int==0)
if(length(del.pos)!=0) {spectra.int <- spectra.int[-del.pos]; spectra.pos <- spectra.pos[-del.pos]}
spectra.text <- paste(sweep(as.matrix(spectra.pos),1,as.matrix(spectra.int),"paste.sp"), collapse=" ")
lost.height <- lost.profile[which.max(lost.profile)]
lost.profile[normalize(lost.profile)<0.005] <- 0
profile.index <- which(lost.profile!=0)
profile.pos <- ((profile.index + (window.start - 1))/sampleRD@scans.per.second/60) + (sampleRD@start.time/60)
profile.int <- lost.profile[profile.index]/max(lost.profile)
profile.text <- paste(sweep(as.matrix(profile.pos),1,as.matrix(profile.int),"paste.sp"), collapse=" ")
return.list <- list(lost.profile=profile.text, lost.spectra=spectra.text, rt=lost.rt, area=lost.area, height=lost.height)
#lines(c(t.vect[lost.factor.index],t.vect[lost.factor.index]),c(0,max(analysis.window)), col="red")
return.list
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.