Nothing
#' Run ncGTW alignment
#'
#' This function applies ncGTW alignment to the input feature.
#' @param ncGTWinput A \code{\link{ncGTWinput}} object.
#' @param xcmsLargeWin A \code{\link[xcms]{xcmsSet-class}} object.
#' @param parSamp Decide how many samples are in each group when considering
#' parallel computing, and the default is 10.
#' @param k1Num Decide how many different k1 will be tested in stage 1. The
#' default is 3.
#' @param k2Num Decide how many different k2 will be tested in stage 2. The
#' default is 1.
#' @param bpParam A object of \pkg{BiocParallel} to control parallel processing,
#' and can be created by
#' \code{\link[BiocParallel:SerialParam-class]{SerialParam}},
#' \code{\link[BiocParallel:MulticoreParam-class]{MulticoreParam}}, or
#' \code{\link[BiocParallel:SnowParam-class]{SnowParam}}.
#' @param ncGTWparam A \code{\link{ncGTWparam}} object.
#' @details This function realign the input feature with ncGTW alignment
#' function with given m/z and RT range.
#' @return A \code{\link{ncGTWoutput}} object.
#' @examples
#' # obtain data
#' data('xcmsExamples')
#' xcmsLargeWin <- xcmsExamples$xcmsLargeWin
#' xcmsSmallWin <- xcmsExamples$xcmsSmallWin
#' ppm <- xcmsExamples$ppm
#'
#' # detect misaligned features
#' excluGroups <- misalignDetect(xcmsLargeWin, xcmsSmallWin, ppm)
#'
#' # obtain the paths of the sample files
#' filepath <- system.file("extdata", package = "ncGTW")
#' file <- list.files(filepath, pattern="mzxml", full.names=TRUE)
#'
#' tempInd <- matrix(0, length(file), 1)
#' for (n in seq_along(file)){
#' tempCha <- file[n]
#' tempLen <- nchar(tempCha)
#' tempInd[n] <- as.numeric(substr(tempCha, regexpr("example", tempCha) + 7,
#' tempLen - 6))
#' }
#' # sort the paths by data acquisition order
#' file <- file[sort.int(tempInd, index.return = TRUE)$ix]
#' \dontrun{
#' # load the sample profiles
#' ncGTWinputs <- loadProfile(file, excluGroups)
#'
#' # initialize the parameters of ncGTW alignment with default
#' ncGTWparam <- new("ncGTWparam")
#'
#' # run ncGTW alignment
#' ncGTWoutputs <- vector('list', length(ncGTWinputs))
#' for (n in seq_along(ncGTWinputs))
#' ncGTWoutputs[[n]] <- ncGTWalign(ncGTWinputs[[n]], xcmsLargeWin, 5,
#' ncGTWparam = ncGTWparam)
#' }
#' @export
ncGTWalign <- function(ncGTWinput, xcmsLargeWin, parSamp=10, k1Num=3, k2Num=1,
bpParam=BiocParallel::SnowParam(workers=1), ncGTWparam=NULL){
if (!is(xcmsLargeWin, 'xcmsSet'))
stop('xcmsLargeWin should be a "xcmsSet" object.')
if (!is(ncGTWinput, 'ncGTWinput'))
stop('ncGTWinput should be a "ncGTWinput" object.')
if (parSamp <= 1 || parSamp %% 1 != 0)
stop('parSamp should be an integer which is larger than 1.')
if (k1Num <= 0 || k1Num %% 1 != 0)
stop('k1Num should be an integer which is larger than 0.')
if (k2Num <= 0 || k2Num %% 1 != 0)
stop('k2Num should be an integer which is larger than 0.')
ncGTWparam <- initHidparam(ncGTWparam, ncGTWinput, k1Num, k2Num)
groupInd <- ncGTWinput@groupInfo[1]
scanRangeOld <- ncGTWinput@rtRaw
ncGTWpar <- ncGTWsplit(ncGTWinput, ncGTWparam, parSamp)
parSamp <- ncGTWpar$parSamp
parSect <- ncGTWpar$parSect
parNum <- ncGTWpar$parNum
specLen <- ncGTWpar$specLen
scanRange <- ncGTWpar$scanRange
message("ncGTW is realigning group ", groupInd, "...")
## ncGTW 1st layer alignment
ncGTW1stOutput <- BiocParallel::bplapply(ncGTWpar$parInfos, ncGTW1stLayer,
xcmsLargeWin, scanRange, scanRangeOld, ncGTWparam, BPPARAM=bpParam)
parPath <- replicate(parSect, list(vector('list', parSamp)))
parWarped <- array(0, c(parSamp, specLen, parSect))
parStat <- array(0, c(k1Num * k2Num, 5, parSect))
for (n in seq_len(parSect)){
parPath[[n]] <- ncGTW1stOutput[[n]]$parPath
parWarped[seq_len(parNum[n]) , , n] <- ncGTW1stOutput[[n]]$parWarped
parStat[ , , n] <- ncGTW1stOutput[[n]]$parStat
}
## Stop if only 1 layer
if (parSect == 1){
warning('Only one layer!!!')
warpedAll <- parWarped[ , , 1]
smoM2 <- matrix(0, 3, 6)
return(new("ncGTWoutput", alignData=ncGTWpar$data, scanRange=scanRange,
ncGTWpath=parPath[[1]], downSample=downSample))
}
## Build super samples for 2nd layer
superSample <- matrix(0, parSect, specLen)
for (n in seq_len(parSect)){
maxInd <- which.max(rowSums(parWarped[ , , n], 2))
minInd <- which.min(rowSums(parWarped[ , , n], 2))
superSample[n, ] <- colMeans(parWarped[setdiff(seq_len(parNum[n]),
c(maxInd, minInd)), , n, drop=FALSE])
}
## ncGTW 2nd layer alignment
ncGTWparam2 <- ncGTWparam
if (ncGTWparam2$maxStp * 1.5 > specLen){
ncGTWparam2$maxStp <- specLen - 1
} else{
ncGTWparam2$maxStp <- round(ncGTWparam$maxStp * 1.5)
}
ncGTWparam2$mir <- FALSE
allInfo <- list(groupInd=groupInd, num=0, all=0, parSpec=superSample,
parNum=parSect, parInd=seq_len(parSect))
ncGTWsmoRes2 <- ncGTWalignSmo(allInfo, xcmsLargeWin, scanRange,
scanRangeOld, ncGTWparam2, TRUE, ncGTWpar$data, parPath,
ncGTWpar$parInd)
smoM2 <- ncGTWsmoRes2$smoM
minRange <- min(smoM2[ , 3])
bestSmo <- which(smoM2[ , 3] == minRange)
bestSmo <- bestSmo[which.min(smoM2[bestSmo, 1])]
path2 <- ncGTWsmoRes2$tempPath[[bestSmo]]
## Warp samples
parSuperWarped <- matrix(0, parSect, specLen)
for (m in seq_len(parSect))
parSuperWarped[m, ] <- warpCurve(superSample[m, ], path2[[m]])
warpedAll <- matrix(0, dim(ncGTWpar$data)[1], dim(ncGTWpar$data)[2])
tempCount <- 0
for (n in seq_len(parSect)){
for (m in seq_len(parNum[n]))
warpedAll[tempCount + m, ] <-
warpCurve(parWarped[m, , n], path2[[n]])
tempCount <- tempCount + parNum[n]
}
path <- pathCombine(parPath, path2, ncGTWpar$parInd)
return(new("ncGTWoutput", alignData=ncGTWpar$data, scanRange=scanRange,
ncGTWpath=path, downSample=ncGTWparam$downSample))
}
initHidparam <- function(ncGTWparam, ncGTWinput, k1Num, k2Num){
ncGTWparam2 <- list()
ncGTWparam2$downSample <- if (is.null(ncGTWparam@downSample)) 2 else
ncGTWparam@downSample
ncGTWparam2$nor <- if (is.null(ncGTWparam@nor)) 1 else ncGTWparam@nor
ncGTWparam2$strNum <- if (is.null(ncGTWparam@strNum)) 1 else
ncGTWparam@strNum
ncGTWparam2$diaNum <- if (is.null(ncGTWparam@diaNum)) 1 else
ncGTWparam@diaNum
ncGTWparam2$mir <- TRUE
ncGTWparam2$stpRat <- if (is.null(ncGTWparam@stpRat)) 0.6 else
ncGTWparam@stpRat
ncGTWparam2$maxStp <- if (is.nan(ncGTWparam@maxStp))
round(dim(ncGTWinput@rtRaw)[2] %/% ncGTWparam2$downSample *
ncGTWparam2$stpRat) else ncGTWparam@maxStp
ncGTWparam2$rangeThre <- 1
ncGTWparam2$biP <- TRUE
ncGTWparam2$mu <- 0
ncGTWparam2$sigma <- 1
ncGTWparam2$weiP <- 0
ncGTWparam2$logt <- 0
ncGTWparam2$dia <- 0
ncGTWparam2$noiseVar <- 1
ncGTWparam2$k1Num <- k1Num
ncGTWparam2$k2Num <- k2Num
return(ncGTWparam2)
}
ncGTWsplit <- function(ncGTWinput, ncGTWparam, parSamp){
downSample <- ncGTWparam$downSample
dataOri <- ncGTWinput@profiles
dataFil <- t(apply(dataOri, 1, gaussFilter))
dataNum <- dim(dataFil)[1]
specLen <- dim(dataFil)[2]
data <- matrix(0, dataNum, specLen %/% downSample)
scanRange <- matrix(0, dataNum, specLen %/% downSample)
for (i in seq_len(dataNum)){
for (j in seq_len(specLen %/% downSample)){
subInd <- ((j - 1) * downSample + 1):(j * downSample)
maxInd <- which.max(dataFil[i, subInd])
data[i, j] <- dataFil[i, subInd[maxInd]]
scanRange[i, j] <- ncGTWinput@rtRaw[i, subInd[maxInd]]
}
}
data <- cbind(dataFil[ , 1], data)
scanRange <- cbind(ncGTWinput@rtRaw[ , 1], scanRange)
if (any(data[ , dim(data)[2]] != dataFil[ , specLen])){
data <- cbind(data, dataFil[ , specLen])
scanRange <- cbind(scanRange, ncGTWinput@rtRaw[ , specLen])
}
specLen <- dim(data)[2]
parSect <- dataNum / parSamp
if (parSect > floor(dataNum / parSamp)){
parSect <- floor(dataNum / parSamp) + 1
}
parSpec <- array(0, c(parSamp, specLen, parSect))
parInd <- matrix(0, parSamp, parSect)
parNum <- matrix(0, parSect, 1) + parSamp
if ((dataNum - (parSect - 1) * parSamp ) > parSamp / 2){
parNum[parSect] <- dataNum - (parSect - 1) * parSamp
} else{
parNum[parSect - 1] <- ceiling((dataNum - (parSect - 2) * parSamp) / 2)
parNum[parSect] <- floor((dataNum - (parSect - 2) * parSamp) / 2)
}
tempCount <- 0
for (n in seq_len(parSect)){
parSpec[seq_len(parNum[n]), , n] <-
data[(tempCount + 1):(tempCount + parNum[n]), ]
parInd[seq_len(parNum[n]), n] <- (tempCount + 1):(tempCount + parNum[n])
tempCount <- tempCount + parNum[n]
}
parInfos <- vector('list', parSect)
for (n in seq_len(parSect)){
parInfos[[n]]$groupInd <- ncGTWinput@groupInfo['index']
parInfos[[n]]$num <- n
parInfos[[n]]$all <- parSect
parInfos[[n]]$parSpec <- parSpec[seq_len(parNum[n]), , n]
parInfos[[n]]$parNum <- parNum[n]
parInfos[[n]]$parInd <- parInd[ , n]
}
return(list(parInfos=parInfos, scanRange=scanRange, parSamp=parSamp,
specLen=specLen, parSect=parSect, parNum=parNum, data=data,
parInd=parInd))
}
ncGTW1stLayer <- function(parInfo, xcmsLargeWin, scanRange, scanRangeOld,
ncGTWparam){
ncGTWsmoRes <- ncGTWalignSmo(parInfo, xcmsLargeWin, scanRange, scanRangeOld,
ncGTWparam)
warpRes <- chooseSmo(ncGTWsmoRes$smoM, ncGTWsmoRes$tempPath, parInfo,
ncGTWparam)
return(list(parPath=warpRes$parPath, parWarped=warpRes$parWarped,
parStat=warpRes$parStat, parS1BrokenNum=ncGTWsmoRes$parS1BrokenNum,
parS2BrokenNum=ncGTWsmoRes$parS2BrokenNum, parS2=ncGTWsmoRes$parS2))
}
ncGTWalignSmo <- function(parInfo, xcmsLargeWin, scanRange,
scanRangeOld, ncGTWparam, second=FALSE, data=NULL, parPath=NULL,
parInd=NULL){
groupInd <- parInfo$groupInd
n <- parInfo$num
parSect <- parInfo$all
parnum <- parInfo$parNum
parind <- parInfo$parInd[seq_len(parnum)]
parspec <- parInfo$parSpec[seq_len(parnum), ]
mir <- ncGTWparam$mir
strNum <- ncGTWparam$strNum
diaNum <- ncGTWparam$diaNum
biP <- ncGTWparam$biP
noiseVar <- ncGTWparam$noiseVar
downSample <- ncGTWparam$downSample
rangeThre <- ncGTWparam$rangeThre
maxStp <- ncGTWparam$maxStp
k1Num <- ncGTWparam$k1Num
k2Num <- ncGTWparam$k2Num
gtwPrep <- buildMultiParaValidmap(parspec, mir, strNum, diaNum, biP)
eeBet <- betEdge(gtwPrep, 10^-32, ncGTWparam)
gtwRes0 <- gtwCut(gtwPrep, 0, ncGTWparam)
cut0 <- gtwRes0$cut
costMin <- cut0[length(gtwRes0$cut)]
s1BrokenNum <- sum((cut0[eeBet[ ,1]] + cut0[eeBet[ ,2]]) == 1)
parS1BrokenNum <- s1BrokenNum #####
gtwResMerge <- gtwCut(gtwPrep, 10^32, ncGTWparam, 2)
cutMerge <- gtwResMerge$cut
costMax <- cutMerge[length(cutMerge)]
smoTemp <- (costMax - costMin) / s1BrokenNum
BNumDTW1 <- matrix(-1, 3, 3)
BNumDTW1[1, 1] <- 0
BNumDTW1[2, 1] <- s1BrokenNum
BNumDTW1[3, 1] <- costMin
BNumDTW1[1, 3] <- Inf
BNumDTW1[2, 3] <- 0
BNumDTW1[3, 3] <- costMax
smoM <- array(999, c(k1Num, k2Num, 5))
smoM[1, ,1] <- smoTemp
BNumDTW1[1, 2] <- smoTemp
tempPath <- vector('list', k1Num * k2Num)
for (smo in seq_len(dim(smoM)[1])){
gtwRes1 <- gtwCut(gtwPrep, smoM[smo, 1,1], ncGTWparam)
cut1 <- gtwRes1$cut
gtwInfo1 <- gtwRes1$gtwInfo
path1 <- label2path(gtwRes1$cut, gtwRes1$gtwInfo)
if (n != 0){
message(format(Sys.time()),
' Solved the 1st maximum flow, section ', n, ' of ', parSect,
', bottom layer.')
} else{
message(format(Sys.time()),
' Solved the 1st maximum flow, top layer.')
}
brokenNum1 <- sum((cut1[eeBet[ ,1]] + cut1[eeBet[ ,2]]) == 1)
tempInd <- (smo - 2^floor(log2(smo)) + 1) * 2
BNumDTW1[2, tempInd] <- brokenNum1
BNumDTW1[3, tempInd] <- cut1[length(cut1)] - smoM[smo, 1,1] * brokenNum1
smoM[smo, ,2] <- brokenNum1
validMap2 <- matrix(0, parnum, parnum)
for (jj in seq_len(parnum))
for (ii in seq((jj + 1), parnum, length = max(0, parnum - jj)))
validMap2[ii, jj] <- 1
tst2 <- matrix(0, dim(parspec)[1], dim(parspec)[2])
ref2 <- tst2
smoothnessV2 <- 0.0000000001 / noiseVar
gtwPrep2 <- list(validMap=validMap2, tst=tst2, ref=ref2)
gtwRes2 <- gtwCut(gtwPrep2, smoothnessV2, ncGTWparam, 1, path1,
gtwPrep$pairMap)
eeBet2 <- gtwRes2$eeBet
s2BrokenNum <- gtwRes2$cut[length(gtwRes2$cut)] / smoothnessV2
parS2BrokenNum <- s2BrokenNum #####
parS2 <- 2 * (maxStp - 1) * parnum / s2BrokenNum ###############
smoothnessV2 <- parS2 / noiseVar
BNumDTW2 <- matrix(-1, 3, 3)
BNumDTW2[1, 1] <- 0
BNumDTW2[2, 1] <- s2BrokenNum
BNumDTW2[3, 1] <- 0
BNumDTW2[1, 3] <- Inf
BNumDTW2[2, 3] <- 0
BNumDTW2[3, 3] <- 2 * (maxStp - 1) * parnum
smoM[smo, 1, 4] <- smoothnessV2
BNumDTW2[1, 2] <- smoothnessV2
for (smo2 in seq_len(dim(smoM)[2])){
gtwRes2 <- gtwCut(gtwPrep2, smoM[smo, smo2,4], ncGTWparam, 1, path1,
gtwPrep$pairMap)
cut2 <- gtwRes2$cut
gtwInfo2 <- gtwRes2$gtwInfo
path2 <- label2path(cut2, gtwInfo2)
tempPath[[(smo-1) * k2Num + smo2]] <- path2
if (n != 0){
message(format(Sys.time()),
' Solved the 2nd maximum flow, section ', n, ' of ', parSect,
', bottom layer.')
} else{
message(format(Sys.time()),
' Solved the 2nd maximum flow, top layer.')
}
brokenNum2 <- sum((cut2[eeBet2[ ,1]] + cut2[eeBet2[ ,2]]) == 1)
tempInd <- (smo2 - 2^floor(log2(smo2)) + 1) * 2
BNumDTW2[2, tempInd] <- brokenNum2
BNumDTW2[3, tempInd] <- cut2[length(cut2)] -
smoM[smo, smo2, 4] * brokenNum2
smoM[smo, smo2, 5] <- brokenNum2
if (second){
temp2path <- pathCombine(parPath, path2, parInd)
statResult <- smoTest(xcmsLargeWin, groupInd, data, scanRange,
seq_len(dim(data)[1]), temp2path, downSample, scanRangeOld)
} else{
statResult <- smoTest(xcmsLargeWin, groupInd, parspec, scanRange,
parind, path2, downSample, scanRangeOld)
}
tempRange <- statResult[3, 2]
smoM[smo, smo2, 3] <- tempRange
if (tempRange <= rangeThre)
break
if ((as.integer(log2(smo2+1)) - log2(smo2+1)) == 0 &&
smo2 < dim(smoM)[2]){
tBNumDTW2 <- matrix(-1, 3, smo2*2 + 3)
for (tsmo2 in seq_len(dim(tBNumDTW2)[2])){
if (as.integer(tsmo2/2) != tsmo2/2){
tBNumDTW2[, tsmo2] = BNumDTW2[,(tsmo2+1)/2]
} else{
tBNumDTW2[1, tsmo2] =
(BNumDTW2[3, tsmo2/2 + 1] - BNumDTW2[3, tsmo2/2]) /
(BNumDTW2[2, tsmo2/2] - BNumDTW2[2, tsmo2/2 + 1])
if (smo2 + tsmo2/2 <= dim(smoM)[2])
smoM[smo, smo2 + tsmo2/2, 4] = tBNumDTW2[1, tsmo2]
}
}
BNumDTW2 <- tBNumDTW2
}
}
if (tempRange <= rangeThre)
break
if ((as.integer(log2(smo+1)) - log2(smo+1)) == 0 && smo < dim(smoM)[1]){
tBNumDTW1 <- matrix(-1, 3, smo*2 + 3)
for (tsmo in seq_len(dim(tBNumDTW1)[2])){
if (as.integer(tsmo/2) != tsmo/2){
tBNumDTW1[, tsmo] = BNumDTW1[,(tsmo+1)/2]
} else{
tBNumDTW1[1, tsmo] =
(BNumDTW1[3, tsmo/2 + 1] - BNumDTW1[3, tsmo/2]) /
(BNumDTW1[2, tsmo/2] - BNumDTW1[2, tsmo/2 + 1])
if (smo + tsmo/2 <= dim(smoM)[1])
smoM[smo + tsmo/2, ,1] = tBNumDTW1[1, tsmo]
}
}
BNumDTW1 <- tBNumDTW1
}
}
return(list(smoM=matrix(aperm(smoM, c(2,1,3)), k1Num * k2Num, 5),
tempPath=tempPath, parS1BrokenNum = parS1BrokenNum,
parS2BrokenNum = parS2BrokenNum, parS2 = parS2))
}
chooseSmo <- function(smoM, tempPath, parInfo, ncGTWparam, bestSmo=NULL){
rangeThre <- ncGTWparam$rangeThre
parnum <- parInfo$parNum
parspec <- parInfo$parSpec[seq_len(parnum), ]
if (is.null(bestSmo)){
bestSmo <- which(smoM[ , 3] < rangeThre)##
if (length(bestSmo) > 1){
bestSmo <- bestSmo[which.max(smoM[bestSmo, 1] / smoM[bestSmo, 4])]
} else if (length(bestSmo) == 0){
bestSmo <- which.min(smoM[ , 3])
}
}
path2 <- tempPath[[bestSmo]]##
tempWarped <- parspec * 0
for (m in seq_len(parnum))
tempWarped[m, ] <- warpCurve(parspec[m, ], path2[[m]])
parwarped <- tempWarped##
tempPath2 <- vector('list', parnum)
tempPath2[seq_len(parnum)] <- path2
parpath <- tempPath2##
parstat <- smoM ##
return(list(parPath=parpath, parWarped=parwarped, parStat=parstat))
}
gtwCut <- function(gtwPrep, smooth, ncGTWparam, type=0, path=NA, pairMap=NA){
param <- initGtwParam(gtwPrep$validMap, ncGTWparam$noiseVar,
ncGTWparam$maxStp, smooth, ncGTWparam$dia, ncGTWparam$logt,
ncGTWparam$nor)
gtwInfo <- buildGTWgraph(gtwPrep$ref, gtwPrep$tst, gtwPrep$validMap, param,
ncGTWparam$mu, ncGTWparam$sigma, ncGTWparam$biP, ncGTWparam$weiP, type,
path, pairMap)
eeBet <- gtwInfo$ee[gtwInfo$ee[, 3] == smooth & gtwInfo$ee[, 4] == smooth, ]
cut <- graphCut(gtwInfo$ss, gtwInfo$ee)
return(list(gtwInfo = gtwInfo, eeBet = eeBet, cut = cut))
}
betEdge <- function(gtwPrep, smooth, ncGTWparam){
param <- initGtwParam(gtwPrep$validMap, ncGTWparam$noiseVar,
ncGTWparam$maxStp, smooth, ncGTWparam$dia, ncGTWparam$logt,
ncGTWparam$nor)
gtwInfo <- buildGTWgraph(gtwPrep$ref, gtwPrep$tst, gtwPrep$validMap, param,
ncGTWparam$mu, ncGTWparam$sigma, ncGTWparam$biP, ncGTWparam$weiP)
eeBet <-
gtwInfo$ee[(gtwInfo$nEdgeGrid * gtwInfo$nPix + 1):dim(gtwInfo$ee)[1], ]
return(eeBet)
}
#' @useDynLib ncGTW
#' @importFrom Rcpp sourceCpp
NULL
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.