##' An S4 class to represent the parameters and data for data processing
##'
##' @export
##' @param para A metaXpara object
##' @param value New value
##' @slot dir.case The path names of the NetCDF/mzXML files to read
##' @slot dir.ctrl The path names of the NetCDF/mzXML files to read
##' @slot sampleListFile The file name of containing the experiment design
##' @slot sampleList A data.frame containing the experiment design
##' @slot ratioPairs A character containing the ratio pairs, such as "A:B;A:C"
##' @slot pairTest Paired test, default is FALSE
##' @slot missValueImputeMethod A character of missing value imputation method
##' @slot sampleListHead The name of head of \code{sampleListFile}
##' @slot outdir The output directory
##' @slot prefix The prefix of output file
##' @slot xcmsPeakListFile The file of output from \pkg{XCMS}
##' @slot fig A \code{list} of file names of figures
##' @slot peaksData A \code{data.frame} containing the peaks data
##' @slot VIP A \code{data.frame} containing the VIP
##' @slot rawPeaks A \code{data.frame} containg the raw peaks data
##' @slot xcmsSetObj An object of \code{\link{xcmsSet}}
##' @slot quant A \code{data.frame} containing the quantification result
##' @slot idres A \code{data.frame} containing the identification result
##' @slot xcmsSet.method Method to use for peak detection. See details
##' \code{\link{findPeaks}} in package \pkg{XCMS}
##' @slot xcmsSet.ppm The maxmial tolerated m/z deviation in consecutive scans,
##' in ppm (parts per million)
##' @slot xcmsSet.peakwidth Chromatographic peak width, given as
##' range (min,max) in seconds
##' @slot xcmsSet.snthresh The signal to noise ratio cutoff, definition see
##' \code{\link{findPeaks.centWave}}
##' @slot xcmsSet.prefilter prefilter=c(k,I),
##' see \code{\link{findPeaks.centWave}}
##' @slot xcmsSet.mzCenterFun See \code{\link{findPeaks.centWave}}
##' @slot xcmsSet.integrate See \code{\link{findPeaks.centWave}}
##' @slot xcmsSet.mzdiff See \code{\link{findPeaks.centWave}}
##' @slot xcmsSet.noise See \code{\link{findPeaks.centWave}}
##' @slot xcmsSet.verbose.columns See \code{\link{findPeaks.centWave}}
##' @slot xcmsSet.polarity Filter raw data for positive/negative scans.
##' See \code{\link{xcmsSet}}
##' @slot xcmsSet.profparam Parameters to use for profile generation.
##' See \code{\link{xcmsSet}}
##' @slot xcmsSet.nSlaves The number of slaves/cores to be used for parallel
##' peak detection. See \code{\link{xcmsSet}}
##' @slot xcmsSet.fitgauss See \code{\link{findPeaks.centWave}}
##' @slot xcmsSet.sleep The number of seconds to pause between plotting
##' peak finding cycles. See \code{\link{findPeaks.centWave}}
##' @slot xcmsSet.fwhm See \code{\link{findPeaks.matchedFilter}}
##' @slot xcmsSet.max See \code{\link{findPeaks.matchedFilter}}
##' @slot xcmsSet.step See \code{\link{findPeaks.matchedFilter}}
##' @slot group.bw0 See \code{\link{group.density}}
##' @slot group.mzwid0 See \code{\link{group.density}}
##' @slot group.bw See \code{\link{group.density}}
##' @slot group.mzwid See \code{\link{group.density}}
##' @slot group.minfrac See \code{\link{group.density}}
##' @slot group.minsamp See \code{\link{group.density}}
##' @slot group.max See \code{\link{group.density}}
##' @slot group.sleep See \code{\link{group.density}}
##' @slot retcor.method See \code{\link{retcor}}
##' @slot retcor.profStep See \code{\link{retcor.obiwarp}}
##' @slot retcor.plottype See \code{\link{retcor.obiwarp}}
##' @slot qcRlscSpan The value of span for QC-RLSC
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @return A object of metaXpara
setClass("metaXpara", slots=c(
## input
dir.case = "character",
dir.ctrl = "character",
sampleListFile = "character",
sampleListHead = "character",
sampleList = "data.frame",
ratioPairs = "character",
missValueImputeMethod = "character",
pairTest = "logical",
## output
outdir = "character",
prefix = "character",
xcmsPeakListFile = "character",
fig = "list",
peaksData = "data.frame",
VIP = "data.frame",
#sampleList = "data.frame",
rawPeaks = "data.frame",
xcmsSetObj = "xcmsSet",
quant = "data.frame",
ID = "character",
## protein, gene or mrna, metabolite
dataType = "character",
## identification result
idres = "data.frame",
xcmsSet = representation(
method = "character",
ppm = "numeric",
peakwidth = "numeric",
snthresh = "numeric",
prefilter = "numeric",
mzCenterFun = "character",
integrate = "numeric",
mzdiff = "numeric",
noise = "numeric",
verbose.columns = "logical",
polarity = "character",
profparam = "list",
nSlaves = "numeric" ,
fitgauss = "logical",
sleep = "numeric",
## findPeaks.matchedFilter-methods
fwhm = "numeric",
max = "numeric",
step = "numeric"
),
group = representation(
## The first round
bw0 = "numeric",
mzwid0 = "numeric",
## The second round
bw = "numeric",
mzwid = "numeric",
minfrac = "numeric",
minsamp = "numeric",
max = "numeric",
sleep = "numeric"
),
retcor = representation(
method = "character",
profStep = "numeric",
plottype = "character"
),
qcRlscSpan = "numeric"
),
prototype = prototype(xcmsSet.method = "centWave",
xcmsSet.ppm = 10,
xcmsSet.peakwidth = c(5,15),
xcmsSet.snthresh = 6,
xcmsSet.prefilter = c(1,5000),
xcmsSet.mzCenterFun = "wMean",
xcmsSet.integrate = 1,
xcmsSet.mzdiff = -0.001,
xcmsSet.noise = 5000,
xcmsSet.verbose.columns = TRUE,
xcmsSet.polarity = "positive",
xcmsSet.profparam = list(step=0.005),
xcmsSet.nSlaves = 1,
xcmsSet.fitgauss = FALSE,
xcmsSet.sleep = 0,
## findPeaks.matchedFilter
xcmsSet.fwhm = 30,
xcmsSet.max = 5,
xcmsSet.step = 0.1,
group.bw0 = 10,
group.mzwid0 = 0.015,
group.bw = 5,
group.mzwid = 0.015,
group.minfrac = 0.3,
group.sleep = 0,
group.minsamp = 1,
group.max = 1000,
retcor.method = "obiwarp",
retcor.plottype = "deviation",
retcor.profStep = 0.005,
outdir = "./",
prefix = "metaX",
peaksData = NULL,
VIP = NULL,
ratioPairs = NULL,
qcRlscSpan = 0,
missValueImputeMethod = "knn",#"svdImpute",
quant = NULL,
idres = NULL,
pairTest = FALSE,
ID = NULL,
dataType = "metabolite",
sampleListHead = c(sample="sample",
order="order",
batch="batch",
class="class",
isQC="isQC")
)
)
## show function for metaXpara
setMethod("show","metaXpara",function(object){
#samList <- read.delim(object@sampleListFile)
if(is.null(object@sampleList) || is.na(object@sampleList) ||
nrow(object@sampleList) ==0){
samList <- read.delim(object@sampleListFile,stringsAsFactors = FALSE)
}else{
samList <- object@sampleList
}
message("total samples:")
message(nrow(samList))
message("total features:")
if(!is.null(object@peaksData)){
message(length(unique(object@peaksData$ID)))
}
samList$class <- as.character(samList$class)
if(any(is.na(samList$class))){
samList$class[is.na(samList$class)] <- "QC"
}
message("sample group information:")
sl <- as.data.frame(table(samList$class,useNA = "ifany"))
names(sl) <- c("Group","Number of samples")
print(sl)
message("batch information:")
bl <- as.data.frame(table(samList$batch))
names(bl) <- c("Batch","Number of samples")
print(bl)
})
setMethod("initialize", "metaXpara", function(.Object, ...) {
.Object <- callNextMethod()
if(is.null(.Object@outdir)){
stop("Please set the outdir parameter!")
}else{
if(!isDirectory(.Object@outdir)){
dir.create(.Object@outdir)
}
}
.Object
})
##' An S4 class to represent the parameters for PLS-DA analysis
##'
##' @export
##' @param para A oplsDAPara object
##' @param value New value
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @slot scale The method used to scale the data,
##' see \code{\link{preProcess}} in \pkg{metaX}
##' @slot center A logical which indicates if the matrix should be mean
##' centred or not
##' @slot t The method used to transform the data,
##' see \code{\link{transformation}} in \pkg{metaX}
##' @slot validation The method for validation, default is "CV"
##' @slot ncomp The number of components used for PLS-DA, default is 2
##' @slot nperm The number of permutations, default is 200
##' @slot kfold The number of folds for cross-validation, default is 7
##' @slot do A logical which indicates whether to do the plsDA analysis,
##' default is TRUE
##' @slot method The method used in PLS-DA. See \code{\link{plsr}}
##' in \pkg{pls}
##' @slot cpu The number of cpus used, default is all cpus.
##' @return A object of plsDAPara
setClass("plsDAPara", slots=c(
scale = "character",
center = "logical",
t = "numeric",
validation = "character",
ncomp = "numeric",
nperm = "numeric",
kfold = "numeric",
do = "logical",
method = "character",
cpu = "numeric"),
prototype = prototype(scale = "uv",
center = TRUE,
t = 1, #1=log,2=cubic
validation = "CV",#LOO
ncomp = 2,
nperm = 200,
kfold = 7,
do = TRUE,
cpu = 0,
method = "oscorespls")
)
##' An S4 class to represent the parameters for OPLS-DA analysis
##' @export
##' @param para A oplsDAPara object
##' @param value New value
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @slot scale The method used to scale the data,
##' see \code{\link{preProcess}} in \pkg{metaX}
##' @slot center A logical which indicates if the matrix should be mean
##' centred or not
##' @slot t The method used to transform the data,
##' see \code{\link{transformation}} in \pkg{metaX}
##' @slot ncomp The number of components used for OPLS-DA, default is 2
##' @slot northo The number of orthogonal components
##' @slot nperm The number of permutations, default is 200
##' @slot kfold The number of folds for cross-validation, default is 7
##' @slot do A logical which indicates whether to do the plsDA analysis,
##' default is TRUE
##' @return A object of oplsDAPara
setClass("oplsDAPara", slots=c(
scale = "character",
center = "logical",
t = "numeric",
ncomp = "numeric",
northo = "numeric",
nperm = "numeric",
kfold = "numeric",
do = "logical"),
prototype = prototype(scale = "uv",
center = TRUE,
t = 1, #1=log,2=cubic
ncomp = 1,
northo = 1,
nperm = 200,
kfold = 7,
do = TRUE)
)
##' @title Create directory
##' @description Create directory
##' @rdname makeDirectory
##' @docType methods
##' @param para A metaXpara object
##' @return none
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @exportMethod
##' @return NULL
##' @examples
##' para <- new("metaXpara")
##' outdir(para) <- "outdir"
##' makeDirectory(para)
setGeneric("makeDirectory",function(para) standardGeneric("makeDirectory"))
##' @describeIn makeDirectory
setMethod("makeDirectory", signature(para = "metaXpara"), function(para){
if(!isDirectory(para@outdir)){
dir.create(para@outdir,recursive=TRUE)
}
}
)
##' @title Peak detection by using XCMS package
##' @description peakFinder takes a set of MS sample data and performs a peak
##' detection, retention time correction and peak grouping steps using XCMS
##' package.
##' @rdname peakFinder
##' @docType methods
##' @param para A metaXpara object
##' @param ... Additional parameter
##' @return A metaXpara object
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @exportMethod
##' @examples
##' \dontrun{
##' library(faahKO)
##' para <- new("metaXpara")
##' dir.case(para) <- system.file("cdf/KO", package = "faahKO")
##' dir.ctrl(para) <- system.file("cdf/WT", package = "faahKO")
##' ## set parameters for peak picking
##' xcmsSet.peakwidth(para) <- c(20,50)
##' xcmsSet.snthresh(para) <- 10
##' xcmsSet.prefilter(para) <- c(3,100)
##' xcmsSet.noise(para) <- 0
##' xcmsSet.nSlaves(para) <- 4
##' ## run peak picking
##' p <- peakFinder(para)
##' }
setGeneric("peakFinder",function(para,...) standardGeneric("peakFinder"))
##' @describeIn peakFinder
setMethod("peakFinder", signature(para = "metaXpara"), function(para,...){
message(date(),"\txcmsSet...")
makeDirectory(para)
sampleClassA = basename(para@dir.case)
sampleClassB = basename(para@dir.ctrl)
msFiles <- c(list.files(para@dir.case,recursive=FALSE,
full.names=TRUE),
list.files(para@dir.ctrl,recursive=FALSE,
full.names=TRUE))
xset<-xcmsSet(msFiles,
method = para@xcmsSet.method,
ppm = para@xcmsSet.ppm,
peakwidth = para@xcmsSet.peakwidth,
snthresh = para@xcmsSet.snthresh,
prefilter = para@xcmsSet.prefilter,
mzCenterFun = para@xcmsSet.mzCenterFun,
integrate = para@xcmsSet.integrate,
mzdiff = para@xcmsSet.mzdiff,
noise = para@xcmsSet.noise,
verbose.columns = para@xcmsSet.verbose.columns,
fitgauss = para@xcmsSet.fitgauss,
sleep = para@xcmsSet.sleep,
polarity = para@xcmsSet.polarity,
#profparam = para@xcmsSet.profparam,
#nSlaves = para@xcmsSet.nSlaves,...
...
)
message(date(),"\txcmsSet done!")
xset
message(date(),"\tgroup...")
xset1<-group(xset,bw = para@group.bw0, mzwid = para@group.mzwid0)
retcor.pdf <- paste(para@outdir,"/", para@prefix, "-retcor.pdf",
sep="")
pdf(retcor.pdf)
if(para@retcor.method == "obiwarp"){
xset2<-retcor(xset1, method="obiwarp",
profStep = para@retcor.profStep,
plottype = para@retcor.plottype,...)
}else{
xset2<-retcor(xset1, method=para@retcor.method,
plottype = para@retcor.plottype,...)
}
dev.off()
#xset2<-group(xset2,bw=5,mzwid=0.01,minfrac=0.5,minsamp=1,max=1000,sleep=0)
xset2<-group(xset2,
bw = para@group.bw,
mzwid = para@group.mzwid,
minfrac = para@group.minfrac,
minsamp = para@group.minsamp,
max = para@group.max,
sleep = para@group.sleep,...)
message(date(),"\tfillpeaks...")
xset3<-fillPeaks(xset2)
xset3
message(date(),"\tfillpeaks done!")
para@xcmsSetObj <- xset3
reporttab<-diffreport(xset3,sampleClassA,sampleClassB,
paste(para@outdir,"/",para@prefix,sep=""),
15000,sortpval=FALSE,
h=480,w=640)
reporttab[1:4,]
message(date(),"\tannotate...")
## In parallel mode, the adducts result are empty.
xsa<-xsAnnotate(xset3,
#nSlaves=para@xcmsSet.nSlaves,
polarity = para@xcmsSet.polarity)
xsaF<-groupFWHM(xsa)
xsaFI<-findIsotopes(xsaF,
ppm = para@xcmsSet.ppm
#polarity = para@xcmsSet.polarity,
#maxcharge=3,
#maxiso=3,
#ppm=10,
#mzabs=0.01
)
xsaC<-groupCorr(xsaFI,calcIso=TRUE,calcCiS=TRUE,calcCaS=TRUE)
xsaFA<-findAdducts(
xsaC,
ppm=para@xcmsSet.ppm,
#mzabs=0.01,
#multiplier=3,
polarity = para@xcmsSet.polarity
)
peaklist<-getPeaklist(xsaFA)
message(date(),"\tannotate done")
reporttab.combine<-cbind(reporttab,
peaklist[,c("isotopes","adduct","pcgroup")])
featureFile = paste(para@outdir, "/", para@prefix, "-feature.txt",
sep="")
message("Save the feature data to file: ",featureFile)
write.table(reporttab.combine, file = featureFile,
quote = FALSE,
sep="\t",
row.names = FALSE,
col.names = TRUE)
para@rawPeaks <- reporttab.combine
date()
para@xcmsPeakListFile = featureFile
return(para)
}
)
##' @title Normalisation of peak intensity
##' @description The normalize method performs normalisation on peak
##' intensities.
##' @rdname normalize
##' @docType methods
##' @param para A metaXpara object.
##' @param method The normalization method: sum, median, vsn, quantiles,
##' quantiles.robust, sum, pqn, combat and tmm. Default is sum.
##' @param valueID The name of the column which will be normalized.
##' @param norFactor The factor that will be used for normalization. This is
##' usually used in urine data normalization. The factor is the column name in
##' sample list file that contains the normalization factor value, such as the
##' value "osmolality". Default the value is NULL. Only if the value of
##' norFactor is not NULL and the parameter "method" is NULL, this
##' normalization will work.
##' @param useClass Whether or not to use class information when perform
##' batch correction using ComBat method. Default is False.
##' @param ... Additional parameter
##' @return A metaXpara object.
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @exportMethod
##' @examples
##' library(faahKO)
##' xset <- group(faahko)
##' xset <- retcor(xset)
##' xset <- group(xset)
##' xset <- fillPeaks(xset)
##' peaksData <- as.data.frame(groupval(xset,"medret",value="into"))
##' peaksData$name <- row.names(peaksData)
##' para <- new("metaXpara")
##' rawPeaks(para) <- peaksData
##' sampleListFile(para) <- system.file("extdata/faahKO_sampleList.txt",
##' package = "metaX")
##' para <- reSetPeaksData(para)
##' para <- metaX::normalize(para)
setGeneric("normalize",function(para,method="sum",valueID="value",
norFactor=NULL,useClass=FALSE,...)
standardGeneric("normalize"))
##' @describeIn normalize
setMethod("normalize",signature(para="metaXpara"),
function(para,method="sum",valueID="value",norFactor=NULL,useClass=FALSE,...){
message("normalize: ",valueID)
cat("Normalization method: ", method, "\n")
#para@peaksData->x
#x<-dcast(x,ID~sample,value.var = valueID)
x <- para@peaksData %>%
select_("sample",valueID,"ID") %>%
spread_("sample",valueID)
dat <- as.matrix(x[,-1])
if(!is.null(method) && method == "vsn"){
fdata <- ExpressionSet(assayData=dat)
fit<-vsn2(fdata)
fitdata = predict(fit, newdata=fdata) ## apply fit
nd<-exprs(fitdata)
x[,-1] <- 2^nd
}else if(!is.null(method) && method == "quantiles"){
x[,-1] <- preprocessCore::normalize.quantiles(dat)
}else if(!is.null(method) && method == "quantiles.robust"){
x[,-1] <- preprocessCore::normalize.quantiles.robust(dat)
}else if(!is.null(method) && method == "sum"){
x[,-1] <- apply(dat,2,function(x){x/sum(x,na.rm=TRUE)}) * mean(apply(dat,2,function(x){sum(x,na.rm=TRUE)}))
}else if(!is.null(method) && method == "median"){
x[,-1] <- apply(dat,2,function(x){x/median(x,na.rm=TRUE)})
}else if(!is.null(method) && method == "pqn"){
## Normalization of data with the Probabilistic Quotient
## Normalization method.
## "pqn": the Probabilistic Quotient Normalization is computed as
## described in Dieterle, et al. (2006).
x[,-1] <- apply(dat,2,function(y){y/sum(y,na.rm=TRUE)})
newX <- t(apply(x[,-1],1,function(y){y/median(y,na.rm=TRUE)}))
coe <- apply(newX, 2, median, na.rm = TRUE)
for(i in 2:ncol(x)){
x[,i] <- x[,i]/coe[i-1]
}
x[,-1] <- x[,-1] * mean(apply(dat,2,function(x){sum(x,na.rm=TRUE)}))
#save(x,file="x.rda")
}else if(!is.null(method) && method == "combat"){
## The ComBat function adjusts for known batches using an
## empirical Bayesian framework
## http://biostatistics.oxfordjournals.org/content/8/1/118.full
message("normalization: ComBat\n")
sample_meta <- data.frame(sample=names(x)[-1],
stringsAsFactors = FALSE)
# samList <- read.delim(para@sampleListFile,stringsAsFactors=FALSE)
if(is.null(para@sampleList) || is.na(para@sampleList) ||
nrow(para@sampleList) ==0){
samList <- read.delim(para@sampleListFile,stringsAsFactors = FALSE)
}else{
samList <- para@sampleList
}
samList$class <- as.character(samList$class)
samList$class[is.na(samList$class)] <- "QC"
m <- left_join(sample_meta,samList,by="sample")
dat_batch <- m$batch
if(useClass==TRUE){
modcombat = model.matrix(~as.factor(class), data=m)
}else{
modcombat = model.matrix(~1, data=m)
}
## log2 transformation
# dat <- dat[dat<=0] <- NA
#save(dat,dat_batch,modcombat,file="combat.rda")
dat <- log2(dat)
x_tmp = ComBat(dat=dat, batch=dat_batch, mod=modcombat,
par.prior=TRUE,
prior.plot=FALSE)
x_tmp <- 2^x_tmp
message("<=0:",sum(x_tmp<=0),"/",length(x_tmp),"\n")
# x_tmp[x_tmp<=0] <- NA
x_tmp[x_tmp<=0] <- 0
x[,-1] <- x_tmp
}else if(!is.null(method) && method == "tmm"){
dat[is.na(dat)] <- 0
dlist <- DGEList(counts = dat)
dlist <- calcNormFactors(dlist,method = "TMM")
dat <- cpm(dlist)
# dat[dat<=0] <- NA
dat[dat<=0] <- 0
x[,-1] <- dat
}else if(!is.null(method) && method == "median"){
x[,-1] <- apply(dat,2,function(x){x/median(x,na.rm=TRUE)})
}else if(is.null(method) && !is.null(norFactor)){
message("Normalization by ",norFactor)
if(norFactor %in% names(para@peaksData)){
para@peaksData[,valueID] <- para@peaksData[,valueID]/para@peaksData[,norFactor]
}else{
stop("Not found ",norFactor," in para@peaksData!")
}
}
#else{
# stop("Normalization method must be in ",
# "'vsn,quantiles,quantiles.robust,sum'\n")
#}
if(!is.null(method) && method != "none"){
normValue <- melt(x,id.vars="ID",value.name=valueID,
variable.name="sample")
para@peaksData[,valueID] <- NULL
para@peaksData <- plyr::join(para@peaksData,normValue,
by = c("ID","sample"))
}
return(para)
}
)
##' @title Perform batch correction
##' @description Perform batch correction
##' @param para An metaXpara object
##' @param valueID The name of value, default is "value"
##' @param impute_method Missing value imputation method, default is knn.
##' @param use_class Whether use class information, default is TRUE
##' @param cpu The number of CPUs used for missing value imputation,
##' default is 1
##' @return A new metaXpara object
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' para <- batchCorrect(para)
##' para <- missingValueImpute(para)
##' para <- transformation(para,valueID = "value")
##' metaX::plotPCA(para,valueID="value",scale="uv",center=TRUE)
batchCorrect=function(para, valueID="value", impute_method = "knn",
use_class=TRUE, cpu=1){
cat("Perform batch correction.\n")
## get missing value information
# miss_value_id <- para@peaksData %>%
# dplyr::filter(is.na(!!valueID) | !!valueID <=0) %>%
# dplyr::select(sample,ID)
x <- para@peaksData
miss_value_id <- x[ is.na(x[,valueID]) | x[,valueID] <=0, c("sample","ID")]
para@missValueImputeMethod <- impute_method
para <- missingValueImpute(para,valueID = valueID,
method = impute_method, cpu = cpu)
para <- metaX::normalize(para,valueID=valueID, method = "combat",
useClass=use_class)
para@peaksData$tmp_id <- paste(para@peaksData$sample,para@peaksData$ID,sep="|")
if(nrow(miss_value_id) >= 1){
miss_value_id <- paste(miss_value_id$sample,miss_value_id$ID,sep="|")
cat("The number of missing values:",length(miss_value_id),"\n")
para@peaksData[para@peaksData$tmp_id %in% miss_value_id,valueID] <- 0
para@peaksData$tmp_id <- NULL
}else{
cat("No missing value found!\n")
}
return(para)
}
##' @title Batch correction using SVR normalization
##' @description Batch correction using SVR normalization
##' @param para A metaXpara object
##' @param ntop Default is top 5 correlated peaks
##' @param impute Whether do the imputation before and after normalization
##' @param cpu The number of cpu used, default is 0 which means using all cpu.
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @return A new object of metaXpara
##' @references X. Shen, X. Gong, Y. Cai, Y. Guo, J. Tu, H. Li, T.Zhang,
##' J. Wang, F. Xue, and Z.-J. Zhu, Normalization and
##' Integration of Large-Scale Metabolomics Data Using Support Vector
##' Regression, Metabolomics, 2016, 12: 89
##' @export
svrNormalize=function(para,ntop=5,impute=TRUE,cpu=0){
res <- list()
message("Using cpu: ",cpu)
peaksData <- para@peaksData
if(sum(is.na(peaksData$class)) <=0 ){
message("Don't find QC sample in your data!")
return(para)
}
message("The number of NA value in peaksData before SVR normalization: ",
sum(is.na(peaksData$value) | peaksData$value <= 0))
qcData <- peaksData[is.na(peaksData$class),]
sampleData <- peaksData[!is.na(peaksData$class),]
# samList <- read.delim(para@sampleListFile,stringsAsFactors=FALSE)
if(is.null(para@sampleList) || is.na(para@sampleList) ||
nrow(para@sampleList) ==0){
samList <- read.delim(para@sampleListFile,stringsAsFactors = FALSE)
}else{
samList <- para@sampleList
}
assign(x="maxOrder",value=max(samList[,para@sampleListHead['order']]),
envir=.GlobalEnv)
# require(doSMP) # will no longer work...
cpu = ifelse(cpu==0,detectCores(),cpu)
cl <- makeCluster(getOption("cl.cores", cpu))
clusterExport(cl, c("svrfit","svrfit2"),envir=environment())
qcData$ID_batch <- paste(qcData$ID,qcData$batch,sep="_")
#qcData$ID_batch <- qcData$ID
message(date(),"\tSVR modeling ...")
if(ntop!=1){
## fit intensity with top n correlation peaks
dmat <- qcData %>%
select_("ID","sample","value") %>%
spread_("ID","value") %>% select(-sample)
corQC<-pcor(as.matrix(dmat),use="complete.obs")
## generate
dsData <- list(dmat=list(),smat=list())
for(batchID in unique(peaksData$batch)){
dsData$dmat[[batchID]] <- filter(qcData,batch==batchID) %>%
select_("ID","sample","value") %>%
spread_("ID","value") %>% select(-sample)
dsData$smat[[batchID]] <- filter(peaksData,batch==batchID) %>%
select_("ID","sample","value") %>%
spread_("ID","value")
}
gc()
#intPredict <- parLapply(cl,unique(qcData$ID_batch),svrfit3,corQC=corQC,
# qcData=qcData,ntop=5,peaksData=peaksData)
intPredict <- parLapply(cl,unique(qcData$ID_batch),svrfit3,corQC=corQC,
ntop=5,dsData=dsData,qcData=qcData)
intPredict <- rbindlist(intPredict)
intPredict <- as.data.frame(intPredict)
}else{
## fit intensity with injection order
intPredict <- parLapply(cl,unique(qcData$ID_batch),svrfit,
qcData=qcData,maxOrder=maxOrder)
intPredict <- rbindlist(intPredict)
intPredict <- as.data.frame(intPredict)
}
stopCluster(cl)
message(date(),"\tSVR modeling done.")
if("newOrder" %in% names(intPredict)){
intPredict <- dplyr::rename(intPredict,order=newOrder)
}
## head: sample ID value batch class order valuePredict
peaksData$valuePredict <- NULL
peaksData$valueNorm <- NULL
#peaksData <- plyr::join(peaksData,intPredict,
peaksData <- inner_join(peaksData,intPredict,
by=intersect(names(peaksData),names(intPredict)))
#mpa <- ddply(peaksData,.(ID),summarise,mpa=median(value,na.rm = TRUE))
mpa <- peaksData %>% dplyr::group_by(ID) %>% dplyr::summarise(mpa=median(value,na.rm = TRUE))
#peaksData <- plyr::join(peaksData,mpa,
# save(mpa,peaksData,file="test.rda")
peaksData <- inner_join(peaksData,mpa,
by=intersect(names(peaksData),names(mpa)))
peaksData <- dplyr::mutate(peaksData,valuePredict=valuePredict/mpa)
peaksData$mpa <- NULL
message("change value which ==0 to NA")
peaksData$value[ peaksData$value<=0] <- NA
peaksData$valueNorm <- peaksData$value/peaksData$valuePredict
######################################################################
######################################################################
peaksData$valuePredict[ peaksData$valuePredict<=0] <- NA
## calculate CV using this value
peaksData$valueNorm[ peaksData$valueNorm<=0] <- NA
message("The number of NA value in peaksData after SVR normalization:",
sum(is.na(peaksData$valueNorm)))
## TODO: Do we still need to do missing value imputation are QC-based
## correction?
if(impute){
message(date(),"\tDo missing value imputation after SVR normalization...")
paraTmp <- para
paraTmp@peaksData <- peaksData
paraTmp <- missingValueImpute(x = paraTmp,valueID="valueNorm")
peaksData <- paraTmp@peaksData
}
## For each batch
## CV plot
#cvStat <- ddply(peaksData[is.na(peaksData$class),],.(ID,batch),
# summarise,
# rawCV=sd(value,na.rm = TRUE)/mean(value,na.rm = TRUE),
# normCV=sd(valueNorm,na.rm = TRUE)/mean(valueNorm,na.rm = TRUE))
cvStat <- peaksData[is.na(peaksData$class),] %>% group_by(ID,batch) %>%
dplyr::summarise(rawCV=sd(value,na.rm = TRUE)/mean(value,na.rm = TRUE),
normCV=sd(valueNorm,na.rm = TRUE)/mean(valueNorm,na.rm = TRUE))
#cvStatForEachBatch <- melt(cvStat,id.vars = c("ID","batch"),
# variable.name = "CV")
cvStatForEachBatch <- cvStat %>% tidyr::gather(key = "CV", value = "value",-ID,-batch)
cvStatForEachBatch$batch <- as.factor(cvStatForEachBatch$batch)
## output information
message("Summary information of the CV for QC samples:")
#cvTable <- ddply(cvStatForEachBatch,.(batch,CV),summarise,
# lessThan30=sum(value<=0.3,na.rm = TRUE),
# total=length(value),ratio=lessThan30/total)
cvTable <- cvStatForEachBatch %>% group_by(batch,CV) %>%
dplyr::summarise(lessThan30=sum(value<=0.3,na.rm = TRUE),
total=length(value),
ratio=lessThan30/total)
print(cvTable)
res$cvBatch <- cvTable
message("\n")
para@fig$cv<- paste(para@outdir,"/",para@prefix,"-cv.pdf",sep="")
pdf(para@fig$cv,width = 6,height = 6)
p<-ggplot(data=cvStatForEachBatch,aes(x=value,fill=CV,colour=CV))+
facet_grid(batch~.)+
geom_density(alpha = 0.5)+
xlab(label = "CV")
print(p)
p<-ggplot(data=cvStatForEachBatch,aes(x=value,fill=CV,colour=CV))+
facet_grid(batch~.)+
geom_density(alpha = 0.5)+
xlim(0,2)+
xlab(label = "CV")
print(p)
dev.off()
#######################################
## For all
#cvStat <- ddply(peaksData[is.na(peaksData$class),],.(ID),
# summarise,
# rawCV=sd(value,na.rm = TRUE)/mean(value,na.rm = TRUE),
# normCV=sd(valueNorm,na.rm = TRUE)/mean(valueNorm,na.rm = TRUE))
cvStat <- peaksData[is.na(peaksData$class),] %>% group_by(ID) %>%
dplyr::summarise(rawCV=sd(value,na.rm = TRUE)/mean(value,na.rm = TRUE),
normCV=sd(valueNorm,na.rm = TRUE)/mean(valueNorm,na.rm = TRUE))
#cvStatForAll <- melt(cvStat,id.vars = c("ID"),
# variable.name = "CV")
cvStatForAll <- cvStat %>% tidyr::gather(key = "CV", value = "value", -ID)
## output information
message("Summary information of the CV for QC samples:")
#cvTable <- ddply(cvStatForAll,.(CV),summarise,
# lessThan30=sum(value<=0.3,na.rm = TRUE),
# total=length(value),ratio=lessThan30/total)
cvTable <- cvStatForAll %>% group_by(CV) %>%
dplyr::summarise(lessThan30=sum(value<=0.3,na.rm = TRUE),
total=length(value),ratio=lessThan30/total)
print(cvTable)
res$cvAll <- cvTable
message("\n")
########################################################
message("Peaks with CV > ",0.3,"!")
message(sum(cvStat$normCV > 0.3,na.rm = TRUE))
tmpPeaksData <- merge(peaksData,cvStat,by="ID")
if(nrow(tmpPeaksData)!=nrow(peaksData)){
error_file <- paste(para@outdir,"/",para@prefix,"-doSVR-error.rda",
sep="")
message("Please see the file: ",error_file," for detail!")
save(peaksData,cvStat,file=error_file)
stop("Please see detailed data in ",error_file)
}
peaksData <- tmpPeaksData
peaksData <- dplyr::rename(peaksData,cv=normCV)
para@peaksData <- peaksData
#para@peaksData <- dplyr::filter(peaksData,!is.na(cv) & cv<=cvFilter)
res$metaXpara <- para
return(res)
}
svrfit=function(id,qcData,maxOrder){
out <- tryCatch({
dat <- data.frame(newOrder=1:maxOrder)
piece <- qcData[ qcData$ID_batch==id,]
svm_reg <- svm(piece$order,piece$value)
dat$valuePredict=e1071:::predict.svm(svm_reg,data.frame(y=dat$newOrder))
dat$ID <- piece$ID[1]
dat$batch <- piece$batch[1]
dat
},
error=function(e){
message("Please see the file: runFit_error.rda for related data!")
save(e,id,qcData,maxOrder,file="runFit_error.rda")
stop("error in runFit!")
return(NULL)
},
warning=function(cond){
message("Please see the file: runFit_warning.rda for related data!")
save(cond,id,qcData,maxOrder,file="runFit_warning.rda")
return(NULL)
})
return(out)
}
## cor is calculated for each batch
svrfit2=function(id,qcData,peaksData,ntop=5){
out <- tryCatch({
piece <- qcData[ qcData$ID_batch==id,]
## get batch ID
batchID <- unlist(str_split(id,"_"))
batchID <- batchID[length(batchID)] %>% as.numeric()
#dat <- data.frame()
#dmat <- spread(qcData %>% filter(batch==batchID),ID,value)
dmat <- dsData$dmat[[batchID]]
smat <- dsData$smat[[batchID]]
corQC<-pcor(as.matrix(dmat),use="complete.obs")
i <- which(row.names(corQC)==piece$ID[1])
corPeak<-as.numeric(which(corQC[,i]%in%rev(sort(corQC[-i,i]))[1:as.numeric(ntop)]))
fid <- colnames(dmat)[corPeak]
svm_reg<-svm(dmat[,corPeak,drop=FALSE],dmat[,i])
dat<- data.frame(valuePredict=e1071:::predict.svm(svm_reg,
data.frame(x=smat[,colnames(smat) %in% fid,drop=FALSE]))
)
dat$ID <- piece$ID[1]
dat$batch <- piece$batch[1]
dat$sample <- smat$sample
dat
},
error=function(e){
message("Please see the file: runFit_error.rda for related data!")
save(e,id,qcData,maxOrder,file="runFit_error.rda")
stop("error in runFit!")
return(NULL)
},
warning=function(cond){
message("Please see the file: runFit_warning.rda for related data!")
save(cond,id,qcData,maxOrder,file="runFit_warning.rda")
return(NULL)
})
return(out)
}
## cor is calculated for all batch.
svrfit3=function(id,qcData,dsData,ntop=5,corQC){
out <- tryCatch({
piece <- qcData[ qcData$ID_batch==id,]
## get batch ID
batchID <- unlist(str_split(id,"_"))
batchID <- as.numeric(batchID[length(batchID)])
#dat <- data.frame()
#dmat <- spread(qcData %>% filter(batch==batchID),ID,value)
dmat <- dsData$dmat[[batchID]]
smat <- dsData$smat[[batchID]]
#corQC<-pcor(as.matrix(dmat),use="complete.obs")
i <- which(row.names(corQC)==piece$ID[1])
corPeak<-as.numeric(which(corQC[,i]%in%rev(sort(corQC[-i,i]))[1:as.numeric(ntop)]))
fid <- colnames(dmat)[corPeak]
svm_reg<-svm(dmat[,corPeak,drop=FALSE],dmat[,i])
dat<- data.frame(valuePredict=e1071:::predict.svm(svm_reg,smat[,colnames(smat) %in% fid,drop=FALSE]))
dat$ID <- piece$ID[1]
dat$batch <- piece$batch[1]
dat$sample <- smat$sample
dat
},
error=function(e){
message("Please see the file: runFit_error.rda for related data!")
save(e,id,qcData,maxOrder,file="runFit_error.rda")
stop("error in runFit!")
return(NULL)
},
warning=function(cond){
message("Please see the file: runFit_warning.rda for related data!")
save(cond,id,qcData,maxOrder,file="runFit_warning.rda")
return(NULL)
})
return(out)
}
svmRadialfit=function(id,qcData,maxOrder,repeats=10,
verbose=FALSE,...){
out <- tryCatch({
dat <- data.frame(newOrder=1:maxOrder)
piece <- qcData[ qcData$ID_batch==id,]
cpara <- seq(0.1,2,by=0.1)
sigmapara <- seq(0.1,0.5,by=0.05)
trainctrl <- trainControl(method = "LOOCV",
repeats = repeats,
verboseIter=verbose)
trModel <- train(data.frame(order=piece$order),piece$value,
metric = "RMSE",
method="svmRadial",
preProc = c("center","scale"),
tuneGrid = data.frame(.C=cpara,
.sigma=rep(sigmapara,each=length(cpara))),
trControl = trainctrl)
dat$valuePredict <- caret::predict.train(trModel,data.frame(y=dat$newOrder))
dat$ID <- piece$ID[1]
dat$batch <- piece$batch[1]
dat
},
error=function(e){
message("Please see the file: runFit_error.rda for related data!")
save(e,id,qcData,maxOrder,file="runFit_error.rda")
stop("error in runFit!")
return(NULL)
},
warning=function(cond){
message("Please see the file: runFit_warning.rda for related data!")
save(cond,id,qcData,maxOrder,file="runFit_warning.rda")
return(NULL)
})
return(out)
}
##' @title Automatically detect outlier samples
##' @description Automatically detect outlier samples
##' @rdname autoRemoveOutlier
##' @docType methods
##' @param para A metaXpara object
##' @param outTol A factor to define the outlier tolerance, default is 1.2
##' @param pcaMethod See \code{\link{pca}} in \pkg{pcaMethods}
##' @param valueID The name of the column which will be used
##' @param scale Scaling, see \code{\link{pca}} in \pkg{pcaMethods}
##' @param center Centering, see \code{\link{pca}} in \pkg{pcaMethods}
##' @param ... Additional parameter
##' @return The name of outlier samples
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @exportMethod
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' para <- missingValueImpute(para)
##' rs <- autoRemoveOutlier(para,valueID="value")
setGeneric("autoRemoveOutlier",
function(para,outTol=1.2,pcaMethod="svdImpute",
valueID="valueNorm",scale="none",center=FALSE,...)
standardGeneric("autoRemoveOutlier"))
##' @describeIn autoRemoveOutlier
setMethod("autoRemoveOutlier", signature(para = "metaXpara"),
function(para,outTol=1.2,pcaMethod="svdImpute",valueID="valueNorm",
scale="none",center=FALSE,...){
out.tol <- outTol
prefix <- para@prefix
outdir <- para@outdir
# sampleList <- read.delim(para@sampleListFile)
if(is.null(para@sampleList) || is.na(para@sampleList) ||
nrow(para@sampleList) ==0){
sampleList <- read.delim(para@sampleListFile,stringsAsFactors = FALSE)
}else{
sampleList <- para@sampleList
}
## Removal of outliers :performs automated removal of outliers in the
## pre-processed data based on expansion of the Hotellings T2
## distribution ellipse.Ref:http://www.ncbi.nlm.nih.gov/pubmed/25348215
para@peaksData->x
x<-dcast(x,ID~sample,value.var = valueID)
x$ID <- NULL
allSampleNames <- names(x)
# sample2class <- data.frame(sample=names(x))
# sample2class <- merge(sample2class,samList,by="sample",
# sort=FALSE)
# sample2class$class[is.na(sample2class$class)] <- "QC"
# sample2class$class <- is.factor(sample2class$class)
outLog <- paste(outdir,"/",prefix,"-autoRemoveOutlier.log",sep="")
cat(date(),"\n",file = outLog)
## x is a data.frame, each col is a sample, each row is a feature
## The 1st round
## Please notice the impact of the missing value and outlier
## sample.
pca.res <- pca(t(x), nPcs = 2, method=pcaMethod,scale=scale,
center=center, cv="q2",seed=123)
outFig <- paste(outdir,"/",prefix,"-autoRemoveOutlier.pdf",sep="")
pdf(outFig)
plotData <- data.frame(x=pca.res@scores[,1],
y=pca.res@scores[,2],
sample=names(x))
plotData <- merge(plotData,sampleList,by="sample",sort=FALSE)
plotData$class <- as.character(plotData$class)
plotData$class[is.na(plotData$class)] <- "QC"
ggobj <-ggplot(data = plotData,aes(x=x,y=y,colour=class))+
geom_point()+
xlab("PC1")+
ylab("PC2")+
theme(legend.justification=c(1,1), legend.position=c(1,1))
print(ggobj)
plot(pca.res@scores[,1],pca.res@scores[,2],main="Raw PCA plot",
xlab="PC1",ylab="PC2")
abline(h = 0,v = 0,lty=2,col="gray")
text(pca.res@scores[,1],pca.res@scores[,2],labels = names(x),cex=0.5,
col = ifelse(names(x) %in% sampleList$sample[is.na(sampleList$class)],
"red","blue"))
scores <- pca.res@scores
HotEllipse<-abs(cbind(HotE(scores[,1],scores[,2])))*out.tol
outliers<-as.numeric()
for (i in 1:nrow(scores)){
sample<-abs(scores[i,])
out.PC1<-which(HotEllipse[,1]<sample[1])
out.PC1.PC2<-any(HotEllipse[out.PC1,2]<sample[2])*1
outlier<-ifelse(out.PC1.PC2>0,1,0)#+out.PC1.PC3+out.PC2.PC3>0,1,0)
outliers<-c(outliers,outlier)
}
#outliers
plotPcs(pca.res, type = "scores",col=outliers+1,pch=19,cex=0.8,
main="1st PCA model")
outliers<-which(outliers==1)
cat("Outliers were detected in the 1st PCA model calculation and removed:",
names(x)[outliers],"\n")
cat("Outliers were detected in the 1st PCA model calculation and removed:",
names(x)[outliers],"\n",file = outLog,append = TRUE)
if(length(outliers)>=1){
## The 2st round
x <- x[,-outliers]
pca.res2 <- pca(t(x), nPcs = 2, method=pcaMethod,scale=scale,
center=center, cv="q2",seed=123)
scores2 <- pca.res2@scores
plotData <- data.frame(x=pca.res2@scores[,1],
y=pca.res2@scores[,2],
sample=names(x))
plotData <- merge(plotData,sampleList,by="sample",sort=FALSE)
plotData$class <- as.character(plotData$class)
plotData$class[is.na(plotData$class)] <- "QC"
ggobj <-ggplot(data = plotData,aes(x=x,y=y,colour=class))+
geom_point()+
xlab("PC1")+
ylab("PC2")+
theme(legend.justification=c(1,1), legend.position=c(1,1))
print(ggobj)
##HotE(scores[,2],scores[,3])))*out.tol
HotEllipse<-abs(cbind(HotE(scores2[,1],scores2[,2])))*out.tol
outliers2<-as.numeric()
for (i in 1:nrow(scores2)){
sample<-abs(scores2[i,])
out.PC1<-which(HotEllipse[,1]<sample[1])
out.PC1.PC2<-any(HotEllipse[out.PC1,2]<sample[2])*1
outlier<-ifelse(out.PC1.PC2>0,1,0)#+out.PC1.PC3+out.PC2.PC3>0,1,0)
outliers2<-c(outliers2,outlier)
}
plotPcs(pca.res2, type = "scores",col=outliers2+1,pch=19,cex=0.8,
main="2st PCA model")
outliers2<-which(outliers2==1)
cat("Outliers were detected in the 2st PCA model",
"calculation and removed:",
names(x)[outliers2],"\n")
cat("Outliers were detected in the 2st PCA model",
"calculation and removed:",
names(x)[outliers2],"\n",file = outLog,append = TRUE)
if(length(outliers2)>=1) {
## The 3st round
x <- x[,-outliers2]
pca.res3 <- pca(t(x), nPcs = 2, method=pcaMethod,scale=scale,
center=center, cv="q2",seed=123)
scores3 <- pca.res3@scores
plotData <- data.frame(x=pca.res3@scores[,1],
y=pca.res3@scores[,2],
sample=names(x))
plotData <- merge(plotData,sampleList,by="sample",sort=FALSE)
plotData$class <- as.character(plotData$class)
plotData$class[is.na(plotData$class)] <- "QC"
ggobj <-ggplot(data = plotData,aes(x=x,y=y,colour=class))+
geom_point()+
xlab("PC1")+
ylab("PC2")+
theme(legend.justification=c(1,1), legend.position=c(1,1))
print(ggobj)
##HotE(scores[,2],scores[,3])))*out.tol
HotEllipse<-abs(cbind(HotE(scores3[,1],scores3[,2])))*out.tol
outliers3<-as.numeric()
for (i in 1:nrow(scores3)){
sample<-abs(scores3[i,])
out.PC1<-which(HotEllipse[,1]<sample[1])
out.PC1.PC2<-any(HotEllipse[out.PC1,2]<sample[2])*1
#+out.PC1.PC3+out.PC2.PC3>0,1,0)
outlier<-ifelse(out.PC1.PC2>0,1,0)
outliers3<-c(outliers3,outlier)
}
plotPcs(pca.res3, type = "scores",col=outliers3+1,pch=19,cex=0.8,
main="3st PCA model")
outliers3<-which(outliers3==1)
cat("Outliers were detected in the 3st PCA model",
"calculation and removed:",
names(x)[outliers3],"\n")
cat("Outliers were detected in the 3st PCA model",
"calculation and removed:",
names(x)[outliers3],"\n",file = outLog,append = TRUE)
if (length(outliers3)>=1){
message("WARNING: after two rounds of outlier removal",
" outliers are still detected either:")
x <- x[,-outliers3]
}
}
}
dev.off()
removeSampleNames <- allSampleNames[!allSampleNames %in% names(x)]
return(removeSampleNames)
}
)
##' @title Export a csv file which can be used for MetaboAnalyst
##' @description Export a csv file which can be used for MetaboAnalyst
##' @rdname makeMetaboAnalystInput
##' @docType methods
##' @param para A metaXpara object
##' @param rmQC A logical indicates whether remove the QC data
##' @param valueID The name of the column which will be used
##' @param zero2NA A logical indicates whether convert the value <=0 to NA
##' @param prefix The prefix of output file
##' @param ... Additional parameter
##' @return none
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @exportMethod
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' para <- missingValueImpute(para)
##' makeMetaboAnalystInput(para,valueID="value")
setGeneric("makeMetaboAnalystInput",
function(para,rmQC=TRUE,valueID="valueNorm",zero2NA=TRUE,prefix=NA,
...)
standardGeneric("makeMetaboAnalystInput"))
##' @describeIn makeMetaboAnalystInput
setMethod("makeMetaboAnalystInput", signature(para = "metaXpara"),
function(para,rmQC=TRUE,valueID="valueNorm",zero2NA=TRUE,prefix=NA,
...){
if(is.na(prefix)){
prefix <- para@prefix
}else{
prefix <- paste(para@prefix,"-",prefix,sep="")
}
metaboAnalystInput <- paste(para@outdir,"/",prefix,
"-metaboAnalystInput.csv",sep="")
# samList <- read.delim(para@sampleListFile)
if(is.null(para@sampleList) || is.na(para@sampleList) ||
nrow(para@sampleList) ==0){
samList <- read.delim(para@sampleListFile,stringsAsFactors = FALSE)
}else{
samList <- para@sampleList
}
if(zero2NA){
para <- zero2NA(para,valueID=valueID)
}
x<-dcast(para@peaksData,ID~sample,value.var = valueID)
## remove qc sample data
if(rmQC==TRUE){
x <- x[,!names(x) %in% samList$sample[is.na(samList$class)]]
}
sample2class <- data.frame(sample=names(x)[-1])
sample2class <- merge(sample2class,samList,by="sample",sort=FALSE)
sample2class$class <- as.character(sample2class$class)
sample2class$class[is.na(sample2class$class)] <- "QC"
sample2class$Sample <- paste(sample2class$class,sample2class$order,sep="_")
sample2class$Lable <- as.character(sample2class$class)
sample2class$Lable[is.na(sample2class$Lable)] <- "QC"
sample2class <- t(sample2class[,c("Sample","Lable")])
message("Save data to file for MetaboAnalyst input: ",metaboAnalystInput)
write.table(sample2class, file = metaboAnalystInput,
col.names=FALSE,sep=",")
write.table(x,file = metaboAnalystInput,row.names = FALSE,
col.names=FALSE,sep=",",append = TRUE)
}
)
##' @title Do the univariate and multivariate statistical analysis
##' @description Do the univariate and multivariate statistical analysis
##' @rdname peakStat
##' @docType methods
##' @param para A \code{metaXpara} object
##' @param plsdaPara A \code{plsDAPara} object
##' @param doROC A logical indicates whether to calculate the ROC
##' @param pcaLabel The label used for PCA score plot, "none","order" and
##' "sample" are supported. Default is "none"
##' @param saveRds Boolean, setting the argument to TRUE to save some objects to
##' disk for debug. Only useful for developer. Default is TRUE.
##' @param ... Additional parameter
##' @return none
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @exportMethod
##' @return An object of \code{metaXpara}
##' @examples
##' \dontrun{
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' para <- missingValueImpute(para)
##' ratioPairs(para) <- "S:C"
##' addValueNorm(para) <- para
##' plsdaPara <- new("plsDAPara")
##' res <- peakStat(para,plsdaPara)
##' }
setGeneric("peakStat",function(para,plsdaPara,doROC=TRUE,saveRds=TRUE,...) standardGeneric("peakStat"))
##' @describeIn peakStat
setMethod("peakStat", signature(para = "metaXpara",plsdaPara = "plsDAPara"),
function(para,plsdaPara,doROC=TRUE,saveRds=TRUE,pcaLabel="none",...){
ratioPairs <- para@ratioPairs #A:B;A:C;B:C
ratioPairs <- unlist(strsplit(x = ratioPairs, split = ";"))
if(length(ratioPairs)<1){
stop("Please check the parameter: ",ratioPairs)
}
## output fig
#outFig <- paste(para@outdir,"/",para@prefix,"-peakStat.pdf",sep="")
#pdf(file = outFig,width = 5,height = 5)
assign(x="plsdaPara",value=plsdaPara,envir=.GlobalEnv)
statResult <- lapply(ratioPairs,function(rp){
cgroup <- unlist(strsplit(x = rp, split = ":"))
peaksData <- para@peaksData[para@peaksData$class%in%cgroup,]
peaksData$class <- as.character(peaksData$class)
if(nrow(peaksData)==0){
stop("Please check your class ",rp)
}
message(date(),"\tUnivariate analysis")
if(para@pairTest==TRUE){
## paired test
statTest <- ddply(peaksData,.(ID),here(summarise),
ratio = calcRatio(x = valueNorm,
class = class,
method = "mean",
case = cgroup[1],
control = cgroup[2]),
#t.test_p.value = t.test(value~class)$p.value,
t.test_p.value = .tTestPair(valueNorm,class,pair=pair),
wilcox.test_p.value = .wilcoxTestPair(valueNorm,class,pair=pair))
}else{
statTest <- ddply(peaksData,.(ID),here(summarise),
ratio = calcRatio(x = valueNorm,
class = class,
method = "mean",
case = cgroup[1],
control = cgroup[2]),
#t.test_p.value = t.test(value~class)$p.value,
t.test_p.value = .tTest(valueNorm,class),
wilcox.test_p.value = wilcox.test(valueNorm~class)$p.value)
}
## Adjust P-values for Multiple Comparisons
statTest$t.test_p.value_BHcorrect <- p.adjust(p=statTest$t.test_p.value,
method = "BH")
statTest$wilcox.test_p.value_BHcorrect <-
p.adjust(p=statTest$wilcox.test_p.value,method = "BH")
message(date(),"\tUnivariate analysis done")
## ROC
if(length(cgroup)==2 && doROC==TRUE){
message(date(),"\tclassical univariate ROC analysis")
rocRes <- myCalcAUROC(para = para,cgroup = cgroup)
statTest <- merge(statTest,rocRes,by="ID")
message(date(),"\tclassical univariate ROC analysis done!")
}
## plot ratio fig
## t.test with fdr correct
plotdata <- data.frame(x=statTest$ratio,
y=statTest$t.test_p.value_BHcorrect)
## maybe there are negative value
plotdata <- plotdata[ isValid(plotdata$x),]
plotdata$sig <- ( abs(log2(plotdata$x)) >= log2(1.5) ) & plotdata$y <= 0.05
ggobj <- ggplot(data=plotdata,aes(x=log2(x),y = -log10(y), colour=sig))+
geom_point(alpha=0.7)+
scale_colour_manual(values=c("#9999CC","red"))+
xlab("Log2(Fold change)")+
ylab("-Log10(P-value)")+
ggtitle(paste(rp," t.test with FDR correct"))+
theme(legend.position="none")
png(filename = paste(para@outdir,"/",para@prefix,"-ttest-FDR-",sub(pattern = ":",replacement = "_",x = rp),".png",sep=""),width = 600,height = 600,res = 120)
print(ggobj)
dev.off()
## t.test
plotdata <- data.frame(x=statTest$ratio,
y=statTest$t.test_p.value)
## maybe there are negative value
plotdata <- plotdata[ isValid(plotdata$x),]
plotdata$sig <- ( abs(log2(plotdata$x)) >= log2(1.5) ) & plotdata$y <= 0.05
ggobj <- ggplot(data=plotdata,aes(x=log2(x),y = -log10(y), colour=sig))+
geom_point(alpha=0.7)+
scale_colour_manual(values=c("#9999CC","red"))+
xlab("Log2(Fold change)")+
ylab("-Log10(P-value)")+
ggtitle(paste(rp," t.test"))+
theme(legend.position="none")
png(filename = paste(para@outdir,"/",para@prefix,"-ttest-",sub(pattern = ":",replacement = "_",x = rp),".png",sep=""),width = 600,height = 600,res = 120)
print(ggobj)
dev.off()
## histogram of log2(ratio)
ggobj <- ggplot(data=plotdata,aes(x=log2(x)))+
geom_histogram(aes(y = ..density..),colour="white",fill="#56B4E9",
binwidth=0.1)+
geom_density(colour="#E69F00")+
xlab("Log2(Fold change)")+
stat_function(fun=dnorm, colour="blue",
args=list(mean=mean(log2(plotdata$x)),
sd=sd(log2(plotdata$x))))+
geom_vline(xintercept=0,size=1.1,colour="#D55E00")
png(filename = paste(para@outdir,"/",para@prefix,"-log2ratio-hist-",sub(pattern = ":",replacement = "_",x = rp),".png",sep=""),width = 600,height = 600,res = 120)
print(ggobj)
dev.off()
## wilcox.test with fdr correct
plotdata <- data.frame(x=statTest$ratio,
y=statTest$wilcox.test_p.value_BHcorrect)
## maybe there are negative value
plotdata <- plotdata[ isValid(plotdata$x),]
plotdata$sig <- ( abs(log2(plotdata$x)) >= log2(1.5) ) & plotdata$y <= 0.05
ggobj <- ggplot(data=plotdata,aes(x=log2(x),y = -log10(y), colour=sig))+
geom_point(alpha=0.7)+
#scale_colour_manual(values=c("gray","red"))+
scale_colour_manual(values=c("#9999CC","red"))+
xlab("Log2(Fold change)")+
ylab("-Log10(P-value)")+
ggtitle(paste(rp," wilcox.test with FDR correct"))+
theme(legend.position="none")
png(filename = paste(para@outdir,"/",para@prefix,"-wilcox-FDR-",sub(pattern = ":",replacement = "_",x = rp),".png",sep=""),width = 600,height = 600,res = 120)
print(ggobj)
dev.off()
## wilcox.test
plotdata <- data.frame(x=statTest$ratio,
y=statTest$wilcox.test_p.value)
## maybe there are negative value
plotdata <- plotdata[ isValid(plotdata$x),]
plotdata$sig <- ( abs(log2(plotdata$x)) >= log2(1.5) ) & plotdata$y <= 0.05
ggobj <- ggplot(data=plotdata,aes(x=log2(x),y = -log10(y), colour=sig))+
geom_point(alpha=0.7)+
# scale_colour_manual(values=c("gray","red"))+
scale_colour_manual(values=c("#9999CC","red"))+
xlab("Log2(Fold change)")+
ylab("-Log10(P-value)")+
ggtitle(paste(rp," wilcox.test"))+
theme(legend.position="none")
png(filename = paste(para@outdir,"/",para@prefix,"-wilcox-",sub(pattern = ":",replacement = "_",x = rp),".png",sep=""),width = 600,height = 600,res = 120)
print(ggobj)
dev.off()
if(plsdaPara@do==TRUE){
## PLS-DA
message(date(),"\tPLS-DA analysis")
plsda.res <- runPLSDA(para = para,plsdaPara = plsdaPara,sample = cgroup,label = "none")
## save the PLS-DA model to file
if(saveRds){
sfile <- paste(para@outdir,"/",para@prefix,"-",
paste(cgroup,collapse = "_"),"-plsDAmodel.rds",sep="")
saveRDS(object = plsda.res,file = sfile)
}
message(date(),"\tPLS-DA analysis done!")
peaksVIP <- as.data.frame(plsda.res$VIP) # for plsDA
peaksVIP <- peaksVIP[,ncol(peaksVIP),drop=FALSE]
names(peaksVIP)[1] <- "VIP"
peaksVIP$ID <- row.names(peaksVIP)
statTest <- plyr::join(statTest,peaksVIP,by = "ID")
}
statTest$sample <- rp
## PCA analysis
message(date(),"\tPCA analysis")
ppca <- para
## only retain the classes we want to do the pca analysis
ppca@peaksData <- ppca@peaksData %>% filter(class %in% cgroup)
ppca <- transformation(ppca,method = plsdaPara@t,valueID = "valueNorm")
ppca <- metaX::preProcess(ppca,scale = plsdaPara@scale,center = TRUE,
valueID = "valueNorm")
ppca@prefix <- paste(cgroup,collapse = "_")
ppca@prefix <- paste(para@prefix,"-",ppca@prefix,sep="")
fig <- metaX::plotPCA(ppca,valueID = "valueNorm",scale = "none",batch = TRUE,
rmQC = FALSE,label = pcaLabel,...)
## no QC, no batch
ppca@prefix <- paste(ppca@prefix,"-nobatch-noqc",sep="")
fig <- metaX::plotPCA(ppca,valueID = "valueNorm",scale = "none",batch = FALSE,
rmQC = TRUE,label = pcaLabel,...)
## The venn plot of significant metabolites defined by different methods
venn.fig <- paste(para@outdir,"/",para@prefix,"-",rp,
"-sigMetaboliteVenn.tiff",sep="")
venn.fig <- gsub(pattern = ":",replacement = "VS",x=venn.fig)
## maybe there are negative value
tmp <- statTest[ isValid(statTest$ratio),]
sigA <- tmp$ID[ abs(log2(tmp$ratio)) >= log2(1.5) &
tmp$t.test_p.value_BHcorrect <= 0.05 ]
sigB <- tmp$ID[ abs(log2(tmp$ratio)) >= log2(1.5) &
tmp$wilcox.test_p.value_BHcorrect <= 0.05]
if(plsdaPara@do==TRUE){
sigC <- tmp$ID[ tmp$VIP >=1 ]
venn.data <- list("t.test"=sigA,"wilcox.test"=sigB,"VIP"=sigC)
VennDiagram::venn.diagram(x = venn.data,
filename = venn.fig,col = "transparent",
fill = c("cornflowerblue", "green", "yellow"),
alpha = 0.50,
label.col = "black",
cat.col=c("darkblue","darkgreen","darkgreen"))
}else{
venn.data <- list("t.test"=sigA,"wilcox.test"=sigB)
if(length(venn.data$t.test)!=0 || length(venn.data$wilcox.test)!=0){
VennDiagram::venn.diagram(x = venn.data,
filename = venn.fig,col = "transparent",
fill = c("cornflowerblue", "green"),
alpha = 0.50,
label.col = "black",
cat.col=c("darkblue","darkgreen"))
}
}
## return value
statTest
})
statResult <- do.call(rbind,statResult)
## if there are CV (RSD) information in peaksData, put it into statResult
cvInd <- grep(pattern = "cv",x = names(para@peaksData),ignore.case = TRUE)
if(length(cvInd)>=1){
cvInfo <- dplyr::distinct(dplyr::select(para@peaksData,ID,
contains("cv",
ignore.case = TRUE)))
statResult <- dplyr::left_join(statResult,cvInfo,by="ID")
}
para@quant <- statResult
outFile <- paste(para@outdir,"/",para@prefix,"-quant.txt",
sep="")
message("Save quantification result to file: ",outFile)
write.table(para@quant,file = outFile,
col.names = TRUE,
row.names = FALSE,quote=FALSE,sep="\t")
#dev.off()
return(para)
})
##' @title Plot figures for QC-RLSC
##' @description Plot figures for QC-RLSC
##' @rdname plotQCRLSC
##' @docType methods
##' @param para A \code{metaXpara} object
##' @param maxf The number of features to plot
##' @return none
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @seealso \code{\link{doQCRLSC}}
##' @exportMethod
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)[1:20,]
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' para <- missingValueImpute(para)
##' res <- doQCRLSC(para,cpu=1)
##' plotQCRLSC(res$metaXpara)
setGeneric("plotQCRLSC",function(para,maxf=100) standardGeneric("plotQCRLSC"))
##' @describeIn plotQCRLSC
setMethod("plotQCRLSC", signature(para = "metaXpara"), function(para,maxf=100){
x <- para@peaksData
fig <- paste(para@outdir,"/",para@prefix,"-QC-RLSC.png",sep="")
highfig <- sub(pattern = "png$",replacement = "pdf",x = fig)
pdf(file = highfig,width = 8,height = 6)
x$isQC <- ifelse(is.na(x$class),"QC","Sample")
fid <- unique(x$ID)
#for(i in 1:length(fid)){
maxn <- ifelse(length(fid)>=maxf,maxf,length(fid))
for(i in 1:maxn){
dat <- x[x$ID==fid[i],]
y <- melt(dat,
measure.vars = c("value","valueNorm"),
value.name = "Intensity",
variable.name = "Method")
y$class <- as.character(y$class)
y$class[is.na(y$class)] <- "QC"
y$batch <- as.character(y$batch)
y$Intensity <- log10(y$Intensity)
ggObj <- ggplot(data=y,aes(x=order,y=Intensity,
shape=batch,
colour=class))+
geom_point()+
scale_shape_manual(values=1:n_distinct(y$batch))+
geom_line(data=y[y$class=="QC",],
aes(x=order,y=Intensity))+
facet_grid(Method~.)+
ggtitle(label = fid[i])+
ylab("log10(Intensity)")
#scale_y_log10()
print(ggObj)
if(i==1){
png(filename = fig,width = 8,height = 6,units = "in",res = 150)
print(ggObj)
dev.off()
}
}
dev.off()
res <- list(fig=fig,highfig=highfig)
return(res)
}
)
##' @title Performa PCA analysis and plot PCA figure
##' @description Performa PCA analysis and plot PCA figure
##' @rdname plotPCA
##' @docType methods
##' @param para A \code{metaXpara} object
##' @param pcaMethod See \code{\link{pca}} in \pkg{pcaMethods}
##' @param valueID The name of the column which will be used
##' @param scale Scaling, see \code{\link{pca}} in \pkg{pcaMethods}
##' @param center Centering, see \code{\link{pca}} in \pkg{pcaMethods}
##' @param label The label used for plot PCA figure, default is "order".
##' This value can be "order" or "sample". If the value of this parameter
##' is NA or NULL, then no label will be shown for each point.
##' @param pointLabel The labels for each point, default is NULL. When users
##' want to highlight some samples, this parameter is useful.
##' @param pointSize The size of the point, default is 1.4
##' @param labelSize The size of the label, default is 4
##' @param rmQC A logical indicates whether remove QC data
##' @param batch A logical indicates whether output batch information
##' @param saveRds Boolean, setting the argument to TRUE to save some objects to
##' disk for debug. Only useful for developer. Default is TRUE.
##' @param legendRowBatch Specify the number of column of legend for batch.
##' Default is NULL and use the default setting.
##' @param legendRowClass Specify the number of column of legend for group.
##' Default is NULL and use the default setting.
##' @param showCircle Whether or not to show circle for each class, default is TRUE.
##' @param showClass Whether or not to show class information, default is TRUE.
##' This parameter is useful when users want to check the batch effect.
##' @param classColor A data frame for color setting of class
##' @param pdfWidth Width for PDF, default is 4.38
##' @param pdfHeight Height for PDF, default is 3.7
##' @param pngWidth Width for PNG, default is 4 inch
##' @param pngHeight Height for PNG, default is 4.2 inch
##' @param res resolution for PNG, default is 120
##' @param reverse Default is FALSE. Change type between class and batch.
##' @param ... Additional parameter
##' @return none
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @exportMethod
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' para <- missingValueImpute(para)
##' para <- transformation(para,valueID = "value")
##' metaX::plotPCA(para,valueID="value",scale="uv",center=TRUE)
setGeneric("plotPCA",function(para,pcaMethod="svdImpute",valueID="valueNorm",
label="order",pointLabel=NULL,pointSize=1.4,labelSize=4,rmQC=TRUE,
batch=FALSE,showCircle=TRUE,showClass=TRUE,
scale="none",center=FALSE,saveRds=TRUE,
legendRowBatch = NULL, legendRowClass = NULL,
legendPosition="top",
classColor = NULL,
pdfWidth = 4.38,pdfHeight = 3.7,
pngWidth = 4,pngHeight = 4.2, res = 120,
plot3d=FALSE,reverse=FALSE,...)
standardGeneric("plotPCA"))
##' @describeIn plotPCA
setMethod("plotPCA", signature(para = "metaXpara"),
function(para,pcaMethod="svdImpute",valueID="valueNorm",
label="order",pointLabel=NULL,pointSize=1.4,labelSize=4,rmQC=TRUE,batch=FALSE,
showCircle=TRUE,showClass=TRUE,
scale="none",center=FALSE,saveRds=TRUE,legendRowBatch = NULL,
legendRowClass = NULL,
legendPosition="top",
classColor = NULL,
pdfWidth = 4.38,pdfHeight = 3.7,
pngWidth = 4.5,pngHeight = 4.1, res = 120,
plot3d=FALSE,
reverse=FALSE,...){
message("plot PCA for value '",valueID,"'")
prefix <- para@prefix
outdir <- para@outdir
fig <- paste(para@outdir,"/",para@prefix,"-PCA.png",sep="")
highfig <- sub(pattern = "png$",replacement = "pdf",x = fig)
if(is.null(para@sampleList) || is.na(para@sampleList) ||
nrow(para@sampleList) ==0){
sampleList <- read.delim(para@sampleListFile,stringsAsFactors = FALSE)
}else{
sampleList <- para@sampleList
}
x <- para@peaksData
x$class <- as.character(x$class)
if(rmQC==TRUE){
x <- x[!is.na(x$class),]
}else{
x$class[is.na(x$class)] <- "QC"
}
x<-dcast(x,ID~sample,value.var = valueID)
row.names(x) <- x$ID
x$ID <- NULL
cat("scaling in PCA:",scale,"\n")
cat("centering in PCA:",center,"\n")
pca.res <- pca(t(x), nPcs = 3, method=pcaMethod,cv="q2",
scale=scale,center=center,seed=123,...)
row.names(pca.res@loadings) <- row.names(x)
if(saveRds){
saveRDS(object = pca.res,file = paste(para@outdir,"/",
para@prefix,"-pca.rds",sep=""))
}
plotLoading(pca.res,fig=paste(para@outdir,"/",para@prefix,"-pcaloading.png",sep=""))
plotLoading(pca.res,fig=paste(para@outdir,"/",para@prefix,"-pcaloading.pdf",sep=""),label = 100)
plotData <- data.frame(x=pca.res@scores[,1],
y=pca.res@scores[,2],
z=pca.res@scores[,3],
sample=names(x),stringsAsFactors=FALSE)
plotData <- merge(plotData,sampleList,by="sample",sort=FALSE)
plotData$class <- as.character(plotData$class)
plotData$class[is.na(plotData$class)] <- "QC"
## Only work for the dataset which has batch design
## Calculate the the Bhattacharyya distance of different batch
## to evaluate of batch corrections
if(length(unique(plotData$batch))>=2){
batch_means <-lapply(unique(plotData$batch),function(bch){
colMeans(plotData[plotData$batch==bch,c("x","y"),drop=FALSE])})
#names(batch_means) <- unique(plotData$batch)
batch_covs <- lapply(unique(plotData$batch), function(bch)
cov(plotData[plotData$batch==bch,c("x","y"),drop=FALSE]))
#names(batch_covs) <- unique(plotData$batch)
noCov_idx <- which(sapply(batch_covs, function(x) all(x < 1e-8)))
nnoCov <- length(noCov_idx)
nbatches <- length(unique(plotData$batch))
if ((nnoCov <- length(noCov_idx)) > 0) {
warning(paste("Too little information for batch correction in the following batches:\n",
unique(sampleList$batch)[noCov_idx],
"- ignoring these batches in the PCA criterion"))
nbatches <- length(unique(sampleList$batch)) - nnoCov
batch_covs <- batch_covs[-noCov_idx]
batch_means <- batch_means[-noCov_idx]
}
batch_dist <- matrix(0, nbatches, nbatches)
for (i in 2:nbatches){
for (j in 1:(i-1)){
batch_dist[j, i] <- bhattacharyya.dist(batch_means[[j]],
batch_means[[i]],
batch_covs[[j]],
batch_covs[[i]])
}
}
message("Bhattacharyya distance matrix:")
print(batch_dist + t(batch_dist))
message("Mean of the Bhattacharyya distance:")
print(mean(batch_dist[col(batch_dist) > row(batch_dist)]))
}
if(batch==TRUE){
plotData$batch <- factor(as.character(plotData$batch),
levels = as.character(sort(unique(plotData$batch))))
}
if(!is.null(pointLabel)){
if(!is.null(label) && !is.na(label) && label == "order"){
plotData$order[!(plotData$order %in% pointLabel)] <- ""
}else if(!is.null(label) && !is.na(label) && label == "sample"){
# cat("Yes\n")
plotData$sample[!(plotData$sample %in% pointLabel)] <- ""
}
}
#save(plotData,file="plotdata.rda")
if(showClass==TRUE){
if(is.null(classColor)){
if(reverse==FALSE){
ggobj <- ggplot(data = plotData,aes(x=x,y=y,colour=class))
}else{
ggobj <- ggplot(data = plotData,aes(x=x,y=y,shape=class))
}
}else{
cat("Use user defined color for point!\n")
#save(plotData,classColor,file = "test.rda")
classColor$class <- as.character(classColor$class)
classColor$class <- factor(classColor$class,levels = classColor$class)
plotData$class <- factor(plotData$class,levels = classColor$class)
ggobj <- ggplot(data = plotData,aes(x=x,y=y,colour=class)) +
scale_color_manual(breaks = classColor$class,
values = classColor$col)
}
}else{
ggobj <- ggplot(data = plotData,aes(x=x,y=y))
}
ggobj <- ggobj + geom_hline(yintercept=0,colour="gray")+
geom_vline(xintercept=0,colour="gray")+
#geom_point()+
xlab(paste("PC1"," (",sprintf("%.2f%%",100*pca.res@R2[1]),") ",sep=""))+
ylab(paste("PC2"," (",sprintf("%.2f%%",100*pca.res@R2[2]),") ",sep=""))+
theme_bw()+
theme(#legend.justification=c(1,1),
#legend.position=c(1,1),
legend.position = legendPosition,
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
#panel.background=element_rect(fill="#E3E3EE"))+
#theme(legend.direction = 'horizontal', legend.position = 'top')+
#stat_ellipse(geom = "polygon", type="euclid",alpha = 0.4,
# aes(fill = class))+
if(showCircle==TRUE){
ggobj <- ggobj + stat_ellipse(geom = "path")
}
if(!is.null(label) && !is.na(label) && label == "order"){
ggobj <- ggobj + geom_text(aes(label=order),size=labelSize,hjust=-0.2)
}else if(!is.null(label) && !is.na(label) && label == "sample"){
ggobj <- ggobj + geom_text(aes(label=sample),size=labelSize,hjust=-0.2)
}
if(batch==TRUE){
if(reverse==FALSE){
ggobj <- ggobj + geom_point(aes(shape=batch),size=pointSize)+
scale_shape_manual(values=1:n_distinct(plotData$batch))
}else{
ggobj <- ggobj + geom_point(aes(colour=batch),size=pointSize)+
scale_shape_manual(values=1:n_distinct(plotData$batch))
}
}else{
ggobj <- ggobj + geom_point(size=pointSize)
}
if(!is.null(legendRowBatch)){
ggobj <- ggobj + guides(shape = guide_legend(nrow = legendRowBatch))
}
if(showClass==TRUE){
if(!is.null(legendRowClass)){
ggobj <- ggobj + guides(col = guide_legend(nrow = legendRowClass))
}
}
pdf(file = highfig,width = pdfWidth,height = pdfHeight)
print(ggobj)
if(plot3d==TRUE){
## 3D PCA plot
#par(mgp=c(1.6,1,0))
col <- as.numeric(as.factor(plotData$class))
s3d <- scatterplot3d(plotData$x,plotData$y,plotData$z,type="h",angle = 24,
col.grid="lightblue",lty.hplot=2,pch="",color="gray",
xlab = paste("PC1"," (",
sprintf("%.2f%%",100*pca.res@R2[1]),") ",sep=""),
ylab = paste("PC2"," (",
sprintf("%.2f%%",100*pca.res@R2[2]),") ",sep=""),
zlab = paste("PC3"," (",
sprintf("%.2f%%",100*pca.res@R2[3]),") ",sep="")
)#color = as.numeric(as.factor(plotData$class)))
s3d$points(plotData$x,plotData$y,plotData$z, pch = 1,col = col)
s3d.coords <- s3d$xyz.convert(plotData$x,plotData$y,plotData$z)
text(s3d.coords$x, s3d.coords$y, labels = plotData$order,
pos = 4,cex=0.5,col = col)
classLabel <- levels(as.factor(plotData$class))
legend(s3d$xyz.convert(max(plotData$x)*0.7,
max(plotData$y), min(plotData$z)),
col=as.numeric(as.factor(classLabel)), yjust=0,pch=1,
legend = classLabel, cex = 0.8)
}
dev.off()
png(filename = fig,width = pngWidth,height = pngHeight,res = res,units = "in")
print(ggobj)
dev.off()
plotData$dataSet = para@ID
res <- list(fig=fig,highfig=highfig,pca=pca.res,plotdata=plotData)
return(res)
}
)
##' @title Plot PLS-DA figure
##' @description Plot PLS-DA figure
##' @rdname plotPLSDA
##' @docType methods
##' @param para A \code{metaXpara} object
##' @param valueID The name of the column which will be used
##' @param label The label used for plot PLS-DA figure, default is "order"
##' @param ncomp The number of components used for PLS-DA
##' @param ... Additional parameter
##' @return none
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @exportMethod
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' para <- missingValueImpute(para)
##' para <- transformation(para,valueID = "value")
##' para <- preProcess(para=para,scale = "uv",center = TRUE,valueID = "value")
##' plotPLSDA(para,valueID="value")
setGeneric("plotPLSDA",function(para,label="order",valueID="valueNorm",
ncomp=5,...)
standardGeneric("plotPLSDA"))
##' @describeIn plotPLSDA
setMethod("plotPLSDA", signature(para = "metaXpara"),
function(para,label="order",valueID="valueNorm",ncomp=5,...){
prefix <- para@prefix
outdir <- para@outdir
fig <- paste(para@outdir,"/",para@prefix,"-PLSDA.pdf",sep="")
pdf(file = fig,width = 6,height = 6)
# sampleList <- read.delim(para@sampleListFile)
if(is.null(para@sampleList) || is.na(para@sampleList) ||
nrow(para@sampleList) ==0){
sampleList <- read.delim(para@sampleListFile,stringsAsFactors = FALSE)
}else{
sampleList <- para@sampleList
}
peaksData <- para@peaksData
peaksData <- peaksData[!is.na(peaksData$class),]
peaksData$class <- as.character(peaksData$class)
plsData <- dcast(peaksData,sample+class~ID,
value.var = valueID)
#row.names(plsData) <- plsData$ID
plsData$class[is.na(plsData$class)] <- "QC"
pls.X <- as.matrix(plsData[,-c(1:2)])
pls.Y <- as.factor(as.character(plsData$class))
message("Remove NA value ...")
numNA <- apply(pls.X,2,function(x){any(is.na(x))})
message(sum(numNA)," features are removed!")
pls.X <- pls.X[,!numNA]
p <- plsDA(variables = pls.X,group = pls.Y,autosel = FALSE,comps=ncomp,...)
print(p$confusion)
print(p$R2)
print(p$Q2)
print(p$specs)
message("error rate: ",p$error_rate)
#r2 <- R2(p,estimate="train")
if(ncomp >=3){
plotData <- data.frame(x=p$components[,1],
y=p$components[,2],
z=p$components[,3],
sample=plsData$sample,
class=pls.Y)
}else{
plotData <- data.frame(x=p$components[,1],
y=p$components[,2],
sample=plsData$sample,
class=pls.Y)
}
plotData$class <- as.character(plotData$class)
plotData$class[is.na(plotData$class)] <- "QC"
sampleList$class <- NULL
plotData <- merge(plotData,sampleList,by="sample",sort=FALSE)
ggobj <-ggplot(data = plotData,aes(x=x,y=y,colour=class))+
geom_hline(yintercept=0,colour="gray")+
geom_vline(xintercept=0,colour="gray")+
geom_point()+
theme_bw()+
xlab(paste("PC1"," (",sprintf("%.2f%%",100*p$R2[1,1]),") ",sep=""))+
ylab(paste("PC2"," (",sprintf("%.2f%%",100*p$R2[2,1]),") ",sep=""))+
theme(legend.justification=c(1,1),
legend.position=c(1,1),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())+
#panel.background=element_rect(fill="#E3E3EE"))+
#stat_ellipse(geom = "polygon", type="euclid",alpha = 0.4,
# aes(fill = class))+
stat_ellipse(geom = "path")
if(!is.null(label) && !is.na(label)){
if(label == "order"){
ggobj <- ggobj + geom_text(aes(label=order),size=4,hjust=-0.2)
}else if(label == "sample"){
ggobj <- ggobj + geom_text(aes(label=sample),size=4,hjust=-0.2)
}
}
print(ggobj)
## 3D PCA plot
#par(mgp=c(1.6,1,0))
if(ncomp >=3){
col <- as.numeric(as.factor(plotData$class))
s3d <- scatterplot3d(plotData$x,plotData$y,plotData$z,type="h",angle = 24,
col.grid="lightblue",lty.hplot=2,pch="",color="gray",
xlab = paste("PC1"," (",sprintf("%.2f%%",100*p$R2[1,1]),") ",sep=""),
ylab = paste("PC2"," (",sprintf("%.2f%%",100*p$R2[2,1]),") ",sep=""),
zlab = paste("PC3"," (",sprintf("%.2f%%",100*p$R2[3,1]),") ",sep="")
)#color = as.numeric(as.factor(plotData$class)))
s3d$points(plotData$x,plotData$y,plotData$z, pch = 1,col = col)
s3d.coords <- s3d$xyz.convert(plotData$x,plotData$y,plotData$z)
text(s3d.coords$x, s3d.coords$y, labels = plotData$order,
pos = 4,cex=0.5,col = col)
classLabel <- levels(as.factor(plotData$class))
legend(s3d$xyz.convert(max(plotData$x)*0.7,
max(plotData$y), min(plotData$z)),
col=as.numeric(as.factor(classLabel)), yjust=0,pch=1,
legend = classLabel, cex = 0.8)
}
dev.off()
}
)
##' @title filterQCPeaks
##' @description filter peaks according to the QC sample
##' @rdname filterQCPeaks
##' @param para An object of metaXpara
##' @param ratio filter peaks which have missing value more than percent of
##' "ratio", default is 0.5
##' @param omit.negative A logical value indicates whether omit the negative
##' value
##' @param ... Additional parameters
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' p <- filterQCPeaks(para,ratio=0.5)
setGeneric("filterQCPeaks",function(para,ratio=0.5,omit.negative=TRUE,...)
standardGeneric("filterQCPeaks"))
##' @describeIn filterQCPeaks
setMethod("filterQCPeaks", signature(para = "metaXpara"),
function(para,ratio=0.5,omit.negative=TRUE,...){
peaksData <- para@peaksData
if(sum(is.na(peaksData$class)) <=0 ){
message("Don't find QC sample in your data")
return(para)
}
#assign(x="filterRatio",value=ratio,envir=.GlobalEnv)
filterRatio <- ratio
rpeak <- ddply(peaksData[is.na(peaksData$class),],.(ID),here(summarise),
rp=countMissingValue(x = value,ratio = filterRatio,omit.negative=TRUE))
para@peaksData <- peaksData[ peaksData$ID %in% rpeak$ID[ !rpeak$rp],]
message("Remove peaks which the percent is more than ", ratio ,
" with intensity are NA!")
message(sum(rpeak$rp))
## save the remove peak to file
if(sum(rpeak$rp) >=1){
fi <- paste(para@outdir,"/",para@prefix,"-filterQCPeaks",sep="")
message("Save the removed peaks to file: ",fi)
write.table(rpeak,file=fi,row.names = FALSE,col.names = TRUE,
quote=FALSE,sep="\t")
}
return(para)
})
##' @title filterPeaks
##' @description filter peaks according to the non-QC sample
##' @rdname filterPeaks
##' @param para An object of metaXpara
##' @param ratio filter peaks which have missing value more than percent of
##' "ratio", default is 0.8
##' @param byBatch filter peaks by batch
##' @param byClass filter peaks by class
##' @param omit.negative A logical value indicates whether omit the negative
##' value
##' @param ... Additional parameters
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' p <- filterPeaks(para,ratio=0.2)
setGeneric("filterPeaks",function(para,ratio=0.8,byBatch=FALSE,byClass=FALSE,omit.negative=TRUE,...) standardGeneric("filterPeaks"))
##' @describeIn filterPeaks
setMethod("filterPeaks", signature(para = "metaXpara"),function(para,ratio=0.8,byBatch=FALSE,byClass=FALSE,omit.negative=TRUE,...){
peaksData <- para@peaksData
#assign(x="filterRatio",value=ratio,envir=.GlobalEnv)
filterRatio <- ratio
# rpeak <- ddply(peaksData[!is.na(peaksData$class),],.(ID),here(summarise),
# rp=countMissingValue(value,ratio = filterRatio,omit.negative))
if(byBatch==TRUE){
## filter QC samples firstly
cat("filter peaks by batch ...\n")
rpeak <- peaksData %>% filter(!is.na(class)) %>%
group_by(ID,batch) %>%
dplyr::summarise(rp=countMissingValue(value,ratio=filterRatio,omit.negative)) %>%
ungroup() %>%
select(-batch) %>%
group_by(ID) %>%
dplyr::summarise(rp=any(rp)) %>%
ungroup()
}else if(byClass==TRUE){
cat("filter peaks by class ...\n")
rpeak <- peaksData %>% filter(!is.na(class)) %>%
group_by(ID,class) %>%
dplyr::summarise(rp=countMissingValue(value,ratio=filterRatio,omit.negative)) %>%
ungroup() %>%
select(-batch) %>%
group_by(ID) %>%
dplyr::summarise(rp=any(rp)) %>%
ungroup()
}else{
rpeak <- peaksData %>% filter(!is.na(class)) %>%
group_by(ID) %>%
dplyr::summarise(rp=countMissingValue(value,ratio=filterRatio,omit.negative)) %>%
ungroup()
}
if(sum(rpeak$rp)!=0){
para@peaksData <- peaksData[ peaksData$ID %in% rpeak$ID[ !rpeak$rp],]
}
message("Total peaks: ",length(unique(peaksData$ID)))
message("Remove peaks which the percent is more than ", ratio ,
" with intensity are NA!")
message(sum(rpeak$rp))
message("Peaks left: ",sum(!rpeak$rp))
## save the remove peak to file
if(sum(rpeak$rp) >=1){
fi <- paste(para@outdir,"/",para@prefix,"-filterPeaks",sep="")
message("Save the removed peaks to file: ",fi)
write.table(rpeak,file=fi,row.names = FALSE,col.names = TRUE,
quote=FALSE,sep="\t")
}
return(para)
})
##' @title reSetPeaksData. Please note NA is converted to zero in default.
##' @description reSetPeaksData. Please note NA is converted to zero in default.
##' @rdname reSetPeaksData
##' @docType methods
##' @param para An object of \code{metaXpara}
##' @return none
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
setGeneric("reSetPeaksData",function(para) standardGeneric("reSetPeaksData"))
##' @describeIn reSetPeaksData
setMethod("reSetPeaksData", signature(para = "metaXpara"), function(para){
cat("Reset data!\n")
rawPeaks <- para@rawPeaks
# samList <- read.delim(para@sampleListFile)
if(is.null(para@sampleList) || is.na(para@sampleList) ||
nrow(para@sampleList) ==0){
samList <- read.delim(para@sampleListFile,stringsAsFactors = FALSE)
samList$class <- as.character(samList$class)
para@sampleList <- samList
}else{
para@sampleList$class <- as.character(para@sampleList$class)
samList <- para@sampleList
}
if("ID" %in% names(rawPeaks)){
rawPeaks$ID <- as.character(rawPeaks$ID)
}else if("name" %in% names(rawPeaks)){
rawPeaks <- rawPeaks %>% dplyr::rename(ID=name)
rawPeaks$ID <- as.character(rawPeaks$ID)
}else{
stop("No valid ID found!\n")
}
peaksData <- rawPeaks %>% tidyr::gather(key = "sample",value = "value",-ID)
peaksData <- dplyr::inner_join(peaksData,samList,by="sample")
#message("Convert NA to 0:",sum(is.na(peaksData$value)))
message("Convert <=0 to NA:",sum(peaksData$value<=0,na.rm = TRUE))
#peaksData$value[is.na(peaksData$value)] <- 0
peaksData$value[peaksData$value<=0] <- NA
para@peaksData <- peaksData
return(para)
})
##' @title Using the QC samples to do the quality control-robust spline signal
##' correction
##' @description Using the QC samples to do the quality control-robust spline
##' signal correction.
##' @rdname doQCRLSC
##' @docType methods
##' @exportMethod
##' @param para An object of metaXpara
##' @param impute A logical indicates whether impute the result
##' @param cpu The number of cpu used for processing
##' @param ... Additional parameters
##' @return A list object
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @seealso \code{\link{plotQCRLSC}}
##' @details
##' The smoothing parameter is optimised using leave-one-out cross validation
##' to avoid overfitting.
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)[1:20,]
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' para <- missingValueImpute(para)
##' res <- doQCRLSC(para,cpu=1)
setGeneric("doQCRLSC",function(para,impute=TRUE,cpu=0,...)
standardGeneric("doQCRLSC"))
##' @describeIn doQCRLSC
setMethod("doQCRLSC", signature(para = "metaXpara"),
function(para,impute=TRUE,cpu=0,...){
## output object
res <- list()
message("Using cpu: ",cpu)
peaksData <- para@peaksData
if(sum(is.na(peaksData$class)) <=0 ){
message("Don't find QC sample in your data!")
return(para)
}
message("The number of NA value in peaksData before QC-RLSC: ",
sum(is.na(peaksData$value) | peaksData$value <= 0))
qcData <- peaksData[is.na(peaksData$class),]
# samList <- read.delim(para@sampleListFile,stringsAsFactors=FALSE)
if(is.null(para@sampleList) || is.na(para@sampleList) ||
nrow(para@sampleList) ==0){
samList <- read.delim(para@sampleListFile,stringsAsFactors = FALSE)
}else{
samList <- para@sampleList
}
assign(x="maxOrder",value=max(samList[,para@sampleListHead['order']]),
envir=.GlobalEnv)
loessDegree <- 2
loessSpan <- para@qcRlscSpan
message("QC-RLSC loess span= ",loessSpan)
# require(doSMP) # will no longer work...
cpu = ifelse(cpu==0,detectCores(),cpu)
cl <- makeCluster(getOption("cl.cores", cpu))
clusterExport(cl, c("myLoessFit","tuneSpline"),envir=environment())
qcData$ID_batch <- paste(qcData$ID,qcData$batch,sep="_")
message(date(),"\tsmooth fitting ...")
if(loessSpan==0){
intPredict <- parLapply(cl,unique(qcData$ID_batch),.runFit1,
qcData=qcData,maxOrder=maxOrder)
#intPredict <- do.call(rbind,intPredict)
intPredict <- rbindlist(intPredict)
intPredict <- as.data.frame(intPredict)
}else{
intPredict <- parLapply(cl,unique(qcData$ID_batch),.runFit2,
qcData=qcData,maxOrder=maxOrder)
intPredict <- rbindlist(intPredict)
intPredict <- as.data.frame(intPredict)
}
stopCluster(cl)
message(date(),"\tsmooth fitting done.")
intPredict <- dplyr::rename(intPredict,order=newOrder)
## head: sample ID value batch class order valuePredict
peaksData$valuePredict <- NULL
peaksData$valueNorm <- NULL
peaksData <- plyr::join(peaksData,intPredict,
by=intersect(names(peaksData),names(intPredict)))
mpa <- ddply(peaksData,.(ID),summarise,mpa=median(value,na.rm = TRUE))
peaksData <- plyr::join(peaksData,mpa,
by=intersect(names(peaksData),names(mpa)))
peaksData <- dplyr::mutate(peaksData,valuePredict=valuePredict/mpa)
peaksData$mpa <- NULL
message("change value which ==0 to NA")
peaksData$value[ peaksData$value<=0] <- NA
peaksData$valueNorm <- peaksData$value/peaksData$valuePredict
######################################################################
######################################################################
peaksData$valuePredict[ peaksData$valuePredict<=0] <- NA
## calculate CV using this value
peaksData$valueNorm[ peaksData$valueNorm<=0] <- NA
message("The number of NA value in peaksData after QC-RLSC:",
sum(is.na(peaksData$valueNorm)))
## TODO: Do we still need to do missing value imputation are QC-RLSC?
if(impute){
message(date(),"\tDo missing value imputation after QC-RLSC...")
paraTmp <- para
paraTmp@peaksData <- peaksData
paraTmp <- missingValueImpute(x = paraTmp,valueID="valueNorm")
peaksData <- paraTmp@peaksData
}
## For each batch
## CV plot
cvStat <- ddply(peaksData[is.na(peaksData$class),],.(ID,batch),
summarise,
rawCV=sd(value,na.rm = TRUE)/mean(value,na.rm = TRUE),
normCV=sd(valueNorm,na.rm = TRUE)/mean(valueNorm,na.rm = TRUE))
cvStatForEachBatch <- melt(cvStat,id.vars = c("ID","batch"),
variable.name = "CV")
cvStatForEachBatch$batch <- as.factor(cvStatForEachBatch$batch)
## output information
message("Summary information of the CV for QC samples:")
cvTable <- ddply(cvStatForEachBatch,.(batch,CV),summarise,
lessThan30=sum(value<=0.3,na.rm = TRUE),
total=length(value),ratio=lessThan30/total)
print(cvTable)
res$cvBatch <- cvTable
message("\n")
para@fig$cv<- paste(para@outdir,"/",para@prefix,"-cv.pdf",sep="")
pdf(para@fig$cv,width = 6,height = 6)
p<-ggplot(data=cvStatForEachBatch,aes(x=value,fill=CV,colour=CV))+
facet_grid(batch~.)+
geom_density(alpha = 0.5)+
xlab(label = "CV")
print(p)
p<-ggplot(data=cvStatForEachBatch,aes(x=value,fill=CV,colour=CV))+
facet_grid(batch~.)+
geom_density(alpha = 0.5)+
xlim(0,2)+
xlab(label = "CV")
print(p)
dev.off()
#######################################
## For all
cvStat <- ddply(peaksData[is.na(peaksData$class),],.(ID),
summarise,
rawCV=sd(value,na.rm = TRUE)/mean(value,na.rm = TRUE),
normCV=sd(valueNorm,na.rm = TRUE)/mean(valueNorm,na.rm = TRUE))
cvStatForAll <- melt(cvStat,id.vars = c("ID"),
variable.name = "CV")
## output information
message("Summary information of the CV for QC samples:")
cvTable <- ddply(cvStatForAll,.(CV),summarise,
lessThan30=sum(value<=0.3,na.rm = TRUE),
total=length(value),ratio=lessThan30/total)
print(cvTable)
res$cvAll <- cvTable
message("\n")
########################################################
message("Peaks with CV > ",0.3,"!")
message(sum(cvStat$normCV > 0.3,na.rm = TRUE))
tmpPeaksData <- merge(peaksData,cvStat,by="ID")
if(nrow(tmpPeaksData)!=nrow(peaksData)){
error_file <- paste(para@outdir,"/",para@prefix,"-doQCRLSC-error.rda",
sep="")
message("Please see the file: ",error_file," for detail!")
save(peaksData,cvStat,file=error_file)
stop("Please see detailed data in ",error_file)
}
peaksData <- tmpPeaksData
peaksData <- dplyr::rename(peaksData,cv=normCV)
para@peaksData <- peaksData
#para@peaksData <- dplyr::filter(peaksData,!is.na(cv) & cv<=cvFilter)
res$metaXpara <- para
return(res)
})
.runFit1=function(id,qcData,maxOrder){
out <- tryCatch({
dat <- data.frame(newOrder=1:maxOrder)
piece <- qcData[ qcData$ID_batch==id,]
dat$valuePredict=myLoessFit(piece$order,piece$value,dat$newOrder)
dat$ID <- piece$ID[1]
dat$batch <- piece$batch[1]
dat
},
error=function(e){
message("Please see the file: runFit_error.rda for related data!")
save(e,id,qcData,maxOrder,file="runFit_error.rda")
stop("error in runFit!")
return(NULL)
},
warning=function(cond){
message("Please see the file: runFit_warning.rda for related data!")
save(cond,id,qcData,maxOrder,file="runFit_warning.rda")
return(NULL)
})
return(out)
}
.runFit2=function(id,qcData,maxOrder){
out <- tryCatch({
dat <- data.frame(newOrder=1:maxOrder)
piece <- qcData[ qcData$ID_batch==id,]
dat$valuePredict=predict(smooth.spline(piece$order,
piece$value,
spar = loessSpan),
dat$newOrder)$y
dat$ID <- piece$ID[1]
dat$batch <- piece$batch[1]
dat
},
error=function(e){
message("Please see the file: runFit_error.rda for related data!")
save(e,id,qcData,maxOrder,file="runFit_error.rda")
stop("error in runFit!")
return(NULL)
},
warning=function(cond){
message("Please see the file: runFit_warning.rda for related data!")
save(cond,id,qcData,maxOrder,file="runFit_warning.rda")
return(NULL)
})
return(out)
}
##' @title Missing value imputation
##' @description Missing value imputation
##' @rdname missingValueImpute
##' @docType methods
##' @exportMethod
##' @param x The value needed to be imputated
##' @param valueID The name of the column which will be used
##' @param method Method for imputation: bpca,knn,svdImpute,softImpute,rf,min
##' @param negValue A logical indicates whether convert <=0 value to NA
##' @param cpu The number of cpus used
##' @param ... Additional parameters
##' @return The imputation data
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' para <- missingValueImpute(para)
setGeneric("missingValueImpute",function(x,valueID="value",method="knn",
negValue = TRUE,cpu=1,...)
standardGeneric("missingValueImpute"))
##' @describeIn missingValueImpute
setMethod("missingValueImpute", signature(x = "metaXpara"),
function(x,valueID="value",...){
message("missingValueImpute: ",valueID)
para <- x
peaksData <- para@peaksData
message(date(),"\tMissing value imputation for \'",valueID,"\'")
x <- peaksData %>% dplyr::select(ID,sample,!!valueID) %>%
tidyr::spread(sample,!!valueID)
#x<-dcast(peaksData,ID~sample,value.var = valueID)
row.names(x) <- x$ID
x$ID <- NULL
x[x<=0] <- NA
message("Missing value in total: ",sum(is.na(x)))
if(any(is.na(peaksData$class))){
qcValue <- peaksData[,valueID][is.na(peaksData$class)]
message("Missing value in QC sample: ",
sum(qcValue<=0 | is.na(qcValue)))
sValue <- peaksData[,valueID][!is.na(peaksData$class)]
message("Missing value in non-QC sample: ",
sum(sValue<=0 | is.na(sValue)))
}
x <- missingValueImpute(x,method=para@missValueImputeMethod,cpu=cpu,...)
message("Missing value in total after missing value inputation: ",
sum(is.na(x)))
## maybe x has value which <=0
message("<=0 value in total after missing value inputation: ",
sum(x<=0))
x$ID <- row.names(x)
y <- melt(x,id.vars = "ID",variable.name = "sample",value.name = "newValue")
m <- plyr::join(peaksData,y,by=c("ID","sample"))
m[,valueID] <- m$newValue
m$newValue <- NULL
para@peaksData <- m
return(para)
})
##' @describeIn missingValueImpute
setMethod("missingValueImpute", signature(x = "data.frame"),
function(x,method="knn",negValue = TRUE,cpu=1,...){
if(cpu==0){
cpu <- detectCores()
}
inputedData <- NULL
colName <- names(x)
rowName <- row.names(x)
x <- as.matrix(t(x))
## An expression matrix with samples in the rows, features in the columns
save(x,file="x.rda")
message(date(),"\tThe ratio of missing value: ",
sprintf("%.4f%%",100*sum(is.na(x))/length(x)))
if(method == "bpca"){
## Numerical matrix with (or an object coercible to such) with samples
## in rows and variables as columns. Also takes ExpressionSet in which
## case the transposed expression matrix is used. Can also be a data
## frame in which case all numberic variables are used to fit the PCA.
## Please note that this method is very time-consuming.
mvd <- pca(x, nPcs = 3, method = "bpca")
inputedData <- completeObs(mvd)
}else if(method == "svdImpute"){
## This method is very fast.
mvd <- pca(x, nPcs = 3, method = "svdImpute")
inputedData <- completeObs(mvd)
}else if(method == "knn"){
## An expression matrix with genes in the rows, samples in the columns
## This method is very fast.
mvd <- impute.knn(t(x))
inputedData <- t(mvd$data)
}else if(method == "softImpute"){
# https://cran.r-project.org/web/packages/softImpute/vignettes/softImpute.html
# An m by n matrix with NAs.
# TODO: tune the parameters, rank.max and lambda
softfit <- softImpute(t(x))
x_new <- complete(x,softfit)
inputedData <- x_new
}else if(method == "rf"){
## A data matrix with missing values.
## The columns correspond to the variables and the rows to the
## observations.
## Please note that this method is very time-consuming.
if(cpu>1){
cat("do missForest cpu=(",cpu,") ...\n")
cl <- makeCluster(cpu)
registerDoParallel(cl)
# xmis: a data matrix with missing values.
# The columns correspond to the variables and the rows
# to the observations.
mvd <- missForest(xmis = x,parallelize = "variables")
print(mvd$OOBerror)
inputedData <- mvd$ximp
stopCluster(cl)
}else{
cat("do missForest ...\n")
mvd <- missForest(xmis = x)
print(mvd$OOBerror)
inputedData <- mvd$ximp
}
}else if(method == "min"){
## min value / 2
inputedData <- apply(x,1,function(y){
y[is.na(y) | y<=0] <- min(y[y>0],na.rm = TRUE)/2.0
y})
inputedData <- t(inputedData)
}else if(method == "none"){
cat("No missing value imputation!\n")
inputedData <- x
}else{
stop("Please provide valid method for missing value inputation!")
}
if(method != "none"){
if(negValue & method != "min"){
message("<=0: ",sum(inputedData<=0))
x <- inputedData
inputedData <- apply(x,1,function(y){
y[is.na(y) | y<=0] <- min(y[y>0],na.rm = TRUE)
y})
inputedData <- t(inputedData)
}
}
inputedData <- as.data.frame(t(inputedData))
row.names(inputedData) <- rowName
names(inputedData) <- colName
return(inputedData)
})
##' @title Judge whether the data has QC samples
##' @description Judge whether the data has QC samples
##' @rdname hasQC
##' @docType methods
##' @param para An object of data
##' @param ... Additional parameters
##' @return A logical value
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' hasQC(para)
setGeneric("hasQC",function(para,...) standardGeneric("hasQC"))
##' @describeIn hasQC
setMethod("hasQC", signature(para = "metaXpara"), function(para,...){
# samList <- read.delim(para@sampleListFile)
if(is.null(para@sampleList) || is.na(para@sampleList) ||
nrow(para@sampleList) ==0){
samList <- read.delim(para@sampleListFile,stringsAsFactors = FALSE)
}else{
samList <- para@sampleList
}
qc <- is.na(samList$class)
if(any(qc)){
message("Find QC samples in your data!")
message("There are ",sum(qc)," QC samples in your data!")
return(TRUE)
}else{
message("Don't find QC samples in your data!")
return(FALSE)
}
})
##' @title Convert the value <=0 to NA
##' @description Convert the value <=0 to NA
##' @rdname zero2NA
##' @docType methods
##' @param x An object of data
##' @param valueID The name of the column which will be used
##' @param ... Additional parameters
##' @return An object of \code{metaXpara}
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' \donttest{
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' para <- zero2NA(para)
##' }
setGeneric("zero2NA",function(x,valueID="value",...) standardGeneric("zero2NA"))
##' @describeIn zero2NA
setMethod("zero2NA", signature(x = "metaXpara"), function(x,valueID="value",...){
dat <- x@peaksData[,valueID]
message(date(),"\tzero2NA:",sum(dat<=0))
dat[dat<=0] <- NA
x@peaksData[,valueID] <- dat
return(x)
})
##' @title Plot the distribution of the peaks intensity
##' @description Plot the distribution of the peaks intensity for both raw
##' intensity and normalized intensity.
##' @rdname plotIntDistr
##' @docType methods
##' @param x A metaXpara object.
##' @param width The width of pdf, default is 14.
##' @param sortBy Sort the samples according to order (default), batch or class
##' @param rmNA Whether or not to consider NA, default is TRUE. When this value is FALSE,
##' NA will be considered as 0 then all values are +1.
##' @param ... Additional parameter
##' @return The figure name
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @exportMethod
##' @examples
##' library(faahKO)
##' xset <- group(faahko)
##' xset <- retcor(xset)
##' xset <- group(xset)
##' xset <- fillPeaks(xset)
##' peaksData <- as.data.frame(groupval(xset,"medret",value="into"))
##' peaksData$name <- row.names(peaksData)
##' para <- new("metaXpara")
##' rawPeaks(para) <- peaksData
##' sampleListFile(para) <- system.file("extdata/faahKO_sampleList.txt",
##' package = "metaX")
##' para <- reSetPeaksData(para)
##' plotIntDistr(para)
##' ## after normalization
##' para <- metaX::normalize(para)
##' plotIntDistr(para)
setGeneric("plotIntDistr",function(x,width=14,sortBy="order",rmNA = TRUE,ylim=NULL,...) standardGeneric("plotIntDistr"))
##' @describeIn plotIntDistr
setMethod("plotIntDistr", signature(x = "metaXpara"), function(x,width=14,sortBy="order",rmNA = TRUE,ylim=NULL,...){
dat <- x@peaksData
fig <- paste(x@outdir,"/",x@prefix,"-intensityBoxplot.png",sep="")
highfig <- sub(pattern = "png$",replacement = "pdf",x = fig)
pdf(file = highfig,width = width,height = 3.7)
dat$class <- as.character(dat$class)
dat$class[is.na(dat$class)] <- "QC"
if(rmNA==TRUE){
dat$value[dat$value<=0] <- NA
dat$value <- log2(dat$value)
}else{
dat$value[is.na(dat$value)] <- 0
dat$value <- log2(dat$value+1)
}
#save(dat,file="dat.rda")
if(sortBy == "order"){
dat$order <- factor(dat$order,levels = sort(unique(dat$order)))
}else if(sortBy == "class"){
dat <- dplyr::arrange(dat,class,batch,order)
dat$order <- factor(dat$order,levels = unique(dat$order))
}else if(sortBy == "batch"){
dat <- dplyr::arrange(dat,batch,order)
dat$order <- factor(dat$order,levels = unique(dat$order))
}
dat$batch <- as.character(dat$batch)
#save(dat,sortBy,file="test.rda")
ggobj1 <- ggplot(data=dat,aes(x=order,y=value,fill=class))+
geom_boxplot(outlier.size=0.4)+
#scale_x_discrete(labels = "")+
scale_x_discrete(labels = NULL)+
theme_bw()+
#ggtitle("Raw intensity")+
ylab("log2(value)")
if(!is.null(ylim)){
ggobj1 <- ggobj1 + ylim(ylim[1],ylim[2])
}
print(ggobj1)
##
if("valueNorm" %in% names(dat)){
#dat$valueNorm[dat$valueNorm<=0] <- NA
if(rmNA==TRUE){
dat$valueNorm[dat$valueNorm<=0] <- NA
dat$valueNorm <- log2(dat$valueNorm)
}else{
dat$valueNorm[is.na(dat$valueNorm)] <- 0
dat$valueNorm <- log2(dat$valueNorm+1)
}
#dat$valueNorm <- log2(dat$valueNorm)
ggobj <- ggplot(data=dat,aes(x=order,y=valueNorm,fill=class))+
geom_boxplot(outlier.size=0.4)+
#scale_x_discrete(labels = "")+
scale_x_discrete(labels = NULL)+
theme_bw()+
#ggtitle("Normalized intensity")+
ylab("log2(value)")
if(!is.null(ylim)){
ggobj <- ggobj + ylim(ylim[1],ylim[2])
}
print(ggobj1)
print(ggobj)
}
dev.off()
png(filename = fig,width = width,height = 5,units = "in",res=150)
print(ggobj1)
dev.off()
res <- list(fig=fig,highfig=highfig)
return(res)
})
##' @title Plot the distribution of the peaks S/N
##' @description Plot the distribution of the peaks S/N, only suitable for
##' XCMS result. This function generates a figure.
##' @rdname plotPeakSN
##' @docType methods
##' @param x A metaXpara object
##' @param ... Additional parameter
##' @return none
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @exportMethod
##' @examples
##' library(faahKO)
##' xset <- group(faahko)
##' xset <- retcor(xset)
##' xset <- group(xset)
##' xset <- fillPeaks(xset)
##' para <- new("metaXpara")
##' xcmsSetObj(para) <- xset
##' plotPeakSN(para)
setGeneric("plotPeakSN",function(x,...) standardGeneric("plotPeakSN"))
##' @describeIn plotPeakSN
setMethod("plotPeakSN", signature(x = "metaXpara"), function(x,...){
dat <- as.data.frame(x@xcmsSetObj@peaks)
if(!"sn" %in% names(dat)){
warning("Don't find column named 'sn' in xcmsSet object!")
return(NULL)
}
fig <- paste(x@outdir,"/",x@prefix,"-peakSN.pdf",sep="")
pdf(file = fig,width = 8,height = 5)
## sn distribution
snStat <- table(cut(dat$sn,breaks = c(0:10,20,30,40,50,Inf)))
snStat <- as.data.frame(snStat)
names(snStat) <- c("SN","Freq")
snStat$Ratio <- snStat$Freq/sum(snStat$Freq)
ggobj <- ggplot(data=snStat,aes(x=SN,y=Ratio))+
geom_bar(stat="identity",colour="white",fill="#56B4E9")+
geom_text(aes(label=sprintf("%.3f%%",100*Ratio)),
vjust=-0.2,size=3,angle=60)
print(ggobj)
dev.off()
}
)
get_dataType=function(para){
if(!is.null(para@dataType)){
return(para@dataType)
}else{
return("metabolite")
}
}
is_metabolite=function(para){
data_type <- get_dataType(para)
if(grepl(pattern = "metabo",x = data_type,ignore.case = TRUE)){
return(TRUE)
}else{
return(FALSE)
}
}
is_protein=function(para){
data_type <- get_dataType(para)
if(grepl(pattern = "prot",x = data_type,ignore.case = TRUE)){
return(TRUE)
}else{
return(FALSE)
}
}
is_gene=function(para){
data_type <- get_dataType(para)
if(grepl(pattern = "gene",x = data_type, ignore.case = TRUE)){
return(TRUE)
}else{
return(FALSE)
}
}
is_mrna=function(para){
data_type <- get_dataType(para)
if(grepl(pattern = "mrna",x = data_type, ignore.case = TRUE)){
return(TRUE)
}else{
return(FALSE)
}
}
##' @title Plot the distribution of the peaks number
##' @description Plot the distribution of the raw peaks number without
##' post-processing. This function not only generates a figure, but also
##' saves the information of peaks number into a file.
##' @rdname plotPeakNumber
##' @docType methods
##' @param x A metaXpara object
##' @param ... Additional parameter
##' @return The figure name
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @exportMethod
##' @examples
##' library(faahKO)
##' xset <- group(faahko)
##' xset <- retcor(xset)
##' xset <- group(xset)
##' xset <- fillPeaks(xset)
##' peaksData <- as.data.frame(groupval(xset,"medret",value="into"))
##' peaksData$name <- row.names(peaksData)
##' para <- new("metaXpara")
##' rawPeaks(para) <- peaksData
##' sampleListFile(para) <- system.file("extdata/faahKO_sampleList.txt",
##' package = "metaX")
##' plotPeakNumber(para)
setGeneric("plotPeakNumber",function(x,
legendRowBatch = NULL,
legendRowClass = NULL, ...) standardGeneric("plotPeakNumber"))
##' @describeIn plotPeakNumber
setMethod("plotPeakNumber", signature(x = "metaXpara"), function(x,
legendRowBatch = NULL,
legendRowClass = NULL,...){
## peak number
#nsample <- length(x@xcmsSetObj@filepaths)
# samList <- read.delim(x@sampleListFile)
if(is.null(x@sampleList) || is.na(x@sampleList) ||
nrow(x@sampleList) ==0){
samList <- read.delim(x@sampleListFile,stringsAsFactors = FALSE)
}else{
samList <- x@sampleList
}
peaksData <- x@rawPeaks[, names(x@rawPeaks) %in% samList$sample]
dat <- apply(peaksData,2,function(a){sum(a!=0 & !is.na(a))})
dat <- as.data.frame(dat)
names(dat) <- "npeaks"
dat$sample <- row.names(dat)
dat <- merge(dat,samList,by="sample")
#dat$order <- factor(dat$order,levels = sort(unique(dat$order)))
dat$batch <- as.character(dat$batch)
dat$class <- as.character(dat$class)
dat$class[is.na(dat$class)] <- "QC"
dat$missPeaksN <- nrow(peaksData) - dat$npeaks
## define outlier
outliers.coef <- 1.5
qs <- quantile(dat$npeaks,c(.25,.75),na.rm=TRUE)
iqr <- qs[2] - qs[1]
dat$outlier <- dat$npeaks < qs[1]-outliers.coef*iqr |
dat$npeaks > qs[2]+outliers.coef*iqr
fig <- paste(x@outdir,"/",x@prefix,"-peakNumber.png",sep="")
highfig <- sub(pattern = "png$",replacement = "pdf",x = fig)
pdf(file = highfig,width = 5.5,height = 3.7)
ggobj1 <- ggplot(data=dat,aes(x=order,y=npeaks,colour=class,shape=batch))+
geom_point()+
theme_bw()+
geom_text(aes(label=ifelse(outlier,order,"")),hjust=-0.2,size=4)+
scale_shape_manual(values=1:n_distinct(dat$batch))+
ylab(paste("# of ",x@dataType,"s",sep=""))
if(!is.null(legendRowBatch)){
ggobj1 <- ggobj1 + guides(shape = guide_legend(nrow = legendRowBatch))
}
if(!is.null(legendRowClass)){
ggobj1 <- ggobj1 + guides(col = guide_legend(nrow = legendRowClass))
}
print(ggobj1)
ggobj2 <- ggplot(data=dat,aes(x=order,y=missPeaksN,colour=class,shape=batch))+
geom_point()+
theme_bw()+
geom_text(aes(label=ifelse(outlier,order,"")),hjust=-0.2,size=4)+
scale_shape_manual(values=1:n_distinct(dat$batch))+
ylab(paste("# of missing ",x@dataType,"s",sep=""))
print(ggobj2)
dev.off()
png(filename = fig,width = 8,height = 5,units = "in",res=150)
print(ggobj1)
dev.off()
# save the peaks number data to file
peaksNumberFile <- paste(x@outdir,"/",x@prefix,"-peakNumber.txt",sep="")
message("Save the peaks number information to file: ",peaksNumberFile)
write.table(x = dat,file = peaksNumberFile,quote=FALSE,sep="\t",
row.names=FALSE,col.names = TRUE)
pTable <- dat %>% group_by(class) %>%
dplyr::summarise(n00=quantile(npeaks)[1],
n25=quantile(npeaks)[2],
n50=quantile(npeaks)[3],
n75=quantile(npeaks)[4],
n100=quantile(npeaks)[5])
print(pTable)
res <- list(fig=fig,highfig=highfig)
return(res)
}
)
##' @title Plot the CV distribution of peaks in each group
##' @description Plot the CV distribution of peaks in each group.
##' @rdname plotCV
##' @docType methods
##' @param x A metaXpara object
##' @param valueID The name of the column that used as peak intensity
##' @param ... Additional parameter
##' @return none
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @exportMethod
##' @examples
##' library(faahKO)
##' xset <- group(faahko)
##' xset <- retcor(xset)
##' xset <- group(xset)
##' xset <- fillPeaks(xset)
##' peaksData <- as.data.frame(groupval(xset,"medret",value="into"))
##' peaksData$name <- row.names(peaksData)
##' para <- new("metaXpara")
##' rawPeaks(para) <- peaksData
##' sampleListFile(para) <- system.file("extdata/faahKO_sampleList.txt",
##' package = "metaX")
##' para <- reSetPeaksData(para)
##' plotCV(para)
setGeneric("plotCV",function(x,valueID="value",...) standardGeneric("plotCV"))
##' @describeIn plotCV
setMethod("plotCV", signature(x = "metaXpara"), function(x,valueID="value",...){
fig <- paste(x@outdir,"/",x@prefix,"-peakCV.png",sep="")
highfig <- sub(pattern = "png$",replacement = "pdf",x = fig)
pdf(file = highfig,width = 4.38,height = 3.7)
peaksData <- x@peaksData
peaksData$class <- as.character(peaksData$class)
peaksData$class[is.na(peaksData$class)] <- "QC"
peaksData$value[peaksData$value<=0] <- NA
#cvTmp <- ddply(peaksData,.(ID,class),summarise,
# cv=sd(value,na.rm = TRUE)/mean(value,na.rm = TRUE))
cvTmp <- peaksData %>% group_by(ID,class) %>%
dplyr::filter(!is.na(!!rlang::sym(valueID))) %>%
#mutate_(need_value=valueID) %>%
mutate(need_value=!!rlang::sym(valueID)) %>%
dplyr::summarise(cv=sd(need_value,na.rm = TRUE)/mean(need_value,na.rm = TRUE)) %>%
ungroup()
cvDat <- data.frame()
for(i in unique(cvTmp$class)){
tmp <- cvTmp[ cvTmp$class == i,]
tmp <- tmp[!is.na(tmp$cv),]
tmp <- tmp[order(tmp$cv),]
tmp$n <- (1:nrow(tmp))/nrow(tmp)
cvDat <- bind_rows(cvDat,tmp)
}
message("CV distribution (value):")
cvOut <- ddply(cvDat,.(class),summarise,
n=length(cv),
n30=sum(cv<=0.3,na.rm = TRUE),
n20=sum(cv<=0.2,na.rm = TRUE),
n30ratio=n30/length(cv),
n20ratio=n20/length(cv))
print(cvOut)
## whole range
ggobj1 <- ggplot(data=cvDat,aes(x=cv,y=n,colour=class))+
geom_line()+
theme_bw()+
geom_vline(xintercept=c(0.2,0.3),linetype=2)+
#ylab("Percent of peaks")+
theme(legend.position=c(1,0),legend.justification=c(1,0))+
#ggtitle("The CV distribution of the intensity")+
xlab("CV")+
ylab(paste("Percent of ",x@dataType,"s",sep=""))
print(ggobj1)
## only show cv between 0 and 1
ggobj2 <- ggplot(data=cvDat,aes(x=cv,y=n,colour=class))+
geom_line()+
theme_bw()+
geom_vline(xintercept=c(0.2,0.3),linetype=2)+
ylab(paste("Percent of ",x@dataType,"s",sep=""))+
theme(legend.position=c(1,0),legend.justification=c(1,0))+
xlab("CV")+
#ggtitle("The CV distribution of the intensity")+
coord_cartesian(xlim=c(-0.05,1.05))
print(ggobj2)
## for valueNorm
if("valueNorm" %in% names(peaksData)){
peaksData$valueNorm[peaksData$valueNorm<=0] <- NA
cvTmp <- ddply(peaksData,.(ID,class),summarise,
cv=sd(valueNorm,na.rm = TRUE)/mean(valueNorm,na.rm = TRUE))
cvDat <- data.frame()
for(i in unique(cvTmp$class)){
tmp <- cvTmp[ cvTmp$class == i,]
tmp <- tmp[!is.na(tmp$cv),]
tmp <- tmp[order(tmp$cv),]
tmp$n <- (1:nrow(tmp))/nrow(tmp)
cvDat <- rbind(cvDat,tmp)
}
message("CV distribution (valueNorm):")
cvOut <- ddply(cvDat,.(class),summarise,
n30=sum(cv<=0.3,na.rm = TRUE),
n20=sum(cv<=0.2,na.rm = TRUE),
n30ratio=n30/length(cv),
n20ratio=n20/length(cv))
print(cvOut)
## whole range
ggobj <- ggplot(data=cvDat,aes(x=cv,y=n,colour=class))+
geom_line()+
theme_bw()+
geom_vline(xintercept=c(0.2,0.3),linetype=2)+
theme(legend.position=c(1,0),legend.justification=c(1,0))+
#ylab("Percent of peaks")+
xlab("CV")+
ylab(paste("Percent of ",x@dataType,"s",sep=""))
#ggtitle("The CV distribution of the normalized intensity")
print(ggobj)
## only show cv between 0 and 1
ggobj <- ggplot(data=cvDat,aes(x=cv,y=n,colour=class))+
geom_line()+
theme_bw()+
geom_vline(xintercept=c(0.2,0.3),linetype=2)+
ylab(paste("Percent of ",x@dataType,"s",sep=""))+
theme(legend.position=c(1,0),legend.justification=c(1,0))+
xlab("CV")+
#ggtitle("The CV distribution of the normalized intensity")+
coord_cartesian(xlim=c(-0.05,1.05))
print(ggobj)
}
## if valueNorm and value are in the peaksData
if("valueNorm" %in% names(peaksData) && "value" %in% names(peaksData)){
peaksData$valueNorm[peaksData$valueNorm<=0] <- NA
cvTmp <- ddply(peaksData,.(ID,class),summarise,
cv=sd(valueNorm,na.rm = TRUE)/mean(valueNorm,na.rm = TRUE))
cvDat <- data.frame()
for(i in unique(cvTmp$class)){
tmp <- cvTmp[ cvTmp$class == i,]
tmp <- tmp[!is.na(tmp$cv),]
tmp <- tmp[order(tmp$cv),]
tmp$n <- (1:nrow(tmp))/nrow(tmp)
tmp$type <- "After normalization"
cvDat <- rbind(cvDat,tmp)
}
#cvDat$class <- "After normalization"
peaksData$value[peaksData$value<=0] <- NA
cvTmp <- ddply(peaksData,.(ID,class),summarise,
cv=sd(value,na.rm = TRUE)/mean(value,na.rm = TRUE))
#cvDat <- data.frame()
for(i in unique(cvTmp$class)){
tmp <- cvTmp[ cvTmp$class == i,]
tmp <- tmp[!is.na(tmp$cv),]
tmp <- tmp[order(tmp$cv),]
tmp$n <- (1:nrow(tmp))/nrow(tmp)
tmp$type <- "Before normalization"
cvDat <- rbind(cvDat,tmp)
}
#cvDat$class <- "Before normalization"
## only show cv between 0 and 1
ggobj <- ggplot(data=cvDat,aes(x=cv,y=n,colour=class,linetype=type))+
geom_line()+
theme_bw()+
geom_vline(xintercept=c(0.2,0.3),linetype=2)+
ylab(paste("Percent of ",x@dataType,"s",sep=""))+
theme(legend.position=c(1,0),legend.justification=c(1,0),
legend.box.just="right")+
xlab("CV")+
scale_y_continuous(labels = scales::percent)+
scale_x_continuous(labels = scales::percent)+
guides(linetype = guide_legend(order = 2),
colour = guide_legend(order = 1))+
#ggtitle("The CV distribution of the normalized intensity")+
coord_cartesian(xlim=c(-0.05,1.05))
print(ggobj)
}
dev.off()
png(filename = fig,width = 6,height = 5,units = "in",res=150)
print(ggobj2)
dev.off()
res <- list(fig=fig,highfig=highfig,stat=cvOut)
return(res)
}
)
##' @title Plot Phylogenies for samples
##' @description This function plots phylogenetic trees for samples.
##' @rdname plotTreeMap
##' @docType methods
##' @param para A metaXpara object
##' @param valueID The name of the column that used for plot
##' @param log A logical indicating whether to log the data
##' @param rmQC A logical indicating whether to remove the QC samples
##' @param nc The number of clusters
##' @param treeType A character string specifying the type of phylogeny to be
##' drawn; it must be one of "phylogram" (the default), "cladogram", "fan",
##' "unrooted", "radial" or any unambiguous abbreviation of these.
##' @param width The width and height of the graphics region in inches.
##' The default values are 8.
##' @param ... Additional parameter
##' @return none
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @exportMethod
##' @examples
##' library(faahKO)
##' xset <- group(faahko)
##' xset <- retcor(xset)
##' xset <- group(xset)
##' xset <- fillPeaks(xset)
##' peaksData <- as.data.frame(groupval(xset,"medret",value="into"))
##' peaksData$name <- row.names(peaksData)
##' para <- new("metaXpara")
##' rawPeaks(para) <- peaksData
##' sampleListFile(para) <- system.file("extdata/faahKO_sampleList.txt",
##' package = "metaX")
##' para <- reSetPeaksData(para)
##' plotTreeMap(para,valueID="value")
setGeneric("plotTreeMap",function(para,valueID="valueNorm",log=TRUE,rmQC=TRUE,
nc=8,treeType="fan",width=8,...)
standardGeneric("plotTreeMap"))
##' @describeIn plotTreeMap
setMethod("plotTreeMap", signature(para = "metaXpara"),
function(para,valueID="valueNorm",log=TRUE,rmQC=TRUE,nc=8,
treeType="fan",width=8,...){
fig <- paste(para@outdir,"/",para@prefix,"-TreeMap.pdf",sep="")
pdf(file = fig,width = width,height = width)
peaksData <- para@peaksData
peaksData$class <- as.character(peaksData$class)
if(rmQC){
peaksData <- peaksData[ !is.na(peaksData$class),]
}else{
peaksData$class[is.na(peaksData$class)] <- "QC"
}
peaksData[,valueID][ peaksData[,valueID]<=0] <- NA
x<-dcast(peaksData,ID~sample,value.var = valueID)
row.names(x) <- x$ID
x$ID <- NULL
x <- as.data.frame(t(x))
x$sample <- row.names(x)
# samList <- read.delim(para@sampleListFile)
if(is.null(para@sampleList) || is.na(para@sampleList) ||
nrow(para@sampleList) ==0){
samList <- read.delim(para@sampleListFile,stringsAsFactors = FALSE)
}else{
samList <- para@sampleList
}
samList$class <- as.character(samList$class)
samList$class[is.na(samList$class)] <- "QC"
dat <- merge(x,samList,by="sample")
if(nrow(dat) != nrow(x)){
warning("The nrow of the dat is ",nrow(dat),
", the nrow of x is ",nrow(x),"\n")
}
row.names(dat) <- paste(dat$class,dat$order,sep="-")
dat <- dat[ ,!names(dat) %in% names(samList)]
if(log==TRUE){
dat <- log2(dat)
}
if(is.na(nc) | is.null(nc)){
n <- length(unique(samList$class))
}else{
n <- nc
}
nclus <- n# number of clusters
color <- 1:n# a vector of colors
color_list=rep(color,nclus/length(color))
cfit <- hclust(dist(dat))
clus=cutree(cfit,nclus)
plot(as.phylo(cfit),type=treeType,label.offset=0.1,show.node.label=TRUE,
tip.color=color_list[clus],no.margin=TRUE)
dev.off()
}
)
##' @title Plot the correlation change of the QC samples.
##' @description Plot the correlation change of the QC samples.
##' @rdname plotQC
##' @docType methods
##' @param para A metaXpara object
##' @param valueID The name of the column that used for plot
##' @param log A logical indicating whether to log the data
##' @param step The step value of calculate the cor of the samples. Default is 4.
##' @param width The width of the graphics region in inches.
##' The default values are 8.
##' @param height The height of the graphics region in inches.
##' The default values are 4.
##' @param ... Additional parameter
##' @return none
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @exportMethod
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' para <- missingValueImpute(para)
##' plotQC(para,valueID="value")
setGeneric("plotQC",function(para,valueID="valueNorm",step=4,log=TRUE,
width=8,height=4,...)
standardGeneric("plotQC"))
##' @describeIn plotQC
setMethod("plotQC", signature(para = "metaXpara"),
function(para,valueID="valueNorm",step=4,log=TRUE,
width=8,height=4,...){
if(!any(is.na(para@peaksData$class))){
warning("Don't find QC sample in your data!")
return(NULL)
}
fig <- paste(para@outdir,"/",para@prefix,"-plotQC.pdf",sep="")
pdf(file = fig,width = width,height = height)
peaksData <- para@peaksData
peaksData <- peaksData[is.na(peaksData$class),]
peaksData$class <- as.character(peaksData$class)
peaksData$class[is.na(peaksData$class)] <- "QC"
peaksData[,valueID][ peaksData[,valueID]<=0] <- NA
if(log==TRUE){
peaksData[,valueID] <- log2(peaksData[,valueID])
}
peaksData <- peaksData[order(peaksData$order),]
## must sort
x<-dcast(peaksData,ID~order,value.var = valueID)
row.names(x) <- x$ID
x$ID <- NULL
orderName <- names(x)
corDF <- data.frame()
for(i in 1:(length(orderName)-step)){
valueCor <- cor(x[,i:(i+step)],use = "com")
valueCor <- data.frame(cor=valueCor[lower.tri(valueCor)],order=orderName[i])
corDF <- rbind(corDF,valueCor)
}
corDF$order <- as.numeric(as.character(corDF$order))
corDF$order <- factor(corDF$order,levels=sort(unique(corDF$order)))
ggobj <- ggplot(data=corDF,aes(x=order,y=cor))+
#geom_boxplot(outlier.size=0.4)+
geom_boxplot()+
theme(axis.text.x=element_text(angle=-90,vjust = 0.1))+
#ggtitle("Raw intensity")+
ylab("Correlation")
print(ggobj)
dev.off()
}
)
##' @title Plot heatmap
##' @description This function plots heatmap.
##' @rdname plotHeatMap
##' @docType methods
##' @param para A metaXpara object
##' @param valueID The name of the column that used for plot
##' @param log A logical indicating whether to log the data
##' @param rmQC A logical indicating whether to remove the QC samples
##' @param zero2na A logical indicating whether to convert the value <=0 to NA
##' @param colors Color for heatmap
##' @param anno A file or a data frame including sample annotation data.
##' @param width The width of the graphics region in inches.
##' The default values are 12.
##' @param height The height of the graphics region in inches.
##' The default values are 8.
##' @param saveRds Boolean, setting the argument to TRUE to save some objects to
##' disk for debug. Only useful for developer. Default is TRUE.
##' @param ... Additional parameter
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @exportMethod
##' @return none
##' @examples
##' library(faahKO)
##' xset <- group(faahko)
##' xset <- retcor(xset)
##' xset <- group(xset)
##' xset <- fillPeaks(xset)
##' peaksData <- as.data.frame(groupval(xset,"medret",value="into"))
##' peaksData$name <- row.names(peaksData)
##' para <- new("metaXpara")
##' rawPeaks(para) <- peaksData
##' sampleListFile(para) <- system.file("extdata/faahKO_sampleList.txt",
##' package = "metaX")
##' para <- reSetPeaksData(para)
##' para <- missingValueImpute(para)
##' plotHeatMap(para,valueID="value",width=6)
setGeneric("plotHeatMap",function(para,valueID="valueNorm",log=TRUE,rmQC=TRUE,
zero2na=FALSE,colors="none",anno=NULL,
width=12,height=8,saveRds=TRUE,show_rownames=FALSE,
show_colnames=FALSE,
classCol=NULL,
seriation=FALSE,...)
standardGeneric("plotHeatMap"))
##' @describeIn plotHeatMap
setMethod("plotHeatMap", signature(para = "metaXpara"),
function(para,valueID="valueNorm",log=TRUE,rmQC=TRUE,
zero2na=FALSE,colors="none",anno=NULL,width=12,height=8,
saveRds=TRUE,show_rownames=FALSE,show_colnames=FALSE,
classCol=NULL,seriation=FALSE,...){
message("plot heatmap for ",valueID)
peaksData <- para@peaksData
peaksData$class <- as.character(peaksData$class)
if(rmQC){
peaksData <- peaksData[ !is.na(peaksData$class),]
}else{
peaksData$class[is.na(peaksData$class)] <- "QC"
}
if(zero2na==TRUE){
peaksData[,valueID][ peaksData[,valueID]<=0] <- NA
}
x<-dcast(peaksData,class+order+sample~ID,value.var = valueID)
row.names(x) <- paste(x$class,x$order,sep="_")
classLabel <- x$class
orderLabel <- x$order
sampleLabel <- x$sample
annotationList <- data.frame(Class=classLabel)
row.names(annotationList) <- row.names(x)
if(!is.null(anno)){
cat("Use user defined annotation data!\n")
if(!is.data.frame(anno)){
anno <- read.delim(anno, check.names = FALSE)
}
anno <- dplyr::filter(anno,sample %in% x$sample)
tmp_x <- x %>% mutate(sample_label = row.names(x)) %>%
dplyr::select(sample,sample_label)
m_anno <- merge(tmp_x,anno,by="sample",sort=FALSE)
m_anno$sample <- NULL
row.names(m_anno) <- m_anno$sample_label
m_anno$sample_label <- NULL
annotationList <- m_anno
}
x$class <- NULL
x$order <- NULL
x$sample <- NULL
x <- as.data.frame(t(x))
if(log==TRUE){
x <- log2(x)
}
## set colour
if(colors=="gbr"){
colors <- colorRampPalette(c("green", "black", "red"), space="rgb")(256);
}else if(colors == "heat"){
colors <- heat.colors(256);
}else if(colors == "topo"){
colors <- topo.colors(256);
}else if(colors == "gray"){
colors <- colorRampPalette(c("grey90", "grey10"), space="rgb")(256);
}else{
colors <- rev(colorRampPalette(brewer.pal(10, "RdBu"))(256));
}
res <- list()
fig <- paste(para@outdir,"/",para@prefix,"-heatmap.pdf",sep="")
resfile <- paste(para@outdir,"/",para@prefix,"-heatmap.rds",sep="")
pdf(file = fig,width = width,height = height)
#res$heatmap <- pheatmap(x,annotation=annotationList,color=colors,
# show_rownames=show_rownames,...)
if(is.null(classCol)){
ha <- HeatmapAnnotation(df=annotationList,show_annotation_name=TRUE)
}else{
col_class <- classCol$col
names(col_class) <- classCol$class
ha <- HeatmapAnnotation(df=annotationList,show_annotation_name=TRUE,
col=list(Class=col_class))
}
if(seriation==TRUE){
cat("seriation ...\n")
o1 <- seriation::seriate(dist(x),method="GW")
o2 <- seriation::seriate(dist(t(x)),method="GW")
hp1 <- Heatmap(x %>% as.matrix(),
cluster_rows = as.dendrogram(o1[[1]]),
cluster_columns = as.dendrogram(o2[[1]]),
show_row_names = show_rownames,
show_column_names = show_colnames,
jitter = 1e-10,
top_annotation = ha,name = "value",...)
}else{
hp1 <- Heatmap(x %>% as.matrix(),
show_row_names = show_rownames,
show_column_names = show_colnames,
top_annotation = ha,name = "value",...)
}
draw(hp1)
dev.off()
fig_png <- str_replace(string = fig,pattern = ".pdf$",replacement = ".png")
png(filename = fig_png,width = width, height = height,units = "in",res=120)
#save(x,annotationList,colors,show_rownames,file="test_ph.rda")
#pheatmap(x,annotation=annotationList,color=colors,show_rownames=show_rownames,...)
draw(hp1)
dev.off()
res$fig <- fig_png
res$highfig <- fig
if(saveRds){
saveRDS(res,file=resfile)
}
return(res)
}
)
##' @title Filter peaks according to the RSD of peaks in QC samples
##' @description Filter peaks according to the RSD of peaks in QC samples.
##' Usually used after missing value imputation.
##' @rdname filterQCPeaksByCV
##' @param para An object of metaXpara
##' @param cvFilter Filter peaks with the RSD in QC samples > cvFilter.
##' @param valueID The name of the column which will be used.
##' @param ... Additional parameter
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' p <- filterQCPeaksByCV(para)
setGeneric("filterQCPeaksByCV",function(para,cvFilter=0.3,valueID="value",...)
standardGeneric("filterQCPeaksByCV"))
##' @describeIn filterQCPeaksByCV
setMethod("filterQCPeaksByCV", signature(para = "metaXpara"),
function(para,cvFilter=0.3,valueID="value",...){
peaksData <- para@peaksData
if(sum(is.na(peaksData$class)) <=0 ){
message("Don't find QC sample in your data!")
return(para)
}
## if peaksData has cv column, we need to remove it firstly.
peaksData$cv <- NULL
assign(x="valueID",value=valueID,envir=.GlobalEnv)
qcData <- peaksData[is.na(peaksData$class),]
cvData <- ddply(qcData,.(ID),summarise,
cv=abs(sd(get(valueID),na.rm = TRUE)/mean(get(valueID),
na.rm = TRUE)))
rmID <- cvData$ID[is.na(cvData$cv) | cvData$cv > cvFilter]
para@peaksData <- peaksData[ !peaksData$ID %in% rmID,]
para@peaksData <- merge(para@peaksData,cvData,by="ID")
message("remove peaks by QC CV > ",cvFilter," : ",length(rmID))
message("left peaks: ",length(cvData$ID)-length(rmID))
return(para)
})
##' @title Plot missing value distribution
##' @description Plot missing value distribution.
##' @rdname plotMissValue
##' @docType methods
##' @param para A metaXpara object
##' @param width The width of the graphics region in inches.
##' The default values are 8.
##' @param height The height of the graphics region in inches.
##' The default values are 5.
##' @param ... Additional parameter
##' @return The figure name
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @exportMethod
##' @examples
##' library(faahKO)
##' xset <- group(faahko)
##' xset <- retcor(xset)
##' xset <- group(xset)
##' xset <- fillPeaks(xset)
##' peaksData <- as.data.frame(groupval(xset,"medret",value="into"))
##' peaksData$name <- row.names(peaksData)
##' para <- new("metaXpara")
##' rawPeaks(para) <- peaksData
##' sampleListFile(para) <- system.file("extdata/faahKO_sampleList.txt",
##' package = "metaX")
##' para <- reSetPeaksData(para)
##' plotMissValue(para)
setGeneric("plotMissValue",function(para,width=8,height=5,...)
standardGeneric("plotMissValue"))
##' @describeIn plotMissValue
setMethod("plotMissValue", signature(para = "metaXpara"),
function(para,width=8,height=5,...){
fig <- paste(para@outdir,"/",para@prefix,"-missValueQC.png",sep="")
highfig <- sub(pattern = "png$",replacement = "pdf",x = fig)
pdf(file = highfig,width = width,height = height)
peaksData <- para@peaksData
peaksData[,"value"][ peaksData[,"value"]<=0] <- NA
## including qc sample
nm <- ddply(peaksData,.(ID),summarise,nmiss=sum(is.na(value))/length(value))
dat <- as.data.frame(table(cut(nm$nmiss,breaks = seq(0,1,by=0.1))))
dat$breaks <- as.factor(seq(0.1,1,by=0.1))
dat$ratio <- sprintf("%.2f%%",100*dat$Freq/nrow(nm))
ggobj1 <- ggplot(data=dat,aes(x=breaks,y=Freq))+
geom_bar(stat = "identity",width=0.6,fill="orangered")+
#scale_x_continuous(breaks=dat$breaks)+
theme_bw()+
geom_text(aes(label = ratio),vjust=-0.2,size=4)+
#ylab("Peaks number")+
xlab("Percent of missing value") +
ylab(paste("# of ",para@dataType,"s",sep=""))+
ggtitle(label = paste("Total ",para@dataType,"s:",nrow(nm),
"; total samples:",
length(unique(peaksData$sample)),sep=""))
print(ggobj1)
#
if(any(is.na(peaksData$class))){
# only true samples
mdat <- NULL
if(any(!is.na(peaksData$class))){
nm <- ddply(peaksData[!is.na(peaksData$class),],.(ID),summarise,
nmiss=sum(is.na(value))/length(value))
dat <- as.data.frame(table(cut(nm$nmiss,breaks = seq(0,1,by=0.1))))
dat$breaks <- as.factor(seq(0.1,1,by=0.1))
dat$ratio <- sprintf("%.2f%%",100*dat$Freq/nrow(nm))
dat$class <- "Sample"
mdat <- bind_rows(mdat,dat)
ggobj <- ggplot(data=dat,aes(x=breaks,y=Freq))+
geom_bar(stat = "identity",width=0.6,fill="orangered")+
#scale_x_continuous(breaks=dat$breaks)+
theme_bw()+
geom_text(aes(label = ratio),vjust=-0.2,size=4)+
#ylab("Peaks number")+
xlab("Percent of missing value")+
ylab(paste("# of ",para@dataType,"s",sep=""))+
ggtitle(label = paste("Total ",para@dataType,"s:",nrow(nm),
"; total samples (non-QC):",
length(unique(peaksData$sample[!is.na(peaksData$class)])),sep=""))
print(ggobj)
}
## only qc sample
nm <- ddply(peaksData[is.na(peaksData$class),],.(ID),summarise,
nmiss=sum(is.na(value))/length(value))
dat <- as.data.frame(table(cut(nm$nmiss,breaks = seq(0,1,by=0.1))))
dat$breaks <- as.factor(seq(0.1,1,by=0.1))
dat$ratio <- sprintf("%.2f%%",100*dat$Freq/nrow(nm))
dat$class <- "QC"
mdat <- bind_rows(mdat,dat)
ggobj <- ggplot(data=dat,aes(x=breaks,y=Freq))+
geom_bar(stat = "identity",width=0.6,fill="orangered")+
theme_bw()+
#scale_x_continuous(breaks=dat$breaks)+
geom_text(aes(label = ratio),vjust=-0.2,size=4)+
#ylab("Peaks number")+
xlab("Percent of missing value")+
ylab(paste("# of ",para@dataType,"s",sep="")) +
ggtitle(label = paste("Total ",para@dataType,"s:",nrow(nm),
"; total samples (only QC):",
length(unique(peaksData$sample[is.na(peaksData$class)])),sep=""))
print(ggobj)
ggobj3 <- ggplot(data=mdat,aes(x=breaks,y=Freq,fill=class))+
geom_bar(stat = "identity",width=0.6,position = "dodge")+
theme_bw()+
#scale_x_continuous(breaks=dat$breaks)+
#geom_text(aes(label = ratio),vjust=-0.2,size=4)+
#ylab("Peaks number")+
xlab("Percent of missing value")+
ylab(paste("# of ",para@dataType,"s",sep=""))
#ggtitle(label = paste("Total peaks:",nrow(nm),
# "; total samples (only QC):",
# length(unique(peaksData$sample[is.na(peaksData$class)]))))
print(ggobj3)
}
dev.off()
png(filename = fig,width = width,height = height,units = "in",res=150)
print(ggobj1)
dev.off()
res <- list(fig=fig,highfig=highfig)
return(res)
}
)
##acc
##' @title dir.case
##' @description dir.case
##' @rdname dir.case
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("metaXpara")
##' dir.case(para) <- "./"
setGeneric("dir.case<-",function(para,value) standardGeneric("dir.case<-"))
##' @describeIn metaXpara
setReplaceMethod("dir.case", signature(para = "metaXpara"),
function(para,value){
para@dir.case <- value
para
}
)
##' @title dir.ctrl
##' @description dir.ctrl
##' @rdname dir.ctrl
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("metaXpara")
##' dir.ctrl(para) <- "./"
setGeneric("dir.ctrl<-",function(para,value) standardGeneric("dir.ctrl<-"))
##' @describeIn metaXpara
setReplaceMethod("dir.ctrl", signature(para = "metaXpara"),
function(para,value){
para@dir.ctrl <- value
para
}
)
##' @title sampleListFile
##' @description sampleListFile
##' @rdname sampleListFile
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("metaXpara")
##' sampleListFile(para) <- "sample.txt"
setGeneric("sampleListFile<-",function(para,value)
standardGeneric("sampleListFile<-"))
##' @describeIn metaXpara
setReplaceMethod("sampleListFile", signature(para = "metaXpara"),
function(para,value){
para@sampleListFile <- value
para
}
)
##' @title ratioPairs
##' @description ratioPairs
##' @rdname ratioPairs
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("metaXpara")
##' ratioPairs(para) <- "1:2"
setGeneric("ratioPairs<-",function(para,value) standardGeneric("ratioPairs<-"))
##' @describeIn metaXpara
setReplaceMethod("ratioPairs", signature(para = "metaXpara"),
function(para,value){
para@ratioPairs <- value
para
}
)
##' @title missValueImputeMethod
##' @description missValueImputeMethod
##' @rdname missValueImputeMethod
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("metaXpara")
##' missValueImputeMethod(para) <- "knn"
setGeneric("missValueImputeMethod<-",function(para,value)
standardGeneric("missValueImputeMethod<-"))
##' @describeIn metaXpara
setReplaceMethod("missValueImputeMethod", signature(para = "metaXpara"),
function(para,value){
para@missValueImputeMethod <- value
para
}
)
##' @title outdir
##' @description outdir
##' @rdname outdir
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("metaXpara")
##' outdir(para) <- "outdir"
setGeneric("outdir<-",function(para,value) standardGeneric("outdir<-"))
##' @describeIn metaXpara
setReplaceMethod("outdir", signature(para = "metaXpara"),
function(para,value){
para@outdir <- value
para
}
)
##' @title prefix
##' @description prefix
##' @rdname prefix
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("metaXpara")
##' prefix(para) <- "test"
setGeneric("prefix<-",function(para,value) standardGeneric("prefix<-"))
##' @describeIn metaXpara
setReplaceMethod("prefix", signature(para = "metaXpara"),
function(para,value){
para@prefix <- value
para
}
)
##' @title peaksData
##' @description peaksData
##' @rdname peaksData
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' peaksData(para) <- data.frame()
setGeneric("peaksData<-",function(para,value) standardGeneric("peaksData<-"))
##' @describeIn metaXpara
setReplaceMethod("peaksData", signature(para = "metaXpara"),
function(para,value){
para@peaksData <- value
para
}
)
##' @title addValueNorm
##' @description addValueNorm
##' @rdname addValueNorm
##' @param para An object of metaXpara
##' @param value An object of metaXpara
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' addValueNorm(para) <- para
setGeneric("addValueNorm<-",function(para,value)
standardGeneric("addValueNorm<-"))
##' @describeIn metaXpara
setReplaceMethod("addValueNorm",
signature(para = "metaXpara",value = "metaXpara"),
function(para,value){
para@peaksData$valueNorm <- value@peaksData$value
para
}
)
##' @title rawPeaks
##' @description rawPeaks
##' @rdname rawPeaks
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' rawPeaks(para) <- data.frame()
setGeneric("rawPeaks<-",function(para,value) standardGeneric("rawPeaks<-"))
##' @describeIn metaXpara
setReplaceMethod("rawPeaks", signature(para = "metaXpara"),
function(para,value){
para@rawPeaks <- value
para
}
)
##' @title idres
##' @description idres
##' @rdname idres
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' idres(para) <- data.frame()
setGeneric("idres<-",function(para,value) standardGeneric("idres<-"))
##' @describeIn metaXpara
setReplaceMethod("idres", signature(para = "metaXpara"),
function(para,value){
para@idres <- value
para
}
)
##' @title xcmsSet.method
##' @description xcmsSet.method
##' @rdname xcmsSet.method
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.method(para) <- "centWave"
setGeneric("xcmsSet.method<-",function(para,value)
standardGeneric("xcmsSet.method<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.method", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.method <- value
para
}
)
##' @title xcmsSet.ppm
##' @description xcmsSet.ppm
##' @rdname xcmsSet.ppm
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.ppm(para) <- 10
setGeneric("xcmsSet.ppm<-",function(para,value)
standardGeneric("xcmsSet.ppm<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.ppm", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.ppm <- value
para
}
)
##' @title xcmsSet.peakwidth
##' @description xcmsSet.peakwidth
##' @rdname xcmsSet.peakwidth
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.peakwidth(para) <- 12
setGeneric("xcmsSet.peakwidth<-",function(para,value)
standardGeneric("xcmsSet.peakwidth<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.peakwidth", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.peakwidth <- value
para
}
)
##' @title xcmsSet.snthresh
##' @description xcmsSet.snthresh
##' @rdname xcmsSet.snthresh
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.snthresh(para) <- 5
setGeneric("xcmsSet.snthresh<-",function(para,value)
standardGeneric("xcmsSet.snthresh<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.snthresh", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.snthresh <- value
para
}
)
##' @title xcmsSet.prefilter
##' @description xcmsSet.prefilter
##' @rdname xcmsSet.prefilter
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.prefilter(para) <- c(1,5000)
setGeneric("xcmsSet.prefilter<-",function(para,value)
standardGeneric("xcmsSet.prefilter<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.prefilter", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.prefilter <- value
para
}
)
##' @title xcmsSet.mzCenterFun
##' @description xcmsSet.mzCenterFun
##' @rdname xcmsSet.mzCenterFun
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.mzCenterFun(para) <- "wMean"
setGeneric("xcmsSet.mzCenterFun<-",function(para,value)
standardGeneric("xcmsSet.mzCenterFun<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.mzCenterFun", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.mzCenterFun <- value
para
}
)
##' @title xcmsSet.integrate
##' @description xcmsSet.integrate
##' @rdname xcmsSet.integrate
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.integrate(para) <- 1
setGeneric("xcmsSet.integrate<-",function(para,value)
standardGeneric("xcmsSet.integrate<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.integrate", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.integrate <- value
para
}
)
##' @title xcmsSet.mzdiff
##' @description xcmsSet.mzdiff
##' @rdname xcmsSet.mzdiff
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.mzdiff(para) <- -0.001
setGeneric("xcmsSet.mzdiff<-",function(para,value)
standardGeneric("xcmsSet.mzdiff<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.mzdiff", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.mzdiff <- value
para
}
)
##' @title xcmsSet.noise
##' @description xcmsSet.noise
##' @rdname xcmsSet.noise
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.noise(para) <- 1000
setGeneric("xcmsSet.noise<-",function(para,value)
standardGeneric("xcmsSet.noise<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.noise", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.noise <- value
para
}
)
##' @title xcmsSet.verbose.columns
##' @description xcmsSet.verbose.columns
##' @rdname xcmsSet.verbose.columns
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.verbose.columns(para) <- FALSE
setGeneric("xcmsSet.verbose.columns<-",function(para,value)
standardGeneric("xcmsSet.verbose.columns<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.verbose.columns", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.verbose.columns <- value
para
}
)
##' @title xcmsSet.polarity
##' @description xcmsSet.polarity
##' @rdname xcmsSet.polarity
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.polarity(para) <- "positive"
setGeneric("xcmsSet.polarity<-",function(para,value)
standardGeneric("xcmsSet.polarity<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.polarity", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.polarity <- value
para
}
)
##' @title xcmsSet.profparam
##' @description xcmsSet.profparam
##' @rdname xcmsSet.profparam
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.profparam(para) <- list(step=0.005)
setGeneric("xcmsSet.profparam<-",function(para,value)
standardGeneric("xcmsSet.profparam<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.profparam", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.profparam <- value
para
}
)
##' @title xcmsSet.nSlaves
##' @description xcmsSet.nSlaves
##' @rdname xcmsSet.nSlaves
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.nSlaves(para) <- 8
setGeneric("xcmsSet.nSlaves<-",function(para,value)
standardGeneric("xcmsSet.nSlaves<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.nSlaves", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.nSlaves <- value
para
}
)
##' @title xcmsSet.fitgauss
##' @description xcmsSet.fitgauss
##' @rdname xcmsSet.fitgauss
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.fitgauss(para) <- FALSE
setGeneric("xcmsSet.fitgauss<-",function(para,value)
standardGeneric("xcmsSet.fitgauss<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.fitgauss", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.fitgauss <- value
para
}
)
##' @title xcmsSet.sleep
##' @description xcmsSet.sleep
##' @rdname xcmsSet.sleep
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.sleep(para) <- 0
setGeneric("xcmsSet.sleep<-",function(para,value)
standardGeneric("xcmsSet.sleep<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.sleep", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.sleep <- value
para
}
)
##' @title xcmsSet.fwhm
##' @description xcmsSet.fwhm
##' @rdname xcmsSet.fwhm
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.fwhm(para) <- 30
setGeneric("xcmsSet.fwhm<-",function(para,value)
standardGeneric("xcmsSet.fwhm<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.fwhm", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.fwhm <- value
para
}
)
##' @title xcmsSet.max
##' @description xcmsSet.max
##' @rdname xcmsSet.max
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.max(para) <- 5
setGeneric("xcmsSet.max<-",function(para,value)
standardGeneric("xcmsSet.max<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.max", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.max <- value
para
}
)
##' @title xcmsSet.step
##' @description xcmsSet.step
##' @rdname xcmsSet.step
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSet.step(para) <- 0.1
setGeneric("xcmsSet.step<-",function(para,value)
standardGeneric("xcmsSet.step<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSet.step", signature(para = "metaXpara"),
function(para,value){
para@xcmsSet.step <- value
para
}
)
##' @title group.bw0
##' @description group.bw0
##' @rdname group.bw0
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' group.bw0(para) <- 10
setGeneric("group.bw0<-",function(para,value)
standardGeneric("group.bw0<-"))
##' @describeIn metaXpara
setReplaceMethod("group.bw0", signature(para = "metaXpara"),
function(para,value){
para@group.bw0 <- value
para
}
)
##' @title group.mzwid0
##' @description group.mzwid0
##' @rdname group.mzwid0
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' group.mzwid0(para) <- 0.015
setGeneric("group.mzwid0<-",function(para,value)
standardGeneric("group.mzwid0<-"))
##' @describeIn metaXpara
setReplaceMethod("group.mzwid0", signature(para = "metaXpara"),
function(para,value){
para@group.mzwid0 <- value
para
}
)
##' @title group.bw
##' @description group.bw
##' @rdname group.bw
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' group.bw(para) <- 5
setGeneric("group.bw<-",function(para,value)
standardGeneric("group.bw<-"))
##' @describeIn metaXpara
setReplaceMethod("group.bw", signature(para = "metaXpara"),
function(para,value){
para@group.bw <- value
para
}
)
##' @title group.mzwid
##' @description group.mzwid
##' @rdname group.mzwid
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' group.mzwid(para) <- 0.015
setGeneric("group.mzwid<-",function(para,value)
standardGeneric("group.mzwid<-"))
##' @describeIn metaXpara
setReplaceMethod("group.mzwid", signature(para = "metaXpara"),
function(para,value){
para@group.mzwid <- value
para
}
)
##' @title group.minfrac
##' @description group.minfrac
##' @rdname group.minfrac
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' group.minfrac(para) <- 0.3
setGeneric("group.minfrac<-",function(para,value)
standardGeneric("group.minfrac<-"))
##' @describeIn metaXpara
setReplaceMethod("group.minfrac", signature(para = "metaXpara"),
function(para,value){
para@group.minfrac <- value
para
}
)
##' @title group.minsamp
##' @description group.minsamp
##' @rdname group.minsamp
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' group.minsamp(para) <- 1
setGeneric("group.minsamp<-",function(para,value)
standardGeneric("group.minsamp<-"))
##' @describeIn metaXpara
setReplaceMethod("group.minsamp", signature(para = "metaXpara"),
function(para,value){
para@group.minsamp <- value
para
}
)
##' @title group.max
##' @description group.max
##' @rdname group.max
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' group.max(para) <- 1000
setGeneric("group.max<-",function(para,value)
standardGeneric("group.max<-"))
##' @describeIn metaXpara
setReplaceMethod("group.max", signature(para = "metaXpara"),
function(para,value){
para@group.max <- value
para
}
)
##' @title group.sleep
##' @description group.sleep
##' @rdname group.sleep
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' group.sleep(para) <- 0
setGeneric("group.sleep<-",function(para,value)
standardGeneric("group.sleep<-"))
##' @describeIn metaXpara
setReplaceMethod("group.sleep", signature(para = "metaXpara"),
function(para,value){
para@group.sleep <- value
para
}
)
##' @title retcor.method
##' @description retcor.method
##' @rdname retcor.method
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' retcor.method(para) <- "obiwarp"
setGeneric("retcor.method<-",function(para,value)
standardGeneric("retcor.method<-"))
##' @describeIn metaXpara
setReplaceMethod("retcor.method", signature(para = "metaXpara"),
function(para,value){
para@retcor.method <- value
para
}
)
##' @title retcor.profStep
##' @description retcor.profStep
##' @rdname retcor.profStep
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' retcor.profStep(para) <- 0.005
setGeneric("retcor.profStep<-",function(para,value)
standardGeneric("retcor.profStep<-"))
##' @describeIn metaXpara
setReplaceMethod("retcor.profStep", signature(para = "metaXpara"),
function(para,value){
para@retcor.profStep <- value
para
}
)
##' @title retcor.plottype
##' @description retcor.plottype
##' @rdname retcor.plottype
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' retcor.plottype(para) <- "deviation"
setGeneric("retcor.plottype<-",function(para,value)
standardGeneric("retcor.plottype<-"))
##' @describeIn metaXpara
setReplaceMethod("retcor.plottype", signature(para = "metaXpara"),
function(para,value){
para@retcor.plottype <- value
para
}
)
##' @title qcRlscSpan
##' @description qcRlscSpan
##' @rdname qcRlscSpan
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' qcRlscSpan(para) <- 0.4
setGeneric("qcRlscSpan<-",function(para,value)
standardGeneric("qcRlscSpan<-"))
##' @describeIn metaXpara
setReplaceMethod("qcRlscSpan", signature(para = "metaXpara"),
function(para,value){
para@qcRlscSpan <- value
para
}
)
##' @title xcmsSetObj
##' @description xcmsSetObj
##' @rdname xcmsSetObj
##' @param para An object of metaXpara
##' @param value value
##' @return An object of metaXpara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' library(faahKO)
##' para <- new("metaXpara")
##' xcmsSetObj(para) <- faahko
setGeneric("xcmsSetObj<-",function(para,value)
standardGeneric("xcmsSetObj<-"))
##' @describeIn metaXpara
setReplaceMethod("xcmsSetObj", signature(para = "metaXpara"),
function(para,value){
para@xcmsSetObj <- value
para
}
)
##' @title scale
##' @description scale
##' @rdname scale
##' @param para An object of plsDAPara
##' @param value value
##' @return An object of plsDAPara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("plsDAPara")
##' scale(para) <- "uv"
setGeneric("scale<-",function(para,value)
standardGeneric("scale<-"))
##' @describeIn plsDAPara
setReplaceMethod("scale", signature(para = "plsDAPara"),
function(para,value){
para@scale <- value
para
}
)
##' @title center
##' @description center
##' @rdname center
##' @param para An object of plsDAPara
##' @param value value
##' @return An object of plsDAPara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("plsDAPara")
##' center(para) <- TRUE
setGeneric("center<-",function(para,value)
standardGeneric("center<-"))
##' @describeIn plsDAPara
setReplaceMethod("center", signature(para = "plsDAPara"),
function(para,value){
para@center <- value
para
}
)
##' @title t
##' @description t
##' @rdname t
##' @param para An object of plsDAPara
##' @param value value
##' @return An object of plsDAPara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("plsDAPara")
##' t(para) <- 1
setGeneric("t<-",function(para,value)
standardGeneric("t<-"))
##' @describeIn plsDAPara
setReplaceMethod("t", signature(para = "plsDAPara"),
function(para,value){
para@t <- value
para
}
)
##' @title validation
##' @description validation
##' @rdname validation
##' @param para An object of plsDAPara
##' @param value value
##' @return An object of plsDAPara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("plsDAPara")
##' validation(para) <- "CV"
setGeneric("validation<-",function(para,value)
standardGeneric("validation<-"))
##' @describeIn plsDAPara
setReplaceMethod("validation", signature(para = "plsDAPara"),
function(para,value){
para@validation <- value
para
}
)
##' @title ncomp
##' @description ncomp
##' @rdname ncomp
##' @param para An object of plsDAPara
##' @param value value
##' @return An object of plsDAPara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("plsDAPara")
##' ncomp(para) <- 5
setGeneric("ncomp<-",function(para,value)
standardGeneric("ncomp<-"))
##' @describeIn plsDAPara
setReplaceMethod("ncomp", signature(para = "plsDAPara"),
function(para,value){
para@ncomp <- value
para
}
)
##' @title nperm
##' @description nperm
##' @rdname nperm
##' @param para An object of plsDAPara
##' @param value value
##' @return An object of plsDAPara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("plsDAPara")
##' nperm(para) <- 1000
setGeneric("nperm<-",function(para,value)
standardGeneric("nperm<-"))
##' @describeIn plsDAPara
setReplaceMethod("nperm", signature(para = "plsDAPara"),
function(para,value){
para@nperm <- value
para
}
)
##' @title kfold
##' @description kfold
##' @rdname kfold
##' @param para An object of plsDAPara
##' @param value value
##' @return An object of plsDAPara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("plsDAPara")
##' kfold(para) <- 5
setGeneric("kfold<-",function(para,value)
standardGeneric("kfold<-"))
##' @describeIn plsDAPara
setReplaceMethod("kfold", signature(para = "plsDAPara"),
function(para,value){
para@kfold <- value
para
}
)
##' @title method
##' @description method
##' @rdname method
##' @param para An object of plsDAPara
##' @param value value
##' @return An object of plsDAPara
##' @docType methods
##' @exportMethod
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @examples
##' para <- new("plsDAPara")
##' method(para) <- "oscorespls"
setGeneric("method<-",function(para,value)
standardGeneric("method<-"))
##' @describeIn plsDAPara
setReplaceMethod("method", signature(para = "plsDAPara"),
function(para,value){
para@method <- value
para
}
)
##' @title importDataFromQI
##' @description Import peak data from Progenesis QI.
##' @param para an object of metaXpara
##' @param file a csv file exported from Progenesis QI, which contains peak
##' intensity data
##' @param mode 1, read the normalized data; 2, read the raw data
##' @param fw valid peak width range, for example, it can be set as c(1,30).
##' The unit is second.
##' @param rt valid retention time range, for example, it can be set as c(0.5,9).
##' The unit is minute
##' @return an object of metaXpara
##' @export
importDataFromQI=function(para,file,mode=1,fw=NULL,rt=NULL){
if(!dir.exists(para@outdir)){
dir.create(para@outdir,recursive = TRUE)
}
message("Read file:",file)
a <- read.csv(file,skip=2,check.names = FALSE)
#a <- read_csv(file,skip=2)
firstRow = read.csv(file,nrows = 1,header = FALSE)
rawDataRow = which(firstRow=="Raw abundance")
norDataRow = which(firstRow=="Normalised abundance")
if(mode==1){
## return nor data
a <- a[,1:(rawDataRow-1)]
}else{
## return raw data
a <- a[,-c(norDataRow:c(rawDataRow-1))]
}
a$name <- a$Compound
a$rt <- a$`Retention time (min)`
a$mz <- a$`m/z`
a$mass <- a$`Neutral mass (Da)`
## Chromatographic peak width (min)
a$fw <- 60*a$`Chromatographic peak width (min)`
## summary
message("Peaks number:",nrow(a))
message("Samples number:",rawDataRow-norDataRow)
## precursor charge information
message("Charge information:")
print(table(a$Charge,useNA="always"))
## retention time
message("Retention time:")
message(min(a$rt)," ",max(a$rt))
rt.fig <- paste(para@outdir,"/",para@prefix,"-rt.png",sep="")
png(filename = rt.fig,width = 8,height = 6,units = "in",res = 200)
gg <- ggplot(data=a,aes(x=rt))+geom_density(fill="blue",alpha=0.4)
print(gg)
dev.off()
## mz
message("M/Z:")
message(min(a$mz)," ",max(a$mz))
mz.fig <- paste(para@outdir,"/",para@prefix,"-mz.png",sep="")
png(filename = mz.fig,width = 8,height = 6,units = "in",res = 200)
gg <- ggplot(data=a,aes(x=mz))+geom_density(fill="blue",alpha=0.4)
print(gg)
dev.off()
## mass
message("mass:")
## sometimes, the masses all are NA
if(any(!is.na(a$mass))){
message(min(a$mass,na.rm = TRUE)," ",max(a$mass,na.rm = TRUE))
mass.fig <- paste(para@outdir,"/",para@prefix,"-mass.png",sep="")
png(filename = mass.fig,width = 8,height = 6,units = "in",res = 200)
gg <- ggplot(data=a,aes(x=mass))+geom_density(fill="blue",alpha=0.4)
print(gg)
dev.off()
}
## mz versus rt
rtmz.fig <- paste(para@outdir,"/",para@prefix,"-rt_mz.png",sep="")
png(filename = rtmz.fig,width = 8,height = 6,units = "in",res = 200)
gg <- ggplot(data=a,aes(x=rt,y=mz))+geom_point()+geom_density2d()
print(gg)
dev.off()
message("Chromatographic peak width (s):")
message(min(a$fw)," ",max(a$fw))
fw.fig <- paste(para@outdir,"/",para@prefix,"-fw.png",sep="")
png(filename = fw.fig,width = 8,height = 6,units = "in",res = 200)
gg <- ggplot(data=a,aes(x=fw))+geom_density(fill="blue",alpha=0.4)
print(gg)
dev.off()
message("Chromatographic peak width (s) VS retention time:")
fwrt.fig <- paste(para@outdir,"/",para@prefix,"-fwrt.png",sep="")
png(filename = fw.fig,width = 8,height = 6,units = "in",res = 200)
gg <- ggplot(data=a,aes(x=rt,y=fw))+geom_point()+geom_density2d()
#+
# scale_y_continuous(trans = log2_trans(),
# breaks = trans_breaks("log2", function(x) 2^x),
# labels = trans_format("log2", math_format(2^.x)))
print(gg)
dev.off()
if(!is.null(rt)){
if(length(rt)!=2){
stop("Please provide valid rt, such as c(0.5,14)!")
}
message("\n\nfilter rt(min): ",rt[1],", ",rt[2])
prt <- rt
n_1 <- nrow(a)
a <- a %>% filter(rt >= prt[1],rt <= prt[2])
n_2 <- nrow(a)
message("remove peaks:",n_1-n_2)
## precursor charge information
message("Charge information:")
print(table(a$Charge,useNA="always"))
## retention time
message("Retention time:")
message(min(a$rt)," ",max(a$rt))
message("Peak width:")
message(min(a$fw)," ",max(a$fw))
}
if(!is.null(fw)){
if(length(fw)!=2){
stop("Please provide valid fw, such as c(5,30)!")
}
message("\n\nfilter peak with (s): ",fw[1],", ",fw[2])
pfw <- fw
n_1 <- nrow(a)
a <- a %>% filter(fw >= pfw[1],fw <= pfw[2])
n_2 <- nrow(a)
message("remove peaks:",n_1-n_2)
## precursor charge information
message("Charge information:")
print(table(a$Charge,useNA="always"))
## retention time
message("Retention time:")
message(min(a$rt)," ",max(a$rt))
rt.fig <- paste(para@outdir,"/",para@prefix,"-rt_filter.png",sep="")
png(filename = rt.fig,width = 8,height = 6,units = "in",res = 200)
gg <- ggplot(data=a,aes(x=rt))+geom_density(fill="blue",alpha=0.4)
print(gg)
dev.off()
## mz
message("M/Z:")
message(min(a$mz)," ",max(a$mz))
mz.fig <- paste(para@outdir,"/",para@prefix,"-mz_filter.png",sep="")
png(filename = mz.fig,width = 8,height = 6,units = "in",res = 200)
gg <- ggplot(data=a,aes(x=mz))+geom_density(fill="blue",alpha=0.4)
print(gg)
dev.off()
## mass
message("mass:")
## sometimes, the masses all are NA
if(any(!is.na(a$mass))){
message(min(a$mass,na.rm = TRUE)," ",max(a$mass,na.rm = TRUE))
mass.fig <- paste(para@outdir,"/",para@prefix,"-mass_filter.png",sep="")
png(filename = mass.fig,width = 8,height = 6,units = "in",res = 200)
gg <- ggplot(data=a,aes(x=mass))+geom_density(fill="blue",alpha=0.4)
print(gg)
dev.off()
}
## mz versus rt
rtmz.fig <- paste(para@outdir,"/",para@prefix,"-rt_mz_filter.png",sep="")
png(filename = rtmz.fig,width = 8,height = 6,units = "in",res = 200)
gg <- ggplot(data=a,aes(x=rt,y=mz))+geom_point()+geom_density2d()
print(gg)
dev.off()
message("Chromatographic peak width (s):")
message(min(a$fw)," ",max(a$fw))
fw.fig <- paste(para@outdir,"/",para@prefix,"-fw_filter.png",sep="")
png(filename = fw.fig,width = 8,height = 6,units = "in",res = 200)
gg <- ggplot(data=a,aes(x=fw))+geom_density(fill="blue",alpha=0.4)
print(gg)
dev.off()
message("Chromatographic peak width (s) VS retention time:")
fwrt.fig <- paste(para@outdir,"/",para@prefix,"-fwrt_filter.png",sep="")
png(filename = fw.fig,width = 8,height = 6,units = "in",res = 200)
gg <- ggplot(data=a,aes(x=rt,y=fw))+geom_point()+geom_density2d()
#+
# scale_y_continuous(trans = log2_trans(),
# breaks = trans_breaks("log2", function(x) 2^x),
# labels = trans_format("log2", math_format(2^.x)))
print(gg)
dev.off()
}
para@rawPeaks <- a
return(para)
}
##' @title importDataFromXCMS
##' @description Import peak data from XCMS
##' @param para an object of metaXpara
##' @param file a csv or txt format file exported from XCMS, which contains peak
##' intensity data
##' @return an object of metaXpara
##' @export
importDataFromXCMS=function(para,file){
if(!dir.exists(para@outdir)){
dir.create(para@outdir,recursive = TRUE)
}
message("Read file:",file)
if(str_detect(file,".csv")){
a <- read.csv(file,check.names = FALSE,stringsAsFactors = FALSE)
}else{
a <- read.table(file,header = TRUE,sep="\t",check.names = FALSE,stringsAsFactors = FALSE)
}
para@rawPeaks <- a
return(para)
}
##' @title importDataFromMetaboAnalyst
##' @description Import peak data from MetaboAnalyst
##' @param para an object of metaXpara
##' @param file a csv file exported from MetaboAnalyst, which contains peak
##' intensity data
##' @return an object of metaXpara
##' @export
importDataFromMetaboAnalyst=function(para,file){
if(!dir.exists(para@outdir)){
dir.create(para@outdir,recursive = TRUE)
}
message("Read file:",file)
sample_name <- as.character(read.csv(file,nrows = 1,check.names = FALSE,header = FALSE,stringsAsFactors = FALSE,row.names = 1))
sample_group <- as.character(read.csv(file,nrows = 1,check.names = FALSE,skip = 1,header = FALSE,stringsAsFactors = FALSE,row.names = 1))
if(any(str_detect(string = sample_name,pattern = "_"))){
sample_order <- str_split(sample_name,pattern = "_",n = 2)
sample_order <- as.integer(sapply(sample_order, function(x){x[2]}))
}else{
sample_order <- 1:length(sample_name)
}
sample_infor <- data.frame(sample=sample_name,class=sample_group,order=sample_order,batch=rep(1,length(sample_name)))
para@sampleListFile <- paste(para@outdir,"/sample_infor.txt",sep="")
write.table(x = sample_infor,file = para@sampleListFile, sep="\t",row.names = FALSE,col.names = TRUE,quote=FALSE)
para@rawPeaks <- read.csv(file,check.names = FALSE,header = FALSE,stringsAsFactors = FALSE,skip = 2)
names(para@rawPeaks) <- c("name",sample_name)
para@sampleList <- read.delim(para@sampleListFile,stringsAsFactors = FALSE)
return(para)
}
##' @title Plot figures for PCA/PLS-DA loadings
##' @description Plot figure for PCA/PLS-DA loadings
##' @rdname plotLoading
##' @docType methods
##' @param object object of pcaRes or PLS-DA
##' @param out.tol control the points to show labels
##' @param label 0=>only show part of the labels, 1=>show all the labels, 3=none labels
##' @param show.outlier Logical. The points will be colour-coded when the value is set as TRUE.
##' Default is FALSE
##' @return none
##' @author Bo Wen \email{wenbostar@@gmail.com}
##' @seealso \code{\link{plotPCA}}
##' @exportMethod
##' @examples
##' para <- new("metaXpara")
##' pfile <- system.file("extdata/MTBLS79.txt",package = "metaX")
##' sfile <- system.file("extdata/MTBLS79_sampleList.txt",package = "metaX")
##' rawPeaks(para) <- read.delim(pfile,check.names = FALSE)
##' sampleListFile(para) <- sfile
##' para <- reSetPeaksData(para)
##' para <- missingValueImpute(para)
##' para <- transformation(para,valueID = "value")
##' res <- metaX::plotPCA(para,valueID="value",scale="uv",center=TRUE)
##' plotLoading(res$pca,fig="loading.png")
setGeneric("plotLoading",function(object,out.tol=0.9,show.outlier=FALSE,label=0,fig="loading.png") standardGeneric("plotLoading"))
##' @describeIn plotLoading
setMethod("plotLoading", signature(object = "pcaRes"), function(object,out.tol=0.9,label=0,fig="loading.png"){
loadings_data <- as.data.frame(object@loadings)
plotLoading(loadings_data,out.tol=out.tol,label=label,fig=fig)
})
##' @describeIn plotLoading
setMethod("plotLoading", signature(object = "mvr"), function(object,out.tol=0.9,label=0,fig="loading.png"){
if(ncol(object$loadings)>=2){
loadings_data <- as.data.frame(object$loadings[,1:2])
names(loadings_data) <- c("PC1","PC2")
plotLoading(loadings_data,out.tol=out.tol,label=label,fig=fig)
}else{
message("The number of components of PLS-DA are less than 2!")
}
})
##' @describeIn plotLoading
setMethod("plotLoading", signature(object = "data.frame"),
function(object,out.tol=0.9,show.outlier=FALSE,label=0,fig="loading.png"){
loadings_data <- as.matrix(object)
HotEllipse<-abs(cbind(metaX:::HotE(loadings_data[,1],loadings_data[,2])))*out.tol
outliers<-as.numeric()
for (i in 1:nrow(loadings_data)){
sample<-abs(loadings_data[i,])
out.PC1<-which(HotEllipse[,1]<sample[1])
out.PC1.PC2<-any(HotEllipse[out.PC1,2]<sample[2])*1
outlier<-ifelse(out.PC1.PC2>0,1,0)#+out.PC1.PC3+out.PC2.PC3>0,1,0)
outliers<-c(outliers,outlier)
}
dat <- as.data.frame(loadings_data)
dat$label <- row.names(dat)
dat$label[outliers==0] <- ""
if(show.outlier){
dat$col <- as.character(outliers)
}else{
dat$col <- rep("A",nrow(dat))
}
dat$alllabel <- row.names(dat)
if(grepl(pattern = ".png$",x=fig)){
png(fig,width = 600,height = 600,res=120)
}else{
pdf(fig,width = 4,height = 4)
}
gg <- ggplot(data=dat,aes(x=PC1,y=PC2,colour=col))+
geom_hline(yintercept=0,linetype=2)+
geom_vline(xintercept=0,linetype=2)+
geom_point(alpha=0.5)+
theme_bw()+
theme(legend.position="none")
if(label==0){
gg <- gg + geom_text(aes(label=label),size=2.5,vjust=0,hjust=0)
}else if(label==1){
gg <- gg + geom_text(aes(label=alllabel),size=2.5,vjust=0,hjust=0)
}else{
## no label
}
print(gg)
dev.off()
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.