### =========================================================================
### Ratio estimation and ratio distribution functions.
### -------------------------------------------------------------------------
###
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Distribution fit functions
###
# Distributions used are Norm and Cauchy from package 'distr'.
# See class?Norm and class?Cauchy
fitWeightedNorm <- function(x,weights) {
new("Norm",
mean=weightedMean(data=x,weights=weights),
sd=sqrt(weightedVariance(data=x,weights=weights)[1])
)
}
fitNorm <- function(x,portion=0.75) {
x <- x[!is.na(x)]
if (is.null(portion) || (portion <= 0)){
bp <- boxplot.stats(x,coef=1)
limit <- 0.5*(abs(bp$stats[1])+abs(bp$stats[5]))
}
else
limit <- quantile(abs(x),prob=portion)
good <- (x >= -limit) & (x <= limit)
new("Norm",
mean=mean(x[good]),
sd=sd(x[good])
)
}
fitCauchy <- function(x) {
cauchy.fit <- function(theta,x){
-sum(dcauchy(x,location=theta[1],scale=theta[2],log=TRUE),na.rm=T)
}
good <- !is.na(x) & !is.nan(x)
theta.start <- c(median(x[good]),IQR(x[good])/2)
res <- nlminb(theta.start,cauchy.fit,x=x[good],
lower=c(-10,1e-20),upper=c(10,10))
new("Cauchy",location=res$par[1],scale=res$par[2])
}
fitTlsd <- function(x) {
t.fit <- function(theta,x){
#-sum(dtls(x,df=theta[3],location=theta[1],scale=theta[2],log=TRUE),na.rm=T)
-sum(log(dtls(x,df=theta[3],location=theta[1],scale=theta[2])),na.rm=T)
}
dtls <- function(x,df,location,scale,log = FALSE)
1/scale * dt((x - location)/scale, df, log = log)
good <- !is.na(x) & !is.nan(x)
theta.start <- c(median(x[good]),sd(x[good]),2) # TODO: find good starting value
res <- nlminb(theta.start,t.fit,x=x[good],lower=c(-1,10^(-6),2),upper=c(1,10,100))
new("Tlsd",df=res$par[3],location=res$par[1],scale=res$par[2])
}
fitNormalCauchyMixture <- function(x) {
gc.fit <- function(theta,x){
-sum(log(theta[4]*dcauchy(x,location=theta[1],scale=theta[2])+(1-theta[4])*dnorm(x,mean=theta[1],sd=theta[3])))
}
good <- !is.na(x) & !is.nan(x)
theta.start <- c(median(x[good]),IQR(x[good])/2,mad(x[good]),0.5)
res <- nlminb(theta.start,gc.fit,x=x[good],
lower=c(-10,1e-20,1e-20,0),upper=c(10,10,10,1))
d1 <- Cauchy(location=res$par[1],scale=res$par[2])
d2 <- Norm(mean=res$par[1],sd=res$par[3])
dd <- UnivarMixingDistribution(d1,d2,mixCoeff=c(res$par[4],1-res$par[4]))
list(mixture=dd,cauchy=d1,normal=d2)
}
fitGaussianMixture <- function(x,n=500) {
x <- x[!is.na(x)]
## EM Algorithm
## Initialization: Guessing parameters
theta <- list(
weights = c(0.5,0.5),
means = c(mean(x),mean(x)),
sds = c(sd(x)-0.5*sd(x), sd(x)+0.5*sd(x)))
em.gauss.mixd = function(x0,theta,same.mean=TRUE) {
N <- length(x0)
## Expectation-Step
##prob for each datapoint to come from each distr
p_iks <- do.call(cbind,lapply(seq_along(theta$weights),
function(m) dnorm(x0,theta$means[m],theta$sds[m])*theta$weights[m]))
## membership weights
Expectation <- p_iks / rowSums(p_iks)
sum.weights <- colSums(Expectation)
## Maximization-Step
theta$weights <- 1/N * colSums(Expectation) # new weights
if (!same.mean)
theta$means <- 1/sum.weights * colSums(Expectation*x0) # weighted mean
theta$sds <- sqrt(1/sum.weights *
colSums(Expectation*(x - matrix(theta$means,byrow=T,nrow=N,ncol=2))^2))
return(theta)
}
for(i in seq_len(n)){ theta=em.gauss.mixd(x,theta,same.mean=T) }
d1 <- Norm(theta$means[1],theta$sds[1])
d2 <- Norm(theta$means[2],theta$sds[2])
dd <- UnivarMixingDistribution(d1,d2,mixCoeff=theta$weights)
return(dd)
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### estimateRatio generic and functions.
###
setGeneric("estimateRatioNumeric",
function(channel1, channel2, noise.model, ...)
standardGeneric("estimateRatioNumeric"))
setGeneric("estimateRatio",
function(ibspectra, noise.model=NULL,
channel1, channel2, protein, peptide,...)
standardGeneric("estimateRatio") )
## method definitions
setMethod("estimateRatioNumeric",
signature(channel1="numeric",channel2="numeric",noise.model="NULL"),
function(channel1,channel2,noise.model=NULL,...)
estimateRatioNumeric(channel1,channel2, ...)
)
setMethod("estimateRatioNumeric",
signature(channel1="numeric",channel2="numeric",noise.model="missing"),
function(channel1,channel2,summarize.f=median, ...) {
if (length(channel1) != length(channel2))
stop("length of channel 1 does not equal length of channel 2")
if (length(channel1)==0)
return(c(lratio=NA))
sel <- !is.na(channel1) & !is.na(channel2) & channel1 > 0 & channel2 > 0
return(c(lratio=summarize.f(log10(channel1[sel])-log10(channel2[sel]))))
}
)
setMethod("estimateRatioNumeric",signature(channel1="numeric",channel2="numeric",
noise.model="NoiseModel"),
function(channel1,channel2,noise.model,ratiodistr=NULL,
variance.function="maxi",
sign.level=0.05,sign.level.rat=sign.level,sign.level.sample=sign.level,
remove.outliers=TRUE,outliers.args=list(method="iqr",outliers.coef=1.5),
method="isobar",fc.threshold=1.3,channel1.raw=NULL,channel2.raw=NULL,
use.na=FALSE,preweights=NULL) {
if (length(channel1) != length(channel2))
stop("length of channel 1 does not equal length of channel 2")
if (!is.null(channel1.raw) && length(channel1) != length(channel1.raw) ||
!is.null(channel2.raw) && length(channel2) != length(channel2.raw))
stop("length of orig channels [",length(channel1.raw),"] is not the same as channels [",length(channel1),"]")
is.method <- function(m) identical(method,m)
is.a.method <- function(m) m %in% method || is.method("compare.all")
if (length(channel1)==0) {
if (is.method("isobar"))
return(c(lratio=NA,variance=NA,var_hat_vk=NA,
n.spectra=NA,n.na1=NA,n.na2=NA,
p.value.rat=NA,p.value.sample=NA,p.value=NA,is.significant=FALSE))
if (is.method("isobar-combn"))
return(c(lratio=NA,variance=NA,n.spectra=NA,n.na1=NA,n.na2=NA,
p.value.rat=NA,p.value.sample=NA,p.value=NA,is.significant=FALSE,is.significant.ev=NA,
p.value.combined=NA))
else if (is.method("libra") || is.method("pep"))
return(c(ch1=NA,ch2=NA))
else if (is.method("multiq"))
return(c(lratio=NA,variance=NA,n.spectra=0,isum=NA))
else if (is.method("test") || is.method("compare.all") || length(method) > 1)
return(NULL)
else warning("no spectra, unknown method")
}
sel <- !is.na(channel1) & !is.na(channel2) & channel1 > 0 & channel2 > 0
sel.notna <- !is.na(channel1) & !is.na(channel2) & channel1 > 0 & channel2 > 0
## Implementation of multiq and libra methods
if (is.method("multiq")) {
lratio <- log10(sum(channel1[sel])/sum(channel2[sel]))
return(c(lratio, variance=NA,n.spectra=length(lratio),
isum=sum(channel1[sel]+channel2[sel])))
}
if (is.method("libra") || is.method("pep")) {
return(c(ch1=sum(channel1[sel]),ch2=sum(channel2[sel])))
}
i1 <- log10(channel1)
i2 <- log10(channel2)
## Compute all the spectrum ratios and their individual
## variance based on the noise model
log.ratios = i2 - i1
if (is.null(channel1.raw))
var.i <- variance(noise.model,i1,i2)
else
var.i <- variance(noise.model,
log10(channel1.raw),log10(channel2.raw))
if (remove.outliers) {
if (identical(outliers.args$method,"wtd.iqr"))
outliers.args$weights <- 1/var.i
outliers.args$log.ratio <- log.ratios
sel.outliers <- do.call(.sel.outliers,outliers.args)
sel <- sel & !sel.outliers
}
# Select non-NA outlier removed spectra
channel1 <- channel1[sel]
channel2 <- channel2[sel]
i1 <- i1[sel]
i2 <- i2[sel]
log.ratios <- log.ratios[sel]
var.i <- var.i[sel]
center.val <- ifelse(is.null(ratiodistr), 0 , distr::q(ratiodistr)(0.5))
## linear regression estimation
if (is.a.method("lm")) {
res.lm <- .calc.lm(channel1,channel2,sign.level.sample,sign.level.rat,ratiodistr)
if (is.method("lm")) return (res.lm)
}
## weighted linear regression estimation
if (is.a.method("weighted lm")) {
res.wlm <- .calc.weighted.lm(channel1,channel2,var.i,sign.level.sample,sign.level.rat,ratiodistr)
if (is.method("weighted lm")) return (res.wlm)
}
# First, compute ratios on spectra with intensities for both reporter ions
lratio.n.var <-
.calc.weighted.ratio(log.ratios,var.i,variance.function,preweights[sel])
if (is.a.method("ttest")) {
if (length(log.ratios) < 2)
p.value <- 1
else
p.value <- t.test(log.ratios)$p.value
res.ttest <- c(lratio=lratio.n.var['lratio'],
variance=NA,n.spectra=length(log.ratios),
p.value=p.value,is.significant=p.value<sign.level)
if (is.method("ttest")) return(res.ttest)
}
if (is.a.method("ttest2")) {
p.value <- .ttest.pval(mu=center.var,
xbar=lratio.n.var['lratio'],
s=sqrt(lratio.n.var['sample.variance']),
n=length(log.ratios))
res.ttest2 <- c(lratio=lratio.n.var['lratio'],
variance=lratio.n.var['sample.variance'],
p.value = p.value,
is.significant=p.value<sign.level)
if (is.method("ttest2"))
return(res.ttest2)
}
#if (method == "wttest" || method == "compare.all") {
# p.value <- (length(log.ratios) < 2)? 1 : weighted.t.test(log.ratios,w=weights,weighted.ratio)$p.value
# res.wttest <- c(
# lratio=weighted.ratio,variance=NA,n.spectra=length(log.ratios),
# p.value=p.value,is.significant=p.value<sign.level)
# if (method != "compare.all") return(res.wttest)
# }
if (is.a.method("fc")) {
res.fc <- c(lratio=as.numeric(lratio.n.var['lratio']),variance=NA,
n.spectra=length(log.ratios),
is.significant=abs(as.numeric(lratio.n.var['lratio']))>log10(fc.threshold))
ratio.nw <- mean(log.ratios,na.rm=TRUE)
res.fc.nw <- c(lratio=ratio.nw,variance=NA,n.spectra=length(log.ratios),
is.significant=abs(ratio.nw)>log10(fc.threshold))
if (is.method("fc")) return(res.fc)
}
weighted.ratio <- as.numeric(lratio.n.var['lratio'])
calc.variance <- as.numeric(lratio.n.var['calc.variance'])
if (is.a.method("isobar") || is.a.method("isobar-combn")) {
# Check for channels where one is NA
sel.ch1na <- is.na(channel1) & !is.na(channel2)
sel.ch2na <- is.na(channel2) & !is.na(channel1)
if (use.na) {
stop("use.na currently not functional")
} # use.na
res.isobar <-
c(lratio=weighted.ratio, variance=calc.variance,
var_hat_vk=as.numeric(lratio.n.var['var_hat_vk']),
n.spectra=length(log.ratios),
n.na1=sum(sel.ch1na),n.na2=sum(sel.ch2na),
p.value.rat=calculate.ratio.pvalue(weighted.ratio, calc.variance, ratiodistr),
p.value.sample=calculate.sample.pvalue(weighted.ratio, ratiodistr),
p.value=if (!is.null(ratiodistr)) calcProbXDiffNormals(ratiodistr, weighted.ratio, sqrt(calc.variance), alternative="two-sided") else 1,
is.significant=NA)
if (!is.method("isobar"))
res.isobar['is.significant.ev'] <-
(res.isobar['p.value.sample'] <= sign.level.sample) &&
calculate.ratio.pvalue(weighted.ratio,lratio.n.var['estimator.variance'],
ratiodistr) <= sign.level.rat
res.isobar['is.significant'] <-
(res.isobar['p.value'] <= sign.level)
if (is.method("isobar-combn")) {
p.value.combined <- NA
if (all(!is.na(c(res.isobar['lratio'],res.isobar['variance'])))) {
p.value.combined <- calcProbXGreaterThanY(ratiodistr,Norm(res.isobar['lratio'],sqrt(res.isobar['variance'])))
p.value.combined <- 2*ifelse(p.value.combined<.5,p.value.combined,1-p.value.combined)
}
return(c(res.isobar,p.value.combined=p.value.combined))
}
if (is.method("isobar")) return(res.isobar)
}
if (length(method) == 1 && !is.method("compare.all")) stop(paste("method",method,"not available"))
add.res <- function(x,name) {
if (!exists(x)) return(NULL)
o <- get(x)
as.numeric(o[name])
}
res <- c(lratio=weighted.ratio,
lratio.lm=add.res("res.lm",'lratio'),
lratio.wlm=add.res('res.wlm','lratio'),
unweighted.ratio = mean(log.ratios,na.rm=TRUE),
n.spectra=length(log.ratios),
variance=calc.variance,
var.ev=as.numeric(lratio.n.var['estimator.variance']),
var.sv=as.numeric(lratio.n.var['sample.variance']),
var.lm=add.res('res.lm','stderr')**2,
var.lm.w=add.res('res.wlm','stderr')**2,
is.sign.isobar = add.res('res.isobar','is.significant'),
is.sign.isobar.ev = add.res('res.isobar','is.significant.ev'),
is.sign.lm = add.res('res.lm','is.significant'),
is.sign.wlm = add.res('res.wlm','is.significant'),
is.sign.rat = add.res('res.isobar','p.value.rat')<sign.level,
is.sign.sample = add.res('res.isobar','p.value.sample')<sign.level,
is.sign.ttest = add.res('res.ttest','is.significant'),
is.sign.fc = add.res('res.fc','is.significant'),
is.sign.fc.nw = add.res('res.fc.nw','is.significant'))
}
)
calculate.ratio.pvalue <- function(lratio, variance, ratiodistr = NULL) {
center.val <- ifelse(is.null(ratiodistr), 0, distr::q(ratiodistr)(0.5))
lt_mask <- !is.na(lratio) & lratio < center.val
ge_mask <- !is.na(lratio) & !lt_mask
res <- as.numeric( rep.int( NA, length(lratio) ) )
res[lt_mask] <- pnorm(lratio[lt_mask],
mean=center.val, sd=sqrt(variance[lt_mask]),
lower.tail=TRUE)
res[ge_mask] <- pnorm(lratio[ge_mask],
mean=center.val, sd=sqrt(variance[ge_mask]),
lower.tail=FALSE)
return(res)
}
calculate.sample.pvalue <- function(lratio,ratiodistr) {
res <- as.numeric( rep.int( NA, length(lratio) ) )
if (!is.null(ratiodistr)) {
center.val <- distr::q(ratiodistr)(0.5)
lt_mask <- !is.na(lratio) & lratio < center.val
ge_mask <- !is.na(lratio) & !lt_mask
res[lt_mask] <- distr::p(ratiodistr)(lratio[lt_mask], lower.tail=TRUE)
res[ge_mask] <- distr::p(ratiodistr)(lratio[ge_mask], lower.tail=FALSE)
} else {
#warning('Sample P-value not calculated: missing ratio distribution')
}
return(res)
}
calculate.mult.sample.pvalue <- function(lratio,ratiodistr,strict.pval,lower.tail,
n.possible.val, n.observed.val) {
if (is.null(ratiodistr))
return(NA)
product.p.vals <- prod(distr::p(ratiodistr)(lratio,lower.tail=lower.tail))
if (strict.pval)
pval <- getMultUnifPValues(product.p.vals*0.5^(n.possible.val-n.observed.val),n=n.possible.val)
else
pval <- getMultUnifPValues(product.p.vals,n=n.observed.val)
return(pval)
}
adjust.ratio.pvalue <- function(quant.tbl,p.adjust,sign.level,globally=FALSE) {
if (globally) {
quant.tbl[,'p.value.rat.adjusted'] <- p.adjust(quant.tbl[,'p.value.rat'], p.adjust)
quant.tbl[,'p.value.adjusted'] <- p.adjust(quant.tbl[,'p.value'], p.adjust)
quant.tbl[,'is.significant'] <- quant.tbl[,'is.significant'] & quant.tbl[,'p.value.adjusted'] < sign.level
} else {
comp.cols <- c("r1","r2","class1","class2")
comp.cols <- comp.cols[comp.cols %in% colnames(quant.tbl)]
quant.tbl <- ddply(quant.tbl,comp.cols,function(x) {
x[,'p.value.rat.adjusted'] <- p.adjust(x[,'p.value.rat'], p.adjust)
x[,'p.value.adjusted'] <- p.adjust(x[,'p.value'], p.adjust)
x[,'is.significant'] <- x[,'is.significant'] & x[,'p.value.adjusted'] < sign.level
x
})
}
quant.tbl
}
correct.peptide.ratios <- function(ibspectra, peptide.quant.tbl, protein.quant.tbl, protein.group.combined,
adjust.variance=TRUE, correlation=0, recalculate.pvalue = TRUE) {
attrs <- attributes(peptide.quant.tbl)
if (isTRUE(attr(peptide.quant.tbl,'summarize')) || isTRUE(attr(protein.quant.tbl,'summarize')))
stop("Ratio correction should be done before summarization! Use proteinRatio with argument before.summarize.f!")
# map from peptides to protein group identifier
all.q.prots <- unique(protein.quant.tbl[,'ac'])
n.quant <- table(protein.quant.tbl[!is.na(protein.quant.tbl[,'lratio']),'ac'])
q.protein.acs <- strsplit(all.q.prots,",")
pi <- protein.group.combined@peptideInfo
pi <- merge(pi,as.data.frame(isobar:::.as.matrix(protein.group.combined@indistinguishableProteins,
colnames=c("protein","protein.g")),
stringsAsFactors=FALSE))
pi <- pi[pi[["protein.g"]] %in% reporterProteins(protein.group.combined),]
pep.to.ac <- isobar:::.as.vect(unique(pi[,c('peptide','protein.g')]))
peptide.quant.tbl[,'ac'] <- pep.to.ac[peptide.quant.tbl[,'peptide']]
# merged peptide and protein quant table
tbl <- merge(peptide.quant.tbl,protein.quant.tbl[,c("ac","r1","r2","lratio","variance","is.significant")],
by = c("ac","r1","r2"), all.x = TRUE, suffixes=c(".modpep",".prot"))
tbl[,'lratio'] <- .cn(tbl,'lratio.modpep') - .cn(tbl,'lratio.prot')
attrs$adjust.variance <- adjust.variance
if (adjust.variance) {
attrs$adjust.variance.corralation <- correlation
cov <- correlation * sqrt(.cn(tbl,'variance.modpep')) * sqrt(.cn(tbl,'variance.prot'))
tbl[,'variance'] <- .cn(tbl,'variance.modpep') + .cn(tbl,'variance.prot') * 2 * cov
}
attrs$recalculate.pvalue <- recalculate.pvalue
if (recalculate.pvalue) {
ratiodistr <- attr(peptide.quant.tbl,'ratiodistr')
tbl[,'p.value.rat'] <- calculate.ratio.pvalue(tbl[,'lratio'], tbl[,'variance'], ratiodistr)
tbl[,'p.value.sample'] <- calculate.sample.pvalue(tbl[,'lratio'], ratiodistr)
tbl[,'p.value'] <- calcProbXDiffNormals( ratiodistr, tbl[,'lratio'], sqrt(tbl[,'variance']), alternative="two-sided" )
tbl[,'is.significant'] <- tbl[,'p.value'] <= attr(peptide.quant.tbl,'sign.level')
}
attrs$names <- colnames(tbl)
attrs$row.names <- rownames(tbl)
attributes(tbl) <- attrs
return(tbl)
}
.calc.lm <- function(channel1,channel2,
sign.level.rat,sign.level.sample,ratiodistr) {
res.lm <- NA
if (any(!is.na(channel1) & !is.na(channel2))){
fm <- lm(channel2~channel1+0)
summary.fm <- summary.lm(fm)$coefficients
ci <- confint(fm,level=1-sign.level.rat)
res.lm <- c(lratio=log10(summary.fm[1]),
ratio=summary.fm[1],
stderr=summary.fm[2],
ratio.ci.l=ci[1],
ratio.ci.u=ci[2])
if (!is.null(ratiodistr)) {
rat.neg <- log10(res.lm['ratio']) < distr::q(ratiodistr)(0.5)
ci[ci < 0] <- 0
# TODO: fix confidence interval which goes to minus
#message(paste(ci,collapse=":"))
lrat.p <- log10(ifelse(rat.neg,ci[2],ci[1]))
res.lm['p.value'] <- distr::p(ratiodistr)(lrat.p,lower.tail=rat.neg)
res.lm['is.significant'] <- res.lm['p.value'] < sign.level.sample
}
}
res.lm
}
.calc.weighted.lm <- function(channel1,channel2,var.i,
sign.level.rat,sign.level.sample,ratiodistr) {
res.lm <- NA
if (any(!is.na(channel1) & !is.na(channel2))){
fm <- lm(channel2~channel1+0,
weights=1/var.i)
summary.fm <- summary.lm(fm)$coefficients
ci <- confint(fm,level=1-sign.level.rat)
res.lm <- c(lratio=log10(summary.fm[1]),
ratio=summary.fm[1],
stderr=summary.fm[2],
ratio.ci.l=ci[1],
ratio.ci.u=ci[2])
if (!is.null(ratiodistr)) {
rat.neg <- log10(res.lm['ratio']) < distr::q(ratiodistr)(0.5)
ci[ci < 0] <- 0
# TODO: fix confidence interval which goes to minus
#message(paste(ci,collapse=":"))
lrat.p <- log10(ifelse(rat.neg,ci[2],ci[1]))
res.lm['p.value'] <- distr::p(ratiodistr)(lrat.p,lower.tail=rat.neg)
res.lm['is.significant'] <- res.lm['p.value'] < sign.level.sample
}
}
res.lm
}
.get.ri <- function(ri,ch) {
if (is.null(ri))
return(NULL)
if (ch == "ALL")
rowSums(ri,na.rm=TRUE)
else if (ch == "AVG")
apply(ri,1,mean,na.rm=TRUE)
else
ri[,ch]
}
.sel.outliers <- function(log.ratio,method="boxplot",
outliers.coef=1.5,outliers.trim=0.1,
weights=NULL) {
# discussion: http://stats.stackexchange.com/questions/7155/rigorous-definition-of-an-outlier
sel <- is.na(log.ratio)
if (method == "boxplot") {
# Tukey's (1977) method; see ?boxplot.stats and below at 'iqr'.
# outliers.coef=1.5, typically
bp <- boxplot.stats(log.ratio,coef=outliers.coef)
sel | log.ratio<bp$stats[1] | log.ratio>bp$stats[5]
} else if (method == "iqr") {
# Tukey's (1977) method
# applicable to skewed data (makes no distributional assumptions)
# may not be appropriate for small sample sizes
# selects points which are more than outliers.coef times the interquartile range
# above the third quartile or below the first quartile.
# possible outliers: outlier.coef=1.5 (99.3% of the data within range in normal distribution)
# probable outliers: outlier.coef=3
qs <- quantile(log.ratio,c(.25,.75),na.rm=TRUE)
iqr <- qs[2] - qs[1]
sel | log.ratio < qs[1]-outliers.coef*iqr | log.ratio > qs[2]+outliers.coef*iqr
} else if (method == "wtd.iqr") {
# weighted implementation of iqr method with weighted quantiles
require(Hmisc)
qs <- wtd.quantile(log.ratio,weights,c(.25,.75),na.rm=TRUE)
iqr <- qs[2] - qs[1]
sel | log.ratio < qs[1]-outliers.coef*iqr | log.ratio > qs[3]+outliers.coef*iqr
} else if (method == "robust.zscore") {
# modified z-score proposed by Iglewicz and Hoaglin (1993)
# 68% have zscores between +/- 1, 95% have zscores between +/- 2
# 99.7% have zscores between +/- 3
# outliers.coef=3.5, typically
sel | 0.6745*abs(log.ratio-mean(log.ratio,na.rm=TRUE))/mad(log.ratio,constant=1,na.rm=TRUE) > outliers.coef
} else if (method == "zscore") {
# 99.7% of the data lie within 3 standard deviations of the mean
# outliers.coef=3, typically
sel | abs(log.ratio-mean(log.ratio,na.rm=TRUE))/sd(log.ratio,na.rm=TRUE) > outliers.coef
} else if (method == "trim") {
sel | log.ratio < quantile(log.ratio,outliers.trim/2,na.rm=TRUE) |
log.ratio > quantile(log.ratio,1-outliers.trim/2,na.rm=TRUE)
} else {
stop("method ",method," not known, use one of [boxplot,iqr,wtd.iqr,zscore,trim]")
}
}
.calc.weighted.ratio <- function(log.ratio,variance,
variance.function,preweights=NULL) {
variance <- variance
preweights <- preweights
weights <- 1/variance
if (!is.null(preweights))
weights <- weights*preweights
sum.weights <- sum(weights)
weighted.ratio <- weightedMean(log.ratio,weights)
# variance calculation
estimator.variance <- 1/sum.weights
var_hat_vk = Inf
if (length(log.ratio) == 1)
sample.variance <- estimator.variance^0.75
else {
wvar <- weightedVariance(log.ratio,weights,weighted.ratio)
var_hat_vk = wvar[2]
if (length(log.ratio) == 2)
sample.variance <- max(c(wvar[1],estimator.variance^0.75))
else
sample.variance <- wvar[1]
}
calc.variance <- switch(variance.function,
maxi = max(estimator.variance,sample.variance,na.rm=T),
ev = estimator.variance,
wsv = sample.variance,
stop("unknown variance function - choose one of [maxi,ev,wsv]")
)
return(c(lratio=weighted.ratio,
estimator.variance=estimator.variance,
sample.variance=sample.variance,
calc.variance=calc.variance,
var_hat_vk = var_hat_vk
))
}
estimateRatioForProtein <- function(protein,ibspectra,noise.model,channel1,channel2,
combine=TRUE,method="isobar",specificity=REPORTERSPECIFIC,quant.w.grouppeptides=NULL,...) {
#message("parallel processing: ",isTRUE(getOption('isobar.parallel')))
if (combine) {
if (method == "multiq" || method == "libra" || method=="pep") {
## first compute peptide ratios, summarize then
peptide.ratios <-
estimateRatioForPeptide(peptides(proteinGroup(ibspectra),protein=protein),
ibspectra,noise.model=noise.model,
channel1=channel1,channel2=channel2,
combine=FALSE,method=method)
if (method == "libra" | method=="pep") {
# normalize to sum
sum.i <- peptide.ratios$ch1+peptide.ratios$ch2
p.ch1 <- peptide.ratios$ch1/sum.i
p.ch2 <- peptide.ratios$ch2/sum.i
if (method=="libra") {
##remove outliers +2sd
sd1 <- sd(p.ch1)
sd2 <- sd(p.ch2)
p.ch1 <- p.ch1[p.ch1 > mean(p.ch1)-2*sd1 & p.ch1 < mean(p.ch1)+2*sd1]
p.ch2 <- p.ch2[p.ch2 > mean(p.ch2)-2*sd2 & p.ch2 < mean(p.ch2)+2*sd2]
}
return(lratio=log10(p.ch2/p.ch1),variance=var(log10(p.ch2/p.ch1)),
ch1=p.ch1,ch2=p.ch2)
}
if (method == "multiq") {
## TODO: filtration of peptides with low confidence score
## TODO: Dynamic range comp.
## weighted average of peptide ratios
return(c(
lratio=weightedMean(peptide.ratios[,'lratio'],weights=peptide.ratios$isum),
variance=var(peptide.ratios$lratio)))
}
} else if (method %in% c("isobar","lm","ttest","ttest2","fc","compare.all")) {
.call.estimateRatio(protein,"protein",ibspectra,
noise.model,channel1,channel2,method=method,
specificity=specificity,...)
} else {
stop("method ",method," not known")
}
} else {
res <- ldply(protein,function(individual.protein) {
if (individual.protein %in% quant.w.grouppeptides)
specificity <- c(GROUPSPECIFIC,specificity)
.call.estimateRatio(individual.protein,"protein",ibspectra,
noise.model,channel1,channel2,method=method,...,
specificity=specificity)
},.parallel=isTRUE(getOption('isobar.parallel'))
)
rownames(res) <- protein
res
#res[apply(res,2,!function(r) all(is.na(r))),]
}
}
estimateRatioForPeptide <- function(peptide,ibspectra,noise.model,channel1,channel2,combine=TRUE,...) {
if (combine) {
r <- .call.estimateRatio(peptide,"peptide",ibspectra,noise.model,
channel1,channel2,...)
} else {
if (is.data.frame(peptide))
peptide <- as.matrix(peptide)
if (is.matrix(peptide)) {
r <- ldply(seq_len(nrow(peptide)),function(p_i)
.call.estimateRatio(peptide[p_i,,drop=FALSE],"peptide",ibspectra,noise.model,
channel1,channel2,...),.parallel=isTRUE(getOption('isobar.parallel')))
} else {
r <- ldply(peptide,function(individual.peptide)
.call.estimateRatio(individual.peptide,"peptide",ibspectra,noise.model,
channel1,channel2,...),.parallel=isTRUE(getOption('isobar.parallel')))
}
}
attr(r,"input") <- peptide
attr(r,"combine") <- combine
return(r)
}
# Calculate a T pvalue
.ttest.pval <- function(mu,xbar,s,n=2,conf.level=0.95) {
t <- (xbar-mu)/ (s/sqrt(n))
df <- ifelse(n==1,0.5,n-1) ## using 0.5 df when n=1 (has no theoretical justification)
p.value <- pt(t,df)
p.value <- ifelse(p.value<0.5,p.value,1-p.value) # two-sided
p.value <- p.value*2 ## correct for two-sided test
tc <- pt(conf.level,df)/2
if (length(xbar)>1) {cf <- cbind} else {cf <- c}
return(cf(t.stat=t,p.value,
ci.lower=xbar-tc*s/sqrt(n),
ci.upper=xbar+tc*s/sqrt(n)))
}
.ttest.pval.se <- function(mu,xbar,se,n=2,conf.level=0.95) {
t <- (xbar-mu)/ se
df <- ifelse(n==1,0.5,n-1) ## using 0.5 df when n=1
p.value <- pt(t,df)
p.value <- ifelse(p.value<0.5,p.value,1-p.value) # two-sided
p.value <- p.value*2 ## correct for two-sided test
if (length(xbar)>1) {cf <- cbind} else {cf <- c}
tc <- pt(conf.level,df)/2
return(cf(t.stat=t,p.value,
ci.lower=xbar-tc*se,
ci.upper=xbar+tc*se))
}
### Handling NULL protein or peptide argument
setMethod("estimateRatio",
signature(ibspectra="IBSpectra",noise.model="ANY",
channel1="missing",channel2="missing",
protein="character",peptide="missing"),
function(ibspectra,noise.model=NULL,protein,
val="lratio",summarize=FALSE,combine=TRUE,...) {
channels <- reporterTagNames(ibspectra)
if (combine) {
res <- matrix(NA,nrow=length(channels),ncol=length(channels),
dimnames=list(channels,channels))
for(channel1 in channels)
for (channel2 in channels) {
rat <- estimateRatio(ibspectra,noise.model=noise.model,
channel1=channel1,channel2=channel2,
protein=protein,...)
res[channel1,channel2] <- rat[val]
}
return(res)
} else {
cmbn <- t(combn(channels,2))
res <- estimateRatio(ibspectra,noise.model=noise.model,
channel1=cmbn[i,1],channel2=cmbn[i,2],
protein=protein,combine=FALSE,...)
apply(cmbn,1,function(i)
cbind(r1=cmbn[i,1],r2=cmbn[i,2],ac=rownames(res),res))
}
}
)
setMethod("estimateRatio",
signature(ibspectra="IBSpectra",noise.model="ANY",
channel1="missing",channel2="missing",
protein="missing",peptide="character"),
function(ibspectra,noise.model=NULL,peptide,
val="lratio",summarize=FALSE,combine=TRUE,...) {
channels <- reporterTagNames(ibspectra)
if (combine) {
res <- matrix(NA,nrow=length(channels),ncol=length(channels),
dimnames=list(channels,channels))
for(channel1 in channels)
for (channel2 in channels) {
rat <- estimateRatio(ibspectra,noise.model=noise.model,
channel1=channel1,channel2=channel2,
peptide=peptide,...)
res[channel1,channel2] <- rat[val]
}
return(res)
} else {
cmbn <- t(combn(channels,2))
res <- estimateRatio(ibspectra,noise.model=noise.model,
channel1=cmbn[i,1],channel2=cmbn[i,2],
peptide=peptide,combine=FALSE,...)
apply(cmbn,1,function(i)
cbind(r1=cmbn[i,1],r2=cmbn[i,2],ac=rownames(res),res))
}
}
)
setMethod("estimateRatio",
signature(ibspectra="IBSpectra",noise.model="ANY",
channel1="character",channel2="character",
protein="character",peptide="NULL"),
function(ibspectra,noise.model,channel1,channel2,protein,peptide=NULL,...) {
estimateRatio(ibspectra,noise.model,channel1,channel2,protein=protein,...)
}
)
setMethod("estimateRatio",
signature(ibspectra="IBSpectra",noise.model="ANY",
channel1="character",channel2="character",
protein="NULL",peptide="character"),
function(ibspectra,noise.model,channel1,channel2,protein=NULL,peptide,...) {
estimateRatio(ibspectra,noise.model,channel1,channel2,peptide=peptide,...)
}
)
setMethod("estimateRatio",
signature(ibspectra="IBSpectra",noise.model="ANY",
channel1="character",channel2="character",
protein="NULL",peptide="matrix"),
function(ibspectra,noise.model,channel1,channel2,protein=NULL,peptide,...) {
estimateRatio(ibspectra,noise.model,channel1,channel2,peptide=peptide,...)
}
)
### Calling estimateRatioForPeptide and calcRatioForProtein, resp.
setMethod("estimateRatio",
signature(ibspectra="IBSpectra",noise.model="ANY",
channel1="character",channel2="character",
protein="character",peptide="missing"),
function(ibspectra,noise.model,channel1,channel2,protein,...) {
estimateRatioForProtein(protein,ibspectra,noise.model,channel1,channel2,...)
}
)
setMethod("estimateRatio",
signature(ibspectra="IBSpectra",noise.model="ANY",
channel1="character",channel2="character",
protein="missing",peptide="character"),
function(ibspectra,noise.model,channel1,channel2,peptide,...) {
estimateRatioForPeptide(peptide,ibspectra,noise.model,channel1,channel2,...)
}
)
setMethod("estimateRatio",
signature(ibspectra="IBSpectra",noise.model="ANY",
channel1="character",channel2="character",
protein="missing",peptide="data.frame"),
function(ibspectra,noise.model,channel1,channel2,peptide,...) {
estimateRatioForPeptide(peptide,ibspectra,noise.model,channel1,channel2,...)
}
)
setMethod("estimateRatio",
signature(ibspectra="IBSpectra",noise.model="ANY",
channel1="character",channel2="character",
protein="NULL",peptide="data.frame"),
function(ibspectra,noise.model,channel1,channel2,protein=NULL,peptide,...) {
estimateRatioForPeptide(peptide,ibspectra,noise.model,channel1,channel2,...)
}
)
setMethod("estimateRatio",
signature(ibspectra="IBSpectra",noise.model="ANY",
channel1="character",channel2="character",
protein="missing",peptide="matrix"),
function(ibspectra,noise.model,channel1,channel2,peptide,...) {
estimateRatioForPeptide(peptide,ibspectra,noise.model,channel1,channel2,...)
}
)
.NULLstring <- function(x) {
if(is.null(x))
return("NULL")
return(x)
}
### Helper function to estimateRatioNumeric
.call.estimateRatio <- function(x,level,ibspectra,noise.model,
channel1,channel2,
specificity=REPORTERSPECIFIC,modif=NULL,
n.sample=NULL,groupspecific.if.same.ac=FALSE,
use.precursor.purity=FALSE,do.warn=TRUE,
use.for.quant.only=TRUE,
use.vsn.transform=FALSE,...) {
allowed.channels <- c(reporterTagNames(ibspectra),'AVG','ALL')
allowed.channels.p <- paste(allowed.channels, collapse=", ")
if (is.null(channel1) || !all(channel1 %in% allowed.channels))
stop("channel 1 is ",.NULLstring(channel1),",",
" but it should be one of ",allowed.channels.p,".")
if (is.null(channel2) || !all(channel2 %in% allowed.channels))
stop("channel 2 is ",.NULLstring(channel2),",",
" but it should be one of ",allowed.channels.p,".")
## when more than one channel value is given, calculate the combination matrix
if (length(channel1) > 1 || length(channel2) > 1) {
## using match.call to get the call with all arguments
ecall <- match.call(expand.dots = TRUE)
res <- c()
for (c1 in channel1) {
for (c2 in channel2) {
my.ecall <- ecall
my.ecall[["channel1"]] <- c1
my.ecall[["channel2"]] <- c2
## using eval() on my.ecall for recursive calling of this function
res <- rbind(res,
data.frame(channel1=c1,channel2=c2,
t(eval(my.ecall,parent.frame())),
stringsAsFactors=FALSE))
}
}
rownames(res) <- NULL
return(res)
}
## select spectra based on quantification level
sel <- switch(level,
"protein"=spectrumSel(ibspectra,protein=x,specificity=specificity,
do.warn=do.warn,modif=modif,
groupspecific.if.same.ac=groupspecific.if.same.ac,
use.for.quant.only=use.for.quant.only),
"peptide"=spectrumSel(ibspectra,peptide=x,modif=modif,do.warn=do.warn,
use.for.quant.only=use.for.quant.only),
"peptide-probs"=stop("not implemented yet"),
stop("level ",level," unknown"))
## get reporter intensities from ibspectra
ri <- reporterIntensities(ibspectra)[sel,,drop=FALSE]
## get raw intensities (prior to normalization) for estimation of noise level
ri.raw <- reporterData(ibspectra,element="ions_not_normalized")[sel,,drop=FALSE]
## use vsn transformed data (see apply.vsn() in IBSpectra-class.R
if (use.vsn.transform) {
if (!"ions_vsn_normalized" %in% assayDataElementNames(ibspectra))
stop("Use apply.vsn() to calculate variance-stabilizing transformation, first")
ri <- reporterData(ibspectra,element="ions_vsn_normalized")[sel,,drop=FALSE]
}
## get reporter intensities
i1 <- .get.ri(ri,channel1)
i2 <- .get.ri(ri,channel2)
i1.raw <- .get.ri(ri.raw,channel1)
i2.raw <- .get.ri(ri.raw,channel2)
# sample data for testing puposes (TP/FP estimation)
if (!is.null(n.sample)) {
if (n.sample <= length(i1)) {
indices <- sample(seq_along(i1),n.sample)
i1 <- i1[indices]
i2 <- i2[indices]
if (!is.null(ri.raw)) {
i1.raw <- i1.raw[indices]
i2.raw <- i2.raw[indices]
}
}
else
i1 <- i2 <- i1.raw <- i2.raw <- numeric()
}
## use precursor purity for spectra weighting
if (isTRUE(use.precursor.purity)) {
if (!.SPECTRUM.COLS['PRECURSOR.PURITY'] %in% colnames(fData(ibspectra)))
stop("Argument use.precusor.purity is TRUE, but no column '",
.SPECTRUM.COLS['PRECURSOR.PURITY'],"' in data.")
precursor.purity <- fData(ibspectra)[sel,"precursor.purity"]
} else {
precursor.purity <- NULL
}
estimateRatioNumeric(channel1=i1,channel2=i2,noise.model=noise.model,...,
preweights=precursor.purity,
channel1.raw=i1.raw,channel2.raw=i2.raw)
}
getMultUnifPValues <- function(product,pvals=NULL,n=NULL){
if (is.null(n))
return(NULL)
if (!is.null(pvals))
if (n == length(pvals))
product <- prod(pvals)
else
return(NULL)
if (n == 1)
return(product)
s <- 1
for (i in 1:(n-1))
s <- s + (-log(product))^i/factorial(i)
product*s
} # getMultUnifPValues
getMultUnifDensity <- function(x,n=NULL){
if (is.null(n))
return(NULL)
if (n == 1)
return(1)
i <- n-1
(-log(x))^i/factorial(i)
} # getMultUnifDensity
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### proteinRatios generic and function.
###
## combn.matrix generates pairwise combinations
## of the values in 'x':
## - all against all (using combn, methd="global")
## - all combinations across classes (method="interclass")
## used when computing ratios class A against class B
## - all combinations within classes (method="intraclass")
## used when computing a intra-class ratio distribution
## for estimation of biological variability
## - method "versus.class": all combinations against class vs
## - method "versus.channel": all combinations against channel vs
combn.matrix <- function(x,method="global",cl=NULL,vs=NULL) {
if (method != "global" & is.null(cl))
stop("No class labels cl provided.")
if (method != "global" & !is.character(cl)) {
warning("Class labels should be of type character - using as.character to convert.")
cl <- as.character(cl)
}
if (method != "global" & length(cl) != length(x))
stop(sprintf("cl argument does not have the same length as x (cl: %s, x: %s).",
length(cl),length(x)))
# Filter NA channels
if (!is.null(cl)) {
x <- x[!is.na(cl)]
cl <- cl[!is.na(cl)]
}
if (!is.null(vs) && !is.character(vs))
vs <- as.character(vs)
# Create a combn matrix with all combinations of channels to consider
if (method == "versus.class" || method == "versus.channel") {
if (method == "versus.channel") {
if (is.null(vs)) {
vs = x[1]
warning("vs argument is null, but method is versus.channel. Using channel '",vs,"'")
}
if (!vs %in% x) stop("vs argument must be one of [",paste(x,collapse=", "),"]")
pos <- which(x==vs)
cmbn <- rbind(vs,x[-pos])
if (!is.null(cl)) {
vs.class <- cl[pos]
cmbn <- rbind(cmbn,vs.class,cl[-pos])
}
}
if (method == "versus.class") {
if (is.null(vs)) {
vs = cl[1]
warning("vs argument is null, but method is versus.channel. Using channel '",vs,"'")
}
if (!all(vs %in% cl)) stop("vs argument must be one of [",paste(cl,collapse=", "),"]")
if (is.null(cl)) stop("class labels must be given with method versus.class")
pos <- which(cl==vs)
cmbn <- rbind(x[pos],rep(x[-pos],each=length(pos)))
cmbn <- rbind(cmbn,vs,rep(cl[-pos],each=length(pos)))
}
} else if (method == "global") {
cmbn <- combn(x,2) # take all combinations
if (!is.null(cl))
cmbn <- rbind(cmbn,combn(cl,2))
} else {
t <- table(cl)[unique(cl)]
if (method == "intraclass") {
if (length(t[t==1]) > 0)
warning("Some class labels are not repeated - ",
"those are ignored in intraclass ratios.")
cmbn <- do.call(cbind,lapply(names(t)[t>1],
function(xx) rbind(combn(x[which(cl==xx)],2),class1=xx,class2=xx)))
} else if (method == "interclass") {
if (length(t) == 1) {
warning("Cannot compute interclass ratios when there is only one class - taking ratios vs ALL")
cmbn <- matrix(c(x,rep("ALL",length(x))),nrow=2,byrow=TRUE)
} else {
cmbn <- matrix(nrow=4,ncol=0)
for (name in names(t)) {
pos=which(cl==name);posn=which(cl!=name);
for (i in pos)
for (j in posn) {
cc <- c(x[i],x[j],cl[i],cl[j])
if (ncol(cmbn) > 0 &
any(apply(cmbn,2,function(xx) identical(rev(cc[1:2]),xx[1:2]))))
next;
cmbn <- cbind(cmbn,cc)
}
}
}
} else {
stop(paste("method",method,"not implemented."))
}
}
if (nrow(cmbn) == 2) rownames(cmbn) <- c("r1","r2")
else if (nrow(cmbn) == 4) rownames(cmbn) <- c("r1","r2","class1","class2")
return(cmbn)
}
## create a table with all protein ratios
combn.protein.tbl <- function(cmbn, reverse=FALSE, ...) {
ratios <- do.call(rbind,apply(cmbn,2,function(x) {
if (reverse)
if (length(x) == 4)
x <- x[c(2,1,4,3)]
else
x <- rev(x)
message("ratios ",x[2]," vs ",x[1])
r <- estimateRatio(channel1=x[1],channel2=x[2],...)
if (class(r)=="numeric") {
r <- t(r)
rownames(r) <- "prot1"
}
if (is.matrix(attr(r,"input")))
df <- data.frame(attr(r,"input"),r,stringsAsFactors=FALSE)
else
df <- data.frame(r,stringsAsFactors=FALSE)
if (!is.null(rownames(r)) && any(rownames(r) != as.character(seq_len(nrow(r)))))
df[,'ac']<- rownames(r)
rownames(df) <- NULL
df$r1 <- x[1]; df$r2 <- x[2]
if (length(x) == 4) {
df$class1 <- x[3]; df$class2 <- x[4]
}
return(df)
}))
attr(ratios,"arguments") <- list(...)
attr(ratios,"cmbn") <- cmbn
attr(ratios,"reverse") <- reverse
if (all(c("peptide","modif") %in% colnames(ratios)))
ratios <- ratios[order(ratios[,'peptide'],ratios[,'modif'],ratios$r1,ratios$r2),]
else
ratios <- ratios[order(ratios[,'ac'],ratios$r1,ratios$r2),]
return(ratios)
}
peptideRatios <- function(ibspectra,...,peptide=peptides(proteinGroup(ibspectra),columns=c("peptide","modif"))) {
proteinRatios(ibspectra,...,proteins=NULL,peptide=peptide)
}
peptideRatiosNotQuant <- function(ibspectra,...,
peptide=unique(fData(ibspectra)[!fData(ibspectra)[["use.for.quant"]],c("peptide","modif","site.probs")])) {
proteinRatios(ibspectra,...,proteins=NULL,peptide=peptide,use.for.quant.only=FALSE)
}
ratiosReshapeWide <- function(quant.tbl,vs.class=NULL,sep=".",cmbn=NULL,short.names=FALSE) {
id.cols <- c("group","ac","peptide","modif","gene_names","pep.siteprobs",
"ID","Description","Gene","gene_names.x","gene_names.y","swissprot.ac","modification")
id.cols <- id.cols[id.cols %in% colnames(quant.tbl)]
# remove unneccasry column [should not be present anyway in quant.tbl created with a current version]
quant.tbl$ratios.subset.1..by.column. <- NULL
if (!is.null(cmbn)) {
sel <- paste(quant.tbl$r1,quant.tbl$r2) %in% paste(cmbn[1,],cmbn[2,])
quant.tbl <- quant.tbl[sel,]
}
classes.unique <- "class1" %in% colnames(quant.tbl) &&
!any(is.na(quant.tbl$class1)) && !any(is.null(quant.tbl$class1)) &&
all(table(unique(quant.tbl[,c("r1","class1")])$class1)==1) &&
all(table(unique(quant.tbl[,c("r2","class2")])$class2)==1)
if (!is.null(vs.class)) {
if (!any(quant.tbl[,"class1"]==vs.class)) stop("vs.class set to ",vs.class,", but it is not present in quant table")
if (length(vs.class)==1 && short.names)
quant.tbl[,'comp']<- paste(quant.tbl$class2)
else
quant.tbl[,'comp']<- paste(quant.tbl$class2,quant.tbl$class1,sep="/")
quant.tbl <- quant.tbl[quant.tbl[["class1"]] %in% vs.class,]
} else {
if (classes.unique) {
quant.tbl[,'comp']<- paste(quant.tbl$class2,quant.tbl$class1,sep="/")
} else {
quant.tbl[,'comp']<- paste(quant.tbl$r2,quant.tbl$r1,sep="/")
}
}
ccomp <- unique(quant.tbl$comp)
fac <- factor(apply(quant.tbl[,id.cols,drop=FALSE],1,paste,collapse="."))
## check that all 'comp' entries are in the right order
if (!all(tapply(quant.tbl$comp,fac,function(x) all(x==ccomp))))
stop("quant.tbl not in the right format - comp column values different")
quant.tbl.num <- quant.tbl[,-(c(which(colnames(quant.tbl) %in%
c(id.cols,"comp","r1","r2","class1","class2"))))]
q.coltypes <- sapply(quant.tbl.num,class)
logical.cols <- q.coltypes == 'logical'
if (!all(q.coltypes %in% c("numeric","logical","integer"))) {
bad.cols <- q.coltypes[!q.coltypes %in% c("numeric","logical")]
stop("quantification table reshape does not work - ",
" columns ",paste0(names(bad.cols),collapse=","),
" have types ",paste0(bad.cols,collapse=","),".")
}
colnames.wide <- paste(rep(colnames(quant.tbl.num),each=length(ccomp)),
rep(ccomp,times=ncol(quant.tbl.num)),sep=sep)
q1 <- do.call(rbind,tapply(seq_len(nrow(quant.tbl)),fac,function(x) {
quant.tbl[x[1],id.cols]
},simplify=FALSE))
q2 <- do.call(rbind,tapply(seq_len(nrow(quant.tbl.num)),fac,function(x) {
unlist(quant.tbl.num[x,])
},simplify=FALSE))
colnames(q2) <- colnames.wide
if (!all(sapply(q2,is.numeric)))
stop("quantification table reshape did not work - columns should be numeric")
q2 <- as.data.frame(q2)
if (any(logical.cols)) {
col.n <- unlist(lapply((which(logical.cols)-1)*length(ccomp)+1,function(x) seq(x,length.out=length(ccomp))))
q2[,col.n] <- sapply(q2[,col.n],as.logical)
}
qq <- cbind(q1,q2)
rownames(qq) <- NULL
attrs <- attributes(quant.tbl)
for (a in names(attrs))
if (!a %in% c("row.names","names","class")) attr(qq,a) <- attrs[[a]]
qq
}
proteinRatios <-
function(ibspectra, noise.model, reporterTagNames=NULL,
proteins=reporterProteins(proteinGroup(ibspectra)),peptide=NULL,
cl=classLabels(ibspectra), combn.method="global",combn.vs=NULL,
symmetry=FALSE,
summarize=FALSE,summarize.method="mult.pval",
min.detect=NULL,strict.sample.pval=TRUE,strict.ratio.pval=TRUE,orient.div=0,
sign.level=0.05,sign.level.rat=sign.level,sign.level.sample=sign.level,
ratiodistr=NULL,zscore.threshold=NULL,variance.function="maxi",
combine=FALSE,p.adjust=NULL,reverse=FALSE,
cmbn=NULL,before.summarize.f=NULL,...) {
if ((!is.null(proteins) && !is.null(peptide)) ||
(is.null(proteins) && is.null(peptide)))
stop("supply either protein or peptides!")
if (!is.null(p.adjust) && !p.adjust %in% p.adjust.methods)
stop("p.adjust parameter must be one of '",paste(p.adjust.methods,collapse="','"),"'")
if (is.null(reporterTagNames)) reporterTagNames <- reporterTagNames(ibspectra)
if (is.null(cl) && is.null(cmbn)) stop("please supply class labels as argument cl or a cmbn matrix")
if (is.null(cmbn))
cmbn <- combn.matrix(reporterTagNames,combn.method,cl,vs=combn.vs)
if (ncol(cmbn) < 1)
stop("No possible combination for reporters [",paste(reporterTagNames,sep=","),"]",
" w/ classes [",paste(cl,sep=","),"] and combn.method ",combn.method," possible.",
" summarize=",ifelse(summarize,"TRUE","FALSE"))
ratios <- combn.protein.tbl(cmbn,reverse=reverse,
ibspectra=ibspectra,noise.model=noise.model,
ratiodistr=ratiodistr,
protein=proteins,peptide=peptide,
sign.level=sign.level,sign.level.rat=sign.level.rat,
sign.level.sample=sign.level.sample,
variance.function=variance.function,combine=combine,...)
attributes(ratios) = c(attributes(ratios),list(
classLabels=cl,combn.method=combn.method,symmetry=symmetry,summarize=FALSE,
sign.level.rat=sign.level.rat,sign.level.sample=sign.level.sample,
ratiodistr=ratiodistr,variance.function=variance.function,
combine=combine,p.adjust=p.adjust,reverse=reverse))
if (!is.null(before.summarize.f)) {
.check.isfunction(before.summarize.f)
ratios <- before.summarize.f(ibspectra,ratios)
}
if (summarize) {
message("summarizing ratios ...")
if (combn.method=="global")
stop("summarization not meaningful with combn.method='global'. ",
"Use combn.method='intraclass' or combn.method='interclass' to use ratios in or between classes.")
## calculate the number of combinations for each class-combination
if (reverse)
n.combination <- table(cmbn["class2",],cmbn["class1",])
else
n.combination <- table(cmbn["class1",],cmbn["class2",])
if (nrow(n.combination)==1 & ncol(n.combination)==1)
n.combination <- as.numeric(n.combination)
if (max(n.combination) < 2)
stop("Summarize=TRUE makes no sense with only one combination, set summarize to FALSE or class labels differently.")
if (is.null(min.detect))
min.detect <- n.combination
if (!is.null(proteins)) {
by.column <- "ac"
} else {
by.column <- "peptide"
if (!is.null(dim(peptide)) && "modif" %in% colnames(peptide))
by.column <- c("peptide","modif")
}
ratios <- summarize.ratios(ratios,by.column,
summarize.method,min.detect,n.combination,
strict.sample.pval,strict.ratio.pval,orient.div,
sign.level,sign.level.rat,sign.level.sample,
variance.function=variance.function,
ratiodistr=ratiodistr)
}
ratios[,'zscore'] <- .calc.zscore(ratios[,'lratio'])
if (!is.null(zscore.threshold))
ratios[,'is.significant'] <- ratios[,'p.value'] < sign.level & abs(ratios[,'zscore']) > zscore.threshold
if (symmetry) {
ratios.inv <- ratios
ratios.inv[,'lratio'] <- -ratios.inv[,'lratio']
ratios.inv[,c('r1','r2')] <- ratios.inv[,c('r2','r1')]
if ('class1' %in% colnames(ratios))
ratios.inv[,c('class1','class2')] <- ratios.inv[,c('class2','class1')]
ratios <- rbind(ratios,ratios.inv)
}
if (!is.null(p.adjust))
ratios <- adjust.ratio.pvalue(ratios,p.adjust,sign.level.rat)
return(ratios)
} # end proteinRatios
summarize.ratios <-
function(ratios,by.column="ac",summarize.method="mult.pval",min.detect=NULL,
n.combination=NULL,
strict.sample.pval=TRUE,strict.ratio.pval=TRUE,orient.div=0,
sign.level=0.05,sign.level.rat=sign.level,sign.level.sample=sign.level,
variance.function="maxi",ratiodistr=NULL) {
if (!summarize.method %in% c("mult.pval","mean"))
stop("Implemented summarize.methods: mult.pval, mean. ",
"Please choose one of them instead of ",summarize.method,".")
if (!all(c("class1","class2") %in% colnames(ratios)))
stop("'ratios' argument must specify classes w/ columns class1 and class2!")
classes <- unique(ratios[,c("class1","class2")])
if (is.null(n.combination)) {
cc <- unique(ratios[,c("r1","r2","class1","class2")])
n.combination <- table(cc[,"class1"],cc[,"class2"])
if (nrow(n.combination)==1 & ncol(n.combination)==1)
n.combination <- as.numeric(n.combination)
}
if (is.null(min.detect))
min.detect <- n.combination
if (!all(by.column %in% colnames(ratios)))
stop("'by.column' ",paste(by.column,collapse=",")," defined, but no such columns in 'ratios'.")
mean.r <- ifelse(is.null(ratiodistr),0,distr::q(ratiodistr)(0.5))
if (summarize.method == "mult.pval") {
result <- ddply(ratios,by.column,function(ratios.subset) {
ldply(seq_len(nrow(classes)),function(class_i) {
class1 <- classes[class_i,1]
class2 <- classes[class_i,2]
n.combination.c <- ifelse(is.matrix(n.combination),
n.combination[class1,class2],
n.combination)
ac.sel <- !is.na(ratios.subset$lratio) &
ratios.subset$class1 == class1 &
ratios.subset$class2 == class2
if (!any(ac.sel)) { ## no data for AC and classes
return(data.frame(lratio=NA,variance=NA,n.spectra=0,n.pos=0,n.neg=0,
p.value.rat=1,p.value.sample=1,p.value=1,is.significant=FALSE,r1=class1,r2=class2,
class1=class1,class2=class2,stringsAsFactors=FALSE))
}
n.pos <- sum(ratios.subset$lratio[ac.sel]>mean.r,na.rm=T)
n.neg <- sum(ratios.subset$lratio[ac.sel]<mean.r,na.rm=T)
is.pos <- (n.pos > n.neg && n.neg <= orient.div)
is.neg <- (n.neg > n.pos && n.pos <= orient.div)
## ratio summarization
good.sel <- ac.sel
if (!strict.ratio.pval) {
## take only positive/negative spectra for ratio summarization
if (is.pos)
good.sel <- good.sel & ratios.subset$lratio>mean.r
if (is.neg)
good.sel <- good.sel & ratios.subset$lratio<mean.r
}
ac.ratios <- ratios.subset$lratio[good.sel]
ac.vars <- ratios.subset$variance[good.sel]
sample.var <- weightedVariance(ac.ratios,weights=1/ac.vars)
estim.var <- 1/sum(1/ac.vars)
variance <- switch(variance.function,
maxi = max(estim.var,sample.var,na.rm=T),
ev = estim.var,
wsv = sample.var
)
lratio <- weightedMean(ac.ratios,weights=1/ac.vars)
## p-value computation
p.value.rat <- pnorm(lratio,mean=mean.r,sd=sqrt(variance),lower.tail=lratio<mean.r)
p.value.sample <- calculate.mult.sample.pvalue(
ac.ratios, ratiodistr, strict.pval=strict.sample.pval, lower.tail=n.neg>n.pos,
n.possible.val = n.combination.c, n.observed.val = sum(ac.sel))
p.value <- calcProbXDiffNormals(ratiodistr,mean.r,sqrt(variance),alternative="two-sided")
min.detect.c <- ifelse(is.matrix(min.detect),min.detect[class1,class2],min.detect)
## significance
is.significant <- (p.value <= sign.level) &&
(sum(ac.sel) >= min.detect.c) &&
(is.pos | is.neg)
return(data.frame(lratio=lratio,variance=variance,
n.spectra=min(ratios.subset$n.spectra[good.sel],na.rm=TRUE),n.pos=n.pos,n.neg=n.neg,
p.value.rat=p.value.rat,p.value.sample=p.value.sample,p.value=p.value,
is.significant=is.significant,r1=class1,r2=class2,
class1=class1,class2=class2,stringsAsFactors=FALSE))
})
},.parallel=isTRUE(getOption('isobar.parallel')))
if (is.null(result)) stop("Error summarizing.")
attributes(result) = c(attributes(result),
list(summarize=TRUE,
by.column=by.column,
min.detect=min.detect,
orient.div=orient.div))
attributes(result) = c(attributes(result),
attributes(ratios)[!names(attributes(ratios)) %in% names(attributes(result))])
return(result)
} else if (summarize.method=="mean") {
stop("summarize method mean not anymore available.")
# do.call(rbind,lapply(unique(ratios$ac),function(ac) {
# ac.sel <- (ratios$ac==ac) & !is.na(ratios$lratio)
# if (!any(ac.sel)) {
# return(data.frame(ac=ac,lratio=NA,stringsAsFactors=FALSE))
# }
# return(data.frame(ac=ac,lratio=mean(ratios$lratio[ac.sel]),stringsAsFactors=FALSE))
# }))
} else {
stop (paste("summarize method",summarize.method,"not implemented."))
}
}
.calc.zscore <- function(lratio) {
s.median <- median(lratio,na.rm=TRUE)
s.mad <- mad(lratio,na.rm=TRUE)
(lratio-s.median)/s.mad
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### weightedVariance and weightedMean generics and functions
### replace by Hmisc functions?
setGeneric("weightedMean", function(data, weights,trim=0) standardGeneric("weightedMean"))
setGeneric("weightedVariance", function(data, weights, mean.estimate,trim=0) standardGeneric("weightedVariance"))
setMethod("weightedVariance",
signature(data = "numeric", weights = "numeric", mean.estimate = "missing"),
function(data, weights, trim=0) {
mean.estimate <- weightedMean(data,weights,trim=trim)
weightedVariance(data,weights=weights,mean.estimate=mean.estimate,trim=trim)
}
)
setMethod("weightedVariance",
signature(data = "numeric", weights = "numeric", mean.estimate = "numeric"),
function(data, weights, mean.estimate, trim=0) {
# Validation? if (length(data)==1) { return(0); }
# Should we rescale the weights? weights=1/sum(weights)
# weighted sample variance - for not normally distributed data
# see http://www.gnu.org/software/gsl/manual/html_node/Weighted-Samples.html and
# http://en.wikipedia.org/wiki/Weighted_mean#Weighted_sample_variance
sel <- !is.na(data) & !is.na(weights)
weights <- weights[sel]
data <- data[sel]
if (trim < 0 || trim > 0.5)
stop("trim has to be between 0 and 0,5")
if (trim > 0) {
sel <- data > quantile(data,trim) & data < quantile(data,1-trim)
weights <- weights[sel]
data <- data[sel]
}
V1 <- sum(weights)
V2 <- sum(weights**2)
variance <- ( V1 / (V1**2 - V2) ) * sum(weights*(data-mean.estimate)**2)
n = length(data)
wik = (data-mean.estimate)**2
w_bar_k = mean((data-mean.estimate)**2)
var_hat_vk = median((wik-w_bar_k)**2) ## for shrinkage..
return(c(variance,var_hat_vk))
}
)
setMethod("weightedMean",
signature(data = "numeric", weights = "numeric"),
function(data, weights, trim = 0) {
sel <- !is.na(data) & !is.na(weights)
weights <- weights[sel]
data <- data[sel]
if (trim < 0 | trim > 0.5)
stop("trim has to be between 0 and 0,5")
if (trim > 0) {
sel <- data > quantile(data,trim) & data < quantile(data,1-trim)
weights <- weights[sel]
data <- data[sel]
}
return(
sum(data * weights) / sum(weights)
)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.