Nothing
.tuning <- function(data.in, datacgh.in, allest, alphanewmat, powerunbal, null.dists.tune, test.stat="wmw", seqg, nperm, pi0, fdrcut=0.25, nresamp=100, gridnr=30, minim=10, a=a, nosamp=nosamp, shiftsam=shiftsam, verbose){
################################################################################################
# function that does the tuning
################################################################################################
probs <- seq(0,gridnr/(gridnr+1), 1/(gridnr+1))
probstruncate <- probs[probs<=((100-minim)/100)]
allest2 <- allest[seqg]
alphanewmat2<-alphanewmat[seqg,]
data.in2 <- data.in[seqg,]
datacgh.in2 <- datacgh.in[seqg,]
powerunbal2 <- powerunbal[seqg]
quant <- quantile(powerunbal2, probs=probstruncate)
whatthrmat <- c()
for (j in 1:nresamp){
if ((j %% 50) == 0){ if (verbose){ cat(paste(j, "of", nresamp, "resamples done, and counting...", sep=" "), "\n") } }
shiftsampled <- sample(shiftsam, nrow(data.in2), replace=TRUE)
pi0cutoff <- quantile(shiftsampled, pi0)
shiftsampled <- sapply(shiftsampled, function(x){if (x <= pi0cutoff){ return(0) } else { return(x)}})
redrawall <- t(sapply(1:nrow(data.in2), .redrawcol, allest=allest2, alphanmat=alphanewmat2, shiftsampled=shiftsampled, data2=data.in2, a=a, nosamp=nosamp))
newdata <- cbind(datacgh.in2[,-(1:3)], redrawall)
if(test.stat=="wmw"){ test.resamp <- apply(newdata, 1, .wmw.test.stats, nosamp, a) } else { test.resamp <- apply(newdata, 1, .wcvm.test.stats, nosamp, a) }
rawpvals.test.resamp <- .rawps(test.resamp, null.dists.tune, nperm)
rawp_powerunbal <- cbind(rawpvals.test.resamp, powerunbal2)
whatthresh <- sapply(quant, .countdiscoveries, rawp_powerunbal=rawp_powerunbal, fdrcutoff=fdrcut)
whatthrmat <- rbind(whatthrmat, whatthresh)
}
whatthrmean <- apply(whatthrmat, 2, mean)
# print(whatthrmean)
bestthr <- which.max(whatthrmean)
return(quant[bestthr])
}
.uni.an.wcvm <- function(data.both, nosamp, a, nperm, low.ci.thres=0.10, verbose=TRUE){
################################################################################################
# function that performs a univariate analysis with the weighted cvm-statistic,
# with the efficient p-value calculation.
################################################################################################
# Calculate the observed weighted cvm test statistics
wcvm.obs <- apply(data.both, 1, .wcvm.test.stats, nosamp, a)
# Actual analysis
data.perm <- data.both
# data.perm <- rbind(data.perm, data.perm)
total.genes.on.chr <- dim(data.both)[1]
remainders <- c(1:dim(data.perm)[1])
steps <- sort(unique(c(0,25,50,100,150,200,250,500,750,1000,1250,1500,1750,2000,2250,2500,2750,3000,3500,4000,4500,5000,5500,6000,6500,7000,7500,8000,8500,9000,9500,seq(from=10000,to=50000,by=1000),nperm)))
steps <- steps[steps <= nperm]
wcvm.mat <- c()
for(j in 1:(length(steps)-1)){
for(i in 1:(steps[j+1]-steps[j])){
if (verbose){ if (((steps[j]+i) %% 100) == 0){ cat(paste(steps[j]+i," of ", steps[length(steps)]," permutations done, and counting...", sep=""), "\n") } }
x <- sample(1:nosamp,nosamp) + a*nosamp
data.ran <- cbind(data.perm[, c(1:(a*nosamp))], data.perm[, x])
wcvm.ran <- apply(data.ran, 1, .wcvm.test.stats, nosamp, a)
wcvm.mat <- cbind(wcvm.mat, wcvm.ran)
}
# compute pval bound and delete row from data set and permutation set when 0.001 < lower bound
perm.and.obs <- cbind(wcvm.mat, wcvm.obs)
pvals <- apply(perm.and.obs, 1, .countth)/steps[j+1]
pbound <- sapply(pvals, .pvalbound, steps[j+1])
index <- cbind(1:length(pbound), pbound)
pind <- index[pbound < low.ci.thres, 1]
if (length(pind) > 2){ remainders <- remainders[pind] } else { pind <- c(1:length(remainders)) }
wcvm.mat <- wcvm.mat[pind, ]
wcvm.obs <- wcvm.obs[pind]
if (verbose){ cat(paste(steps[j]+i, "of", steps[length(steps)], " permutations done, and", length(remainders), "of", total.genes.on.chr, "genes remaining...", sep=" "), "\n") }
data.perm <- data.both[remainders, , drop=FALSE]
}
# Generate list with raw p-values for all genes
raw.pvals <- cbind(c(1:dim(data.both)[1]), rep(1,dim(data.both)[1]))
raw.pvals[remainders,2] <- pvals[pind]
# Calculate BH-adjusted p-values
adjpvals <- cbind(raw.pvals, p.adjust(raw.pvals[,2], "BH"))
adjpvals[,2:3] <- round(adjpvals[,2:3], digits=4)
# Some editing for presentation purposes
colnames(adjpvals) <- c("clone.id", "raw.p", "adj.p")
return(adjpvals)
}
.uni.an.prob <- function(data.both, nosamp, a, nperm, low.ci.thres=0.10, verbose=TRUE){
################################################################################################
# function that performs a univariate analysis with the normalized weighted wm-statistic,
# with the efficient p-value calculation.
################################################################################################
# Calculate the observed weighted cvm test statistics
prob.obs <- apply(data.both, 1, .prob.test.stats, nosamp, a)
# Actual analysis
data.perm <- data.both
total.genes.on.chr <- dim(data.both)[1]
remainders <- c(1:dim(data.perm)[1])
steps <- sort(unique(c(0,25,50,100,150,200,250,500,750,1000,1250,1500,1750,2000,2250,2500,2750,3000,3500,4000,4500,5000,5500,6000,6500,7000,7500,8000,8500,9000,9500,seq(from=10000,to=50000,by=1000),nperm)))
steps <- steps[steps <= nperm]
prob.mat <- c()
for(j in 1:(length(steps)-1)){
for(i in 1:(steps[j+1]-steps[j])){
if (verbose){ if (((steps[j]+i) %% 100) == 0){ cat(paste(steps[j]+i," of ", steps[length(steps)]," permutations done, and counting...", sep=""), "\n") } }
x <- sample(1:nosamp,nosamp) + a*nosamp
data.ran <- cbind(data.perm[,c(1:(a*nosamp))],data.perm[,x])
prob.ran <- apply(data.ran, 1, .prob.test.stats, nosamp, a)
prob.mat <- cbind(prob.mat, prob.ran)
}
# compute pval bound and delete row from data set and permutation set when 0.001 < lower bound
perm.and.obs <- cbind(prob.mat, prob.obs)
pvals <- apply(perm.and.obs, 1, .countth)/steps[j+1]
pbound <- sapply(pvals, .pvalbound, steps[j+1])
index <- cbind(1:length(pbound), pbound)
pind <- index[pbound < low.ci.thres,1]
if (length(pind) > 2){ remainders <- remainders[pind] } else { pind <- c(1:length(remainders)) }
prob.mat <- prob.mat[pind,]
prob.obs <- prob.obs[pind]
# cat(paste(length(remainders), "of", total.genes.on.chr, "genes remaining...", sep=" "), "\n")
if (verbose){ cat(paste(steps[j]+i, "of", steps[length(steps)], " permutations done, and", length(remainders), "of", total.genes.on.chr, "genes remaining...", sep=" "), "\n") }
data.perm <- data.both[remainders, , drop=FALSE]
}
# Generate list with raw p-values for all genes
raw.pvals <- cbind(c(1:dim(data.both)[1]), rep(1,dim(data.both)[1]))
raw.pvals[remainders, 2] <- pvals[pind]
# Calculate BH-adjusted p-values
adjpvals <- cbind(raw.pvals, p.adjust(raw.pvals[,2], "BH"))
adjpvals[,2:3] <- round(adjpvals[,2:3], digits=4)
# Some editing for presentation purposes
colnames(adjpvals) <- c("clone.id", "raw.p", "adj.p")
return(adjpvals)
}
.R2.stat <- function(data.both, a, nosamp){
################################################################################################
# function that calculates the R^2 value.
################################################################################################
# Define function for calculated of numerator of R^2 statistic
r2.numerator <- function(data.both, a, nosamp){
alpha.ind <- matrix(data.both[c(1:(a*nosamp))], ncol=a, byrow=TRUE)
alpha.1.mom <- apply(alpha.ind, 2, mean)
alpha.2.mom <- t(alpha.ind) %*% alpha.ind / nosamp
cgh.em <- cbind(matrix(data.both[c(1:(a*nosamp))], ncol=2, byrow=TRUE), data.both[c((a*nosamp+1):((a+1)*nosamp))])
mu1 <- 1 / nosamp*sum(cgh.em[,3]*(cgh.em[,1]*alpha.1.mom[2] - alpha.2.mom[2,1]) / (alpha.2.mom[1,1]*alpha.1.mom[2]-alpha.1.mom[1]*alpha.2.mom[1,2]))
mu2 <- 1 / nosamp*sum(cgh.em[,3]*(cgh.em[,2]*alpha.1.mom[1] - alpha.2.mom[2,1]) / (alpha.2.mom[2,2]*alpha.1.mom[1]-alpha.1.mom[2]*alpha.2.mom[1,2]))
call.means <- matrix(c(mu1, mu2), nrow=1)
return(sum((data.both[a*nosamp+c(1:nosamp)] - alpha.ind %*% t(call.means))^2)/(nosamp-1))
}
# Calculate R^2 statistic
r2.denominator <- apply(data.both[, c((a*nosamp+1):((a+1)*nosamp))], 1, var)
numerator <- apply(data.both, 1, r2.numerator, a=a, nosamp=nosamp)
R2 <- 1- numerator / r2.denominator
for (i in 1:length(R2)){ R2[i] <- min(1, max(0, R2[i])) }
return(R2)
}
.shrin.an.prob <- function(data.both, nosamp, a, nperm, low.ci.thres=0.1, datacgh.org, verbose=TRUE){
################################################################################################
# function that performs a regional analysis with the normalized weighted mw-statistic, also
# called the probability in this context, with shrinkage per region and efficient p-value calculation.
################################################################################################
shrunken.prob.test.stats <- function(reg.bounds.lambda, data.both, nosamp, a){
################################################################################################
# Function that reshuffles the shrunken test statistics to get them in the right format.
################################################################################################
shrunken.test.stats.wrong.format <- apply(reg.bounds.lambda, 1, "shrunken.prob.test.stats.per.reg", cgh.em=data.both, nosamp=nosamp, a=a)
if (is.numeric(shrunken.test.stats.wrong.format)){
shrunken.test.stats <- shrunken.test.stats.wrong.format
}
if (is.matrix(shrunken.test.stats.wrong.format)){
shrunken.test.stats <- as.numeric(shrunken.test.stats.wrong.format)
}
if (is.list(shrunken.test.stats.wrong.format)){
shrunken.test.stats <- NULL
for (i in 1:dim(reg.bounds.lambda)[1]){
shrunken.test.stats <- c(shrunken.test.stats, shrunken.test.stats.wrong.format[[i]])
}
}
return(shrunken.test.stats)
}
shrunken.prob.test.stats.per.reg <- function(bounds.lambda, cgh.em, nosamp, a){
################################################################################################
# Function that calculates the normalized weighted MW test statistics for all clones in a region.
################################################################################################
lambda <- bounds.lambda[3]
bounds <- bounds.lambda[c(1:2)]
if (bounds[1] != bounds[2]){
cgh.em <- cgh.em[c(bounds[1]:bounds[2]),]
marg.test.stats <- apply(cgh.em,1, .prob.test.stats, nosamp,a)
shrunken.test.stats <- lambda*marg.test.stats + (1-lambda)*mean(marg.test.stats)
} else {
shrunken.test.stats <- .wcvm.test.stats(cgh.em[bounds[1],],nosamp,a)
}
return(shrunken.test.stats)
}
if (verbose){ cat("construct regions...", "\n") }
reg.bounds <- .find.cgh.reg.data(data.both, a, nosamp, datacgh.org)
if (verbose){ cat("calculate shrinkage parameters...", "\n") }
lambda.of.reg <- apply(reg.bounds, 1, .lambda.per.reg, data.both=data.both, a=a, nosamp=nosamp)
reg.bounds.lambda <- cbind(reg.bounds, lambda.of.reg)
colnames(reg.bounds.lambda) <- NULL
if (verbose){ cat("calculate observed test statistics...", "\n") }
shrunken.prob.obs <- shrunken.prob.test.stats(reg.bounds.lambda, data.both, nosamp, a)
if (verbose){ cat("calculate null distribution...", "\n") }
prob.obs <- shrunken.prob.obs
data.perm <- data.both
total.genes.on.chr <- dim(data.both)[1]
remainders <- c(1:dim(data.perm)[1])
steps <- sort(unique(c(0,25,50,100,150,200,250,500,750,1000,1250,1500,1750,2000,2250,2500,2750,3000,3500,4000,4500,5000,5500,6000,6500,7000,7500,8000,8500,9000,9500,seq(from=10000,to=50000,by=1000),nperm)))
steps <- steps[steps <= nperm]
prob.mat <- c()
for(j in 1:(length(steps)-1)){
for(i in 1:(steps[j+1]-steps[j])){
if (verbose){ if (((steps[j]+i) %% 100) == 0){ cat(paste(steps[j]+i," of ", steps[length(steps)]," permutations done, and counting...", sep=""), "\n") } }
x <- sample(1:nosamp,nosamp) + a*nosamp
data.ran <- cbind(data.perm[, c(1:(a*nosamp))], data.perm[,x])
prob.ran <- shrunken.prob.test.stats(reg.bounds.lambda, data.ran, nosamp, a)
prob.mat <- cbind(prob.mat, prob.ran[remainders])
}
# compute pval bound and delete row from data set and permutation set when 0.001 < lower bound
perm.and.obs <- cbind(prob.mat, prob.obs)
pvals <- apply(perm.and.obs, 1, .countth)/steps[j+1]
pbound <- sapply(pvals, .pvalbound, steps[j+1])
index <- cbind(1:length(pbound), pbound)
pind <- index[pbound < low.ci.thres, 1]
if (length(pind) > 2){ remainders <- remainders[pind] } else { pind <- c(1:length(remainders)) }
prob.mat <- prob.mat[pind, , drop=FALSE]
prob.obs <- prob.obs[pind]
if (verbose){ cat(paste(steps[j]+i, "of", steps[length(steps)], " permutations done, and", length(remainders), "of", total.genes.on.chr, "genes remaining...", sep=" "), "\n") }
}
# Generate list with raw p-values for all genes
raw.pvals <- cbind(c(1:dim(data.both)[1]), rep(1,dim(data.both)[1]))
raw.pvals[remainders,2] <- pvals[pind]
# Calculate BH-adjusted p-values
adjpvals <- cbind(raw.pvals, p.adjust(raw.pvals[,2], "BH"))
adjpvals[,2:3] <- round(adjpvals[,2:3], digits=4)
reg.details <- NULL
for (i in 1:dim(reg.bounds.lambda)[1]){
reg.length <- (reg.bounds.lambda[i,2] - reg.bounds.lambda[i,1] +1)
reg.details <- rbind(reg.details, cbind(rep(i,reg.length), matrix(rep(reg.bounds.lambda[i,], reg.length), nrow=reg.length, byrow=TRUE)))
}
adjpvals <- cbind(adjpvals[,1], reg.details, adjpvals[,2:3])
colnames(adjpvals) <- c("clone.id", "reg.id", "begin.reg", "end.reg", "shrinkage", "raw.p", "adj.p")
return(adjpvals)
}
.shrin.an.wcvm <- function(data.both, nosamp, a, nperm, low.ci.thres=0.1, datacgh.org, verbose=TRUE){
################################################################################################
# function that performs a regional analysis with the weighted cvm-statistic,
# with shrinkage per region and efficient p-value calculation.
################################################################################################
shrunken.wcvm.test.stats <- function(reg.bounds.lambda, data.both, nosamp, a){
################################################################################################
# Function that reshuffles the shrunken test statistics to get them in the right format.
################################################################################################
shrunken.test.stats.wrong.format <- apply(reg.bounds.lambda, 1, "shrunken.wcvm.test.stats.per.reg", cgh.em=data.both, nosamp=nosamp, a=a)
if (is.numeric(shrunken.test.stats.wrong.format)){
shrunken.test.stats <- shrunken.test.stats.wrong.format
}
if (is.matrix(shrunken.test.stats.wrong.format)){
shrunken.test.stats <- as.numeric(shrunken.test.stats.wrong.format)
}
if (is.list(shrunken.test.stats.wrong.format)){
shrunken.test.stats <- NULL
for (i in 1:dim(reg.bounds.lambda)[1]){
shrunken.test.stats <- c(shrunken.test.stats, shrunken.test.stats.wrong.format[[i]])
}
}
return(shrunken.test.stats)
}
shrunken.wcvm.test.stats.per.reg <- function(bounds.lambda, cgh.em, nosamp, a){
################################################################################################
# Function that calculates the weighted CvMe test statistics for all clones in a region.
################################################################################################
lambda <- bounds.lambda[3]
bounds <- bounds.lambda[c(1:2)]
if (bounds[1] != bounds[2]){
cgh.em <- cgh.em[c(bounds[1]:bounds[2]),]
marg.test.stats <- apply(cgh.em, 1, .wcvm.test.stats, nosamp, a)
shrunken.test.stats <- lambda * marg.test.stats + (1-lambda) * mean(marg.test.stats)
} else {
shrunken.test.stats <- .wcvm.test.stats(cgh.em[bounds[1],], nosamp, a)
}
return(shrunken.test.stats)
}
if (verbose){ cat("construct regions...", "\n") }
reg.bounds <- .find.cgh.reg.data(data.both, a, nosamp, datacgh.org)
if (verbose){ cat("calculate shrinkage parameters...", "\n") }
lambda.of.reg <- apply(reg.bounds, 1, .lambda.per.reg, data.both=data.both, a=a, nosamp=nosamp)
reg.bounds.lambda <- cbind(reg.bounds, lambda.of.reg)
colnames(reg.bounds.lambda) <- NULL
if (verbose){ cat("calculate observed test statistics...", "\n") }
shrunken.wcvm.obs <- shrunken.wcvm.test.stats(reg.bounds.lambda, data.both, nosamp, a)
if (verbose){ cat("calculate null distribution...", "\n") }
wcvm.obs <- shrunken.wcvm.obs
data.perm <- data.both
total.genes.on.chr <- dim(data.both)[1]
remainders <- c(1:dim(data.perm)[1])
steps <- sort(unique(c(0,25,50,100,150,200,250,500,750,1000,1250,1500,1750,2000,2250,2500,2750,3000,3500,4000,4500,5000,5500,6000,6500,7000,7500,8000,8500,9000,9500,seq(from=10000,to=50000,by=1000),nperm)))
steps <- steps[steps <= nperm]
wcvm.mat <- c()
for(j in 1:(length(steps)-1)){
for(i in 1:(steps[j+1]-steps[j])){
if (verbose){ if (((steps[j]+i) %% 100) == 0){ cat(paste(steps[j]+i," of ", steps[length(steps)], " permutations done, and counting...", sep=""), "\n") } }
x <- sample(1:nosamp,nosamp) + a * nosamp
data.ran <- cbind(data.perm[, c(1:(a*nosamp))],data.perm[,x])
wcvm.ran <- shrunken.wcvm.test.stats(reg.bounds.lambda, data.ran, nosamp, a)
wcvm.mat <- cbind(wcvm.mat, wcvm.ran[remainders])
}
# compute pval bound and delete row from data set and permutation set when 0.001 < lower bound
perm.and.obs <- cbind(wcvm.mat, wcvm.obs)
pvals <- apply(perm.and.obs, 1, .countth)/steps[j+1]
pbound <- sapply(pvals, .pvalbound, steps[j+1])
index <- cbind(1:length(pbound), pbound)
pind <- index[pbound < low.ci.thres,1]
if (length(pind) > 2){ remainders <- remainders[pind] } else { pind <- c(1:length(remainders)) }
wcvm.mat <- wcvm.mat[pind, , drop=FALSE]
wcvm.obs <- wcvm.obs[pind]
if (verbose){ cat(paste(steps[j]+i, "of", steps[length(steps)], " permutations done, and", length(remainders), "of", total.genes.on.chr, "genes remaining...", sep=" "), "\n") }
}
# Generate list with raw p-values for all genes
raw.pvals <- cbind(c(1:dim(data.both)[1]), rep(1,dim(data.both)[1]))
raw.pvals[remainders,2] <- pvals[pind]
# Calculate BH-adjusted p-values
adjpvals <- cbind(raw.pvals, p.adjust(raw.pvals[,2], "BH"))
adjpvals[,2:3] <- round(adjpvals[,2:3], digits=4)
reg.details <- NULL
for (i in 1:dim(reg.bounds.lambda)[1]){
reg.length <- (reg.bounds.lambda[i,2] - reg.bounds.lambda[i,1] +1)
reg.details <- rbind(reg.details, cbind(rep(i, reg.length), matrix(rep(reg.bounds.lambda[i,], reg.length), nrow=reg.length, byrow=TRUE)))
}
adjpvals <- cbind(adjpvals[,1], reg.details, adjpvals[,2:3])
colnames(adjpvals) <- c("clone.id", "reg.id", "begin.reg", "end.reg", "shrinkage", "raw.p", "adj.p")
return(adjpvals)
}
.lambda.per.reg <- function(bounds, data.both, a, nosamp){
################################################################################################
# function that calculates the shrinkage factor for a region.
################################################################################################
# if the region consists of more than one clone,
if (bounds[1] != bounds[2]){
# select data from region
data.both.org <- matrix(data.both[c(bounds[1]:bounds[2]),], nrow=(bounds[2]-bounds[1]+1))
data.both.org <- data.both.org[, c((dim(data.both)[2] - dim(data.both)[2] / 3 + 1):dim(data.both)[2])]
# calculate mean correlation, and shrinkage parameter
slh <- cor(t(data.both.org), method="spearman")
lambda.final <- 1 - max(0, mean(slh[upper.tri(slh)]))
}
# if region consists of one clone, set shrinkage parameter equal to 1.
if (bounds[1] == bounds[2]){ lambda.final <- 1 }
return(lambda.final)
}
.find.cgh.reg.data <- function(data.both, a, nosamp, datacgh.org){
################################################################################################
# Compress the data to regions. A new regions starts when the CGH probabilities change.
################################################################################################
# every change in the call probabilities implies a new region
cgh.data <- data.both[,c(1:(a*nosamp))]
splitter <- list()
splitter[[1]] <- c(1)
index.temp <- 1
j <- 1
for (i in 1:(dim(cgh.data)[1]-1)){
if (all(cgh.data[i,] == cgh.data[i+1,])){
index.temp <- c(index.temp,i+1)
splitter[[j]] <- index.temp
} else {
index.temp <- i+1
j <- j + 1
splitter[[j]] <- index.temp
}
}
region.details <- NULL
for (i in 1:length(splitter)){
region.details <- rbind(region.details, c(min(splitter[[i]]), max(splitter[[i]])))
}
return(region.details)
}
.pvalbound <- function(pval, np){
################################################################################################
# p-value lower conf. bound. 3.09 = Z_{0.001}
################################################################################################
return(pval-sqrt(pval * (1-pval) / np) * 3.09)
}
.countth.eff <- function(statlist, threshold){
################################################################################################
# counts the number of values in statlist exceed threshold.
################################################################################################
return(length(statlist[statlist >= threshold]))
}
.countth <- function(statlist){
################################################################################################
# Counts the number of values in statlist exceed threshold.
################################################################################################
threshold <- as.numeric(statlist[length(statlist)])
statlist <- as.numeric(statlist[c(1:(length(statlist)-1))])
return(length(statlist[statlist >= threshold]))
}
.pval.perm.marg <- function(observed, permuted, nperm){
################################################################################################
# Calculates the marginal p-values.
# The p-value of gene is calculated using the null-ditribution
# resulting from the permutations of that gene.
################################################################################################
perm.and.obs <- cbind(permuted, observed)
return(apply(perm.and.obs, 1, .countth) / nperm)
}
.rawps <- function(stats.obs, nulldists, nperm){
################################################################################################
# calculate the raw p-values using the observed test statistics
# and their null distribution.
################################################################################################
pval.ln <- .pval.perm.marg(stats.obs, nulldists, nperm)
return(pval.ln)
}
.wcvm.test.stats <- function(cgh.em, nosamp, a){
################################################################################################
# function that calculates the wcvm test statistics for one clone.
################################################################################################
# makes a matrix of call probs
cgh.2cat <- matrix(cgh.em[c(1:(a*nosamp))], ncol=a, byrow=TRUE)
# calculate contrast coefficients
alphaas <- t(cgh.2cat) %*% cgh.2cat
cs <- as.numeric(solve(alphaas) %*% matrix(c(-1,1), ncol=1))
cgh.em <- cbind(cgh.2cat, cgh.em[c((a*nosamp+1):((a+1)*nosamp))])
cgh.em <- cbind(cgh.em[order(cgh.em[,3]),], rep(1/dim(cgh.em)[1], dim(cgh.em)[1]))
cgh.em <- cbind(cgh.em, cumsum(cgh.em[, 4]), cumsum(cgh.em[, 1] * cgh.em[, dim(cgh.em)[2]]) * cs[1], cumsum(cgh.em[, 2] * cgh.em[, dim(cgh.em)[2]]) * cs[2])
test.stat <- -sum(cgh.em[,7] + cgh.em[,6]) / nosamp
return(test.stat)
}
.wmw.test.stats <- function(cgh.em, nosamp, a){
################################################################################################
# function that calculates the wMW-like test statistics for one clone.
################################################################################################
# makes a matrix; first 2 call probs, last is expression
cgh.em <- cbind(matrix(cgh.em[c(1:(a*nosamp))], ncol=a,byrow=TRUE), cgh.em[c((a*nosamp+1):((a+1)*nosamp))])
# sort by expression
cgh.em <- cgh.em[order(cgh.em[,(a+1)]),]
# shift probs 1 position, because I_{Xi < Xj}
cgh.em <- cbind(cgh.em, rbind(rep(0,a), apply(cgh.em[,1:a], 2, cumsum)[-nosamp,]))
# '2' = prob. class 1, a+2 = cum. prob class 2
test.stat <- sum(cgh.em[,a+2]*cgh.em[,2])
return(test.stat)
}
.prob.test.stats <- function(cgh.em, nosamp, a){
################################################################################################
# function that calculates the wMW-like test statistics for one clone.
################################################################################################
# makes a matrix of call probs
cgh.2cat <- matrix(cgh.em[c(1:(a*nosamp))], ncol=a, byrow=TRUE)
# calculate contrast coefficients
alphaas <- t(cgh.2cat) %*% cgh.2cat
cs <- c(det(alphaas), alphaas[1,2]*sum(alphaas)/2)
# makes a matrix; first 2 call probs, last is expression
cgh.em <- cbind(matrix(cgh.em[c(1:(a*nosamp))], ncol=a,byrow=TRUE), cgh.em[c((a*nosamp+1):((a+1)*nosamp))])
# sort by expression
cgh.em <- cgh.em[order(cgh.em[,(a+1)]),]
# shift probs 1 position, because I_{Xi < Xj}
cgh.em <- cbind(cgh.em,rbind(rep(0,a), apply(cgh.em[,1:a], 2, cumsum)[-nosamp,]))
# '2' = prob. class 1, a+2 = cum. prob class 2
test.stat <- (sum(cgh.em[,a+2] * cgh.em[,2]) - cs[2]) / cs[1]
return(test.stat)
}
.pretest <- function(alphascgh){
################################################################################################
# function that determines which hypothesis is tested (loss vs. no-loss or no-gain vs. gain).
# also merges the call probabilities of the aberrated class that is not-dominant.
################################################################################################
alphas <- alphascgh[1:3]
probs <- matrix(alphascgh[-(1:3)], ncol=3, byrow=TRUE)
if (alphas[1] >= alphas[3]){
probs2 <- cbind(probs[,1], probs[,2]+probs[,3])
alphas2 <- c(alphas[1], alphas[2]+alphas[3])
return(c(1, alphas2, as.vector(t(probs2))))
} else {
probs2 <- cbind(probs[,1]+probs[,2],probs[,3])
alphas2 <- c(alphas[1]+alphas[2], alphas[3])
return(c(2, alphas2, as.vector(t(probs2))))
}
}
.alphaest <- function(cgh.em, nosamp, a){
################################################################################################
# function that calculated the first order moments of the call probabilities.
################################################################################################
cgh.em <- matrix(cgh.em[c(1:(a*nosamp))], ncol=a,byrow=TRUE)
return(apply(cgh.em,2,mean))
}
.alphabivariate <- function(cgh.em, nosamp, a){
################################################################################################
# function that calculated the second order moments of the call probabilities.
################################################################################################
cgh.em <- matrix(cgh.em[c(1:(a*nosamp))], ncol=a, byrow=TRUE)
cgh.em1 <- cgh.em[,1]
cgh.em2 <- cgh.em[,2]
return(c(1 / nosamp * (cgh.em1 %*% cgh.em1), 1 / nosamp * (cgh.em2 %*% cgh.em2), 1 / nosamp * (cgh.em1 %*% cgh.em2)))
}
.probs2calls <- function(problist){
################################################################################################
# function that converts sof calls to hard calls
################################################################################################
maxprobposition <- which.max(problist)
call.list <- rep(0, length(problist))
call.list[maxprobposition] <- 1
return(call.list)
}
.shift.est <- function(row, nosamp, a, data2, alphabmat, alphanmat, minalphathr){
################################################################################################
# function that estimates the effect size.
# if a gene exceeds the power unbalance threshold NA is returned.
################################################################################################
cgh.em <- cbind(matrix(data2[row,c(1:(a*nosamp))], ncol=a, byrow=TRUE), data2[row,c((a*nosamp+1):((a+1)*nosamp))])
alphasbiv <- alphabmat[row,]
alphas <- alphanmat[row,]
c1 <- (alphasbiv[1]/alphasbiv[3] - alphas[1]/alphas[2])^(-1)
c2 <- (alphasbiv[2]/alphasbiv[3] - alphas[2]/alphas[1])^(-1)
if (is.na(c1) | is.na(c2)) {
return(NA)
} else {
if (max(c1,c2) >= minalphathr) {
return(NA)
} else {
mu1 <- 1 / nosamp * sum(cgh.em[,3] * (cgh.em[,1] * alphas[2] - alphasbiv[3]) / (alphasbiv[1] * alphas[2] - alphas[1] * alphasbiv[3]))
mu2 <- 1 / nosamp * sum(cgh.em[,3] * (cgh.em[,2] * alphas[1] - alphasbiv[3]) / (alphasbiv[2] * alphas[1] - alphas[2] * alphasbiv[3]))
shiftest <- mu2 - mu1
return(shiftest)
}
}
}
.powerunbalance <- function(row, alphabmat){
################################################################################################
# function that calculates the power unbalance
################################################################################################
alphasbiv <- alphabmat[row,]
unbalance <- alphasbiv[1] * alphasbiv[2] - alphasbiv[3]^2
return(unbalance)
}
.countdiscoveries <- function(powerquant, rawp_powerunbal, fdrcutoff){
################################################################################################
# function that converts sof calls to hard calls
################################################################################################
rawp_filt <- rawp_powerunbal[rawp_powerunbal[,2]>=powerquant,][,1]
m <- length(rawp_filt)
adpjrawp_filt <- cbind(rawp_filt, p.adjust(rawp_filt, "BH"))
selected <- matrix(adpjrawp_filt[adpjrawp_filt[,2] <= fdrcutoff,],ncol=2)
count <- nrow(selected)
false <- ifelse(count > 0, max(selected[,1])*m, 0)
countS <- count - false
return(countS)
}
.redraw <- function(col, shiftest, shiftsam, alpharow, dataexp, datacgh){
################################################################################################
# function that .....
################################################################################################
if(is.na(shiftest)){
return(sample(dataexp,1))
} else {
# this returns a re-sample from the data when there is basically only one group
alpha <- alpharow[1]
Ig <- sample(c(1,2), 1, prob=c(alpha,1-alpha))
if (Ig==1){
xib <- sample(dataexp, 1, prob=datacgh[,1]) + shiftest
} else {
xib <- sample(dataexp, 1, prob=datacgh[,2])
}
Ig2 <- sample(c(1,2), 1, prob = c( max(0, min(1,datacgh[col,1])), max(0, min(1, 1-datacgh[col,1]))))
if (Ig2==1){
xib2 <- xib
} else {
xib2 <- xib + shiftsam
}
return(xib2)
}
}
.redrawcol <- function(row, allest, alphanmat, shiftsampled, data2, a, nosamp){
################################################################################################
# function that .......
################################################################################################
shiftest <- allest[row]
datacgh <- matrix(data2[row,c(1:(a*nosamp))],ncol=a,byrow=TRUE)
dataexp <- data2[row,c((a*nosamp+1):((a+1)*nosamp))]
shiftsam <- shiftsampled[row]
alpharow <- alphanmat[row,]
result <- sapply(1:nosamp, .redraw, shiftest=shiftest, alpharow=alpharow, shiftsam=shiftsam, dataexp=dataexp, datacgh=datacgh)
return(result)
}
.pi0est <- function(data.in, null.dists.tune, test.stat="wmw", seqg, nperm, a, nosamp){
################################################################################################
# function that estimates the proportion of rejected null-hypothesis, i.e., the number
# of gene whose expression is afffected by copy number changes.
################################################################################################
data.in2 <- data.in[seqg,]
if(test.stat=="wmw"){
test.stats <- apply(data.in2, 1, .wmw.test.stats, nosamp, a)
} else {
test.stats <- apply(data.in2, 1, .wcvm.test.stats, nosamp, a)
}
rawpvals.test <- .rawps(test.stats, null.dists.tune, nperm)
pi0 <- limma::convest(rawpvals.test)
return(pi0)
}
.datareduce <- function(powerunbal, unbalthr){
################################################################################################
# function that selects rows that pass the power unbalance criterion
################################################################################################
datarows <- which(powerunbal >= unbalthr)
return(datarows)
}
.nulldist.all.wcvm <- function(data.both, nosamp, a, nperm, verbose){
################################################################################################
# function that calculates the null distribution of the weighted CvM test statistics for tuning.
################################################################################################
# Permutes data and calculates test statistic on permuted data
cvm.like.mat <- NULL
for(i in 1:nperm){
if ((i %% 50) == 0){ if (verbose){ cat(i," of ", nperm, " permutations done, and counting...", "\n") } }
x <- sample(1:nosamp,nosamp) + a*nosamp
data.ran <- cbind(data.both[,c(1:(a*nosamp))], data.both[,x])
cvm.like.ran <- apply(data.ran, 1, .wcvm.test.stats, nosamp, a)
cvm.like.mat <- cbind(cvm.like.mat,cvm.like.ran)
}
return(cvm.like.mat)
}
.nulldist.all.wmw <- function(data.both, nosamp, a, nperm, verbose){
################################################################################################
# function that calculates the null distribution of the weighted CvM test statistics for tuning.
################################################################################################
# Permutes data and calculates test statistic on permuted data
wmw.ln.mat <- NULL
cghdata <- data.both[,c(1:(a*nosamp))]
for(i in 1:nperm){
if ((i %% 50) == 0){ if (verbose){ cat(i," of ", nperm, " permutations done, and counting...", "\n") } }
x <- sample(1:nosamp, nosamp) + a*nosamp
data.ran <- cbind(cghdata, data.both[,x])
wmw.ran <- apply(data.ran, 1, .wmw.test.stats, nosamp, a)
wmw.ln.mat <- cbind(wmw.ln.mat, wmw.ran)
}
return(wmw.ln.mat)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.