Nothing
# loglik function calls C function "loglik" to compute the log-likelihood
loglik <- function(xx, AllPtrue, division, sel, Y, a, aa, base, theta)
{
storage.mode(xx) <- "double"
storage.mode(AllPtrue) <- "double"
for (i in 1:length(division)) {
storage.mode(division[[i]]) <- "integer"
}
storage.mode(sel) <- "integer"
storage.mode(Y) <- "integer"
for (i in 1:length(a)) {
storage.mode(a[[i]]) <- "integer"
}
storage.mode(MAX) <- "integer"
divsel <- division[[sel]]
storage.mode(divsel) <- "integer"
# base here should be a list, fix to use for loop
for (i in 1:length(base)) {
storage.mode(base[[i]]) <- "integer"
}
# storage.mode(theta) <- "double"
for (i in 1:length(theta)) {
storage.mode(theta[[i]]) <- "double"
}
.Call("loglik", xx, AllPtrue, divsel, Y, a, MAX, base, theta)
}
# gradient function calls C function "gradient" to compute the gradient
# of log-likelihood with respect to the probability parameters.
gradient <- function(xx, AllPtrue, division, sel, Y, a, aa, base, theta)
{
storage.mode(xx) <- "double"
storage.mode(AllPtrue) <- "double"
for (i in 1:length(division)) {
storage.mode(division[[i]]) <- "integer"
}
storage.mode(sel) <- "integer"
storage.mode(Y) <- "integer"
for (i in 1:length(a)) {
storage.mode(a[[i]]) <- "integer"
}
for (i in 1:length(aa)) {
storage.mode(aa[[i]]) <- "integer"
}
storage.mode(MAX) <- "integer"
divsel <- division[[sel]]
storage.mode(divsel) <- "integer"
# base here should be a list, fix to use for loop
for (i in 1:length(base)) {
storage.mode(base[[i]]) <- "integer"
}
# storage.mode(theta) <- "double"
for (i in 1:length(theta)) {
storage.mode(theta[[i]]) <- "double"
}
.Call("gradient", xx, AllPtrue, divsel, Y, a, aa, MAX, base, theta)
}
# a vector function specifying inequality constraints such that hin[j] > 0
# for all j used in the constrained optimization function auglag
hin <- function(x, AllPtrue, division, sel, Y, a, aa, base, theta)
{
return(c(x, 1 - sum(x)))
}
# Jacobian of hin, used in the constrained optimization function auglag
hin.jac <- function(x, AllPtrue, division, sel, Y, a, aa, base, theta)
{
return(rbind(diag(length(x)), rep(-1, length(x))))
}
# theta.zero returns theta=0 which is essentially same the YounSimon
# version model
theta.zero <- function(n)
{
return(0)
}
# theta.one returns theta=1
theta.one <- function(n)
{
return(1)
}
# theta.half returns theta=1/2
theta.half <- function(n)
{
return(1/2)
}
# theta.oneovern returns theta=1/n
theta.oneovern <- function(n)
{
return(1/n)
}
# theta.oneovernfac returns theta=1/n!
theta.oneovernfac <- function(n)
{
return(1/factorial(n))
}
# theta.user function computes the theta in the weights function
# where input thetafun should be user provided
theta.user <- function(thetafun)
{
function(n) {
thetafun(n)
}
}
# rtheta function computes the theta in the weights function
rtheta <- function(n, theta.fun = theta.oneovern)
{
theta.fun(n)
}
# subloglik function calculates the log of likelihood
subloglik <- function(P, N, a)
{
temp <- rep(1, nrow(a[[N]]))
for(j in 1:ncol(a[[N]])) {
temp <- temp*P[a[[N]][, j], j]
}
return(log(sum(temp)))
}
########################################################################
# main.function implements the iterative optimization to obtain the
# estimate of parameters
main.function <- function(Y, a, aa, thetafun = theta.zero)
{
# process the mutation allele frequency data
# Yold <- Y
threshold <- 0.0
samp.rm <- which(apply(Y, 1, function(x) sum(x > threshold)) == 0)
if (length(samp.rm) > 0) {
Y <- Y[-samp.rm, ]
}
base <- list()
theta <- list()
for (i in 1:nrow(Y)) {
indi <- which(Y[i, ] > threshold)
theta[[i]] <- thetafun(length(indi))
base[[i]] <- rank(-Y[i, indi], ties.method = "random")
Y[i, ] <- as.integer(Y[i, ] > threshold)
}
nomut <- max(rowSums(Y))
MAX <- max(rowSums(Y))
no.gene <- ncol(Y)
nparam <- min(MAX + 1, nomut)
division <- vector("list", nparam)
for (i in 1:(nparam - 1)) division[[i]] <- i
division[[nparam]] <- nparam:nomut
AllPtrue <- matrix(0, nrow = ncol(Y), ncol = nomut)
for (j in 1:nparam) {
init <- runif(no.gene)
init <- init / sum(init)
AllPtrue[, division[[j]]] <- init
}
prev0 <- Inf
prev <- Inf
n <- 0
decrease <- 1
no.repeat <- 20
decrease.limit <- 10^(-6)
while (decrease > decrease.limit & n <= no.repeat) {
n <- n + 1
prev0 <- prev
# For each k, the length of \vec{P_{k}} optimized is N-1 where N
# is the number of driver genes. This is because \sum_{i=1}^{N}
# P_{k,i} = 1 and the value of P_{k,N} is determeined by the other
# N-1 P_{k,i}, thus we optimize only for P_{k,i} for i=1,..., N-1.
# We let P_{k,N}=1-\sum_{i=1}^{N-1}{P_{k,i}}
# Find \vec{P_{k}} maximizing the log likelihood for each k in turn
# with the constraint that 0 < P_{k, i} for i = 1, ..., N-1 and 0 <
# 1 - \sum_{i=1}^{N-1}{P_{k,i}}.
for(sel in 1:nparam) {
# initial values for \vec{P_{k}}
init <- runif(no.gene)
init <- init / sum(init)
# auglag is a constrained optimization function in R
x <- auglag(par = init[-no.gene], fn = loglik, gr = gradient, hin =
hin, hin.jac = hin.jac, control.optim = list(maxit = 200),
control.outer = list(trace = F), AllPtrue = AllPtrue, division =
division, sel = sel, Y = Y, a = a, aa = aa, base = base, theta =
theta)
# adjust the optimal parameter estimate
tempAllPtrue <- AllPtrue
nonnegative <- pmax(c(x$par, 1 - sum(x$par)), 0)
# This is needed since sometimes the value x$par are negative.
tempAllPtrue[, division[[sel]]] <- nonnegative / sum(nonnegative)
# negative log likelihood
fval <- loglik(tempAllPtrue[-no.gene, 1], tempAllPtrue, division, 1,
Y, a, aa, base, theta)
decrease <- prev - fval
# if the negative log likelihood decreases, then change old P_{k}
# with new P_{k}
if (decrease > 0) {
prev <- fval
AllPtrue <- tempAllPtrue
}
}
decrease <- prev0 - prev
}
return(list(AllPtrue, prev))
}
# generate a list of permutation matrices by considering the probability
# parameter contraints
generatea <- function(MAX, MAX.mut)
{
a <- vector("list", MAX.mut)
a[[1]] <- matrix(1, nrow = 1, ncol = 1)
for (N in 2:MAX.mut){
x <- matrix(1:N, nrow = N)
for(i in 1:(min(N, MAX) - 1)) {
y <- rep(1:N, nrow(x))[-as.vector(t(x + ((1:nrow(x)) - 1) * N))]
x <- t(matrix(as.vector(t(matrix(rep(as.vector(x), N - ncol(x)),nrow =
nrow(x)))), nrow = ncol(x)))
x <- cbind(x, y)
}
if (N > MAX) {
y <- rep(1:N, nrow(x))[-as.vector(t(x + ((1:nrow(x)) - 1) * N))]
y <- t(matrix(y, ncol = nrow(x)))
x <- cbind(x, y)
}
a[[N]] <- x
}
return(a)
}
# function to estimate the probability matrix and mutation order of genes
# This version ignores 'MAX'
order_estimate <- function(sample.gene.mutation, N, parallel, MAX=NULL, thetafun = theta.zero)
{
Y <- sample.gene.mutation
threshold <- 0.0
for (i in 1:nrow(Y)) {
Y[i, ] <- as.integer(Y[i, ] > threshold)
}
# MAX <- 4
MAX <- max(rowSums(Y))
a <- generatea(MAX, max(rowSums(Y)))
aa <- generatea(MAX - 1, max(rowSums(Y)))
## Run optimization with different inital values N times using parallel
## computing if the variable "parallel" is TRUE :
if (parallel) {
tmp <- foreach (kk = 1:N) %dopar% main.function(sample.gene.mutation, a, aa, thetafun)
}
else { ## Otherwise, run for loop
tmp <- vector("list", N)
for(i in 1:N) {
tmp[[i]] <- main.function(sample.gene.mutation, a, aa, thetafun)
}
}
minusloglik <- rep(Inf, N)
for(l in 1:N) {
if(is.list(tmp[[l]])) minusloglik[l] <- tmp[[l]][[2]]
}
# Find the one giving the maximum likelihood
result <- tmp[[which(minusloglik == min(minusloglik))[1]]][[1]]
return(result)
}
# Prob that there is any mutation in a gene
total <- function(X) {
return(1 - apply(1 - X, 1, prod))
}
Intersection <- function(X,N)
{
sel <- combinations(ncol(X), N)
res <- 0
for(i in 1:nrow(sel)) {
res <- res + apply(X[, sel[i, ]], 1, prod)
}
return(res)
}
Union <- function(X)
{
res <- 0
for(i in 2:ncol(X)) {
res <- res + Intersection(X, i) * (-1)^(i + 1)
}
return(apply(X, 1, sum) + res)
}
# Constructing BCa CI from bootstrapped data for each of the colon and lung data
ConfInterval <- function(x, mle, jack, sample.gene.mutation, lower, upper)
{
no.gene <- ncol(sample.gene.mutation)
# bias-correction constant
z0 <- matrix(0, nrow = no.gene, ncol = ncol(mle))
for(i in 1:no.gene)
{
for(j in 1:ncol(mle))
{
z0[i,j] <- qnorm(mean(x[i, j, ] <= mle[i, j], na.rm = T))
}
}
z0[z0 == Inf] <- 1000
theta.mean <- apply(jack, 1:2, mean, na.rm = T)
num <- denom <- 0
for(i in 1:nrow(sample.gene.mutation))
{
if(sum(is.na(jack[, , i])) == 0)
{
num <- num + (theta.mean - jack[, , i])^3
denom <- denom + (theta.mean - jack[, , i])^2
}
}
accel <- num/6/denom^1.5 # acceleration constant
alpha1 <- pnorm(z0 + (z0 + qnorm(lower))/(1 - accel*(z0 + qnorm(lower))))
alpha2 <- pnorm(z0 + (z0 + qnorm(upper))/(1 - accel*(z0 + qnorm(upper))))
CI <- array(0, dim = c(no.gene, 2, ncol(mle)))
for(i in 1:no.gene)
{
for(j in 1:ncol(mle))
{
CI[i, 1, j] <- quantile(x[i, j, ], alpha1[i, j], na.rm = T)
CI[i, 2, j] <- quantile(x[i, j, ], alpha2[i, j], na.rm = T)
}
}
CI <- round(CI, 2)
a <- round(mle, 2)
tab <- NULL
for(i in 1:ncol(a))
{
tab <- cbind(tab, "&", a[, i], "&", "(" , CI[, 1, i], ",", CI[,2,i], ")")
}
# for generating tables in the paper
tab <- cbind(colnames(sample.gene.mutation), tab, "\\")
return(tab)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.