#' @include AllClasses.R
NULL
setMethod("initializeTheta", "MultiBatchModel", function(object){
th <- matrix(NA, nBatch(object), k(object))
for(j in seq_len(k(object))){
th[, j] <- rnorm(nBatch(object), mu(object)[j], tau(object)[j])
}
th
})
.sigma2_batch <- function(object){
s2 <- 1/rgamma(nBatch(object)*k(object),
shape=1/2*nu.0(object),
rate=1/2*nu.0(object)*sigma2.0(object))
s2 <- matrix(s2, nBatch(object), k(object))
s2
}
setMethod("initializeSigma2", "MultiBatchModel", function(object){
.sigma2_batch(object)
})
simulateThetas <- function(y, K){
prob.kmeans <- runif(1, 0, 1)
if(prob.kmeans < 0.5){
## simulate thetas based on a priori expectation
homo.del <- runif(1, -5, -0.5)
hemi.del <- rnorm(1, -0.5, 0.5)
diploid <- rnorm(1, 0, 0.5)
dupl <- rnorm(1, 0.5, 0.5)
twocopy.gain <- rnorm(1, 1, 0.5)
thetas <- c(homo.del, hemi.del, diploid, dupl, twocopy.gain)
thetas <- sample(thetas, K, replace=FALSE)
} else {
## simulate starting values near kmean centers
thetas <- kmeans(y, K)$centers[, 1]
thetas <- thetas + rnorm(K, thetas, 0.1)
}
sort(thetas)
}
tau2Hyperparams <- function(thetas){
tau2hat <- var(thetas)
itau2 <- 1/tau2hat
##qInverseTau2(mn=itau2, sd=0.001)
qInverseTau2(mn=itau2, sd=itau2/10)
}
.initialize_z_singlebatch <- function(y, centers){
kmeans(y, centers=centers)$cluster
}
##.init_sb2 <- function(object){
## hypp <- hyperParams(object)
## if(length(y(object))==0) return(object)
## sigma2.0(object) <- rgamma(1, a(hypp), b(hypp))
## nu.0(object) <- max(rgeom(1, betas(hypp)), 1)
## ys <- y(object)
## ys <- sample(ys, length(ys), replace=TRUE)
## K <- k(object)
## capture.output(mc <- tryCatch(invisible(Mclust(ys, G=K)), warning=function(w) NULL))
## if(is.null(mc)){
## stop("Trouble selecting starting values with mclust.")
## }
## params <- mc$parameters
## thetas <- params$mean
## if(K > 1){
## tau2.hp <- tau2Hyperparams(thetas)
## eta.0(hypp) <- tau2.hp$eta.0
## m2.0(hypp) <- tau2.hp$m2.0
## hyperParams(object) <- hypp
## }
## s2s <- params$variance$sigmasq
## ps <- params$pro
## yy <- y(object)
## if(K > 1){
## zz <- .initialize_z_singlebatch(yy, thetas)
## } else {
## zz <- rep(1L, length(yy))
## }
## mu(object) <- mean(thetas)
## if(K > 1){
## tau2(object) <- var(thetas)
## } else tau2(object) <- var(y(object))*10
## p(object) <- ps
## z(object) <- zz
## theta(object) <- thetas
## if(length(s2s) != length(thetas)){
## s2s <- rep(s2s[1], length(thetas))
## }
## sigma2(object) <- s2s
## zFreq(object) <- as.integer(table(zz))
## dataPrec(object) <- 1/computeVars(object)
## dataMean(object) <- computeMeans(object)
## log_lik(object) <- computeLoglik(object)
## logPrior(object) <- computePrior(object)
## chains(object) <- McmcChains(object)
## object
##}
.init_sb1 <- function(object){
hypp <- hyperParams(object)
sigma2.0(object) <- rgamma(1, a(hypp), b(hypp))
nu.0(object) <- max(rgeom(1, betas(hypp)), 1)
##mu(object) <- rnorm(1, mu.0(object), tau.0(object))
mu(object) <- mean(y(object))
##tau2(object) <- 1/rgamma(1, shape=1/2*eta.0(object),
##rate=1/2*eta.0(object)*m2.0(object))
tau2(object) <- var(y(object))*10
theta(object) <- rnorm(k(object), mu(object), tau(object))
sigma2s <- 1/rgamma(k(object),
shape=1/2*nu.0(object),
rate=1/2*nu.0(object)*sigma2.0(object))
sigma2(object) <- sigma2s
p(object) <- as.numeric(rdirichlet(1, alpha(hypp))) ## rows are
## platform, columns are components
if(length(y(object)) > 0){
zz <- as.integer(simulateZ(length(y(object)), p(object)))
cnt <- 1
while (length(table(zz)) < k(object)) {
if (cnt > 10) {
stop("Too few observations or too many components.")
}
p(object) <- as.numeric(rdirichlet(1, alpha(hypp))) ## rows are
zz <- as.integer(simulateZ(length(y(object)), p(object)))
cnt <- cnt + 1
}
} else zz <- integer()
z(object) <- zz
zFreq(object) <- as.integer(table(z(object)))
dataPrec(object) <- 1/computeVars(object)
dataMean(object) <- computeMeans(object)
log_lik(object) <- computeLoglik(object)
logPrior(object) <- computePrior(object)
chains(object) <- McmcChains(object)
object
}
##setMethod("startingValues", "SingleBatchModel", function(object){
## ##object <- .init_sb1(object)
## counter <- 0; model <- NULL
## while(counter < 3 && is.null(model)){
## ## when k is too large, kmeans for finding cluster centers will not work
## ## -- try a couple of times before quitting
## model <- tryCatch(.init_sb2(object), error=function(e) NULL)
## counter <- counter + 1
## }
## K <- k(object)
## while(is.null(model) & K > 1){
## K <- K - 1
## k(object) <- K
## model <- tryCatch(.init_sb2(object), error=function(e) NULL)
## }
## if(is.null(model)) stop("No good initial values identified")
## model
##})
.init_kmeans <- function(object){
hypp <- hyperParams(object)
sigma2.0(object) <- rgamma(1, a(hypp), b(hypp))
nu.0(object) <- max(rgeom(1, betas(hypp)), 1)
B <- uniqueBatch(object)
nB <- nBatch(object)
K <- k(object)
##T <- theta(object)
##S <- sigma(object)
T <- S <- matrix(NA, nB, K)
P <- S
zlist <- vector("list", length(B))
for(i in seq_along(B)){
is.batch <- batch(object)==B[i]
j <- which(is.batch)
y.batch <- y(object)[j]
ys <- sample(y.batch, length(y.batch), replace=TRUE)
##ys <- ys + rnorm(length(ys), 0, 0.1)
if(K > 1){
km <- kmeans(ys, K)
thetas <- as.numeric(km$centers[, 1])
##zs <- as.integer(kmeans(y.batch, centers=thetas[ix])$cluster)
} else {
thetas <- mean(ys)
}
ix <- order(thetas)
T[i, ] <- thetas[ix]
ps <- as.numeric(rdirichlet(1, alpha(hypp)))
if(K > 1){
zs <- kmeans(y.batch, centers=thetas[ix])$cluster
} else zs <- rep(1L, length(y.batch))
##zs <- as.integer(simulateZ(length(y.batch), ps))
##zs <- as.integer(factor(km$cluster, levels=ix))
##ps <- (table(zs)/length(zs))
P[i, ] <- ps
sds <- sapply(split(ys, zs), sd)
if(length(sds) < K){
sds <- sqrt(initializeSigma2(object))[1, ]
}
S[i, ] <- sds
zlist[[i]] <- zs
}
## if only one observation, sd is NA
sd.mns <- colMeans(S, na.rm=TRUE)
sd.mns <- matrix(sd.mns, nrow(S), ncol(S), byrow=TRUE)
S[is.na(S)] <- sd.mns[is.na(S)]
zz <- unlist(zlist)
p <- colMeans(P)
mu(object) <- colMeans(T)
tau2(object) <- colVars(T)
if(K > 1){
tau2.hypp <- tau2HyperparamsBatch(T)
eta.0(hypp) <- tau2.hypp$eta.0
m2.0(hypp) <- tau2.hypp$m2.0
hyperParams(object) <- hypp
}
p(object) <- p
z(object) <- zz
theta(object) <- T
sigma2(object) <- S^2
zFreq(object) <- as.integer(table(zz))
if(length(y(object)) > 0){
dataMean(object) <- computeMeans(object)
dataPrec(object) <- computePrec(object)
log_lik(object) <- computeLoglik(object)
logPrior(object) <- computePrior(object)
}
probz(object) <- .computeProbZ(object)
chains(object) <- McmcChains(object)
object
}
.init_priors <- function(object){
hypp <- hyperParams(object)
sigma2.0(object) <- rgamma(1, a(hypp), b(hypp))
nu.0(object) <- max(rgeom(1, betas(hypp)), 1)
K <- k(object)
B <- nBatch(object)
homo.del <- runif(B, -5, -0.5)
hemi.del <- rnorm(B, -0.5, 0.5)
diploid <- rnorm(B, 0, 0.5)
dupl <- rnorm(B, 0.5, 0.5)
twocopy.gain <- rnorm(B, 1, 0.5)
thetas <- cbind(homo.del, hemi.del, diploid, dupl, twocopy.gain)
if(K > 5){
replace <- TRUE
} else replace <- FALSE
thetas <- thetas[, sample(1:5, K, replace=replace), drop=FALSE]
for(i in 1:B){
thetas[i, ] <- sort(thetas[i, ])
}
sigma2s <- initializeSigma2(object)
sigma2(object) <- sigma2s
mu(object) <- colMeans(thetas)
tau2(object) <- colVars(thetas)
p(object) <- as.numeric(rdirichlet(1, alpha(hypp)))
theta(object) <- thetas
if(length(y(object)) > 0){
zz <- simulateZ(length(y(object)), p(object))
z(object) <- as.integer(factor(zz))
} else {
zz <- as.integer(factor(levels=seq_len(k(hypp))))
z(object) <- zz
}
if(length(y(object)) > 0){
dataMean(object) <- computeMeans(object)
dataPrec(object) <- computePrec(object)
log_lik(object) <- computeLoglik(object)
logPrior(object) <- computePrior(object)
}
probz(object) <- .computeProbZ(object)
chains(object) <- McmcChains(object)
object
}
tau2HyperparamsBatch <- function(thetas){
tau2s <- colVars(thetas)
itau2s <- 1/tau2s
med <- median(itau2s)
qInverseTau2(mn=med, sd=med/100)
}
##tau2HyperparamsBatch <- function(tau2s){
## mn <- 1/var(tau2s)
## qInverseTau2(mn=mn, sd=0.001)
##}
.initialize_z <- function(y, centers){
if(length(centers) > 1){
z <- tryCatch(kmeans(y, centers=centers)$cluster, error=function(e) NULL)
if(is.null(z)) stop("Empty clusters because k is too large")
} else {
z <- rep(1L, length(y))
}
z
}
##.init_mclust <- function(object){
## hypp <- hyperParams(object)
## sigma2.0(object) <- rgamma(1, a(hypp), b(hypp))
## nu.0(object) <- max(rgeom(1, betas(hypp)), 1)
## B <- uniqueBatch(object)
## K <- k(object)
## nB <- nBatch(object)
## P <- T <- S <- matrix(NA, nB, K)
## ylist <- split(y(object), batch(object))
## n.b <- elementNROWS(ylist)
## zlist <- vector("list", length(B))
## ##browser()
## for(i in seq_along(B)){
## ys <- ylist[[i]]
## ys <- sample(ys, length(ys), replace=TRUE)
## capture.output(mc <- tryCatch(invisible(Mclust(ys, G=K)), warning=function(w) NULL))
## if(is.null(mc)) stop("Error initializing values for batch model")
## params <- mc$parameters
## thetas <- params$mean
## sds <- sqrt(params$variance$sigmasq)
## ps <- params$pro
## T[i, ] <- thetas
## zlist[[i]] <- .initialize_z(ylist[[i]], thetas)
## P[i, ] <- ps
## S[i, ] <- sds
## }
## ##
## ## since batches are not ordered, unlist(z) still matches the original y's
## ##
## zz <- unlist(zlist)
## ## *should do a weighted average since batches are unequally sized
## ##p <- colMeans(P)
## mu(object) <- colMeans(T)
## tau2(object) <- colVars(T)
## if(K > 1){
## tau2.hypp <- tau2HyperparamsBatch(T)
## eta.0(hypp) <- tau2.hypp$eta.0
## m2.0(hypp) <- tau2.hypp$m2.0
## hyperParams(object) <- hypp
## }
## p(object) <- P
## z(object) <- zz
## ## since batches are required to be in batch order, this is not necessary
## ##y(object) <- unlist(ylist)
## ##batch(object) <- unlist(split(batch(object), batch(object)))
## theta(object) <- T
## sigma2(object) <- S^2
## zFreq(object) <- as.integer(table(zz))
## if(length(y(object)) > 0){
## dataMean(object) <- computeMeans(object)
## dataPrec(object) <- computePrec(object)
## log_lik(object) <- computeLoglik(object)
## logPrior(object) <- computePrior(object)
## }
## probz(object) <- .computeProbZ(object)
## chains(object) <- McmcChains(object)
## object
##}
##.init_batchmodel2 <- function(object){
## u <- runif(1, 0, 1)
## model <- NULL
## counter <- 0
## if(u <= 0.25){
## while(is.null(model) && counter < 30){
## model <- tryCatch(.init_kmeans(object), error=function(e) NULL)
## counter <- counter+1
## }
## }
## if(u > 0.25){
## while(is.null(model) && counter < 30){
## model <- tryCatch(.init_mclust(object), error=function(e) NULL)
## counter <- counter+1
## }
## }
## model
##}
##
##.init_batchmodel3 <- function(object){
## hypp <- hyperParams(object)
## sigma2.0(object) <- rgamma(1, a(hypp), b(hypp))
## nu.0(object) <- max(rgeom(1, betas(hypp)), 1)
## NB <- nBatch(object)
## K <- k(object)
## ylist <- split(y(object), batch(object))
## thetas.list <- lapply(ylist, simulateThetas, K=K)
## thetas <- do.call(rbind, thetas.list)
## theta(object) <- thetas
## mu(object) <- colMeans(thetas)
## tau2(object) <- colVars(thetas)
## nB <- nBatch(object)
## p(object) <- as.numeric(rdirichlet(1, alpha(hypp)))
## if(length(y(object)) > 0){
## zz <- simulateZ(length(y(object)), p(object))
## z(object) <- as.integer(factor(zz))
## } else {
## zz <- as.integer(factor(levels=seq_len(k(hypp))))
## z(object) <- zz
## }
## zFreq(object) <- as.integer(table(zz))
## zlist <- split(zz, batch(object))
## sigma2s <- initializeSigma2(object)
## sigma2(object) <- sigma2s
## if(length(y(object)) > 0){
## dataMean(object) <- computeMeans(object)
## dataPrec(object) <- computePrec(object)
## log_lik(object) <- computeLoglik(object)
## logPrior(object) <- computePrior(object)
## }
## probz(object) <- .computeProbZ(object)
## chains(object) <- McmcChains(object)
## object
##}
##
##.init_batchmodel <- function(object){
## hypp <- hyperParams(object)
## sigma2.0(object) <- rgamma(1, a(hypp), b(hypp))
## nu.0(object) <- max(rgeom(1, betas(hypp)), 1)
## mu(object) <- rnorm(k(object), mu.0(object), tau.0(object))
## tau2(object) <- 1/rgamma(k(object), shape=1/2*eta.0(object),
## rate=1/2*eta.0(object)*m2.0(object))
## nB <- nBatch(object)
## th <- initializeTheta(object)
## if(is(object, "UnivariateBatchModel") || ncol(th) == 1){
## theta(object) <- th
## } else {
## theta(object) <- t(apply(th, 1, sort))
## }
## sigma2(object) <- initializeSigma2(object)
## p(object) <- as.numeric(rdirichlet(1, alpha(hypp))) ## rows are platform, columns are components
## ##p(object) <- rdirichlet(nBatch(object), alpha(hypp))
## ##if(missing(zz)){
## if(length(y(object)) > 0){
## zz <- simulateZ(length(y(object)), p(object))
## z(object) <- as.integer(factor(zz))
##
## } else zz <- as.integer(factor(levels=seq_len(k(hypp))))
## zFreq(object) <- as.integer(table(zz))
## if(length(y(object)) > 0){
## dataMean(object) <- computeMeans(object)
## dataPrec(object) <- computePrec(object)
## log_lik(object) <- computeLoglik(object)
## logPrior(object) <- computePrior(object)
## }
## probz(object) <- .computeProbZ(object)
## chains(object) <- McmcChains(object)
## object
##}
##setMethod("startingValues", "MultiBatchModel", function(object){
## ##.init_batchmodel(object)
## if(length(y(object)) == 0) return(object)
## .init_batchmodel2(object)
##})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.