Nothing
##################################################################################
## Function for fitting Rtreemix model to a given RtreemixData dataset.
## The data and the number of trees (K) have to be specified.
## The function estimates K-oncogenetic trees mixture model from the
## specified data (with a noise component).
## When K == 1, and noise == FALSE a single mutagenetic tree is fit to the data.
## When K == 1 and noise == TRUE a star mutagenetic tree is fit to the data.
## If K > 1, the first mutagenetic tree is always the star,
## i.e. the case K > 1 and noise == FALSE is not possible.
if(!isGeneric("fit"))
setGeneric("fit", function(data, K, ...) standardGeneric("fit"))
setMethod("fit",
signature(data = "RtreemixData", K = "numeric"),
function(data, K, no.start.sol = 100,
eps = 0.01, weighing = FALSE, equal.edgeweights = TRUE,
seed = (-1), noise = TRUE) {
## Setting the true type
K <- as.integer(K)
no.start.sol <- as.integer(no.start.sol)
equal.edgeweights <- as.integer(equal.edgeweights)
eps <- as.numeric(eps)
weighing <- as.integer(weighing)
seed <- as.integer(seed)
## Check if the necessary parameters are provided and have the correct form.
if(K < 1)
stop("The number of trees must be an integer greater than zero!")
if((seed != -1) &&
(!(is.numeric(seed)) || !(trunc(seed) == seed) || !(seed >= 0 )))
stop("The seed must be a positive integer!")
if(no.start.sol < 1)
stop("The number of starting solutions for the k-means must be an integer greater than zero!")
##Add the first column of ones in the sample.
sample <- cbind(rep(1, sampleSize(data)), Sample(data))
## Fitting the corresponding RtreemixModel to the provided data.
if(noise && (K > 1)) {
fit.res <- .Call("R_fit", Events(data), sample, K,
no.start.sol, equal.edgeweights,
eps, weighing, seed)
for(i in 1:K)
edgeDataDefaults(fit.res$graphs.mixture[[i]], "weight") <- numeric(1)
return(new("RtreemixModel",
ParentData = data,
Weights = as.numeric(fit.res$alpha),
Resp = as.matrix(fit.res$resp),
CompleteMat = as.matrix(fit.res$pat.hat),
Star = TRUE,
Trees = fit.res$graphs.mixture))
} else {
if(K == 1) {
if (!noise) {
## Fit single RtreemixModel to the data.
fit.res <- .Call("R_fit1", Events(data), sample, eps, weighing)
edgeDataDefaults(fit.res$graphs.mixture[[1]], "weight") <- numeric(1)
return(new("RtreemixModel",
ParentData = data,
Weights = as.numeric(fit.res$alpha),
Resp = as.matrix(t(fit.res$resp)),
Star = FALSE,
Trees = fit.res$graphs.mixture))
} else {
## Fit single star RtreemixModel to the data.
fit.res <- .Call("R_fit0", Events(data), sample,
equal.edgeweights, weighing)
edgeDataDefaults(fit.res$graphs.mixture[[1]], "weight") <- numeric(1)
return(new("RtreemixModel",
ParentData = data,
Weights = as.numeric(fit.res$alpha),
Resp = as.matrix(t(fit.res$resp)),
Star = TRUE,
Trees = fit.res$graphs.mixture))
}
} else
stop("Mixture model with more than one tree and without noise component is not implemented!")
}
})
##################################################################################
## Function for fitting Rtreemix model to given data and
## analyzing its variance with the bootstrap method.
## The data and number of trees (K) have to be specified.
if(!isGeneric("bootstrap"))
setGeneric("bootstrap", function(data, K, ...) standardGeneric("bootstrap"))
setMethod("bootstrap",
signature(data = "RtreemixData", K = "numeric"),
function(data, K, no.start.sol = 100,
eps = 0.0, weighing = FALSE, equal.edgeweights = TRUE,
seed = (-1), B = 1000,
conf.interval = 0.05) {
## Setting the true type
K <- as.integer(K)
no.start.sol <- as.integer(no.start.sol)
equal.edgeweights <- as.integer(equal.edgeweights)
eps <- as.numeric(eps)
weighing <- as.integer(weighing)
seed <- as.integer(seed)
B <- as.integer(B)
conf.interval <- as.numeric(conf.interval)
events <- Events(data)
## Check if the necessary parameters are provided and have the correct form.
if(K < 1)
stop("The number of trees must be an integer greater than zero!")
if((seed != -1) &&
(!(is.numeric(seed)) || !(trunc(seed) == seed) || !(seed >= 0 )))
stop("The seed must be a positive integer!")
if(no.start.sol < 1)
stop("The number of starting solutions for the k-means must be an integer greater than zero!")
if(B < 1)
stop("The number of bootstrap samples integer greater than zero!")
## Add the first column of ones in the sample.
sample <- cbind(replicate(sampleSize(data), 1), Sample(data))
## Fit a model and analyze its variance with the bootstrap method.
fit.boot <- .Call("R_bootstrap", events, sample, K,
no.start.sol, equal.edgeweights,
eps, weighing, seed,
B, conf.interval
)
for(i in 1:K) {
edgeDataDefaults(fit.boot$graphs.mixture[[i]], "weight") <- numeric(1)
edgeDataDefaults(fit.boot$graphs.mixture[[i]], "ci") <- numeric(0)
}
return(new("RtreemixModel",
ParentData = data,
Weights = as.numeric(fit.boot$alpha),
WeightsCI = fit.boot$alpha.ci,
Resp = as.matrix(fit.boot$resp),
CompleteMat = as.matrix(fit.boot$pat.hat),
Star = TRUE,
Trees = fit.boot$graphs.mixture
))
})
####################################################################################
## Function for predicting the likelihoods of given data by using a given Rtreemix model.
## The model and the data have to be specified.
if(!isGeneric("likelihoods"))
setGeneric("likelihoods", function(model, data) standardGeneric("likelihoods"))
setMethod("likelihoods",
signature(model = "RtreemixModel", data = "RtreemixData"),
function(model, data) {
## Check if the patterns have the correct length.
if(eventsNum(model) != eventsNum(data)) {
cat("The patterns are expected to have length: ", eventsNum(model), ". ")
stop("\n")
} else {
## Give the node names and edge weights in a list structure.
listG <- list()
for(i in 1:numTrees(model)) {
listG[[i]] <- list()
listG[[i]][[1]] <- nodes(Trees(model)[[i]])
listG[[i]][[2]] <- edgeWeights(Trees(model)[[i]])
}
## Call to the C function for calculating the likelihoods.
res.like <- .Call("R_likelihood", eventsNum(model),
listG, Weights(model), cbind(rep(1, sampleSize(data)), Sample(data)))
return(new("RtreemixStats",
Data = data,
Model = model,
LogLikelihoods = res.like[[2]],
WLikelihoods = res.like[[1]]))
}
})
###################################################################################
## Function for predicting the GPS of given dataset by using a given Rtreemix model.
## The model has to be specified. If the dataset is missing the GPS is calculated for all possible patterns.
if(!isGeneric("gps"))
setGeneric("gps", function(model, data, ...) standardGeneric("gps"))
## The dataset is specified as an RtreemixData object.
setMethod("gps",
signature(model = "RtreemixModel", data = "RtreemixData"),
function(model, data, sampling.mode = "exponential", sampling.param = 1,
no.sim = 10, seed = (-1)) {
## Setting the true type
sampling.param <- as.numeric(sampling.param)
no.sim <- as.integer(no.sim)
seed <- as.integer(seed)
## Check if the necessary parameters are provided and have the correct form.
if((seed != -1) &&
(!(is.numeric(seed)) || !(trunc(seed) == seed) || !(seed >= 0 )))
stop("The seed must be a positive integer!")
if (!missing(sampling.mode)) {
if (!(identical(sampling.mode, "constant") || identical(sampling.mode, "exponential")))
stop("Please give correct sampling mode (constant or exponential)!")
}
if (no.sim < 1)
stop("The number of iterations for the waiting time simulation must be an integer greater than zero!")
mat <- cbind(rep(1, sampleSize(data)), Sample(data))
## Check if the patterns have the correct length.
if(eventsNum(model) != ncol(mat)) {
cat("The patterns are expected to have length: ", eventsNum(model), ". \n")
stop("\n")
}
## Give the node names and edge weights in a list structure.
listG <- list()
for(i in 1:numTrees(model)) {
listG[[i]] <- list()
listG[[i]][[1]] <- nodes(Trees(model)[[i]])
listG[[i]][[2]] <- edgeWeights(Trees(model)[[i]])
}
## Call to the C function for calculating the GPS.
res.GPS <- .Call("R_time", eventsNum(model), listG, Weights(model),
as.integer(ifelse(identical(sampling.mode, "constant"), 0, 1)),
sampling.param, no.sim, seed)
## All possible patterns.
all.data <- new("RtreemixData", Sample = res.GPS[[1]][, -1])
resp <- WLikelihoods(likelihoods(model = model, data = all.data))
bind.gps <- sapply(res.GPS[[2]], rbind)
bind.gps.bin <- apply(!apply(bind.gps, 2, is.na), 2, as.integer)
filter.resp <- resp * bind.gps.bin
resp <- filter.resp / apply(filter.resp, 1, sum)
if(numTrees(model) > 1) {
resp[which(apply(bind.gps.bin, 1, sum) == 0),] <- c(1, rep(0, numTrees(model) - 1))
## Take the mean for the noise.
if (sampling.mode == "exponential")
res.GPS[[2]][[1]][which(is.na(res.GPS[[2]][[1]]))] <- 1.0/sampling.param
else
res.GPS[[2]][[1]][which(is.na(res.GPS[[2]][[1]]))] <- sampling.param
for(i in 2:numTrees(model))
res.GPS[[2]][[i]][which(is.na(res.GPS[[2]][[i]]))] <- numeric(1)
} else {
for(i in 1:numTrees(model))
res.GPS[[2]][[i]][which(is.na(res.GPS[[2]][[i]]))] <- numeric(1)
}
fin.GPS <- apply(t(resp) * t(sapply(res.GPS[[2]], rbind)), 2, sum)
## Take the GPS of the patterns present in the given dataset.
index <- apply(mat[,-1], 1, '%*%', 2^c(0:(ncol(mat) - 2))) + 1
res <- fin.GPS[index]
return(new("RtreemixGPS",
Data = data,
Model = model,
SamplingMode = sampling.mode,
SamplingParam = sampling.param,
GPS = res))
})
## The dataset is specified as a binary matrix.
setMethod("gps",
signature(model = "RtreemixModel", data = "matrix"),
function(model, data, sampling.mode = "exponential", sampling.param = 1,
no.sim = 10, seed = (-1)) {
## Setting the true type
sampling.param <- as.numeric(sampling.param)
no.sim <- as.integer(no.sim)
seed <- as.integer(seed)
## Check if the necessary parameters are provided and have the correct form.
if((seed != -1) &&
(!(is.numeric(seed)) || !(trunc(seed) == seed) || !(seed >= 0)))
stop("The seed must be a positive integer!")
if (!missing(sampling.mode)) {
if (!(identical(sampling.mode, "constant") || identical(sampling.mode, "exponential")))
stop("Please give correct sampling mode (constant or exponential)!")
}
if (no.sim < 1)
stop("The number of iterations for the waiting time simulation must be an integer greater than zero!")
mat <- cbind(rep(1, nrow(data)), data)
## Check if the patterns have the correct length.
if(eventsNum(model) != ncol(mat)) {
cat("The patterns are expected to have length: ", eventsNum(model), ". \n")
stop("\n")
}
## Give the node names and edge weights in a list structure.
listG <- list()
for(i in 1:numTrees(model)) {
listG[[i]] <- list()
listG[[i]][[1]] <- nodes(Trees(model)[[i]])
listG[[i]][[2]] <- edgeWeights(Trees(model)[[i]])
}
## Call to the C function for calculating the GPS.
res.GPS <- .Call("R_time", eventsNum(model), listG, Weights(model),
as.integer(ifelse(identical(sampling.mode, "constant"), 0, 1)),
sampling.param, no.sim, seed)
## Generate all possible patterns.
all.data <- new("RtreemixData", Sample = res.GPS[[1]][, -1])
resp <- WLikelihoods(likelihoods(model = model, data = all.data))
bind.gps <- sapply(res.GPS[[2]], rbind)
bind.gps.bin <- apply(!apply(bind.gps, 2, is.na), 2, as.integer)
filter.resp <- resp * bind.gps.bin
resp <- filter.resp / apply(filter.resp, 1, sum)
if(numTrees(model) > 1) {
resp[which(apply(bind.gps.bin, 1, sum) == 0),] <- c(1, rep(0, numTrees(model) - 1))
## Take the mean for the noise.
if (sampling.mode == "exponential")
res.GPS[[2]][[1]][which(is.na(res.GPS[[2]][[1]]))] <- 1.0/sampling.param
else
res.GPS[[2]][[1]][which(is.na(res.GPS[[2]][[1]]))] <- sampling.param
for(i in 2:numTrees(model))
res.GPS[[2]][[i]][which(is.na(res.GPS[[2]][[i]]))] <- numeric(1)
} else {
for(i in 1:numTrees(model))
res.GPS[[2]][[i]][which(is.na(res.GPS[[2]][[i]]))] <- numeric(1)
}
fin.GPS <- apply(t(resp) * t(sapply(res.GPS[[2]], rbind)), 2, sum)
## Take the GPS of the patterns present in the given dataset.
index <- apply(mat[,-1], 1, '%*%', 2^c(0:(ncol(mat) - 2))) + 1
res <- fin.GPS[index]
return(new("RtreemixGPS",
Model = model,
Data = new("RtreemixData", Sample = data),
SamplingMode = sampling.mode,
SamplingParam = sampling.param,
GPS = res))
})
## The dataset is not specified. Calculate the GPS for all possible patterns.
setMethod("gps",
signature(model = "RtreemixModel", data = "missing"),
function(model, sampling.mode = "exponential", sampling.param = 1,
no.sim = 10, seed = (-1)) {
## Setting the true type
sampling.param <- as.numeric(sampling.param)
no.sim <- as.integer(no.sim)
seed <- as.integer(seed)
## Check if the necessary parameters are provided and have the correct form.
if((seed != -1) &&
(!(is.numeric(seed)) || !(trunc(seed) == seed) || !(seed >= 0 )))
stop("The seed must be a positive integer!")
if (!missing(sampling.mode)) {
if (!(identical(sampling.mode, "constant") || identical(sampling.mode, "exponential")))
stop("Please give correct sampling mode (constant or exponential)!")
}
if (no.sim < 1)
stop("The number of iterations for the waiting time simulation must be an integer greater than zero!")
## Give the node names and edge weights in a list structure.
listG <- list()
for(i in 1:numTrees(model)) {
listG[[i]] <- list()
listG[[i]][[1]] <- nodes(Trees(model)[[i]])
listG[[i]][[2]] <- edgeWeights(Trees(model)[[i]])
}
## Call to the C function for calculating the GPS.
res.GPS <- .Call("R_time", eventsNum(model), listG, Weights(model),
as.integer(ifelse(identical(sampling.mode, "constant"), 0, 1)),
sampling.param, no.sim, seed)
## Generate all possible patterns.
all.data <- new("RtreemixData", Sample = res.GPS[[1]][, -1])
resp <- WLikelihoods(likelihoods(model = model, data = all.data))
bind.gps <- sapply(res.GPS[[2]], rbind)
bind.gps.bin <- apply(!apply(bind.gps, 2, is.na), 2, as.integer)
filter.resp <- resp * bind.gps.bin
resp <- filter.resp / apply(filter.resp, 1, sum)
if(numTrees(model) > 1) {
resp[which(apply(bind.gps.bin, 1, sum) == 0),] <- c(1, rep(0, numTrees(model) - 1))
## Take the mean for the noise.
if (sampling.mode == "exponential")
res.GPS[[2]][[1]][which(is.na(res.GPS[[2]][[1]]))] <- 1.0/sampling.param
else
res.GPS[[2]][[1]][which(is.na(res.GPS[[2]][[1]]))] <- sampling.param
for(i in 2:numTrees(model))
res.GPS[[2]][[i]][which(is.na(res.GPS[[2]][[i]]))] <- numeric(1)
} else {
for(i in 1:numTrees(model))
res.GPS[[2]][[i]][which(is.na(res.GPS[[2]][[i]]))] <- numeric(1)
}
## Compute GPS for all patterns.
fin.GPS <- apply(t(resp) * t(sapply(res.GPS[[2]], rbind)), 2, sum)
return(new("RtreemixGPS",
Model = model,
Data = all.data,
SamplingMode = sampling.mode,
SamplingParam = sampling.param,
GPS = fin.GPS))
})
###################################################################################
## When the sampling mode and sampling parameter are specified, function for simulating patterns
## and their waiting and sampling times from a given Rtreemix model.
## When only the mixture model is specified, function for drawing data from the mixture model.
## The model has to be specified in both cases.
if(!isGeneric("sim"))
setGeneric("sim", function(model, sampling.mode, sampling.param, ...) standardGeneric("sim"))
## Function for drawing patterns from a given Rtreemix model.
setMethod("sim",
signature(model = "RtreemixModel", sampling.mode = "missing", sampling.param = "missing"),
function(model, no.draws = 10, seed = (-1)) {
## Setting the true type
no.draws <- as.integer(no.draws)
seed <- as.integer(seed)
## Check if the necessary parameters are provided and have the correct form.
if((seed != -1) &&
(!(is.numeric(seed)) || !(trunc(seed) == seed) || !(seed >= 0 )))
stop("The seed must be a positive integer!")
if(no.draws < 1)
stop("The number of draws must be an integer greater than zero!")
## Give the node names and edge weights in a list structure.
listG <- list()
for(i in 1:numTrees(model)) {
listG[[i]] <- list()
listG[[i]][[1]] <- nodes(Trees(model)[[i]])
listG[[i]][[2]] <- edgeWeights(Trees(model)[[i]])
}
## Call to the C function
res.sim <- .Call("R_draw", eventsNum(model), listG, Weights(model),
no.draws, seed)
return(new("RtreemixData",
Sample = res.sim[,-1]))
})
## Function for simulating patterns with their sampling and waiting times.
setMethod("sim",
signature(model = "RtreemixModel", sampling.mode = "character", sampling.param = "numeric"),
function(model, sampling.mode, sampling.param, no.sim = 10, seed = (-1)) {
## Setting the true type
no.sim <- as.integer(no.sim)
seed <- as.integer(seed)
## Check if the necessary parameters are provided and have the correct form.
if((seed != -1) &&
(!(is.numeric(seed)) || !(trunc(seed) == seed) || !(seed >= 0 )))
stop("The seed must be a positive integer!")
if (!(identical(sampling.mode, "constant") || identical(sampling.mode, "exponential")))
stop("Please give correct sampling mode (constant or exponential)!")
if(no.sim < 1)
stop("The number of iterations for the waiting time simulation must be an integer greater than zero!")
## Give the node names and edge weights in a list structure.
listG <- list()
for(i in 1:numTrees(model)) {
listG[[i]] <- list()
listG[[i]][[1]] <- nodes(Trees(model)[[i]])
listG[[i]][[2]] <- edgeWeights(Trees(model)[[i]])
}
## Call to the C function
res.sim <- .Call("R_simulate", eventsNum(model), listG, Weights(model),
as.integer(ifelse(identical(sampling.mode, "constant"), 0, 1)),
sampling.param, no.sim,
seed)
return(new("RtreemixSim",
Model = model,
SimPatterns = new("RtreemixData", Sample = res.sim$patterns[, -1]),
SamplingMode = sampling.mode,
SamplingParam = sampling.param,
WaitingTimes = res.sim$wtimes,
SamplingTimes = res.sim$stimes))
})
###################################################################################
## Function for generating a random Rtreemix model.
## The number of tree components and the number of genetic events have to be specified.
if(!isGeneric("generate"))
setGeneric("generate", function(K, no.events, ...) standardGeneric("generate"))
setMethod("generate",
signature(K = "numeric", no.events = "numeric"),
function(K, no.events, noise.tree = TRUE,
equal.edgeweights = TRUE, prob = c(0.0, 1.0),
seed = (-1)) {
## Setting the true type
K <- as.integer(K)
no.events <- as.integer(no.events)
noise.tree <- as.integer(noise.tree)
seed <- as.integer(seed)
equal.edgeweights <- as.integer(equal.edgeweights)
## Check if the necessary parameters are provided and have the correct form.
if(K < 1)
stop("The number of trees must be an integer greater than zero!")
if (no.events < 1)
stop("The number of events must be an integer greater than zero!")
if((seed != -1) &&
(!(is.numeric(seed)) || !(trunc(seed) == seed) || !(seed >= 0 )))
stop("The seed must be a positive integer!")
if(!missing(prob)) {
if(!is.numeric(prob) || (length(prob) != 2))
stop("Specify the boundaries of the conditional probabilities as a numeric vector
of length two = c(min, max)!")
if(prob[2] < prob[1])
stop("In the probability vector the lower boundary must be smaller than the
upper boundary!")
}
## Call to the C function.
res.sim <- .Call("R_random", K, no.events,
noise.tree, equal.edgeweights,
prob[1], prob[2], seed)
for(i in 1:K)
edgeDataDefaults(res.sim$graphs.mixture[[i]], "weight") <- numeric(1)
res <- new("RtreemixModel",
Weights = as.numeric(res.sim$alpha),
Star = as.logical(noise.tree),
Trees = res.sim$graphs.mixture)
return(res)
})
###################################################################################
## Function for generating the (scaled) probablility distribution induced with a given RtreemixModel.
## The RtreemixModel has to be specified.
## The sampling mode and the parameters for the sampling times for the observed
## input and output probabilities are optional.
if(!isGeneric("distribution"))
setGeneric("distribution", function(model, sampling.mode, sampling.param, output.param) standardGeneric("distribution"))
setMethod("distribution",
signature(model = "RtreemixModel", sampling.mode = "missing",
sampling.param = "missing", output.param = "missing"),
function(model) {
if(eventsNum(model) > 30)
stop("Invalid for more than 30 events!")
## There is no specified sampling mode
mode = as.integer(-1)
## Give the node names and edge weights in a list structure.
listG <- list()
for(i in 1:numTrees(model)) {
listG[[i]] <- list()
listG[[i]][[1]] <- nodes(Trees(model)[[i]])
listG[[i]][[2]] <- edgeWeights(Trees(model)[[i]])
}
## Call to the C function for getting the distribution.
res.distr <- .Call("R_distr", eventsNum(model), listG, Weights(model),
mode, numeric(1), numeric(1))
## All possible patterns
sample <- as.matrix(res.distr[[1]])
rownames(sample) <- paste("P", 1:nrow(sample), sep = "")
## Build a dataframe of all possible patterns with their corresponding probabilities
result <- data.frame(sample, res.distr[[2]])
colnames(result) <- c(paste("E", 0:(ncol(sample) - 1), sep = ""), "probability")
return(result)
})
setMethod("distribution",
signature(model = "RtreemixModel", sampling.mode = "character",
sampling.param = "numeric", output.param = "numeric"),
function(model, sampling.mode, sampling.param,
output.param) {
if(eventsNum(model) > 30)
stop("Invalid for more than 30 events!")
if (!(identical(sampling.mode, "constant") || identical(sampling.mode, "exponential")))
stop("Please give correct sampling mode (constant or exponential)!")
mode <- as.integer(ifelse(identical(sampling.mode, "constant"), 0, 1))
## Give the node names and edge weights in a list structure.
listG <- list()
for(i in 1:numTrees(model)) {
listG[[i]] <- list()
listG[[i]][[1]] <- nodes(Trees(model)[[i]])
listG[[i]][[2]] <- edgeWeights(Trees(model)[[i]])
}
## Call to the C function for getting the distribution.
res.distr <- .Call("R_distr", eventsNum(model), listG, Weights(model),
mode, sampling.param, output.param)
## All possible patterns
sample <- as.matrix(res.distr[[1]])
rownames(sample) <- paste("P", 1:nrow(sample), sep = "")
## Build a dataframe of all possible patterns with their corresponding probabilities
result <- data.frame(sample, res.distr[[2]])
colnames(result) <- c(paste("E", 0:(ncol(sample) - 1), sep = ""), "probability")
## Print some useful information.
if (mode == 1)
cat("Exponential sampling mode!\n")
else
cat("Constant sampling mode!\n")
cat("The parameter for the observed input probabilities:\n")
print(sampling.param)
cat("\n The parameter for the observed output probabilities:\n")
print(output.param)
print(result)
return(result)
})
###################################################################################
## Function for calculating GPS values and their 95% bootstrap
## confidence intervals for a given dataset.
## the GPS values are calculated with respect to an underlying
## oncogenetic trees mixture model fitted to the given data.
## The number of trees for the model (K) and the dataset have to be specified.
##change to confIntGPS
if(!isGeneric("confIntGPS"))
setGeneric("confIntGPS", function(data, K, ...) standardGeneric("confIntGPS"))
setMethod("confIntGPS",
signature(data = "RtreemixData", K = "numeric"),
function(data, K, sampling.mode = "exponential", sampling.param = 1,
no.sim = 10000, B = 1000, equal.star = TRUE) {
## Setting the true type
K <- as.integer(K)
B <- as.integer(B)
sampling.param <- as.numeric(sampling.param)
no.events <- eventsNum(data)
no.sim <- as.integer(no.sim)
equal.star <- as.logical(equal.star)
## Check if the necessary parameters are provided and have the correct form.
if(K < 1)
stop("The number of trees must be an integer greater than zero!")
if(B < 1)
stop("The number of bootstrap samples integer greater than zero!")
if (no.sim < 1)
stop("The number of iterations for the waiting time simulation must be an integer greater than zero!")
if (!missing(sampling.mode)) {
if (!(identical(sampling.mode, "constant") || identical(sampling.mode, "exponential")))
stop("Please give correct sampling mode (constant or exponential)!")
}
## Fit an Rtreemix fm model to the given data.
fm <- fit(data = data, K = K, noise = TRUE, equal.edgeweights = equal.star, eps = 0.01)
## Compute the GPS values for all possible patterns.
fit.gps <- GPS(gps(model = fm, no.sim = no.sim))
## Find the row indices of the patterns in the given data in the matrix of all possible patterns
ind <- apply(Sample(data), 1, '%*%', 2^c(0 : (no.events - 2))) + 1
boot.gps <- vector(mode = "list", length = 2 ^ (no.events - 1))
for (i in 1:B) {
## Draw a bootstrap sample from the data with replacement.
random.rows <- sample(sampleSize(data), sampleSize(data), replace = TRUE)
new.data <- new("RtreemixData", Sample = Sample(data)[random.rows, ])
## Fit an Rtreemix model to it
new.fit <- fit(data = new.data, K = K, noise = TRUE, equal.edgeweights = equal.star, eps = 0.01)
## Calculate the GPS values of all patterns for the new model
new.gps <- GPS(gps(model = new.fit, no.sim = no.sim))
for(j in 1:length(boot.gps))
boot.gps[[j]] <- c(boot.gps[[j]], new.gps[j])
}
sort.boot <- lapply(boot.gps, sort)
## Calculate 95% confidence intervals for the GPS values
boot.intervals <- lapply(sort.boot, function(sort.boot){c(sort.boot[ceiling((B * 2.5) / 100)], sort.boot[as.integer((B * 97.5) / 100)])})
fit.gps.data <- fit.gps[ind]
result <- matrix(nrow = length(ind), ncol = 2)
for(i in 1:length(ind))
result[i, ] <- boot.intervals[[ind[i]]]
return(new("RtreemixGPS",
Model = fm,
Data = data,
SamplingMode = sampling.mode,
SamplingParam = sampling.param,
GPS = fit.gps.data,
gpsCI = result))
})
######################################################################
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.