Nothing
## Copyright 2013, 2014, 2015, 2016, 2017 Ramon Diaz-Uriarte
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
## Does it even make sense to keepPhylog with oncoSimulSample?
## Yes, but think what we store.
oncoSimulSample <- function(Nindiv,
fp,
model = "Exp",
numPassengers = 0,
mu = 1e-6,
muEF = NULL,
detectionSize = round(runif(Nindiv, 1e5, 1e8)),
detectionDrivers = {
if(inherits(fp, "fitnessEffects")) {
if(length(fp$drv)) {
nd <- (2: round(0.75 * length(fp$drv)))
} else {
nd <- 9e6
}
} else {
nd <- (2 : round(0.75 * max(fp)))
}
if(length(nd) == 1) ## for sample
nd <- c(nd, nd)
sample(nd, Nindiv,
replace = TRUE)
},
detectionProb = NA,
sampleEvery = ifelse(model %in% c("Bozic", "Exp"), 1,
0.025),
initSize = 500,
s = 0.1,
sh = -1,
K = initSize/(exp(1) - 1),
minDetectDrvCloneSz = "auto",
extraTime = 0,
finalTime = 0.25 * 25 * 365,
onlyCancer = TRUE,
keepPhylog = FALSE,
mutationPropGrowth = ifelse(model == "Bozic",
FALSE, TRUE),
max.memory = 2000,
max.wall.time.total = 600,
max.num.tries.total = 500 * Nindiv,
## well, obviously they are errors
## errorHitWallTime = TRUE,
## errorHitMaxTries = TRUE,
typeSample = "whole",
thresholdWhole = 0.5,
initMutant = NULL,
AND_DrvProbExit = FALSE,
fixation = NULL,
verbosity = 1,
showProgress = FALSE,
seed = "auto"){
## No longer using mclapply, because of the way we use the limit on
## the number of tries.
## leaving detectionSize and detectionDrivers as they are, produces
## the equivalente of uniform sampling. For last, fix a single number
## detectionDrivers when there are none: had we left it at 0, then
## when there are no drivers we would stop at the first sampling
## period.
if(Nindiv < 1)
stop("Nindiv must be >= 1")
if(keepPhylog)
warning(paste("oncoSimulSample does not return the phylogeny",
"for now, so there is little point in storing it."))
if(max.num.tries.total < Nindiv)
stop(paste("You have requested something impossible: ",
"max.num.tries.total < Nindiv"))
attemptsLeft <- max.num.tries.total
attemptsUsed <- 0
numToRun <- Nindiv
pop <- vector(mode = "list", length = Nindiv)
indiv <- 1
## FIXME! really pass params such as extraTime and minDDr as vectors,
## or give a warning
params <- data.frame(seq.int(Nindiv),
extraTime = extraTime,
minDetectDrvCloneSz = minDetectDrvCloneSz,
detectionSize = detectionSize,
detectionDrivers = detectionDrivers,
stringsAsFactors = TRUE)[, -1, drop = FALSE]
## FIXME: we are not triggering an error, just a message. This is on
## purpose, since some of these conditions DO provide useful
## output. Do we want these to be errors?
f.out.attempts <- function() {
message("Run out of attempts")
return(list(
popSummary = NA,
popSample = NA,
HittedMaxTries = TRUE,
HittedWallTime = FALSE,
UnrecoverExcept = FALSE
))
}
f.out.time <- function() {
message("Run out of time")
return(list(
popSummary = NA,
popSample = NA,
HittedMaxTries = FALSE,
HittedWallTime = TRUE,
UnrecoverExcept = FALSE
))
}
f.out.attempts.cpp <- function() {
message("Run out of attempts (in C++)")
return(list(
popSummary = NA,
popSample = NA,
HittedMaxTries = TRUE,
HittedWallTime = FALSE,
UnrecoverExcept = FALSE
))
}
f.out.time.cpp <- function() {
message("Run out of time (in C++)")
return(list(
popSummary = NA,
popSample = NA,
HittedMaxTries = FALSE,
HittedWallTime = TRUE,
UnrecoverExcept = FALSE
))
}
f.out.unrecover.except <- function(x) {
message("Unrecoverable exception (in C++)")
return(list(
popSummary = NA,
popSample = NA,
HittedMaxTries = NA,
HittedWallTime = NA,
UnrecoverExcept = TRUE,
ExceptionMessage = x$other$ExceptionMessage
))
}
startTime <- Sys.time()
while(TRUE) {
possibleAttempts <- attemptsLeft - (numToRun - 1)
## I think I do not want a try here.
tmp <- oncoSimulIndiv(fp = fp,
model = model,
numPassengers = numPassengers,
mu = mu,
muEF = muEF,
detectionSize = params[indiv, "detectionSize"],
detectionDrivers = params[indiv, "detectionDrivers"],
sampleEvery = sampleEvery,
initSize = initSize,
s = s,
sh = sh,
K = K,
minDetectDrvCloneSz = params[indiv, "minDetectDrvCloneSz"],
extraTime = params[indiv, "extraTime"],
finalTime = finalTime,
max.memory = max.memory,
max.wall.time = max.wall.time.total,
max.num.tries = possibleAttempts,
verbosity = verbosity,
keepEvery = -9,
onlyCancer = onlyCancer,
errorHitWallTime = TRUE,
errorHitMaxTries = TRUE,
seed = seed,
initMutant = initMutant,
keepPhylog = keepPhylog,
mutationPropGrowth = mutationPropGrowth,
detectionProb = detectionProb,
AND_DrvProbExit = AND_DrvProbExit,
fixation = fixation)
if(tmp$other$UnrecoverExcept) {
return(f.out.unrecover.except(tmp))
}
pop[[indiv]] <- tmp
numToRun <- (numToRun - 1)
attemptsUsed <- attemptsUsed + tmp$other$attemptsUsed
attemptsLeft <- (max.num.tries.total - attemptsUsed)
if(showProgress) {
cat("...... Done individual ", indiv,
". Used ", attemptsUsed, "attempts.",
". Running for ", as.double(difftime(Sys.time(),
startTime, units = "secs")),
" secs.\n"
)
}
indiv <- indiv + 1
## We need to check in exactly this order. Attempts left only
## matters if no remaining individuals to run. But C++ might bail
## out in exactly the last individual
if(
(exists("HittedMaxTries", where = tmp) &&
tmp[["HittedMaxTries"]]) ) {
## in C++ code
return(f.out.attempts.cpp())
} else if(
(exists("HittedWallTime", where = tmp) &&
tmp[["HittedWallTime"]]) ) {
## in C++ code
return(f.out.time.cpp())
} else if( indiv > Nindiv ) {
if(verbosity > 0)
message(paste0("Successfully sampled ", Nindiv, " individuals"))
class(pop) <- "oncosimulpop"
if(inherits(fp, "fitnessEffects")) {
geneNames <- names(getNamesID(fp))
} else {
geneNames <- NULL
}
return(list(
popSummary = summary(pop),
popSample = samplePop(pop, timeSample = "last",
typeSample = typeSample,
thresholdWhole = thresholdWhole,
geneNames = geneNames),
attemptsUsed = attemptsUsed,
probCancer = Nindiv/attemptsUsed,
HittedMaxTries = FALSE,
HittedWallTime = FALSE,
UnrecoverExcept = FALSE
))
} else if( attemptsLeft <= 0 ) {
## it is very unlikely this will ever happen.
return(f.out.attempts())
} else if( as.double(difftime(Sys.time(), startTime, units = "secs"))
> max.wall.time.total ) {
return(f.out.time())
}
}
}
add_noise <- function(x, properr) {
if(properr <= 0) {
return(x)
}
else {
if(properr > 1)
stop("Proportion with error cannot be > 1")
nn <- prod(dim(x))
flipped <- sample(nn, round(nn * properr))
x[flipped] <- as.integer(!x[flipped])
return(x)
}
}
samplePop <- function(x, timeSample = "last",
typeSample = "whole",
thresholdWhole = 0.5,
geneNames = NULL,
popSizeSample = NULL,
propError = 0) {
## timeSample <- match.arg(timeSample)
gN <- geneNames
if(!is.null(popSizeSample) && (length(popSizeSample) > 1) &&
(length(popSizeSample) != length(x))) {
message("length popSizeSample != number of subjects")
popSizeSample <- rep(popSizeSample, length.out = length(x))
}
## A hack to prevent Map from crashing with a NULL
if(is.null(popSizeSample)) popSizeSample <- -99
if(inherits(x, "oncosimulpop")) {
z <- do.call(rbind,
Map(get.mut.vector,
x = x,
timeSample = timeSample,
typeSample = typeSample,
thresholdWhole = thresholdWhole,
popSizeSample = popSizeSample
))
## We need to check if the object is coming from v.2., to avoid
## having to force passing a vector of names
if(is.null(gN) && (!is.null(x[[1]]$geneNames)))
gN <- x[[1]]$geneNames
} else {
z <- get.mut.vector(x,
timeSample = timeSample,
typeSample = typeSample,
thresholdWhole = thresholdWhole,
popSizeSample = popSizeSample)
dim(z) <- c(1, length(z))
if(is.null(gN) && (!is.null(x$geneNames)))
gN <- x$geneNames
}
message("\n Subjects by Genes matrix of ",
nrow(z), " subjects and ",
ncol(z), " genes.\n")
if(propError > 0) {
z <- add_noise(z, propError)
}
if(!is.null(gN)) {
colnames(z) <- gN
} else {
colnames(z) <- seq_len(ncol(z))
}
return(z)
}
## single_largest_last_pop <- function(x) {
## last <- nrow(x$pops.by.time)
## largest <- which.max(x$pops.by.time[last, , drop = FALSE]) - 1
## genot <- x[["GenotypesLabels"]][largest]
## strsplit(genot, split = ", ")[[1]]
## }
## ## like samplePop(x, timeSample = "last") but return the single most abundant genotype
## Just a prototype. Not well tested.
## largest_last_pop <- function(x) {
## y <- lapply(x, single_largest_last_pop)
## allg <- sort(unique(unlist(y)))
## m <- t(vapply(y, function(z) {as.integer(allg %in% z) },
## integer(length(allg)) ))
## colnames(m) <- allg
## return(m)
## }
oncoSimulPop <- function(Nindiv,
fp,
model = "Exp",
numPassengers = 0,
mu = 1e-6,
muEF = NULL,
detectionSize = 1e8,
detectionDrivers = 4,
detectionProb = NA,
sampleEvery = ifelse(model %in% c("Bozic", "Exp"), 1,
0.025),
initSize = 500,
s = 0.1,
sh = -1,
K = initSize/(exp(1) - 1),
keepEvery = sampleEvery,
minDetectDrvCloneSz = "auto",
extraTime = 0,
## used to be this
## ifelse(model \%in\% c("Bozic", "Exp"), -9,
## 5 * sampleEvery),
finalTime = 0.25 * 25 * 365,
onlyCancer = TRUE,
keepPhylog = FALSE,
mutationPropGrowth = ifelse(model == "Bozic",
FALSE, TRUE),
max.memory = 2000,
max.wall.time = 200,
max.num.tries = 500,
errorHitWallTime = TRUE,
errorHitMaxTries = TRUE,
initMutant = NULL,
AND_DrvProbExit = FALSE,
fixation = NULL,
verbosity = 0,
mc.cores = detectCores(),
seed = "auto") {
if(Nindiv < 1)
stop("Nindiv must be >= 1")
if(.Platform$OS.type == "windows") {
if(mc.cores != 1)
message("You are running Windows. Setting mc.cores = 1")
mc.cores <- 1
}
pop <- mclapply(seq.int(Nindiv),
function(x)
oncoSimulIndiv(
fp = fp,
model = model,
numPassengers = numPassengers,
mu = mu,
muEF = muEF,
detectionSize = detectionSize,
detectionDrivers = detectionDrivers,
sampleEvery = sampleEvery,
initSize = initSize,
s = s,
sh = sh,
K = K,
keepEvery = keepEvery,
minDetectDrvCloneSz = minDetectDrvCloneSz,
extraTime = extraTime,
finalTime = finalTime,
onlyCancer = onlyCancer,
max.memory = max.memory,
max.wall.time = max.wall.time,
max.num.tries = max.num.tries,
errorHitWallTime = errorHitWallTime,
errorHitMaxTries = errorHitMaxTries,
verbosity = verbosity,
seed = seed, keepPhylog = keepPhylog,
initMutant = initMutant,
mutationPropGrowth = mutationPropGrowth,
detectionProb = detectionProb,
AND_DrvProbExit = AND_DrvProbExit,
fixation = fixation),
mc.cores = mc.cores)
## mc.allow.recursive = FALSE ## FIXME: remove?
## done for covr issue
## https://github.com/r-lib/covr/issues/335#issuecomment-424116766
class(pop) <- "oncosimulpop"
attributes(pop)$call <- match.call()
return(pop)
}
## where is the default K coming from? Here:
## log( (K+N)/K ) = 1; k + n = k * exp(1); k(exp - 1) = n; k = n/(exp - 1)
oncoSimulIndiv <- function(fp,
model = "Exp",
numPassengers = 0,
mu = 1e-6,
muEF = NULL,
detectionSize = 1e8,
detectionDrivers = 4,
detectionProb = NA,
sampleEvery = ifelse(model %in% c("Bozic", "Exp"), 1,
0.025),
initSize = 500,
s = 0.1,
sh = -1,
K = initSize/(exp(1) - 1),
keepEvery = sampleEvery,
minDetectDrvCloneSz = "auto",
extraTime = 0,
## used to be this
## ifelse(model \%in\% c("Bozic", "Exp"), -9,
## 5 * sampleEvery),
finalTime = 0.25 * 25 * 365,
onlyCancer = TRUE,
keepPhylog = FALSE,
mutationPropGrowth = ifelse(model == "Bozic",
FALSE, TRUE),
max.memory = 2000,
max.wall.time = 200,
max.num.tries = 500,
errorHitWallTime = TRUE,
errorHitMaxTries = TRUE,
verbosity = 0,
initMutant = NULL,
AND_DrvProbExit = FALSE,
fixation = NULL,
seed = NULL
) {
call <- match.call()
if(all(c(is_null_na(detectionProb),
is_null_na(detectionSize),
is_null_na(detectionDrivers),
is_null_na(finalTime),
is_null_na(fixation)
)))
stop("At least one stopping condition should be given.",
" At least one of detectionProb, detectionSize, detectionDrivers,",
" finalTime. Otherwise, we'll run until aborted by max.wall.time,",
" max.num.tries, and the like.")
if(AND_DrvProbExit && (is_null_na(detectionProb) || is_null_na(detectionDrivers)))
stop("AND_DrvProbExit is TRUE: both of detectionProb and detectionDrivers",
" must be non NA.")
if(AND_DrvProbExit && !is_null_na(detectionSize)) {
warning("With AND_DrvProbExit = TRUE, detectionSize is ignored.")
detectionSize <- NA
}
if(inherits(fp, "fitnessEffects")) {
s <- sh <- NULL ## force it soon!
}
## legacies from poor name choices
typeFitness <- switch(model,
"Bozic" = "bozic1",
"Exp" = "exp",
"McFarlandLog" = "mcfarlandlog",
"McFL" = "mcfarlandlog",
stop("No valid value for model")
)
if(initSize < 1)
stop("initSize < 1")
if( (K < 1) && (model %in% c("McFL", "McFarlandLog") )) {
stop("Using McFarland's model: K cannot be < 1")
} ## if ( !(model %in% c("McFL", "McFarlandLog") )) {
## K <- 1 ## K is ONLY used for McFarland; set it to 1, to avoid
## ## C++ blowing.
if(typeFitness == "exp") {
death <- 1
## mutationPropGrowth <- 1
} else {
death <- -99
## mutationPropGrowth <- 0
}
birth <- -99
if( (typeFitness == "mcfarlandlog") &&
(sampleEvery > 0.05)) {
warning("With the McFarland model you often want smaller sampleEvery")
}
if(minDetectDrvCloneSz == "auto") {
if(model %in% c("Bozic", "Exp") )
minDetectDrvCloneSz <- 0
else if (model %in% c("McFL", "McFarlandLog"))
minDetectDrvCloneSz <- initSize
## minDetectDrvCloneSz <- eFinalMf(initSize, s, detectionDrivers)
else
stop("Unknown model")
}
if( (length(mu) > 1) && !inherits(fp, "fitnessEffects"))
stop("Per-gene mutation rates cannot be used with the old poset format")
if(any(mu < 0)) {
stop("(at least one) mutation rate (mu) is negative")
}
## We do not test for equality to 0. That might be a weird but
## legitimate case?
## No user-visible magic numbers
## if(is.null(keepEvery))
## keepEvery <- -9
if(is_null_na(keepEvery)) keepEvery <- -9
if( (keepEvery > 0) & (keepEvery < sampleEvery)) {
keepEvery <- sampleEvery
warning("setting keepEvery <- sampleEvery")
}
if( (typeFitness == "bozic1") && (mutationPropGrowth) )
warning("Using fitness Bozic (bozic1) with mutationPropGrowth = TRUE;",
"this will have no effect.")
if( (typeFitness == "exp") && (death != 1) )
warning("Using fitness exp with death != 1")
if(!is_null_na(detectionDrivers) && (detectionDrivers >= 1e9))
stop("detectionDrivers > 1e9; this doesn't seem reasonable")
if(is_null_na(detectionDrivers)) detectionDrivers <- (2^31) - 1
if(is_null_na(detectionSize)) detectionSize <- Inf
if(is_null_na(finalTime)) finalTime <- Inf
if(is_null_na(sampleEvery)) stop("sampleEvery cannot be NULL or NA")
if(!inherits(fp, "fitnessEffects")) {
if(any(unlist(lapply(list(fp,
numPassengers,
s, sh), is.null)))) {
m <- paste("You are using the old poset format.",
"You must specify all of poset, numPassengers",
"s, and sh.")
stop(m)
}
if(AND_DrvProbExit) {
stop("The AND_DrvProbExit = TRUE setting is invalid",
" with the old poset format.")
}
if(!is.null(muEF))
stop("Mutator effects cannot be specified with the old poset format.")
if( length(initMutant) > 0)
warning("With the old poset format you can no longer use initMutant.",
" The initMutant you passed will be ignored.")
## stop("With the old poset, initMutant can only take a single value.")
if(!is_null_na(fixation))
stop("'fixation' cannot be specified with the old poset format.")
## Seeding C++ is now much better in new version
if(is.null(seed) || (seed == "auto")) {## passing a null creates a random seed
## name is a legacy. This is really the seed for the C++ generator.
## Nope, we cannot use 2^32, because as.integer will fail.
seed <- as.integer(round(runif(1, min = 0, max = 2^16)))
}
if(verbosity >= 2)
cat(paste("\n Using ", seed, " as seed for C++ generator\n"))
if(!is_null_na(detectionProb)) stop("detectionProb cannot be used in v.1 objects")
## if(message.v1)
## message("You are using the old poset format. Consider using the new one.")
## A simulation stops if cancer or finalTime appear, the first
## one. But if we set onlyCnacer = FALSE, we also accept simuls
## without cancer (or without anything)
op <- try(oncoSimul.internal(poset = fp, ## restrict.table = rt,
## numGenes = numGenes,
numPassengers = numPassengers,
typeCBN = "CBN",
birth = birth,
s = s,
death = death,
mu = mu,
initSize = initSize,
sampleEvery = sampleEvery,
detectionSize = detectionSize,
finalTime = finalTime,
initSize_species = 2000,
initSize_iter = 500,
seed = seed,
verbosity = verbosity,
speciesFS = 10000,
ratioForce = 2,
typeFitness = typeFitness,
max.memory = max.memory,
mutationPropGrowth = mutationPropGrowth,
initMutant = -1,
max.wall.time = max.wall.time,
max.num.tries = max.num.tries,
keepEvery = keepEvery,
## alpha = 0.0015,
sh = sh,
K = K,
minDetectDrvCloneSz = minDetectDrvCloneSz,
extraTime = extraTime,
detectionDrivers = detectionDrivers,
onlyCancer = onlyCancer,
errorHitWallTime = errorHitWallTime,
errorHitMaxTries = errorHitMaxTries),
silent = !verbosity)
objClass <- "oncosimul"
} else {
s <- sh <- NULL ## force it.
if(numPassengers != 0)
warning(paste("Specifying numPassengers has no effect",
" when using fitnessEffects objects. ",
" The fitnessEffects objects are much more ",
"flexible and you can use, for example,",
"the noIntGenes component for passengers."))
if(is.null(seed)) {## Passing a null creates a random seed
## We use a double, to be able to pass in range > 2^16.
## Do not use 0, as that is our way of signaling to C++ to
## generate the seed.
seed <- round(runif(1, min = 1, max = 2^32))
if(verbosity >= 2)
cat(paste("\n Using ", seed, " as seed for C++ generator\n"))
} else if(seed == "auto") {
seed <- 0.0
if(verbosity >= 2)
cat("\n A (high quality) random seed will be generated in C++\n")
}
if(!is_null_na(fixation)) {
if( (!is.list(fixation)) && (!is.vector(fixation)) )
stop("'fixation' must be a list or a vector.")
if(!(all(unlist(lapply(fixation, is.vector)))))
stop("Each element of 'fixation' must be a single element vector.")
if(!(any(unlist(lapply(fixation, class)) == "character")))
stop("At least one element of 'fixation' must be a single element character vector.")
if(!(all( unlist(lapply(fixation, length)) == 1)))
stop("Each element of 'fixation' must be a single element vector.")
if(AND_DrvProbExit)
stop("It makes no sense to pass AND_DrvProbExit and a fixation list.")
}
op <- try(nr_oncoSimul.internal(rFE = fp,
birth = birth,
death = death,
mu = mu,
initSize = initSize,
sampleEvery = sampleEvery,
detectionSize = detectionSize,
finalTime = finalTime,
initSize_species = 2000,
initSize_iter = 500,
seed = seed,
verbosity = verbosity,
speciesFS = 10000,
ratioForce = 2,
typeFitness = typeFitness,
max.memory = max.memory,
mutationPropGrowth = mutationPropGrowth,
initMutant = initMutant,
max.wall.time = max.wall.time,
max.num.tries = max.num.tries,
keepEvery = keepEvery,
## alpha = 0.0015,
K = K,
minDetectDrvCloneSz = minDetectDrvCloneSz,
extraTime = extraTime,
detectionDrivers = detectionDrivers,
onlyCancer = onlyCancer,
errorHitWallTime = errorHitWallTime,
errorHitMaxTries = errorHitMaxTries,
keepPhylog = keepPhylog,
MMUEF = muEF,
detectionProb = detectionProb,
AND_DrvProbExit = AND_DrvProbExit,
fixation = fixation),
silent = !verbosity)
objClass <- c("oncosimul", "oncosimul2")
}
if(inherits(op, "try-error")) {
## if(length(grep("BAIL OUT NOW", op)))
stop(paste("Unrecoverable error:", op ))
}
if(verbosity >= 2) {
cat("\n ... finished this run:")
cat("\n Total Pop Size = ", op$TotalPopSize)
cat("\n Drivers Last = ", op$MaxDriversLast)
cat("\n Final Time = ", op$FinalTime, "\n")
}
class(op) <- objClass
attributes(op)$call <- call
return(op)
}
summary.oncosimul <- function(object, ...) {
## This should be present even in HittedWallTime and HittedMaxTries
## if those are not regarded as errors
pbp <- ("pops.by.time" %in% names(object) )
if(object$other$UnrecoverExcept) { ## yes, when bailing out from
## except. can have just minimal
## content
return(NA)
} else if( !pbp & (object$HittedWallTime || object$HittedMaxTries) ) {
return(NA)
} else if ( !pbp & !(object$HittedWallTime || object$HittedMaxTries) ) {
stop("Eh? No pops.by.time but did not hit wall time or max tries? BUG!")
} else {
tmp <- object[c("NumClones", "TotalPopSize", "LargestClone",
"MaxNumDrivers", "MaxDriversLast",
"NumDriversLargestPop", "TotalPresentDrivers",
"FinalTime", "NumIter", "HittedWallTime",
"HittedMaxTries")]
tmp$errorMF <- object$other$errorMF
tmp$minDMratio <- object$other$minDMratio
tmp$minBMratio <- object$other$minBMratio
if( (tmp$errorMF == -99)) tmp$errorMF <- NA
if( (tmp$minDMratio == -99)) tmp$minDMratio <- NA
if( (tmp$minBMratio == -99)) tmp$minBMratio <- NA
tmp$OccurringDrivers <- object$OccurringDrivers
return(as.data.frame(tmp, stringsAsFactors = FALSE))
}
}
print.oncosimul <- function(x, ...) {
cat("\nIndividual OncoSimul trajectory with call:\n ")
print(attributes(x)$call)
cat("\n")
print(summary(x))
if(inherits(x, "oncosimul2")) {
## we know small object
cat("\n")
cat("Final population composition:\n")
df <- data.frame(Genotype = x$GenotypesLabels,
N = x$pops.by.time[nrow(x$pops.by.time), -1],
stringsAsFactors = TRUE)
print(df)
}
}
## I want this to return things that are storable
## summary.oncosimulpop <- function(object, ...) {
## as.data.frame(rbindlist(lapply(object, summary)))
## }
summary.oncosimulpop <- function(object, ...) {
## some simulations could have failed seriously returning a NULL
## FIXME: how, why? Out of memory?
rm1 <- which(unlist(lapply(object, is.null)))
if(length(rm1) > 0) {
if(length(rm1) < length(object)) {
warning("Some simulations returned NULL. They will be removed",
" from the summary. The NULL runs are ",
paste(rm1, collapse = ", "),
".")
} else {
warning("All simulations returned NULL.")
return(NA)
}
}
## I do not want to rm above for two reasons:
## - avoid re-asigning to the object
## - changing the numbering of the indices below if NULLs
## So I need something more involved
## Figure out exactly what the summary of a NULL is
sumnull <- summary(NULL)
tmp <- lapply(object, summary)
## rm <- which(unlist(lapply(tmp,
## function(x) (length(x) == 1) &&
## (is.na(x) || is.null(x)))))
rm <- which(unlist(lapply(tmp,
function(x) ((length(x) == 1) &&
(is.na(x) || is.null(x))) ||
(identical(x, sumnull)))))
if(length(rm) > 0)
if(length(rm) < length(object)) {
warning("Some simulations seem to have failed and will be removed",
" from the summary. The failed runs are ",
paste(rm, collapse = ", "),
".")
tmp <- tmp[-rm]
} else {
warning("All simulations failed.")
return(NA)
}
as.data.frame(rbindlist(tmp), stringsAsFactors = TRUE)
}
print.oncosimulpop <- function(x, ...) {
cat("\nPopulation of OncoSimul trajectories of",
length(x), "individuals. Call :\n")
print(attributes(x)$call)
cat("\n")
print(summary(x))
}
plot.oncosimulpop <- function(x, ask = TRUE,
show = "drivers",
type = ifelse(show == "genotypes",
"stacked", "line"),
col = "auto",
log = ifelse(type == "line", "y", ""),
ltyClone = 2:6,
lwdClone = 0.9,
ltyDrivers = 1,
lwdDrivers = 3,
xlab = "Time units",
ylab = "Number of cells",
plotClones = TRUE,
plotDrivers = TRUE,
addtot = FALSE,
addtotlwd = 0.5,
ylim = NULL,
xlim = NULL,
thinData = FALSE,
thinData.keep = 0.1,
thinData.min = 2,
plotDiversity = FALSE,
order.method = "as.is",
stream.center = TRUE,
stream.frac.rand = 0.01,
stream.spar = 0.2,
border = NULL,
lwdStackedStream = 1,
srange = c(0.4, 1),
vrange = c(0.8, 1),
breakSortColors = "oe",
legend.ncols = "auto",
...
) {
op <- par(ask = ask)
on.exit(par(op))
null <- lapply(x, function(z)
plot.oncosimul(z,
show = show,
type = type,
col = col,
log = log,
ltyClone = ltyClone,
lwdClone = lwdClone,
ltyDrivers = ltyDrivers,
lwdDrivers = lwdDrivers,
xlab = xlab,
ylab = ylab,
plotClones = plotClones,
plotDrivers = plotDrivers,
addtot = addtot,
addtotlwd = addtotlwd,
ylim = ylim,
xlim = xlim,
thinData = thinData,
thinData.keep = thinData.keep,
thinData.min = thinData.min,
plotDiversity = plotDiversity,
order.method = order.method,
stream.center = stream.center,
stream.frac.rand = stream.frac.rand,
stream.spar = stream.spar,
border = border,
lwdStackedStream = lwdStackedStream,
srange = srange,
vrange = vrange,
breakSortColors = breakSortColors,
legend.ncols = legend.ncols,
...))
}
## plot.oncosimul <- function(x, col = c(8, "orange", 6:1),
## log = "y",
## ltyClone = 2:6,
## lwdClone = 0.9,
## ltyDrivers = 1,
## lwdDrivers = 3,
## xlab = "Time units",
## ylab = "Number of cells",
## plotClones = TRUE,
## plotDrivers = TRUE,
## addtot = FALSE,
## addtotlwd = 0.5,
## yl = NULL,
## thinData = FALSE,
## thinData.keep = 0.1,
## thinData.min = 2,
## plotDiversity = FALSE,
## ...
## ) {
## if(thinData)
## x <- thin.pop.data(x, keep = thinData.keep, min.keep = thinData.min)
## ## uvx
## if(!inherits(x, "oncosimul2"))
## ndr <- colSums(x$Genotypes[1:x$NumDrivers, , drop = FALSE])
## else {
## ndr <- colSums(x$Genotypes[x$Drivers, , drop = FALSE])
## }
## if(is.null(yl)) {
## if(log %in% c("y", "xy", "yx") )
## yl <- c(1, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum)))
## else
## yl <- c(0, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum)))
## }
## if(plotDiversity) {
## par(fig = c(0, 1, 0.8, 1))
## m1 <- par()$mar
## m <- m1
## m[c(1, 3)] <- c(0, 0.7)
## op <- par(mar = m )
## plotShannon(x)
## par(op)
## m1[c(3)] <- 0.2
## op <- par(mar = m1)
## par(fig = c(0, 1, 0, 0.8), new = TRUE)
## }
## if(plotClones) {
## plotClones(x,
## ndr = ndr,
## xlab = xlab,
## ylab = ylab,
## lty = ltyClone,
## col = col,
## ylim = yl,
## lwd = lwdClone,
## axes = FALSE,
## log = log,
## ...)
## }
## if(plotClones && plotDrivers)
## par(new = TRUE)
## if(plotDrivers){
## plotDrivers0(x,
## ndr,
## timescale = 1,
## trim.no.drivers = FALSE,
## xlab = "", ylab = "",
## lwd = lwdDrivers,
## lty = ltyDrivers,
## col = col,
## addtot = addtot,
## addtotlwd = addtotlwd,
## log = log, ylim = yl,
## ...)
## }
## }
plot.oncosimul <- function(x,
show = "drivers",
type = ifelse(show == "genotypes",
"stacked", "line"),
col = "auto",
log = ifelse(type == "line", "y", ""),
ltyClone = 2:6,
lwdClone = 0.9,
ltyDrivers = 1,
lwdDrivers = 3,
xlab = "Time units",
ylab = "Number of cells",
plotClones = TRUE,
plotDrivers = TRUE,
addtot = FALSE,
addtotlwd = 0.5,
ylim = NULL,
xlim = NULL,
thinData = FALSE,
thinData.keep = 0.1,
thinData.min = 2,
plotDiversity = FALSE,
order.method = "as.is",
stream.center = TRUE,
stream.frac.rand = 0.01,
stream.spar = 0.2,
border = NULL,
lwdStackedStream = 1,
srange = c(0.4, 1),
vrange = c(0.8, 1),
breakSortColors = "oe",
legend.ncols = "auto",
...
) {
if(!(type %in% c("stacked", "stream", "line")))
stop("Type of plot unknown: it must be one of",
"stacked, stream or line")
if(!(show %in% c("genotypes", "drivers")))
stop("show must be one of ",
"genotypes or drivers")
if(!(breakSortColors %in% c("oe", "distave", "random")))
stop("breakSortColors must be one of ",
"oe, distave, or random")
colauto <- FALSE
if((length(col) ==1) && (col == "auto") &&
(type == "line") && (show == "drivers"))
col <- c(8, "orange", 6:1)
if((length(col) ==1) && (col == "auto") &&
(show == "genotypes")) {
## For categorical data, I find Dark2, Paired, or Set1 to work best.
col <- colorRampPalette(brewer.pal(8, "Dark2"))(ncol(x$pops.by.time) - 1)
colauto <- TRUE
}
if(show == "genotypes") {
plotDrivers <- FALSE
plotClones <- TRUE
}
if(thinData)
x <- thin.pop.data(x, keep = thinData.keep, min.keep = thinData.min)
if(!is.null(xlim))
x <- xlim.pop.data(x, xlim)
## For genotypes, ndr is now the genotypes. Actually, ndr is now just
## a sequence 1:(ncol(y) - 1)
## The user will want to change the colors, like a colorRamp, etc. Or
## rainbow.
## genotypes and line, always call plotDrivers0
if(show == "drivers") {
if(!inherits(x, "oncosimul2"))
ndr <- colSums(x$Genotypes[1:x$NumDrivers, , drop = FALSE])
else {
ndr <- colSums(x$Genotypes[x$Drivers, , drop = FALSE])
}
} else { ## show we are showing genotypes
ndr <- 1:(ncol(x$pops.by.time) - 1)
}
if((type == "line") && is.null(ylim)) {
if(log %in% c("y", "xy", "yx") )
ylim <- c(1, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum)))
else
ylim <- c(0, max(apply(x$pops.by.time[, -1, drop = FALSE], 1, sum)))
}
if(plotDiversity) {
oppd <- par(fig = c(0, 1, 0.8, 1))
m1 <- par()$mar
m <- m1
m[c(1, 3)] <- c(0, 0.7)
op <- par(mar = m )
plotShannon(x)
par(op)
m1[c(3)] <- 0.2
op <- par(mar = m1)
par(fig = c(0, 1, 0, 0.8), new = TRUE)
}
## Shows its history: plotClones makes plotDrivers0 unneeded with
## stacked and stream plots. But now so with line plot.
## When showing genotypes, plotDrivers0 with line only used for
## showing the legend.
if(plotClones) {
plotClonesSt(x,
ndr = ndr,
show = show,
na.subs = TRUE,
log = log,
lwd = lwdClone,
lty = ifelse(show == "drivers", ltyClone, ltyDrivers),
col = col,
order.method = order.method,
stream.center = stream.center,
stream.frac.rand = stream.frac.rand,
stream.spar = stream.spar,
border = border,
srange = srange,
vrange = vrange,
type = type,
breakSortColors = breakSortColors,
colauto = colauto,
legend.ncols = legend.ncols,
lwdStackedStream = lwdStackedStream,
xlab = xlab,
ylab = ylab,
ylim = ylim,
xlim = xlim,
...)
}
if(plotClones && plotDrivers && (type == "line"))
par(new = TRUE)
if( plotDrivers && (type == "line") ) {
plotDrivers0(x,
ndr,
timescale = 1,
trim.no.drivers = FALSE,
xlab = "", ylab = "",
lwd = lwdDrivers,
lty = ltyDrivers,
col = col,
addtot = addtot,
addtotlwd = addtotlwd,
log = log, ylim = ylim,
xlim = xlim,
legend.ncols = legend.ncols,
...)
}
if(plotDiversity) {
par(oppd)
}
}
plotClonesSt <- function(z,
ndr,
show = "drivers",
na.subs = TRUE,
log = "y",
lwd = 1,
## type = "l",
lty = 1:8, col = 1:9,
order.method = "as.is",
stream.center = TRUE,
stream.frac.rand = 0.01,
stream.spar = 0.2,
border = NULL,
srange = c(0.4, 1),
vrange = c(0.8, 1),
type = "stacked",
breakSortColors = "oe",
colauto = TRUE,
legend.ncols = "auto",
lwdStackedStream = 1,
xlab = "Time units",
ylab = "Number of cells",
ylim = NULL,
xlim = NULL,
...) {
## if given ndr, we order columns based on ndr, so clones with more
## drivers are plotted last
y <- z$pops.by.time[, 2:ncol(z$pops.by.time), drop = FALSE]
## Code in stacked and stream plots relies on there being no NAs. Could
## change it, but it does not seem reasonable.
## But my original plotting code runs faster and is simpler if 0 are
## dealt as NAs (which also makes log transformations simpler).
if(type %in% c("stacked", "stream") )
na.subs <- FALSE
if(na.subs){
y[y == 0] <- NA
}
## if(is.null(ndr))
## stop("Should never have null ndr")
## if(!is.null(ndr)) {
## could be done above, to avoid creating
## more copies
oo <- order(ndr)
y <- y[, oo, drop = FALSE]
ndr <- ndr[oo]
if(show == "drivers") {
col <- rep(col, length.out = (1 + max(ndr)))[ndr + 1]
lty <- rep(lty, length.out = ncol(y))
} else {
if(length(col) < max(ndr))
warning("Repeating colors; you might want to",
"pass a col vector of more elements")
col <- rep(col, length.out = (max(ndr)))[ndr]
}
## }
if(type == "line") {
matplot(x = z$pops.by.time[, 1],
y = y,
log = log, type = "l",
col = col, lty = lty,
lwd = lwd,
xlab = xlab,
ylab = ylab,
ylim = ylim,
xlim = xlim,
...)
box()
if(show == "genotypes") {
if(!inherits(z, "oncosimul2")) {
ldrv <- genotypeLabel(z)
} else {
ldrv <- z$GenotypesLabels
}
ldrv[ldrv == ""] <- "WT"
ldrv[ldrv == " _ "] <- "WT"
if(legend.ncols == "auto") {
if(length(ldrv) > 6) legend.ncols <- 2
else legend.ncols <- 1
}
legend(x = "topleft",
title = "Genotypes",
lty = lty,
col = col, lwd = lwd,
legend = ldrv,
ncol = legend.ncols)
}
} else {
ymax <- colSums(y)
if((show == "drivers") || ((show == "genotypes") && (colauto))) {
cll <- myhsvcols(ndr, ymax, srange = srange, vrange = vrange,
breakSortColors = breakSortColors)
} else {
cll <- list(colors = col)
}
x <- z$pops.by.time[, 1]
if(grepl("y", log)) {
stop("It makes little sense to do a stacked/stream",
"plot after taking the log of the y data.")
}
if(grepl("x", log)) {
x <- log10(x + 1)
}
if (type == "stacked") {
plot.stacked2(x = x,
y = y,
order.method = order.method,
border = border,
lwd = lwdStackedStream,
col = cll$colors,
log = log,
xlab = xlab,
ylab = ylab,
ylim = ylim,
xlim = xlim,
...)
} else if (type == "stream") {
plot.stream2(x = x,
y = y,
order.method = order.method,
border = border,
lwd = lwdStackedStream,
col = cll$colors,
frac.rand = stream.frac.rand,
spar = stream.spar,
center = stream.center,
log = log,
xlab = xlab,
ylab = ylab,
ylim = ylim,
xlim = xlim,
...)
}
if(show == "drivers") {
if(legend.ncols == "auto") {
if(length(cll$colorsLegend$Drivers) > 6) legend.ncols <- 2
else legend.ncols <- 1
}
legend(x = "topleft",
title = "Number of drivers",
pch = 15,
## lty = 1,
## lwd = 2,
col = cll$colorsLegend$Color,
legend = cll$colorsLegend$Drivers,
ncol = legend.ncols)
} else if (show == "genotypes") {
if(!inherits(z, "oncosimul2")) {
ldrv <- genotypeLabel(z)
} else {
ldrv <- z$GenotypesLabels
}
ldrv[ldrv == ""] <- "WT"
ldrv[ldrv == " _ "] <- "WT"
if(legend.ncols == "auto") {
if(length(ldrv) > 6) legend.ncols <- 2
else legend.ncols <- 1
}
legend(x = "topleft",
title = "Genotypes",
pch = 15,
col = cll$colors,
legend = ldrv,
ncol = legend.ncols)
}
}
}
relabelLogaxis <- function(side = 2) {
## we do a plot but pass the already transformed data; relabel
## afterwards.
po <- axis( side = side, labels = FALSE, tick = FALSE, lwd = 0)
axis(side = side, labels = 10^po, at = po, tick = TRUE)
}
myhsvcols <- function(ndr, ymax, srange = c(0.4, 1),
vrange = c(0.8, 1),
breakSortColors = "oe") {
## Generate a set of colors so that:
## - easy to tell when we increase number of drivers
## - reasonably easy to tell in a legend
## - different clones with same number of drivers have "similar" colors
## I use hsv color specification as this seems the most reasonable.
minor <- table(ndr)
major <- length(unique(ndr)) ## yeah same as length(minor), but least
## surprise
h <- seq(from = 0, to = 1, length.out = major + 1)[-1]
## do not keep similar hues next to each other
if(breakSortColors == "oe") {
oe <- seq_along(h) %% 2
h <- h[order(oe, h)]
} else if(breakSortColors == "distave"){
sl <- seq_along(h)
h <- h[order(-abs(mean(sl) - sl))]
} else if(breakSortColors == "random") {
rr <- order(runif(length(h)))
h <- h[rr]
}
hh <- rep(h, minor)
sr <- unlist(lapply(minor, function(x)
seq(from = srange[1], to = srange[2], length.out = x)))
sv <- unlist(lapply(minor, function(x)
seq(from = vrange[1], to = vrange[2], length.out = x))
)
colors <- hsv(hh, sr, sv)
## This gives "average" or "median" color for legend
## colorsLegend <- aggregate(list(Color = colors), list(Drivers = ndr),
## function(x)
## as.character(x[((length(x) %/% 2) + 1 )]))
## Give the most abundant class color as the legend. Simpler to read
colorsLegend <- by(data.frame(Color = colors, maxnum = ymax),
list(Drivers = ndr),
function(x) as.character(x$Color[which.max(x$maxnum)]))
colorsLegend <- data.frame(Drivers = as.integer(row.names(colorsLegend)),
Color = cbind(colorsLegend)[, 1],
stringsAsFactors = FALSE)
## To show what it would look like
## plot(1:(sum(minor)), col = colors, pch = 16, cex = 3)
## legend(1, length(ndr), col = colorsLegend$Color, legend = names(minor),
## pch = 16)
return(list(colors = colors,
colorsLegend = colorsLegend))
}
genotypeLabel <- function(x) {
## For oncosimul objects, that do not have the GenotypesLabels object
apply(x$Genotypes, 2,
function(x) paste(which(x == 1), sep = "", collapse = ", "))
}
plotDrivers0 <- function(x,
ndr,
timescale = 1,
trim.no.drivers = FALSE,
addtot = TRUE,
addtotlwd = 2,
na.subs = TRUE, log = "y", type = "l",
lty = 1:9, col = c(8, "orange", 6:1),
lwd = 2,
legend.ncols = "auto",
...) {
## z <- create.drivers.by.time(x, numDrivers)
z <- create.drivers.by.time(x, ndr)
## We can now never enter here because trim.no.drivers is always FALSE
## in call.
## if(trim.no.drivers && x$MaxDriversLast) {
## fi <- which(apply(z[, -c(1, 2), drop = FALSE], 1,
## function(x) sum(x) > 0))[1]
## z <- z[fi:nrow(z), , drop = FALSE]
## }
y <- z[, 2:ncol(z), drop = FALSE]
if(na.subs){
y[y == 0] <- NA
}
## Likewise, we can never enter here now as timescale fixed at 1. And
## this is silly.
## if(timescale != 1) {
## time <- timescale * z[, 1]
## } else {
## time <- z[, 1]
## }
time <- timescale * z[, 1]
if(nrow(y) <= 2) type <- "b"
## Allow for the weird case of possibly missing drivers
col2 <- rep(col, length.out = (1 + max(ndr)))
col2 <- col2[sort(unique(ndr)) + 1]
matplot(x = time,
y = y,
type = type, log = log, lty = lty, col = col2, lwd = lwd,
...)
if(addtot) {
tot <- rowSums(y, na.rm = TRUE)
lines(time, tot, col = "black", lty = 1, lwd = addtotlwd)
}
## This will work even if under the weird case of a driver missing
ldrv <- unlist(lapply(strsplit(colnames(y), "dr_", fixed = TRUE),
function(x) x[2]))
if(legend.ncols == "auto") {
if(length(ldrv) > 6) legend.ncols <- 2
else legend.ncols <- 1
}
legend(x = "topleft",
title = "Number of drivers",
lty = lty, col = col2, lwd = lwd,
legend = ldrv,
ncol = legend.ncols)
## legend = (1:ncol(y)) - 1)
}
plotPoset <- function(x, names = NULL, addroot = FALSE,
box = FALSE, ...) {
if(is.null(names)) {
if(addroot) names <- c("Root", 1:max(x))
else names <- 1:max(x)
}
plot(posetToGraph(x, names = names, addroot = addroot,
type = "graphNEL",
strictAdjMat = FALSE), ...)
if(box)
box()
}
## this function seems to never be used
## plotAdjMat <- function(adjmat) {
## plot(as(adjmat, "graphNEL"))
## }
which_N_at_T <- function(x, N = 1, t = "last") {
if((length(t) == 1) && (t == "last"))
T <- nrow(x$pops.by.time)
else if(length(t) == 2) {
if(t[1] < 0)
warning("smallest t must be >= 0; setting to 0")
if(t[1] > t[2])
stop("t[1] must be <= t[2]")
if(t[2] > max(x$pops.by.time[, 1]))
message("t[2] > largest time; setting it to the max")
T <- which(
(x$pops.by.time[, 1] >= t[1]) &
(x$pops.by.time[, 1] <= t[2]))
}
else
stop("t must be either 'last' or a vector of length 2")
z <- which(x$pops.by.time[T, -1, drop = FALSE] >= N, arr.ind = TRUE)[, 2]
z <- unique(z) ## recall we removed first column but we index from first.
return(z)
}
phylogClone <- function(x, N = 1, t = "last", keepEvents = TRUE) {
if(!inherits(x, "oncosimul2"))
stop("Phylogenetic information is only stored with v >= 2")
z <- which_N_at_T(x, N, t)
tG <- x$GenotypesLabels[z] ## only for GenotypesLabels we keep all
## sample size info at each period
## FIXME: aren't this and the next warnings redundant or aliased?
if( (length(tG) == 1) && (tG == "")) {
warning("There never was a descendant of WT")
}
df <- x$other$PhylogDF
if(nrow(df) == 0) {
warning("PhylogDF has 0 rows: no descendants of initMutant ever appeared. ",
"This also happens if you did not set 'keepPhylog = TRUE'.")
return(NA)
}
if(!keepEvents) { ## is this just a graphical thing? or not?
df <- df[!duplicated(df[, c(1, 2)]), ]
}
g <- igraph::graph.data.frame(df[, c(1, 2)])
## nodes <- match(tG, V(g)$name)
nodesInP <- unique(unlist(igraph::neighborhood(g, order = 1e9,
nodes = tG,
mode = "in")))
## Remember that the phylog info can contain clones that are
## not in pops.by.time, as they go extinct between creation
## and sampling.
allLabels <- unique(as.character(unlist(df[, c(1, 2)])))
nodesRm <- setdiff(allLabels, V(g)$name[nodesInP])
g <- igraph::delete.vertices(g, nodesRm)
tmp <- list(graph = g, df = df)
class(tmp) <- c(class(tmp), "phylogClone")
return(tmp)
## trivial to return an adjacency matrix if needed. The keepEvents = FALSE
}
plotClonePhylog <- function(x, N = 1, t = "last",
timeEvents = FALSE,
keepEvents = FALSE,
fixOverlap = TRUE,
returnGraph = FALSE, ...) {
if(!inherits(x, "oncosimul2"))
stop("Phylogenetic information is only stored with v >=2")
if(nrow(x$other$PhylogDF) == 0)
stop("It seems you run the simulation with keepPhylog= FALSE. ",
"This error can also appear if your simulation exited ",
"very fast, before any clones beyond the initial were ",
"generated.")
pc <- phylogClone(x, N, t, keepEvents)
## if(is.na(pc)) {
## ## This should not be reachable, as caught before
## ## where we check for nrow of PhylogDF
## warning("No clone phylogeny available. Exiting without plotting.")
## return(NULL)
## }
l0 <- igraph::layout.reingold.tilford(pc$g)
if(!timeEvents) {
plot(pc$g, layout = l0)
} else {
l1 <- l0
indexAppear <- match(V(pc$g)$name, as.character(pc$df[, 2]))
firstAppear <- pc$df$time[indexAppear]
firstAppear[1] <- 0
l1[, 2] <- (max(firstAppear) - firstAppear)
if(fixOverlap) {
dx <- which(duplicated(l1[, 1]))
if(length(dx)) {
ra <- range(l1[, 1])
l1[dx, 1] <- runif(length(dx), ra[1], ra[2])
}
}
plot(pc$g, layout = l1)
}
if(returnGraph)
return(pc$g)
}
############# The rest are internal functions
closest_time <- function(x, size) {
## Find the first time when a given size is reached
## But these are not times, but the position in pops.by.time. Bad naming.
sizes <- rowSums(x$pops.by.time[, -1, drop = FALSE])
candidates <- which(sizes >= size)
if(length(candidates) == 0) {
warning(paste("Pop size never >= requested size.",
"Thus, there is nothing to sample. You will get NAs"))
return(-99)
} else {
return(candidates[1])
}
}
get.the.time.for.sample <- function(tmp, timeSample, popSizeSample) {
if( !is.null(popSizeSample) && (popSizeSample >= 0) ) {
## should be "closest_index" and "the.index"
the.time <- closest_time(tmp, popSizeSample)
} else if(timeSample == "last") {
if(tmp$TotalPopSize == 0) {
warning(paste("Final population size is 0.",
"Thus, there is nothing to sample with",
"sampling last. You will get NAs"))
the.time <- -99
} else {
the.time <- nrow(tmp$pops.by.time)
}
} else if (timeSample %in% c("uniform", "unif")) {
candidate.time <- which(tmp$PerSampleStats[, 4] > 0)
if (length(candidate.time) == 0) {
warning(paste("There is not a single sampled time",
"at which there are any mutants with drivers. ",
"Thus, no uniform sampling possible.",
"You will get NAs"))
the.time <- -99
## return(rep(NA, nrow(tmp$Genotypes)))
} else if (length(candidate.time) == 1) {
message("Only one sampled time period with mutants.")
the.time <- candidate.time
} else {
the.time <- sample(candidate.time, 1)
}
} else {
stop("Unknown timeSample option")
}
return(the.time)
}
get.mut.vector <- function(x, timeSample, typeSample,
thresholdWhole, popSizeSample) {
if(is.null(x$TotalPopSize)) {
warning(paste("It looks like this simulation never completed.",
" Maybe it reached maximum allowed limits.",
" You will get NAs"))
return(rep(NA, length(x$geneNames)))
}
the.time <- get.the.time.for.sample(x, timeSample, popSizeSample)
if(the.time < 0) {
return(rep(NA, nrow(x$Genotypes)))
}
pop <- x$pops.by.time[the.time, -1]
if(all(pop == 0)) {
stop("You found a bug: this should never happen")
}
if(typeSample %in% c("wholeTumor", "whole")) {
popSize <- x$PerSampleStats[the.time, 1]
return( as.numeric((tcrossprod(pop,
x$Genotypes)/popSize) >= thresholdWhole) )
} else if (typeSample %in% c("singleCell", "single")) {
return(x$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
} else if (typeSample %in% c("singleCell-noWT", "single-noWT",
"singleCell-nowt", "single-nowt")) {
genots <- x$Genotypes
whichwt <- which(x$GenotypesLabels == "")
if(length(whichwt)) {
genots <- genots[, -whichwt, drop = FALSE]
pop <- pop[-whichwt]
}
if(all(pop == 0)) {
warning("No non-WT clone with required popSize or at required time")
return(rep(NA, nrow(x$Genotypes)))
} else {
return(genots[, sample(seq_along(pop), 1, prob = pop)])
}
} else {
stop("Unknown typeSample option")
}
}
## get.mut.vector <- function(x, timeSample, typeSample,
## thresholdWhole, popSizeSample) {
## if(is.null(x$TotalPopSize)) {
## warning(paste("It looks like this simulation never completed.",
## " Maybe it reached maximum allowed limits.",
## " You will get NAs"))
## return(rep(NA, length(x$geneNames)))
## }
## the.time <- get.the.time.for.sample(x, timeSample, popSizeSample)
## if(the.time < 0) {
## return(rep(NA, nrow(x$Genotypes)))
## }
## pop <- x$pops.by.time[the.time, -1]
## if(all(pop == 0)) {
## stop("You found a bug: this should never happen")
## }
## if(typeSample %in% c("wholeTumor", "whole")) {
## popSize <- x$PerSampleStats[the.time, 1]
## return( as.numeric((tcrossprod(pop,
## x$Genotypes)/popSize) >= thresholdWhole) )
## } else if (typeSample %in% c("singleCell", "single")) {
## return(x$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
## } else {
## stop("Unknown typeSample option")
## }
## }
oncoSimul.internal <- function(poset, ## restrict.table,
numPassengers,
## numGenes,
typeCBN,
birth,
s,
death,
mu,
initSize,
sampleEvery,
detectionSize,
finalTime,
initSize_species,
initSize_iter,
seed,
verbosity,
speciesFS,
ratioForce,
typeFitness,
max.memory,
mutationPropGrowth, ## make it explicit
initMutant,
max.wall.time,
keepEvery,
alpha,
sh,
K,
## endTimeEvery,
detectionDrivers,
onlyCancer,
errorHitWallTime,
max.num.tries,
errorHitMaxTries,
minDetectDrvCloneSz,
extraTime) {
## the value of 20000, in megabytes, for max.memory sets a limit of ~ 20 GB
## if(keepEvery < sampleEvery)
## warning("setting keepEvery to sampleEvery")
## a backdoor to allow passing restrictionTables directly
if(inherits(poset, "restrictionTable"))
restrict.table <- poset
else
restrict.table <- poset.to.restrictTable(poset)
numDrivers <- nrow(restrict.table)
numGenes <- (numDrivers + numPassengers)
if(numGenes < 2)
stop("There must be at least two genes (loci) in the fitness effects.",
"If you only care about a degenerate case with just one,",
"you can enter a second gene",
"with fitness effect of zero.")
if(numGenes > 64)
stop("Largest possible number of genes (loci) is 64 for version 1.",
"You are strongly encouraged to use the new specification",
"as in version 2.")
## These can never be set by the user
## if(initSize_species < 10) {
## warning("initSize_species too small?")
## }
## if(initSize_iter < 100) {
## warning("initSize_iter too small?")
## }
## numDrivers <- nrow(restrict.table)
if(length(unique(restrict.table[, 1])) != numDrivers)
stop("BAIL OUT NOW: length(unique(restrict.table[, 1])) != numDrivers)")
ddr <- restrict.table[, 1]
if(any(diff(ddr) != 1))
stop("BAIL OUT NOW: any(diff(ddr) != 1")
## sanity checks
if(max(restrict.table[, 1]) != numDrivers)
stop("BAIL OUT NOW: max(restrict.table[, 1]) != numDrivers")
if(numDrivers > numGenes)
stop("BAIL OUT NOW: numDrivers > numGenes")
non.dep.drivers <- restrict.table[which(restrict.table[, 2] == 0), 1]
## if( (is.null(endTimeEvery) || (endTimeEvery > 0)) &&
## (typeFitness %in% c("bozic1", "exp") )) {
## warning(paste("endTimeEvery will take a positive value. ",
## "This will make simulations not stop until the next ",
## "endTimeEvery has been reached. Thus, in simulations ",
## "with very fast growth, simulations can take a long ",
## "time to finish, or can hit the wall time limit. "))
## }
## if(is.null(endTimeEvery))
## endTimeEvery <- keepEvery
## if( (endTimeEvery > 0) && (endTimeEvery %% keepEvery) )
## warning("!(endTimeEvery %% keepEvery)")
## a sanity check in restricTable, so no neg. indices for the positive deps
neg.deps <- function(x) {
## checks a row of restrict.table
numdeps <- x[2]
if(numdeps) {
deps <- x[3:(3+numdeps - 1)]
return(any(deps < 0))
} else FALSE
}
if(any(apply(restrict.table, 1, neg.deps)))
stop("BAIL OUT NOW: Negative dependencies in restriction table")
## transpose the table
rtC <- convertRestrictTable(restrict.table)
return(c(
BNB_Algo5(restrictTable = rtC,
numDrivers = numDrivers,
numGenes = numGenes,
typeCBN_= typeCBN,
s = s,
death = death,
mu = mu,
initSize = initSize,
sampleEvery = sampleEvery,
detectionSize = detectionSize,
finalTime = finalTime,
initSp = initSize_species,
initIt = initSize_iter,
seed = seed,
verbosity = verbosity,
speciesFS = speciesFS,
ratioForce = ratioForce,
typeFitness_ = typeFitness,
maxram = max.memory,
mutationPropGrowth = as.integer(mutationPropGrowth),
initMutant = initMutant,
maxWallTime = max.wall.time,
keepEvery = keepEvery,
sh = sh,
K = K,
detectionDrivers = detectionDrivers,
onlyCancer = onlyCancer,
errorHitWallTime = errorHitWallTime,
maxNumTries = max.num.tries,
errorHitMaxTries = errorHitMaxTries,
minDetectDrvCloneSz = minDetectDrvCloneSz,
extraTime = extraTime
),
NumDrivers = numDrivers
))
}
OncoSimulWide2Long <- function(x) {
## Put data in long format, for ggplot et al
if(!inherits(x, "oncosimul2")) {
ndr <- colSums(x$Genotypes[1:x$NumDrivers, , drop = FALSE])
genotLabels <- genotypeLabel(x)
} else {
ndr <- colSums(x$Genotypes[x$Drivers, , drop = FALSE])
genotLabels <- x$GenotypesLabels
}
genotLabels[genotLabels == ""] <- "WT"
genotLabels[genotLabels == " _ "] <- "WT"
y <- x$pops.by.time[, 2:ncol(x$pops.by.time), drop = FALSE]
y[y == 0] <- NA
oo <- order(ndr)
y <- y[, oo, drop = FALSE]
ndr <- ndr[oo]
nc <- ncol(y)
nr <- nrow(y)
y <- as.vector(y)
return(data.frame(Time = rep(x$pops.by.time[, 1], nc),
Y = y,
Drivers = factor(rep(ndr, rep(nr, nc))),
Genotype = rep(genotLabels, rep(nr, nc)),
stringsAsFactors = TRUE))
}
## We are not using this anymore
## create.muts.by.time <- function(tmp) { ## tmp is the output from Algorithm5
## if(tmp$NumClones > 1) {
## NumMutations <- apply(tmp$Genotypes, 2, sum)
## muts.by.time <- cbind(tmp$pops.by.time[, c(1), drop = FALSE],
## t(apply(tmp$pops.by.time[, -c(1),
## drop = FALSE], 1,
## function(x) tapply(x,
## NumMutations, sum))))
## colnames(muts.by.time)[c(1)] <- "Time"
## } else {
## muts.by.time <- tmp$pops.by.time
## }
## return(muts.by.time)
## }
create.drivers.by.time <- function(tmp, ndr) {
## CountNumDrivers <- apply(tmp$Genotypes[1:numDrivers, ,drop = FALSE], 2, sum)
CountNumDrivers <- ndr
if(tmp$NumClones >= 1) {
if(tmp$NumClones == 1) {
if(ncol(tmp$pops.by.time) != 2)
stop("This is impossible!")
drivers.by.time <- tmp$pops.by.time
} else {
if(length(unique(CountNumDrivers )) > 1) {
drivers.by.time <- cbind(tmp$pops.by.time[, c(1),
drop = FALSE] ,
t(apply(tmp$pops.by.time[, -c(1),
drop = FALSE],
1,
function(x)
tapply(x,
CountNumDrivers,
sum))))
} else {
drivers.by.time <- cbind(tmp$pops.by.time[, c(1),
drop = FALSE] ,
rowSums(tmp$pops.by.time[, -c(1),
drop =FALSE]))
}
}
colnames(drivers.by.time) <- c("Time",
paste("dr_",
colnames(drivers.by.time)[-c(1)],
sep = ""))
} else {
drivers.by.time <- NULL
}
return(drivers.by.time)
}
## For plotting, this helps decrease huge file sizes, while still showing
## the start of each clone, if it was originally recorded.
thin.pop.data <- function(x, keep = 0.1, min.keep = 3) {
norig <- nrow(x$pops.by.time)
keep1 <- round(seq.int(from = 1, to = norig,
length.out = round(norig * keep)))
keep2 <- apply(x$pops.by.time[, -1, drop = FALSE],
1, function(x) any((x[x > 0] < min.keep)))
keep <- sort(union(keep1, keep2))
x$pops.by.time <- x$pops.by.time[keep, , drop = FALSE]
return(x)
}
xlim.pop.data <- function(x, xlim) {
x$pops.by.time <- x$pops.by.time[
(x$pops.by.time[, 1] >= xlim[1]) &
(x$pops.by.time[, 1] <= xlim[2]), ]
return(x)
}
shannonI <- function(x) {
sx <- sum(x)
p <- x/sx
p <- p[p > 0]
return(-sum(p * log(p)))
}
plotShannon <- function(z) {
h <- apply(z$pops.by.time[, 2:ncol(z$pops.by.time), drop = FALSE],
1, shannonI)
plot(x = z$pops.by.time[, 1],
y = h, type = "l", xlab = "", ylab = "H", axes = FALSE)
box()
axis(2)
}
is_null_na <- function(x) {
## For arguments, if user passes "undefined" or "not set"
## See also http://stackoverflow.com/a/19655909
if(is.function(x)) return(FALSE)
if( is.null(x) ||
( (length(x) == 1) && (is.na(x)) ) ||
( (length(x) == 1) && (x == "") ) ## careful here
) {
return(TRUE)
} else {
return(FALSE)
}
}
## Not used anymore, but left here in case they become useful.
## Expected numbers at equilibrium under McFarland's
## eFinalMf <- function(initSize, s, j) {
## ## Expected final sizes for McF, when K is set to the default.
## # j is number of drivers
## ## as it says, with no passengers
## ## Set B(d) = D(N)
## K <- initSize/(exp(1) - 1)
## return(K * (exp( (1 + s)^j) - 1))
## }
## mcflE <- function(p, s, initSize) {
## K <- initSize/(exp(1) - 1)
## ## Expected number at equilibrium
## return( K * (exp((1 + s)^p) - 1))
## }
## mcflEv <- function(p, s, initSize) {
## ## expects vectors for p and s
## K <- initSize/(exp(1) - 1)
## ## Expected number at equilibrium
## return( K * (exp(prod((1 + s)^p)) - 1))
## }
## simpsonI <- function(x) {
## sx <- sum(x)
## p <- x/sx
## p <- p[p > 0]
## return(sum(p^2)))
## }
## plotSimpson <- function(z) {
## h <- apply(z$pops.by.time[, 2:ncol(z$pops.by.time), drop = FALSE],
## 1, shannonI)
## plot(x = z$pops.by.time[, 1],
## y = h, lty = "l", xlab = "", ylab = "H")
## }
## plotClones <- function(z, ndr = NULL, na.subs = TRUE,
## log = "y", type = "l",
## lty = 1:8, col = 1:9, ...) {
## ## if given ndr, we order columns based on ndr, so clones with more
## ## drivers are plotted last
## y <- z$pops.by.time[, 2:ncol(z$pops.by.time), drop = FALSE]
## if(na.subs){
## y[y == 0] <- NA
## }
## if(!is.null(ndr)) {
## ## could be done above, to avoid creating
## ## more copies
## oo <- order(ndr)
## y <- y[, oo, drop = FALSE]
## ndr <- ndr[oo]
## col <- col[ndr + 1]
## }
## matplot(x = z$pops.by.time[, 1],
## y = y,
## log = log, type = type,
## col = col, lty = lty,
## ...)
## box()
## }
## No longer used
## rtNoDep <- function(numdrivers) {
## ## create a restriction table with no dependencies
## x <- matrix(nrow = numdrivers, ncol = 3)
## x[, 1] <- 1:numdrivers
## x[, 2] <- 0
## x[, 3] <- -9
## return(x)
## }
## Simulate from generative model. This is a quick function, and is most
## likely wrong! Never used for anything.
## simposet <- function(poset, p) {
## ## if (length(parent.nodes) != length (child.nodes)){
## ## print("An Error Occurred")
## ## }
## ## else {
## num.genes <- max(poset) - 1 ## as root is not a gene
## genotype <-t(c(1, rep(NA, num.genes)))
## colnames(genotype) <- as.character(0:num.genes)
## poset$runif <- runif(nrow(poset))
## ## this.relation.prob.OK could be done outside, but having it inside
## ## the loop would allow to use different thresholds for different
## ## relationships
## for (i in (1:nrow(poset))) {
## child <- poset[i, 2]
## this.relation.prob.OK <- as.numeric(poset[i, "runif"] > p)
## the.parent <- genotype[ poset[i, 1] ] ## it's the value of parent in genotype.
## if (is.na(genotype[child])){
## genotype[child] <- this.relation.prob.OK * the.parent
## }
## else
## genotype[child] <- genotype[child]*(this.relation.prob.OK * the.parent)
## }
## ## }
## return(genotype)
## }
## to plot and adjacency matrix in this context can do
## plotPoset(intAdjMatToPoset(adjMat))
## where intAdjMatToPoset is from best oncotree code: generate-random-trees.
## No! the above is simpler
## get.mut.vector.whole <- function(tmp, timeSample = "last", threshold = 0.5) {
## ## Obtain, from results from a simulation run, the vector
## ## of 0/1 corresponding to each gene.
## ## threshold is the min. proportion for a mutation to be detected
## ## We are doing whole tumor sampling here, as in Sprouffske
## ## timeSample: do we sample at end, or at a time point, chosen
## ## randomly, from all those with at least one driver?
## if(timeSample == "last") {
## if(tmp$TotalPopSize == 0)
## warning(paste("Final population size is 0.",
## "Thus, there is nothing to sample with ",
## "sampling last. You will get NAs"))
## return(as.numeric(
## (tcrossprod(tmp$pops.by.time[nrow(tmp$pops.by.time), -1],
## tmp$Genotypes)/tmp$TotalPopSize) > threshold))
## } else if (timeSample %in% c("uniform", "unif")) {
## candidate.time <- which(tmp$PerSampleStats[, 4] > 0)
## if (length(candidate.time) == 0) {
## warning(paste("There is not a single sampled time",
## "at which there are any mutants.",
## "Thus, no uniform sampling possible.",
## "You will get NAs"))
## return(rep(NA, nrow(tmp$Genotypes)))
## } else if (length(candidate.time) == 1) {
## the.time <- candidate.time
## } else {
## the.time <- sample(candidate.time, 1)
## }
## pop <- tmp$pops.by.time[the.time, -1]
## popSize <- tmp$PerSampleStats[the.time, 1]
## ## if(popSize == 0)
## ## warning(paste("Population size at this time is 0.",
## ## "Thus, there is nothing to sample at this time point.",
## ## "You will get NAs"))
## return( as.numeric((tcrossprod(pop,
## tmp$Genotypes)/popSize) > threshold) )
## }
## }
## the.time <- sample(which(tmp$PerSampleStats[, 4] > 0), 1)
## if(length(the.time) == 0) {
## warning(paste("There are no clones with drivers at any time point.",
## "No uniform sampling possible.",
## "You will get a vector of NAs."))
## return(rep(NA, nrow(tmp$Genotypes)))
## }
## get.mut.vector.singlecell <- function(tmp, timeSample = "last") {
## ## No threshold, as single cell.
## ## timeSample: do we sample at end, or at a time point, chosen
## ## randomly, from all those with at least one driver?
## if(timeSample == "last") {
## the.time <- nrow(tmp$pops.by.time)
## } else if (timeSample %in% c("uniform", "unif")) {
## candidate.time <- which(tmp$PerSampleStats[, 4] > 0)
## if (length(candidate.time) == 0) {
## warning(paste("There is not a single sampled time",
## "at which there are any mutants.",
## "Thus, no uniform sampling possible.",
## "You will get NAs"))
## return(rep(NA, nrow(tmp$Genotypes)))
## } else if (length(candidate.time) == 1) {
## the.time <- candidate.time
## } else {
## the.time <- sample(candidate.time, 1)
## }
## }
## pop <- tmp$pops.by.time[the.time, -1]
## ## popSize <- tmp$PerSampleStats[the.time, 1]
## ## genot <- sample(seq_along(pop), 1, prob = pop)
## if(all(pop == 0)) {
## warning(paste("All clones have a population size of 0",
## "at the chosen time. Nothing to sample.",
## "You will get NAs"))
## return(rep(NA, nrow(tmp$Genotypes)))
## } else {
## return(tmp$Genotypes[, sample(seq_along(pop), 1, prob = pop)])
## }
## }
## get.mut.vector <- function(x, timeSample = "whole", typeSample = "last",
## thresholdWhole = 0.5) {
## if(typeSample %in% c("wholeTumor", "whole")) {
## get.mut.vector.whole(x, timeSample = timeSample,
## threshold = thresholdWhole)
## } else if(typeSample %in% c("singleCell", "single")) {
## get.mut.vector.singlecell(x, timeSample = timeSample)
## }
## }
## plotClonePhylog <- function(x, timeEvent = FALSE,
## showEvents = TRUE,
## fixOverlap = TRUE) {
## if(!inherits(x, "oncosimul2"))
## stop("Phylogenetic information is only stored with v >=2")
## if(nrow(x$other$PhylogDF) == 0)
## stop("It seems you run the simulation with keepPhylog= FALSE")
## ## requireNamespace("igraph")
## df <- x$other$PhylogDF
## if(!showEvents) {
## df <- df[!duplicated(df[, c(1, 2)]), ]
## }
## g <- igraph::graph.data.frame(df)
## l0 <- igraph::layout.reingold.tilford(g)
## if(!timeEvent) {
## plot(g, layout = l0)
## } else {
## l1 <- l0
## indexAppear <- match(V(g)$name, as.character(df[, 2]))
## firstAppear <- df$time[indexAppear]
## firstAppear[1] <- 0
## l1[, 2] <- (max(firstAppear) - firstAppear)
## if(fixOverlap) {
## dx <- which(duplicated(l1[, 1]))
## if(length(dx)) {
## ra <- range(l1[, 1])
## l1[dx, 1] <- runif(length(dx), ra[1], ra[2])
## }
## }
## plot(g, layout = l1)
## }
## }
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.