#' Perform inference of the maximum likelihood clonal tree from longitudinal data.
#' @title LACE
#'
#' @examples
#' data(longitudinal_sc_variants)
#' inference = LACE(D = longitudinal_sc_variants,
#'                  lik_w = c(0.2308772,0.2554386,0.2701754,0.2435088),
#'                  alpha = list(c(0.10,0.05,0.05,0.05)),
#'                  beta = list(c(0.10,0.05,0.05,0.05)),
#'                  keep_equivalent = FALSE,
#'                  num_rs = 5,
#'                  num_iter = 10,
#'                  n_try_bs = 5,
#'                  num_processes = NA,
#'                  seed = 12345,
#'                  verbose = FALSE)
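#'
#' ## A minimal sketch (not run) of providing D as a SummarizedExperiment instead of a
#' ## list of matrices. `pooled_D` (a cells x genes mutation matrix pooling all time
#' ## points) and `time_points` (one label per cell, here assumed to span four time
#' ## points) are hypothetical objects used only for illustration.
#' \dontrun{
#' se <- SummarizedExperiment::SummarizedExperiment(
#'     assays = list(genotypes = t(pooled_D)),
#'     colData = S4Vectors::DataFrame(time_point = time_points))
#' inference_se <- LACE(D = se,
#'                      alpha = list(c(0.10,0.05,0.05,0.05)),
#'                      beta = list(c(0.10,0.05,0.05,0.05)),
#'                      num_rs = 5,
#'                      num_iter = 10,
#'                      n_try_bs = 5,
#'                      num_processes = NA,
#'                      seed = 12345,
#'                      verbose = FALSE)
#' }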
#'
#' @param D Mutation data from multiple experiments for a list of driver genes. It can be either a list with one data matrix per time point or a SummarizedExperiment object.
#' In the latter case, the object must contain two fields: assays and colData. assays stores a single data matrix pooling all single cells observed at each time point and colData stores a vector of labels reporting the time point at which each single cell was sequenced.
#' The ordering of cells in the assays and colData fields must be the same.
#' @param lik_w Weight for each time point. If not provided, weights correcting for sample sizes are used.
#' @param alpha False positive error rate, provided as a list of vectors (one rate per time point); if multiple configurations of alpha (and beta) are provided, the inference is performed for each of them and the solution at
#' maximum likelihood is returned.
#' @param beta False negative error rate, provided as a list of vectors (one rate per time point); if multiple configurations of beta (and alpha) are provided, the inference is performed for each of them and the solution at
#' maximum likelihood is returned.
#' @param initialization Starting point of the MCMC; if not provided, a random starting point is used.
#' @param keep_equivalent Boolean. Shall I return solutions (B and C) with likelihood equivalent to that of the best solution?
#' @param check_indistinguishable Boolean. Shall I remove any indistinguishable event from the input data prior to inference?
#' @param num_rs Number of restarts during MCMC inference.
#' @param num_iter Maximum number of MCMC steps to be performed during the inference.
#' @param n_try_bs Number of steps without change in the likelihood of the best solution after which the MCMC is stopped.
#' @param learning_rate Parameter to tune the probability of accepting solutions at lower likelihood during the MCMC. A value of learning_rate = 1 (default) sets
#' the acceptance probability proportional to the difference in likelihood; values of learning_rate greater than 1 increase the chance of accepting solutions at lower likelihood
#' during the MCMC, while values lower than 1 decrease it.
#' @param marginalize Boolean. Shall I marginalize C when computing the likelihood?
#' @param error_move Boolean. Shall I include the estimation of error rates in the MCMC moves?
#' @param num_processes Number of processes to be used during parallel execution. To execute in single process mode,
#' this parameter needs to be set to either NA or NULL.
#' @param seed Seed for reproducibility.
#' @param verbose Boolean. Shall I print information messages to screen during the execution?
#' @param log_file Path to the log file where outputs are printed during parallel execution. If parallel execution is disabled, this parameter is ignored.
#' @return A list of 9 elements: B, C, corrected_genotypes, clones_prevalence, relative_likelihoods, joint_likelihood, clones_summary, equivalent_solutions and error_rates. Here, B is the maximum likelihood longitudinal
#' clonal tree, C the attachment of cells to clones, corrected_genotypes the error-corrected genotypes and clones_prevalence the clones' prevalence; relative_likelihoods and joint_likelihood are respectively
#' the likelihoods of the solutions at each individual time point and the joint likelihood; clones_summary provides a summary of the association of mutations to clones. In equivalent_solutions,
#' solutions (B and C) with likelihood equivalent to the best solution are returned. Finally, error_rates provides the best values of alpha and beta among the considered ones.
#' @export LACE
#' @import parallel
#' @import SummarizedExperiment
#' @importFrom Rfast rowMaxs
#' @importFrom stats runif rnorm
#'
LACE <- function( D,
                  lik_w = NULL,
                  alpha = NULL,
                  beta = NULL,
                  initialization = NULL,
                  keep_equivalent = TRUE,
                  check_indistinguishable = TRUE,
                  num_rs = 50,
                  num_iter = 10000,
                  n_try_bs = 500,
                  learning_rate = 1,
                  marginalize = FALSE,
                  error_move = FALSE,
                  num_processes = Inf,
                  seed = NULL,
                  verbose = TRUE,
                  log_file = "" ) {
# Set the seed
set.seed(seed)
# Handle SummarizedExperiment objects
if(typeof(D)=="S4") {
curr_data <- t(assays(D)[[1]])
curr_experiment <- colData(D)[[1]]
curr_experiment_unique <- unique(curr_experiment)
curr_D <- list()
for(i in 1:length(curr_experiment_unique)) {
curr_D[[as.character(curr_experiment_unique[i])]] <- curr_data[which(curr_experiment==curr_experiment_unique[i]),,drop=FALSE]
}
D <- curr_D
}
# Set storage mode to integer
for(i in 1:length(D)) {
storage.mode(D[[i]]) <- "integer"
}
if(!is.null(initialization)){
storage.mode(initialization) <- "integer"
}
# Remove any indistinguishable event from input data prior to inference
if(check_indistinguishable) {
D <- check.indistinguishable(D)
}
# Initialize weights to compute weighted joint likelihood
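# Each time point i gets a raw weight (1 - n_i / N), where n_i is its number of cells and
# N the total number of cells; the weights are then normalized to sum to 1, so smaller
# experiments weigh relatively more in the joint likelihood.
# E.g., sample sizes (100, 50, 50) give raw weights (0.50, 0.75, 0.75),
# normalized to (0.25, 0.375, 0.375).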
if(is.null(lik_w)) {
total_sample_size <- 0
for(i in 1:length(D)) {
total_sample_size <- total_sample_size + nrow(D[[i]])
}
for(i in 1:length(D)) {
lik_w <- c(lik_w,(1-(nrow(D[[i]])/total_sample_size)))
}
lik_w <- lik_w / sum(lik_w)
}
# Initialize error rates alpha and beta
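# If not provided, a single configuration with alpha = 0.01 (false positives) and
# beta = 0.05 (false negatives) for every time point is used.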
if(is.null(alpha)) {
alpha <- list(rep(0.01,length(D)))
beta <- list(rep(0.05,length(D)))
}
if(verbose) {
cat(paste0("Starting inference for a total of ",length(alpha)," different values of alpha and beta.","\n"))
}
# Setting up parallel execution
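# A cluster is created only when more than one (alpha, beta) configuration is provided;
# each configuration is then inferred on a separate worker via parLapply.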
parallel <- NULL
close_parallel <- FALSE
if(is.null(parallel)&&length(alpha)>1) {
if(is.null(num_processes) || is.na(num_processes) || num_processes == 1) {
parallel <- NULL
}
else if(num_processes==Inf) {
cores <- as.integer((detectCores()-1))
if(cores < 2) {
parallel <- NULL
}
else {
num_processes <- min(cores,length(alpha))
parallel <- makeCluster(num_processes,outfile=log_file)
close_parallel <- TRUE
}
}
else {
num_processes <- min(num_processes,length(alpha))
parallel <- makeCluster(num_processes,outfile=log_file)
close_parallel <- TRUE
}
if(verbose && !is.null(parallel)) {
cat("Executing",num_processes,"processes via parallel...","\n")
}
}
# Now start the inference
if(is.null(parallel)) {
# Sequential computation
inference <- list()
for(i in 1:length(alpha)) {
inference[[i]] <- learn.longitudinal.phylogeny( D = D,
lik_w = lik_w,
alpha = alpha[[i]],
beta = beta[[i]],
initialization = initialization,
keep_equivalent = keep_equivalent,
num_rs = num_rs,
num_iter = num_iter,
n_try_bs = n_try_bs,
learning_rate = learning_rate,
marginalize = marginalize,
seed = round(runif(1)*10000),
verbose = verbose)
}
}
else {
# Parallel computation
res_clusterEvalQ <- clusterEvalQ(parallel,library("Rfast"))
clusterExport(parallel,varlist=c("D","lik_w","alpha","beta","initialization","keep_equivalent","num_rs","num_iter","n_try_bs","learning_rate","marginalize","verbose"),envir=environment())
clusterExport(parallel,c("learn.longitudinal.phylogeny","initialize.B","move.B","compute.C"),envir=environment())
clusterSetRNGStream(parallel,iseed=round(runif(1)*100000))
inference <- parLapply(parallel,1:length(alpha),function(x) {
if(verbose) {
cat('Performing inference for alpha =',paste0(alpha[[x]],collapse=" | "),'and beta =',paste0(beta[[x]],collapse=" | "),'\n')
}
inference <- learn.longitudinal.phylogeny( D = D,
lik_w = lik_w,
alpha = alpha[[x]],
beta = beta[[x]],
initialization = initialization,
keep_equivalent = keep_equivalent,
num_rs = num_rs,
num_iter = num_iter,
n_try_bs = n_try_bs,
learning_rate = learning_rate,
marginalize = marginalize,
seed = round(runif(1)*10000),
verbose = FALSE)
})
}
# Close parallel
if(close_parallel) {
stopCluster(parallel)
}
# Return the solution at maximum likelihood among the inferred ones
lik <- NULL
for(i in 1:length(inference)) {
lik <- c(lik,inference[[i]][["joint_lik"]])
}
best <- which(lik==max(lik))[1]
error_rates <- list(alpha=inference[[best]][["alpha"]],beta=inference[[best]][["beta"]])
# Compute corrected genotypes
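# Build a one-hot attachment matrix (cells x clones) and multiply it by the clonal genotype
# matrix B, so that each cell receives the genotype of the clone it is attached to.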
inference_B <- inference[[best]][["B"]]
rownames(inference_B)[1] <- 0
rownames(inference_B) <- (as.numeric(rownames(inference_B))+1)
colnames(inference_B)[1] <- 0
colnames(inference_B) <- (as.numeric(colnames(inference_B))+1)
inference_attachments <- (unlist(inference[[best]][["C"]])+1)
inference_attachments_ordered <- inference_attachments
for(curr_C_index in 1:length(inference_attachments)) {
inference_attachments_ordered[curr_C_index] <- as.integer(colnames(inference_B)[inference_attachments[curr_C_index]])
}
inference_attachments <- inference_attachments_ordered
idx_B_curr <- sort.int(as.integer(colnames(inference_B)),index.return=TRUE)$ix
inference_B <- inference_B[idx_B_curr,]
inference_B <- inference_B[,idx_B_curr]
inference_C <- array(0L,c(length(inference_attachments),ncol(inference_B)))
for(curr_C_index in 1:nrow(inference_C)) {
inference_C[curr_C_index,inference_attachments[curr_C_index]] <- 1L
}
corrected_genotypes <- inference_C %*% inference_B
cells_names <- NULL
for(cn in 1:length(D)) {
cells_names <- c(cells_names,rownames(D[[cn]]))
}
rownames(corrected_genotypes) <- cells_names
colnames(corrected_genotypes) <- c("Root",colnames(D[[1]]))
corrected_genotypes <- corrected_genotypes[,-1,drop=FALSE]
# Renaming
B <- inference[[best]][["B"]]
rownames(B) <- c("Root",paste0("Clone_",rownames(B)[2:nrow(B)]))
colnames(B) <- c("Root",colnames(D[[1]])[as.numeric(colnames(B)[2:ncol(B)])])
C <- inference[[best]][["C"]]
names(C) <- paste0("Experiment_",1:length(C))
for(i in 1:length(C)) {
tmp <- matrix(C[[i]],ncol=1)
rownames(tmp) <- rownames(D[[i]])
colnames(tmp) <- "Clone"
C[[i]] <- tmp
}
relative_likelihoods <- inference[[best]][["lik"]]
names(relative_likelihoods) <- paste0("Experiment_",1:length(relative_likelihoods))
joint_likelihood <- inference[[best]][["joint_lik"]]
# Compute clones' prevalence
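# For each clone, prevalence is the fraction of cells attached to it, computed per time
# point (columns "Experiment_*") and over all cells (column "Total"); the Root row is then
# set to one minus the sum of the clones' prevalences in each column.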
clones_prevalence <- array(NA,c((dim(B)[1]-1),(length(C)+1)))
rownames(clones_prevalence) <- rownames(B)[2:nrow(B)]
colnames(clones_prevalence) <- c(paste0("Experiment_",1:length(C)),"Total")
total_number_cell <- length(unlist(C))
for(i in 1:dim(clones_prevalence)[1]) {
clone_number_cell <- 0
for(j in 1:length(C)) {
clone_number_cell <- clone_number_cell + length(which(C[[j]]==i))
clones_prevalence[i,j] <- length(which(C[[j]]==i)) / dim(C[[j]])[1]
}
clones_prevalence[i,"Total"] <- clone_number_cell / total_number_cell
}
# Include Root in clones_prevalence
clones_prevalence <- rbind(rep(NA,ncol(clones_prevalence)),clones_prevalence)
rownames(clones_prevalence)[1] <- "Root"
clones_prevalence["Root",] <- (1-colSums(clones_prevalence,na.rm=TRUE))
# Compute clones' summary
clones_summary <- list()
Bwor <- B[,-1]
i <- 2
for(c in rownames(Bwor[-1,])) {
mut_list <- colnames(Bwor)[Bwor[i,]==1]
clones_summary[[c]] <- mut_list
i <- i + 1
}
# Finally, process equivalent solutions
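# Solutions whose B matrices have identical adjacency matrices are considered duplicates
# and removed; clone and mutation names are then restored as done for the best solution.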
equivalent_solutions <- inference[[best]][["equivalent_solutions"]]
if(length(equivalent_solutions)>1) { # if we have at least one other solution besides the best one
duplicated <- NULL
for(i in 2:length(equivalent_solutions)) { # consider all solutions besides the first one
for(j in 1:i) { # check if we have equivalent solutions among the ones discovered before
if(i!=j) {
curr_i <- equivalent_solutions[[i]][["B"]]
curr_i <- as.adj.matrix(curr_i)
curr_j <- equivalent_solutions[[j]][["B"]]
curr_j <- as.adj.matrix(curr_j)
if(all(curr_i==curr_j)) {
duplicated <- c(duplicated,i)
}
}
}
}
if(!is.null(duplicated)) {
duplicated <- unique(duplicated)
equivalent_solutions <- equivalent_solutions[-duplicated]
}
for(i in 1:length(equivalent_solutions)) {
rownames(equivalent_solutions[[i]][["B"]]) <- c("Root",paste0("Clone_",rownames(equivalent_solutions[[i]][["B"]])[2:nrow(equivalent_solutions[[i]][["B"]])]))
colnames(equivalent_solutions[[i]][["B"]]) <- c("Root",colnames(D[[1]])[as.numeric(colnames(equivalent_solutions[[i]][["B"]])[2:ncol(equivalent_solutions[[i]][["B"]])])])
}
}
if(length(equivalent_solutions)==1) { # if we have only the best solution (no other equivalent solutions)
equivalent_solutions <- list()
}
return(list(B=B,C=C,corrected_genotypes=corrected_genotypes,clones_prevalence=clones_prevalence,relative_likelihoods=relative_likelihoods,joint_likelihood=joint_likelihood,clones_summary=clones_summary,equivalent_solutions=equivalent_solutions,error_rates=error_rates))
}
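## A minimal usage sketch (not run), assuming `inference` was obtained as in the roxygen
## example above; it only inspects elements of the returned list.
# inference$B                          # maximum likelihood longitudinal clonal tree (matrix B)
# inference$clones_prevalence          # clones' prevalence per time point and overall
# inference$joint_likelihood           # joint likelihood of the returned solution
# head(inference$corrected_genotypes)  # error-corrected genotypes of the single cells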