R/general-functions.R

Defines functions logclr convertLegacyCoseq transformRNAseq matchContTable clusterInertia clusterEntropy compareICL kmeansProbaPost coseqRun

Documented in clusterEntropy clusterInertia compareICL convertLegacyCoseq coseqRun kmeansProbaPost logclr matchContTable transformRNAseq

#' Co-expression analysis
#'
#' Function for primary code to perform co-expression analysis, with or without data transformation,
#' using mixture models. The output of \code{coseqRun} is an S4 object of class \code{coseqResults}.
#'
#' @param y (\emph{n} x \emph{q}) matrix of observed counts for \emph{n}
#' observations (genes) and \emph{q} variables (samples). In nearly all cases,
#' \emph{n} > \emph{q}.
#' @param K Number of clusters (a single value or a vector of values)
#' @param conds Vector of length \emph{q} defining the condition (treatment
#' group) for each variable (column) in \code{y}
#' @param normFactors The type of estimator to be used to normalize for differences in
#' library size: (\dQuote{\code{TC}} for total count, \dQuote{\code{UQ}} for
#' upper quantile, \dQuote{\code{Med}} for median, \dQuote{\code{DESeq}} for
#' the normalization method in the DESeq package, and \dQuote{\code{TMM}} for
#' the TMM normalization method (Robinson and Oshlack, 2010). Can also be a
#' vector (of length \emph{q}) containing pre-estimated library size estimates
#' for each sample, or \dQuote{\code{none}} if no normalization is required.
#' @param model Type of mixture model to use (\dQuote{\code{Poisson}} or \dQuote{\code{Normal}}), or alternatively
#' \dQuote{\code{kmeans}} for a K-means algorithm
#' @param transformation Transformation type to be used: \dQuote{\code{voom}}, \dQuote{\code{logRPKM}}
#' (if \code{geneLength} is provided by user), \dQuote{\code{arcsin}}, \dQuote{\code{logit}},
#' \dQuote{\code{logMedianRef}}, \dQuote{\code{profile}}, \dQuote{\code{logclr}}, \dQuote{\code{clr}},
#' \dQuote{\code{alr}}, \dQuote{\code{ilr}}, or \dQuote{\code{none}}
#' @param subset Optional vector providing the indices of a subset of
#' genes that should be used for the co-expression analysis (i.e., row indices
#' of the data matrix \code{y}. For the generic function \code{coseq}, the results of a previously
#' run differential analysis may be used to select a subset of genes on which to perform the
#' co-expression analysis. If this is desired, \code{subset.index} can also be an object of class
#' DESeqResults (from the \code{results} function in \code{DESeq2}).
#' @param meanFilterCutoff Value used to filter low mean normalized counts if desired (by default,
#' set to a value of 50)
#' @param modelChoice Criterion used to select the best model. For Gaussian mixture models,
#' \dQuote{\code{ICL}} (integrated completed likelihood criterion) is currently supported. For Poisson
#' mixture models, \dQuote{\code{ICL}}, \dQuote{\code{BIC}} (Bayesian information criterion), and a
#' non-asymptotic criterion calibrated via the slope heuristics  using either the \dQuote{\code{DDSE}}
#' (data-driven slope estimation) or \dQuote{\code{Djump}} (dimension jump) approaches may be used.
#' See the \code{HTSCluster} package documentation for more details about the slope heuristics approaches.
#' @param parallel If \code{FALSE}, no parallelization. If \code{TRUE}, parallel
#' execution using BiocParallel (see next argument \code{BPPARAM}). A note on running
#' in parallel using BiocParallel: it may be advantageous to remove large, unneeded objects
#' from the current R environment before calling the function, as it is possible that R's
#' internal garbage collection will copy these files while running on worker nodes.
#' @param BPPARAM Optional parameter object passed internally to \code{bplapply} when
#' \code{parallel=TRUE}. If not specified, the parameters last registered with \code{register}
#' will be used.
#' @param seed If desired, an integer defining the seed of the random number generator. If
#' \code{NULL}, a random seed is used.
#' @param ... Additional optional parameters.
#'
#' @return
#' An S4 object of class \code{coseqResults} whose assays contain a \code{SimpleList}
#' object, where each element in the list corresponds to the conditional probabilities of cluster membership
#' for each gene in each model. Meta data (accessible via \code{metatdata} include the \code{model} used
#' (either \code{Normal} or \code{Poisson}), the \code{transformation} used on the data, the
#' transformed data using to estimate model (\code{tcounts}), the normalized profiles for use in plotting
#' (\code{y_profiles}), and the normalization factors used in the analysis (\code{normFactors}).
#'
#' @author Andrea Rau
#'
#' @export
#' @importFrom HTSCluster PoisMixClus
#' @importFrom HTSCluster PoisMixClusWrapper
#' @importFrom stats na.omit kmeans
#' @importFrom capushe capushe
#' @importClassesFrom S4Vectors DataFrame
#' @importMethodsFrom S4Vectors metadata
#'
#' @examples
#' ## Simulate toy data, n = 300 observations
#' set.seed(12345)
#' countmat <- matrix(runif(300*4, min=0, max=500), nrow=300, ncol=4)
#' countmat <- countmat[which(rowSums(countmat) > 0),]
#' conds <- rep(c("A","B","C","D"), each=2)
#'
#' ## Run the K-means for K = 2,3,4 with logCLR transformation
#' ## The following are equivalent:
#' run <- coseqRun(y=countmat, K=2:15)
#' run <- coseq(object=countmat, K=2:15, transformation="logclr", model="kmeans")
#'
#' ## Run the Normal mixture model for K = 2,3,4 with arcsine transformation
#' ## The following are equivalent:
#' run <- coseqRun(y=countmat, K=2:4, iter=5, transformation="arcsin", model="Normal")
#' run <- coseq(object=countmat, K=2:4, iter=5, transformation="arcsin", model="Normal")
#'
coseqRun <- function(y, K, conds=NULL, normFactors="TMM", model="kmeans", transformation="logclr",
                      subset=NULL, meanFilterCutoff=50,
                      modelChoice=ifelse(model=="kmeans", "DDSE", "ICL"),
                      parallel=FALSE, BPPARAM=bpparam(), seed=NULL,  ...) {

  if(!is.null(subset)) y <- y[subset,]
  if(!all.equal(round(abs(K)), K)) stop("K should be a vector of cluster numbers.")
  y_profiles <- transformRNAseq(y=y, normFactors=normFactors, transformation="profile",
                                meanFilterCutoff=meanFilterCutoff, verbose=FALSE)$tcounts
  ## Check how the data are set up
  if(nrow(y) < ncol(y))
    warning("The # of observations (rows) < the # of variables (columns) in your data.

In most use cases of co-expression analysis with coseq, genes should be in the rows
and samples in the columns.

Please double-check that your data are in the correct format.")

  ## Check the model used
  if(model == "normal") model <- "Normal"
  if(model == "poisson") model <- "Poisson"
  if(model == "Kmeans") model <- "kmeans"

  ## Parse ellipsis function
  providedArgs <- list(...)
  arg.user <- list(alg.type="EM", init.runs=50, init.type="small-em", init.iter=20,
                   iter=1000, cutoff=0.001, GaussianModel="Gaussian_pk_Lk_Ck",
                   verbose=TRUE, digits=3,
                   Kmin.init="small-em", split.init=FALSE, fixed.lambda=NA,
                   equal.proportions=FALSE, EM.verbose=FALSE, interpretation="sum",
                   geneLength=NA, iter.max=50, nstart=50, algorithm="MacQueen", trace=FALSE)

  if(model == "Poisson") {
    arg.user$init.runs <- 1
    arg.user$init.iter <- 10
    arg.user$cutoff <- 1e-05
    if(is.null(subset)) arg.user$subset.index <- NA
  }
  arg.user[names(providedArgs)] <- providedArgs

  cat("****************************************\n")
  cat("coseq analysis:", model, "approach &", transformation, "transformation\n")
  cat("K =", min(K), "to", max(K), "\n")
  cat("Use seed argument in coseq for reproducible results.\n")
  cat("****************************************\n")

  ########################
  ## POISSON MIXTURE MODEL
  ########################
  if(length(model) & model == "Poisson") {

    if(!is.null(seed)) {
      set.seed(seed)
    }

    if(transformation != "none") stop("Poisson mixture model may only be applied on raw counts.")
    if(is.null(conds)) {
      message("Poisson mixture model fit assuming each sample is an independent condition.")
      conds <- seq_len(ncol(y))
    }

    ## Grouping columns of y in order of condition (all replicates put together)
    o.ycols <- order(conds)
    y <- y[,o.ycols]
    conds <- conds[o.ycols]
    conds.names <- unique(conds)
    d <- length(unique(conds))
    r <- as.vector(table(conds))
    if(!length(rownames(y))) rn <- seq_len(nrow(y))
    if(length(rownames(y))) rn <- rownames(y)
    y <- as.matrix(y, nrow = nrow(y), ncol = ncol(y))
    rownames(y) <- rn


    n <- dim(y)[1]
    cols <- dim(y)[2]
    w <- rowSums(y)


    tcounts <- transformRNAseq(y=y, normFactors=normFactors, transformation="none",
                                geneLength=arg.user$geneLength,
                                meanFilterCutoff=meanFilterCutoff, verbose=FALSE)
    conds <- as.vector(conds)

    if(!parallel) {
      run <- suppressWarnings(PoisMixClusWrapper(y=tcounts$tcounts, gmin=min(K),
                                                 gmax=max(K), conds=conds,
                                                 norm=tcounts$ellnorm / sum(tcounts$ellnorm),
                                                 gmin.init.type=arg.user$Kmin.init,
                                                 split.init=arg.user$split.init, subset.index=NA,
                                                 init.runs=arg.user$init.runs, init.iter=arg.user$init.iter,
                                                 alg.type=arg.user$alg.type, cutoff=arg.user$cutoff,
                                                 iter=arg.user$iter,
                                                 fixed.lambda=arg.user$fixed.lambda,
                                                 equal.proportions=arg.user$equal.proportions,
                                                 verbose=arg.user$verbose, EM.verbose=arg.user$EM.verbose,
                                                 interpretation=arg.user$interpretation))
      names(run$all.results) <- paste0("K=", K)
      run$nbCluster.all <- K
    }


    if(parallel) {
      all.results <- vector("list", length = length(K))
      names(all.results) <- paste0("K=", K)
      cat("Running K =", min(K), "...\n")
      if(arg.user$split.init == TRUE) {
        warning("Splitting initialization is not compatible with parallelization.")
      }
      run <- PoisMixClus(y=tcounts$tcounts, g=min(K), conds=conds,
                         norm=tcounts$ellnorm / sum(tcounts$ellnorm),
                         init.type=arg.user$Kmin.init,
                         subset.index=NA,
                         wrapper=TRUE, init.runs=arg.user$init.runs,
                         init.iter=arg.user$init.iter, alg.type=arg.user$alg.type,
                         cutoff=arg.user$cutoff, iter=arg.user$iter,
                         fixed.lambda=arg.user$fixed.lambda,
                         equal.proportions=arg.user$equal.proportions,
                         prev.labels=NA, prev.probaPost = NA,
                         verbose=arg.user$verbose,
                         EM.verbose=arg.user$EM.verbose,
                         interpretation=arg.user$interpretation)
      all.results[[1]] <- run

      index <- 2
      remainingK <- K[-which(K == min(K))]
      if(length(remainingK) > 0) {
        tmp <- bplapply(remainingK, function(ii, P_y, P_conds, P_norm,
                                             P_init.type,
                                             P_init.runs, P_init.iter,
                                             P_alg.type, P_cutoff,
                                             P_iter, P_fixed.lambda,
                                             P_equal.proportions, P_verbose,
                                             P_interpretation, P_EM.verbose, P_seed) {
          cat("Running K =", ii, "...\n")
          if(!is.null(P_seed)) {
            set.seed(P_seed)
          }
          res <- PoisMixClus(g=as.numeric(ii),
                             y=P_y,
                             conds=P_conds,
                             norm=P_norm,
                             init.type=P_init.type,
                             subset.index=NA,
                             wrapper=TRUE,
                             prev.probaPost=NA,
                             prev.labels=NA,
                             init.runs = P_init.runs,
                             init.iter = P_init.iter,
                             alg.type = P_alg.type,
                             cutoff = P_cutoff,
                             iter = P_iter,
                             fixed.lambda = P_fixed.lambda,
                             equal.proportions = P_equal.proportions,
                             verbose = P_verbose,
                             interpretation = P_interpretation,
                             EM.verbose = P_EM.verbose)
          return(res)},
          P_y=tcounts$tcounts, P_conds=conds, P_norm=tcounts$ellnorm / sum(tcounts$ellnorm),
          P_init.type=arg.user$Kmin.init,
          P_init.runs=arg.user$init.runs, P_init.iter=arg.user$init.iter,
          P_alg.type=arg.user$alg.type, P_cutoff=arg.user$cutoff,
          P_iter=arg.user$iter, P_fixed.lambda=arg.user$fixed.lambda,
          P_equal.proportions=arg.user$equal.proportions, P_verbose=arg.user$verbose,
          P_interpretation=arg.user$interpretation, P_EM.verbose=arg.user$EM.verbose,
          BPPARAM=BPPARAM, P_seed=seed)
        Kmods <- paste0("K=", unlist(lapply(tmp, function(x) ncol(x$lambda))))
        all.results[-1] <- tmp[na.omit(match(names(all.results), Kmods))]
      }

      logLike.all <- unlist(lapply(all.results, function(x) x$log.like))
      ICL.all <- unlist(lapply(all.results, function(x) x$ICL))
      ICL.choose <- which.max(ICL.all)
      select.results <- all.results[[ICL.choose]]
      select.results$model.selection <- "ICL"

      BIC.all <- unlist(lapply(all.results, function(x) x$BIC))
      BIC.choose <- which.max(BIC.all)
      select.results2 <- all.results[[BIC.choose]]
      select.results2$model.selection <- "BIC"

      # Apply capushe: only if at least 10 models are considered
      if(c(max(K) - min(K) + 1) <= 10) {
        message("Note: slope heuristics for model selection only applied if > 10 models are fit.")
        DDSE.results <- NA
        Djump.results <- NA
        capushe <- NA
        ResCapushe <- NA
      }
      if(c(max(K) - min(K) + 1) > 10) {
        message("Note: diagnostic plots for slope heuristics (Djump and DDSE) should be examined to \n
                ensure that sufficiently complex models have been considered.")
        Kchoice <- K
        np <- (Kchoice-1) + (length(unique(conds))-1)*(Kchoice)
        mat <- cbind(Kchoice, np/n, np/n, -logLike.all/n)
        ResCapushe <- suppressWarnings(capushe(mat, n))
        DDSE <- DDSEextract(ResCapushe)
        Djump <- Djumpextract(ResCapushe)
        DDSE.results <- all.results[[paste0("K=", DDSE)]]
        Djump.results <- all.results[[paste0("K=", Djump)]]
        DDSE.results$model.selection <- "DDSE"
        Djump.results$model.selection <- "Djump"
      }
      run <- list(nbCluster.all=K, logLike.all = logLike.all,
                  ICL.all = ICL.all,
                  capushe = ResCapushe,
                  all.results = all.results,
                  DDSE.results = DDSE.results,
                  Djump.results = Djump.results,
                  BIC.results = select.results2,
                  ICL.results = select.results)
    }


    if(modelChoice == "BIC") final.results <- run$BIC.results
    if(modelChoice == "ICL") final.results <- run$ICL.results
    if(modelChoice == "DDSE") final.results <- run$DDSE.results
    if(modelChoice == "Djump") final.results <- run$Djump.results

    final.results$probaPost <- round(final.results$probaPost, arg.user$digits)
    colnames(final.results$probaPost) <- paste0("Cluster_", seq_len(ncol(final.results$probaPost)))
    rownames(final.results$probaPost) <- rownames(tcounts$tcounts)
    for(jj in seq_len(length(run$all.results))) {
      run$all.results[[jj]]$probaPost <- round(run$all.results[[jj]]$probaPost,
                                               arg.user$digits)
    }

    ## CONVERT RESULTS TO SUMMARIZEDEXPERIMENT0 CLASS
    nbClust.all <- run$nbCluster.all
    logLike.all <- run$logLike.all
    ICL.all <- run$ICL.all

    pp <- lapply(run$all.results, function(x) {
      tmp <- round(x$probaPost, digits=arg.user$digits)
      colnames(tmp) <- paste0("Cluster_", seq_len(ncol(tmp)))
      rownames(tmp) <- rownames(tcounts$tcounts)
      return(tmp)
    })
    names(pp) <- names(run$all.results)


    ## Format HTSCluster results
    ICL.results <- SummarizedExperiment(final.results$probaPost, metadata=list(nbCluster=nbClust.all,
                                                                               logLike=logLike.all,ICL=ICL.all))

    tcountsDF = as(tcounts$tcounts, "DataFrame")
    y_profilesDF <- y_profiles <- as(tcounts$tcounts/rowSums(tcounts$tcounts), "DataFrame")
    run <- coseqResults(as(ICL.results, "RangedSummarizedExperiment"),
                        allResults = pp,
                        model="Poisson",
                        transformation="none",
                        tcounts=tcountsDF,
                        y_profiles=y_profilesDF,
                        normFactors=tcounts$ellnorm / sum(tcounts$ellnorm))
  }



  ########################
  ## NORMAL MIXTURE MODEL
  ########################
  if(length(model) & model == "Normal") {

    # (pk_Lk_Bk) or diagonal (pk_Lk_I)

    if(modelChoice != "ICL") message("Note: only ICL is currently supported for model choice for Normal mixture models.")
    tcounts <- transformRNAseq(y=y, normFactors=normFactors, transformation=transformation,
                                geneLength=arg.user$geneLength, meanFilterCutoff=meanFilterCutoff, verbose=FALSE)
    run <- NormMixClus(y_profiles=tcounts$tcounts, K=K, subset=NULL,
                       parallel=parallel,
                       BPPARAM=BPPARAM, alg.type=arg.user$alg.type,
                       init.runs=arg.user$init.runs,
                       init.type=arg.user$init.type, init.iter=arg.user$init.iter,
                       iter=arg.user$iter, cutoff=arg.user$cutoff,
                       verbose=arg.user$verbose, digits=arg.user$digits,
                       GaussianModel=arg.user$GaussianModel,
                       seed=seed)
  }


  ########################
  ## K-means
  ########################

  if(length(model) & model == "kmeans") {

    if(!is.null(seed)) {
      set.seed(seed)
    }

    if(modelChoice != "DDSE") message("Note: only DDSE is currently supported for model choice for K-means.")
    if(!transformation %in% c("clr", "alr", "logclr", "ilr")) {
      message("Transformation used is: ", transformation, "\n
Typically one of the following profile transformations is used with K-means: clr, alr, ilr, logclr")
    }
    tcounts <- transformRNAseq(y=y, normFactors=normFactors, transformation=transformation,
                               geneLength=arg.user$geneLength,
                               meanFilterCutoff=meanFilterCutoff, verbose=FALSE)

    n <- nrow(tcounts$tcounts)
    d <- ncol(tcounts$tcounts)
    km_cluster <- vector("list", length(K)) ## Initialisation pour les vecteurs de classification
    tot_withinss <- c() ## Initialisation pour l'inertie intra
    names(km_cluster) <- paste0("K=", K)

    Kerr <- c()
    if(!parallel) {
      for(k in K) {
        if(arg.user$verbose) cat("Running K =", k, "...\n")
        km <- suppressWarnings(kmeans(tcounts$tcounts, centers=k,
                                      iter.max=arg.user$iter.max,
                                      nstart=arg.user$nstart,
                                      algorithm=arg.user$algorithm,
                                      trace=arg.user$trace))
        km_cluster[[paste0("K=",k)]] <- km$cluster
        tot_withinss <- c(tot_withinss, km$tot.withinss)
        if(!is.null(km$ifault)) Kerr <- c(Kerr, k)
      }
      names(tot_withinss) <- paste0("K=", K)
    }
    if(parallel) {
      tot_withinss <- rep(NA, length(K)) ## Initialisation pour l'inertie intra
      names(tot_withinss) <- paste0("K=", K)
      tmp <- bplapply(K, function(ii, km_tcounts, km_iter.max, km_nstart, km_algorithm,
                                  km_trace, km_verbose, km_seed) {
        if(km_verbose) cat("Running K =", ii, "...\n")
        if(!is.null(km_seed)) {
          set.seed(km_seed)
        }
        km <- suppressWarnings(kmeans(km_tcounts, centers=as.numeric(ii),
                                      iter.max=km_iter.max,
                                      nstart=km_nstart,
                                      algorithm=km_algorithm,
                                      trace=km_trace))
        return(km)}, km_tcounts=tcounts$tcounts, km_iter.max=arg.user$iter.max,
        km_nstart=arg.user$nstart, km_algorithm=arg.user$algorithm, km_trace=arg.user$trace,
        km_verbose=arg.user$verbose, BPPARAM=BPPARAM, km_seed=seed)
      Kmods <- paste0("K=", unlist(lapply(tmp, function(x) length(x$size))))
      km_cluster <- lapply(tmp[na.omit(match(names(km_cluster), Kmods))], function(x) x$cluster)
      names(km_cluster) <- paste0("K=", K)
      tot_withinss <- unlist(lapply(tmp[na.omit(match(names(km_cluster), Kmods))],
                                    function(x) x$tot.withinss))
      names(tot_withinss) <- Kmods
      Kerr <- unlist(lapply(tmp[na.omit(match(names(km_cluster), Kmods))],
                                    function(x) ifelse(is.null(x$ifault),
                                                       NA, length(x$size))))
      Kerr <- Kerr[which(!is.na(Kerr))]
    }

    if(length(K) < 10) {
      message("For model selection via capushe, at least 10 models should be estimated.\n
The model with the largest number of clusters has automatically been selected.")
      K_select <- length(km_cluster)
    }
    if(length(K) >= 10) {
      cap <- suppressWarnings(capushe(matrix(c(K, sqrt(n*d*K), sqrt(n*d*K), tot_withinss), ncol=4)))
      K_select <- paste0("K=",DDSEextract(cap)[1]) # nombre de classes selectionne par capushe
    }
    cluster_select <- km_cluster[[K_select]]

#   ## Keep the estimated posterior probabilities for K-means rather than just the cluster labels
#    pp_select <- round(kmeansProbaPost(clusters=cluster_select, tcounts=tcounts$tcounts), arg.user$digits)
    pp_select <- matrix(0, nrow=nrow(tcounts$tcounts), ncol=as.numeric(DDSEextract(cap)[1]))
    pp_select[cbind(seq_len(length(cluster_select)), cluster_select)] <- 1
    colnames(pp_select) <- paste0("Cluster_", seq_len(ncol(pp_select)))
#    rownames(pp_select) <- rownames(tcounts$tcounts)
    rownames(pp_select) <- rownames(y_profiles)


    nbClust.all <- K
    names(nbClust.all) <- names(km_cluster)

    select.results <- SummarizedExperiment(pp_select,
                                           metadata=list(nbCluster=nbClust.all,
                                                         DDSE=paste0("K=", DDSEextract(cap)[1]),
                                                         Djump=paste0("K=", Djumpextract(cap)[1]),
                                                         capushe=cap,
                                                         tot_withinss=tot_withinss,
                                                         nbClusterError=Kerr))

    all.results <- vector("list", length(K))
    names(all.results) <- paste0("K=", K)
    for(k in K) {
      # all.results[[paste0("K=",k)]] <- round(kmeansProbaPost(clusters=km_cluster[[paste0("K=", k)]],
      #                                                 tcounts=tcounts$tcounts), arg.user$digits)
      all.results[[paste0("K=",k)]] <- matrix(0, nrow=nrow(tcounts$tcounts), ncol=k)
      all.results[[paste0("K=",k)]][cbind(seq_len(length(km_cluster[[paste0("K=", k)]])),
                                          km_cluster[[paste0("K=", k)]])] <- 1
      colnames(all.results[[paste0("K=",k)]]) <- paste0("Cluster_", seq_len(ncol(all.results[[paste0("K=",k)]])))
#      rownames(all.results[[paste0("K=",k)]]) <- rownames(tcounts$tcounts)
      rownames(all.results[[paste0("K=",k)]]) <- rownames(y_profiles)
    }

    run <- coseqResults(as(select.results, "RangedSummarizedExperiment"), allResults=all.results)
  }

  ####################################
  ## RETURN RESULTS
  ####################################

  ICL.results <- SummarizedExperiment(assay(run), metadata=metadata(run))
  all.results <- coseqFullResults(run)
  tcountsDF <- as(tcounts$tcounts, "DataFrame")
  y_profilesDF <- as(y_profiles, "DataFrame")

  colnames(tcountsDF) <- colnames(y_profilesDF)
  rownames(tcountsDF) <- rownames(y_profilesDF)

  RESULTS <- coseqResults(as(ICL.results, "RangedSummarizedExperiment"),
                          allResults=all.results,
                          model=model,
                          transformation=transformation,
                          tcounts=tcountsDF,
                          y_profiles=y_profilesDF,
                          normFactors=tcounts$snorm)
  return(RESULTS)
}



#' Calculate conditional probabilities of cluster membership for K-means clustering
#'
#' @param clusters Cluster labels arising from K-means clustering
#' @param tcounts Transformed counts clustered using K-means
#'
#' @return Conditional probabilities of cluster membership for each observation in each cluster
#' @export
#' @importFrom mvtnorm dmvnorm
#'
#' @examples
#' ## Example of K-means taken from ?kmeans help page
#' x <- rbind(matrix(rnorm(100, sd = 0.3), ncol = 2),
#'            matrix(rnorm(100, mean = 1, sd = 0.3), ncol = 2))
#'            colnames(x) <- c("x", "y")
#' cl <- kmeans(x, 5)
#' probaPost <- kmeansProbaPost(cl$cluster, x)
#' head(probaPost)
kmeansProbaPost <- function(clusters,tcounts)
{
  results <- clusters
  K <- max(results)
  centers <- matrix(0,nrow=K,ncol=ncol(tcounts))
  dens <- matrix(0,nrow=nrow(tcounts),ncol=K)
  withinss <- c() ###initialisation pour les inerties inter
  size <- c() ### initialisation pour les tailles de classes
  atyp <- c() ### indices des classes a un element
  for(i in seq_len(K)) {
    I <- which(results==i)
    size <- c(size,length(I))
    if(length(I)>1) {
      centers[i,] <- colSums(tcounts[I,])/length(I)
      var <- sum((tcounts[I,] - matrix(rep(colSums(tcounts[I,])/length(I),length(I)),
                                       ncol=ncol(tcounts),byrow=TRUE))^2)/length(I)
      withinss <- c(withinss,var)
      dens[,i] <- 1/length(I)*exp(dmvnorm(tcounts,centers[i,], var/ncol(tcounts)*diag(ncol(tcounts)),
                                          log=TRUE))
    }
    if(length(I)==1) {
      centers[i,] <- tcounts[I,]
      var <- 0
      dens[,i] <- rep(0,nrow(tcounts))
      atyp <- c(atyp,i)
    }
  }
  # dens <- dens/rowSums(dens)
  # epsilon <- 1e-10
  # maxcut <- 1 - epsilon
  # mincut <- epsilon
  # dens <- apply(dens, 2, pmax, mincut)
  # dens <- apply(dens, 2, pmin, maxcut)

  tauik <- dens/matrix(rep(rowSums(dens),K),nrow=nrow(tcounts))
  if(length(atyp)>0) {
    for (i in atyp) {
      I <- which(results==i)
      tauik[I,i] <- 1
    }
  }
  rownames(tauik) <- rownames(tcounts)
  colnames(tauik) <- paste0("Cluster_", seq_len(K))
  return(tauik)
}






#' Compare corrected ICL values after data transformation
#'
#' Compare the corrected ICL values after applying the arcsin, logit, and logMedianRef
#' transformations in a coseq analysis
#'
#' @param x A list made up of \code{coseqResults} objects. At the current time, this function
#' only supports the comparison of \code{coseqResults} objects using \code{model="Normal"} and
#' \code{transformation = c("arcsin", "logit", "logMedianRef")}
#'
#' @return A plot of corrected ICL values for the models included in \code{x} (the list
#' of \code{coseqResults} objects)
#'
#' @author Andrea Rau, Cathy Maugis-Rabusseau
#'
#' @export
#' @example inst/examples/coseq-package.R
compareICL <- function(x) {

  coseq_list <- x

  if(length(coseq_list) > 3) stop("Function only currently supported for <= 3 coseq objects")
  transf <- unlist(lapply(coseq_list, function(xx) transformationType(xx)))
  check <- which(transf != "arcsin" & transf != "logit" & transf != "logMedianRef")
  if(length(check))
    stop("Function only currently supported for arcsin, logit, and logMedianRef transformations")
  ## TODO: CHECK THAT EACH TRANSFORMATION IS ONLY PRESENT ONCE
  resarcsin <- reslogit <- reslogMedianRef <- NULL
  if(length(which(transf == "arcsin"))) {
    resarcsin <- coseq_list[[which(transf == "arcsin")]]
    resarcsin_profiles <- as.matrix(as.data.frame(profiles(resarcsin)))
  }
  if(length(which(transf == "logit"))) {
    reslogit <- coseq_list[[which(transf == "logit")]]
    reslogit_profiles <- as.matrix(as.data.frame(profiles(reslogit)))
  }
  if(length(which(transf == "logMedianRef"))) {
    reslogMedianRef <- coseq_list[[which(transf == "logMedianRef")]]
    reslogMedianRef_profiles <- as.matrix(as.data.frame(profiles(reslogMedianRef)))
  }

  ## TODO: check that same data are used for all transformations
  #if(sum(profiles(resarcsin) != profiles(reslogit)) > 0 |
  #   sum(profiles(resarcsin) != profiles(reslogMedianRef)) > 0)
  #  stop("y_profiles in coseq objects not equal -- are models estimated on same data?")

  ## NOTE: REPLACE 0's with smallest value > 0, 1's with largest value < 1
  PP <- resarcsin_profiles
  PP[which(PP == 0)] <- min(PP[which(PP > 0)])
  PP[which(PP == 1)] <- max(PP[which(PP < 1)])

  n <- dim(PP)[1]
  p <- dim(PP)[2]
  qarcsin <- (n*p*log(2)) + (0.5*sum(sum(log(PP*(1-PP)))))
  qlogit <- (n*p*log(log(2))) + (sum(sum(log(PP*(1-PP)))))
  qlogmedianref <- (n*p*log(log(2))) + sum(sum(log(PP)))

  plotmat <- matrix(NA, nrow=0, ncol=3)
  colnames(plotmat) <- c("K", "ICL", "Transformation")

  if(!is.null(resarcsin)) {
    plotmat <- rbind(plotmat, cbind(metadata(resarcsin)$nbCluster,
                                    metadata(resarcsin)$ICL + (2*qarcsin),
                                    rep("arcsin", length(metadata(resarcsin)$ICL))))
  }
  if(!is.null(reslogit)) {
    plotmat <- rbind(plotmat, cbind(metadata(reslogit)$nbCluster,
                                    metadata(reslogit)$ICL + (2*qlogit),
                                    rep("logit", length(metadata(reslogit)$ICL))))
  }
  if(!is.null(reslogMedianRef)) {
    plotmat <- rbind(plotmat, cbind(metadata(reslogMedianRef)$nbCluster,
                                    metadata(reslogMedianRef)$ICL + (2*qlogmedianref),
                                    rep("logMedianRef",
                                        length(metadata(reslogMedianRef)$ICL))))
  }
  plotdf <- data.frame(plotmat, row.names=seq_len(nrow(plotmat)),
                       stringsAsFactors = FALSE)
  plotdf$Transformation <- factor(plotdf$Transformation)
  plotdf$ICL <-  as.numeric(plotdf$ICL)
  plotdf$K <-  as.numeric(plotdf$K)

  g1 <- ggplot(plotdf, aes_string(x="K", y="ICL")) +
    geom_line(aes_string(color="Transformation")) +
    geom_point(aes_string(color="Transformation")) +
    scale_y_continuous(name="Corrected ICL") + theme_bw()

  print(g1)
}


#' Calculation of per-cluster entropy
#'
#' Provides the calculation of per-cluster entropy, equivalent to
#' \deqn{Entropy(k) = \sum_{i \in C_k} \log (\tau_{ik})}
#' where \eqn{\tau_{ik}} is the conditional probability of gene \emph{i} belonging
#' to cluster \emph{k} and \eqn{C_k} corresponds to the set of indices of genes
#' attributed to cluster \emph{k}.
#'
#' @param probaPost Matrix containing the conditional probabilities of belonging
#' to each cluster for all observations
#'
#' @return Entropy per cluster
#'
#' @author Cathy Maugis-Rabusseau
#'
#' @examples
#' ## Generate artificial matrix of conditional probabilities for K=5 clusters
#' tmp <- matrix(runif(100*5), nrow=100, ncol=5)
#' probaPost <- tmp / rowSums(tmp)
#' clusterEntropy(probaPost)
#'
#' @export
clusterEntropy <- function(probaPost) {
  label <- apply(probaPost,1,which.max)
  entrop <- NULL
  for (k in seq_len(max(label))){
    II <- which(label==k)
    entrop <- c(entrop,sum(log(probaPost[II,k])))
  }
  return(entrop)
}


#' Calculation of within-cluster inertia
#'
#' Provides the calculation of within-cluster inertia, equivalent to
#' \deqn{Inertia(k) = \sum_{i \in C_k} (y_{ik} - \mu_k)^2}
#' where \eqn{\mu_k} is the mean of cluster \emph{k} and \eqn{C_k} corresponds to the set of indices of genes
#' attributed to cluster \emph{k}.
#'
#' @param profiles Matrix, data.frame, or DataFrame containing the (transformed) profiles used for the clustering
#' @param clusters Vector of cluster labels corresponding to the observations in \code{profiles}
#'
#' @return Within cluster inertia
#'
#' @author Andrea Rau, Antoine Godichon-Baggioni
#'
#' @export
#' @examples
#' ## Simulate toy data, n = 300 observations
#' set.seed(12345)
#' countmat <- matrix(runif(300*4, min=0, max=500), nrow=300, ncol=4)
#' countmat <- countmat[which(rowSums(countmat) > 0),]
#' conds <- rep(c("A","B","C","D"), each=2)
#'
#' ## Run the K-means algorithm for logclr profiles for K = 2,..., 20
#' run_kmeans <- coseq(object=countmat, K=2:20, transformation="logclr",
#' model="kmeans")
#' clusterInertia(profiles=tcounts(run_kmeans), clusters=clusters(run_kmeans))
#'
clusterInertia <- function(profiles, clusters) {
  inertia <- NULL
  ## TODO: check that the length of clusters equals profiles
  for(k in seq_len(max(clusters))){
    II <- which(clusters==k)
    if(length(II) > 1) {
      tmp <- as.matrix(profiles[II,])
      inertia <- c(inertia, sum(t(t(tmp) - colMeans(tmp))^2))
    } else inertia <- c(inertia, 0)
  }
  return(inertia)
}


#' Permute columns of a contingency table
#'
#' Permute the columns of a contingency table comparing two clusterings
#' to load the diagonal as much as possible.
#'
#' @param table_1 Partition from a first data clustering
#' @param table_2 Partition from a second data clustering
#'
#' @return Permuted contingency table
#' @export
#'
#' @examples
#' ## Generate arbitrary labels from two separate clustering results
#' labels_1 <- sample(1:10, 1000, replace=TRUE)  ## K=10 clusters
#' labels_2 <- sample(1:8, 1000, replace=TRUE)   ## K=8 clusters
#' matchContTable(labels_1, labels_2)
#'
#' @importFrom e1071 matchClasses
matchContTable <- function(table_1, table_2){
  tab <- table(table_1, table_2)
  ## Put larger clustering in rows if needed, nrow(tab) >= ncol(tab)
  transpose <- FALSE
  if(nrow(tab) < ncol(tab)) transpose <- TRUE
  if(transpose) tab <- t(tab)
  ## Order rows according to largest clusters
  tab <- tab[order(apply(tab,1,max), decreasing=TRUE),]
  ## Match best column with each row of tab
  ## Use unique indices as some columns might map to multiple rows
  index <- matchClasses(tab, method=ifelse(nrow(tab)==ncol(tab), "exact", "rowmax"))
  tabord <- tab[,unique(index)]
  if(transpose) tabord <- t(tabord)
  return(tabord)
}

#' Transform RNA-seq data using common transformations
#'
#' Application of common transformations for RNA-seq data prior to fitting a normal mixture model
#'
#' @param y (\emph{n} x \emph{q}) \code{matrix} or \code{data.frame} of observed counts
#' for \emph{n} observations and \emph{q} variables
#' @param normFactors The type of estimator to be used to normalize for differences in
#' library size: \dQuote{\code{TC}} for total count, \dQuote{\code{DESeq}} for
#' the normalization method in the DESeq package, and \dQuote{\code{TMM}} for
#' the TMM normalization method (Robinson and Oshlack, 2010). Can also be a
#' vector (of length \emph{q}) containing pre-estimated library size estimates
#' for each sample.
#' @param transformation Transformation type to be used: \dQuote{\code{arcsin}},
#' \dQuote{\code{logit}}, \dQuote{\code{logMedianRef}}, \dQuote{\code{profile}},
#' \dQuote{\code{voom}}, \dQuote{\code{logRPKM}} (if \code{geneLength} is provided by user),
#' \dQuote{\code{logclr}}, \dQuote{\code{clr}}, \dQuote{\code{alr}}, \dQuote{\code{ilr}},
#' \dQuote{\code{none}},
#' @param geneLength Vector of length equal to the number of rows in \dQuote{\code{y}} providing
#' the gene length (bp) for RPKM calculation
#' @param meanFilterCutoff Value used to filter low mean normalized counts
#' @param verbose If \code{TRUE}, include verbose output
#'
#' @return
#' \item{tcounts }{Transformed counts}
#' \item{normCounts }{Normalized counts}
#' \item{snorm }{Per-sample normalization factors divided by mean normalization factor}
#' \item{ellnorm }{Per-sample normalization factors}
#'
#' @export
#'
#' @examples
#' set.seed(12345)
#' countmat <- matrix(runif(300*4, min=0, max=500), nrow=300, ncol=4)
#' countmat <- countmat[which(rowSums(countmat) > 0),]
#' conds <- rep(c("A","B","C","D"), each=2)
#'
#' ## Arcsin transformation, TMM normalization
#' arcsin <- transformRNAseq(countmat, normFactors="TMM", transformation="arcsin")$tcounts
#' ## Logit transformation, TMM normalization
#' logit <- transformRNAseq(countmat, normFactors="TMM", transformation="logit")$tcounts
#' ## logCLR transformation, TMM normalization
#' logclr <- transformRNAseq(countmat, normFactors="TMM", transformation="logclr")$tcounts
#'
#' @importFrom edgeR calcNormFactors
#' @importFrom edgeR cpm
#' @importFrom HTSFilter HTSBasicFilter
#' @importFrom DESeq2 varianceStabilizingTransformation
#' @importFrom stats median
#' @importFrom compositions alr clr ilr
transformRNAseq <- function(y, normFactors="TMM", transformation="arcsin",
                             geneLength=NA, meanFilterCutoff=NULL, verbose=TRUE) {

  ##################################
  ## Add warning message if y looks like profiles
  ##################################
  if(min(y) < 0)
    warning("Your data contain negative values, but the transformRNAseq function expects counts.
Are you sure you want to proceed?")
  if(min(y) >= 0 & max(y) <= 1)
    warning("Your data look like profiles, but the transformRNAseq function expects counts.
Are you sure you want to proceed?

Note: if you have profiles and wish to apply the arcsin/logit/CLR/logCLR transformations,
use one of the following instead.

arcsin <- asin(sqrt(profiles))
logit <- log2(profiles / (1-profiles))
CLR <- clr(profiles)
logCLR <- logclr(profiles)

Then use transformation='none' and normFactors='none'.
")

  ##################################
  ## Calculate normalization factors
  ##################################
  ## Only calculate s values if they are not provided
  if(length(normFactors) != 1 & length(normFactors) != ncol(y)) stop(paste(sQuote("normFactors"), "must be one of
     the following: none, TC, DESeq, TMM, or a vector oflength equal to the number of columns
     in", sQuote("y")))
  ## If estimated from data, all genes should be used
  if(length(normFactors) == 1) {
    if(normFactors == "none") {
      libsize <- rep(1, ncol(y))
    }
    if(normFactors == "TMM") {
      f <- calcNormFactors(as.matrix(y), method="TMM")
      libsize <- colSums(y) * f
    }
    if(normFactors == "TC") {
      libsize <- colSums(y)
    }
    if(normFactors == "DESeq") {
      loggeomeans <- rowMeans(log(y))
      f <- apply(y, 2, function(x)
        exp(median((log(x) - loggeomeans)[is.finite(loggeomeans)])))
      libsize <- colSums(y) * f
    }
  }
  if(length(normFactors) > 1) {
    libsize <- normFactors
  }

  snorm <- libsize / (mean(libsize))
  ellnorm <- libsize

  ##################################
  ## Filter data based on mean count if desired
  ##################################
  if(!is.null(meanFilterCutoff)) {

    filter <- HTSBasicFilter(y, method="mean", cutoff.type="value",
                             cutoff=meanFilterCutoff, normalization=normFactors)
    if(verbose) {
      cat("Filter applied: remove observations with normalized mean < ", meanFilterCutoff, "\n")
      cat("               ", nrow(filter$filteredData), "observations retained for analysis\n")
    }
    newy <- filter$filteredData
    colnames(newy) <- colnames(y)
    rownames(newy) <- rownames(y)[which(filter$on==1)]
    y <- newy
    geneLength <- unlist(geneLength)[which(filter$on == 1)]
  }

  ##################################
  ## Transform data and calculate the first derivative of transformations
  ##################################

  normCounts <- t(t(y)/snorm +1)
  if(transformation == "none") {
    tcounts <- y
  }
  if(transformation == "profile") {
    tcounts <- normCounts / rowSums(normCounts)
  }
  if(transformation == "clrProfile") {
    profiles <- normCounts / rowSums(normCounts)
    gm <- apply(profiles, 1, function(x) exp(mean(log(x))))
    tcounts <- log(profiles / gm)
  }
  if(transformation == "voom") {
    tcounts <- log2(t((t(y + 0.5)/(ellnorm + 1)))*1e+06)
  }
  if(transformation == "logRPKM") {
    if(is.na(geneLength)[1]) stop("RPKM transformation requires input for gene lengths")
    if(length(unlist(geneLength))!=nrow(y)) stop("Gene length vector of different dimension than counts")
    Lcounts <- y/geneLength
    tcounts <- log2(t(t(Lcounts)/ellnorm) * 10^9 + 0.5)
    elltmp <- matrix(rep(ellnorm, each = nrow(y)), nrow=nrow(y))
    lentmp <- matrix(rep(geneLength, times=ncol(y)), nrow=nrow(y))
  }
  if(transformation == "arcsin") {
    props <- normCounts / rowSums(normCounts)
    tcounts <- asin(sqrt(props))
    w <- matrix(rep(rowSums(normCounts), times=ncol(y)), nrow=nrow(y))
  }
  if(transformation == "logit") {
    props <- normCounts / rowSums(normCounts)
    tcounts <- log2(props / (1-props))
    w <- matrix(rep(rowSums(normCounts), times=ncol(y)), nrow=nrow(y))
    stemp <- matrix(rep(snorm, each = nrow(y)), nrow=nrow(y))
  }
  if(transformation == "logMedianRef") {
    m <- apply((t(t(y)/snorm))+1, 1, median)
    tcounts <- log2((t(t(y)/snorm) + 1)/(m+1))
    stemp <- matrix(rep(snorm, each = nrow(y)), nrow=nrow(y))
  }
  if(transformation == "logclr") {
    profiles <- normCounts / rowSums(normCounts)
    tcounts <- logclr(profiles)
  }
  if(transformation == "clr") {
    profiles <- normCounts / rowSums(normCounts)
    tmp <- clr(profiles)
    ## Remove the rmult attributes to keep only a matrix
    attributes(tmp) <- NULL
    tmp <- matrix(tmp, nrow=nrow(normCounts), ncol=ncol(normCounts))
    rownames(tmp) <- rownames(normCounts)
    colnames(tmp) <- colnames(normCounts)
    tcounts <- tmp
  }
  if(transformation == "alr") {
    profiles <- normCounts / rowSums(normCounts)
    tmp <- alr(profiles)
    ## Remove the rmult attributes to keep only a matrix
    attributes(tmp) <- NULL
    tmp <- matrix(tmp, nrow=nrow(normCounts), ncol=ncol(normCounts))
    rownames(tmp) <- rownames(normCounts)
    colnames(tmp) <- colnames(normCounts)
    tcounts <- tmp
  }
  if(transformation == "ilr") {
    profiles <- normCounts / rowSums(normCounts)
    tmp <- ilr(profiles)
    ## Remove the rmult attributes to keep only a matrix
    attributes(tmp) <- NULL
    tmp <- matrix(tmp, nrow=nrow(normCounts), ncol=ncol(normCounts))
    rownames(tmp) <- rownames(normCounts)
    colnames(tmp) <- colnames(normCounts)
    tcounts <- tmp
  }
  ##################################
  ## Old transformations (kept for reference)
  ##################################
  if(transformation == "log") {
    tcounts <- log2(y + 1)
  }
  if(transformation == "normlog") {
    tcounts <- log2(t(t(y)/snorm) + 1)
  }
  if(transformation == "logMeanRef") {
    m <- apply((t(t(y)/snorm))+1, 1, mean)
    tcounts <- log2((t(t(y)/snorm) + 1)/(m+1))
  }
  if(transformation == "logGMeanRef") {
    m <- apply((t(t(y)/snorm))+1, 1, function(x) (prod(x+1))^(1/length(x)))
    tcounts <- log2((t(t(y)/snorm) + 1)/(m))
  }
  if(transformation == "vst") {
    tcounts <- varianceStabilizingTransformation(as.matrix(y), blind=TRUE,
                                                 fitType="parametric")
  }
  if(transformation == "moderatedCPM") {
    tcounts <- cpm(as.matrix(y), normalized.lib.sizes=TRUE, log=TRUE,
                   prior.count=0.25)
  }
  rownames(tcounts) <- rownames(y)
  colnames(tcounts) <- colnames(y)
  return(list(tcounts=tcounts, normCounts=normCounts, snorm=snorm, ellnorm=ellnorm))
}



#' Convert legacy coseq objects
#'
#' Convert legacy coseq S3 class objects to coseqResults S4 class objects
#'
#' @param object Object of S3 class \code{coseq} arising from a call to previous versions of
#' coseq (< 0.99.1)
#' @param digits integer indicating the number of decimal places (round) to retain in results.
#'
#' @return Converted object of S4 class \code{coseqResults} compatible with
#' recent versions of coseq (>= 0.99.1)
#' @export
convertLegacyCoseq <- function(object, digits=3) {
  if(!is(object, "coseq"))
    stop("This function is intended to convert legacy coseq S3 objects \n
          from coseq versions < 0.99.1 into coseqResults S4 objects from \n
          coseq versions >= 0.99.2.")
  if(object$model == "Poisson")
    pp_ICL <- round(object$results$selected.results$probaPost, digits=digits)
  if(object$model == "Normal")
    pp_ICL <- round(object$results$ICL.results$probaPost, digits=digits)
  colnames(pp_ICL) <- paste0("Cluster_", seq_len(ncol(pp_ICL)))
  if(!is.null(rownames(object$y_profiles)))
    rownames(pp_ICL) <- rownames(object$y_profiles)
  if(is.null(rownames(object$y_profiles)))
    rownames(pp_ICL) <- seq_len(nrow(pp_ICL))
  ICL.results <- SummarizedExperiment(pp_ICL,
                                      metadata=list(nbCluster=object$results$nbCluster.all,
                                                    logLike=object$results$logLike.all,
                                                    ICL=object$results$ICL.all,
                                      GaussianModel=object$results$ICL.results$GaussianModel))
  tcountsDF <- as(object$tcounts, "DataFrame")
  y_profilesDF = as(object$y_profiles, "DataFrame")
  pp <- lapply(object$results$all.results, function(x) {
    tmp <- round(x$probaPost, digits=digits)
    if(ncol(tmp)) {
      colnames(tmp) <- paste0("Cluster_", seq_len(ncol(tmp)))
      if(is.null(rownames(tmp))) {
        if(!is.null(rownames(object$y_profiles))) rownames(tmp) <- rownames(object$y_profiles)
        if(is.null(rownames(object$y_profiles))) rownames(tmp) <- seq_len(nrow(tmp))
      }
    }
    return(tmp)
  })
  newobj <- coseqResults(as(ICL.results, "RangedSummarizedExperiment"),
                         allResults = pp,
                         model=object$model,
                         transformation=object$transformation,
                         tcounts=tcountsDF,
                         y_profiles=y_profilesDF,
                         normFactors=numeric())
  return(newobj)
}



#' Calculate the Log Centered Log Ratio (logCLR) transformation
#'
#' @param profiles Matrix of profiles. Note that the presence of 0 values causes an error message to be produced.
#'
#' @return logCLR-transformed profiles
#' @export
logclr <- function(profiles)
{
  d <- ncol(profiles)
  n <- nrow(profiles)
  if(length(which(profiles == 0)))
    stop("log-CLR transformation cannot be performed when there are 0's in the profiles matrix")
  tprofiles <- profiles / exp(rowMeans(log(profiles)))
  tprofiles2 <- matrix(NA, nrow=nrow(tprofiles), ncol=ncol(tprofiles))
  tprofiles2[which(tprofiles <= 1)] <- -(log(1-log(tprofiles[which(tprofiles <= 1)]))^2)
  tprofiles2[which(tprofiles > 1)] <- (log(tprofiles[which(tprofiles > 1)]))^2
  return(tprofiles2)
}
andreamrau/coseq documentation built on July 25, 2021, 10:17 a.m.