R/estimateSignatures.R

Defines functions estimateSignatures

Documented in estimateSignatures

#' Estimate number of signatures based on cophenetic correlation metric
#' @details This function decomposes a non-negative matrix into n signatures.
#' Extracted signatures are compared against 30 experimentally validated signatures by calculating cosine similarity. See http://cancer.sanger.ac.uk/cosmic/signatures for details.
#'
#' @param mat Input matrix of diemnsion nx96 generated by \code{\link{trinucleotideMatrix}}
#' @param nMin Minimum number of signatures to try. Default 2.
#' @param nTry Maximum number of signatures to try. Default 6.
#' @param nrun numeric giving the number of run to perform for each value in range. Default 5
#' @param plotBestFitRes plots consensus heatmap for range of values tried. Default FALSE
#' @param parallel Default 4. Number of cores to use.
#' @param verbose Default TRUE
#' @param pConstant A small positive value to add to the matrix. Use it ONLY if the functions throws an \code{non-conformable arrays} error
#' @return a list with \code{NMF.rank} object and summary stats.
#' @examples
#' \dontrun{
#' laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
#' laml <- read.maf(maf = laml.maf)
#' laml.tnm <- trinucleotideMatrix(maf = laml, ref_genome = 'BSgenome.Hsapiens.UCSC.hg19', prefix = 'chr',
#' add = TRUE, useSyn = TRUE)
#' library("NMF")
#' laml.sign <- estimateSignatures(mat = laml.tnm, plotBestFitRes = FALSE, nMin = 2, nTry = 3, nrun = 2, pConstant = 0.01)
#' }
#' @importFrom grDevices pdf boxplot.stats dev.off
#' @seealso \code{\link{plotCophenetic}} \code{\link{extractSignatures}} \code{\link{trinucleotideMatrix}}
#' @export
estimateSignatures = function(mat, nMin = 2, nTry = 6, nrun = 10, parallel = 4, pConstant = NULL, verbose = TRUE, plotBestFitRes = FALSE){


  if(nMin >= nTry){
    stop("nMin should be less than nTry")
  }

  if(nMin < 2){
    stop("nMin should atleast be 2")
  }

  #suppressPackageStartupMessages(require(NMF, quietly = TRUE))
  #transpose matrix
  start_time = proc.time()
  mat = t(mat$nmf_matrix)

  #Validation
  zeroMutClass = names(which(rowSums(mat) == 0))

  if(length(zeroMutClass)){
    message('-Found zero mutations for conversions:')
    for(temp in zeroMutClass){
      message(paste0("  ", temp))
    }
  }

  #To avoid error due to non-conformable arrays
  if(!is.null(pConstant)){
    if(pConstant < 0 | pConstant == 0){
      stop("pConstant must be > 0")
    }
    mat = mat+pConstant
  }

  #Notes:
  #Available methods for nmf decompositions are 'brunet', 'lee', 'ls-nmf', 'nsNMF', 'offset'.
  #But based 21 breast cancer signatures data, defualt brunet seems to be working close to the results.
  #Sticking with default for now.

  cat(paste0('-Running NMF for ', nTry, ' ranks\n'))
  if(!is.null(parallel)){
    nmfTry = NMF::nmfEstimateRank(mat, seq(nMin,nTry), method='brunet', nrun = nrun, seed = 123456, .opt = paste0('P', parallel), verbose = verbose) #try nmf for a range of values
  }else{
    nmfTry = NMF::nmfEstimateRank(mat, seq(nMin,nTry), method='brunet', nrun = nrun, seed = 123456, verbose = verbose) #try nmf for a range of values
  }

  if(plotBestFitRes){
    png(filename = 'nmf_consensus.png', bg = 'white', res = 70)
    NMF::consensusmap(nmfTry)
    dev.off()
    cat('-created nmf_consensus.png\n')
    #print(NMF::plot(nmfTry, 'cophenetic'))
  }

  nmf.sum = summary(nmfTry) # Get summary of estimates
  data.table::setDT(nmf.sum)

  results = list(nmfSummary = nmf.sum, nmfObj = nmfTry)
  plotCophenetic(results)

  cat("-Finished in",data.table::timetaken(start_time),"\n")
  return(results)
}
PoisonAlien/maftools documentation built on Nov. 10, 2024, 6:01 p.m.