R/mafSuvival.R

Defines functions mafSurvival

Documented in mafSurvival

#' Performs survival analysis
#' @description Performs survival analysis by grouping samples from maf based on mutation status of given gene(s) or manual grouping of samples.
#' @details This function takes MAF file and groups them based on mutation status associated with given gene(s) and performs survival analysis. Requires dataframe containing survival status and time to event.
#' Make sure sample names match to Tumor Sample Barcodes from MAF file.
#' @param maf an \code{\link{MAF}} object generated by \code{\link{read.maf}}
#' @param genes gene names for which survival analysis needs to be performed. Samples with mutations in any one of the genes provided are used as mutants.
#' @param samples samples to group by. Genes and samples are mutually exclusive.
#' @param clinicalData dataframe containing events and time to events. Default looks for clinical data in annotation slot of \code{\link{MAF}}.
#' @param time column name contining time in \code{clinicalData}
#' @param Status column name containing status of patients in \code{clinicalData}. must be logical or numeric. e.g, TRUE or FALSE, 1 or 0.
#' @param groupNames names for groups. Should be of length two. Default c("Mutant", "WT")
#' @param col colors for plotting.
#' @param isTCGA FALSE. Is data is from TCGA.
#' @param showConfInt TRUE. Whether to show confidence interval in KM plot.
#' @param addInfo TRUE. Whether to show survival info in the plot.
#' @param textSize Text size for surv table. Default 7.
#' @import survival
#' @return Survival plot
#' @examples
#' laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
#' laml.clin <- system.file("extdata", "tcga_laml_annot.tsv", package = "maftools")
#' laml <- read.maf(maf = laml.maf,  clinicalData = laml.clin)
#' mafSurvival(maf = laml, genes = 'DNMT3A', time = 'days_to_last_followup', Status = 'Overall_Survival_Status', isTCGA = TRUE)
#'
#'@export

mafSurvival = function(maf, genes = NULL, samples = NULL, clinicalData = NULL, time = "Time",
                       Status = "Status", groupNames = c("Mutant", "WT"), showConfInt = TRUE, addInfo = TRUE,
                       col = c('maroon', 'royalblue'), isTCGA = FALSE,
                       textSize = 12){

  if(is.null(genes) & is.null(samples)){
    stop("Either provide Gene names or Sample names to group by.")
  }

  if(!is.null(genes) & !is.null(samples)){
    stop("Either provide Gene names or Sample names to group by. Not both!")
  }

  if(is.null(clinicalData)){
    message("Looking for clinical data in annoatation slot of MAF..")
    clinicalData = getClinicalData(x = maf)
    clinicalData = data.table::setDT(clinicalData)
  }else{
    clinicalData = data.table::setDT(clinicalData)
  }

  if(!"Tumor_Sample_Barcode" %in% colnames(clinicalData)){
    print(colnames(clinicalData))
    stop("Column Tumo_Sample_Barcode not found in clinical data. Check column names and rename it to Tumo_Sample_Barcode if necessary.")
  }

  #clinicalData$Tumor_Sample_Barcode = gsub(pattern = '-', replacement = '.', x = clinicalData$Tumor_Sample_Barcode)
  if(isTCGA){
    clinicalData$Tumor_Sample_Barcode = substr(x = clinicalData$Tumor_Sample_Barcode, start = 1, stop = 12)
  }

  if(length(colnames(clinicalData)[colnames(clinicalData) %in% time]) == 0){
    print(colnames(clinicalData))
    stop(paste0(time, " not found in clinicalData. Use argument time to povide column name containing time to event."))
  }else{
    colnames(clinicalData)[colnames(clinicalData) %in% time] = 'Time'
  }

  if(length(colnames(clinicalData)[colnames(clinicalData) %in% Status]) == 0){
    print(colnames(clinicalData))
    stop(paste0(Status, " not found in clinicalData. Use argument Status to povide column name containing events (Dead or Alive)."))
  }else{
    colnames(clinicalData)[colnames(clinicalData) %in% Status] = 'Status'
  }

  names(col) = groupNames

  if(!is.null(genes)){
    #genesTSB = unique(as.character(subsetMaf(maf = maf, includeSyn = FALSE, genes = genes)[,Tumor_Sample_Barcode]))
    genesTSB = genesToBarcodes(maf = maf, genes = genes, justNames = TRUE)
    #genesTSB = lapply(X = genes, FUN = function(x) unique(as.character(subsetMaf(maf = maf, includeSyn = FALSE, genes = x)[,Tumor_Sample_Barcode])))
    #names(genesTSB) = genes
    genesTSB = genesTSB[sapply(genesTSB, FUN = function(x) length(x) != 0)]
    message("Number of mutated samples for given genes: ")
    print(sapply(genesTSB, FUN = length))

    genesMissing = genes[!genes %in% names(genesTSB)]
    if(length(genesMissing) > 0){
      genes = genes[!genes %in% genesMissing]
      genesMissing = paste(genesMissing, collapse = ', ')
      message(paste0("genes ", genesMissing, " does not seeem to be mutated. Removing them."))
    }

    if(length(genes) == 0){
      stop('None of the given genes are mutated!')
    }else{
      genes = paste(genes, collapse = ', ')
    }

    genesTSB = unique(as.character(unlist(genesTSB)))
  }else{
    #genesTSB = gsub(pattern = '-', replacement = '.', x = samples)
    genesTSB = samples
  }

  #clinicalData = clinicalData[!is.na(Time)]
  data.table::setDT(clinicalData)

  clinicalData$Time = suppressWarnings( as.numeric(as.character(clinicalData$Time)) )
  clinicalData$Status = suppressWarnings( as.integer(clinicalData$Status) )
  clinicalData$Group = ifelse(test = clinicalData$Tumor_Sample_Barcode %in% genesTSB, yes = groupNames[1], no = groupNames[2])

  clinicalData$Time = ifelse(test = is.infinite(clinicalData$Time), yes = NA, no = clinicalData$Time)

  if(nrow(clinicalData[!is.na(Time)][!is.na(Status)]) < nrow(clinicalData)){
    message(paste0("Removed ", nrow(clinicalData) - nrow(clinicalData[!is.na(Time)][!is.na(Status)]),
                   " samples with NA's"))
    clinicalData = clinicalData[!is.na(Time)][!is.na(Status)]
  }
  clin.mut.dat = clinicalData[,.(medianTime = median(Time, na.rm = TRUE),N = .N), Group][order(Group)]
  message("Median survival..")
  print(clin.mut.dat)

  surv.km = survival::survfit(formula = survival::Surv(time = Time, event = Status) ~ Group, data = clinicalData, conf.type = "log-log")
  res = summary(surv.km)

  surv.diff = survival::survdiff(formula = survival::Surv(time = Time, event = Status) ~ Group, data = clinicalData)
  surv.diff.pval = signif(1 - pchisq(surv.diff$chisq, length(surv.diff$n) - 1), digits = 3)

  surv.cox = survival::coxph(formula = survival::Surv(time = Time, event = Status) ~ Group, data = clinicalData)
  hr = signif(1/exp(stats::coef(surv.cox)), digits = 3)

  surv.dat = data.table::data.table(Group = res$strata, Time = res$time, survProb = res$surv, survUp = res$upper, survLower = res$lower)
  surv.dat$Group = gsub(pattern = 'Group=', replacement = '', x = surv.dat$Group)

  par(mar = c(4, 4, 2, 2))
  x_lims = pretty(surv.km$time)
  y_lims = seq(0, 1, 0.20)
  plot(NA, xlim = c(min(x_lims), max(x_lims)), ylim = c(0, 1),
       frame.plot = FALSE, axes = FALSE, xlab = NA, ylab = NA)
  abline(h = y_lims, v = x_lims, lty = 2, col = grDevices::adjustcolor(col = "gray70", alpha.f = 0.5), lwd = 0.75)

  points(surv.dat[Group %in% names(col)[1], Time], y = surv.dat[Group %in% names(col)[1], survProb], pch = 8, col = col [1])
  points(surv.dat[Group %in% names(col)[2], Time], y = surv.dat[Group %in% names(col)[2], survProb], pch = 8, col = col [2])

  lines(surv.km[1], col = col[1], lty = 1, lwd = 2, conf.int=FALSE)
  lines(surv.km[2], col = col[2], lty = 1, lwd = 2, conf.int=FALSE)

  axis(side = 1, at = x_lims)
  axis(side = 2, at = y_lims, las = 2)
  mtext(text = "Survival probability", side = 2, line = 2.5)
  mtext(text = time, side = 1, line = 2, adj = 1)

  cd_n = clinicalData[,.N,Group] #numbers per group
  data.table::setDF(x = cd_n, rownames = cd_n$Group)

  cd_n = cd_n[names(col),, drop = FALSE]
  legend(x = "topright", legend = apply(cd_n, 1, paste, collapse = ":"), col = col, bty = "n", lwd = 2, pch = 8)

  title(main = NA,
        sub = paste0("P-value: ", round(surv.diff.pval, digits = 4), "; ", "HR: ", hr), cex.main = 1, font.main= 4, col.main= "black",
        cex.sub = 1, font.sub = 3, col.sub = ifelse(test = surv.diff.pval < 0.05, yes = "red", no = "black"),
        line = 2.5, adj = 0)
  title(main = paste0(groupNames[1], " v/s ", groupNames[2]), adj = 0, font.main = 4)
}
PoisonAlien/maftools documentation built on Nov. 10, 2024, 6:01 p.m.