#' Predict genesets associated with survival
#' @param maf an \code{\link{MAF}} object generated by \code{\link{read.maf}}
#' @param top If genes is \code{NULL} by default used top 20 genes
#' @param genes Manual set of genes
#' @param geneSetSize Default 2
#' @param minSamples minimum number of samples to be mutated to be considered for analysis. Default 5
#' @param clinicalData dataframe containing events and time to events. Default looks for clinical data in annotation slot of \code{\link{MAF}}.
#' @param time column name contining time in \code{clinicalData}
#' @param Status column name containing status of patients in \code{clinicalData}. must be logical or numeric. e.g, TRUE or FALSE, 1 or 0.
#' @param verbose Default TRUE
#' @param plot Default FALSE If TRUE, generate KM plots of the genesets combinations.
#' @export
#' @examples
#' laml.maf <- system.file("extdata", "tcga_laml.maf.gz", package = "maftools")
#' laml.clin <- system.file("extdata", "tcga_laml_annot.tsv", package = "maftools")
#' laml <- read.maf(maf = laml.maf, clinicalData = laml.clin)
#' survGroup(maf = laml, top = 20, geneSetSize = 1, time = "days_to_last_followup", Status = "Overall_Survival_Status", plot = FALSE)
survGroup = function(maf, top = 20, genes = NULL, geneSetSize = 2, minSamples = 5, clinicalData = NULL, time = "Time",
Status = "Status", verbose = TRUE, plot = FALSE){
if(is.null(genes)){
genes = getGeneSummary(x = maf)[1:top, Hugo_Symbol]
}
if(length(genes) < 2){
stop("Minimum two genes required!")
}
genesCombn = combn(x = genes, m = geneSetSize)
if(verbose){
cat("------------------\n")
cat(paste0("genes: ", length(genes), "\n"))
cat(paste0("geneset size: ", geneSetSize, "\n"))
cat(paste0(ncol(genesCombn), " combinations\n"))
}
if(is.null(clinicalData)){
if(verbose){
message("Looking for clinical data in annoatation slot of MAF..")
}
clinicalData = data.table::copy(getClinicalData(x = maf))
clinicalData = data.table::setDT(clinicalData)
}else{
clinicalData = data.table::setDT(clinicalData)
}
if(!"Tumor_Sample_Barcode" %in% colnames(clinicalData)){
print(colnames(clinicalData))
stop("Column Tumo_Sample_Barcode not found in clinical data. Check column names and rename it to Tumo_Sample_Barcode if necessary.")
}
if(length(colnames(clinicalData)[colnames(clinicalData) %in% time]) == 0){
print(colnames(clinicalData))
stop(paste0(time, " not found in clinicalData. Use argument time to povide column name containing time to event."))
}else{
colnames(clinicalData)[colnames(clinicalData) %in% time] = 'Time'
}
if(length(colnames(clinicalData)[colnames(clinicalData) %in% Status]) == 0){
print(colnames(clinicalData))
stop(paste0(Status, " not found in clinicalData. Use argument Status to povide column name containing events (Dead or Alive)."))
}else{
colnames(clinicalData)[colnames(clinicalData) %in% Status] = 'Status'
}
clinicalData$Time = suppressWarnings(as.numeric(as.character(clinicalData$Time)) )
clinicalData$Status = suppressWarnings(as.integer(clinicalData$Status))
clinicalData$Time = ifelse(test = is.infinite(clinicalData$Time), yes = NA, no = clinicalData$Time)
if(nrow(clinicalData[!is.na(Time)][!is.na(Status)]) < nrow(clinicalData)){
message(paste0("Removed ", nrow(clinicalData) - nrow(clinicalData[!is.na(Time)][!is.na(Status)]),
" samples with NA's"))
clinicalData = clinicalData[!is.na(Time)][!is.na(Status)]
}
om = createOncoMatrix(m = maf, g = genes)
all.tsbs = levels(getSampleSummary(x = maf)[,Tumor_Sample_Barcode])
mutMat = t(om$numericMatrix)
missing.tsbs = all.tsbs[!all.tsbs %in% rownames(mutMat)]
if(nrow(mutMat) < 2){
stop("Minimum two genes required!")
}
mutMat[mutMat > 0 ] = 1
res = lapply(seq_along(1:ncol(genesCombn)), function(i){
x = genesCombn[,i]
mm = mutMat[,x, drop = FALSE]
genesTSB = names(which(rowSums(mm) == geneSetSize))
if(length(genesTSB) >= minSamples){
if(verbose){
cat("Geneset: ", paste0(x, collapse = ","), "[N=", length(genesTSB),"]\n")
}
surv.dat = run_surv(cd = clinicalData, tsbs = genesTSB, doplot = plot)
}else{
surv.dat = NULL
}
surv.dat
})
names(res) = apply(genesCombn, 2, paste, collapse = '_')
res = data.table::rbindlist(l = res, idcol = "Gene_combination")
res = res[order(P_value, decreasing = FALSE)]
res
}
run_surv = function(cd, tsbs, doplot = FALSE){
groupNames = c("Mutant", "WT")
col = c('maroon', 'royalblue')
cd$Group = ifelse(test = cd$Tumor_Sample_Barcode %in% tsbs, yes = groupNames[1], no = groupNames[2])
if(nrow(cd[,.N,Group]) == 1){
#Only group - surv.diff wont work
return(NULL)
}
surv.km = survival::survfit(formula = survival::Surv(time = Time, event = Status) ~ Group, data = cd, conf.type = "log-log")
res = summary(surv.km)
surv.diff = survival::survdiff(formula = survival::Surv(time = Time, event = Status) ~ Group, data = cd)
surv.diff.pval = signif(1 - pchisq(surv.diff$chisq, length(surv.diff$n) - 1), digits = 3)
surv.cox = survival::coxph(formula = survival::Surv(time = Time, event = Status) ~ Group, data = cd)
hr = signif(1/exp(stats::coef(surv.cox)), digits = 3)
surv.dat = data.table::data.table(Group = res$strata, Time = res$time, survProb = res$surv, survUp = res$upper, survLower = res$lower)
surv.dat$Group = gsub(pattern = 'Group=', replacement = '', x = surv.dat$Group)
geneSet <- paste("Geneset: ", paste0( parent.frame()$x, collapse = " + "))
if (doplot){
par(mar = c(4, 4, 2, 2))
x_lims = pretty(surv.km$time)
y_lims = seq(0, 1, 0.20)
plot(NA, xlim = c(min(x_lims), max(x_lims)), ylim = c(0, 1),
frame.plot = FALSE, axes = FALSE, xlab = NA, ylab = NA)
abline(h = y_lims, v = x_lims, lty = 2, col = grDevices::adjustcolor(col = "gray70", alpha.f = 0.5), lwd = 0.75)
points(surv.dat[Group %in% "Mutant", Time], y = surv.dat[Group %in% "Mutant", survProb], pch = 8, col = col [1])
points(surv.dat[Group %in% "WT", Time], y = surv.dat[Group %in% "WT", survProb], pch = 8, col = col [2])
lines(surv.km[1], col = col[1], lty = 1, lwd = 2, conf.int=FALSE)
lines(surv.km[2], col = col[2], lty = 1, lwd = 2, conf.int=FALSE)
axis(side = 1, at = x_lims)
axis(side = 2, at = y_lims, las = 2)
mtext(text = "Survival probability", side = 2, line = 2.5)
#mtext(text = "Time", side = 1, line = 2)
legend(x = "topright", legend = c(paste0("Geneset [N= ", nrow(cd[Group == "Mutant"]),"]"),
paste0("WT [N= ", nrow(cd[Group == "WT"]),"]")), col = col, bty = "n", lwd = 2, pch = 8)
title(main = NA,
sub = paste0("P-value: ", surv.diff.pval, "; ", "HR: ", hr), cex.main = 1, font.main= 4, col.main= "black",
cex.sub = 1, font.sub = 3, col.sub = ifelse(test = surv.diff.pval < 0.05, yes = "red", no = "black"),
line = 2.5, adj = 0)
title(main = geneSet, adj = 0, font.main = 3, cex.main = 1)
}
#surv.dat = data.table::data.table(Group = res$strata, Time = res$time, survProb = res$surv, survUp = res$upper, survLower = res$lower)
#surv.dat$Group = gsub(pattern = 'Group=', replacement = '', x = surv.dat$Group)
surv.dat = data.table::data.table(P_value = surv.diff.pval, hr = hr, WT = nrow(cd[Group == "WT"]), Mutant = nrow(cd[Group == "Mutant"]))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.