Nothing
############# VII. Survival Analysis ###############
#' Function to produce Kaplan-Meier Survival Plots of selected gene
#' expression data.
#' @param samp_cluster Object vector containing the samples and the cluster
#' number they belong to. This object is an output of the
#' cluster_analysis function.
#' @param clinical String indicating the name of the text file containing
#' patient clinical information. The file should be a data frame consisting
#' of two columns. The first column contains the patient survival time
#' information in months. The second column indicates occurrence of
#' a censorship (0) or an event (1).
#' @param survival_type String specifying the type of survival event being
#' analyzed. Examples include "Disease-free survival (DFS)",
#' "Overall Survival (OS)", "Relapse-free survival (RFS)", etc.
#' @param data_name String indicating the name to be used to label the plot.
#' @param cluster_type String indicating the type of clustering method
#' used in the cluster_analysis function. "Kmeans" or "HClust"
#' are the two options.
#' @param distance String describing the distance metric uses for HClust in
#' the cluster_analysis function. Options include one of "euclidean",
#' "maximum", manhattan", "canberra", "binary", or "minkowski".
#' @param linkage_type String describing the linkage metric use in the
#' cluster_analysis function. Options include "ward.D2", "average",
#' "complete", "median", "centroid", "single", and "mcquitty".
#' @param probe_rank String indicating the feature selection method used
#' in the probe_ranking function. Options include "CV_Rank",
#' "CV_Guided", "SD_Rank", and "Poly".
#' @param probe_num_selection String indicating the way in which probes
#' were selected in the number_probes function. Options include
#' "Fixed_Probe_Num", "Percent_Probe_Num", and "Adaptive_Probee_Num".
#' @param cluster_num_selection String indicating how the number of
#' clusters were determined in the number_clusters function. Options
#' include "Fixed_Clust_Num" and "Gap_Statistic".
#' @return Produces a pdf image of a Kaplan-Meier Survival Plot with Cox
#' Survival P Value. Also returns an object containing the
#' cox survival P value.
#' @seealso \code{\link{number_clusters}}, \code{\link{number_probes}},
#' \code{\link{probe_ranking}}, \code{\link{cluster_analysis}},
#' \code{\link[survival]{coxph}}
#' @author Alec Fabbri, Nathan Lawlor
#' @examples
#' # Load in a data file
#' data_file <- system.file("extdata", "GSE2034.normalized.expression.txt",
#' package="multiClust")
#' data <- input_file(input=data_file)
#' # Choose 300 genes to select for
#' gene_num <- number_probes(input=data_file, data.exp=data, Fixed=300,
#' Percent=NULL, Adaptive=NULL)
#' # Choose the "CV_Rank" Method for gene ranking
#' sel.data <- probe_ranking(input=data_file, probe_number=300,
#' probe_num_selection="Fixed_Probe_Num", data.exp=data, method="CV_Rank")
#' # Choose a fixed cluster number of 3
#' clust_num <- number_clusters(data.exp=data, Fixed=3, gap_statistic=NULL)
#' # Call function for Kmeans parameters
#' kmeans_analysis <- cluster_analysis(sel.exp=sel.data, cluster_type="Kmeans",
#' distance=NULL, linkage_type=NULL, gene_distance=NULL,
#' num_clusters=3, data_name="GSE2034 Breast",
#' probe_rank="CV_Rank", probe_num_selection="Fixed_Probe_Num",
#' cluster_num_selection="Fixed_Clust_Num")
#' # Load the clinical outcome file
#' clin_file <- system.file("extdata", "GSE2034-RFS-clinical-outcome.txt",
#' package="multiClust")
#' # Example of Calling surv_analysis function
#' surv <- surv_analysis(samp_cluster=kmeans_analysis, clinical=clin_file,
#' survival_type="RFS", data_name="GSE2034 Breast", cluster_type="Kmeans",
#' distance=NULL, linkage_type=NULL, probe_rank="CV_Rank",
#' probe_num_selection="Fixed_Probe_Num",
#' cluster_num_selection="Fixed_Cluster_Num")
#' @export
surv_analysis <- function(samp_cluster, clinical, survival_type="RFS",
data_name, cluster_type="HClust", distance="euclidean",
linkage_type="ward.D2", probe_rank="SD_Rank",
probe_num_selection="Fixed_Probe_Num",
cluster_num_selection="Fixed_Clust_Num") {
# Conditionals to make sure inputs are strings
if (is.character(clinical) == FALSE) {
stop("Please input string for clinical")
}
if (is.character(survival_type) == FALSE) {
stop("Please input string for survival_type")
}
if (is.character(data_name) == FALSE) {
stop("Please input string for data_name")
}
if (is.character(cluster_type) == FALSE) {
stop("Please input string for cluster_type")
}
if (is.null(distance) == FALSE) {
if (is.character(distance) == FALSE) {
stop("Please input string for distance")
}
}
if (is.null(linkage_type) == FALSE) {
if (is.character(linkage_type) == FALSE) {
stop("Please input string for linkage_type")
}
}
if (is.character(probe_rank) == FALSE) {
stop("Please input string for probe_rank")
}
if (is.character(probe_num_selection) == FALSE) {
stop("Please input string for probe_num_selection")
}
if (is.character(cluster_num_selection) == FALSE) {
stop("Please input string for cluster_num_selection")
}
# Read in survival data text file
sample.anns <- utils::read.delim2(clinical, header=TRUE,
stringsAsFactors=FALSE)
time <- sample.anns[, 1]
event <- sample.anns[, 2]
time <- as.numeric(time)
event <- as.numeric(event)
event_type <- survival_type
time_type <- 'Months'
# Determine max number of clusters in samp_cluster
Number_Clusters <- max(samp_cluster)
# Produce Kaplan Meier survival plots
lev <- '1'
for(i in 2:Number_Clusters) {
lev <- c(lev, paste('',i, sep=''))
}
groupfac <- factor(samp_cluster, levels=lev)
cox <- survival::coxph(survival::Surv(time, event==1) ~ groupfac)
csumm <- summary(cox)
cox.p.value <- c(csumm$logtest[3])
p <- survival::survfit(survival::Surv(time,
event==1)~survival::strata(samp_cluster))
# Produce Kaplan-Meier Survival Plot
grDevices::pdf(paste(data_name, cluster_type, linkage_type, probe_rank,
probe_num_selection, cluster_num_selection,
survival_type, "pdf", sep='.'))
graphics::plot(p, xlab= time_type, ylab='Survival Probability',
conf.int=FALSE, xlim=c(0,100), col=1:Number_Clusters,
main=paste(data_name, cluster_type, probe_rank,
survival_type, sep =' '))
graphics::legend('bottomleft',
legend=levels(survival::strata(samp_cluster)),lty=1,
col=1:Number_Clusters,text.col=1:Number_Clusters, title='clusters')
graphics::legend('topright', paste('Pvalue =',
signif(csumm$logtest[3],digits=2), sep=''))
grDevices::dev.off()
print("Your Kaplan Meier Survival Plot has been finished")
return(cox.p.value)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.