Nothing
## ---- eval=TRUE, message=FALSE------------------------------------------------
# Load GEO and biobase libraries
library(GEOquery)
library(Biobase)
library(multiClust)
library(preprocessCore)
library(ctc)
library(gplots)
library(dendextend)
library(graphics)
library(grDevices)
library(amap)
library(survival)
## ---- eval=FALSE, message=FALSE-----------------------------------------------
# # Obtain GSE series matrix file from GEO website using getGEO function
# gse <- getGEO(GEO="GSE2034")
#
# # Save the gene expression matrix as an object
# data.gse <- exprs(gse[[1]])
#
# # Save the patient clinical data to an object
# pheno <- pData(phenoData(gse[[1]]))
#
# # Write the gene expression and clinical data to text files
# WriteMatrixToFile(tmpMatrix=data.gse, tmpFileName="GSE2034.expression.txt",
# blnRowNames=TRUE, blnColNames=TRUE)
#
# WriteMatrixToFile(tmpMatrix=pheno, tmpFileName="GSE2034.clinical.txt",
# blnRowNames=TRUE, blnColNames=TRUE)
#
## ---- eval=FALSE--------------------------------------------------------------
# # Obtain GSE series matrix file from GEO website using getGEO function
# gse <- getGEO(filename="GSE2034_series_matrix.txt.gz")
#
# # Save the gene expression matrix as an object
# data.gse <- exprs(gse[[1]])
#
# # Save the patient clinical data to an object
# pheno <- pData(phenoData(gse[[1]]))
#
# # Write the gene expression and clinical data to text files
# WriteMatrixToFile(tmpMatrix=data.gse, tmpFileName="GSE2034.expression.txt",
# blnRowNames=TRUE, blnColNames=TRUE)
#
# WriteMatrixToFile(tmpMatrix=pheno, tmpFileName="GSE2034.clinical.txt",
# blnRowNames=TRUE, blnColNames=TRUE)
#
## ---- eval=FALSE--------------------------------------------------------------
# # Obtain GSE series matrix file from GEO website using getGEo function
# gse <- getGEO(GEO="GSE2034")
#
# # Save the gene expression matrix as an object
# data.gse <- exprs(gse)
#
# # Quantile normalization of the dataset
# data.norm <- normalize.quantiles(data.gse, copy=FALSE)
#
# # shift data before log scaling to prevent errors from log scaling
# # negative numbers
# if (min(data.norm)> 0) {
# }
# else {
# mindata.norm=abs(min(data.norm)) + .001
# data.norm=data.norm + mindata.norm
# }
#
# # Log2 scaling of the dataset
# data.log <- t(apply(data.norm, 1, log2))
#
# # Write the gene expression and clinical data to text files
# WriteMatrixToFile(tmpMatrix=data.log,
# tmpFileName="GSE2034.normalized.expression.txt",
# blnRowNames=TRUE, blnColNames=TRUE)
#
## ---- eval=TRUE---------------------------------------------------------------
# Obtain clinical outcome file
clin_file <- system.file("extdata", "GSE2034-RFS-clinical-outcome.txt",
package="multiClust")
# Load in survival information file
clinical <- read.delim2(file=clin_file, header=TRUE)
# Display first few rows of the file
clinical[1:5, 1:2]
# Column one represents the survival time in months
# Column two represents the relapse free survival (RFS) event
## ---- eval=TRUE---------------------------------------------------------------
# Obtain gene expression matrix
exp_file <- system.file("extdata", "GSE2034.normalized.expression.txt", package= "multiClust")
# Load the gene expression matrix
data.exprs <- input_file(input=exp_file)
# View the first few rows and columns of the matrix
data.exprs[1:4,1:4]
## ---- eval=TRUE---------------------------------------------------------------
# Example 1: Choosing a fixed gene or probe number
# Obtain gene expression matrix
exp_file <- system.file("extdata", "GSE2034.normalized.expression.txt",
package="multiClust")
gene_num <- number_probes(input=exp_file,
data.exp=data.exprs, Fixed=300,
Percent=NULL, Poly=NULL, Adaptive=NULL)
## ---- eval=TRUE---------------------------------------------------------------
# Example 2: Choosing 50% of the total selected gene probes in a dataset
# Obtain gene expression matrix
exp_file <- system.file("extdata", "GSE2034.normalized.expression.txt",
package="multiClust")
gene_num <- number_probes(input=exp_file,
data.exp=data.exprs, Fixed=NULL,
Percent=50, Poly=NULL, Adaptive=NULL)
## ---- eval=TRUE---------------------------------------------------------------
# Example 3: Choosing the polynomial selected gene probes in a dataset
# Obtain gene expression matrix
exp_file <- system.file("extdata", "GSE2034.normalized.expression.txt",
package="multiClust")
gene_num <- number_probes(input=exp_file,
data.exp=data.exprs, Fixed=NULL,
Percent=NULL, Poly=TRUE, Adaptive=NULL)
## ---- eval=FALSE--------------------------------------------------------------
# # Example 4: Choosing the Adaptive Gaussian Mixture Modeling method
#
# gene_num <- number_probes(input=exp_file,
# data.exp=data.exprs, Fixed=NULL,
# Percent=NULL, Poly=NULL, Adaptive=TRUE)
#
## ---- eval=TRUE---------------------------------------------------------------
# Obtain gene expression matrix
exp_file <- system.file("extdata", "GSE2034.normalized.expression.txt",
package="multiClust")
# Load the gene expression matrix
data.exprs <- input_file(input=exp_file)
# Call probe_ranking function
# Select for 500 probes
# Choose genes using the SD_Rank method
ranked.exprs <- probe_ranking(input=exp_file,
probe_number=300,
probe_num_selection="Fixed_Probe_Num",
data.exp=data.exprs,
method="SD_Rank")
# Display the first few columns and rows of the matrix
ranked.exprs[1:4,1:4]
## ---- eval=TRUE---------------------------------------------------------------
# Call the number_clusters function
# data.exp is the original expression matrix object outputted from
# the input_file function
# User specifies that samples will be separated into 3 clusters
# with the "Fixed" argument
cluster_num <- number_clusters(data.exp=data.exprs, Fixed=3,
gap_statistic=NULL)
## ---- eval=FALSE--------------------------------------------------------------
#
# # Call the number_clusters function
# # data.exp is the original expression matrix object ouputted from
# # the input_file function
# # User chooses the gap_statistic option by making gap_statistic equal TRUE
# # The Fixed argument is also set to NULL
# cluster_num <- number_clusters(data.exp=data.exprs, Fixed=NULL,
# gap_statistic=TRUE)
#
## ---- eval=TRUE, fig.show='hold', warning=FALSE-------------------------------
# Call the cluster_analysis function
hclust_analysis <- cluster_analysis(sel.exp=ranked.exprs,
cluster_type="HClust",
distance="euclidean", linkage_type="ward.D2",
gene_distance="correlation",
num_clusters=3, data_name="GSE2034 Breast",
probe_rank="SD_Rank", probe_num_selection="Fixed_Probe_Num",
cluster_num_selection="Fixed_Clust_Num")
# Display the first few columns and rows of the object
head(hclust_analysis)
## ---- eval=TRUE, echo=FALSE, fig.show='hold', fig.width=8, fig.height=8, fig.align='center', warning=FALSE----
# retrieve file path for image
fpath <- system.file("extdata", "GSE2034_Breast.SD_Rank.ward.D2.Fixed_Probe_Num.Fixed_Clust_Num.jpg", package="multiClust")
knitr::include_graphics(path = fpath)
## ---- eval=TRUE, echo=FALSE, fig.show='hold', fig.width=8, fig.height=8, fig.align='center', warning=FALSE----
# Produce a sample dendrogram using the selected gene/probe expression object
# In the function, cluster_analysis the dendrogram is not displayed in
# the console.
# Make the selected expression object numeric
colname <- colnames(ranked.exprs)
ranked.exprs <- t(apply(ranked.exprs, 1,as.numeric))
colnames(ranked.exprs) <- colname
# Normalization of the selected expression matrix before clustering
norm.sel.exp <- t(apply(ranked.exprs, 1, nor.min.max))
hc <- hclust(dist(x=t(norm.sel.exp), method="euclidean"), method="ward.D2")
dc <- as.dendrogram(hc)
# Portray the samples in each cluster as a different color
dc <- set(dc, "branches_k_color", value=1:3, k=3)
# Make a vector filled with blank spaces to replace sample names with
# blank space
vec <- NULL
len <- length(labels(dc))
for (i in 1:len) {
i <- ""
vec <- c(vec, i)
}
# Remove sample labels from dendrogram
dc <- dendextend::set(dc, "labels", vec)
# Plot the sample dendrogram
plot(dc, main=paste("GSE2034 Breast", "Sample Dendrogram", sep=" "))
# Display orange lines on the plot to clearly separate each cluster
rect.dendrogram(dc, k=3, border='orange')
## ---- eval=TRUE---------------------------------------------------------------
# Call the cluster_analysis function
kmeans_analysis <- cluster_analysis(sel.exp=ranked.exprs,
cluster_type="Kmeans",
distance=NULL, linkage_type=NULL,
gene_distance=NULL, num_clusters=3,
data_name="GSE2034 Breast", probe_rank="SD_Rank",
probe_num_selection="Fixed_Probe_Num",
cluster_num_selection="Fixed_Clust_Num")
# Display the first few rows and columns of the object
head(kmeans_analysis)
## ---- eval=TRUE---------------------------------------------------------------
# Call the avg_probe_exp function
avg_matrix <- avg_probe_exp(sel.exp=ranked.exprs,
samp_cluster=kmeans_analysis,
data_name="GSE2034 Breast", cluster_type="Kmeans", distance=NULL,
linkage_type=NULL, probe_rank="SD_Rank",
probe_num_selection="Fixed_Probe_Num",
cluster_num_selection="Fixed_Clust_Num")
# Display the first few rows and columns of the matrix
head(avg_matrix)
## ---- eval=TRUE, fig.show='hold'----------------------------------------------
# Obtain clinical outcome file
clin_file <- system.file("extdata", "GSE2034-RFS-clinical-outcome.txt",
package="multiClust")
# Call the avg_probe_exp function
surv <- surv_analysis(samp_cluster=kmeans_analysis, clinical=clin_file,
survival_type="RFS", data_name="GSE2034 Breast",
cluster_type="Kmeans", distance=NULL,
linkage_type=NULL, probe_rank="SD_Rank",
probe_num_selection="Fixed_Probe_Num",
cluster_num_selection="Fixed_Clust_Num")
# Display the survival P value
surv
## ---- eval=TRUE, echo=FALSE, fig.show='hold', fig.width=8, fig.height=8, fig.align='center', warning=FALSE----
# Obtain clinical outcome file
clin_file <- system.file("extdata", "GSE2034-RFS-clinical-outcome.txt",
package="multiClust")
# Produce a Kaplan-Meier plot using the sample cluster information
# from the cluster_analysis function
# Read in survival data text file
sample.anns <- read.delim2(file=clin_file,
header=TRUE, stringsAsFactors=FALSE)
time <- sample.anns[,1]
event <- sample.anns[,2]
time <- as.numeric(time)
event <- as.numeric(event)
event_type <- "RFS"
time_type <- 'Months'
# Determine max number of clusters in samp_cluster
samp_cluster <- kmeans_analysis
Number_Clusters <- max(samp_cluster)
# Produce Kaplan Meier survival plots
lev <- '1'
for(i in 2:Number_Clusters) {
lev <- c(lev, paste('',i, sep=''))
}
groupfac <- factor(samp_cluster, levels=lev)
cox <- coxph(Surv(time, event==1) ~ groupfac)
csumm <- summary(cox)
cox.p.value <- c(csumm$logtest[3])
p <- survfit(Surv(time,event==1)~strata(samp_cluster))
plot (p,xlab= time_type ,ylab='Survival Probability',conf.int=FALSE,
xlim=c(0,100), col=1:Number_Clusters,
main=paste("GSE2034 Breast", "Kmeans", "SD_Rank", "RFS", sep =' '))
legend('bottomleft',legend=levels(strata(samp_cluster)),lty=1,
col=1:Number_Clusters, text.col=1:Number_Clusters, title='clusters')
legend('topright', paste('Pvalue =', signif(csumm$logtest[3],digits=2),
sep =''))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.