##############################################
# The deveroper and the maintainer: #
# Lang Ho Lee (lhlee@bwh.harvard.edu) #
# Sasha A. Singh (sasingh@bwh.harvard.edu) #
##############################################
# Mute warnings
options(warn=1)
#' @title get_condition_biased_comigrations
#' @description get comigrations that at least one biased cluster is involved in.
#' Biased clusters are defined by
#' @param clustering_result A list containing XINA clustering results.
#' See \link[XINA]{xina_clustering}
#' @param count_table A data frame generated by using \link[plyr]{count}.
#' If count_table is NULL (by default), XINA will consider all the comigrations.
#' @param selected_conditions A vector of condition names used in XINA clustering results.
#' The number of selected conditions should be at least two.
#' @param condition_composition The resulting data frame of 'plot_condition_compositions'.
#' See \link[XINA]{plot_condition_compositions}.
#' @param threshold_percent Default is 50. The percentage threshold for finding condition-biased clusters
#' @param color_for_null A color for non-condition-biased comigrations. Default is 'gray'
#' @param color_for_highly_matched A color for comigrations that are involved with more than two condition-biased clusters. Default is 'red4'
#' @param cex Size of cluster number on block axis. Default if 0.7. See \link[alluvial]{alluvial}.
#' @param alpha Transparency of alluvia colors. Default is 0.3. See \link[alluvial]{alluvial}.
#' @return An alluvial plot displaying comigrations and the data frame containing condition-biased comigrations.
#' @export
#' @examples
#'
#' # load XINA example data
#' data(xina_example)
#'
#' # get a vector of experimental conditions analyzed in the clustering results
#' conditions <- as.vector(example_clusters$condition)
#'
#' # get condition composition information
#' condition_composition <- plot_condition_compositions(example_clusters)
#'
#' comigrations_size10 <- alluvial_enriched(example_clusters, conditions, comigration_size=10)
#' # Finding condition-biased comigrations by 50% threshold
#' condition_biased_comigrations <-
#' get_condition_biased_comigrations(clustering_result=example_clusters,
#' count_table=comigrations_size10, selected_conditions=conditions,
#' condition_composition=condition_composition)
#'
#' # Finding condition-biased comigrations by 70% threshold
#' condition_biased_comigrations <-
#' get_condition_biased_comigrations(clustering_result=example_clusters,
#' count_table=comigrations_size10, selected_conditions=conditions,
#' condition_composition=condition_composition,
#' threshold_percent=70)
#'
get_condition_biased_comigrations <- function(clustering_result, count_table=NULL,
selected_conditions, condition_composition, threshold_percent=50,
color_for_null='gray', color_for_highly_matched='red4',
cex=0.7, alpha=0.3) {
Condition <- Percent_ratio <- NULL
if (is.null(count_table)) {
count_table <- generate_count_table(clustering_result, selected_conditions, 0)
}
ratio_threshold <- threshold_percent
biased_clusters <- list()
for (condition in selected_conditions) {
tmp <- subset(condition_composition, Percent_ratio>=ratio_threshold & Condition==condition)
biased_clusters[[condition]] <- tmp$Cluster
}
alluvia_colors <- c()
for (i in seq_len(nrow(count_table))) {
matched_conditions <- c()
for (condition in selected_conditions) {
cl <- count_table[i,condition]
if (!is.na(match(cl, biased_clusters[[condition]]))) {
matched_conditions <- c(matched_conditions, condition)
}
}
if (length(matched_conditions)==1) {
alluvia_colors <- c(alluvia_colors, clustering_result$color_for_condition[matched_conditions])
} else if (length(matched_conditions)>1) {
alluvia_colors <- c(alluvia_colors, color_for_highly_matched)
} else {
alluvia_colors <- c(alluvia_colors, color_for_null)
}
}
cnt_table_4draw <- count_table[!is.na(match(alluvia_colors, c(clustering_result$color_for_condition, color_for_highly_matched))),]
alluvia_colors <- alluvia_colors[!is.na(match(alluvia_colors, c(clustering_result$color_for_condition, color_for_highly_matched)))]
if (nrow(cnt_table_4draw)>0){
return(draw_alluvial_plot(clustering_result, selected_conditions, cnt_table_4draw,
alluvia_colors = alluvia_colors, cex=cex, alpha=alpha))
} else {
print("No biased comigration was found")
}
}
#' @title get_comigrations_by_name
#' @description 'get_comigrations_by_name' finds proteins comigrated with the given proteins
#' @param clustering_result A list containing XINA clustering results. See \link[XINA]{xina_clustering}
#' @param selected_conditions A vector of condition names used in XINA clustering results.
#' The number of selected conditions should be at least two.
#' @param protein_list A vector containing gene names.
#' @param cex Size of cluster number on block axis. Default if 0.7. See \link[alluvial]{alluvial}
#' @param alpha Transparency of alluvia colors. Default is 0.3. See \link[alluvial]{alluvial}
#' @return An alluvial plot displaying comigrations and the data frame containing comigrations of the input proteins
#' @export
#' @examples
#'
#' # load XINA example data
#' data(xina_example)
#'
#' # the clustering result table
#' all_proteins <- as.character(example_clusters$aligned$`Gene name`)
#' # get a vector of experimental conditions analyzed in the clustering results
#' classes <- as.vector(example_clusters$condition)
#'
#' comigrated_prots_all <- get_comigrations_by_name(example_clusters, classes, all_proteins[1:3])
#'
get_comigrations_by_name <- function(clustering_result, selected_conditions, protein_list, cex=0.7, alpha=0.3){
matched_comigrations_all <- data.frame()
comigrated_prots_all <- list()
count_table <- generate_count_table(clustering_result, selected_conditions, 0)
found_proteins <- c()
for (prot_name in protein_list) {
cl_matched <- clustering_result$aligned[clustering_result$aligned$"Gene name"==prot_name, selected_conditions]
comigrations_matched <- count_table
comigrated_prots <- clustering_result$aligned
flag_found <- TRUE
if (length(cl_matched)==0) {
flag_found <- FALSE
} else if (is.na(sum(as.numeric(cl_matched)))) {
flag_found <- FALSE
} else if (sum(as.numeric(cl_matched))==0) {
flag_found <- FALSE
}
if (flag_found){
if (length(selected_conditions)==1){
names(cl_matched) <- selected_conditions
}
for (condition in selected_conditions){
comigrations_matched <- subset(comigrations_matched, comigrations_matched[condition]==as.integer(cl_matched[condition]))
comigrated_prots <- subset(comigrated_prots, comigrated_prots[condition]==as.integer(cl_matched[condition]))
}
found_proteins <- c(found_proteins, prot_name)
comigrated_prots_all[[prot_name]] <- comigrated_prots
if (nrow(matched_comigrations_all)==0) {
matched_comigrations_all <- comigrations_matched
} else {
matched_comigrations_all <- rbind(matched_comigrations_all,comigrations_matched)
}
} else {
print(paste(prot_name," is not found in the your 'selected_conditions':",selected_conditions))
comigrated_prots_all[[prot_name]] <- NULL
}
}
rownames(matched_comigrations_all) <- found_proteins
if (length(selected_conditions)>1){
comigrated_prots_all[["comigrations"]] <- draw_alluvial_plot(clustering_result, selected_conditions, matched_comigrations_all,
cex=cex, alpha=alpha)
} else {
comigrated_prots_all[["comigrations"]] <- matched_comigrations_all
}
return(comigrated_prots_all)
}
#' @title draw_alluvial_plot
#' @description 'draw_alluvial_plot' draw a alluvial plot
#' @param clustering_result A list containing XINA clustering results.
#' See \link[XINA]{xina_clustering}.
#' @param selected_conditions A vector of condition names used in XINA clustering results.
#' The number of selected conditions should be at least two.
#' @param count_table A data frame generated by using \link[plyr]{count}.
#' @param alluvia_colors A vector containing the user-defined colors for each alluvium.
#' @param cex Size of cluster number on block axis. Default if 0.7. See \link[alluvial]{alluvial}.
#' @param alpha Transparency of alluvia colors. Default is 0.3. See \link[alluvial]{alluvial}.
#' @return An alluvial plot displaying comigrations and the data frame containing the input count_table with colors.
#' @import alluvial
#' @export
#' @examples
#'
#' # load XINA example data
#' data(xina_example)
#'
#' # get a vector of experimental conditions analyzed in the clustering results
#' classes <- as.vector(example_clusters$condition)
#'
#' comigrations_size_over5 <- alluvial_enriched(example_clusters, classes, comigration_size=5)
#' draw_alluvial_plot(example_clusters, classes, comigrations_size_over5)
#'
draw_alluvial_plot <- function(clustering_result, selected_conditions, count_table,
alluvia_colors=NULL, cex=0.7, alpha=0.3){
# Set color codes
if (is.null(alluvia_colors)) {
alluvia_colors <- c()
color_codes <- get_colors(clustering_result$nClusters, set='vivid')
for (i in seq_len(nrow(count_table))){
cn <- as.numeric(as.vector(count_table[i,1]))
if (cn == 0){
alluvia_colors <- c(alluvia_colors, "#BEBEBE")
} else {
alluvia_colors <- c(alluvia_colors, color_codes[cn])
}
}
} else {
if (length(alluvia_colors) > nrow(count_table)){
alluvia_colors <- alluvia_colors[seq_len(nrow(count_table))]
} else if (length(alluvia_colors) < nrow(count_table)){
difference <- nrow(count_table) - length(alluvia_colors)
for (i in seq_len(difference)) {
alluvia_colors <- c(alluvia_colors, 'gray')
}
warning(paste(difference, "comigrations were colored by gray because alluvial_colors is not enough to color all the comigrations"))
}
}
# For the dimension error when there is only one row in count_table
if (nrow(count_table)==0){
stop("No biased comigration was found")
} else if (nrow(count_table)==1){
Comigration_size <- c(count_table$Comigration_size,0)
cols <- c(alluvia_colors,alluvia_colors)
bds <- c(alluvia_colors,alluvia_colors)
} else {
Comigration_size <- count_table$Comigration_size
cols <- alluvia_colors
bds <- alluvia_colors
}
# Draw an alluvial plot
# count_table[,1:length(selected_conditions)]
alluvial(count_table[,selected_conditions],
freq=Comigration_size,
col=cols,
border=bds,
cex=cex,
alpha=alpha)
if (is.null(count_table$"Alluvia_color")) {
return(cbind(count_table, Alluvia_color=alluvia_colors))
} else {
count_table$"Alluvia_color" <- alluvia_colors
return(count_table)
}
}
#' @title alluvial_enrichment_tests
#' @description Fisher's exact test to calculate the significance over all comigrations.
#' The following 2x2 table was used to calculate p-value from Fisher's exact test.
#' To evaluate significance of comigrated proteins from cluster #1 in control to cluster #2 in test condition,
#' \tabular{rll}{
#' \tab \strong{cluster #1 in control} \tab \strong{other clusters in control}\cr
#' \strong{cluster #2 in test} \tab 65 (TP) \tab 175 (FP)\cr
#' \strong{other clusters in test} \tab 35 (FN) \tab 979 (TN)\cr
#' }
#' 'alluvial_enrichment_tests' also provides another statistical methods including Hypergeometric test and Chi-square test.
#' @param count_table A data frame generated by using \link[plyr]{count}.
#' @param c1 A selected cluster in the first condition.
#' @param c2 A selected cluster in the second condition.
#' @param non_cluster The cluster number for proteins that were not detected in a specific sample. Default is 0.
#' @param test_type Enrichment test type.
#' 'fisher' = Fisher's exact test, 'hyper' = Hypergeometric test, 'chisq' = Chi-square test
#' @return P-value of comigration enrichment test and 2x2 table information
alluvial_enrichment_tests <- function(count_table, c1, c2, non_cluster=0, test_type='fisher'){
matched_in_list <- sum(subset(count_table, count_table[,1]==c1 & count_table[,2]==c2)$Comigration_size)
non_matched_in_list <- sum(subset(count_table, count_table[,1]==c1 & count_table[,2]!=c2)$Comigration_size)
matched_in_all <- sum(subset(count_table, count_table[,1]!=c1 & count_table[,2]==c2)$Comigration_size)
non_matched_in_all <- sum(subset(count_table, count_table[,1]!=c1 & count_table[,2]!=c2)$Comigration_size)
data_table <- c(matched_in_list, non_matched_in_list, matched_in_all, non_matched_in_all)
counts = (matrix(data = data_table, nrow = 2))
pval <- 1
if (test_type=='fisher') {
pval <- stats::fisher.test(counts)$p.value
} else if (test_type=='hyper') {
pval <- stats::dhyper(matched_in_list, matched_in_all, non_matched_in_all, matched_in_list+non_matched_in_list)
} else if (test_type=='chisq') {
pval <- stats::chisq.test(counts)$p.value
} else {
stop("You chose wrong test_type that should be one of c('fisher', 'hyper', 'chisq')")
}
return_table <- c(data_table, pval)
names(return_table) <- c("TP","FP","FN","TN","Pvalue")
return(return_table)
}
#' @title generate_count_table
#' @description Count the number of comigrated proteins using \link[plyr]{count}
#' @param clustering_result A list containing XINA clustering results. See \link[XINA]{xina_clustering}
#' @param selected_conditions A vector of condition names used in XINA clustering results.
#' @param comigration_size The number of proteins comigrated together in the selected conditions of XINA clustering results. Default is 0.
#' @return A data frame containing comigrations.
generate_count_table <- function(clustering_result, selected_conditions, comigration_size) {
# Generate count table
aligned_clusters <- clustering_result$aligned
selected_data <- data.frame(Gene_name=aligned_clusters$`Gene name`)
for (i in selected_conditions){
selected_data <- cbind(selected_data, as.integer(aligned_clusters[i][[1]]))
}
colnames(selected_data) <- c("Gene_name",selected_conditions)
count_table <- plyr::count(selected_data[selected_conditions])
count_table$"RowNum" <- seq_len(nrow(count_table))
na_cluster <- count_table
for (i in selected_conditions){
if (nrow(na_cluster)>0) {
na_cluster <- subset(na_cluster, na_cluster[i]==0)
}
}
return_table <- count_table[!(count_table$RowNum %in% na_cluster$RowNum),]
colnames(return_table) <- c(selected_conditions, "Comigration_size","RowNum")
return(return_table)
}
#' @title alluvial_enriched
#' @description 'alluvial_enriched’ draws an alluvial plot and finds comigrated proteins.
#' The comigration is a group of proteins that show the same expression pattern,
#' classified and evaluated by XINA clustering, in at least two conditions.
#' XINA can reduce the dataset complexity by filtering based on
#' the number of comigrated proteins (size, ’comigration_size’ parameter) and
#' perform an enrichment test (P-value of Fisher’s exact test, ’pval_threshold’)
#' to determine significance of enriched comigrations.
#' The Fisher’s exact test can only be done for two conditions at a time.
#' The following 2x2 table was used to calculate the P-value from the Fisher’s exact test.
#' To evaluate significance of co-migrated proteins from cluster #1 in control to cluster #2 in test group,
#' \tabular{rll}{
#' - \tab cluster #1 in control \tab other clusters in control\cr
#' cluster #2 in test \tab 65 (TP) \tab 175 (FP)\cr
#' other clusters in test \tab 35 (FN) \tab 979 (TN)\cr
#' }
#' @param clustering_result A list containing XINA clustering results. See \link[XINA]{xina_clustering}
#' @param selected_conditions A vector of condition names used in XINA clustering results.
#' The number of selected conditions should be at least two.
#' @param comigration_size The number of proteins comigrated together in the selected conditions of XINA clustering results.
#' Default is 0
#' @param pval_threshold This option is avaiable only when you selected two conditions for comigration search.
#' @param pval_method Method for p-value adjustment. See \link[stats]{p.adjust}
#' @param cex Scaling of fonts of category labels. Default if 0.7. See \link[alluvial]{alluvial}
#' @param alpha Transparency of the stripes. Default if 0.3. See \link[alluvial]{alluvial}
#' @return A data frame containing comigrations and an alluvial plot showing comigrations
#' @export
#' @examples
#'
#' # load XINA example data
#' data(xina_example)
#'
#' # Get the experimental conditions in the example data
#' classes <- as.vector(example_clusters$condition)
#'
#' # Get comigrations without any thresholds
#' all_comigrations <- alluvial_enriched(example_clusters, classes)
#'
#' # Get comigrations that have >= 5 size (the number of comigrated proteins)
#' all_cor_enriched <- alluvial_enriched(example_clusters, classes, comigration_size=5)
#'
#' # Get all the comigrations between Control and Stimulus1
#' comigrations_Control_Stimulus1 <- alluvial_enriched(example_clusters,
#' c(classes[1],classes[2]))
#'
#' # Get comigrations between Control and Stimulus1, that have >=5 size
#' comigrations_Control_Stimulus1_over5 <- alluvial_enriched(example_clusters,
#' c(classes[1],classes[2]), comigration_size=5)
#'
#' # Get comigrations between Control and Stimulus1,
#' # that have >= 5 size and enrichment FDR <= 0.01
#' comigrations_Control_Stimulus1_pval0.01_size5 <- alluvial_enriched(example_clusters,
#' c(classes[1],classes[2]), comigration_size=5, pval_threshold=0.01)
#'
#' # Get comigrations between Control and Stimulus1,
#' # that have >= 5 size and enrichment Benjamini & Yekutieli <= 0.01
#' comigrations_Control_Stimulus1_BY0.01_size5 <- alluvial_enriched(example_clusters,
#' c(classes[1],classes[2]), comigration_size=5, pval_threshold=0.01, pval_method="BY")
#'
alluvial_enriched <- function(clustering_result, selected_conditions, comigration_size=0,
pval_threshold=1, pval_method='fdr', cex=0.7, alpha=0.3) {
Comigration_size <- Pvalue.adjusted <- NULL
# For debugging
# selected_conditions <- c(classes[1],classes[2],classes[3])
# https://cran.r-project.org/web/packages/alluvial/vignettes/alluvial.html#changing-layers
if (length(selected_conditions)<2){
stop("'selected_conditions' requires at least two")
} else if (length(selected_conditions)>2) {
print("length(selected_conditions) > 2, so XINA can't apply the enrichment filter
Can't apply the enrichment filter, so pval_threshold is ignored")
pval_threshold <- 1
}
# generate a count_table
count_table <- generate_count_table(clustering_result, selected_conditions, 0)
# Do enrichment test
tp <- c()
fp <- c()
fn <- c()
tn <- c()
p.values <- c()
if (length(selected_conditions)==2){
for (j in seq_len(nrow(count_table))) {
c1 <- count_table[j,1]
c2 <- count_table[j,2]
test_result <- alluvial_enrichment_tests(count_table,c1,c2)
tp <- c(tp, test_result["TP"])
fp <- c(fp, test_result["FP"])
fn <- c(fn, test_result["FN"])
tn <- c(tn, test_result["TN"])
p.values <- c(p.values, test_result["Pvalue"])
}
pval_adjusted <- stats::p.adjust(p.values,method=pval_method)
} else {
p.values <- rep(NA,nrow(count_table))
pval_adjusted <- rep(NA,nrow(count_table))
tp <- rep(NA,nrow(count_table))
fp <- rep(NA,nrow(count_table))
fn <- rep(NA,nrow(count_table))
tn <- rep(NA,nrow(count_table))
}
count_table_all <- cbind(count_table,
PValue=p.values,
Pvalue.adjusted=pval_adjusted,
TP=tp,FP=fp,FN=fn,TN=tn)
if (pval_threshold == 1) {
count_table <- subset(count_table_all, Comigration_size>=comigration_size)
} else {
count_table <- subset(count_table_all, Pvalue.adjusted<=pval_threshold & Comigration_size>=comigration_size)
}
if (nrow(count_table)>0){
count_table_colored <- draw_alluvial_plot(clustering_result, selected_conditions, count_table, cex=cex, alpha=alpha)
} else {
print("Can't draw alluvial plot because none of features passed the thresholds")
}
return(count_table_colored)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.