# FUNCTION
# - arguments: collection, query_geneset, top correlated query_genesets
# - returns: data.frame which contains the pairs between the query_geneset
# and it's top corralated genesets after filtering. Results are sorted
# in descending correlation order
# NOTES
# - Collection: only accepting pathprint,MSigDB_H,MSigDB_C2_CP,MSigDB_C5_GO_BP
# - query_geneset: must belong to the collection and be spelled exactly right!
# (do we need a search feature that allows the user to search
# for query_genesets?)
# - The initial data.frame is filtered by minimum absolute correlation
# and maximum p-value
utils::globalVariables(c("pathprint.Hs.gs", "pathCor_Hv5.1_dframe",
"pathCor_Hv5.1_unadjusted_dframe",
"h_gs_v5.1","cp_gs_v5.1", "cp_gs_v5.1",
"pathCor_GOBPv5.1_dframe",
"pathCor_GOBPv5.1_unadjusted_dframe"))
#' Select a single pathway/gene set from one of the four collections ( MSigDB H
#' hallmark gene sets, MSigDB C2 Canonical pathways, MSigDB C5 GO BP gene sets,
#' and Pathprint ) and discover its correlated pathway/gene sets within the
#' same collection.
#'
#' @param collection pathways' collection
#' @param query_geneset the single pathway of interest
#' @param adj_overlap whether the correlation coefficients are adjusted for gene
#' overlap
#' @param top most correlated genesets/pathways
#' @param min_abs_corr minimum absolute correlation
#' @param max_pval maximum p-value
#'
#' @return pcxn object
#' @export
#'
#' @examples
#' \dontrun{
#' pcxn<- pcxn_explore("pathprint","Alzheimer's disease (KEGG)",
#' adj_overlap = TRUE, 10, 0.05, 0.05)
#' }
pcxn_explore <- function(collection = c("pathprint","MSigDB_H","MSigDB_C2_CP",
"MSigDB_C5_GO_BP"),
query_geneset,
adj_overlap = FALSE,
top = 10,
min_abs_corr = 0.05,
max_pval = 0.05) {
if(missing(query_geneset))
stop( "Please insert query_geneset argument")
# loading the datasets
correct_query_geneset_flag <- FALSE
acceptable_collections = c("pathprint","MSigDB_H","MSigDB_C2_CP",
"MSigDB_C5_GO_BP")
if ( collection == "pathprint" ) {
if ( adj_overlap ) {
data(pathCor_pathprint_v1.2.3_dframe, envir = environment())
matrix <- pathCor_pathprint_v1.2.3_dframe
}
else {
data(pathCor_pathprint_v1.2.3_unadjusted_dframe,
envir = environment())
matrix <- pathCor_pathprint_v1.2.3_unadjusted_dframe
}
data(pathprint.Hs.gs, envir = environment())
if ( query_geneset %in% names(pathprint.Hs.gs) )
correct_query_geneset_flag <- TRUE
}
if ( collection == "MSigDB_H" ) {
if ( adj_overlap ) {
data(pathCor_Hv5.1_dframe, envir = environment())
matrix <- pathCor_Hv5.1_dframe
}
else {
data(pathCor_Hv5.1_unadjusted_dframe, envir = environment())
matrix <- pathCor_Hv5.1_unadjusted_dframe
}
tryCatch({
data(h_gs_v5.1.rda, envir = environment())
}, warning = function(w) {
}, error = function(e) {
}, finally = {
data('h_gs_v5.1', envir = environment())
})
if ( query_geneset %in% names(h_gs_v5.1) )
correct_query_geneset_flag <- TRUE
}
if ( collection == "MSigDB_C2_CP" ) {
if ( adj_overlap ) {
data(pathCor_CPv5.1_dframe, envir = environment())
matrix <- pathCor_CPv5.1_dframe
}
else {
data(pathCor_CPv5.1_unadjusted_dframe, envir = environment())
matrix <- pathCor_CPv5.1_unadjusted_dframe
}
tryCatch({
data(cp_gs_v5.1.rda, envir = environment())
}, warning = function(w) {
}, error = function(e) {
}, finally = {
data('cp_gs_v5.1', envir = environment())
})
if ( query_geneset %in% names(cp_gs_v5.1) )
correct_query_geneset_flag <- TRUE
}
if ( collection == "MSigDB_C5_GO_BP" ) {
if ( adj_overlap ) {
data(pathCor_GOBPv5.1_dframe, envir = environment())
matrix <- pathCor_GOBPv5.1_dframe
}
else {
data(pathCor_GOBPv5.1_unadjusted_dframe, envir = environment())
matrix <- pathCor_GOBPv5.1_unadjusted_dframe
}
tryCatch({
data(gobp_gs_v5.1.rda, envir = environment())
}, warning = function(w) {
}, error = function(e) {
}, finally = {
data('gobp_gs_v5.1', envir = environment())
})
if ( query_geneset %in% names(gobp_gs_v5.1) )
correct_query_geneset_flag <- TRUE
}
# stop if wrong collection inserted or query_geneset
# does not match the collection
if(!(collection %in% acceptable_collections))
stop( "Invalid collection name, please choose between: pathprint,
MSigDB_H, MSigDB_C2_CP or MSigDB_C5_GO_BP")
if(!(correct_query_geneset_flag))
stop( "query_geneset does not belong to the collection")
# step 1: Find occurences of the query_geneset in 1st or 2nd column.
# Basically find every pathway connected to the query pathway and
# sort them by correlation
m1 <- subset(matrix,Pathway.A == query_geneset)
m2 <- subset(matrix,Pathway.B == query_geneset)
step1_matrix <- as.data.frame(rbind(m1,m2))
top_step1_matrix<-
(step1_matrix[order(-abs(step1_matrix$PathCor)),])[1:top,]
# Most correlated genesets + query
interesting_genesets <- unique(as.list(c(top_step1_matrix$Pathway.A,
top_step1_matrix$Pathway.B)))
# create matrix with geneset groups
info <- list(query_geneset)
names(info) <- "query_geneset"
# Step 2: Extracting only the pairwise combinations of our query geneset
# and the top correlated
step2_matrix <- subset(matrix, Pathway.A %in% interesting_genesets &
Pathway.B %in% interesting_genesets)
# step 3: Filtering on absolute correlation (>=) and p-value (<=)
step3_matrix <- subset(step2_matrix, abs(PathCor) >= min_abs_corr &
p.value <= max_pval)
print(paste("Successful exploring: Based on ", query_geneset ," and ",top,
" top correlated genesets, ", dim(step3_matrix)[1],
" correlation pairs were found.", sep =""))
po = new("pcxn",type = "pcxn_explore", data = as.matrix(step3_matrix),
geneset_groups = as.list(info))
return(po)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.