Nothing
###TOPOICSIM FUNCTIONS
#' GAPGOM internal - .go_ids_lookup()
#'
#' This function is an internal function and should not be called by the user.
#'
#' Looks up goids per id (entrez/ensembl).
#'
#' @section Notes:
#' Internal function used in ().
#'
#' @param ids general ids that you want to search for in godata.
#' @param go_data the queried godata neccesary for the lookup
#' @param drop list of evidences you want to ignore.
#'
#' @return return the translation dataframe containing conversion from id to
#' goids.
#' @import data.table
#' @keywords internal
.go_ids_lookup <- function(ids, go_data, custom_genes=NULL, drop=NULL) {
go_gene_anno <- data.table(go_data@geneAnno)
go_gene_anno <- go_gene_anno[!go_gene_anno$EVIDENCE %in% drop, c(seq_len(2))]
go_gene_anno <- unique(go_gene_anno[go_gene_anno[[1]] %in% ids,])
passed_ids <- list()
go_df <- data.frame()
for (id in ids) {
# test if id has already occured earlier
goids <- passed_ids[[id]]
if (is.null(goids)) {
goids <- unique(go_gene_anno[go_gene_anno[[1]]==as.character(id),]$GO)
passed_ids[[id]] <- c(goids)
}
if (length(goids) != 0) {
go_df <- rbind(go_df, data.frame(ID=id, GO=goids))
}
}
if (!is.null(custom_genes)) {
go_df <- rbind(go_df,
data.table::rbindlist(lapply(names(custom_genes),
function(id, cus_genes) {
return(data.frame(ID=id, GO=cus_genes[[id]]))
}, custom_genes)
))
}
go_df <- unique(as.data.frame(go_df))
return(go_df)
}
#' GAPGOM internal - .set_values()
#'
#' This function is an internal function and should not be called by the user.
#'
#' Sets values within a topoclsim score matrix.
#'
#' @section Notes:
#' Internal function used in expression_prediction_function().
#'
#' @param item1 first item
#' @param item2 second item
#' @param the_matrix score matrix
#' @param value value to be set
#'
#' @return return the matrix with newly set items.
#' @importFrom fastmatch %fin%
#' @keywords internal
.set_values <- function(item1, item2, the_matrix, value) {
# set opposite pair to the same value if it exists
if (item1 %in% rownames(the_matrix) & item2 %fin% colnames(the_matrix)) {
the_matrix[item1, item2] <- value
}
if (item2 %in% rownames(the_matrix) & item1 %fin% colnames(the_matrix)) {
the_matrix[item2, item1] <- value
}
return(the_matrix)
}
### LNCRNAPRED FUNCTIONS
#' GAPGOM internal - .generate_translation_df()
#'
#' This function is an internal function and should not be called by the user.
#'
#' Generates a translation df specific for ExpressionSets, this is then used
#' for looking up ids and their respective GOIDS.
#'
#' @section Notes:
#' Internal function used in expression_prediction_function().
#'
#' @param expression_set ExpressionSet object --> see Biobase package.
#' @param organism where to be scanned genes reside in, this option
#' is neccesary to select the correct GO DAG. Options are based on the org.db
#' bioconductor package;
#' http://www.bioconductor.org/packages/release/BiocViews.html#___OrgDb
#' Following options are available: "fly", "mouse", "rat", "yeast",
#' "zebrafish", "worm", "arabidopsis", "ecolik12", "bovine", "canine",
#' "anopheles", "ecsakai", "chicken", "chimp", "malaria", "rhesus", "pig",
#' "xenopus". Fantom5 data only has "human" and "mouse" available depending
#' on the dataset.
#' @param keytype keytype used in querying of godata/columnnames
#' @param ontology desired ontology to use for prediction. One of three;
#' "BP" (Biological process), "MF" (Molecular function) or "CC"
#' (Cellular Component). Cellular Component is not included with the package's
#' standard data and will thus yield no results.
#' @param verbose set to true for more informative/elaborate output.
#'
#' @return return the translation dataframe containing conversion from general
#' ids to entrez/ensembl ids (and others) and goids.
#'
#' @import data.table
#' @importFrom Biobase featureData pData assayData
#' @importFrom fastmatch %fin%
#' @keywords internal
.generate_translation_df <- function(expression_set, organism, ontology,
keytype, verbose = FALSE, go_data = NULL) {
keys_col <- .resolve_keys_col(expression_set, keytype)
if (is.null(go_data)) {
if (verbose) {
go_data <- set_go_data(organism, ontology, computeIC = FALSE,
keytype = keytype)
} else {
go_data <- suppressMessages(set_go_data(organism, ontology,
computeIC = FALSE,
keytype = keytype))
}
}
go_gene_anno <- unique(data.table(go_data@geneAnno)[,c(seq_len(2))])
# convert entrez_ids and grab subset of godata (quicker)
all_keys <- lapply(pData(featureData(expression_set))[, keys_col],
.entrezraw_to_entrez)
all_keys <- unique(unlist(all_keys, FALSE, FALSE))
# grab correct go data
go_gene_anno <- unique(go_gene_anno[go_gene_anno[[1]] %fin%
all_keys,])
# keep track of row to properly bind main ID
rowtracker <- 0
passed_ids <- list()
id_go_dfs <- list()
for (i in seq_len(nrow(pData(featureData(expression_set))))) {
rawid <- as.character(pData(featureData(expression_set))[i, keys_col])
ids <- .entrezraw_to_entrez(rawid)
# test if id has already occurred earlier
goids <- passed_ids[[rawid]]
if (is.null(goids)) {
goids <- go_gene_anno[go_gene_anno[[1]] %fin%
ids,]$GO
non_duplicated_goids <- goids[!duplicated(goids)]
passed_ids[[rawid]] <-
c(non_duplicated_goids)
}
# check if an output exists, if so return.
if (length(goids) != 0){
id_go_dfs[[i]] <- CJ(ORIGID=rownames(
assayData(expression_set)[["exprs"]])[i],
ID=rawid, GO=goids)
}
}
# bind the results, filter uniques and return.
id_go_df <- rbindlist(id_go_dfs)
id_go_df <- unique(as.data.frame(id_go_df))
return(id_go_df)
}
#' GAPGOM internal - .resolve_keys_col()
#'
#' This function is an internal function and should not be called by the user.
#'
#' resolves columnname for arbitrary ids within an expression_set
#'
#' @section Notes:
#' Internal function used in .generate_translation_df().
#'
#' @param expression_set ExpressionSet object --> see Biobase package.
#' @param keytype keytype used in querying of godata
#'
#' @return column name of id
#'
#' @importFrom Biobase featureData pData assayData
#' @keywords internal
.resolve_keys_col <- function(expression_set, keytype) {
colnames_vector <- colnames(pData(featureData(expression_set)))
if (keytype == "ENTREZID") {
keytype <- "entrez"
}
# create regex
regex_str <- vapply(strsplit(keytype, split="")[[1]], function(x) {
return(paste0("[", toupper(x), tolower(x), "]"))}, character(1))
regex_str <- paste0(".*", paste0(regex_str, collapse=""), ".*")
exp <- regexec(regex_str, colnames_vector)
# get result
regex_result <- unlist(regmatches(colnames_vector, exp), FALSE, FALSE)
if (length(regex_result) < 1) {
return(NULL)
} else {
return(regex_result)
}
}
#' GAPGOM internal - .ext_id_to_term_id()
#'
#' This function is an internal function and should not be called by the user.
#'
#' Quantifies amount of extracted go terms within the list select top genes.
#'
#' @section Notes:
#' Internal function used in enrichment_analysis().
#'
#' @param data extracted_genes with correct onto etc. (matrix)
#' @param list_top_genes The list_top_genes matrix
#'
#' @return return the quantified matrix
#'
#' @import data.table
#' @importFrom GO.db GO
#' @keywords internal
.ext_id_to_term_id <- function(data, list_top_genes) {
# add 1 for each go that is in list top genes.
dtdata <- as.data.table(data)
# GO from GO.db isn't actually used because the keyword is not evaluated.
# this import is just to circumvent bioccheck
quantified_only <- dtdata[dtdata$ORIGID %in%
as.data.table(list_top_genes)[[1]], .N, by=GO]
non_quantified_gos <- unique(data[!(data$GO %in% quantified_only$GO), ]$GO)
return(as.data.frame(rbind(quantified_only,
list(non_quantified_gos,
rep(0, length(non_quantified_gos))))))
}
#' GAPGOM internal - .term_id_to_ext_id()
#'
#' This function is an internal function and should not be called by the user.
#'
#' Quantifies amount of total extracted go terms
#'
#' @section Notes:
#' Internal function used in enrichment_analysis().
#'
#' @param data extracted_genes with correct onto etc. (matrix)
#'
#' @return return the quantified matrix
#'
#' @import data.table
#' @importFrom GO.db GO
#' @keywords internal
.term_id_to_ext_id <- function(data) {
dtdata <- as.data.table(data)
# GO from GO.db isn't actually used because the keyword is not evaluated.
# this import is just to circumvent bioccheck
return(as.data.frame(dtdata[, .N, by=GO]))
}
#' GAPGOM internal - .entrezraw_to_entrez()
#'
#' This function is an internal function and should not be called by the user.
#'
#' Converts a raw ID from fantom data to a normal ID.
#'
#' @section Notes:
#' Internal function used in enrichment_analysis().
#'
#' @param rawid Raw entrez id. E.G.; entrez:23456
#'
#' @return return the normal ID
#'
#' @keywords internal
.entrezraw_to_entrez <- function(rawid) {
rawid <- as.character(rawid)
# first check if the id is raw at all.
regex_str <- "[Ee][Nn][Tt][Rr][Ee][Zz][Gg][Ee][Nn][Ee]:\\d*,?"
exp <- regexec(regex_str, rawid)
# get result
regex_result <- unlist(regmatches(rawid, exp), FALSE, FALSE)
# check if raw id
if (length(regex_result) > 0) {
# regex match!
ids_split <- unlist(strsplit(rawid,
",|:"), FALSE, FALSE)
ids <- ids_split[seq(2, length(ids_split), 2)]
} else {
# in any other case, or where the id is not entrez, return the input.
ids <- rawid
}
return(ids)
}
#' GAPGOM internal - .resolve_genes_unique_gos()
#'
#' This function is an internal function and should not be called by the user.
#'
#' Resolves all unique go pairs given unique gene pairs.
#'
#' @section Notes:
#' Internal function used in topo_ic_sim_genes().
#'
#' @param unique_pairs_genes data.table with unique pairs of gene combinations.
#' @param topoargs topoicsim args, see data_prep
#'
#' @return return a data.table with all unique go combination pairs.
#'
#' @keywords internal
.resolve_genes_unique_gos <- function(unique_pairs_genes, topoargs) {
all_gos1 <- c()
all_gos2 <- c()
for (i in seq_len(nrow(unique_pairs_genes))) {
pair <- unique_pairs_genes[i,]
gene1 <- pair[[1]]
gene2 <- pair[[2]]
# first check if gene is custom gene or not.
# gene1
if (gene1 %in% names(topoargs$custom_genes1)) {
gos1 <- topoargs$custom_genes1[[gene1]]
} else if (gene1 %in% names(topoargs$custom_genes2)) {
gos1 <- topoargs$custom_genes2[[gene1]]
} else {
gos1 <- as.character(topoargs$translation_to_goids[
topoargs$translation_to_goids$ID==gene1,]$GO)
}
#gene2
if (gene2 %in% names(topoargs$custom_genes1)) {
gos2 <- topoargs$custom_genes1[[gene2]]
} else if (gene2 %in% names(topoargs$custom_genes2)) {
gos2 <- topoargs$custom_genes2[[gene2]]
} else {
gos2 <- as.character(topoargs$translation_to_goids[
topoargs$translation_to_goids$ID==gene2,]$GO)
}
all_gos1 <- c(all_gos1, gos1)
all_gos2 <- c(all_gos2, gos2)
}
all_go_pairs_df <- .unique_combos(unique(all_gos1), unique(all_gos2))
colnames(all_go_pairs_df) <- c("GO1", "GO2")
return(all_go_pairs_df)
}
#' GAPGOM internal - .organism_to_species_lib()
#'
#' This function is an internal function and should not be called by the user.
#'
#' Makes conversion of organism names to package names a little easier
#'
#' @section Notes:
#' Internal function used in set_go_data().
#'
#' @param organism the desired organism
#'
#' @return the lib name
#'
#' @keywords internal
.organism_to_species_lib <- function(organism) {
return(switch(tolower(organism), human = "org.Hs.eg.db",
fly = "org.Dm.eg.db",
mouse = "org.Mm.eg.db",
rat = "org.Rn.eg.db",
yeast = "org.Sc.sgd.db",
zebrafish = "org.Dr.eg.db",
worm = "org.Ce.eg.db",
arabidopsis = "org.At.tair.db",
ecolik12 = "org.EcK12.eg.db",
bovine = "org.Bt.eg.db",
canine = "org.Cf.eg.db",
anopheles = "org.Ag.eg.db",
ecsakai = "org.EcSakai.eg.db",
chicken = "org.Gg.eg.db",
chimp = "org.Pt.eg.db",
malaria = "org.Pf.plasmo.db",
rhesus = "org.Mmu.eg.db",
pig = "org.Ss.eg.db",
xenopus = "org.Xl.eg.db",
message("Error, invalid organism; \"", organism , "\"!")))
}
#' GAPGOM internal - .get_package_version()
#'
#' This function is an internal function and should not be called by the user.
#'
#' Gets the version number of a package.
#'
#' @section Notes:
#' Internal function used in checking precalculated values for topoicsim.
#'
#' @param pckg_name package name
#'
#' @return package version
#'
#' @importFrom utils installed.packages
#' @keywords internal
.get_package_version <- function(pckg_name) {
inst_pckgs <- installed.packages()
return(inst_pckgs[inst_pckgs[,"Package"] == pckg_name,"Version"])
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.