#!/usr/bin/env Rscript
#
# This file is part of the `OmnipathR` R package
#
# Copyright
# 2018-2024
# Saez Lab, Uniklinik RWTH Aachen, Heidelberg University
#
# File author(s): Alberto Valdeolivas
# Dénes Türei (turei.denes@gmail.com)
# Attila Gábor
#
# Distributed under the MIT (Expat) License.
# See accompanying file `LICENSE` or find a copy at
# https://directory.fsf.org/wiki/License:Expat
#
# Website: https://r.omnipathdb.org/
# Git repo: https://github.com/saezlab/OmnipathR
#
utils::globalVariables(
c("category", "uniprot", "genesymbol", "annotations",
"target", "database", "category_intercell_source", "target_genesymbol",
"category_intercell_target", "parent_intercell_source",
"database_intercell_target","parent_intercell_target","source_genesymbol",
"is_stimulation", "is_inhibition", "consensus_direction",
"consensus_stimulation","consensus_inhibition","dip_url","sources",
"references", "curation_effort", "dorothea_level", "n_references",
"n_resources", "ncbi_tax_id_target", "ncbi_tax_id_source")
)
ORGANISMS_SUPPORTED <- c(9606L, 10090L, 10116L)
.omnipath_qt_synonyms <- list(
ptms = 'enzsub',
enz_sub = 'enzsub',
complex = 'complexes',
interaction = 'interactions',
network = 'interactions'
)
.omnipath_qt_messages <- list(
interactions = 'interactions',
enzsub = 'enzyme-substrate relationships',
complexes = 'protein complexes',
annotations = 'annotation records',
intercell = 'intercellular communication role records'
)
.omnipath_qt_nolicense <- c(
'annotations_summary',
'intercell_summary'
)
.omnipath_arg_synonyms <- list(
select_organism = 'organisms',
filter_databases = 'resources',
databases = 'resources',
select_genes = 'proteins'
)
.omnipath_default_fields <- list(
enzsub = c('sources', 'references', 'curation_effort'),
interactions = c('sources', 'references', 'curation_effort')
)
.omnipath_querystring_param <- c(
'genesymbols',
'resources',
'datasets',
'organisms',
'dorothea_levels',
'dorothea_methods',
'source_target',
'fields',
'format',
'directed',
'signed',
'loops',
'enzymes',
'substrates',
'partners',
'proteins',
'entity_types',
'sources',
'targets',
'residues',
'scope',
'aspect',
'source',
'categories',
'parent',
'transmitter',
'receiver',
'secreted',
'plasma_membrane_transmembrane',
'plasma_membrane_peripheral',
'topology',
'causality',
'license',
'password',
'types'
)
.omnipath_querystring_synonyms <- list(
organism = 'organisms',
resource = 'resources',
databases = 'resources',
database = 'resources',
dorothea_level = 'dorothea_levels',
tfregulons_levels = 'dorothea_levels',
tfregulons_level = 'dorothea_levels',
genesymbol = 'genesymbols',
field = 'fields',
dataset = 'datasets',
directions = 'directed',
entity_type = 'entity_types',
signs = 'signed',
modification = 'types',
type = 'types',
interaction_type = 'types',
interaction_types = 'types'
)
.omnipath_param_misc_keys <- c(
'query_type',
'default_fields',
'silent',
'logicals',
'download_args',
'references_by_resource',
'add_counts',
'qt_message',
'exclude',
'extra_attrs',
'evidences',
'json_param',
'strict_evidences',
'genesymbol_resource',
'orthology_targets',
'keep_evidences',
'cache'
)
#' Download data from the OmniPath web service
#'
#' This is the most generic method for accessing data from the OmniPath web
#' service. All other functions retrieving data from OmniPath call this
#' function with various parameters. In general, every query can retrieve data
#' in tabular or JSON format, the tabular (data frame) being the default.
#'
#' @param query_type Character: "interactions", "enzsub", "complexes",
#' "annotations", or "intercell".
#' @param organism Character or integer: name or NCBI Taxonomy ID of the
#' organism. OmniPath is built of human data, and the web service provides
#' orthology translated interactions and enzyme-substrate relationships for
#' mouse and rat. For other organisms and query types, orthology
#' translation will be called automatically on the downloaded human data
#' before returning the result.
#' @param resources Character vector: name of one or more resources. Restrict
#' the data to these resources. For a complete list of available resources,
#' call the `<query_type>_resources` functions for the query type of
#' interst.
#' @param datasets Character vector: name of one or more datasets. In the
#' interactions query type a number of datasets are available. The default
#' is caled "omnipath", and corresponds to the curated causal signaling
#' network published in the 2016 OmniPath paper.
#' @param types Character vector: one or more interaction types, such as
#' "transcriptional" or "post_translational". For a full list of
#' interaction types see `query_info("interaction")$types`.
#' @param genesymbols Character or logical: TRUE or FALS or "yes" or "no".
#' Include the `genesymbols` column in the results. OmniPath uses UniProt
#' IDs as the primary identifiers, gene symbols are optional.
#' @param fields Character vector: additional fields to include in the result.
#' For a list of available fields, call `query_info("interactions")`.
#' @param default_fields Logical: if TRUE, the default fields will be included.
#' @param silent Logical: if TRUE, no messages will be printed. By default a
#' summary message is printed upon successful download.
#' @param logicals Character vector: fields to be cast to logical.
#' @param format Character: if "json", JSON will be retrieved and processed
#' into a nested list; any other value will return data frame.
#' @param download_args List: parameters to pass to the download function,
#' which is `readr::read_tsv` by default, and `jsonlite::safe_load`.
#' @param references_by_resource Logical: if TRUE,, in the `references`
#' column the PubMed IDs will be prefixed with the names of the resources
#' they are coming from. If FALSE, the `references` column will be a list
#' of unique PubMed IDs.
#' @param add_counts Logical: if TRUE, the number of references and number of
#' resources for each record will be added to the result.
#' @param license Character: license restrictions. By default, data from
#' resources allowing "academic" use is returned by OmniPath. If you use
#' the data for work in a company, you can provide "commercial" or
#' "for-profit", which will restrict the data to those records which are
#' supported by resources that allow for-profit use.
#' @param password Character: password for the OmniPath web service. You can
#' provide a special password here which enables the use of `license =
#' "ignore"` option, completely bypassing the license filter.
#' @param exclude Character vector: resource or dataset names to be excluded.
#' The data will be filtered after download to remove records of the
#' excluded datasets and resources.
#' @param json_param List: parameters to pass to the `jsonlite::fromJSON` when
#' processing JSON columns embedded in the downloaded data. Such columns
#' are "extra_attrs" and "evidences". These are optional columns which
#' provide a lot of extra details about interactions.
#' @param strict_evidences Logical: reconstruct the "sources" and "references"
#' columns of interaction data frames based on the "evidences" column,
#' strictly filtering them to the queried datasets and resources. Without
#' this, the "sources" and "references" fields for each record might
#' contain information for datasets and resources other than the queried
#' ones, because the downloaded records are a result of a simple filtering
#' of an already integrated data frame.
#' @param genesymbol_resource Character: "uniprot" (default) or "ensembl". The
#' OmniPath web service uses the primary gene symbols as provided by
#' UniProt. By passing "ensembl" here, the UniProt gene symbols will be
#' replaced by the ones used in Ensembl. This translation results in a loss
#' of a few records, and multiplication of another few records due to
#' ambiguous translation.
#' @param cache Logical: use caching, load data from and save to the. The cache
#' directory by default belongs to the user, located in the user's default
#' cache directory, and named "OmnipathR". Find out about it by
#' \code{getOption("omnipathr.cachedir")}. Can be changed by
#' \code{\link{omnipath_set_cachedir}}.
#' @param ... Additional parameters for the OmniPath web service. These
#' parameters will be processed, validated and included in the query
#' string. Many parameters are already explicitly set by the arguments
#' above. A number of query type specific parameters are also available,
#' learn more about these by the \code{\link{query_info}} function. For
#' functions more specific than \code{\link{omnipath_query}}, arguments for
#' all downstream functions are also passed here.
#'
#' @return Data frame (tibble) or list: the data returned by the OmniPath web
#' service (or loaded from cache), after processing. Nested list if the
#' "format" parameter is "json", otherwise a tibble.
#'
#' @examples
#' interaction_data <- omnipath_query("interaction", datasets = "omnipath")
#' interaction_data
#'
#' @importFrom magrittr %<>% %>%
#' @importFrom tibble as_tibble
#' @importFrom readr read_tsv cols col_character
#' @importFrom utils modifyList
#' @importFrom rlang !!!
#' @export
omnipath_query <- function(
query_type,
organism = 9606L,
resources = NULL,
datasets = NULL,
types = NULL,
genesymbols = 'yes',
fields = NULL,
default_fields = TRUE,
silent = FALSE,
logicals = NULL,
download_args = list(),
format = 'data.frame',
references_by_resource = TRUE,
add_counts = TRUE,
license = NULL,
password = NULL,
exclude = NULL,
json_param = list(),
strict_evidences = FALSE,
genesymbol_resource = 'UniProt',
cache = NULL,
...
){
datasets %<>% setdiff(exclude)
resources %<>% setdiff(exclude)
cache %<>% use_cache
param <-
environment() %>%
as.list %>%
c(list(...)) %>%
omnipath_check_param
url <-
param %>%
omnipath_build_url %>%
c(`if`(
getOption('omnipathr.notls_fallback') &&
!getOption('omnipathr.notls_force'),
omnipath_build_url(param, notls = TRUE),
NULL
))
download_args_defaults <- list(
url = url,
cache = cache
)
dataframe_defaults <- list(
fun = read_tsv,
col_types = `if`(
'dorothea_level' %in% param$fields,
cols(dorothea_level = col_character()),
cols()
),
progress = FALSE,
show_col_types = FALSE
)
json_defaults <- list(
fun = safe_json,
simplifyDataFrame = FALSE
)
download_args %<>%
modifyList(
`if`(
empty_or_equals(param$format, 'json'),
json_defaults,
dataframe_defaults
),
.
) %>%
modifyList(download_args_defaults, .)
result <-
do.call(omnipath_download, download_args) %>%
omnipath_post_download(
url = url,
logicals = logicals,
references_by_resource = references_by_resource,
strict_evidences = strict_evidences,
exclude = exclude,
param = param,
add_counts = add_counts,
silent = silent
)
return(result)
}
#' Post-processing of the data downloaded from OmniPath
#'
#' @importFrom magrittr %<>% %>%
#' @importFrom tibble as_tibble
#' @importFrom rlang !!!
#' @noRd
omnipath_post_download <- function(
result,
url,
param,
logicals = NULL,
references_by_resource = TRUE,
strict_evidences = FALSE,
exclude = NULL,
add_counts = TRUE,
silent = FALSE
) {
omnipath_check_result(result, url)
result %<>% cast_logicals(logicals)
result %<>% strip_resource_labels(references_by_resource)
result %<>% apply_exclude(exclude)
result %<>% deserialize_extra_attrs(!!!param$json_param)
result %<>% deserialize_evidences(!!!param$json_param)
if(strict_evidences && param$query_type == 'interactions') {
result %<>% only_from(
datasets = param$datasets,
resources = param$resources,
.keep = param$keep_evidences
)
}
if(param$query_type %in% c('interactions', 'enzsub') && add_counts){
result %<>% count_references
result %<>% count_resources
}
if(is.data.frame(result)){
result %<>% as_tibble
}
if(!is_empty_2(param$orthology_targets)){
result %<>% omnipath_orthology_translate(param)
} else if(
!is_empty_2(param$genesymbol_resource) &&
str_to_lower(param$genesymbol_resource[1L]) != 'uniprot'
) {
result %<>% update_genesymbols(param, param$organisms[1L])
}
from_cache <- result %>% is_from_cache
# reporting and returning result
loglevel <- `if`(
silent,
logger::DEBUG,
logger::SUCCESS
)
msg <- '%soaded %d %s%s.'
logger::log_level(
level = loglevel,
msg,
`if`(from_cache, 'L', 'Downl'),
`if`(
is.data.frame(result),
nrow(result),
length(result)
),
param$qt_message,
`if`(from_cache, ' from cache', '')
)
return(result)
}
#' Consolidate arguments for OmniPath calls
#'
#' Most importantly, control argument expansion by ensuring the priority of
#' the explicitely built in synonyms.
#'
#' @param dots List: dots from the parent.
#' @param ... Override arguments.
#'
#' @return A list of arguments for `omnipath_query`.
#'
#' @importFrom magrittr %>% inset2 extract
#' @importFrom stringr str_extract
#' @importFrom purrr map
#' @noRd
omnipath_args <- function(dots, ...) {
calling_env <- parent.frame()
override <- list(...) %>% qs_synonyms
calling_fun <- sys.function(-1L)
defaults <-
calling_fun %>%
formals %>%
qs_synonyms
calling_env %>%
as.list %>%
qs_synonyms %>%
modifyList(defaults, .) %>%
inset2('...', NULL) %>%
c(list()) %>%
modifyList(dots %>% qs_synonyms) %>%
modifyList(override) %>%
inset2('...', NULL) %T>%
{log_trace('Arguments for OmniPath query: %s', compact_repr(.))}
}
#' Checks the arguments of \link{omnipath_query}, corrects some easy to
#' confuse or deprecated synonyms and selects the message printed by
#' the download function.
#' Not exported.
#'
#' @importFrom magrittr %<>% %>%
#' @importFrom logger log_warn
#' @importFrom purrr map_int
#' @importFrom dplyr first
#'
#' @noRd
omnipath_check_param <- function(param){
# mapping query type synonyms
param$query_type <- `if`(
!is.null(param$query_type) &
param$query_type %in% names(.omnipath_qt_synonyms),
.omnipath_qt_synonyms[[param$query_type]],
param$query_type
)
param %<>% add_qt_message
param %<>% qs_synonyms
param$format %<>% {`if`(empty_or_equals(.,'json'), ., NULL)}
# checking DoRothEA confidence level values
if(
'dorothea_levels' %in% names(param) &&
!all(param$dorothea_levels %in% c('A', 'B', 'C', 'D'))
){
msg <- 'DoRothEA confidence levels available are A, B, C and D.'
log_warn(msg)
warning(msg)
}
# extra_attrs and evidences are accepted also as an argument
for(name in c('extra_attrs', 'evidences')) {
if(if_null(param[[name]], FALSE)) {
param$fields %<>% union(name)
param[[name]] <- NULL
}
}
param$keep_evidences <- 'evidences' %in% param$fields
if(param$strict_evidences) {
param$fields %<>% union('evidences')
}
# adding default fields if not disabled
param$fields <- `if`(
param$default_fields &&
param$query_type %in% names(.omnipath_default_fields),
unique(
c(
param$fields,
.omnipath_default_fields[[param$query_type]],
`if`(
'dorothea' %in% param$datasets,
'dorothea_level',
NULL
)
)
),
param$fields
)
organisms_supported <- `if`(
param$query_type %in% c('interactions', 'enzsub'),
ORGANISMS_SUPPORTED,
9606L
)
# allow organism names
param$organisms %<>% map_int(ncbi_taxid)
param$orthology_targets <- param$organisms %>% setdiff(organisms_supported)
param$organisms %<>% intersect(organisms_supported)
log_trace('Organism(s): %s', enum_format(param$organisms))
log_trace('Orthology targets: %s', enum_format(param$orthology_targets))
if(length(param$orthology_targets > 0L)) {
param$organisms %<>% if_null_len0(9606L)
if(length(param$organisms) > 1L || param$organisms != 9606L) {
log_warn(
paste0(
'The result will be translated to `%s` by orthology; ',
'querying for human.'
),
param$orthology_targets %>% first
)
param$organisms <- 9606L
}
}
# removing some fields according to query type
if(!param$query_type %in% c('interactions', 'enzsub')){
param$genesymbols <- NULL
param$organisms <- NULL
}
# checking for wrong resource names
if(!is.null(param$resources)){
all_resources <- resources(param$query_type)
unknown_resources <- setdiff(param$resources, all_resources)
if(length(unknown_resources) != 0){
warning(
sprintf(
paste(
'The following resources are not available: %s.',
'Check the resource names for spelling mistakes.'
),
paste0(unknown_resources, collapse = ', ')
)
)
}
}
# setting up generic defaults from options
for(opt in c('license', 'password')){
param[opt] <- `if`(
(
is.null(param[[opt]]) &&
!param$query_type %in% .omnipath_qt_nolicense
),
options(sprintf('omnipathr.%s', opt)),
param[[opt]]
)
}
return(param)
}
#' Replace synonymous keys in query string parameters
#'
#' @importFrom magrittr %>% %<>% inset2
#' @noRd
qs_synonyms <- function(param) {
# mapping the query string parameter synonyms
for(name in names(param)){
if(
name %in% names(.omnipath_querystring_synonyms) &&
!.omnipath_querystring_synonyms[[name]] %in% names(param)
){
new_name <- .omnipath_querystring_synonyms[[name]]
param %<>%
inset2(new_name, param[[name]]) %>%
inset2(name, NULL)
}
}
return(param)
}
#' Adds a message printed upon successful download
#'
#' @noRd
add_qt_message <- function(param) {
# adding the message template which will be printed upon successful
# download
param$qt_message <- `if`(
!is.null(param$query_type) &
param$query_type %in% names(.omnipath_qt_messages),
.omnipath_qt_messages[[param$query_type]],
'records'
)
return(param)
}
#' Constructs the URL by creating a base URL according to the query type and
#' adding all user or package defined query string parameters.
#' Not exported.
#'
#' @importFrom magrittr %>%
#' @importFrom logger log_warn
#' @importFrom purrr reduce
#' @importFrom utils URLencode
#'
#' @noRd
omnipath_build_url <- function(param, notls = FALSE){
baseurl <- omnipath_url(param$query_type, notls = notls)
unknown_param <- setdiff(
names(param),
unique(c(
.omnipath_querystring_param,
names(.omnipath_querystring_synonyms),
.omnipath_param_misc_keys
))
)
if(length(unknown_param) > 0L){
log_warn(
'Unknown %s: %s.',
unknown_param %>% plural('parameter'),
unknown_param %>% pretty_list
)
}
url <-
.omnipath_querystring_param %>%
reduce(
function(url, key){
omnipath_url_add_param(url, key, param[[key]])
},
.init = baseurl
) %>%
URLencode
return(url)
}
#' Appends a query string parameter to the URL.
#' Not exported, used internally for assembling the URLs.
#'
#' @noRd
omnipath_url_add_param <- function(url, name, values = NULL){
values <- `if`(
is.null(values),
NULL,
`if`(
identical(values, TRUE),
'yes',
`if`(
identical(values, FALSE),
'no',
values
)
)
)
url <- `if`(
all(is.null(values)) || all(is.na(values)),
url,
sprintf(
'%s%s%s=%s',
url,
`if`(grepl('?', url, fixed = TRUE), '&', '?'),
name,
paste(values, collapse = ',')
)
)
return(url)
}
#' Translate an OmniPath data frame by orthology
#'
#' @param data A data frame from OmniPath, or any data frame with UniProt IDs
#' in a column named "uniprot", or columns named "source" and "target", or
#' "enzyme" and "substrate".
#' @param param List or character: OmniPath query parameters or the name of a
#'
#' @return Data frame: the input data frame with the UniProt IDs translated to
#' the target organism by orthology.
#'
#' @importFrom magrittr %T>% %>% extract2
#' @importFrom dplyr first
#' @importFrom rlang !! sym
#' @importFrom purrr reduce
#' @importFrom logger log_warn log_info
#' @noRd
omnipath_orthology_translate <- function(data, param) {
target <-
param %>%
{`if`(is.list(.), extract2(., 'orthology_targets'), .)} %T>%
{`if`(
!is.null(.) && length(.) > 1L,
log_warn(
paste0(
'Orthology translation works for only one organism in one ',
'query. Translating only to the first target organism: `%i`.'
),
first(.)
),
NA
)} %>%
first
if(!is.na(target)) {
log_info(
'Translating to `%s` by orthology, %i records before translation',
target,
nrow(data)
)
genesymbol_resource <-
param %>%
{`if`(is.list(.), extract2(., 'genesymbol_resource'), 'uniprot')}
data <-
reduce(
uniprot_columns(data),
~orthology_translate_column(
.x,
!!sym(.y),
source_organism = 9606L,
target_organism = target,
replace = TRUE
),
.init = data
) %T>%
{log_info('%i records after orthology translation.', nrow(.))} %>%
update_genesymbols(genesymbol_resource, organism = target)
}
return(data)
}
#' Update gene symbols in the data frame
#'
#' @param data A data frame from OmniPath, or any data frame with UniProt IDs
#' in a column named "uniprot", or columns named "source" and "target", or
#' "enzyme" and "substrate".
#' @param param List or character: OmniPath query parameters or the name of a
#' Gene Symbol resource, either "uniprot" or "ensembl".
#' @param organism Character or integer: name or NCBI Taxonomy ID of the
#' organism the UniProt IDs in the data frame belong to.
#'
#' @return Data frame: the input data frame with the gene symbol columns
#' updated.
#'
#' @importFrom magrittr %>% extract2 is_in
#' @importFrom logger log_warn log_info
#' @importFrom purrr reduce
#' @importFrom stringr str_to_lower
#' @importFrom rlang !! !!! sym :=
#' @noRd
update_genesymbols <- function(data, param, organism = 9606L) {
up_cols <- data %>% uniprot_columns
gs_cols <- data %>% genesymbol_columns
if(is_empty_2(up_cols)) {
warn <-
paste0(
'No columns with UniProt IDs found, ',
'not updating gene symbols.'
)
log_warn(warn)
warning(warn)
return(data)
}
resource <-
param %>%
{`if`(is.list(.), extract2(., 'genesymbol_resource'), .)} %>%
str_to_lower
if(resource %>% is_in(c('ensembl', 'uniprot')) %>% not) {
log_warn(
paste0(
'Unknown genesymbol resource: `%s`. ',
'Using UniProt, the default one, instead.'
),
resource
)
resource <- 'uniprot'
}
resource %T>%
{log_info('Setting gene symbols from `%s`.', .)} %>%
{reduce(
up_cols,
~translate_ids(
.x,
!!sym(.y) := uniprot,
!!sym(
`if`(
.y == 'uniprot',
'genesymbol',
sprintf('%s_genesymbol', .y)
)
) := genesymbol,
organism = organism,
ensembl = resource == 'ensembl',
keep_untranslated = FALSE
),
.init = data
)} %T>%
{log_info('%i records after setting gene symbols.', nrow(.))}
}
#' Names of UniProt columns in an OmniPath data frame
#'
#' @param data A data frame from OmniPath.
#'
#' @importFrom magrittr %>% is_in
#' @noRd
uniprot_columns <- function(data) {
UP_COLS <- list(
'uniprot',
c('enzyme', 'substrate'),
c('source', 'target')
)
for(cols in UP_COLS) {
if(cols %>% is_in(data %>% colnames) %>% all) {
return(cols)
}
}
}
#' Names of Gene Symbol columns in an OmniPath data frame
#'
#' @param data A data frame from OmniPath.
#'
#' @importFrom magrittr %>%
#' @noRd
genesymbol_columns <- function(data) {
data %>%
uniprot_columns %>%
{`if`(
is.null(.) || length(.) == 0L,
.,
`if`(
.[1L] == 'uniprot',
'genesymbol',
map_chr(., ~sprintf('%s_genesymbol', .))
)
)} %>%
intersect(data %>% colnames)
}
#' Checks whether the response is real data or an error message.
#' In case of error stops the execution and prints the URL and the message
#' from the server.
#'
#' @noRd
omnipath_check_result <- function(result, url){
if(length(result) == 1){
server_msg <- paste(result[[1]], collapse = '\n')
stop(
sprintf(
'Failed to download data from OmniPath:\nURL: %s\n%s\n',
url,
server_msg
)
)
}
}
#' Makes sure the boolean variables, listed in argument `logicals`, are of
#' R logical type. Converts various string and numeric representations.
#' Checks only for TRUE values, whatever does not match remains FALSE.
#'
#' @noRd
cast_logicals <- function(data, logicals = NULL){
true_values <- c('True', '1', 'TRUE', 'T', 'yes', 'YES', 'Y', 'y')
for(name in logicals){
data[[name]] <- (
identical(data[[name]], TRUE) |
data[[name]] %in% true_values |
(is.numeric(data[[name]]) & data[[name]] > 0)
)
}
return(data)
}
#' Removes records which are only from resources to be excluded
#'
#' @param data A data frame from the OmniPath web service.
#' @param exclude Character vector with the resource names to exclude.
#'
#' @return The input data frame with records removed according to the
#' exclude list.
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr filter
#' @importFrom rlang !! sym
#' @importFrom purrr map_lgl
#' @noRd
apply_exclude <- function(data, exclude){
col <- data %>% resources_colname
data %>%
`if`(
is.null(exclude),
.,
filter(
.,
map_lgl(
str_split(!!sym(col), ';'),
function(x){x %>% setdiff(exclude) %>% length %>% as.logical}
)
)
)
}
#' For a character vector splits each element and re-joins sorted unique
#' values.
#'
#' @noRd
split_unique_join <- function(
x,
sep = ';',
outsep = sep,
method = NULL
){
method <- `if`(
is.null(method),
function(values, outsep, ...){
paste(sort(unique(values)), collapse = outsep)
},
method
)
return(
split_apply(
x = x,
method = method,
sep = sep,
outsep
)
)
}
#' For a character vector splits each element and applies a method for
#' each sub vector.
#'
#' @importFrom purrr map
#' @importFrom magrittr %>%
#'
#' @noRd
split_apply <- function(
x,
method,
sep = ';',
...
){
x %>%
strsplit(sep) %>%
map(method, ...) %>%
unlist()
}
#' For an interactions or enzyme-substrate data frame adds a column
#' `n_resources` with the number of resources for each record.
#'
#' @noRd
count_resources <- function(data, only_primary = TRUE){
data[['n_resources']] <- split_apply(
data$sources,
method = function(values, only_primary){
if(only_primary){
values <- values[!grepl('_', values)]
}
return(length(values))
},
sep = ';',
only_primary = only_primary
)
return(data)
}
#' For an interactions or enzyme-substrate data frame adds a column
#' `n_references` with the number of references for each record.
#'
#' @importFrom dplyr n_distinct mutate
#' @importFrom magrittr %>%
#' @noRd
count_references <- function(data){
# NSE vs. R CMD check workaround
n_references <- references <- NULL
data %>%
mutate(
n_references = ifelse(
is.na(references),
0,
strip_resource_labels(
.,
inplace = FALSE,
method = n_distinct
)
)
)
}
#' For each undirected interaction adds a duplicate with the source and
#' target nodes swapped.
#'
#' @noRd
swap_undirected <- function(data){
is_directed <- NULL
data <- data %>%
filter(is_directed == 0) %>%
rename(
source = target,
target = source,
source_genesymbol = target_genesymbol,
target_genesymbol = source_genesymbol
) %>%
{`if`(
'ncbi_tax_id_source' %in% names(.),
rename(
.,
ncbi_tax_id_source = ncbi_tax_id_target,
ncbi_tax_id_target = ncbi_tax_id_source
),
.
)} %>%
bind_rows(data)
return(data)
}
#' Downloader dedicated to OmniPath web service URLs
#'
#' Just a thin wrapper around \code{download_base}.
#'
#' @param url Character: the URL to download. Elements after the first will
#' be used as fallback URLs in case the first one fails.
#' @param fun The downloader function. Should be able to accept \code{url}
#' as its first argument.
#' @param cache Logical: use the cache.
#' @param ... Passed to the internal function \code{download_base} and
#' from there ultimately to \code{fun}.
#'
#' @importFrom logger log_trace log_info log_error
#' @noRd
omnipath_download <- function(url, fun, cache = NULL, ...) {
cache %<>% use_cache
if(cache) {
for(the_url in url) {
log_trace('Looking up in cache: `%s`', the_url)
from_cache <- omnipath_cache_load(url = the_url)
if(!is.null(from_cache)){
log_info('Loaded from cache: `%s`', the_url)
attr(from_cache, 'url') <- the_url
return(from_cache)
}
}
}
for(the_url in url) {
log_trace('Attempting `%s`', the_url)
result <- tryCatch(
download_base(the_url, fun, ...),
error = function(e) {
log_warn(
'Failed to download: `%s`; error: %s',
the_url,
conditionMessage(e)
)
}
)
if(!is.null(result)){
log_info('Successfully retrieved: `%s`', the_url)
if(cache) {
omnipath_cache_save(data = result, url = the_url)
}
attr(result, 'url') <- the_url
return(result)
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.