#' Lipidmaps Structure connector class.
#'
#' Connector class for Lipidmaps Structure.
#'
#' @examples
#' # Create an instance with default settings:
#' mybiodb <- biodb::newInst()
#'
#' # Create a connector
#' conn <- mybiodb$getFactory()$createConn('lipidmaps.structure')
#'
#' # Get an entry
#' e <- conn$getEntry('LMFA00000001')
#'
#' # Terminate instance.
#' mybiodb$terminate()
#'
#' @importFrom biodb BiodbConn
#' @importFrom R6 R6Class
#' @importFrom lifecycle deprecate_stop
#' @export
LipidmapsStructureConn <- R6::R6Class("LipidmapsStructureConn",
inherit=biodb::BiodbConn,
public=list(
#' @description
#' Calls LMSDSearch web service. See
#' https://www.lipidmaps.org/data/structure/programmaticaccess.html for
#' details.
#' @param mode The search mode: 'ProcessStrSearch', 'ProcessTextSearch' or
#' 'ProcessTextOntologySearch'. Compulsory.
#' @param output.mode If set to 'File', will output a in format `output.type`,
#' otherwise will output HTML.
#' @param output.type The output format: 'TSV', 'CSV' or 'SDF'.
#' @param output.delimiter The delimiter for TSV or CSV formats: 'Tab', 'Comma',
#' 'Semicolon'.
#' @param output.quote If quotes are to be used: 'Yes' or 'No'.
#' @param output.column.header If header must be output: 'Yes' or 'No'.
#' @param lmid a Lipidmaps ID.
#' @param name The name to search for.
#' @param formula The chemical formula to search for.
#' @param search.type The search type: 'SubStructure' or 'ExactMatch'.
#' @param smiles.string A SMILES to search for.
#' @param exact.mass The mass to search for.
#' @param exact.mass.offset The tolerance on the mass search.
#' @param core.class An integer number from 1 to 8.
#' @param main.class An integer number. See Lipidmaps documentation.
#' @param sub.class An integer number. See Lipidmaps documentation.
#' @param retfmt Use to set the format of the returned value. 'plain' will
#' return the raw results from the server, as a character value. 'request' will
#' return the request that would have been sent, as a BiodbRequest object.
#' 'parsed' will return data frame. 'ids' will return a character vector
#' containing the IDs of the matching entries.
#' @return Depending on `retfmt`.
wsLmsdSearch=function(mode=NULL, output.mode=NULL, output.type=NULL,
output.delimiter=NULL, output.quote=NULL, output.column.header=NULL,
lmid=NULL, name=NULL, formula=NULL, search.type=NULL, smiles.string=NULL,
exact.mass=NULL, exact.mass.offset=NULL, core.class=NULL, main.class=NULL,
sub.class=NULL, retfmt=c('plain', 'request', 'parsed', 'ids')) {
# Check parameters
retfmt <- match.arg(retfmt)
chk::chkor_vld(chk::vld_null(mode), chk::vld_subset(mode,
c('ProcessStrSearch', 'ProcessTextSearch',
'ProcessTextOntologySearch')))
if (retfmt == 'ids') {
output.mode <- 'File'
output.type <- 'TSV'
}
chk::chkor_vld(chk::vld_null(output.mode),
chk::vld_subset(output.mode, c('File')))
chk::chkor_vld(chk::vld_null(output.type),
chk::vld_subset(output.type, c('TSV', 'CSV', 'SDF')))
chk::chkor_vld(chk::vld_null(output.delimiter),
chk::vld_subset(output.delimiter, c('Tab', 'Comma', 'Semicolon')))
chk::chkor_vld(chk::vld_null(output.quote),
chk::vld_subset(output.quote, c('Yes', 'No')))
chk::chkor_vld(chk::vld_null(output.column.header),
chk::vld_subset(output.column.header, c('Yes', 'No')))
# Build request
fct.args <- as.list(environment())
fct.args[['retfmt']] <- NULL
request <- do.call(private$buildWsLmsdSearchRequest, fct.args)
if (retfmt == 'request')
return(request)
# Send request
results <- self$getBiodb()$getRequestScheduler()$sendRequest(request)
results <- unlist(results)
# Parse
if (retfmt != 'plain' && output.mode == 'File')
results <- private$parseWsLmsdSearchResult(results=results,
retfmt=retfmt, output.type=output.type,
output.delimiter=output.delimiter,
output.column.header=output.column.header,
output.quote=output.quote)
return(results)
},
#' @description
#' Calls LMSD web service for downloading one entry.
#' @param lmid The accession number of the entry to retrieve.
#' @param format The output format (either 'tsv' or 'csv').
#' @param retfmt Use to set the format of the returned value. 'plain' will
#' return the raw results from the server, as a character value. 'request' will
#' return the request that would have been sent, as a BiodbRequest object.
#' 'parsed' will return data frame.
#' @return Depending on `retfmt`.
wsLmsd=function(lmid, format=c('tsv', 'csv'),
retfmt=c('plain', 'request', 'parsed')) {
format <- match.arg(format)
retfmt <- match.arg(retfmt)
chk::chk_string(lmid)
# Build request
url <- paste0(self$getPropValSlot('urls', 'lmsd.url'), lmid)
params <- list(format=format)
request <- self$makeRequest(method='get',
url=biodb::BiodbUrl$new(url=url, params=params))
if (retfmt == 'request')
return(request)
# Send request
results <- self$getBiodb()$getRequestScheduler()$sendRequest(request)
# Parse
if (retfmt != 'plain') {
sep <- if (format == 'tsv') "\t" else ','
results <- read.table(text=results, sep=sep, header=TRUE,
comment.char='', stringsAsFactors=FALSE, quote='"', fill=TRUE)
}
return(results)
},
#' @description
#' Calls LMSDRecord web service. See
#' http://www.lipidmaps.org/data/structure/programmaticaccess.html.
#' @param lmid A character vector containing the IDs of the wanted entries.
#' @param mode If set to 'File', will output a in format `output.type`,
#' otherwise will output HTML.
#' @param output.type The output format: 'TSV', 'CSV' or 'SDF'.
#' @param output.delimiter The delimiter for TSV or CSV formats: 'Tab', 'Comma',
#' 'Semicolon'.
#' @param output.quote If quotes are to be used: 'Yes' or 'No'.
#' @param output.column.header If header must be output: 'Yes' or 'No'.
#' @param retfmt Use to set the format of the returned value. 'plain' will
#' return the raw results from the server, as a character value. 'request' will
#' return the request that would have been sent, as a BiodbRequest object.
#' 'parsed' will return data frame.
#' @return Depending on `retfmt`.
wsLmsdRecord=function(lmid, mode=NULL, output.type=NULL, output.delimiter=NULL,
output.quote=NULL, output.column.header=NULL,
retfmt=c('plain', 'request', 'parsed')) {
lifecycle::deprecate_stop('0.99.0', 'wsLmsdRecord()')
retfmt <- match.arg(retfmt)
chk::chk_string(lmid)
chk::chkor_vld(chk::vld_null(output.mode),
chk::vld_subset(output.mode, c('File', 'Download')))
chk::chkor_vld(chk::vld_null(output.type),
chk::vld_subset(output.type, c('TSV', 'CSV', 'SDF', 'MDLMOL')))
chk::chkor_vld(chk::vld_null(output.delimiter),
chk::vld_subset(output.delimiter, c('Tab', 'Comma', 'Semicolon')))
chk::chkor_vld(chk::vld_null(output.quote),
chk::vld_subset(output.quote, c('Yes', 'No')))
chk::chkor_vld(chk::vld_null(output.column.header),
chk::vld_subset(output.column.header, c('Yes', 'No')))
# Build request
url <- paste0(self$getPropValSlot('urls', 'base.url'), 'LMSDRecord.php')
params <- list(LMID=lmid)
if ( ! is.null(mode))
params <- c(params, Mode=mode)
if ( ! is.null(output.type))
params <- c(params, OutputType=output.type)
if ( ! is.null(output.delimiter))
params <- c(params, OutputDelimiter=output.delimiter)
if ( ! is.null(output.quote))
params <- c(params, OutputQuote=output.quote)
if ( ! is.null(output.column.header))
params <- c(params, OutputColumnHeader=output.column.header)
request <- self$makeRequest(method='get',
url=biodb::BiodbUrl$new(url=url, params=params))
if (retfmt == 'request')
return(request)
# Send request
results <- self$getBiodb()$getRequestScheduler()$sendRequest(request)
# Parse
if (retfmt != 'plain' && mode %in% c('File', 'Download'))
results <- private$parseWsLmsdSearchResult(results=results,
retfmt=retfmt, output.type=output.type,
output.delimiter=output.delimiter,
output.column.header=output.column.header,
output.quote=output.quote)
return(results)
}
),
private=list(
doSearchForEntries=function(fields=NULL, max.results=0) {
ids <- character()
if ( ! is.null(fields)) {
# Configure mass search
exact.mass <- NULL
exact.mass.offset <- NULL
if ('monoisotopic.mass' %in% names(fields)) {
rng <- do.call(Range$new, fields[['monoisotopic.mass']])
exact.mass <- rng$getValue()
exact.mass.offset <- rng$getDelta()
}
# Configure name search
name <- fields$name
# Search
ids <- self$wsLmsdSearch(mode='ProcessStrSearch', output.mode='File',
name=name, exact.mass=exact.mass,
exact.mass.offset=exact.mass.offset, retfmt='ids')
}
# Cut
if (max.results > 0 && max.results < length(ids))
ids <- ids[seq_len(max.results)]
return(ids)
}
,doGetEntryContentRequest=function(ids, concatenate=TRUE) {
fct <- function(id) self$wsLmsd(lmid=id, format='tsv',
retfmt='request')$getUrl()$toString()
return(vapply(ids, fct, FUN.VALUE=''))
}
,doGetEntryIds=function(max.results=NA_integer_) {
# Retrieve all IDs
ids <- self$wsLmsdSearch(mode='ProcessStrSearch', output.mode='File',
retfmt='ids')
return(ids)
}
,doGetEntryPageUrl=function(id) {
u <- self$getPropValSlot('urls', 'base.url')
fct <- function(x) biodb::BiodbUrl$new(url=u,
params=list(LMID=x))$toString()
return(vapply(id, fct, FUN.VALUE=''))
}
,buildWsLmsdSearchRequest=function(mode, output.mode, output.type,
output.delimiter, output.quote, output.column.header, lmid, name, formula,
search.type, smiles.string, exact.mass, exact.mass.offset, core.class,
main.class, sub.class) {
params <- list(Mode=mode)
if ( ! is.null(output.mode))
params <- c(params, OutputMode=output.mode)
if ( ! is.null(output.type))
params <- c(params, OutputType=output.type)
if ( ! is.null(output.delimiter))
params <- c(params, OutputDelimiter=output.delimiter)
if ( ! is.null(output.quote))
params <- c(params, OutputQuote=output.quote)
if ( ! is.null(output.column.header))
params <- c(params, OutputColumnHeader=output.column.header)
if ( ! is.null(lmid))
params <- c(params, LMID=lmid)
if ( ! is.null(name))
params <- c(params, Name=name)
if ( ! is.null(formula))
params <- c(params, Formula=formula)
if ( ! is.null(search.type))
params <- c(params, SearchType=search.type)
if ( ! is.null(smiles.string))
params <- c(params, SMILESString=smiles.string)
if ( ! is.null(exact.mass))
params <- c(params, ExactMass=exact.mass)
if ( ! is.null(exact.mass.offset))
params <- c(params, ExactMassOffSet=exact.mass.offset)
if ( ! is.null(core.class))
params <- c(params, CoreClass=core.class)
if ( ! is.null(main.class))
params <- c(params, MainClass=main.class)
if ( ! is.null(sub.class))
params <- c(params, SubClass=sub.class)
u <- biodb::BiodbUrl$new(url=c(self$getPropValSlot('urls', 'base.url'),
'structure', 'LMSDSearch.php'), params=params)
request <- self$makeRequest(method='get', url=u)
return(request)
}
,parseWsLmsdSearchResult=function(results, retfmt, output.type,
output.delimiter, output.column.header, output.quote) {
# Mode must be set or HTML will be output
# See https://www.lipidmaps.org/data/structure/programmaticaccess.html
# Parse TSV/CSV table
if (is.null(output.type) || output.type %in% c('TSV', 'CSV')) {
if (is.null(output.type) || output.type == 'TSV')
sep <- "\t"
else
sep <- if (is.null(output.delimiter)
|| output.delimiter == 'Comma') ',' else ';'
header <- (is.null(output.column.header)
|| output.column.header == 'Yes')
quote <- if (is.null(output.quote)
|| output.quote == 'No') '' else '"'
# 2022-12-01 A bug in the web service generates a HTML header before
# the table:
# <script src='/js/jquery/jquery.min.js'></script>
# ...
# <style>
# ...
# .datatable th {
# ...
# }
# ...
# </style>
#
# LM_ID ...
# We try to remove it first:
results <- sub("^.*</style>\\s*", "", results)
results <- read.table(text=results, sep=sep, header=header,
comment.char='', stringsAsFactors=FALSE, quote=quote, fill=TRUE)
}
else
biodb::error('Only TSV and CSV output types are parsable.')
# Extract IDs
if (retfmt == 'ids')
results <- results[['LM_ID']]
return(results)
}
))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.