#-------------------------------------------------------------------------------
#
# hdl proteome watch
#
#-------------------------------------------------------------------------------
#' hdl proteomewatch proteins
#' @return string vector: HDLProteomeWatch protein entries
#' @examples
#' hdlproteins()
#' @export
hdlproteins <- function(){
url <- 'https://homepages.uc.edu/~davidswm/HDL%20Proteome%20Watch%202021.xlsx'
dir <- file.path(tools::R_user_dir('autonomics', 'cache'), 'hdlproteomewatch')
dir.create(dir, recursive = TRUE, showWarnings = FALSE)
file <- file.path(dir, basename(url))
if (!file.exists(file)) download.file(url, destfile = file, mode = 'wb')
hdlproteins <- readxl::read_excel(file, sheet = 3, range = 'C9:E944', col_names = c('entry', 'mw', 'uniprot'))
hdlproteins %<>% extract2('entry')
hdlproteins
}
#' Tag hdlproteins
#' @param object SummarizedExperiment
#' @param verbose TRUE or FALSE
#' @return SummarizedExperiment
#' @examples
#' file <- system.file('extdata/billing19.proteingroups.txt', package = 'autonomics')
#' object <- read_maxquant_proteingroups(file)
#' object %<>% tag_hdlproteins()
#' fdt(object)
#' @export
tag_hdlproteins <- function(object, verbose = TRUE){
hdl <- protein <- NULL
hdlproteins <- autonomics::hdlproteins()
fdt0 <- fdt(object)[, c('feature_id', 'protein')]
fdt0 %<>% uncollapse(protein, sep = ';')
fdt0 <- fdt0[, hdl := protein %in% hdlproteins]
fdt0 <- fdt0[, .(hdl = any(hdl)), by = feature_id]
fdt0$hdl %<>% as.numeric()
object %<>% merge_fdt(fdt0)
if (verbose) message(sprintf(
'\tAdd fdt(object)$hdl : %d/%d HDL proteingroups, %d/%d HDLProteomeWatch proteins',
sum(fdt(object)$hdl),
nrow(object),
length(intersect(fdt(object)[hdl==1, protein], hdlproteins)),
length(hdlproteins)))
object
}
#-------------------------------------------------------------------------------
#
# open targets
#
#-------------------------------------------------------------------------------
#' opentargets dir
#' @export
OPENTARGETSDIR <- file.path(tools::R_user_dir('autonomics', 'cache'), 'opentargets', '22.04')
download_opentargets_targets <- function(){
if (!requireNamespace('XML', quietly = TRUE)){
message("BiocManager::install('XML'). Then re-run.")
}
if (!dir.exists(file.path(OPENTARGETSDIR, 'targets'))){
ftpdir <- 'http://ftp.ebi.ac.uk/pub/databases/opentargets/platform/22.04/output/etl/json/targets/'
ftpfiles <- XML::getHTMLLinks(ftpdir) %>% setdiff(c('../', '_SUCCESS'))
ftpfile <- ftpfiles[1]
dir.create(file.path(OPENTARGETSDIR, 'targets'), showWarnings = FALSE, recursive = TRUE)
for (i in seq_along(ftpfiles)){
download.file(url = paste0(ftpdir, ftpfiles[i]),
destfile = file.path(OPENTARGETSDIR, 'targets', ftpfiles[i]))
}
}
return(file.path(OPENTARGETSDIR, 'targets'))
}
null_to_str <- function(x){ x[is.null(x)] <- ''; x }
extract_proteinids <- function(x){
x %<>% extract2('proteinIds')
if (is.null(x)) return('')
x %<>% data.table()
x %<>% extract(source %in% c('uniprot_swissprot', 'uniprot_trembl'))
x %<>% extract2('id')
x %<>% paste0(collapse = ';')
x
}
extract_functiondescriptions <- function(x){
x %<>% extract2('functionDescriptions')
x %<>% paste0(collapse = ';')
x %<>% null_to_str()
x
}
# file <- list.files(file.path(OPENTARGETSDIR, 'targets'))[1]
# read_opentargets_targets(file)
.read_opentargets_targets <- function(file){
if (!requireNamespace('jsonlite', quietly = TRUE)){
message("BiocManager::install('jsonlite'). Then re-run.")
}
lines <- readLines(file.path(OPENTARGETSDIR, 'targets', file))
lines %<>% lapply(jsonlite::fromJSON)
lines %<>% lapply( function(x){ data.table(
ensembl = x$id,
genesymbol = x$approvedSymbol,
genename = x$approvedName,
uniprot = extract_proteinids(x),
`function` = extract_functiondescriptions(x))})
lines %<>% rbindlist()
lines
}
save_opentargets_targets <- function(){
files <- dir(file.path(OPENTARGETSDIR, 'targets'))
dt <- lapply(files, .read_opentargets_targets)
dt %<>% rbindlist()
fwrite(dt, file.path(OPENTARGETSDIR, 'targets.tsv'), sep = '\t')
}
#' Add opentargets annotations
#' @param object SummarizedExperiment
#' @param cols character vector
#' @param verbose TRUE or FALSE
#' @return SummarizedExperiment
#' @examples
#' file <- system.file('extdata/billing19.proteingroups.txt', package = 'autonomics')
#' object <- read_maxquant_proteingroups(file)
#' object %<>% add_opentargets_by_uniprot()
#' @export
add_opentargets_by_uniprot <- function(
object, cols = c('genesymbol', 'genename', 'function'), verbose = TRUE
){
# Assert
assert_is_valid_sumexp(object)
canonical <- uniprot <- NULL
# Read
file <- file.path(OPENTARGETSDIR, 'targets.tsv')
if (!file.exists(file)){
cmessage('\tFirst `download_opentargets_targets`. Returning object unchanged.')
return(object)
}
if (verbose) cmessage('\tAdd opentargets annotations')
if (verbose) cmessage('\t\tRead %s', file)
if (verbose) cmessage('\t\tAdd %s', paste0(cols, collapse = '/'))
targetsdt <- fread(file, select = c('uniprot', cols))
targetsdt %<>% unique()
targetsdt %<>% separate_rows(uniprot)
targetsdt %<>% data.table()
targetsdt %<>% extract(uniprot != '')
targetsdt %<>% unique()
targetsdt %<>% extract(, lapply(.SD, paste_unique, collapse = ';'), by = 'uniprot')
# Add
idvar <- if ('fosId' %in% fvars(object)) 'fosId' else 'proId'
fdt0 <- fdt(object)[, c(idvar, 'canonical'), with = FALSE]
fdt0 %<>% separate_rows(canonical)
fdt0 %<>% data.table()
fdt0 %<>% merge(targetsdt, by.x = 'canonical', by.y = 'uniprot', all.x = TRUE, sort = FALSE)
fdt0[, canonical := NULL]
fdt0 %<>% extract(, lapply(.SD, paste_unique, collapse = ';'), by = idvar)
object %<>% merge_fdt(fdt0, by.x = idvar, by.y = idvar)
# Return
object
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.