#' Converting gene ids using annotation databases
#'
#' This function converts one type of gene ids to another type (such as Ensembl, Entrez, UniProt) by querying an annotation database through \code{AnnotationDbi::select}.
#' @param db A character of the annotation database name such as \code{'org.Hs.eg.db'}. The database object is looked up by this name with \code{get}, so it needs to be visible (e.g. the package is attached).
#' @param data One of: a \code{data.frame} with the gene ids to convert as rownames, a \code{SummarizedExperiment} with the gene ids as rownames, or a character vector of unique gene ids.
#' @param from.id,to.id The type of ids to convert from (e.g. \code{'ENSEMBL'}, the ids in rownames of \code{data}) and to (e.g. \code{'SYMBOL'}) respectively.
#' @param desc Logical. If \code{TRUE}, the description of each gene (the \code{'GENENAME'} column) will be included as \code{desc}.
#' @param other Other information to be added, such as `c('ENZYME', 'PATH')`. See `columns(org.Hs.eg.db)`.
#' @return If \code{data} is a \code{data.frame} or \code{SummarizedExperiment}, a \code{SummarizedExperiment} in the original row order, where rownames are the converted ids and the annotation is stored in \code{rowData}. Where a converted id is missing, empty, or duplicated, the original id is used as the rowname instead. If \code{data} is a character vector, only the \code{rowData} columns \code{from.id}, \code{to.id}, and (if requested) \code{desc} are returned. If the "AnnotationDbi" package is not installed, a warning is issued and the warning message is returned.
#' @examples
#' library(org.Hs.eg.db)
#' # Human Ensembl gene ids are rownames in the data frame.
#' data <- data.frame(tissue1=10:12, tissue2=20:22, row.names=c('ENSG00000006047',
#' 'ENSG00000268433', 'ENSG00000268555'))
#' data <- cvt_id(db='org.Hs.eg.db', data=data, from.id='ENSEMBL', to.id='SYMBOL', desc=TRUE)
#' data
#' @author Jianhai Zhang \email{jzhan067@@ucr.edu} \cr Dr. Thomas Girke \email{thomas.girke@@ucr.edu}
#' @references
#' Pagès H, Carlson M, Falcon S, Li N (2022). _AnnotationDbi: Manipulation of SQLite-based annotations in Bioconductor_. R package version 1.60.0, <https://bioconductor.org/packages/AnnotationDbi>.
#' Morgan M, Obenchain V, Hester J, Pagès H (2022). SummarizedExperiment: SummarizedExperiment container. R package version 1.28.0, <https://bioconductor.org/packages/SummarizedExperiment>.
#' @export
#' @importFrom SummarizedExperiment SummarizedExperiment rowData rowData<-
cvt_id <- function(db, data, from.id, to.id, desc=FALSE, other=NULL) {
  # AnnotationDbi is an optional dependency: warn and hand back the message if absent.
  if (!requireNamespace("AnnotationDbi", quietly = TRUE)) {
    msg <- 'Please install the "AnnotationDbi" package!'
    warning(msg); return(msg)
  }
  # Name of the annotation column requested when descriptions are wanted.
  desc.col <- if (TRUE %in% desc) 'GENENAME' else NULL

  # Coerce every supported input to a SummarizedExperiment with ids as rownames.
  vec <- FALSE
  if (!is(data, 'SummarizedExperiment')) {
    if (is(data, 'character')) {
      # 'wng' is a helper defined outside this block.
      if (any(duplicated(data))) return(wng('Duplicated IDs are detected!'))
      # Placeholder assay: only the rownames (the ids) matter for vector input.
      dummy <- matrix(rep(0, length(data)*2), ncol=2, dimnames = list(data, c('value1', 'value2')))
      data <- SummarizedExperiment(assays=dummy)
      vec <- TRUE
    } else if (is(as.data.frame(data), 'data.frame')) {
      data <- SummarizedExperiment(assays=data)
    }
  }
  # Remember the input row order so it can be restored at the end.
  orig.ids <- rownames(data)

  ann <- AnnotationDbi::select(get(db), keys=rownames(data), keytype=from.id, columns=c(from.id, to.id, desc.col, other))
  rdat <- rowData(data)
  # Carry over existing row annotation. 'drop=FALSE' keeps a single-column
  # rowData as a table (and keeps its column name) instead of a bare vector.
  if (ncol(rdat) > 0) ann <- cbind(rdat[ann[, from.id], , drop=FALSE], ann)

  # If 'to.id' is not available or duplicated, use 'from.id'.
  # Original ids are preserved, not filtered.
  ids <- ann[, to.id]
  idx <- ids=='' | duplicated(ids) | is.na(ids)
  ann$to.id <- ids
  ann$to.id[idx] <- ann[idx, from.id]

  # Normally every input id is present in 'ann' (ids not found in the database
  # are still returned), so 'data.dif' is expected to be empty.
  inter <- intersect(rownames(data), ann[, from.id])
  # ids from data: not available in the database.
  data.dif <- data[setdiff(rownames(data), inter), , drop=FALSE]

  # Convert ids: keep one annotation row per original id, then align both
  # objects by sorting on the original ids before swapping the rownames.
  data <- data[inter, , drop=FALSE]
  keep <- ann[, from.id] %in% inter & !duplicated(ann[, from.id])
  ann <- ann[keep, , drop=FALSE]
  data <- data[order(rownames(data)), , drop=FALSE]
  ann <- ann[order(ann[, from.id]), , drop=FALSE]
  rownames(data) <- ann$to.id

  # 'data' is always a SummarizedExperiment at this point (see coercion above).
  rowData(data) <- ann
  if (!is.null(desc.col)) rowData(data)$desc <- ann$GENENAME
  if (nrow(data.dif) > 0) {
    if (!is.null(desc.col)) rowData(data.dif)$desc <- NA
    data <- rbind(data, data.dif)
  }

  # Keep original row orders
  data <- data[base::match(orig.ids, rowData(data)[, from.id]), ]
  if (vec) {
    des <- if (is.null(desc.col)) NULL else 'desc'
    return(rowData(data)[, c(from.id, to.id, des)])
  }
  data
}
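# NOTE: the two lines below are web-page residue (snippet-embedding instructions), not R code;
# they are kept as comments so that the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.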