R/TCGA_id_conversion.R

Defines functions id_conversion_TCGA

Documented in id_conversion_TCGA

#' Convert ENSEMBL gene id to gene Symbol in TCGA
#'
#' The version suffix of every row name (everything from the first "."
#' onwards, e.g. "ENSG00000000003.14" -> "ENSG00000000003") is removed, the
#' resulting ENSEMBL ids are mapped with \code{clusterProfiler::bitr()} and
#' the rows are relabelled with the mapped identifiers. Rows whose id has no
#' mapping are removed.
#'
#' @param profiles a data.frame of gene expression data,
#' each column is a sample,
#' and each row is a gene. Row names must be ENSEMBL gene ids
#' (with or without a version suffix).
#' @param toType one of 'keytypes(org.Hs.eg.db)'
#'
#' @return a matrix (the result of \code{as.matrix(profiles)}) whose row
#' names are the mapped identifiers (gene symbols by default) and whose
#' columns are the samples. When an ENSEMBL id maps to several identifiers
#' the first one returned by the mapping is used. Row names are not
#' guaranteed to be unique: different ENSEMBL ids (or several versions of
#' the same id) can end up with the same label.
#' @export
#'
#' @examples
#' library(org.Hs.eg.db)
#' data(profile)
#' result <- id_conversion_TCGA(profile)
id_conversion_TCGA <- function(profiles, toType = "SYMBOL") {
    if (is.null(rownames(profiles))) {
        stop("'profiles' must have ENSEMBL gene ids as row names",
            call. = FALSE
        )
    }

    # Keep the stripped ids in a plain vector instead of writing them back as
    # row names: several versioned ids (e.g. "<id>.13" and "<id>.13_PAR_Y")
    # can collapse to the same ENSEMBL id, and a data.frame rejects duplicated
    # row names.
    ensembl_ids <- gsub("\\..*", "", rownames(profiles))

    genes <- clusterProfiler::bitr(unique(ensembl_ids),
        fromType = "ENSEMBL",
        toType = toType, OrgDb = org.Hs.eg.db::org.Hs.eg.db, drop = FALSE
    )

    # match() returns the first hit, so a one-to-many mapping resolves to the
    # first identifier listed for that ENSEMBL id; ids without a mapping
    # become NA.
    new_names <- genes[[2L]][match(ensembl_ids, genes[[1L]])]

    profiles2 <- as.matrix(profiles)
    rownames(profiles2) <- new_names

    # drop = FALSE keeps the result a matrix even when a single row or a
    # single sample is left.
    profiles2[!is.na(new_names), , drop = FALSE]
}
huerqiang/GeoTcgaData documentation built on March 21, 2024, 1:42 a.m.