R/PlotGene.R

Defines functions EpiMix_PlotGene

Documented in EpiMix_PlotGene

#' The EpiMix_PlotGene function
#' @description plot the genomic coordinate, DM values and chromatin state for each CpG probe of a specific gene.
#' @details this function requires R package dependencies: karyoploteR, TxDb.Hsapiens.UCSC.hg19.knownGene, org.Hs.eg.db
#' @param gene.name character string indicating the name of the gene to be plotted.
#' @param EpiMixResults the resulting list object returned from the function of EpiMix.
#' @param met.platform character string indicating the type of the microarray where the DNA methylation data were collected. The value should be either 'HM27', 'HM450' or 'EPIC'. Default: 'HM450'
#' @param roadmap.epigenome.id character string indicating the epigenome id (EID) for a reference tissue or cell type. Default: 'E002'. If the value is empty (""), no histone modifications plot will show.\
#' Note: Keep this value empty if using the Windows system, since this feature is not supported in Windows.
#' @param left.gene.margin  numeric value indicating the number of extra nucleotide bases to be plotted on the left side of the target gene. Default: 10000.
#' @param right.gene.margin numeric value indicating the number of extra nucleotide bases to be plotted on the right side of the target gene. Default: 10000.
#' @param gene.name.font numeric value indicating the font size for the gene name. Default: 0.7.
#' @param plot.transcripts logic indicating whether to plot each individual transcript of the gene. Default: TRUE. If False, the gene will be plotted with a single rectangle, without showing the structure of individual transcripts.
#' @param plot.transcripts.structure logic indicating whether to plot the transcript structure (introns and exons). Non-coding exons are shown in green and the coding exons are shown in red. Default: TRUE.
#' @param show.probe.name logic indicating whether to show the name(s) for each differentially methylated CpG probe. Default: TRUE
#' @param probe.name.font numeric value indicating the font size of the name(s) for the differentially methylated probe(s) in pixels. Default: 0.6.
#' @param y.label.font font size of the y axis label
#' @param y.label.margin distance between y axis label and y axis
#' @param axis.number.font font size of axis ticks and numbers
#' @param chromatin.label.font font size of the labels of the histone proteins
#' @param chromatin.label.margin distance between the histone protein labels and axis
#' @importFrom biomaRt useDataset getBM useEnsembl
#' @importFrom GenomicRanges makeGRangesFromDataFrame seqnames start end mcols
#' @importFrom S4Vectors queryHits subjectHits
#' @importFrom RCurl getURL
#' @importFrom RColorBrewer brewer.pal
#' @return plot of the genomic coordinate, DM values and chromatin state for each CpG probe of a specific gene.
#' @details
#' roadmap.epigenome.id: since the chromatin state is tissue or cell-type specific,
#' EpiMix needs to know the reference tissue or cell type in order to retrieve the proper DNase-seq and histone ChIP-seq data.
#' Available epigenome ids can be obtained from the Roadmap Epigenomic study (Nature, PMID: 25693563, figure 2).
#' They can also be retrieved from the list.epigenomes() function.
#' @export
#' @examples
#' \donttest{
#' library(karyoploteR)
#' library(TxDb.Hsapiens.UCSC.hg19.knownGene)
#' library(org.Hs.eg.db)
#' library(regioneR)
#'
#' data(Sample_EpiMixResults_Regular)
#'
#' gene.name = 'CCND2'
#'
#' roadmap.epigenome.id = 'E096'
#'
#' EpiMix_PlotGene(gene.name = gene.name,
#'                 EpiMixResults = Sample_EpiMixResults_Regular,
#'                 met.platform = 'HM450',
#'                 roadmap.epigenome.id = roadmap.epigenome.id)
#' }
#'
EpiMix_PlotGene <- function(gene.name, EpiMixResults, met.platform = "HM450", roadmap.epigenome.id = "E002",
    left.gene.margin = 10000, right.gene.margin = 10000, gene.name.font = 0.7, show.probe.name = TRUE,
    probe.name.font = 0.6, plot.transcripts = TRUE, plot.transcripts.structure = TRUE,
    y.label.font = 0.8, y.label.margin = 0.1, axis.number.font = 0.5, chromatin.label.font = 0.7,
    chromatin.label.margin = 0.02) {

    if (!requireNamespace("karyoploteR")) {
        message("This function requires the 'karyoploteR' package.")
        return(invisible())
    }

    if (!requireNamespace("TxDb.Hsapiens.UCSC.hg19.knownGene")) {
        message("This function requires the 'TxDb.Hsapiens.UCSC.hg19.knownGene' package.")
        return(invisible())
    }

    if (!requireNamespace("org.Hs.eg.db")) {
        message("This function requires the 'org.Hs.eg.db' package.")
        return(invisible())
    }

    if (!requireNamespace("regioneR")) {
        message("This function requires the 'org.Hs.eg.db' package.")
        return(invisible())
    }

    # Retrieve the genetic coordinates of the gene
    cat("Retrieving genetic information for", gene.name, "from Ensembl...\n")

    # the bigwig visulization of histone modifciations and DNA-binding proteins
    # only support hg19.
    genome <- "hg19"
    GRCh <- NULL
    if (genome == "hg19") {
        GRCh <- 37
    } else if (genome == "hg38") {
        GRCh <- 38
    }
    mart <- biomaRt::useEnsembl(biomart = "ensembl", GRCh = GRCh, dataset = "hsapiens_gene_ensembl")
    genePosition <- biomaRt::getBM(filters = "hgnc_symbol", attributes = c("chromosome_name",
        "start_position", "end_position"), values = gene.name, mart = mart)
    chr_ID <- genePosition$chromosome_name  # '16'
    chr_name <- paste0("chr", chr_ID)  # 'chr16'

    # Retrieve probe information
    cat("Retrieving probe annotation...\n")
    ProbeAnnotation <- EpiMix_getInfiniumAnnotation(plat = met.platform, genome = genome)
    ProbeAnnotation <- convertAnnotToDF(ProbeAnnotation)
    ProbeAnnotation <- .mapProbeGene(ProbeAnnotation)
    ProbeAnnotation <- ProbeAnnotation[ProbeAnnotation$gene == gene.name, ]
    ProbeAnnotation <- ProbeAnnotation[order(ProbeAnnotation$CpG_beg), ]

    # Find the start and the end position of the gene. Note: gene can be
    # inversely positioned on the genome.
    start_position <- genePosition$start_position
    end_position <- genePosition$end_position
    start_position <- start_position - left.gene.margin
    end_position <- end_position + right.gene.margin
    gene.region <- regioneR::toGRanges(data.frame(chr_name, start_position, end_position))

    # Set up some parameters for graph
    pp <- karyoploteR::getDefaultPlotParams(plot.type = 1)
    pp$leftmargin <- 0.15
    pp$topmargin <- 15
    pp$bottommargin <- 15
    pp$ideogramheight <- 5
    pp$data1inmargin <- 10
    pp$data1outmargin <- 0

    kp <- karyoploteR::plotKaryotype(genome = genome, zoom = gene.region, plot.type = 1,
        plot.params = pp)
    karyoploteR::kpAddBaseNumbers(kp, tick.dist = (end_position - start_position)/6,
        minor.tick.dist = (end_position - start_position)/12, add.units = TRUE, cex = 0.5,
        tick.len = 3)

    # Plot gene structure with transcripts
    cat("Retrieving transcript information from TxDB...\n")

    # if(genome == 'hg19'){ TxDB_genome<- TxDb.Hsapiens.UCSC.hg19.knownGene
    # }else{ TxDB_genome<- TxDb.Hsapiens.UCSC.hg38.knownGene }

    TxDB_genome <- TxDb.Hsapiens.UCSC.hg19.knownGene

    genes.data <- karyoploteR::makeGenesDataFromTxDb(TxDB_genome, karyoplot = kp)

    # gene.data contain the transcript information for all the genes in the
    # designated plot region, we need to filter the information to the gene
    # specified by the users
    gene_id_map <- AnnotationDbi::select(org.Hs.eg.db, keys = gene.name, columns = c("ENTREZID"),
        keytype = "SYMBOL")

    gene.id <- gene_id_map$ENTREZID
    genes.data$genes <- genes.data$genes[gene.id]
    genes.data$transcripts <- genes.data$transcripts[gene.id]
    tx.id <- as.character(genes.data$transcripts[[1]]$tx_id)
    genes.data$coding.exons <- genes.data$coding.exons[tx.id]
    genes.data$non.coding.exons <- genes.data$non.coding.exons[tx.id]
    genes.data <- karyoploteR::addGeneNames(genes.data)
    # genes.data <- mergeTranscripts(genes.data)

    r0_Gene <- 0.02
    r1_Gene <- 0.15
    karyoploteR::kpDataBackground(kp, r0 = r0_Gene - 0.02, r1 = r1_Gene + 0.01, col = "#AACBFF")
    karyoploteR::kpPlotGenes(kp, data = genes.data, data.panel = 1, r0 = r0_Gene,
        r1 = r1_Gene, gene.name.position = "left", gene.name.cex = gene.name.font,
        plot.transcripts = plot.transcripts, plot.transcripts.structure = plot.transcripts.structure,
        non.coding.exons.col = "green", non.coding.exons.border.col = "green", coding.exons.col = "red",
        coding.exons.border.col = "red")

    karyoploteR::kpAddLabels(kp, labels = "Transcripts", r0 = r0_Gene, r1 = r1_Gene,
        srt = 90, pos = 1, label.margin = y.label.margin, cex = y.label.font)

    # Plot DM values for CpGs Find differentially methylated CpG probes for the
    # target gene
    CpGProbes <- ProbeAnnotation$probeID
    MethylationDrivers <- EpiMixResults$MethylationDrivers
    MixtureStates <- EpiMixResults$MixtureStates

    # Obtain vectors recording x,y coordinates for each CpG probes
    x <- character(0)
    y <- character(0)

    for (i in c(1:length(CpGProbes))) {
        CpG_Position <- ProbeAnnotation$CpG_beg[i]
        x <- c(x, CpG_Position)
        DM_Value <- 0
        if (CpGProbes[i] %in% MethylationDrivers) {
            values <- unlist(MixtureStates[CpGProbes[i]])
            if (min(values) < 0)
                DM_Value <- min(values)  # This will change in the future for dual methylated CpGs
            if (max(values) > 0)
                DM_Value <- max(values)
        }
        y <- c(y, DM_Value)
    }
    x <- as.numeric(x)
    y <- as.numeric(y)

    # Plot axis and labels
    ymax <- ymin <- 0
    if (max(y) > 0) {
        ymax <- round(max(y), digits = 1) + 0.1
    }
    if (min(y) < 0) {
        ymin <- round(min(y), digits = 1) - 0.1
    }

    r0_DMValue <- r1_Gene + 0.03
    r1_DMValue <- r0_DMValue + 0.2

    karyoploteR::kpAxis(kp, r0 = r0_DMValue, r1 = r1_DMValue, ymin = ymin, ymax = ymax,
        tick.pos = c(ymin, 0, ymax), cex = axis.number.font)
    karyoploteR::kpAddLabels(kp, labels = "DM value", r0 = r0_DMValue, r1 = r1_DMValue,
        srt = 90, pos = 1, label.margin = y.label.margin, cex = y.label.font)

    # Plot points and lines
    karyoploteR::kpPoints(kp, chr = chr_name, x = x, y = y, ymin = ymin, ymax = ymax,
        r0 = r0_DMValue, r1 = r1_DMValue, cex = 0.7, col = "blue")
    karyoploteR::kpLines(kp, chr = chr_name, x = x, y = y, ymin = ymin, ymax = ymax,
        r0 = r0_DMValue, r1 = r1_DMValue, lwd = 1.5)

    # Plot the name for differentially methylated CpG probes
    if (show.probe.name) {
        pos <- 1
        if (max(y) > 0)
            pos <- 3
        karyoploteR::kpText(kp, chr = chr_name, x = x[which(y != 0)], y = y[which(y !=
            0)], ymin = ymin, ymax = ymax, r0 = r0_DMValue, r1 = r1_DMValue, labels = CpGProbes[which(y !=
            0)], col = "red", pos = pos, offest = 0.3, cex = probe.name.font)
    }


    # Plot signals for histone marks and DNA binding proteins Construct a
    # vector with urls of the bigwig files
    if(length(roadmap.epigenome.id) > 0){
      cat("Retrieving signal intensity of histone marks and DNase sensitivity from RoadmapEpigenomics...\n")

      base_url <- "https://egg2.wustl.edu/roadmap/data/byFileType/signal/consolidated/macs2signal/pval/"
      urlData <- RCurl::getURL(base_url)
      urlData2 <- unlist(strsplit(urlData, "\\n"))
      filenames <- as.matrix(urlData2[grep(roadmap.epigenome.id, urlData2)])
      filenames <- unlist(strsplit(filenames, ">|<"))
      filenames <- filenames[grep(roadmap.epigenome.id, filenames)]
      filenames <- filenames[-grep("a href", filenames)]
      DNA.binding <- character(0)
      for (file in filenames) {
        start <- unlist(gregexpr("-", file)) + 1
        end <- unlist(gregexpr("pval", file)) - 2
        protein.name <- substring(file, start, end)
        DNA.binding[protein.name] <- paste0(base_url, file)
      }

      r0_Histone <- r1_DMValue + 0.03
      total.tracks <- length(DNA.binding)
      colors <- RColorBrewer::brewer.pal(total.tracks, "Set3")
      out.at <- karyoploteR::autotrack(1:total.tracks, total.tracks, margin = 0.3,
                                       r0 = r0_Histone)

      karyoploteR::kpAddLabels(kp, labels = "Protein signals", r0 = out.at$r0, r1 = out.at$r1,
                               cex = y.label.font, srt = 90, pos = 1, label.margin = y.label.margin)

      for (i in seq_len(total.tracks)) {
        at <- karyoploteR::autotrack(i, total.tracks, r0 = out.at$r0, r1 = out.at$r1,
                                     margin = 0.1)
        kp <- karyoploteR::kpPlotBigWig(kp, data = DNA.binding[i], ymax = "visible.region",
                                        r0 = at$r0, r1 = at$r1, col = colors[i])
        computed.ymax <- ceiling(kp$latest.plot$computed.values$ymax)
        karyoploteR::kpAxis(kp, ymin = 0, ymax = computed.ymax, tick.pos = computed.ymax,
                            r0 = at$r0, r1 = at$r1, cex = axis.number.font)
        karyoploteR::kpAddLabels(kp, labels = names(DNA.binding)[i], r0 = at$r0,
                                 r1 = at$r1, cex = chromatin.label.font, label.margin = chromatin.label.margin)
      }
    }
}


# library(AnnotationHub)
# ah = AnnotationHub()
# epiFiles <- query(ah , c("EpigenomeRoadMap", roadmap.epigenome.id, "BigWig", "pval.signal.bigwig"))
# epiFiles <- epiFiles[!grepl("imputed", epiFiles$title)]
#
# DNA.binding <- character(0)
# for(id in seq_along(epiFiles$title)){
#   file <- epiFiles$title[id]
#   start <- unlist(gregexpr("-", file)) + 1
#   end <- unlist(gregexpr("pval", file)) - 2
#   protein.name <- substring(file, start, end)
#   DNA.binding[protein.name] <- epiFiles$ah_id[id]
# }

# Plot two lines encompassing the differentially methylated CpG probes
# karyoploteR :: kpSegments(kp, chr=chr_name, x0=x[which(y!=0)]-20,
# x1=x[which(y!=0)]-20, y0=rep(r0_DMValue, length(x[which(y!=0)])), y1
# =rep(1, length(x[which(y!=0)]))) karyoploteR :: kpSegments(kp,
# chr=chr_name, x0=x[which(y!=0)]+20, x1=x[which(y!=0)]+20,
# y0=rep(r0_DMValue, length(x[which(y!=0)])), y1 =rep(1,
# length(x[which(y!=0)])))
gevaertlab/EpiMix documentation built on July 20, 2023, 9:28 a.m.