11 - ELMER: Use case
In ELMER: Inferring Regulatory Element Landscapes and Transcription Factor Networks Using Cancer Methylomes

Introduction

This section contains the complete ELMER code for the following analysis:

Vignette example
BRCA Supervised analysis
BRCA Unsupervised analysis

Vignette example

Below is the complete code that was explained in the other sections.

library(MultiAssayExperiment)
library(ELMER.data)
library(ELMER)
# get distal probes that are 2kb away from TSS on chromosome 1
distal.probes <- get.feature.probe(genome = "hg19", 
                                   met.platform = "450K", 
                                   rm.chr = paste0("chr",c(2:22,"X","Y")))
data(LUSC_RNA_refined,package = "ELMER.data") # GeneExp
data(LUSC_meth_refined,package = "ELMER.data") # Meth

mae <- createMAE(exp = GeneExp, 
                 met = Meth,
                 save = TRUE,
                 linearize.exp = TRUE,
                 save.filename = "mae.rda",
                 filter.probes = distal.probes,
                 met.platform = "450K",
                 genome = "hg19",
                 TCGA = TRUE)

group.col <- "definition"
group1 <-  "Primary solid Tumor"
group2 <- "Solid Tissue Normal"
dir.out <- "result"
diff.dir <-  "hypo" # Search for hypomethylated probes in group 1

sig.diff <- get.diff.meth(data = mae, 
                          group.col = group.col,
                          group1 = group1,
                          group2 = group2,
                          minSubgroupFrac = 0.2,
                          sig.dif = 0.3,
                          diff.dir = diff.dir,
                          cores = 1, 
                          dir.out = dir.out,
                          pvalue = 0.01)


nearGenes <- GetNearGenes(data = mae, 
                          probes = sig.diff$probe, 
                          numFlankingGenes = 20) # 10 upstream and 10 dowstream genes

pair <- get.pair(data = mae,
                 group.col = group.col,
                 group1 = group1,
                 mode = "unsupervised",
                 group2 = group2,
                 nearGenes = nearGenes,
                 diff.dir = diff.dir,
                 minSubgroupFrac = 0.4, # % of samples to use in to create groups U/M
                 permu.dir = file.path(dir.out,"permu"),
                 permu.size = 100, # Please set to 100000 to get significant results
                 raw.pvalue = 0.05,   
                 Pe = 0.01, # Please set to 0.001 to get significant results
                 filter.probes = TRUE, # See preAssociationProbeFiltering function
                 filter.percentage = 0.05,
                 filter.portion = 0.3,
                 dir.out = dir.out,
                 cores = 1,
                 label = diff.dir)

# Identify enriched motif for significantly hypomethylated probes which 
# have putative target genes.
enriched.motif <- get.enriched.motif(data = mae,
                                     probes = pair$Probe, 
                                     dir.out = dir.out, 
                                     label = diff.dir,
                                     min.incidence = 10,
                                     lower.OR = 1.1)

TF <- get.TFs(data = mae, 
              mode = "unsupervised",
              group.col = group.col,
              group1 = group1,
              group2 = group2,
              enriched.motif = enriched.motif,
              dir.out = dir.out,
              cores = 1, 
              label = diff.dir)

BRCA Supervised analysis

library(stringr)
library(TCGAbiolinks)
library(dplyr)
library(ELMER)
library(MultiAssayExperiment)
library(parallel)
library(readr)
dir.create("~/paper_elmer/",showWarnings = FALSE)
setwd("~/paper_elmer/")

file <- "mae_BRCA_hg38_450K_no_ffpe.rda"
if(file.exists(file)) {
  mae <- get(load(file))
} else {
  getTCGA(disease = "BRCA", # TCGA disease abbreviation (BRCA,BLCA,GBM, LGG, etc)
          basedir = "DATA", # Where data will be downloaded
          genome  = "hg38") # Genome of refenrece "hg38" or "hg19"

  distal.probes <- get.feature.probe(feature = NULL,
                                     genome = "hg38", 
                                     met.platform = "450K") 

  mae <- createMAE(exp = "~/paper_elmer/Data/BRCA/BRCA_RNA_hg38.rda", 
                   met = "~/paper_elmer/Data/BRCA/BRCA_meth_hg38.rda", 
                   met.platform = "450K",
                   genome = "hg38",
                   linearize.exp = TRUE,
                   filter.probes = distal.probes,
                   met.na.cut = 0.2,
                   save = FALSE,
                   TCGA = TRUE) 
  # Remove FFPE samples from the analysis
  mae <- mae[,!mae$is_ffpe]

  # Get molecular subytpe information from cell paper and more metadata (purity etc...)
  # https://doi.org/10.1016/j.cell.2015.09.033
  file <- "http://ars.els-cdn.com/content/image/1-s2.0-S0092867415011952-mmc2.xlsx"
  downloader::download(file, basename(file))
  subtypes <- readxl::read_excel(basename(file), skip = 2)

  subtypes$sample <- substr(subtypes$Methylation,1,16)
  meta.data <- merge(colData(mae),subtypes,by = "sample",all.x = T)
  meta.data <- meta.data[match(colData(mae)$sample,meta.data$sample),]
  meta.data <- S4Vectors::DataFrame(meta.data)
  rownames(meta.data) <- meta.data$sample
  stopifnot(all(meta.data$patient == colData(mae)$patient))
  colData(mae) <- meta.data
  save(mae, file = "mae_BRCA_hg38_450K_no_ffpe.rda")
}
dir.out <- "BRCA_unsupervised_hg38/hypo"
cores <- 10
diff.probes <- get.diff.meth(data = mae, 
                             group.col = "definition",
                             group1 = "Primary solid Tumor",
                             group2 = "Solid Tissue Normal",
                             diff.dir = "hypo", # Get probes hypometh. in group 1 
                             cores = cores,
                             minSubgroupFrac = 0.2, # % group samples  used. 
                             pvalue = 0.01, 
                             sig.dif = 0.3,
                             dir.out = dir.out,
                             save = TRUE)

# For each differently methylated probes we will get the 
# 20 nearby genes (10 downstream and 10 upstream)
nearGenes <- GetNearGenes(data = mae, 
                          probes =  diff.probes$probe, 
                          numFlankingGenes = 20)

# This step is the most time consuming. Depending on the size of the groups
# and the number of probes found previously it migh take hours
Hypo.pair <- get.pair(data = mae,
                      nearGenes = nearGenes,
                      group.col = "definition",
                      group1 = "Primary solid Tumor",
                      group2 = "Solid Tissue Normal",
                      permu.dir = paste0(dir.out,"/permu"),
                      permu.size = 10000, 
                      mode = "unsupervised",
                      minSubgroupFrac = 0.4, # 40% of samples to create U and M
                      raw.pvalue = 0.001,   
                      Pe = 0.001, 
                      filter.probes = TRUE,
                      filter.percentage = 0.05,
                      filter.portion = 0.3,
                      dir.out = dir.out,
                      cores = cores,
                      label = "hypo")
# Number of pairs: 2950 


enriched.motif <- get.enriched.motif(data = mae,
                                     min.motif.quality = "DS",
                                     probes = unique(Hypo.pair$Probe), 
                                     dir.out = dir.out, 
                                     label = "hypo",
                                     min.incidence = 10,
                                     lower.OR = 1.1)
TF <- get.TFs(data = mae, 
              group.col = "definition",
              group1 = "Primary solid Tumor",
              group2 = "Solid Tissue Normal",
              minSubgroupFrac = 0.4, # Set to 1 if supervised mode
              enriched.motif = enriched.motif,
              dir.out = dir.out, 
              cores = cores, 
              label = "hypo")

BRCA Unsupervised analysis

library(stringr)
library(TCGAbiolinks)
library(dplyr)
library(ELMER)
library(MultiAssayExperiment)
library(parallel)
library(readr)
#-----------------------------------
# 1 - Samples
# ----------------------------------
dir.create("~/paper_elmer/",showWarnings = FALSE)
setwd("~/paper_elmer/")

file <- "mae_BRCA_hg38_450K_no_ffpe.rda"
if(file.exists(file)) {
  mae <- get(load(file))
} else {
  getTCGA(disease = "BRCA", # TCGA disease abbreviation (BRCA,BLCA,GBM, LGG, etc)
          basedir = "DATA", # Where data will be downloaded
          genome  = "hg38") # Genome of refenrece "hg38" or "hg19"

  distal.probes <- get.feature.probe(feature = NULL,
                                     genome = "hg38", 
                                     met.platform = "450K") 

  mae <- createMAE(exp = "DATA/BRCA/BRCA_RNA_hg38.rda", 
                   met = "DATA/BRCA/BRCA_meth_hg38.rda", 
                   met.platform = "450K",
                   genome = "hg38",
                   linearize.exp = TRUE,
                   filter.probes = distal.probes,
                   met.na.cut = 0.2,
                   save = FALSE,
                   TCGA = TRUE) 
  # Remove FFPE samples from the analysis
  mae <- mae[,!mae$is_ffpe]

  # Get molecular subytpe information from cell paper and more metadata (purity etc...)
  # https://doi.org/10.1016/j.cell.2015.09.033
  file <- "http://ars.els-cdn.com/content/image/1-s2.0-S0092867415011952-mmc2.xlsx"
  downloader::download(file, basename(file))
  subtypes <- readxl::read_excel(basename(file), skip = 2)

  subtypes$sample <- substr(subtypes$Methylation,1,16)
  meta.data <- merge(colData(mae),subtypes,by = "sample",all.x = T)
  meta.data <- meta.data[match(colData(mae)$sample,meta.data$sample),]
  meta.data <- S4Vectors::DataFrame(meta.data)
  rownames(meta.data) <- meta.data$sample
  stopifnot(all(meta.data$patient == colData(mae)$patient))
  colData(mae) <- meta.data
  save(mae, file = "mae_BRCA_hg38_450K_no_ffpe.rda")
}

cores <- 6
direction <- c( "hypo","hyper")
genome <- "hg38"
group.col  <- "PAM50"
groups <- t(combn(na.omit(unique(colData(mae)[,group.col])),2))
for(g in 1:nrow(groups)) {
  group1 <- groups[g,1]
  group2 <- groups[g,2]
  for (j in direction){
    tryCatch({
      message("Analysing probes ",j, "methylated in ", group1, " vs ", group2)
      dir.out <- paste0("BRCA_supervised_",genome,"/",group1,"_",group2,"/",j)
      dir.create(dir.out, recursive = TRUE)
      #--------------------------------------
      # STEP 3: Analysis                     |
      #--------------------------------------
      # Step 3.1: Get diff methylated probes |
      #--------------------------------------
      Sig.probes <- get.diff.meth(data       = mae,
                                  group.col  = group.col,
                                  group1     = group1,
                                  group2     = group2,
                                  sig.dif    = 0.3,
                                  minSubgroupFrac = 1,
                                  cores      = cores,
                                  dir.out    = dir.out,
                                  diff.dir   = j,
                                  pvalue     = 0.01)
      if(nrow(Sig.probes) == 0) next
      #-------------------------------------------------------------
      # Step 3.2: Identify significant probe-gene pairs            |
      #-------------------------------------------------------------
      # Collect nearby 20 genes for Sig.probes
      nearGenes <- GetNearGenes(data  = mae,
                                probe = Sig.probes$probe)

      pair <- get.pair(data       = mae,
                       nearGenes  = nearGenes,
                       group.col  = group.col,
                       group1     = group1,
                       group2     = group2,
                       permu.dir  = paste0(dir.out,"/permu"),
                       dir.out    = dir.out,
                       mode       = "supervised", 
                       diff.dir   = j,
                       cores      = cores,
                       label      = j,
                       permu.size = 10000,
                       raw.pvalue = 0.001)

      Sig.probes.paired <- readr::read_csv(paste0(dir.out,
                                           "/getPair.",j,
                                           ".pairs.significant.csv"))[,1, drop = TRUE]


      #-------------------------------------------------------------
      # Step 3.3: Motif enrichment analysis on the selected probes |
      #-------------------------------------------------------------
      if(length(Sig.probes.paired) > 0 ){
        #-------------------------------------------------------------
        # Step 3.3: Motif enrichment analysis on the selected probes |
        #-------------------------------------------------------------
        enriched.motif <- get.enriched.motif(probes  = Sig.probes.paired,
                                             dir.out = dir.out,
                                             data    = mae,
                                             label   = j,
                                             plot.title =  paste0("BRCA: OR for paired probes ",
                                                                  j, "methylated in ",
                                                                  group1, " vs ",group2))
        motif.enrichment <- readr::read_csv(paste0(dir.out,
                                            "/getMotif.",j,
                                            ".motif.enrichment.csv"))
        if(length(enriched.motif) > 0){
          #-------------------------------------------------------------
          # Step 3.4: Identifying regulatory TFs                        |
          #-------------------------------------------------------------
          print("get.TFs")

          TF <- get.TFs(data           = mae,
                        enriched.motif = enriched.motif,
                        dir.out        = dir.out,
                        mode           = "supervised",
                        group.col      = group.col,
                        group1         = group1,
                        diff.dir       = j,
                        group2         = group2,
                        cores          = cores,
                        label          = j)
          TF.meth.cor <- get(load(paste0(dir.out,
                                         "/getTF.",j,
                                         ".TFs.with.motif.pvalue.rda")))
          save(mae,TF, enriched.motif, Sig.probes.paired,
               pair, nearGenes, Sig.probes, motif.enrichment,
               TF.meth.cor,
               file = paste0(dir.out,"/ELMER_results_",j,".rda"))
        }
      }
    }, error = function(e){
      message(e)
    })
  }
}

Any scripts or data that you put into this service are public.

ELMER documentation built on Nov. 8, 2020, 4:59 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ELMER
Inferring Regulatory Element Landscapes and Transcription Factor Networks Using Cancer Methylomes

11 - ELMER: Use case
In ELMER: Inferring Regulatory Element Landscapes and Transcription Factor Networks Using Cancer Methylomes

Introduction

Vignette example

BRCA Supervised analysis

BRCA Unsupervised analysis

Try the ELMER package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ELMER Inferring Regulatory Element Landscapes and Transcription Factor Networks Using Cancer Methylomes

11 - ELMER: Use case In ELMER: Inferring Regulatory Element Landscapes and Transcription Factor Networks Using Cancer Methylomes

Introduction

Vignette example

BRCA Supervised analysis

BRCA Unsupervised analysis

Try the ELMER package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ELMER
Inferring Regulatory Element Landscapes and Transcription Factor Networks Using Cancer Methylomes

11 - ELMER: Use case
In ELMER: Inferring Regulatory Element Landscapes and Transcription Factor Networks Using Cancer Methylomes