Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
message = FALSE,
warning = FALSE
)
## ----setup--------------------------------------------------------------------
library(GenomicRanges)
library(smer)
## ----bim_data-----------------------------------------------------------------
bim_data <- data.frame(
chromosome = c(1, 1, 1, 2, 2, 2, 3, 3, 3),
variant_id = c("rs1", "rs2", "rs3", "rs4", "rs5", "rs6", "rs7", "rs8", "rs9"),
cm_position = c(0, 0, 0, 0, 0, 0, 0, 0, 0),
bp_position = c(10, 20, 30, 40, 50, 60, 70, 80, 90),
allele1 = c("A", "A", "A", "G", "C", "C", "T", "T", "A"),
allele1 = c("G", "G", "G", "A", "T", "T", "A", "A", "G")
)
bim_data$index <- 1:nrow(bim_data)
# DHS intervals
hg19_dhs_regions <- data.frame(
chromosome = c(1, 2, 3),
start = c(5, 45, 85),
stop = c(15, 55, 95)
)
# LD block intervals
hg19_ld_blocks <- data.frame(
chromosome = c(1, 1, 2, 2, 3, 3, 3),
start = c(5, 25, 35, 45, 65, 75, 85),
stop = c(25, 35, 45, 65, 75, 85, 95)
)
## ----dhs_data-----------------------------------------------------------------
# Convert .bim to GRanges object
bim_gr <- GRanges(
seqnames = paste0("chr", bim_data$chromosome),
ranges = IRanges(start = bim_data$bp_position, end = bim_data$bp_position),
variant_id = bim_data$variant_id,
genome = "hg19"
)
# Convert DHS to GRanges object
dhs_gr <- GRanges(
seqnames = paste0("chr", hg19_dhs_regions$chromosome),
ranges = IRanges(start = hg19_dhs_regions$start, end = hg19_dhs_regions$stop),
genome = "hg19"
)
# Find overlaps of BIM variants and DHS intervals
overlaps <- findOverlaps(bim_gr, dhs_gr, maxgap = 0)
# Extract overlapping variants
dhs_data <- bim_data[queryHits(overlaps), ]
dhs_data <- dhs_data[!duplicated(dhs_data$index), ]
## -----------------------------------------------------------------------------
# Convert to GRanges object
ld_gr <- GRanges(
seqnames = paste0("chr", hg19_ld_blocks$chromosome),
ranges = IRanges(start = hg19_ld_blocks$start, end = hg19_ld_blocks$stop),
genome = "hg19"
)
# Find LD block of bim variants
ld_overlaps <- findOverlaps(query = bim_gr, subject = ld_gr)
## ----write_mask---------------------------------------------------------------
output_file <- tempfile()
gxg_group <- "gxg"
ld_group <- "ld"
gxg_variants <- dhs_data$index - 1 # 0-base index for C++
create_hdf5_file(output_file)
for (j in bim_data$index - 1) { # 0-base index for C++
# Write DHS mask
gxg_ds <- sprintf("%s/%d", gxg_group, j)
write_hdf5_dataset(file_name = output_file,
dataset_name = gxg_ds,
gxg_variants)
# Find LD block of focal SNP
focal_gr <- ld_gr[subjectHits(ld_overlaps[j,])]
# Find variants in LD block of focal SNP
focal_ld <- findOverlaps(query = bim_gr, subject = focal_gr)
ld_data <- bim_data[queryHits(focal_ld),]
ld_variants <- ld_data$index - 1 # 0-base index for C++
# Write LD mask
ld_ds <- sprintf("%s/%d", ld_group, j)
write_hdf5_dataset(file_name = output_file,
dataset_name = ld_ds,
ld_variants)
}
dhs_indices <- read_hdf5_dataset(file_name = output_file, dataset_name = gxg_ds)
print(sprintf("DHS indices: %s", paste(dhs_indices, collapse = ", ")))
## ----run_sme, eval = FALSE----------------------------------------------------
# sme_result <- sme(
# plink_file = "/path/to/plink/data",
# pheno_file = "/path/to/pheno/data",
# mask_file = "/path/to/mask/file",
# gxg_indices = c(1, 2, 3),
# chunk_size = 250,
# n_randvecs = 10,
# n_blocks = 200,
# n_threads = 6
# )
## ----sessinfo-----------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.