#library(shiny)
#library(geuvPack)
#if (!exists("geuFPKM")) data(geuFPKM)
#library(ggplot2)
#library(BiocRnaHap)
#library(magrittr)
#library(dplyr)
#
#allg = sort(genegr$gene_name)
# varnum_to_use = 2:6
# haptab = NA06986_rnahaps
#' produce a filtered RNA-haplotype table as GRanges, limiting
#' to SNP-sets of specific cardinality and (optionally) providing a set
#' of gene ranges 'nearest' to the initial SNP
#' @importFrom GenomicRanges GRanges
#' @importFrom IRanges IRanges
#' @param haptab data.frame corresponding to phaser haplotypes.txt table
#' @param genegr a GRanges with gene ranges
#' @param varnum_to_use the values of the 'variants' field for which records are retained
#' @param useNearestRubric logical(1) defaults to FALSE
#' @examples
#' head(filter_haptab())
#' @export
filter_haptab = function(haptab=BiocRnaHap::NA06986_rnahaps,
genegr = ensembldb::genes(EnsDb.Hsapiens.v75::EnsDb.Hsapiens.v75),
varnum_to_use=2:6, useNearestRubric=FALSE) {
haptab_gr = GenomicRanges::GRanges(haptab$contig,
IRanges::IRanges(haptab$start, haptab$stop))
mcols(haptab_gr) = haptab[,-c(1:4)]
haptab_gr_filt = haptab_gr[haptab_gr$variants %in% varnum_to_use]
haptab_gr_filt = haptab_gr_filt[ grep("^rs", haptab_gr_filt$variant_ids)]
if (useNearestRubric) {
gn = genegr[nearest(haptab_gr_filt, genegr)]
gn = gn[which(nchar(gn$gene_name)>0)]
}
else gn = genegr
list(haptab_filt = haptab_gr_filt, genes_near=gn)
}
#' produce a GRanges with RNA haplotypes near a specific gene
#' @param sym character(1) gene symbol
#' @param haptab data.frame corresponding to phaser haplotypes.txt table
#' @param genes a GRanges as produced by ensembldb::genes
#' @param radius numeric(1) interval upstream and downstream to be included with gene region for selection of RNA haplotypes
#' @param min_total_reads numeric(1) lower bound on number of reads needed to retain a haplotype
#' @param min_variants lower bound on number of variants in the haplotype to warrant retention
#' @param varnum_to_use the values of the 'variants' field for which records are retained
#' @note grep() is used with fixed=TRUE to find sym in genes()$gene_name
#' @examples
#' rnahapsNearGene("GSDMB")
#' @export
rnahapsNearGene = function(sym, haptab=NA06986_rnahaps,
genes=ensembldb::genes(EnsDb.Hsapiens.v75::EnsDb.Hsapiens.v75),
radius=10000, min_total_reads=15, min_variants=2,
varnum_to_use = 2:6) {
ind = grep(sym,genes$gene_name,fixed=TRUE)
if (length(ind)==0) stop("sym not found")
if (length(ind)>1) {
warning("multiple representations of symbol, using first")
ind = ind[1]
}
haptab_gr_filt = filter_haptab(haptab)$haptab_filt
# haptab_gr = GRanges(haptab$contig, IRanges(haptab$start, haptab$start))
# mcols(haptab_gr) = haptab[,-c(1:4)]
# haptab_gr_filt = haptab_gr[haptab_gr$variants %in% varnum_to_use]
# haptab_gr_filt = haptab_gr_filt[ grep("^rs", haptab_gr_filt$variant_ids)]
chk = subsetByOverlaps(haptab_gr_filt, genes[ind]+radius)
chk[which(chk$reads_total>=min_total_reads & chk$variants >= min_variants)]
}
#' compute a data.frame with GEUVADIS expression for a selected gene
#' and associated haplotypes formed using entries in snps
#' @note it is assumed that the entries in `snps` are elements of
#' a 'long-range' RNA-based haplotype as
#' inferred by phaser
#' @param sym a character(1) gene symbol
#' @param snps a vector of SNP identifiers
#' @param vcf a CompressedVcf instance from VariantAnnotation
#' @param \dots not currently used
#' @examples
#' requireNamespace("geuvPack")
#' if (!exists("geuFPKM")) {
#' require(geuvPack)
#' data(geuFPKM)
#' }
#' tab = geuvEvH(vcf=BiocRnaHap::gsd_vcf)
#' head(tab)
#' require(ggplot2)
#' ggplot(tab, aes(x=haplotypes, y=exprs, colour=popcode)) +
#' geom_boxplot(aes(group=haplotypes), outlier.size=0) +
#' geom_jitter()
#' @export
geuvEvH = function (sym = "GSDMB", snps = c("rs2305480", "rs2305479"),
vcf = NULL, ...)
{
if (!exists("geuFPKM"))
stop("load geuvPack and data(geuFPKM) to use this function")
ind = grep(sym, rowData(geuFPKM)$gene_name)
stopifnot(length(ind) > 0)
if (length(ind) > 1)
warning("multiple quantifications for requested gene, using first available")
ex = log(assay(geuFPKM[ind, ]) + 1)
names(ex) = colnames(geuFPKM)
if (is.null(vcf)) vcf = look1kg(snps)
gtvec = apply(geno(vcf)$GT, 2, paste, collapse = ":")
okids = intersect(colnames(geuFPKM), names(gtvec))
ex = as.numeric(ex[okids])
names(ex) = okids
pc = geuFPKM[,okids]$popcode
data.frame(ids = okids, exprs = ex, haplotypes = gtvec[okids],
gene = sym, popcode=pc)
}
#' create list of snpids in RNA-defined haplotypes
#' @param sym character(1) gene symbol
#' @param radius numeric(1) interval around gene to include
#' @param rna_hap_tab a data.frame as generated by phaser
#' @param \dots passed to rnahapsNearGene
#' @return a list
#' @examples
#' variant_options()
#' @export
variant_options = function(sym="ORMDL3", radius=50000, rna_hap_tab=NA06986_rnahaps, ...) {
ans = mcols(rnahapsNearGene(sym=sym, radius=radius,
haptab=rna_hap_tab, ...))$variant_ids
strsplit(ans, ",")
}
setClass("HapTab", representation(table="data.frame",
source="character"))
#' encapsulate phaser output text tables
#' @param datfr data.frame instance importing 'haplotypes.txt'
#' @param source character(1) tag characterizing origin
#' @examples
#' HapTab(NA06986_rnahaps, "phaser NA06986")
#' @export
HapTab = function(datfr, source)
new("HapTab", table=datfr, source=source)
setMethod("show", "HapTab", function(object) {
cat(sprintf("HapTab with %s records, source: %s\n",
nrow(object@table), object@source))
})
setClass("RnaHapSet", representation(
genesym = "character",
radius = "numeric",
haplist = "list",
source = "HapTab"))
#' encapsulate a list of SNP configurations produced as haplotypes by phaser
#' @param genesym character(1) symbol
#' @param radius numeric(1) region around gene to include
#' @param haplist a list as returned by `variant_options`
#' @param source a HapTab instance
#' @examples
#' ht = HapTab(NA06986_rnahaps, "phaser NA06986")
#' RnaHapSet("ORMDL3", radius=50000, source=ht,
#' haplist=variant_options("ORMDL3", 50000))
#' @export
RnaHapSet = function(genesym, radius, source,
haplist=variant_options(genesym, radius)) {
new("RnaHapSet", genesym=genesym, radius=radius, haplist=haplist,
source=source)
}
setMethod("show", "RnaHapSet", function(object) {
cat(sprintf("RnaHapSet for gene %s (radius %s):\n",
object@genesym, object@radius))
cat(sprintf(" there are %s haplotypes.\n", length(object@haplist)))
ii = lapply(object@haplist, paste, collapse=":")
jj = lapply(ii, function(x) paste0(x, ", "))
hapegs = do.call(paste0, jj)
if (nchar(hapegs)>55) hapegs = paste0(substr(hapegs, 1, 55), "...")
cat(" ", hapegs, "\n")
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.