##' function to return of list of variant GRanges that overlap a particular gene/locus for a set of ids
##'
##' This function takes a geneID, list of sample IDs, a metadata table, and returns a list of
##' alleles/variants that overlap a gene/locus or a set of genes/loci from these samples
##' @title
##' @param gene the Entrez gene id(s) for the gene(s) of intest. if multiple it should be a vector
##' @param locus a GRanges object with the locus (loci) positional information and allele informtion. see help for example
##' @param txdb txdb of all genes
##' @param ids a list of ids for the samples of interest
##' @param meta a metaData table as generated by getCGPMetadata()
##' @param tech a character string denoting the technology ('rna_seq' or 'exome') to use
##' @param type a character string denoting the type of variants to be checked. Can be somatic, called, or raw
##' @param exact only used with the locus option, is a true/false to determine if exact matches to alleles are wanted.
##' If true the GR must have a elementMetaData called variant with the allele of interest given
##' @param save_dir directory to save VCF files. If null, no vcf files will be generated
##' @param cores number of cores to run on
##' @return a list of variant GRanges
##' @author Jeremiah Degenhardt
##'
##'
##' @export
getAlleleByLocus_mod<- function(gene=NULL,
locus =NULL,
gene_db,
ids,
meta,
tech = "exon",
type="somatic",
exact=FALSE,
save_dir=NULL,
cores=detectCores()){
warning("This function is hard coded with the genome information from the CGP3.0 release...")
genome_dir = '/gne/research/workspace/degenhj2'
genome = 'hg19_IGIS21'
if(tech=="exon"){
bq = 56
}else{
bq=23
}
if(!is.null(gene) && !is.null(locus)){
stop("must supply either a gene ID or locus GRanges but not both")
}
if(!is.null(gene)){
##genes <- normalizeGeneNames(gene_db)
gId <- paste("GeneID:", gene, sep = "")
InTxdb <- gId %in% names(genes)
if(any(!InTxdb)){
missing <- gId[which(!InTxdb)]
gId <- gId[InTxdb]
message(missing, " is not in the txdb. \nRemoving this gene from further analysis")
}
gene <- genes[gId]
gene <-unlist(gene)
gene<-reduce(gene)
}
if(!is.null(locus)){
gene <- locus
}
##gene <- normalizeChrNames(gene)
if(type =="somatic"){
post <- ".sample_specific_variants_granges.RData"
}else{
post <- ".merged.uniques.bam"
}
in_gene <- mclapply(ids, mc.cores=cores, function(sam){
if(tech == "exome"){
meta<-meta[meta$project == "exome",]
}else if(tech =="rna_seq"){
meta<-meta[meta$project == "rna_seq",]
}
##dir<-meta$results_dir[meta$srcid==sam]
if(type =="somatic"){
dir <-"/gnet/is3/research/data/bioinfo/ngs_analysis/degenhj2/CGP3.0_colon_var_rerun/TN/"
}else{
dir<-meta$results_dir[meta$srcid==sam]
}
if(length(dir)==0){
message(paste("The SAM ", sam, " does not exist in meta data", sep=""))
return(GRanges())
}
if(type =="somatic"){
if(file.exists(file.path(dir, sam, paste(sam, post, sep = "")))){
gr <- load(file.path(dir, sam, paste(sam, post, sep = "")))
gr <- get(gr)
}else{
message(paste("The SAM ", sam ," does not have sample specific variants", sep =""))
return(GRanges())
}
}
if(type == "called"){
if(file.exists(file.path(dir,"bams/merged/", paste(sam, post, sep = "")))){
bam_file <- file.path(dir,"bams/merged/", paste(sam, post, sep = ""))
gg <- GmapGenome(genome=genome, directory=genome_dir)
param <- BamTallyParam(which=gene,
high_quality_cutoff = as.integer(bq),
minimum_mapq = 13L,
min_depth = 0L, variant_strand = 1L,
ignore_query_Ns = TRUE,
indels = FALSE)
gr <- bam_tally(bam_file, gg, param)
##gr <- tally2GR(bamfiles=bam_file, genome=genome,genome_dir=genome_dir,chr_gr=gene, variant_strand=1, map_qual=13, bqual_thresh=bq)
gr <-variantFilter(gr, useQual=TRUE)
gr <- gr$filtered_granges
}else{
message(paste("The SAM ", sam, " does not have bam files", sep = ""))
return(GRanges())
}
}
if(type == "raw"){
bam_file <- file.path(dir,"bams/merged/", paste(sam, post, sep = ""))
gg <- GmapGenome(genome=genome, directory=genome_dir)
param <- BamTallyParam(which=gene,
high_quality_cutoff = as.integer(bq),
minimum_mapq = 13L,
min_depth = 0L, variant_strand = 0L,
ignore_query_Ns = TRUE,
indels = FALSE)
gr <- bam_tally(bam_file, gg, param)
gr <- tally2GR(bamfiles=bam_file, genome=genome,genome_dir=genome_dir,chr_gr=gene, variant_strand=0, map_qual=13, bqual_thresh=bq)
}
##gr <- normalizeChrNames(gr)
if(exact && !is.null(locus)){
vec1 <- paste(seqnames(gr), start(gr), values(gr)$read, sep = ":")
vec2 <- paste(seqnames(gene), start(gene), values(gene)$variant, sep = ":")
ind <- which(vec1 %in% vec2)
InGene <- gr[ind]
}else{
seqlevels(gene) <- seqlevels(gr)
seqlengths(gene) <- seqlengths(gr)
over <- suppressWarnings(findOverlaps(gr, gene))
InGene <- gr[unique(queryHits(over))]
if(!is.null(save_dir)){
InGene <- sort(InGene)
cgpGr2vcf(GR=InGene, filename=paste(save_dir,"/", sam, ".vcf", sep = ""), project="Mutations_by_gene", sample_id=sam)
}
}
return(InGene)
})
names(in_gene) <- as.character(ids)
return(in_gene)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.