GFF3_COLNAMES <- c("source","type","phase","ID","Name","Parent","GeneID")
GFF2_COLNAMES<-c("source","type","phase",
"gene_id","gene_name","transcript_id","transcript_name")
#' copy from GenomicFeatures
.GENE_TYPES <- c("gene", "pseudogene", "transposable_element_gene")
.TX_TYPES <- c("transcript", "pseudogenic_transcript", "primary_transcript",
"mRNA", "ncRNA", "rRNA", "snoRNA", "snRNA", "tRNA", "tmRNA",
"miRNA", "miRNA_primary_transcript",
"RNase_P_RNA", "RNase_MRP_RNA", "SRP_RNA", "misc_RNA",
"antisense_RNA", "antisense",
"lnc_RNA", "antisense_lncRNA", "transcript_region",
"pseudogenic_tRNA", "scRNA", "guide_RNA", "telomerase_RNA",
"vault_RNA", "Y_RNA")
.EXON_TYPES <- c("exon", "pseudogenic_exon", "coding_exon",
"five_prime_coding_exon", "interior_coding_exon",
"three_prime_coding_exon", "exon_of_single_exon_gene",
"interior_exon", "noncoding_exon",
"five_prime_noncoding_exon", "three_prime_noncoding_exon")
.CDS_TYPES <- c("CDS", "transposable_element_CDS",
"CDS_predicted", "edited_CDS")
.UTR_TYPES <- c("five_prime_UTR", "three_prime_UTR","UTR")
.STOP_CODON_TYPES <- c("start_codon","stop_codon")
### cory from GenomicFeatures
GFF_FEATURE_TYPES <- c(.GENE_TYPES, .TX_TYPES, .EXON_TYPES,
.CDS_TYPES, .STOP_CODON_TYPES,.UTR_TYPES)
GENE_FEATURE <- c(.EXON_TYPES,.CDS_TYPES,.UTR_TYPES,.STOP_CODON_TYPES)
#' detect file format for import gff or gff3 file
#' modified based on GenomicFeatures .detect_file_format function
#' @importFrom tools file_ext
#' @importFrom tools file_path_sans_ext
#' @importFrom rtracklayer FileForFormat
.detect_file <- function(file)
{
if (isSingleString(file)) {
file2 <- try(FileForFormat(file), silent=TRUE)
if (inherits(file2, "try-error"))
return(file_ext(file))
file <- file2
}
if (is(file, "RTLFile")) {
if (is(file, "GFF3File"))
return("gff3")
if (is(file, "GTFFile"))
return("gtf")
desc <- rtracklayer:::resourceDescription(file)
if (is(file, "CompressedFile")) {
## Mimic what import,CompressedFile,missing,missing method does.
desc <- file_path_sans_ext(desc)
}
type <- file_ext(desc)
return(type)
}
stop(wmsg("Invalid 'file'. Must be a path to a file, or an URL, ",
"or a GFF3File or GTFFile object."))
}
#' expand range from start and end
#' @importFrom BiocGenerics start
#' @importFrom BiocGenerics end
#' @importFrom BiocGenerics which
#' @importFrom GenomicRanges `start<-`
#' @importFrom GenomicRanges `end<-`
#' @param gr GRanges object
#' @param upstream flanking length for upstream
#' @param downstream flanking length for downstream
#' @export
#' @author Kai Guo
expandRange = function(gr, upstream=1000, downstream=1000) {
min = start(range(gr))
max = end(range(gr))
strand_is_minus = strand(gr) == "-"
on_plus = which(!strand_is_minus)
on_minus = which(strand_is_minus)
start(gr)[on_plus & start(gr) == min] = start(gr)[on_plus &
start(gr) == min] - upstream
end(gr)[on_minus & end(gr) == max] = end(gr)[on_minus &
end(gr) == max] + upstream
end(gr)[on_plus & end(gr) == max] = end(gr)[on_plus & end(gr) == max] +
downstream
start(gr)[on_minus & start(gr) == min] = start(gr)[on_minus &
start(gr) == min] - downstream
gr
}
#' unlist and add names
#' @param list a list include character vector
#' @author Kai Guo
.unlist.name <- function(list){
list.name <- rep(names(list),times=unlist(lapply(list,length)))
list <- unlist(list)
names(list) <- list.name
list
}
#' change mcols data colnames
#' @importFrom GenomicRanges GRanges
#' @importFrom S4Vectors `elementMetadata<-`
#' @importFrom S4Vectors elementMetadata
#' @param gr GRanges object
#' @param names new column names
#' @author Kai Guo
.colnames.mcol <- function(gr,names){
#### need to be change
names(elementMetadata(gr))[3:ncol(elementMetadata(gr))] <- names
gr
}
#' @title anova test for phenotype based on haplotype
#' @importFrom stats aov na.omit TukeyHSD
#' @importFrom dplyr left_join
#' @importFrom broom tidy
#' @importFrom magrittr %>%
#' @importFrom scales scientific
#' @importFrom tidyr separate
#' @param haps SeqHap object
#' @param gene gene name
#' @param feature pheaotype name
#' @export
#' @author Kai Guo
#'
aov.pheno.test <- function(haps, gene, feature){
haps <- hap@haplotype[[gene]]
pheno <- hap@pheno[,c("sample",feature)]
pheno <- na.omit(pheno)
haplist <- hap@haplist[[gene]]
hapl <- rep(names(haplist), times = unlist(lapply(haplist, length)))
hapx <- data.frame(sample=unlist(haplist),group=hapl)
pheno <- left_join(pheno, hapx, by=c("sample"="sample"))
sel <- names(which(table(pheno$group)>=3))
###select haplotype with three phenotype values
pheno <- subset(pheno,group%in%sel)
colnames(pheno)[2]<-"feature"
ptest <- tidy(TukeyHSD(aov(feature~group,data=pheno)))%>%
separate(comparison,c("group1","group2"),sep="-")
ptest$adj.p.value<-scientific(ptest$adj.p.value,digits=2)
ptest
}
#' @title extract sequence form SeqHap object
#' @param object SeqHap object
#' @param gene gene
#' @export
#' @author Kai Guo
setMethod("seqs", signature = (object="SeqHap"), function(object,gene=NULL){
res <- object@seqs[[gene]]
name <- names(object@haplist[[gene]])
names(res)<-name
res
})
#' @title extract final results from SeqHap object
#' @param object SeqHap object
#' @param gene gene
#' @importFrom GenomicRanges as.data.frame
#' @importFrom BiocGenerics unlist
#' @importFrom dplyr left_join
#' @importFrom tibble rownames_to_column
#' @export
#' @author Kai Guo
setMethod("results", signature = (object="SeqHap"), function(object,gene=NULL){
res <- object@haplotype[[gene]]
res <- as.data.frame(res)
haplist <- object@haplist[[gene]]
pheno <- object@pheno
hh <- data.frame(hp=rep(names(haplist),lapply(haplist,length)),sample=unlist(haplist),row.names = NULL)
hh <- merge(hh,object@pheno,by='sample')
res <- as.data.frame(t(res))%>%rownames_to_column(var='Info')%>%
left_join(hh,by=c('Info'='hp'))
return(res)
})
#' @title read hmp files and combine snp together
#' @importFrom readr read_delim
#' @param file the name of the file which the data are to be read from.
#' @param sep the field separator character.
#' @param comment character: a character vector of length one containing a
#' single character or an empty string.
#' @return data.frame
#' @author Kai Guo
#' @export
read_pheno <- function(file, sep = "\t"){
pheno <- read_delim(file, delim = sep)
colnames(pheno)[1] <- 'sample'
pheno
}
#' @title get name of list
#' @param name gene name
#' @param x haplist
#' @param y name
#' @param hapname haplotype name
.getlist <- function(name,x,y,hapname){
tx <- x[[name]]
names(tx) <- paste0(hapname,1:length(tx))
ty <- y[[name]]
tmp <- lapply(tx,function(x)ty[x])
return(tmp)
}
###
.color_scale <- function(c1="pink", c2="red") { #modified from DOSE
pal <- colorRampPalette(c(c1, c2))
colors <- pal(200)
return(colors)
}
.getIdx <- function(v, MIN, MAX) { #modified from DOSE
intervals <- seq(MIN, MAX, length.out=200)
max(which(intervals <= v))
}
#' check if REF same as hap
#' @importFrom magrittr %>%
#' @importFrom dplyr mutate
#' @importFrom GenomicRanges as.data.frame
#' @importFrom GenomicRanges mcols
#' @importFrom GenomicRanges `mcols<-`
#' @param gr GRanges object
.check_same_or_not <- function(gr){
gr <- mcols(gr)
gr <- as.data.frame(gr)
gr$alt <- sub('[\\/\\|].*','',gr[,2])
gr$cond <- gr$alt!=gr$REF
return(gr$cond)
}
#' @title read vcf files
#' @importFrom pegas read.vcf
#' @importFrom pegas VCFloci
#' @importFrom GenomicRanges makeGRangesFromDataFrame
#' @importFrom GenomicRanges mcols
#' @importFrom GenomicRanges `mcols<-`
#' @param file A ‘VcfFile’ (synonymous with ‘TabixFile’) instance or
#' character() name of the VCF file to be processed..
#' @export
#' @return GRanges object
#' @author Kai Guo
read_vcf <- function(file) {
info <- VCFloci(file, quiet = TRUE)
snp <- makeGRangesFromDataFrame(info, start.field = 'POS',
end.field = 'POS', keep.extra.columns = TRUE)
snp <- snp[, c('REF', 'ALT')]
vcf <- read.vcf(file, from = 1,to = nrow(info), quiet = TRUE)
mcols(snp)<-cbind(mcols(snp), t(as.data.frame(vcf)))
snp
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.