## Sanity-check the layout of the Homo sapiens seqlevel style table that
## ships with GenomeInfoDb (extdata/dataFiles/Homo_sapiens.txt).
test_basic <- function()
{
    filename <- system.file(package="GenomeInfoDb", "extdata",
                            "dataFiles", "Homo_sapiens.txt")

    # check the format of the file
    data <- read.table(filename, header=TRUE, sep="\t")
    checkIdentical(c(25L, 7L), dim(data))
    checkIdentical(c('circular', 'auto', 'sex',
                     'NCBI', 'UCSC', 'dbSNP', 'Ensembl'),
                   colnames(data))

    # check if first 3 columns contain only true or false entries
    # (read.table() yields a logical column only when every entry parses
    # as a logical value; NA is ruled out separately)
    for (j in seq_len(3L)) {
        flags <- data[[j]]
        checkTrue(is.logical(flags) && !anyNA(flags))
    }
}
## Check the style(s) reported by the internal .guessSpeciesStyle() helper
## for a range of seqname inputs, including one it cannot classify.
test_guessSpeciesStyle <- function()
{
    guess_style <- GenomeInfoDb:::.guessSpeciesStyle

    # UCSC-looking names, from many seqnames down to a single one
    got <- guess_style(paste0("chr", 1:10))
    checkEquals(unique(got$style), "UCSC")

    got <- guess_style(paste0("chr", 1:22))
    checkEquals(unique(got$style), "UCSC")

    got <- guess_style("chr2")
    checkEquals(unique(got$style), "UCSC")

    # a bare "2" is compatible with several styles
    got <- guess_style("2")
    checkEquals(unique(got$style),
                c("NCBI", "Ensembl", "MSU6", "JGI2.F", "AGPvF"))

    got <- guess_style('T')
    checkEquals(unique(got$style), "JGI2.F")

    # regular chromosomes mixed with unplaced "random" scaffolds
    got <- guess_style(c("chr1", "chr2", "chr3",
                         "chr1_gl000191_random", "chr1_gl000192_random"))
    checkEquals(unique(got$style), "UCSC")

    # a name that matches no known style yields NA
    got <- guess_style("h")
    checkEquals(got, NA)
}
## Exercise seqlevelsStyle() on plain character vectors of seqnames.
##
## NOTE(review): in the visible code none of the results below is compared
## against an expected value, and cases 4 and 5 have no code at all. The
## calls still act as smoke tests (an error fails the test), but the
## expected styles need to be restored from the package history.
test_seqlevelsStyle_character <- function()
{
    #1. correct seqnames
    got <- seqlevelsStyle(paste0('chr', 1:20))

    #2. mix seqnames from 2 styles for same organism
    got2 <- seqlevelsStyle(c('1', 'MT', 'Pltd', 'chr1'))

    #3. mix seqnames from 2 different organisms
    got2 <- seqlevelsStyle(c('1', 'chr2RHet', 'chr3LHet'))

    #4. incorrect seqnames
    # TODO(review): no check present for this case

    #5. empty Seqinfo obj - with no seqnames
    # TODO(review): no check present for this case
}
## Round-trip checks for the seqlevelsStyle() getter/setter on Seqinfo
## objects, run over a table of paired UCSC genomes / NCBI assemblies.
test_seqlevelsStyle_Seqinfo <- function()
{
    ## Switch a Seqinfo object UCSC -> NCBI -> UCSC and NCBI -> UCSC -> NCBI,
    ## checking the reported genome(s) and style(s) after each switch and
    ## that the round trip restores the original object.
    ##   UCSC_genome, NCBI_assembly: names of the paired genome/assembly.
    ##   nmapped: nb of seqlevels mapped between UCSC and NCBI.
    ##   UCSC_nunmapped: nb of UCSC seqlevels not mapped to NCBI.
    ##   NCBI_nunmapped: nb of NCBI seqlevels not mapped to UCSC.
    check_UCSC_NCBI_switch <- function(UCSC_genome, NCBI_assembly,
                                       nmapped, UCSC_nunmapped, NCBI_nunmapped)
    {
        ## Start with a Seqinfo object made from a UCSC genome.
        si1 <- Seqinfo(genome=UCSC_genome)
        UCSC_nseqlevels <- nmapped + UCSC_nunmapped
        checkEquals(UCSC_nseqlevels, length(si1))
        checkIdentical("UCSC", seqlevelsStyle(si1))
        si2 <- si1
        seqlevelsStyle(si2) <- "NCBI"
        ugenomes <- unique(genome(si2))
        if (UCSC_nunmapped == 0L) {
            checkIdentical(NCBI_assembly, ugenomes)
            checkIdentical("NCBI", seqlevelsStyle(si2))
        } else {
            checkIdentical(c(NCBI_assembly, UCSC_genome), ugenomes)
            checkEquals(nmapped, sum(genome(si2) == NCBI_assembly))
            checkIdentical(c("NCBI", "UCSC"), seqlevelsStyle(si2))
        }
        ## The round trip is checked whether or not some seqlevels were
        ## left unmapped.
        seqlevelsStyle(si2) <- "UCSC"
        checkIdentical(si1, si2)

        ## Start with a Seqinfo object made from an NCBI assembly.
        si1 <- Seqinfo(genome=NCBI_assembly)
        NCBI_nseqlevels <- nmapped + NCBI_nunmapped
        checkEquals(NCBI_nseqlevels, length(si1))
        checkIdentical("NCBI", seqlevelsStyle(si1))
        si2 <- si1
        seqlevelsStyle(si2) <- "UCSC"
        ugenomes <- unique(genome(si2))
        if (NCBI_nunmapped == 0L) {
            checkIdentical(UCSC_genome, ugenomes)
            checkIdentical("UCSC", seqlevelsStyle(si2))
        } else {
            ## 'ugenomes' will almost always be 'c(UCSC_genome, NCBI_assembly)'
            ## but the order is not 100% guaranteed (e.g. for
            ## musFur1/MusPutFur1.0 it's the opposite order).
            #checkIdentical(c(UCSC_genome, NCBI_assembly), ugenomes)
            checkEquals(2L, length(ugenomes))
            checkTrue(setequal(c(UCSC_genome, NCBI_assembly), ugenomes))
            checkEquals(nmapped, sum(genome(si2) == UCSC_genome))
            ## 'seqlevelsStyle(si2)' will almost always return c("UCSC", "NCBI")
            ## but the order is not 100% guaranteed (e.g. for
            ## musFur1/MusPutFur1.0 it's the opposite order).
            #checkIdentical(c("UCSC", "NCBI"), seqlevelsStyle(si2))
            checkEquals(2L, length(seqlevelsStyle(si2)))
            checkTrue(setequal(c("UCSC", "NCBI"), seqlevelsStyle(si2)))
        }
        seqlevelsStyle(si2) <- "NCBI"
        checkIdentical(si1, si2)
    }

    UCSC_NCBI <- list(
        # Field 1: UCSC_genome
        # Field 2: NCBI_assembly
        # Field 3: nb of seqlevels that are mapped between UCSC and NCBI
        # Field 4: nb of UCSC seqlevels that are not mapped to NCBI
        # Field 5: nb of NCBI seqlevels that are not mapped to UCSC
        #       1           2                          3        4   5
        list("apiMel2", "Amel_2.0", 16L, 1L, 7151L),
        #list("wuhCor1", "ASM985889v3", 1L, 0L, 0L),
        #list("bosTau6", "Bos_taurus_UMD_3.1", 3317L, 0L, 0L),
        list("bosTau7", "Btau_4.6.1", 11691L, 1L, 1L),
        list("bosTau8", "Bos_taurus_UMD_3.1.1", 3179L, 0L, 0L),
        list("bosTau9", "ARS-UCD1.2", 2211L, 0L, 1L),
        list("calJac3", "Callithrix jacchus-3.2", 14205L, 0L, 0L),
        list("canFam3", "CanFam3.1", 3268L, 0L, 0L),
        list("canFam4", "UU_Cfam_GSD_1.0", 2198L, 0L, 0L),
        list("ce6", "WS190", 7L, 0L, 0L),
        list("ce10", "WBcel215", 7L, 0L, 0L),
        list("ce11", "WBcel235", 7L, 0L, 0L),
        list("danRer7", "Zv9", 1133L, 0L, 0L),
        list("danRer10", "GRCz10", 1061L, 0L, 0L),
        list("danRer11", "GRCz11", 1923L, 0L, 0L),
        list("dm3", "Release 5", 14L, 1L, 0L),
        list("dm6", "Release 6 plus ISO1 MT", 1870L, 0L, 0L),
        list("galGal3", "Gallus_gallus-2.1", 34L, 23L, 17118L),
        list("galGal4", "Gallus_gallus-4.0", 15932L, 0L, 0L),
        list("galGal5", "Gallus_gallus-5.0", 23475L, 0L, 0L),
        list("galGal6", "GRCg6a", 464L, 0L, 0L),
        list("hg15", "NCBI33", 24L, 20L, 140L),
        #list("hg16", "NCBI34", 24L, 18L, 138L),
        #list("hg17", "NCBI35", 26L, 20L, 86L),
        #list("hg18", "NCBI36", 26L, 23L, 97L),
        list("hg19", "GRCh37.p13", 297L, 1L, 0L),
        list("hg38", "GRCh38.p12", 595L, 0L, 0L),
        list("macFas5", "Macaca_fascicularis_5.0", 7601L, 0L, 0L),
        list("rheMac2", "Mmul_051212", 21L, 1L, 122143L),
        list("rheMac3", "CR_1.0", 34102L, 1L, 0L),
        list("rheMac8", "Mmul_8.0.1", 284728L, 0L, 0L),
        list("rheMac10", "Mmul_10", 2939L, 0L, 0L),
        list("monDom5", "MonDom5", 10L, 1L, 5006L),
        list("mm8", "MGSCv36", 21L, 13L, 360L),
        list("mm9", "MGSCv37", 22L, 13L, 283L),
        list("mm10", "GRCm38", 66L, 0L, 99L),
        list("musFur1", "MusPutFur1.0", 7741L, 0L, 42L),
        list("panPan1", "panpan1", 10867L, 0L, 0L),
        list("panPan2", "panpan1.1", 10274L, 0L, 0L),
        list("panPan3", "Mhudiblu_PPA_v0", 4293L, 0L, 0L),
        list("panTro2", "Pan_troglodytes-2.1", 26L, 26L, 29214L),
        list("panTro3", "Pan_troglodytes-2.1.3", 24131L, 1L, 0L),
        list("panTro4", "Pan_troglodytes-2.1.4", 24129L, 0L, 0L),
        list("panTro5", "Pan_tro 3.0", 44449L, 0L, 0L),
        list("panTro6", "Clint_PTRv2", 4346L, 0L, 0L),
        list("rn5", "Rnor_5.0", 2739L, 0L, 0L),
        list("rn6", "Rnor_6.0", 953L, 0L, 2L),
        list("sacCer3", "R64", 17L, 0L, 0L),
        list("susScr2", "Sscrofa9.2", 19L, 1L, 0L),
        list("susScr3", "Sscrofa10.2", 4583L, 0L, 0L),
        list("susScr11", "Sscrofa11.1", 613L, 0L, 0L),
        list("taeGut2", "Taeniopygia_guttata-3.2.4", 37096L, 0L, 0L)
    )

    ## Each table row is an unnamed list, so its 5 fields are matched
    ## positionally to the 5 parameters of check_UCSC_NCBI_switch().
    for (i in seq_along(UCSC_NCBI)) {
        args <- UCSC_NCBI[[i]]
        do.call(check_UCSC_NCBI_switch, args)
    }

    ## Switch a Seqinfo object between the UCSC, NCBI and RefSeq styles,
    ## checking that the genome assignments are untouched by the RefSeq
    ## switch and that all the round trips restore the original object.
    ##   UCSC_genome, NCBI_assembly: names of the paired genome/assembly.
    ##   UCSC_nunmapped: nb of UCSC seqlevels not mapped to NCBI.
    check_RefSeq_switch <- function(UCSC_genome, NCBI_assembly, UCSC_nunmapped)
    {
        ## NOTE(review): 'is_RefSeq_accession' is not used by any statement
        ## visible in this function; the assertion(s) that relied on it
        ## appear to be missing and should be restored.
        is_RefSeq_accession <- GenomeInfoDb:::.is_RefSeq_accession

        ## Start with a Seqinfo object made from a UCSC genome.
        si1 <- Seqinfo(genome=UCSC_genome)
        ## Remove problematic seqlevel chrUn_KI270752v1 (does not have an
        ## associated GenBank accession). Belongs to hg38.
        si1 <- si1[setdiff(seqlevels(si1), "chrUn_KI270752v1")]
        si2 <- si1
        seqlevelsStyle(si2) <- "NCBI"
        si3 <- si2
        seqlevelsStyle(si3) <- "RefSeq"
        checkIdentical(unname(genome(si2)), unname(genome(si3)))
        style <- seqlevelsStyle(si3)
        if (UCSC_nunmapped == 0L) {
            checkTrue(identical("RefSeq", style) ||
                      identical(c("RefSeq", "NCBI"), style))
        } else {
            checkIdentical(c("RefSeq", "UCSC"), style)
        }
        ## Only seqlevels that belong to the NCBI assembly may have been
        ## renamed by the switch to RefSeq.
        has_changed <- seqnames(si3) != seqnames(si2)
        checkTrue(!any(has_changed & genome(si2) != NCBI_assembly))
        si4 <- si1
        seqlevelsStyle(si4) <- "RefSeq"
        checkIdentical(si3, si4)
        seqlevelsStyle(si4) <- "UCSC"
        checkIdentical(si1, si4)
        seqlevelsStyle(si3) <- "NCBI"
        checkIdentical(si2, si3)
        seqlevelsStyle(si2) <- "UCSC"
        checkIdentical(si1, si2)

        ## Start with a Seqinfo object made from an NCBI assembly.
        si1 <- Seqinfo(genome=NCBI_assembly)
        ## Remove problematic seqlevel HSCHRUN_RANDOM_CTG29 (does not have an
        ## associated GenBank accession). Belongs to GRCh37.p13 and GRCh38.p12.
        si1 <- si1[setdiff(seqlevels(si1), "HSCHRUN_RANDOM_CTG29")]
        si2 <- si1
        seqlevelsStyle(si2) <- "UCSC"
        si3 <- si1
        seqlevelsStyle(si3) <- "RefSeq"
        checkIdentical(unname(genome(si1)), unname(genome(si3)))
        style <- seqlevelsStyle(si3)
        checkTrue(identical("RefSeq", style) ||
                  identical(c("RefSeq", "NCBI"), style))
        ## NOTE(review): 'has_changed' is computed here but never asserted
        ## on in the visible code; the matching check appears to be missing.
        has_changed <- seqnames(si3) != seqnames(si1)
        si4 <- si2
        seqlevelsStyle(si4) <- "RefSeq"
        checkIdentical(si3, si4)
        seqlevelsStyle(si4) <- "NCBI"
        checkIdentical(si1, si4)
        seqlevelsStyle(si2) <- "NCBI"
        checkIdentical(si1, si2)
    }

    ## Exclude some genomes from the RefSeq switch check. These genomes
    ## fail to pass the check for reasons that need to be investigated.
    skip_RefSeq_switch <- c("canFam4", "rheMac3", "panTro3")

    ## Fields 1, 2 and 4 of a table row are the 3 arguments expected by
    ## check_RefSeq_switch().
    for (i in seq_along(UCSC_NCBI)) {
        args <- UCSC_NCBI[[i]][c(1L, 2L, 4L)]
        if (args[[1L]] %in% skip_RefSeq_switch)
            next
        do.call(check_RefSeq_switch, args)
    }
}
## genomeStyles() for a single species must return a plain data.frame with
## 25 rows and 7 columns for Homo sapiens.
test_genomeStyles <- function()
{
    styles <- genomeStyles("Homo sapiens")
    checkIdentical("data.frame", class(styles))
    checkIdentical(c(25L, 7L), dim(styles))
}
## extractSeqlevels() must succeed for a species/style pair and raise an
## error when its arguments are wrong or incomplete.
test_extractSeqlevels <- function()
{
    ## NOTE(review): 'got' is not compared against an expected value in the
    ## visible code; the call only verifies that no error is raised.
    got <- extractSeqlevels("Homo sapiens", "UCSC")

    ## first argument is not a species name
    checkException(extractSeqlevels("aaa", "Homo sapiens"))
    ## style argument is missing
    checkException(extractSeqlevels("Drosophila melanogaster"))
}
## extractSeqlevelsByGroup() must succeed for a species/style/group triplet
## and raise an error when its arguments are wrong or incomplete.
test_extractSeqlevelsByGroup <- function()
{
    ## NOTE(review): 'got' is not compared against an expected value in the
    ## visible code; the call only verifies that no error is raised.
    got <- extractSeqlevelsByGroup("Drosophila melanogaster", "Ensembl", "auto")

    ## first argument is not a species name
    checkException(extractSeqlevelsByGroup("aaa", "Homo sapiens"))
    ## style and group arguments are missing
    checkException(extractSeqlevelsByGroup("Drosophila melanogaster"))
    ## second and third arguments are in the opposite order to the
    ## successful call above (presumably style and group swapped)
    checkException(extractSeqlevelsByGroup("Homo sapiens", "auto", "NCBI"))
}
## Exercise seqlevelsInGroup() with and without an explicit species/style.
##
## NOTE(review): neither 'got1' nor 'got2' is compared against an expected
## value in the visible code; the calls only verify that no error is raised.
test_seqlevelsInGroup <- function()
{
    ## "chr"-prefixed names, including a random scaffold and a haplotype;
    ## species and style are left to be inferred
    newch <- paste0("chr", c(1:22, "X", "Y", "M",
                             "1_gl000192_random", "4_ctg9_hap1"))
    got1 <- seqlevelsInGroup(newch, group="sex")

    ## unprefixed names, with species and style supplied explicitly
    newchr <- as.character(c(1:22, "X", "Y", "MT"))
    got2 <- seqlevelsInGroup(newchr, group="all", "Homo sapiens", "NCBI")
}
