test_that("Importing biom files yield SummarizedExperiment objects", {
skip_if_not(require("biomformat", quietly = TRUE))
rich_dense_file = system.file("extdata", "rich_dense_otu_table.biom",
package = "biomformat")
me <- importBIOM(rich_dense_file)
expect_s4_class(me, "SummarizedExperiment")
# load from object
x1 <- biomformat::read_biom(rich_dense_file)
me2 <- convertFromBIOM(x1)
expect_s4_class(me2, "SummarizedExperiment")
expect_equal(dim(me), dim(me2))
expect_equal(rowData(me), rowData(me2))
biom_object <- biomformat::read_biom(
system.file("extdata", "Aggregated_humanization2.biom",
package="mia")
)
tse <- convertFromBIOM(biom_object,
prefix.rm = FALSE,
rank.from.prefix = FALSE,
artifact.rm = TRUE,
pattern = "\"")
# Testing no prefixes removed
expect_true(rowData(tse) %>%
apply(2,grepl,pattern="sk__|([dkpcofgs]+)__") %>%
all())
# Testing no taxonomy ranks parsed
expect_false(
sapply(tolower(colnames(rowData(tse))),
function(x) x %in% TAXONOMY_RANKS) %>% all())
# Testing the artifact.rm, since the original artifact in the biom file
# is '\"'
expect_false(apply(rowData(tse), 2, grepl, pattern="^\"") %>% all())
# Testing prefixes removed
tse <- convertFromBIOM(biom_object,
prefix.rm=TRUE,
rank.from.prefix=TRUE,
artifact.rm = TRUE,
pattern = "\"")
expect_false(rowData(tse) %>%
apply(2,grepl,pattern="sk__|([dkpcofgs]+)__") %>%
all())
# Testing parsing taxonomy ranks from prefixes
tse <- convertFromBIOM(biom_object,
prefix.rm=FALSE,
rank.from.prefix=TRUE,
artifact.rm = TRUE,
pattern = "\"")
expect_true(
sapply(tolower(colnames(rowData(tse))),
function(x) x %in% TAXONOMY_RANKS) %>% all())
# Testing the artifact.rm, the original artifact in the biom file
# is '\"', as a test we rather try remove a non existing pattern.
tse <- convertFromBIOM(biom_object,
prefix.rm=FALSE,
rank.from.prefix=FALSE,
artifact.rm = TRUE,
pattern = "\\*|\\?")
# with wrong pattern artifact not cleaned
expect_true(apply(rowData(tse), 2, grepl, pattern="\"") %>% any())
# Testing the artifact.rm, with the value 'auto' to automatically
# detect the artifact and remove it (in our case the artifact is '\"').
tse <- convertFromBIOM(biom_object,
prefix.rm=FALSE,
rank.from.prefix=FALSE,
artifact.rm = TRUE)
# Checking if 'auto' has detected and cleaned the artifact
expect_false(apply(rowData(tse), 2, grepl, pattern="\"") %>% any())
# Testing the artifact.rm, with the value NULL to not detect or clean
# anything.
tse <- convertFromBIOM(biom_object,
prefix.rm=FALSE,
rank.from.prefix=FALSE,
artifact.rm = FALSE)
# Checking if the '\"' artifact still exists.
expect_true(apply(rowData(tse), 2, grepl, pattern="\"") %>% any())
# General final test
tse <- convertFromBIOM(biom_object,
prefix.rm=TRUE,
rank.from.prefix=TRUE,
artifact.rm = TRUE)
# check if '\"' cleaned
expect_false(apply(rowData(tse), 2, grepl, pattern="\"") %>% any())
# check if taxa prefixes removed
expect_false(rowData(tse) %>%
apply(2,grepl,pattern="sk__|([dkpcofgs]+)__") %>%
all())
# Check if rank names were parsed correctly
expect_true(
sapply(tolower(colnames(rowData(tse))),
function(x) x %in% TAXONOMY_RANKS) %>% all())
# General final test with another biom file
biom_object <- biomformat::read_biom(
system.file("extdata", "rich_dense_otu_table.biom",
package = "biomformat")
)
tse <- convertFromBIOM(biom_object,
prefix.rm=TRUE,
rank.from.prefix=TRUE,
artifact.rm = TRUE)
# check if taxa prefixes removed
expect_false(rowData(tse) %>%
apply(2,grepl,pattern="sk__|([dkpcofgs]+)__") %>%
all())
# Check if rank names were parsed correctly
expect_true(
sapply(tolower(colnames(rowData(tse))),
function(x) x %in% TAXONOMY_RANKS) %>% all())
# Check that convertToBIOM works
# Get errors if input is incorrect
expect_error( convertToBIOM() )
expect_error( convertToBIOM(c(1, 2, 3)) )
expect_error( convertToBIOM(tse, assay.type = "test") )
# Check that the output is correct
biom <- convertToBIOM(tse)
# Test type
expect_equal(biomformat::matrix_element_type(biom), "int")
# Assay
test <- as.matrix( biomformat::biom_data(biom) )
ref <- assay(tse)
expect_equal(test, ref)
# rowData
test <- as.data.frame( biomformat::observation_metadata(biom) )
ref <- as.data.frame( rowData(tse) )
expect_equal(test, ref)
# colData
test <- as.data.frame( biomformat::sample_metadata(biom) )
ref <- as.data.frame( colData(tse) )
expect_equal(test, ref)
# Test with empty rowData and colData and relative abundance (float) assay
rowData(tse) <- NULL
colData(tse) <- NULL
tse <- transformAssay(tse, method = "relabundance")
biom <- convertToBIOM(tse, assay.type = "relabundance")
# Test type
expect_equal(biomformat::matrix_element_type(biom), "float")
# Assay
test <- as.matrix( biomformat::biom_data(biom) )
ref <- assay(tse, "relabundance")
expect_equivalent(test, ref)
# rowData has one empty column (only NA values)
test <- as.data.frame( biomformat::observation_metadata(biom) )
expect_true( all(is.na(test)) && colnames(test) == "V1" )
# colData has one empty column (only NA values)
test <- as.data.frame( biomformat::sample_metadata(biom) )
expect_true( all(is.na(test)) && colnames(test) == "V1" )
})
test_that("Importing phyloseq objects yield TreeSummarizedExperiment objects", {
skip_if_not_installed("phyloseq")
data(GlobalPatterns, package="phyloseq")
me <- convertFromPhyloseq(GlobalPatterns)
expect_s4_class(me, "TreeSummarizedExperiment")
expect_equal(dim(me),c(19216,26))
data(enterotype, package="phyloseq")
me <- convertFromPhyloseq(enterotype)
expect_s4_class(me, "TreeSummarizedExperiment")
expect_equal(dim(me),c(553,280))
data(esophagus, package="phyloseq")
me <- convertFromPhyloseq(esophagus)
expect_s4_class(me, "TreeSummarizedExperiment")
expect_equal(dim(me),c(58,3))
esophagus2 <- esophagus
phyloseq::otu_table(esophagus2) <- t(phyloseq::otu_table(esophagus))
me2 <- convertFromPhyloseq(esophagus2)
expect_equal(me, me2)
})
test_that("Importing dada2 objects yield TreeSummarizedExperiment objects", {
skip_if_not_installed("dada2")
fnF <- system.file("extdata", "sam1F.fastq.gz", package="dada2")
fnR = system.file("extdata", "sam1R.fastq.gz", package="dada2")
dadaF <- dada2::dada(fnF, selfConsist=TRUE)
dadaR <- dada2::dada(fnR, selfConsist=TRUE)
me <- convertFromDADA2(dadaF, fnF, dadaR, fnR)
expect_s4_class(me, "TreeSummarizedExperiment")
})
test_that("Importing Mothur files yield SummarizedExperiment objects", {
counts <- system.file("extdata", "mothur_example.shared", package = "mia")
taxa <- system.file("extdata", "mothur_example.cons.taxonomy", package = "mia")
taxa2 <- system.file("extdata", "mothur_example.taxonomy", package = "mia")
meta <- system.file("extdata", "mothur_example.design", package = "mia")
se <- importMothur(counts)
se2 <- importMothur(counts)
expect_s4_class(se, "SummarizedExperiment")
expect_s4_class(se2, "SummarizedExperiment")
se <- importMothur(counts, taxa)
se2 <- importMothur(counts, taxa2)
expect_s4_class(se, "SummarizedExperiment")
expect_s4_class(se2, "SummarizedExperiment")
se <- importMothur(counts, col.file = meta)
se2 <- importMothur(counts, col.file = meta)
expect_s4_class(se, "SummarizedExperiment")
expect_s4_class(se2, "SummarizedExperiment")
se <- importMothur(assay.file = counts, row.file = taxa, col.file = meta)
se2 <- importMothur(assay.file = counts, row.file = taxa2, col.file = meta)
expect_s4_class(se, "SummarizedExperiment")
expect_s4_class(se2, "SummarizedExperiment")
# Checks dimensions, rownames, and colnames of assay
expect_equal(nrow(assays(se)$counts), 100)
expect_equal(rownames(assays(se)$counts)[1:10],
c("Otu001", "Otu002", "Otu003", "Otu004", "Otu005",
"Otu006", "Otu007", "Otu008", "Otu009", "Otu010"))
expect_equal(ncol(assays(se)$counts), 100)
expect_equal(colnames(assays(se)$counts)[1:10],
c("Sample1", "Sample2", "Sample3", "Sample4", "Sample5",
"Sample6", "Sample7", "Sample8", "Sample9", "Sample10"))
expect_equal(nrow(assays(se2)$counts), 100)
expect_equal(rownames(assays(se2)$counts)[1:10],
c("Otu001", "Otu002", "Otu003", "Otu004", "Otu005",
"Otu006", "Otu007", "Otu008", "Otu009", "Otu010"))
expect_equal(ncol(assays(se2)$counts), 100)
expect_equal(colnames(assays(se)$counts)[1:10],
c("Sample1", "Sample2", "Sample3", "Sample4", "Sample5",
"Sample6", "Sample7", "Sample8", "Sample9", "Sample10"))
# Checks that rowData has right dimensions, rownames, and colnames
expect_equal(nrow(rowData(se)), 100)
expect_equal(rownames(rowData(se))[1:10],
c("Otu001", "Otu002", "Otu003", "Otu004", "Otu005",
"Otu006", "Otu007", "Otu008", "Otu009", "Otu010"))
expect_equal(colnames(rowData(se)),
c("OTU", "Size", "Kingdom", "Phylum", "Order", "Class", "Family", "Genus"))
expect_equal(nrow(rowData(se2)), 100)
expect_equal(rownames(rowData(se2))[1:10],
c("Otu001", "Otu002", "Otu003", "Otu004", "Otu005",
"Otu006", "Otu007", "Otu008", "Otu009", "Otu010"))
expect_equal(colnames(rowData(se2)),
c("OTU", "Kingdom", "Phylum", "Order", "Class", "Family", "Genus"))
expect_equal(nrow(colData(se)), 100)
expect_equal(rownames(colData(se))[1:10],
c("Sample1", "Sample2", "Sample3", "Sample4", "Sample5",
"Sample6", "Sample7", "Sample8", "Sample9", "Sample10"))
# Checks colData's dimensions and names of columns and rows
expect_equal(colnames(colData(se)),
c("group", "sex", "age", "drug", "label", "numOtus", "Group"))
expect_equal(nrow(colData(se2)), 100)
expect_equal(rownames(colData(se2))[1:10],
c("Sample1", "Sample2", "Sample3", "Sample4", "Sample5",
"Sample6", "Sample7", "Sample8", "Sample9", "Sample10"))
expect_equal(colnames(colData(se2)),
c("group", "sex", "age", "drug", "label", "numOtus", "Group"))
})
assay.file <- system.file("extdata", "table.qza", package = "mia")
row.file <- system.file("extdata", "taxonomy.qza", package = "mia")
col.file <- system.file("extdata", "sample-metadata.tsv", package = "mia")
refseq.file <- system.file("extdata", "refseq.qza", package = "mia")
test_that("make TSE worked properly while no sample or taxa data", {
skip_if_not(require("biomformat", quietly = TRUE))
## no sample data or taxa data
expect_silent(tse <- importQIIME2(assay.file))
expect_s4_class(tse, "TreeSummarizedExperiment")
expect_equal(dim(tse), c(770,34))
})
test_that("reference sequences of TSE", {
skip_if_not(require("biomformat", quietly = TRUE))
# 1. fasta file of refseq
tse <- importQIIME2(
assay.file,
refseq.file = refseq.file
)
tse2 <- importQIIME2(
assay.file,
refseq.file = refseq.file,
featureNamesAsRefseq = FALSE
)
expect_identical(tse@referenceSeq, importQZA(refseq.file))
expect_identical(tse2@referenceSeq, importQZA(refseq.file))
# 2. row.names of feature table as refseq
# 2.1 element of row.names of feature table is not DNA sequence
tse <- importQIIME2(
assay.file,
featureNamesAsRefseq = TRUE
)
expect_null(tse@referenceSeq)
# 2.2 element of row.names of feature table is a DNA sequence
#
# codes used for create sample data (donot run)
if (FALSE) {
.require_package("biomformat")
feature_tab <- importQZA(assay.file)
n_feature <- nrow(feature_tab)
random_seq <- sapply(
rep(20, n_feature),
function(size) {
paste(
sample(Biostrings::DNA_BASES, size, replace = TRUE),
collapse=""
)
}
)
row.names(feature_tab) <- random_seq
obj <- biomformat::make_biom(feature_tab)
biomformat::write_biom(obj, "data/table-rownamesInSeq.biom")
# create qza file using QIIME2
# qiime tools import \
# --input-path data/table-rownamesInSeq.biom
# --type 'FeatureTable[Frequency]'
# --input-format BIOMV100Format
# --output-path inst/extdata/table-rownamesInSeq.qza
unlink("data/table-rownamesInSeq.biom")
}
featureTableFile2 <- system.file(
"extdata",
"table-rownamesInSeq.qza",
package = "mia"
)
# featureNamesAsRefseq is TRUE, refseq.file is NULL, set row.names of
# feature table as reference sequences
tse <- importQIIME2(
featureTableFile2,
featureNamesAsRefseq = TRUE
)
feature_tab <- importQZA(featureTableFile2)
names_seq <- Biostrings::DNAStringSet(row.names(feature_tab))
names(names_seq) <- paste0("seq_", seq_along(names_seq))
expect_identical(tse@referenceSeq, names_seq)
# refseq.file is not NULL, featureNamesAsRefseq is TRUE,
# set the sequences from refseq.file as reference sequences
tse <- importQIIME2(
featureTableFile2,
featureNamesAsRefseq = TRUE,
refseq.file = refseq.file
)
expect_identical(tse@referenceSeq, importQZA(refseq.file))
# 3. refseq.file = NULL, featureNamesAsRefseq = FALSE
tse <- importQIIME2(
assay.file,
refseq.file = NULL,
featureNamesAsRefseq = FALSE
)
expect_null(tse@referenceSeq)
})
test_that("`.parse_taxonomy` work with any combination of taxonomic ranks", {
test_taxa <- matrix(
c("a", "k__Bacteria; c__Bacteroidia; s__", 0.88,
"b", "k__Bacteria; c__Clostridia; s__", 0.9),
ncol = 3,
byrow = TRUE,
dimnames = list(c("a", "b"), c("Feature.ID", "Taxon", "Confidence"))
)
expect_silent(mia:::.parse_taxonomy(test_taxa))
# at certain rank (e.g. species): some taxa can not be determined which
# species it assigned to (NA)
test_taxa <- matrix(
c("a", "k__Bacteria; c__Bacteroidia; s__", 0.88,
"b", "k__Bacteria; c__Clostridia;", 0.9),
ncol = 3,
byrow = TRUE,
dimnames = list(c("a", "b"), c("Feature.ID", "Taxon", "Confidence"))
)
expect_true(is.na(mia:::.parse_taxonomy(test_taxa)[2,"species"]))
# if the expexted order is not present it will return a correct result
test_taxa <- matrix(
c("a", "k__Bacteria; s__test; c__Bacteroidia", 0.88,
"b", "k__Bacteria; c__Clostridia;", 0.9),
ncol = 3,
byrow = TRUE,
dimnames = list(c("a", "b"), c("Feature.ID", "Taxon", "Confidence"))
)
expect_equal(mia:::.parse_taxonomy(test_taxa)[,"species"],c("s__test",NA))
expect_equal(mia:::.parse_taxonomy(test_taxa, prefix.rm = TRUE)[,"species"],
c("test",NA))
# Expect that output have only taxonomy levels that have information
expect_equal(ncol(mia:::.parse_taxonomy(test_taxa)), 3L)
})
test_that("`.read_q2sample_meta` remove the row contained `#q2:types`", {
expect_false(any(as(.read_q2sample_meta(col.file), "matrix") == "#q2:types"))
})
test_that('get file extension', {
expect_identical(.get_ext("a.b.c"), "c")
expect_identical(.get_ext("abc/a.b.c"), "c")
})
test_that('read qza file', {
expect_error(importQZA("abc"), "does not exist")
expect_error(importQZA(col.file), "must be in `qza` format")
})
test_that("Confidence of taxa is numberic", {
skip_if_not(require("biomformat", quietly = TRUE))
tse <- importQIIME2(
assay.file,
row.file = row.file
)
expect_true(is.numeric(S4Vectors::mcols(tse)$Confidence))
})
test_that("dimnames of feature table is identicle with meta data", {
skip_if_not(require("biomformat", quietly = TRUE))
feature_tab <- importQZA(assay.file)
sample_meta <- .read_q2sample_meta(col.file)
taxa_meta <- importQZA(row.file)
taxa_meta <- .subset_taxa_in_feature(taxa_meta, feature_tab)
new_feature_tab <- .set_feature_tab_dimnames(
feature_tab,
sample_meta,
taxa_meta
)
expect_identical(rownames(new_feature_tab), rownames(taxa_meta))
expect_identical(colnames(new_feature_tab), rownames(sample_meta))
# sample_meta or feature meta is NULL
sample_meta2 <- S4Vectors::make_zero_col_DFrame(ncol(feature_tab))
rownames(sample_meta2) <- colnames(feature_tab)
taxa_meta2 <- S4Vectors::make_zero_col_DFrame(nrow(feature_tab))
rownames(taxa_meta2) <- rownames(feature_tab)
expect_silent(.set_feature_tab_dimnames(feature_tab, sample_meta2, taxa_meta))
# sample meta or feature meta without any information, only contains sample/feature
# ID in its rownames
feature_tab3 <- S4Vectors::DataFrame(
sample1 = 1:3,
sample2 = 4:6,
sample3 = 7:9,
row.names = paste0("feature", 1:3)
)
sample_meta3 <- S4Vectors::DataFrame(row.names = paste0("sample", 3:1))
feature_meta3 <- S4Vectors::DataFrame(row.names = paste0("feature", c(2, 3, 1)))
new_feature_tab3 <- .set_feature_tab_dimnames(
feature_tab3,
sample_meta3,
feature_meta3
)
expect_identical(row.names(new_feature_tab3), paste0("feature", c(2, 3, 1)))
expect_identical(colnames(new_feature_tab3), paste0("sample", 3:1))
})
test_that("convertToPhyloseq", {
skip_if_not_installed("phyloseq")
# TSE object
data(GlobalPatterns, package="mia")
tse <- GlobalPatterns
phy <- convertToPhyloseq(GlobalPatterns)
# Test that assay is in otu_table
expect_equal(as.data.frame(phyloseq::otu_table(phy)@.Data), as.data.frame(assays(tse)$counts))
# Test that rowData is in tax_table
expect_equal(as.data.frame(phyloseq::tax_table(phy)@.Data), as.data.frame(rowData(tse)))
# Test that colData is in sample_table
expect_equal(phyloseq::sample_data(phy),
phyloseq::sample_data(data.frame(colData(tse))))
# Test that rowTree is in phy_tree
expect_identical(phyloseq::phy_tree(phy), rowTree(tse))
# Test that referenceSeq is in refseq. Expect error, because there should not be
# reference sequences.
expect_error(phyloseq::refseq(phy))
# Test with agglomeration that that pruning is done internally
test1 <- agglomerateByRank(tse, rank = "Phylum")
test2 <- agglomerateByRank(tse, rank = "Phylum", update.tree = TRUE)
test1_phy <- expect_warning(convertToPhyloseq(test1))
test2_phy <- convertToPhyloseq(test2)
expect_equal(length(phyloseq::phy_tree(test1_phy)$node),
length(ape::keep.tip(rowTree(test1), rowLinks(test1)$nodeLab)$node))
expect_equal(phyloseq::phy_tree(test1_phy)$tip.label, rownames(test2))
# The tip labels do not match because of renaming
expect_identical(phyloseq::phy_tree(test2_phy)$edge, rowTree(test2)$edge)
# Check that everything works also with agglomerated data
for (level in colnames(rowData(tse)) ){
temp <- agglomerateByRank(tse, rank = level)
expect_warning(convertToPhyloseq(temp))
}
tse2 <- tse
# Concerts data frame to factors
rowData(tse2) <- DataFrame(lapply(rowData(tse2), as.factor))
phy <- convertToPhyloseq(tse)
phy2 <- convertToPhyloseq(tse2)
expect_equal(phyloseq::tax_table(phy2), phyloseq::tax_table(phy))
# TSE object
data(esophagus, package="mia")
tse <- esophagus
phy <- convertToPhyloseq(tse, assay.type="counts")
# Test that assay is in otu_table
expect_equal(as.data.frame(phyloseq::otu_table(phy)@.Data), as.data.frame(assays(tse)$counts))
# Test that rowTree is in phy_tree
expect_identical(phyloseq::phy_tree(phy), rowTree(tse))
# Test that merging objects lead to correct phyloseq
tse <- mergeSEs(GlobalPatterns, esophagus, assay.type="counts", missing.values = 0)
pseq <- convertToPhyloseq(tse, assay.type="counts")
# Include rownames from both trees
tse_compare <- tse[ c(rownames(GlobalPatterns), rownames(esophagus)), ]
pseq_compare <- convertToPhyloseq(tse_compare, assay.type="counts")
expect_equal(phyloseq::otu_table(pseq), phyloseq::otu_table(pseq_compare))
})
test_that("Import HUMAnN file", {
file_path <- system.file("extdata", "humann_output.tsv", package = "mia")
tse <- importHUMAnN(file_path)
#
expect_true( length(taxonomyRanks(tse)) == 2 )
expect_true( ncol(rowData(tse)) == 4 )
expect_true( ncol(tse) == 3 )
expect_true( nrow(tse) == 12 )
expect_true( all(!is.na(assay(tse))) )
#
expect_error(importHUMAnN("test"))
expect_error(importHUMAnN(file_path, remove.suffix="test"))
expect_error(importHUMAnN(file_path, remove.suffix=1))
expect_error(importHUMAnN(file_path, remove.suffix=c(FALSE, TRUE)))
#
tse2 <- importHUMAnN(file_path, remove.suffix=TRUE)
# There is no suffix, should be equal
expect_equal(tse, tse2)
#
cd <- colData(tse)
tse2 <- importHUMAnN(file_path, colData = cd)
# colData should not change the result
expect_equal(tse, tse2)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.