# Chunk defaults for this vignette: merge source and output into a single
# block and prefix printed output with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

library(MicrobiomeR)

# Handle to the package's private data object (reached with `:::`).
pkg.data <- MicrobiomeR:::pkg.private
# Locate the example input files that ship with the package.
input_files <- pkg.data$input_files
biom_file <- input_files$biom_files$silva # Path to the silva biom file
tree_file <- input_files$tree_files$silva # Path to the silva tree file
metadata_file <- input_files$metadata$two_groups # Path to the Nephele metadata

# Custom phyloseq parsing function for silva annotations.
parse_func <- parse_taxonomy_silva_128

# Build the phyloseq object from the inputs above.
phy_obj <- create_phyloseq(
  biom_file = biom_file,
  tree_file = tree_file,
  metadata_file = metadata_file,
  parse_func = parse_func
)

# Convert the phyloseq object into a taxmap object in the raw format.
raw_metacoder <- as_MicrobiomeR_format(
  obj = phy_obj,
  format = "raw_format"
)
Metacoder is a highly useful package that comes with tons of features right out of the box.
Functions such as `metacoder::filter_ambiguous_taxa()`, `taxa::filter_taxa()`, and
`taxa::filter_obs()` can almost always be used in your workflow.
# Drop Archaea from the taxmap object: the condition selects Archaea
# (with its subtaxa) and `invert = TRUE` keeps everything else.
metacoder_obj <- taxa::filter_taxa(
  obj = raw_metacoder,
  taxon_names == "Archaea",
  subtaxa = TRUE,
  invert = TRUE
)

# Ambiguous annotation filter: remove taxonomies with ambiguous names.
metacoder_obj <- metacoder::filter_ambiguous_taxa(
  metacoder_obj,
  subtaxa = TRUE
)
Three functions are provided by MicrobiomeR that do some basic filtering:
- `sample_id_filter()` for filtering samples.
- `taxon_id_filter()` for filtering by taxon_id, which includes intermediate taxa.
- `otu_id_filter()` for filtering by otu_id, which only includes leaf taxa.

These functions all take the same parameters, most notably a transformation function (`.f_transform`), a filtering function (`.f_filter`), and a conditional function (`.f_condition`).
# Low sample filter: remove the low samples.
# The sample filter should generally be applied before any other filter.
metacoder_obj <- sample_id_filter(
  obj = metacoder_obj,
  .f_filter = ~ sum(.),
  .f_condition = ~ . >= 20,
  validated = TRUE
)
The advanced filtering functions available with MicrobiomeR do several things. They wrap the basic filtering functions mentioned above, they wrap common metacoder and taxa functions, and they mimic the tools found in the phyloseq package. Below I've mentioned some of them and how they relate to the phyloseq package:
- `agglomerate_taxmap()` is equivalent to `phyloseq::tax_glom()`.
- `otu_proportion_filter()` is seen in the first step in phyloseq's preprocessing vignette.
- Prevalence Filtering functions filter observations by their prevalence across samples.
  - `otu_prevalence_filter()` is seen in phyloseq's prevalence filtering vignette.
  - `taxa_prevalence_filter()` is seen in phyloseq's taxonomic filtering vignette.
- `cov_filter()`
is seen in the ninth step in phyloseq's preprocessing vignette.

# Master Threshold Filter - Add the otu_proportions table and then filter
# OTUs based on a minimum proportion.
metacoder_obj <- otu_proportion_filter(
  obj = metacoder_obj,
  otu_percentage = 0.00001
)

# Taxon Prevalence Filter - Add taxa_abundance and taxa_proportions and then
# filter OTUs that do not appear more than a certain number of times in a
# certain percentage of samples at the specified agglomerated rank. This is
# considered a supervised method, because it relies on intermediate
# taxonomies to filter the data.
# The default minimum abundance is 5 and the sample percentage is 0.5, which
# as a fraction is 50% (an earlier note here said 5%).
# NOTE(review): confirm both defaults against ?taxa_prevalence_filter.
# Phylum
metacoder_obj <- taxa_prevalence_filter(
  obj = metacoder_obj,
  rank = "Phylum"
)
# Class
metacoder_obj <- taxa_prevalence_filter(
  obj = metacoder_obj,
  rank = "Class",
  validated = TRUE
)
# Order
metacoder_obj <- taxa_prevalence_filter(
  obj = metacoder_obj,
  rank = "Order",
  validated = TRUE
)

# OTU Prevalence Filter - Filter OTUs that do not appear more than a certain
# number of times in a certain percentage of samples. This is considered an
# unsupervised method, because it relies only on the leaf OTU ids to filter
# the data.
metacoder_obj <- otu_prevalence_filter(
  obj = metacoder_obj,
  validated = TRUE
)

# Coefficient of Variation Filter - Filter OTUs based on the coefficient of
# variation.
metacoder_obj <- cov_filter(
  obj = metacoder_obj,
  coefficient_of_variation = 3,
  validated = TRUE
)
As mentioned previously, taxmap filtering can be done in any way that fits your needs with the
`taxa` and `metacoder` packages. However, MicrobiomeR also provides some utility functions for
filtering/manipulating your observation data by hand. Observation data can be accessed within
the taxmap object. Make sure you don't manipulate existing data inside of your taxmap object
unless you're absolutely sure you know what you're doing.
# Pull the abundance tables out of the taxmap object.
taxa_abund <- metacoder_obj$data$taxa_abundance
otu_abund <- metacoder_obj$data$otu_abundance

# Transpose with one ID column (taxon_id).
taxa_abund %>%
  transposer(ids = "taxon_id", header_name = "samples")

# Transpose and then re-transpose with one ID column (taxon_id).
taxa_abund %>%
  transposer("taxon_id", "samples") %>%
  transposer("samples", "taxon_id")

# Transpose with two ID columns (taxon_id, otu_id).
# The column headers will be a combination of the categorical data, which is
# parsed and split back into individual columns if re-transposed.
otu_abund %>%
  transposer("otu_id", "samples")

# Transpose and then re-transpose with two ID columns (taxon_id, otu_id).
# Transposing categorical data loses the column headers, so they have to be
# supplied again when re-transposing.
otu_abund %>%
  transposer("otu_id", "samples") %>%
  transposer(
    "samples",
    "otu_id",
    separated_categories = c("taxon_id", "otu_id")
  )
# Transform to proportions/percentages by COLUMN.
taxa_abund %>%
  transformer(func = ~ . / sum(.))

# Transforming by ROW is also possible.
# The function transposes and re-transposes the table, so the extra
# transposing information has to be supplied.
taxa_abund %>%
  transformer(
    by = "row",
    func = ~ . / sum(.),
    ids = "taxon_id",
    header_name = "samples",
    separated_categories = c("taxon_id")
  )
The `vlookup()` function was borrowed from <https://www.r-bloggers.com/an-r-vlookup-not-so-silly-idea/>.
# Convert the taxmap object to the analyzed format.
metacoder_obj <- as_MicrobiomeR_format(
  obj = metacoder_obj,
  format = "analyzed_format"
)

# Create agglomerated taxmaps at the Phylum and Class ranks.
phylum_mo <- agglomerate_taxmap(obj = metacoder_obj, rank = "Phylum")
class_mo <- agglomerate_taxmap(obj = metacoder_obj, rank = "Class")

# Get some observation data from each agglomerated taxmap.
phylum_data <- phylum_mo$data$stats_tax_data
class_data <- class_mo$data$stats_tax_data

# Take each Phylum in phylum_data, cross-reference it in class_data, and
# return the matching "wilcox_p_value" from class_data.
class_p_value <- vlookup(
  lookup_vector = phylum_data$Phylum,
  df = class_data,
  match_var = "Phylum",
  return_var = "wilcox_p_value"
)

# Add the looked-up values to phylum_data as a new column.
new_data <- phylum_data %>%
  dplyr::mutate(class_p_value = class_p_value)
new_data[c("taxon_id", "Phylum", "Class", "class_p_value", "wilcox_p_value")]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.