R/IO-methods.R

Defines functions read_hdf5_biom write_biom read_biom

Documented in read_biom read_hdf5_biom write_biom

################################################################################
################################################################################
#' Read a biom-format file, returning a \code{biom-class}.
#'
#' Import the data from a biom-format file into R, represented as an instance
#' of the \code{\link{biom-class}}; essentially a \code{\link{list}} with 
#' special constraints that map to \href{http://biom-format.org/documentation/biom_format.html}{the biom-format definition}.
#' 
#' The BIOM file format (canonically pronounced biome) is designed to be a general-use format for representing biological sample by observation contingency tables. BIOM is a recognized standard for the \href{http://www.earthmicrobiome.org/}{Earth Microbiome Project} and is a \href{http://gensc.org/}{Genomics Standards Consortium} candidate project. Please see \href{http://biom-format.org/}{the biom-format home page} for more details.
#' 
#' It is tempting to include an argument identifying the 
#' biom-format version number of the data file being imported.
#' However, the biom-format version number is a required
#' field in the biom-format definition. 
#' Rather than duplicate this formal specification
#' and allow the possibility of a conflict, the version 
#' number of the biom format will be referred to only by
#' the "format" field in the biom formatted data,
#' or its representation in R.
#'
#' @usage read_biom(biom_file)
#'
#' @param biom_file (Required). A character string indicating the 
#'  file location of the biom formatted file. This is a HDF5 or JSON formatted file
#'  specific to biological datasets. 
#'  The format is formally defined at \href{http://biom-format.org/documentation/biom_format.html}{the biom-format definition}
#'  and depends on the versioning.
#'
#' @return An instance of the \code{biom-class}.
#'
#' @seealso 
#' 
#' Function to create a biom object from R data,
#' \code{\link{make_biom}}.
#' 
#' Definition of the
#' \code{\link{biom-class}}. 
#' 
#' Function to write a biom format file from a biom object,
#' \code{\link{write_biom}}
#'
#' Accessor functions like \code{\link{header}}.
#'
#' @references \url{http://biom-format.org/}
#'
#' @importFrom jsonlite fromJSON
#' @export
#' @examples
#' # # # import with default parameters, specify a file
#' biom_file <- system.file("extdata", "rich_sparse_otu_table.biom", package = "biomformat")
#' biom_file
#' read_biom(biom_file)
#' biom_file <- system.file("extdata", "min_sparse_otu_table.biom", package = "biomformat")
#' biom_file
#' read_biom(biom_file)
#' ## The previous examples use system.file() because of constraints in specifying a fixed
#' ##   path within a reproducible example in a package. 
#' ## In practice, however, you can simply provide "hard-link"
#' ## character string path to your file:
#' # mybiomfile <- "path/to/my/biomfile.biom"
#' # read_biom(mybiomfile)
read_biom <- function(biom_file){	
	# Read the file, storing as a list 
	# generated by jsonlite w/ default JSON parsing params
	trash = try(silent=TRUE,
	            expr = {
	              x <- fromJSON(biom_file, simplifyDataFrame = FALSE, simplifyMatrix = FALSE)
	            })
	if(inherits(trash, "try-error")){
	  # If JSON interpretation attempt failed, try HDF5
	  trash = try(silent=TRUE,
	              expr = {x <- read_hdf5_biom(biom_file)})
	}
	if(inherits(trash, "try-error")){
	  # If still bad, throw helpful error.
	  stop("Both attempts to read input file:\n", 
	       biom_file, "\n", 
	       "either as JSON (BIOM-v1) or HDF5 (BIOM-v2).\n",
	       "Check file path, file name, file itself, then try again.")
	}
	# Use the biom() constructor function to 
	# instantiate a biom-class, perform validity checks. Return.
	return( biom(x) )
}
################################################################################
#' Write a biom-format v1 file, returning a \code{biom-class}.
#'
#' @param x (Required). A biom object that is going to be written to file
#'  as a proper biom formatted file, adhering to 
#'  \href{http://biom-format.org/documentation/biom_format.html}{the biom-format definition}.
#'  
#' @param biom_file (Required). A character string indicating the 
#'  file location of the biom formatted file. This is a JSON formatted file
#'  specific to biological datasets. 
#'  The format is formally defined at 
#'  \href{http://biom-format.org/documentation/biom_format.html}{the biom-format definition}
#'
#' @return Nothing. The first argument, \code{x}, is written to a file.
#'
#' @seealso 
#' 
#' Function to create a biom object from R data,
#' \code{\link{make_biom}}.
#' 
#' Definition of the
#' \code{\link{biom-class}}. 
#' 
#' The \code{\link{read_biom}} import function.
#'
#' Accessor functions like \code{\link{header}}.
#'
#' @references \url{http://biom-format.org/}
#'
#' @export
#' @importFrom jsonlite toJSON
#' @examples
#' biom_file <- system.file("extdata", "rich_sparse_otu_table.biom", package = "biomformat")
#' x = read_biom(biom_file)
#' outfile = tempfile()
#' write_biom(x, outfile)
#' y = read_biom(outfile)
#' identical(x, y) 
write_biom <- function(x, biom_file){
	cat(toJSON(x, always_decimal=TRUE, auto_unbox=TRUE), file=biom_file)
}
################################################################################
#' Read in a biom-format vs 2 file, returning a \code{list}.
#'
#' This function is meant only to be used if the user knows the file is
#' a particular version / hdf5 format. Otherwise, the `read_biom` file should be used.
#'
#' @param biom_file (Required). A biom object that is going to be written to file
#'  as a proper biom formatted file, adhering to 
#'  \href{http://biom-format.org/documentation/biom_format.html}{the biom-format definition}.
#'  
#' @return Nothing. The first argument, \code{x}, is written to a file.
#'
#' @seealso 
#' 
#' Function to create a biom object from R data,
#' \code{\link{make_biom}}.
#' 
#' Definition of the
#' \code{\link{biom-class}}. 
#' 
#' The \code{\link{read_hdf5_biom}} import function.
#'
#' Accessor functions like \code{\link{header}}.
#'
#' @references \url{http://biom-format.org/}
#'
#' @export
#' @importFrom rhdf5 h5read
#' @examples
#' biom_file <- system.file("extdata", "rich_sparse_otu_table_hdf5.biom", package = "biomformat")
#' x = read_hdf5_biom(biom_file)
#' x = biom(x)
#' outfile = tempfile()
#' write_biom(x, outfile)
#' y = read_biom(outfile)
#' identical(observation_metadata(x),observation_metadata(y))
#' identical(sample_metadata(x),sample_metadata(y))
#' identical(biom_data(x), biom_data(y))
read_hdf5_biom<-function(biom_file){
	x = h5read(biom_file,"/",read.attributes = TRUE)
	data = generate_matrix(x)
	rows = generate_metadata(x$observation)
	columns = generate_metadata(x$sample)
	shape = c(length(data),length(data[[1]]))
 
	id = attr(x,"id")
	vs = attr(x,"format-version")
	format = sprintf("Biological Observation Matrix %s.%s",vs[1],vs[2])
	format_url = attr(x,"format-url")
	type = "OTU table"
	generated_by = attr(x,"generated-by")
	date = attr(x,"creation-date")
	matrix_type = "dense"
	matrix_element_type = "int"
	
	namedList(id,format,format_url,type,generated_by,date,matrix_type,matrix_element_type,
		rows,columns,shape,data)
}
################################################################################
joey711/biomformat documentation built on Aug. 21, 2023, 2:24 p.m.