#' Count reads with featureCounts
#'
#' @param sample_info Data frame of sample metadata created with the
#' functions \code{create_sample_info} and \code{infer_strandedness}.
#' @param mappingdir Directory where .bam files are stored.
#' @param gff_path Path to GFF/GTF file with annotations.
#' @param fcountsdir Directory where the matrix of gene-level read counts will
#' be stored.
#' @param threads Number of threads for featureCounts. Default: 2.
#'
#' @return A gene expression matrix with genes in row names and samples in
#' column names. Two .tsv files with the gene expression matrix and count stats
#' are saved to `fcountsdir`.
#' @importFrom rtracklayer import export
#' @importFrom tools file_ext
#' @importFrom Rsubread featureCounts
#' @importFrom utils write.table
#' @export
#' @rdname featureCounts
#' @examples
#' data(sample_info)
#' mappingdir <- system.file("extdata", package="bears")
#' gff_path <- system.file("extdata", "Homo_sapiens.GRCh37.75_subset.gtf",
#' package="bears")
#' fcountsdir <- file.path(tempdir(), "fcountsdir")
#' if(subread_is_installed()) {
#' counts <- featureCounts(sample_info, mappingdir, gff_path, fcountsdir)
#' }
featureCounts <- function(sample_info = NULL,
mappingdir = "results/04_read_mapping",
gff_path = NULL,
fcountsdir = "results/05_quantification/featureCounts",
threads = 2) {
if(!subread_is_installed()) { stop("Unable to find subread in PATH.") }
if(!dir.exists(fcountsdir)) { dir.create(fcountsdir, recursive = TRUE) }
bamfiles <- list.files(mappingdir, pattern = ".bam", full.names = FALSE)
bamfiles <- gsub("Aligned.*", "", bamfiles)
sample_meta <- sample_info[!duplicated(sample_info$BioSample), ]
sample_meta <- sample_meta[sample_meta$BioSample %in% bamfiles, ]
is_paired <- ifelse(sample_meta$Layout == "PAIRED", TRUE, FALSE)
strandedness <- as.integer(vapply(seq_len(nrow(sample_meta)), function(x) {
return(translate_strandedness(sample_meta[x, "Orientation"],
sample_meta[x, "Layout"])$fcounts)
}, numeric(1)))
file <- paste0(mappingdir, "/",
sample_meta$BioSample, "Aligned.sortedByCoord.out.bam")
if(tools::file_ext(gff_path) == "gff3") {
gff <- rtracklayer::import(gff_path)
gff_path <- tempfile(fileext = ".gtf")
export <- rtracklayer::export(gff, gff_path)
}
fc <- Rsubread::featureCounts(
file, annot.ext = gff_path, nthreads = threads, minMQS = 20,
isPairedEnd = is_paired, strandSpecific = strandedness,
isGTFAnnotationFile = TRUE
)
exp_matrix <- fc$counts
colnames(exp_matrix) <- gsub("Aligned.*", "", colnames(exp_matrix))
utils::write.table(exp_matrix, quote=FALSE, sep = "\t", row.names = TRUE,
file = paste0(fcountsdir, "/exp_matrix.tsv"))
write.table(fc$stat, quote=FALSE, sep="\t", row.names = FALSE,
file = paste0(fcountsdir, "/count_stats_per_sample.tsv"))
return(exp_matrix)
}
#' Create a SummarizedExperiment object from featureCounts output
#'
#' @param sample_info Data frame of sample metadata created with the
#' functions \code{create_sample_info}.
#' @param fc_output Either a gene expression matrix generated by \code{fcount},
#' or the path to the .tsv file containing the gene expression matrix as
#' generated by \code{fcount}.
#'
#' @return A SummarizedExperiment object.
#' @rdname featureCounts2se
#' @export
#' @importFrom SummarizedExperiment SummarizedExperiment
#' @importFrom methods is
#' @importFrom utils read.table
#' @examples
#' data(sample_info)
#' mappingdir <- system.file("extdata", package="bears")
#' gff_path <- system.file("extdata", "Homo_sapiens.GRCh37.75_subset.gtf",
#' package="bears")
#' fcountsdir <- file.path(tempdir(), "fcountsdir")
#' if(subread_is_installed()) {
#' counts <- featureCounts(sample_info, mappingdir, gff_path, fcountsdir)
#' se <- featureCounts2se(sample_info, counts)
#' }
featureCounts2se <- function(sample_info = NULL,
fc_output = NULL) {
if(!is(fc_output, "matrix")) { fc_output <- utils::read.table(fc_output) }
sample_metadata <- sample_info[!duplicated(sample_info$BioSample), ]
samples <- colnames(fc_output)
coldata <- sample_metadata[sample_metadata$BioSample %in% samples, ]
rownames(coldata) <- coldata$BioSample
coldata$BioSample <- NULL
se <- SummarizedExperiment::SummarizedExperiment(
assays = list(fcounts = fc_output), colData = coldata
)
return(se)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.