# Nothing
#' Read Exon level files and create a GRangesList
#'
#' This function reads exon-level expression data. It works with exon
#' quantification (raw counts and RPKM) and junction quantification
#' (raw counts) file paths and represents such data as a
#' \linkS4class{GRangesList}. The data can be downloaded
#' via the TCGA Legacy Archive. File name and structure requirements are as
#' follows: The third position delimited by dots (".") in the file name should
#' be the universally unique identifier (UUID). The column containing the
#' ranged information is labeled "exon."
#'
#' Files are read with \code{readr::read_delim} when the \code{readr} package
#' is installed and with \code{read.delim} otherwise. The tables are combined
#' with \code{dplyr::bind_rows} when \code{dplyr} is installed and with
#' \code{rbind} otherwise.
#'
#' @param filepaths A \code{character} vector of valid exon data file paths
#' @param sampleNames A \code{character} vector of TCGA barcodes to be applied
#'     if not present in the data (default NULL). When supplied, it must have
#'     the same length as \code{filepaths}. When NULL, the names are looked up
#'     from the file names with \code{filenameToBarcode}.
#' @param fileNames A \code{character} vector of file names as downloaded from
#'     the Genomic Data Commons Legacy archive (default NULL). Only used for
#'     the barcode lookup when \code{sampleNames} is NULL; the base names of
#'     \code{filepaths} are used when this is NULL.
#' @param rangesColumn (default "exon") A single string indicating the name of
#'     the column in the data containing the ranges information. The column
#'     must be present in every file.
#' @param nrows The number of rows to return from each of the files read in
#'     (all rows by default)
#'
#' @return A \linkS4class{GRangesList} object, obtained by splitting the
#'     combined ranges by sample name. All columns other than
#'     \code{rangesColumn} are kept as metadata columns.
#'
#' @author M. Ramos
#'
#' @examples
#'
#' ## Load example file found in package
#' pkgDir <- system.file("extdata", package = "TCGAutils", mustWork = TRUE)
#' exonFile <- list.files(pkgDir, pattern = "cation\\.txt$", full.names = TRUE)
#'
#' filePrefix <- "unc.edu.32741f9a-9fec-441f-96b4-e504e62c5362.1755371."
#'
#' ## Add actual file name manually (due to Windows OS restriction)
#' makeGRangesListFromExonFiles(exonFile,
#'     fileNames = paste0(filePrefix, basename(exonFile)),
#'     sampleNames = "TCGA-AA-3678-01A-01R-0905-07")
#'
#' @export makeGRangesListFromExonFiles
makeGRangesListFromExonFiles <- function(filepaths, sampleNames = NULL,
    fileNames = NULL, rangesColumn = "exon", nrows = Inf)
{
    if (!is.null(sampleNames)) {
        if (length(filepaths) != length(sampleNames))
            stop("Inconsistent sample names obtained from file names")
    } else {
        queryNames <-
            if (!is.null(fileNames)) fileNames else basename(filepaths)
        sampleNames <-
            filenameToBarcode(queryNames, TRUE)[["aliquots.submitter_id"]]
        ## An empty lookup is tolerated (handled below as "no names"), but a
        ## non-empty result has to pair one-to-one with the files; otherwise
        ## the names would be padded with NA or rejected by `names<-`.
        if (length(sampleNames) && length(sampleNames) != length(filepaths))
            stop("Inconsistent sample names obtained from file names")
    }

    ## Decide on the reader once instead of once per file
    useReadr <- requireNamespace("readr", quietly = TRUE)
    btData <- lapply(filepaths, function(file) {
        if (useReadr)
            readr::read_delim(file, delim = "\t", n_max = nrows)
        else
            read.delim(file, sep = "\t",
                nrows = if (is.infinite(nrows)) -1 else nrows)
    })

    ## Fail early, naming the offending files, rather than letting a missing
    ## column surface later as an unrelated error
    hasRanges <- vapply(btData, function(dat) {
        all(rangesColumn %in% names(dat))
    }, logical(1L))
    if (!all(hasRanges))
        stop("Column '", rangesColumn, "' not found in: ",
            paste(basename(filepaths[!hasRanges]), collapse = ", "))

    ## A zero-length set of names is treated as no names at all
    if (!length(sampleNames))
        sampleNames <- NULL
    names(btData) <- sampleNames

    allrowdata <- if (requireNamespace("dplyr", quietly = TRUE))
        dplyr::bind_rows(btData)
    else
        do.call(rbind, btData)

    newGRanges <- GenomicRanges::GRanges(allrowdata[[rangesColumn]])
    ## drop = FALSE keeps a data.frame (and its column name) even when only
    ## one metadata column remains on the base data.frame code path
    S4Vectors::mcols(newGRanges) <-
        allrowdata[, names(allrowdata) != rangesColumn, drop = FALSE]

    ## Repeat each sample name once per row read from its file so the
    ## combined ranges can be split back by sample
    splitIndx <- rep(names(btData), vapply(btData, nrow, integer(1L)))
    S4Vectors::splitAsList(newGRanges, splitIndx)
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.