#' Read ImmunoSeq files
#'
#' Imports tab-separated value (.tsv) files exported by the Adaptive
#' Biotechnologies ImmunoSEQ analyzer and stores them as a list of data frames.
#'
#' @param path Path to the directory containing tab-delimited files. Only
#' files with the extension .tsv are imported. The names of the data frames are
#' the same as names of the files.
#' @param columns Column names from the tab-delimited files that you desire to
#' import, all others will be disregarded. May use "all" to import all columns.
#' A warning may be called if columns contain no data or must be coereced to a
#' different class. Usually this warning can be ignored.
#' @param recursive A Boolean value indicating whether tab-delimited files in
#' subdirectories should be imported. If TRUE, then all files in the parent as
#' well as the subdirectory are imported. If FALSE, then only files in the
#' parent directory are imported.
#' @details May import tab-delimited files containing antigen receptor
#' sequencing from other sources (e.g. miTCR or miXCR) as long as the column
#' names are the same as used by ImmunoSEQ files. Available column headings in
#' ImmunoSEQ files are:
#' "nucleotide", "aminoAcid", "count", "count (templates)", "count (reads)",
#' "count (templates/reads)", "frequencyCount", "frequencyCount (\%)", "cdr3Length",
#' "vMaxResolved", "vFamilyName", "vGeneName", "vGeneAllele", "vFamilyTies",
#' "vGeneNameTies", "vGeneAlleleTies", "dMaxResolved", "dFamilyName", "dGeneName",
#' "dGeneAllele", "dFamilyTies", "dGeneNameTies", "dGeneAlleleTies", "jMaxResolved",
#' "jFamilyName", "jGeneName", "jGeneAllele", "jFamilyTies", "jGeneNameTies",
#' "jGeneAlleleTies", "vDeletion", "d5Deletion", "d3Deletion", "jDeletion",
#' "n2Insertion", "n1Insertion", "vIndex", "n2Index", "dIndex", "n1Index",
#' "jIndex", "estimatedNumberGenomes", "sequenceStatus", "cloneResolved",
#' "vOrphon", "dOrphon", "jOrphon", "vFunction", "dFunction", "jFunction",
#' "fractionNucleated".
#'
#' IMPORTANT: be aware that Adaptive has changed the
#' column names of their files over time and if the headings of your files are
#' inconsistent, then specify column = "all" or include all variations of the
#' headings you want to important. For example, column = c("count",
#' "count (templates)", "count (reads)"). Also be aware that the "count"
#' column previously reported the number of sequencing reads in earlier
#' versions of ImmunoSEQ but now is equivalent to the
#' "estimatedNumberGenomes" column.
#' @return Returns a list of data frames. The names of each data frame are
#' assigned according to the original ImmunoSEQ file names.
#' @examples
#' file.path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeq")
#'
#' file.list <- readImmunoSeq(path = file.path,
#' columns = c("aminoAcid", "nucleotide", "count",
#' "count (templates)", "count (reads)",
#' "count (templates/reads)",
#' "frequencyCount", "frequencyCount (%)",
#' "estimatedNumberGenomes"),
#' recursive = FALSE)
#' @export
#' @importFrom data.table fread
#' @importFrom plyr llply
readImmunoSeq <- function(path, columns = c("aminoAcid", "nucleotide", "count",
"count (templates)", "count (reads)",
"count (templates/reads)",
"frequencyCount", "frequencyCount (%)",
"estimatedNumberGenomes", "vFamilyName",
"dFamilyName", "jFamilyName","vGeneName",
"dGeneName", "jGeneName"),
recursive = FALSE) {
file.paths1 <- list.files(path, full.names = TRUE, all.files = FALSE,
recursive = recursive, pattern = ".tsv",
include.dirs = FALSE)
file.info <- file.info(file.paths1)
file.paths2 <- rownames(file.info)[file.info$size > 0]
if(!identical(file.paths1,file.paths2)){
warning("One or more of the files you are trying to import has no sequences and will be ignored.", call. = FALSE)
}
if (any(columns == "all")) {
file.list <- suppressWarnings(plyr::llply(file.paths2, data.table::fread,
data.table = FALSE, .progress = "text", fill = TRUE))
} else {
file.list <- suppressWarnings(plyr::llply(file.paths2, data.table::fread, select = columns,
data.table = FALSE, .progress = "text", fill = TRUE))
if(length(unique(plyr::llply(file.list, ncol))) > 1){
warning("One or more of the files you are trying to import do not contain all the columns you specified.", call. = FALSE)
}
}
file.names <- gsub(".tsv", "", basename(file.paths2))
names(file.list) <- file.names
i <- 1
for (i in 1:length(file.list)) {
colnames(file.list[[i]]) <- ifelse(grepl("frequencyCount*",
colnames(file.list[[i]])),
"frequencyCount",
colnames(file.list[[i]]))
colnames(file.list[[i]]) <- ifelse(grepl("count*",
colnames(file.list[[i]])),
"count",
colnames(file.list[[i]]))
}
return(file.list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.