#' @title Generates a SingleCellExperiment from .txt files
#'
#' @description Helper function to process raw .txt files acquired by the
#' Hyperion imaging system into a \code{\linkS4class{SingleCellExperiment}}
#' object. This function is mainly used to read-in data generated from a
#' "spillover slide". Here, each .txt file contains the measurements of
#' multiple pixels for a single stain across all open channels.
#'
#' @param x input can be of different types:
#' \describe{
#' \item{A path}{Full path to where the single stain .txt files are located.}
#' \item{A list object}{A named list object where each entry is a
#' \code{data.frame} or coercible to one. The names of each entry indicate the
#' spotted metals (see details).}
#' }
#' @param pattern pattern to select which files should be read in (default
#' \code{".txt$"}). Only used when \code{x} is a path.
#' @param metadata_cols character vector indicating which column entries of the
#' .txt files should be saved in the \code{colData(sce)} slot.
#' @param verbose logical indicating if additional information regarding the
#' spotted and acquired masses should be shown.
#' @param read_metal_from_filename should the sample metal and mass be extracted
#' from the file/object names?
#'
#' @return returns a SCE object where pixels are stored as columns and acquired
#' channels are stored as rows.
#'
#' @section Reading in .txt files for spillover correction:
#'
#' As described in the original publication, single metal spots are acquired
#' using the Hyperion imaging system. Each acquisition corresponds to one spot.
#' All acquisitions are stored in a single .mcd file and individual acquisitions
#' are stored in single .txt files.
#'
#' This function aggregates these measurements into a single
#' \code{SingleCellExperiment} object. For this, two inputs are possible:
#'
#' \enumerate{
#' \item \code{x} is a path:
#' By default all .txt files are read in from the specified path. Here, the path
#' should indicate the location of the spillover slide measurement. The file
#' names of the .txt file must contain the spotted metal isotope name in the
#' format \code{(mt)(mass)} (e.g. \code{Sm152} for Samarium isotope with the
#' atomic mass 152). Internally, the last occurrence of such a pattern is read
#' in as the metal isotope name and stored in the \code{colData(sce)$sample_id}
#' slot.
#'
#' \item \code{x} is a named list:
#' If there are issues with reading in the metal isotope names from the .txt
#' file names, the user can provide a list for which each entry contains the
#' contents of a single .txt file. The names of the list must indicate the
#' spotted metal in the format \code{(mt)(mass)}. These names will be stored in
#' the \code{colData(sce)$sample_id} slot.
#' }
#'
#' When \code{read_metal_from_filename = FALSE}, the function will not attempt
#' to read in the spotted metal isotopes from the file or list names. Therefore,
#' only the \code{sample_id} will be set based on the file/list names.
#'
#' @examples
#' # Read files from path
#' path <- system.file("extdata/spillover", package = "imcRtools")
#'
#' sce <- readSCEfromTXT(path)
#' sce
#'
#' # Read files as list
#' cur_file_names <- list.files(path, pattern = ".txt", full.names = TRUE)
#' cur_files <- lapply(cur_file_names, read.delim)
#' names(cur_files) <- sub(".txt", "", basename(cur_file_names))
#'
#' sce <- readSCEfromTXT(cur_files)
#' sce
#'
#' @author Nils Eling (\email{nils.eling@@dqbm.uzh.ch})
#'
#' @references
#' \href{https://www.sciencedirect.com/science/article/pii/S1550413118306910}{Chevrier,
#' S. et al. 2017. “Compensation of Signal Spillover in Suspension and Imaging
#' Mass Cytometry.” Cell Systems 6: 612–20.}
#'
#' @importFrom SingleCellExperiment SingleCellExperiment
#' @importFrom S4Vectors DataFrame
#' @importFrom SummarizedExperiment colData<- rowData<-
#' @importFrom stringr str_extract str_split str_extract_all
#' @importFrom readr read_delim
#' @export
readSCEfromTXT <- function(x,
                           pattern = ".txt$",
                           metadata_cols = c("Start_push", "End_push",
                                             "Pushes_duration", "X",
                                             "Y", "Z"),
                           verbose = TRUE,
                           read_metal_from_filename = TRUE) {

    # Metal isotope name: element symbol (one upper-case letter, optionally
    # one lower-case letter) followed by a two or three digit mass.
    isotope_regex <- "[A-Z]{1}[a-z]{0,1}[0-9]{2,3}"

    if (is.character(x) && length(x) == 1) {
        # 'x' is a path: read every matching file into a named list
        if (!dir.exists(x)) {
            stop("Path does not exist.")
        }

        # List the files once so that names and contents always stay aligned
        txt_files <- list.files(x, pattern = pattern, full.names = TRUE)

        if (length(txt_files) == 0) {
            stop("Files could not be read in.")
        }

        file_names <- basename(txt_files)

        if (read_metal_from_filename) {
            # The last isotope-like token of each file name is the spotted
            # metal. vapply() keeps exactly one entry per file, so a file
            # without a match cannot shift the names of the other files.
            all_matches <- str_extract_all(file_names, isotope_regex)
            cur_names <- vapply(all_matches, function(y) {
                if (length(y) == 0) NA_character_ else y[length(y)]
            }, character(1))

            if (anyNA(cur_names)) {
                stop("Could not extract the metal isotope name from: ",
                     paste(file_names[is.na(cur_names)], collapse = ", "))
            }
        } else {
            # Only strip the file extension
            cur_names <- sub("\\.[^.]*$", "", file_names)
        }

        txt_list <- lapply(txt_files, read_delim, delim = "\t",
                           show_col_types = FALSE)
        txt_list <- lapply(txt_list, as.data.frame)
        names(txt_list) <- cur_names

    } else if (is.list(x)) {
        # 'x' is a list: the entry names identify the individual spots
        if (is.null(names(x))) {
            stop("If 'x' is a list, it needs to be named.")
        }

        cur_names <- names(x)
        txt_list <- lapply(x, as.data.frame)

    } else {
        stop("Input 'x' is not of the correct format.")
    }

    .valid.readSCEfromTXT.input(txt_list, cur_names,
                                metadata_cols, verbose,
                                read_metal_from_filename)

    # One entry name per pixel (row), in the order in which rbind() stacks
    # the entries. Taken from the list names rather than parsed back out of
    # the row names created by rbind(), so names containing "." stay intact.
    n_pixels <- vapply(txt_list, nrow, integer(1), USE.NAMES = FALSE)
    pixel_names <- rep(names(txt_list), times = n_pixels)

    cur_out <- do.call(rbind, txt_list)

    # Construct SCE object
    cell_meta <- DataFrame(cur_out[metadata_cols])

    if (read_metal_from_filename) {
        cell_meta$sample_id <- str_extract(pixel_names,
                                           paste0("^", isotope_regex))
        cell_meta$sample_metal <- str_extract(cell_meta$sample_id,
                                              "^[A-Z]{1}[a-z]{0,1}")
        cell_meta$sample_mass <- str_extract(cell_meta$sample_id,
                                             "[0-9]{2,3}$")
    } else {
        cell_meta$sample_id <- pixel_names
    }

    # Every column whose name contains an isotope pattern is a channel;
    # transpose so that channels are rows and pixels are columns.
    is_channel <- grepl(isotope_regex, colnames(cur_out))
    cur_counts <- t(cur_out[is_channel])

    channel_name <- str_extract(rownames(cur_counts),
                                paste0(isotope_regex, "Di"))
    channel_meta <- DataFrame(channel_name = channel_name,
                              marker_name = sub("Di", "", channel_name))

    sce <- SingleCellExperiment(assays = list(counts = cur_counts))
    colData(sce) <- cell_meta
    rowData(sce) <- channel_meta

    sce
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.