Nothing
#' Export data in SingleCellExperiment object
#' @param inSCE A \link[SingleCellExperiment]{SingleCellExperiment} object
#' that contains the data. QC metrics are stored in colData of the
#' singleCellExperiment object.
#' @param samplename Sample name. This will be used as name of subdirectories
#' and the prefix of flat file output. Default is 'sample'.
#' @param type Type of data. The type of data stored in SingleCellExperiment object.
#' It can be 'Droplets'(raw droplets matrix) or 'Cells' (cells matrix).
#' @param directory Output directory. Default is './'.
#' @param format The format of output. It currently supports flat files, rds files
#' and python h5 files. It can output multiple formats. Default: c("SCE", "AnnData", "FlatFile", "HTAN").
#' @return Generates a file containing data from \code{inSCE}, in specified \code{format}.
#' @examples
#' data(scExample)
#' \dontrun{
#' exportSCE(sce, format = "SCE")
#' }
#' @export
exportSCE <- function(inSCE,
samplename = "sample",
directory = "./",
type = "Cells",
format = c("SCE", "AnnData", "FlatFile", "HTAN")) {
if (any(!format %in% c("SCE", "AnnData", "FlatFile", "HTAN"))) {
warning("Output format must be 'SCE', 'AnnData', 'HTAN' or 'FlatFile'. Format ",
paste(format[!format %in% c("SCE", "AnnData", "FlatFile", "HTAN")], sep = ","),
" is not supported now. ") # "Only output the supported formats in the provided options. "
}
format <- format[format %in% c("SCE", "AnnData", "FlatFile", "HTAN")]
message("The output format is [",
paste(format, collapse = ","), "]. ")
if (length(format) == 0) {
warning("None of the provided format is supported now. Therefore, the output ",
"will be SCE, AnnData, FlatFile and HTAN. ")
format <- c("SCE", "AnnData", "FlatFile", "HTAN")
}
## Create directories and save objects
dir.create(file.path(directory, samplename), showWarnings = TRUE, recursive = TRUE)
if ("SCE" %in% format) {
## Export to R
fp <- file.path(directory, samplename, "R")
dir.create(fp, showWarnings = TRUE, recursive = TRUE)
fn <- file.path(fp, paste0(samplename , paste0("_", type, ".rds")))
saveRDS(object = inSCE, file = fn)
}
if ("FlatFile" %in% format) {
## Export to flatfile
fp <- file.path(directory, samplename, "FlatFile")
dir.create(fp, showWarnings = TRUE, recursive = TRUE)
fn <- file.path(fp, type)
exportSCEtoFlatFile(inSCE, outputDir = fn, sample=samplename)
}
if ("AnnData" %in% format) {
## Export to Python AnnData
fp <- file.path(directory, samplename, "Python")
dir.create(fp, showWarnings = TRUE, recursive = TRUE)
fn <- file.path(fp, type)
exportSCEtoAnnData(inSCE, outputDir=fn, compression='gzip', prefix=samplename)
}
}
#' Generate manifest file for droplet and cell count data
#' @param dropletSCE A \link[SingleCellExperiment]{SingleCellExperiment} object containing
#' droplet count matrix data
#' @param cellSCE A \link[SingleCellExperiment]{SingleCellExperiment} object containing
#' cell count matrix data
#' @param samplename The sample name of the \link[SingleCellExperiment]{SingleCellExperiment} objects
#' @param dir The output directory of the SCTK QC pipeline.
#' @param HTAN Whether generate manifest file with the format required by HTAN. Default is TRUE.
#' @param dataType Type of the input data. It can be one of "Droplet", "Cell" or "Both".
#' @return A \link[SingleCellExperiment]{SingleCellExperiment} object which combines all
#' objects in sceList. The colData is merged.
#' @export
#' @examples
#' data(scExample, package = "singleCellTK")
#' generateMeta(sce, dir = ".", samplename = "Sample",
#' HTAN = TRUE)
#' @importFrom SummarizedExperiment assay colData
generateMeta <- function(dropletSCE = NULL,
cellSCE = NULL,
samplename,
dir,
HTAN=TRUE,
dataType = c("Droplet", "Cell", "Both")) {
level3List <- list()
level4List <- list()
dataType = match.arg(dataType)
directory <- file.path(basename(dir), samplename)
filterDir <- file.path(directory, 'FlatFile', 'Cells')
rawDir <- file.path(directory, 'FlatFile', 'Droplets')
WorkFlowData = c(
WorkFlow = 'singleCellTK QC pipeline',
WorkFlowVer = paste('singleCellTK', utils::sessionInfo()$otherPkgs$singleCellTK$Version, sep=':'),
ParRaw = 'Ran perCellQC, EmptyDrops and barcodeRankDrops using singleCellTK',
ParFiltered = 'Ran perCellQC, doublet detection and decontX using singleCellTK',
PardecontX = 'Ran perCellQC, doublet detection and decontX using singleCellTK',
ColData = 'Ran perCellQC, doublet detection and decontX using singleCellTK',
DecontXUMAP = 'UMAP dimension reduction generated by decontX',
ScrubletTSNE = 'tSNE dimension reduction generated by Scrublet',
ScrubletUMAP = 'UMAP dimension reduction generated by Scrublet'
)
### calculate summary stats
if (dataType == "Droplet" | dataType == "Both") {
droplet_stat = c(CellNum = ncol(dropletSCE),
MedianReads = stats::median(colData(dropletSCE)$sum),
MedianGenes = stats::median(colData(dropletSCE)$detected),
DataType = 'Droplet Matrix',
FileName = file.path(rawDir, 'assays', paste0(samplename,'_counts.mtx.gz')))
}
if (dataType == "Cell" | dataType == "Both") {
decontX_stat = c(CellNum = ncol(cellSCE),
MedianReads = stats::median(Matrix::colSums(assay(cellSCE, 'decontXcounts'))),
MedianGenes = stats::median(Matrix::colSums(assay(cellSCE, 'decontXcounts') > 0)),
DataType = 'Decontaminated cell matrix return returned by runDecontX',
FileName = file.path(filterDir, 'assays', paste0(samplename,'_decontXcounts.mtx.gz')))
cell_stat = c(CellNum = ncol(cellSCE),
MedianReads = stats::median(colData(cellSCE)$sum),
MedianGenes = stats::median(colData(cellSCE)$detected),
DataType = 'Cell Matrix',
FileName = file.path(filterDir, 'assays', paste0(samplename,'_counts.mtx.gz')),
ColData = file.path(filterDir, paste0(samplename,'_colData.txt.gz')),
DecontXUMAP = file.path(filterDir, 'reducedDims', paste0(samplename,'_decontX_UMAP.txt.gz')),
ScrubletTSNE = file.path(filterDir, 'reducedDims', paste0(samplename,'_scrublet_TSNE.txt.gz')),
ScrubletUMAP = file.path(filterDir, 'reducedDims', paste0(samplename,'_scrublet_TSNE.txt.gz')))
}
data <- list("Raw" = if (exists("droplet_stat")) {droplet_stat} else {NULL},
"Filtered" = if (exists("cell_stat")) {cell_stat} else {NULL},
"decontX" = if (exists("decontX_stat")) {decontX_stat} else {NULL})
if (dataType == "Cell") { types <- c("Filtered", "decontX") }
if (dataType == "Droplet") { types <- c("Raw") }
if (dataType == "Both") { types <- c("Filtered", "decontX", "Raw")}
for (type in types) {
level3List[[type]] <- data.frame(
SAMPLE = samplename, DATA_TYPE = data[[type]]['DataType'],
MATRIX_TYPE = 'Raw Counts', DATA_CATEGORY = 'Gene Expression',
CELL_MEDIAN_NUM_READS = data[[type]]['MedianReads'],
CELL_MEDIAN_NUM_GENES = data[[type]]['MedianGenes'],
CELL_TOTAL = data[[type]]['CellNum'],
FILE_NAME = data[[type]]['FileName'],
WORKFLOW_TYPE = WorkFlowData['WorkFlow'],
WORKFLOW_PARAMETERS = WorkFlowData[paste0('Par', type)],
WORKFLOW_VERSION = WorkFlowData['WorkFlowVer'],
stringsAsFactors = FALSE)
if (isTRUE(HTAN)) {
level3List[[type]] <- cbind(level3List[[type]],HTAN_BIOSPECIMEN_ID = samplename,
HTAN_PARENT_ID = '', HTAN_PARENT_FILE_ID = '')
}
if (type == 'Filtered') {
for (metric in c('ColData', 'DecontXUMAP', 'ScrubletTSNE', 'ScrubletUMAP')) {
level4List[[metric]] <- data.frame(
SAMPLE = samplename, FILE_NAME = data[[type]][metric],
WORKFLOW_TYPE = WorkFlowData[metric],
WORKFLOW_PARAMETERS = file.path(directory, paste0(samplename, '_QCParameters.yaml')),
WORKFLOW_VERSION = WorkFlowData['WorkFlowVer'])
if (isTRUE(HTAN)) {
level4List[[metric]] <- cbind(level4List[[metric]], HTAN_BIOSPECIMEN_ID = samplename,
HTAN_PARENT_ID = '',HTAN_PARENT_FILE_ID = data[[type]]['FileName'])
}
}
}
}
level3Meta <- do.call(base::rbind, level3List)
level4Meta <- do.call(base::rbind, level4List)
return(list(level3Meta, level4Meta))
}
#' Extract QC parameters from the SingleCellExperiment object
#' @param inSCE A \link[SingleCellExperiment]{SingleCellExperiment} object.
#' @param skip Skip extracting the parameters of the provided QC functions.
#' @param ignore Skip extracting the content within QC functions.
#' @param directory The output directory of the SCTK_runQC.R pipeline.
#' @param samplename The sample name of the \link[SingleCellExperiment]{SingleCellExperiment} objects.
#' @param writeYAML Whether output yaml file to store parameters. Default if TRUE. If FALSE,
#' return character object.
#' @return If \code{writeYAML} TRUE, a yaml object will be generated. If FALSE, character object.
#' @export
getSceParams <- function(inSCE,
skip = c("scrublet", "runDecontX","runBarcodeRanksMetaOutput"),
ignore = c("algorithms", "estimates","contamination",
"z","sample","rank","BPPARAM","batch","geneSetCollection",
"barcodeArgs"),
directory = './',
samplename = '',
writeYAML = TRUE) {
meta <- S4Vectors::metadata(inSCE)
algos <- names(meta)[!names(meta) %in% skip]
outputs <- '---'
parList <- list()
dir <- file.path(directory, samplename)
for (algo in algos) {
params <- meta[[algo]][[1]]
if (length(params) == 1) {params <- params[[1]]} ### extract params from sublist
params <- params[which(!names(params) %in% ignore)]
parList[[algo]] <- params
}
outputs <- paste(outputs, yaml::as.yaml(parList), sep='\n')
if (isTRUE(writeYAML)) {
filename <- paste0(samplename, '_QCParameters.yaml')
cat(outputs, file=file.path(dir, filename))
} else {
return(outputs)
}
}
#' Create SingleCellExperiment object from csv or txt input
#' @param data A \link[data.table]{data.table} object containing the count matrix.
#' @param samplename The sample name of the data.
#' @return A \link[SingleCellExperiment]{SingleCellExperiment} object containing
#' the count matrix.
#' @export
constructSCE <- function(data, samplename) {
gene <- data[[1]]
data <- data[, -1]
barcode <- colnames(data)
mat <- methods::as(data, "Matrix")
dimnames(mat) <- list(gene, barcode)
coln <- paste(samplename, barcode, sep = '_')
sce <- SingleCellExperiment::SingleCellExperiment(
assays = list(counts = mat))
SummarizedExperiment::rowData(sce) <- S4Vectors::DataFrame(feature = gene)
SummarizedExperiment::colData(sce) <- S4Vectors::DataFrame(barcode,
column_name = coln,
sample = samplename,
row.names = coln)
return(sce)
}
#' Create SingleCellExperiment object from command line input arguments
#' @param preproc Method used to preprocess the data. It's one of the path provided in --preproc argument.
#' @param path Base path of the dataset. It's one of the path provided in --bash_path argument.
#' @param samplename The sample name of the data. It's one of the path provided in --sample argument.
#' @param raw The directory contains droplet matrix, gene and cell barcodes information. It's one of the path provided in --raw_data_path argument.
#' @param fil The directory contains cell matrix, gene and cell barcodes information. It's one of the path provided in --cell_data_path argument.
#' @param ref The name of reference used by cellranger. Only need for CellrangerV2 data.
#' @param rawFile The full path of the RDS file or Matrix file of the raw gene count matrix. It's one of the path provided in --raw_data argument.
#' @param filFile The full path of the RDS file or Matrix file of the cell count matrix. It's one of the path provided in --cell_data argument.
#' @param dataType Type of the input. It can be "Both", "Droplet" or "Cell". It's one of the path provided in --genome argument.
#' @return A list of \link[SingleCellExperiment]{SingleCellExperiment} object containing
#' the droplet or cell data or both,depending on the dataType that users provided.
#' @export
qcInputProcess <- function(preproc,
samplename,
path,
raw,
fil,
ref,
rawFile,
filFile,
dataType) {
dropletSCE <- NULL
cellSCE <- NULL
if (preproc == "BUStools") {
dropletSCE <- importBUStools(BUStoolsDirs = path, samples = samplename, class = "Matrix", delayedArray=FALSE)
return(list(dropletSCE, cellSCE))
}
if (preproc == "SEQC") {
dropletSCE <- importSEQC(seqcDirs = path, samples = samplename, prefix = samplename, class = "Matrix", delayedArray=FALSE)
return(list(dropletSCE, cellSCE))
}
if (preproc == "STARSolo") {
if (dataType == "Both") {
dropletSCE <- importSTARsolo(STARsoloDirs = path, samples = samplename, STARsoloOuts = "Gene/raw", class = "Matrix", delayedArray=FALSE)
cellSCE <- importSTARsolo(STARsoloDirs = path, samples = samplename, STARsoloOuts = "Gene/filtered", class = "Matrix", delayedArray=FALSE)
} else if (dataType == "Cell") {
cellSCE <- importSTARsolo(STARsoloDirs = path, samples = samplename, STARsoloOuts = "Gene/filtered", class = "Matrix", delayedArray=FALSE)
} else if (dataType == "Droplet") {
dropletSCE <- importSTARsolo(STARsoloDirs = path, samples = samplename, STARsoloOuts = "Gene/raw", class = "Matrix", delayedArray=FALSE)
}
return(list(dropletSCE, cellSCE))
}
if (preproc == "CellRangerV3") {
if (!is.null(path)) {
if (dataType == "Both") {
dropletSCE <- importCellRangerV3(cellRangerDirs = path, sampleDirs = samplename, sampleNames = samplename, dataType="raw", class = "Matrix", delayedArray=FALSE)
cellSCE <- importCellRangerV3(cellRangerDirs = path, sampleDirs = samplename, sampleNames = samplename, dataType="filtered", class = "Matrix", delayedArray=FALSE)
} else if (dataType == "Cell") {
cellSCE <- importCellRangerV3(cellRangerDirs = path, sampleDirs = samplename, sampleNames = samplename, dataType="filtered", class = "Matrix", delayedArray=FALSE)
} else if (dataType == "Droplet") {
dropletSCE <- importCellRangerV3(cellRangerDirs = path, sampleDirs = samplename, sampleNames = samplename, dataType="raw", class = "Matrix", delayedArray=FALSE)
}
} else {
if (dataType == "Both") {
dropletSCE <- importCellRangerV3Sample(dataDir = raw, sampleName = samplename, class = "Matrix", delayedArray=FALSE)
cellSCE <- importCellRangerV3Sample(dataDir = fil, sampleName = samplename, class = "Matrix", delayedArray=FALSE)
} else if (dataType == "Cell") {
cellSCE <- importCellRangerV3Sample(dataDir = fil, sampleName = samplename, class = "Matrix", delayedArray=FALSE)
} else if (dataType == "Droplet") {
dropletSCE <- importCellRangerV3Sample(dataDir = raw, sampleName = samplename, class = "Matrix", delayedArray=FALSE)
}
}
return(list(dropletSCE, cellSCE))
}
if (preproc == "CellRangerV2") {
if (!is.null(path)) {
if (dataType == "Both") {
dropletSCE <- importCellRangerV2(cellRangerDirs = path, sampleDirs = samplename, sampleNames = samplename, class="Matrix", delayedArray = FALSE, reference = ref, dataTypeV2="raw")
cellSCE <- importCellRangerV2(cellRangerDirs = path, sampleDirs = samplename, sampleNames = samplename, class="Matrix", delayedArray = FALSE, reference = ref, dataTypeV2="filtered")
} else if (dataType == "Cell") {
cellSCE <- importCellRangerV2(cellRangerDirs = path, sampleDirs = samplename, sampleNames = samplename, class="Matrix", delayedArray = FALSE, reference = ref, dataTypeV2="filtered")
} else if (dataType == "Droplet") {
dropletSCE <- importCellRangerV2(cellRangerDirs = path, sampleDirs = samplename, sampleNames = samplename, class="Matrix", delayedArray = FALSE, reference = ref, dataTypeV2="raw")
}
} else {
if (dataType == "Both") {
dropletSCE <- importCellRangerV2Sample(dataDir = raw, sampleName = samplename, class = "Matrix", delayedArray=FALSE)
cellSCE <- importCellRangerV2Sample(dataDir = fil, sampleName = samplename, class = "Matrix", delayedArray=FALSE)
} else if (dataType == "Cell") {
cellSCE <- importCellRangerV2Sample(dataDir = fil, sampleName = samplename, class = "Matrix", delayedArray=FALSE)
} else if (dataType == "Droplet") {
dropletSCE <- importCellRangerV2Sample(dataDir = raw, sampleName = samplename, class = "Matrix", delayedArray=FALSE)
}
}
return(list(dropletSCE, cellSCE))
}
if (preproc == "Optimus") {
## by default has both droplet and cell data.
dropletSCE <- importOptimus(OptimusDirs = path, samples = samplename, delayedArray = FALSE)
cellSCE <- dropletSCE[,which(dropletSCE$dropletUtils_emptyDrops_IsCell)]
if (dataType == "Cell") {
dropletSCE <- NULL
} else if (dataType == "Droplet") {
cellSCE <- NULL
}
return(list(dropletSCE, cellSCE))
}
if (preproc == "DropEst") {
if (dataType == "Both") {
dropletSCE <- importDropEst(sampleDirs=path, dataType="raw", sampleNames=samplename, delayedArray=FALSE)
cellSCE <- importDropEst(sampleDirs=path, dataType="filtered", sampleNames=samplename, delayedArray=FALSE)
} else if (dataType == "Cell") {
cellSCE <- importDropEst(sampleDirs=path, dataType="filtered", sampleNames=samplename, delayedArray=FALSE)
} else if (dataType == "Droplet") {
dropletSCE <- importDropEst(sampleDirs=path, dataType="raw", sampleNames=samplename, delayedArray=FALSE)
}
return(list(dropletSCE, cellSCE))
}
if (preproc == "SceRDS") {
if (dataType == "Both") {
dropletSCE <- readRDS(rawFile)
cellSCE <- readRDS(filFile)
} else if (dataType == "Cell") {
cellSCE <- readRDS(filFile)
} else if (dataType == "Droplet") {
dropletSCE <- readRDS(rawFile)
}
return(list(dropletSCE, cellSCE))
}
if (preproc == "CountMatrix") {
if (dataType == "Both") {
dropletMM <- data.table::fread(rawFile)
dropletSCE <- constructSCE(data = dropletMM, samplename = samplename)
cellMM <- data.table::fread(filFile)
cellSCE <- constructSCE(data = cellMM, samplename = samplename)
} else if (dataType == "Cell") {
cellMM <- data.table::fread(filFile)
cellSCE <- constructSCE(data = cellMM, samplename = samplename)
} else if (dataType == "Droplet") {
dropletMM <- data.table::fread(rawFile)
dropletSCE <- constructSCE(data = dropletMM, samplename = samplename)
}
return(list(dropletSCE, cellSCE))
}
## preproc is not one of the method above. Stop the pipeline.
stop(paste0("'", preproc, "' not supported."))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.