# This function can be used to unify the group id vector. It can be any
# kind of vector, but this converts it to factor.
.norm_f <- function(
i, f, dim.type = c("rows","columns"), empty.rm = FALSE, ...){
if(!.is_a_bool(empty.rm)){
stop("'empty.rm' must be TRUE or FALSE.", call. = FALSE)
}
dim.type <- match.arg(dim.type)
if(!is.character(f) && !is.factor(f)){
stop("'f' must be a factor or character vector coercible to a ",
"meaningful factor.",
call. = FALSE)
}
if(i != length(f)){
stop("'f' must have the same number of ",dim.type," as 'x'",
call. = FALSE)
}
# This is done otherwise we lose NA values
if( !empty.rm && any(is.na(f)) ){
f <- as.character(f)
f[ is.na(f) ] <- "NA"
}
if(is.character(f)){
f <- factor(f)
}
f
}
# When we merge rows or columns, first member of group is kept as default
# (in colData or rowData). This function controls this and allows user to
# specify some other element than the first one.
.norm_archetype <- function(f, archetype){
if(length(archetype) > 1L){
if(length(levels(f)) != length(archetype)){
stop("length of 'archetype' must have the same length as ",
"levels('f')",
call. = FALSE)
}
}
f_table <- table(f)
if(!is.null(names(archetype))){
if(anyNA(names(archetype)) || anyDuplicated(names(archetype))){
stop("If 'archetype' is named, names must be non-NA and unqiue.",
call. = FALSE)
}
archetype <- archetype[names(f_table)]
}
if(any(f_table < archetype)){
stop("'archetype' out of bounds for some levels of 'f'. The maximum of",
" 'archetype' is defined as table('f')", call. = FALSE)
}
if(length(archetype) == 1L){
archetype <- rep(archetype,length(levels(f)))
}
archetype
}
# This function returns the index/position of rows/columns that are kept
# after merging.
#' @importFrom S4Vectors splitAsList
.get_element_pos <- function(f, archetype){
archetype <- as.list(archetype)
f_pos <- seq.int(1L, length(f))
f_pos_split <- S4Vectors::splitAsList(f_pos, f)
f_pos <- unlist(f_pos_split[archetype])
f_pos
}
# This function merges assays and row/colData.
#' @importFrom S4Vectors SimpleList
#' @importFrom scuttle sumCountsAcrossFeatures
.merge_rows_or_cols <- function(
x, f, by, archetype = 1L, average = FALSE, BPPARAM = SerialParam(),
check.assays = TRUE, na.rm = FALSE, ...){
# input check
if( !.is_a_bool(average) ){
stop("'average' must be TRUE or FALSE.", call. = FALSE)
}
if( !.is_a_bool(check.assays) ){
stop("'check.assays' must be TRUE or FALSE.", call. = FALSE)
}
if( !.is_a_bool(na.rm) ){
stop("'na.rm' must be TRUE or FALSE.", call. = FALSE)
}
#
# Get correct functions based on whether we agglomerate rows or cols
rowData_FUN <- switch(by, rowData, colData)
nrow_FUN <- switch(by, nrow, ncol)
rownames_FUN <- switch(by, rownames, colnames)
rownames_ass_FUN <- switch(by, `rownames<-`, `colnames<-`)
# If user specified column name from row/colData, get the values
if( .is_a_string(f) && f %in% colnames(rowData_FUN(x)) ){
f <- rowData_FUN(x)[[ f ]]
}
# Check that the group ID vector is specifying groups for each element
f <- .norm_f(nrow_FUN(x), f, ...)
# If the data is already agglomerated at each group
if(length(levels(f)) == nrow_FUN(x)){
return(x)
}
# In merging, first element of certain group is kept by default. archetype,
# can control this behavior; it can specify the preserved rows for every
# group or index.
archetype <- .norm_archetype(f, archetype)
# Get assays
assays <- assays(x)
# We check whether the assays include values that cannot be summed. For
# instance, summing negative values do not make sense.
if( check.assays ){
temp <- lapply(seq_len(length(assays)), function(i)
.check_assays_for_merge(names(assays)[[i]], assays[[i]]))
}
# Transpose if we are merging columns
if( by == 2L ){
assays <- lapply(assays, function(mat) t(mat))
}
# Get the aggregation function based on whether user wants to exclude NAs
# and if there are any NAs. scuttle::sumCountsAcrossFeatures cannot handle
# NAs so if user wants to exclude them, we use own implementation.
FUN <- if( na.rm && anyNA(assays[[1]])) .sum_counts_accross_features_na else
sumCountsAcrossFeatures
# Agglomerate assays
assays <- lapply(assays, FUN, average = average, ids = f, BPPARAM = BPPARAM)
# Transpose back to original orientation
if( by == 2L ){
assays <- lapply(assays, function(mat) t(mat))
}
# Convert to SimpleList
assays <- assays |> SimpleList()
# Now we have agglomerated assays, but TreeSE has still the original form.
# We take specified rows/columns from the TreeSE.
idx <- .get_element_pos(f, archetype = archetype)
if( by == 1L ){
x <- x[idx, ]
} else{
x <- x[ , idx]
}
# Add assays back to TreeSE
assays(x, withDimnames = FALSE) <- assays
# Change row/colnames. Currently, they have same names as in original data
# but just certain rows. Change them to represent groups
x <- rownames_ass_FUN(x, rownames_FUN(assays[[1]]))
return(x)
}
# This function works similarly to scuttle::sumCountsAcrossFeatures but this
# excludes NAs from the data. The scuttle function cannot handle NAs.
#' @importFrom DelayedArray DelayedArray type rowsum
.sum_counts_accross_features_na <- function(x, average, ids, ...){
# Which cell is not NA?
is_not_na <- !is.na(x)
type(is_not_na) <- "integer"
# Aggregate data to certain groups
x <- rowsum(x, ids, na.rm = TRUE)
# Calculate average if specified
if( average ){
x <- x/rowsum(is_not_na, ids)
}
return(x)
}
# This functions checks if assay has negative or binary values. It does not
# make sense to sum them, so we give warning to user.
.check_assays_for_merge <- function(assay.type, assay){
# Check if assays include binary or negative values
if( all(assay == 0 | assay == 1) ){
warning("'", assay.type, "'", " includes binary values.",
"\nAgglomeration of it might lead to meaningless values.",
"\nCheck the assay, and consider doing transformation again",
"manually with agglomerated data.",
call. = FALSE)
}
if( !all( assay >= 0 | is.na(assay) ) ){
warning("'", assay.type, "'", " includes negative values.",
"\nAgglomeration of it might lead to meaningless values.",
"\nCheck the assay, and consider doing transformation again",
"manually with agglomerated data.",
call. = FALSE)
}
return(assay)
}
#' @importFrom Biostrings DNAStringSetList
.merge_refseq_list <- function(sequences_list, f, names, ...){
threshold <- list(...)[["threshold"]]
if(is.null(threshold)){
threshold <- 0.05
}
if(!is(sequences_list,"DNAStringSetList")){
return(.merge_refseq(sequences_list, f, names, threshold))
}
names <- names(sequences_list)
seqs <- DNAStringSetList(lapply(sequences_list, .merge_refseq, f, names,
threshold))
names(seqs) <- names
seqs
}
#' @importFrom Biostrings DNAStringSetList
#' @importFrom DECIPHER ConsensusSequence
.merge_refseq <- function(sequences, f, names, threshold){
sequences <- split(sequences,f)
seq <- unlist(DNAStringSetList(lapply(sequences, ConsensusSequence,
threshold = threshold)))
seq
}
.merge_rows_TSE <- function(x, f, archetype = 1L, update.tree = FALSE,
update.refseq = mergeRefSeq, mergeRefSeq = FALSE, ...){
# input check
if(!.is_a_bool(update.tree)){
stop("'update.tree' must be TRUE or FALSE.", call. = FALSE)
}
if(!.is_a_bool(update.refseq)){
stop("'update.refseq' must be TRUE or FALSE.", call. = FALSE)
}
# for optionally merging referenceSeq
refSeq <- NULL
if(update.refseq){
refSeq <- referenceSeq(x)
}
#
x <- .merge_rows_or_cols(x, f, by = 1L, archetype = 1L, ...)
# optionally merge rowTree
if( update.tree ){
x <- .agglomerate_trees(x, 1, ...)
}
# optionally merge referenceSeq
if(!is.null(refSeq)){
referenceSeq(x) <- .merge_refseq_list(refSeq, f, rownames(x), ...)
}
x
}
.merge_cols_TSE <- function(x, f, archetype = 1L, update.tree = FALSE, ...){
# input check
if(!.is_a_bool(update.tree)){
stop("'update.tree' must be TRUE or FALSE.", call. = FALSE)
}
#
x <- .merge_rows_or_cols(x, f, by = 2L, archetype = 1L, ...)
# optionally merge colTree
if( update.tree ){
x <- .agglomerate_trees(x, 2, ...)
}
return(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.