R/subset_curatedTBData.R

Defines functions .subset_curatedTBData subset_curatedTBData

Documented in .subset_curatedTBData subset_curatedTBData

#' @title Subset curatedTBData based on single/multiple conditions
#' @description The function selects desired samples from curatedTBData
#' database based pre-specified conditions
#' @name subset_curatedTBData
#' @param theObject A
#'   \link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment} or
#'   \link[MultiAssayExperiment:MultiAssayExperiment-class]{MultiAssayExperiment} object.
#' @param annotationColName A character indicates
#'   feature of interest in the object's annotation data.
#' @param annotationCondition A vector of character indicates
#'   conditions want to be selected.
#' @param assayName A character indicates
#'   the name of the assay from the input object. The default is \code{NULL}.
#'   When \code{assayName} is \code{NULL}, the function selects
#'   the first assay along assay list.
#' @return A \link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}
#'   object containing subjects with desired annotation conditions.
#' @export
#' @examples
#' obj <-  curatedTBData("GSE74092", dry.run = FALSE, curated.only = TRUE)
#' subset_curatedTBData(obj[[1]], annotationColName = "TBStatus",
#'                      annotationCondition = c("Control","PTB"))
#'
subset_curatedTBData <- function(theObject, annotationColName,
                                 annotationCondition, assayName = NULL) {
    ## Input class's class type
    check_type <- methods::is(theObject, "MultiAssayExperiment") ||
        methods::is(theObject, "SummarizedExperiment")
    if (!check_type) {
        paste("subset_curatedTBData() only supports for",
              "SummarizedExperiment/MultiAssayExperiment object") |>
            stop(call. = FALSE)
    }
    ## Check whether annotationColName exists in the column data
    col_data <- SummarizedExperiment::colData(theObject)
    if (!any(colnames(col_data) == annotationColName)) {
        paste("annotationColName:", annotationColName,
              "is not found in the colData(theObject)",
              "\nNULL is returned.") |>
            message()
        return()
    }
    if (methods::is(theObject, "SummarizedExperiment")) {
        theObject_filter <- .subset_curatedTBData(theObject, annotationColName,
                                                  annotationCondition,
                                                  assayName)
        return(theObject_filter)
    } else {
        if (is.null(assayName)) {
            if (length(names(theObject)) >= 1L) {
                paste("assayName not specified",
                      "select the first assay as default.") |>
                    message()
                assayName <- 1
            } else {
                stop("No available assay from the input.")
            }
        } else {
            experiment_name_index <- which(names(theObject) %in% assayName)
            if (length(experiment_name_index) == 0L) {
                ## Use names(theObject) when theObject is MultiAssayExperiment
                msg1 <- sprintf("assay with name: %s not found", assayName)
                msg2 <- sprintf("\nThe available assay(s) is/are: %s",
                                paste0(names(theObject), collapse = ", "))
                paste0(msg1, msg2) |>
                    stop(call. = FALSE)
            }
        }
        index_filter <- col_data[, annotationColName] %in% annotationCondition
        theObject_reduced <- theObject[, index_filter, assayName]
        col_info <- SummarizedExperiment::colData(theObject_reduced)
        theObject_sub <- theObject_reduced[[assayName]]
        support_classes <- c("SummarizedExperiment", "matrix", "data.frame")
        if (methods::is(theObject_sub, "SummarizedExperiment")) {
            ## assay_raw is selected in the full version
            ## For those datasets that do not include all samples from the study
            SummarizedExperiment::colData(theObject_sub) <- col_info
            return(theObject_sub)
        } else if (is.matrix(theObject_sub) || is.data.frame(theObject_sub)) {
            ## assay_curated/assay_reprocess is selected. S
            ## set attribute to be NULL
            ## ensure that row/column names have NULL attributes
            colnames(theObject_sub) <- colnames(theObject_sub) |>
                as.character()
            row.names(theObject_sub) <- row.names(theObject_sub) |>
                as.character()
            sobject_TBSig <- SummarizedExperiment::SummarizedExperiment(
                assays = list(assay1 = as.matrix(theObject_sub)),
                colData = col_info)
            return(.subset_curatedTBData(sobject_TBSig, annotationColName,
                                         annotationCondition, "assay1"))
        } else {
            paste("The class of the selected assay is not recognized.\n",
                  sprintf("Supported classes are: %s",
                          paste(support_classes, collapse = ", "))) |>
                stop(call. = FALSE)
        }
    }
}

#' Subset curatedTBData based on single/multiple conditions
#' @name .subset_curatedTBData
#' @inheritParams subset_curatedTBData
#' @return A \link[SummarizedExperiment:SummarizedExperiment-class]{SummarizedExperiment}
#'   object containing subjects with desired annotation conditions.
.subset_curatedTBData <- function(theObject, annotationColName,
                                  annotationCondition, assayName) {
    ## Check assayName whether it is specified by the users
    if (is.null(assayName)) {
        theObject_length <- length(SummarizedExperiment::assays(theObject))
        if (theObject_length >= 1L) {
            paste("assayName not specified",
                        "select the first assay as default.") |>
                message()
            assay_name_exclude <- -1
        } else {
            stop("No available assay from the input.")
        }
    } else {
        ## Check whether assay exists in the object
        assay_names <- SummarizedExperiment::assayNames(theObject)
        assay_name_index <- which(assay_names %in% assayName)
        if (length(assay_name_index) == 0L) {
            msg1 <- sprintf("assay with name: %s not found", assayName)
            msg2 <- sprintf("\nThe available assay(s) is/are: %s",
                            paste0(assay_names, collapse = ", "))
            paste0(msg1, msg2) |>
                stop(call. = FALSE)
        }
        assay_name_exclude <- which(assay_names != assayName)
    }
    col_info <- SummarizedExperiment::colData(theObject)
    theObject_filter <- theObject[, col_info[, annotationColName] %in%
                                      annotationCondition]
    ## Set other assays to be NULL
    SummarizedExperiment::assays(theObject_filter)[assay_name_exclude] <- NULL
    col_info_filter <- SummarizedExperiment::colData(theObject_filter)
    annotation <- col_info_filter[, annotationColName]
    if (length(unique(annotation)) == length(annotationCondition)) {
        return(theObject_filter)
    } else {
        conditionNotFound <- annotationCondition[-match(unique(annotation),
                                                        annotationCondition)]
        msg <- sprintf("The condition %s is not found from the input.",
                       paste0(conditionNotFound, collapse = ", "))
        paste(msg, "NULL is returned,") |>
            message()
        return()
    }
}
compbiomed/curatedTBData documentation built on March 14, 2024, 2:08 p.m.