R/LongTable-class.R

Defines functions printSlot LongTable

Documented in LongTable printSlot

#' @include immutable-class.R
#' @include allGenerics.R
NULL

#' @title LongTable class definition
#'
#' @description Define a private constructor method to be used to build a
#'   `LongTable` object.
#'
#' @slot rowData See Slots section.
#' @slot colData See Slots section.
#' @slot assays See Slots section.
#' @slot metadata See Slots section.
#' @slot .intern See Slots section.
#'
#' @section Slots:
#' - *rowData*: A `data.table` containing the metadata associated with the
#'   row dimension of a `LongTable`.
#' - *colData*: A `data.table` containing the metadata associated with the
#'   column dimension of a `LongTable`.
#' - *assays*: A `list` of `data.table`s, one for each assay in a
#'   `LongTable`.
#' - *metadata*: An optional `list` of additional metadata for a `LongTable`
#'   which doesn't map to one of the dimensions.
#' - *.intern*: An `immutable` `list` that holds internal structural metadata
#'   about a LongTable object, such as which columns are required to key
#'   the object.
#'
#' @return `LongTable` object containing the assay data from a treatment
#'   response experiment
#'
#' @md
#' @import data.table
#' @keywords internal
#' @rdname LongTable-class
#' @aliases .LongTable
#' @exportClass LongTable
.LongTable <- setClass("LongTable",
    slots=list(
        rowData='data.table',
        colData='data.table',
        assays='list',
        metadata='list',
        .intern='immutable_list')
)


#' @title LongTable constructor method
#'
#' @rdname LongTable
#'
#' @param rowData `data.frame` A rectangular object coercible to a `data.table`.
#' @param rowIDs `character` A vector of `rowData` column names needed to
#'   uniquely identify each row in a `LongTable`.
#' @param colData `data.frame` A rectangular object coercible to a `data.table.`
#' @param colIDs `chacter` A vector of `colData` column names needed to uniquely
#'   identify each column in a `LongTable`.
#' @param assays `list` A list of rectangular objects, each coercible to
#'   a `data.table`. Must be named and item names must match the `assayIDs`
#'   list.
#' @param assayIDs `list` A list of `character` vectors specifying the columns
#'   needed to uniquely identify each row in an `assay`. Names must match the
#'   `assays` list.
#' @param metadata `list` A list of one or more metadata items associated with
#'   a LongTable experiment.
#' @param keep.rownames `logical(1)` or `character(1)` Should rownames be
#'   retained when coercing to `data.table` inside the constructor. Default
#'   is FALSE. If TRUE, adds a 'rn' column to each rectangular object that
#'   gets coerced from `data.frame` to `data.table`. If a string, that becomes
#'   the name of the rownames column.
#'
#' @return A `LongTable` object containing the data for a treatment response
#'   experiment and configured according to the rowIDs and colIDs arguments.
#'
#' @examples
#' "See vignette('The LongTable Class', package='CoreGx')"
#'
#' @importFrom data.table key setkeyv
#' @export
LongTable <- function(rowData, rowIDs, colData, colIDs, assays, assayIDs,
        metadata=list(), keep.rownames=FALSE) {

    # handle missing parameters
    isMissing <- c(rowData=missing(rowData), rowIDs=missing(rowIDs),
        colIDs=missing(colIDs), colData=missing(colData), assays=missing(assays),
        assayIDs=missing(assayIDs))

    if (any(isMissing)) stop(.errorMsg('\nRequired parameter(s) missing: ',
        names(isMissing)[isMissing], collapse='\n\t'))

    # check parameter types and coerce or error
    if (!is(colData, "data.table")) {
        tryCatch({
            colData <- data.table(colData, keep.rownames=keep.rownames)
        }, error=function(e)
            stop(.errorMsg("colData must be coercible to a data.frame!"))
        )
    } else {
        colData <- copy(colData)
    }

    if (!is(rowData, "data.table")) {
        tryCatch({
            rowData <- data.table(rowData, keep.rownames=keep.rownames) },
        error=function(e)
            stop(.errorMsg("rowData must be coerceible to a data.frame!"))
        )
    } else {
        rowData <- copy(rowData)
    }

    isDT <- is.items(assays, FUN=is.data.table)
    isDF <- is.items(assays, FUN=is.data.frame) & !isDT
    if (!all(isDT)) {
        tryCatch({
            for (i in which(isDF))
                assays[[i]] <- data.table(assays[[i]], keep.rownames)
        }, error = function(e, assays) {
            message(e)
            types <- lapply(assays, typeof)
            stop(.errorMsg(
                '\nList items are types: ',
                types, '\nPlease ensure all items in the assays list are ',
                'coerceable to a data.frame!'), collapse=', ')
        })
    }
    assays <- copy(assays)

    ## FIXME:: Move all validity checks to top of the function to prevent wasted
    ## computation or into class validity method

    # capture row internal metadata
    if (is.numeric(rowIDs) || is.logical(rowIDs))
        rowIDs <- colnames(rowData)[rowIDs]
    if (!all(rowIDs %in% colnames(rowData)))
        stop(.errorMsg('\nRow IDs not in rowData: ',
            setdiff(rowIDs, colnames(rowData)), collapse=', '))

    # Create the row and column keys for LongTable internal mappings
    if (!('rowKey' %in% colnames(rowData)))
        rowData[, c('rowKey') := .GRP, keyby=c(rowIDs)]
    if (!('colKey' %in% colnames(colData)))
        colData[, c('colKey') := .GRP, keyby=c(colIDs)]

    # initialize the internals object to store private metadata for a LongTable
    internals <- setNames(vector("list", length=6),
        c("rowIDs", "rowMeta", "colIDs", "colMeta", "assayKeys", "assayIndex"))
    internals$rowIDs <- rowIDs
    internals$rowMeta <- setdiff(colnames(rowData[, -'rowKey']), rowIDs)

    # capture column internal metadata
    if (is.numeric(colIDs) || is.logical(colIDs))
        colIDs <- colnames(colData)[colIDs]
    if (!all(colIDs %in% colnames(colData)))
        stop(.errorMsg('\nColumn IDs not in colData: ',
            setdiff(colIDs, colnames(colData)), collapse=', '),
            call.=FALSE)
    internals$colIDs <- colIDs
    internals$colMeta <- setdiff(colnames(colData[, -'colKey']), colIDs)

    # -- capture assays internal metadata
    # sort such that rowIDs are first, then colIDs; ensures reindex returns
    # the same order as construtor
    for (i in seq_along(assayIDs)) {
        rids <- intersect(rowIDs, assayIDs[[i]])
        cids <- intersect(colIDs, assayIDs[[i]])
        assayIDs[[i]] <- c(rids, cids)
    }
    internals$assayKeys <- assayIDs

    # ensure names of assays and assayIDs match
    hasMatchingAssayNames <- names(assays) == names(assayIDs)
    if (!all(hasMatchingAssayNames)) stop(.errorMsg(
        "Mismatched names between assays and assayIDs for:\n\t",
        paste0(names(assays)[!hasMatchingAssayNames], collapse=", ")),
        call.=FALSE)
    # set keys for join with metadata
    for (nm in names(assays)) {
        setkeyv(assays[[nm]], assayIDs[[nm]])
        .nm <- paste0(".", nm)
        assays[[nm]][, (.nm) := .I]
    }
    
    # build the index mapping assay rows to rowKey and colKey
    cat(.infoMsg("Building assay index...\n", time=TRUE))
    assayIndex <- expand.grid(rowKey=rowData$rowKey, colKey=colData$colKey)
    setDT(assayIndex)
    # setkeyv(assayIndex, c("rowKey", "colKey"))
    
    cat(.infoMsg("Joining rowData to assayIndex...\n", time=TRUE))
    setkeyv(rowData, "rowKey")
    setkeyv(assayIndex, "rowKey")
    # assayIndex <- assayIndex[
    #     rowData[, c(rowIDs, "rowKey"), with=FALSE], ,
    #     on="rowKey", allow.cartesian=FALSE
    # ]
    rd <- rowData[, c(rowIDs, "rowKey"), with=FALSE]
    assayIndex <- merge(
        assayIndex, rd,
        by="rowKey", all.x=TRUE, allow.cartesian=FALSE
    )

    # print if rowKey in rowData is not unique
    if(nrow(rowData) != uniqueN(rowData$rowKey)) {
        cat(.warnMsg("rowData rowKey is not unique!"))
        show(assayIndex)
        show(rowData)
    }
    rm(rd)
    gc()
    cat(.infoMsg("Joining colData to assayIndex...\n", time=TRUE))
    setkeyv(colData, "colKey")
    # assayIndex <- assayIndex[
    #     colData[, c(colIDs, "colKey"), with=FALSE], ,
    #     on="colKey", allow.cartesian=FALSE
    # ]
    cd <- colData[, c(colIDs, "colKey"), with=FALSE]
    assayIndex <- merge(
        assayIndex, cd,
        by="colKey", all.x=TRUE, allow.cartesian=FALSE
    )
    rm(cd)
    gc()
    cat(.infoMsg("Joining assays to assayIndex...\n", time=TRUE))

    # Set the key variables for the assayIndex using rowIDs and colIDs
    setkeyv(assayIndex, c(rowIDs, colIDs))


    for (nm in names(assays)) {
        .nm <- paste0(".", nm)
        assayIndex[assays[[nm]], (.nm) := get(.nm)]
    }
    gc()
    assayIndex[, (c(rowIDs, colIDs)) := NULL]
    assayIndex <- assayIndex[
        which(rowAnys(!is.na(assayIndex[, paste0(".", names(assays)), with=FALSE]))),
    ]
    gc()
    cat(.infoMsg("Setting assayIndex key...\n", time=TRUE))
    setkeyv(assayIndex, paste0(".", names(assays)))
    internals$assayIndex <- assayIndex


    # make internals immutable to prevent users from modifying structural metadata
    internals <- immutable(internals)

    gc()
    cat(.infoMsg("Building LongTable...\n", time=TRUE))
    # Drop extra assay columns and key by the assay key in the assay index
    for (i in seq_along(assays)) {
        assays[[i]][, (assayIDs[[i]]) := NULL]
        setkeyv(assays[[i]], paste0(".", names(assays)[i]))
    }

    # Reorder columns to match the keys, this prevents issues in unit tests
    # caused by different column orders
    setkeyv(rowData, "rowKey")
    setkeyv(colData, "colKey")
    setcolorder(rowData, unlist(internals[c("rowIDs", "rowMeta")]))
    setcolorder(colData, unlist(internals[c('colIDs', 'colMeta')]))

    ## Assemble  the pseudo row and column names for the LongTable
    .pasteColons <- function(...) paste(..., collapse=':')
    rowData[, `:=`(.rownames=mapply(.pasteColons, transpose(.SD))),
        .SDcols=rowIDs]
    colData[, `:=`(.colnames=mapply(.pasteColons, transpose(.SD))),
        .SDcols=colIDs]
    return(CoreGx:::.LongTable(rowData=rowData, colData=colData, assays=assays,
        metadata=metadata, .intern=internals))
}

#' Function to combine two LongTables into a single LongTable
#' @param x A `LongTable` object
#' @param y A `LongTable` object
#' 

# ---- Class unions for CoreSet slots
#' A class union to allow multiple types in a CoreSet slot
#'
#' @include LongTable-class.R
setClassUnion('list_OR_LongTable', c('list', 'LongTable'))

# #' Ensure that all rowID and colID keys are valid
# #'
# #' @param rowData A `data.table` containing row level annotations.
# #' @param colData A `data.table` containing column level annotations for a
# #'   `LongTable`.
# #' @param assays A `list` of `data.table`s, one for each assay in an
# #'   `LongTable`.
# #'
# #' @keywords internal
### FIXME:: Finish this and implement class validity methods for LongTable!
#.verifyKeyIntegrity <- function(rowData, colData, assays) {
#    if (!('rowKey' %in% colnames(rowData)) || !is.numeric(rowData$rowID))
#        message(blue('The rowKey column is missing from rowData! Please try
#            rebuilding the LongTable object with the constructor.'))
#    if (!('colKey' %in% colnames(colData)) || !is.numeric(colData$colID))
#        stop()
#}

# ---- LongTable Class Methods

#' Helper function to print slot information
#' @param slotName `character` The name of the slot to print.
#' @param slotData `data.table` The data to print.
#' 
#' @keywords internal
printSlot <- function(slotName, slotData) {
    slotCols <- ncol(slotData)
    slotString <- paste0(slotName, '(', slotCols, '): ')
    slotColnames <- colnames(slotData)
    slotNamesString <-
        if (length(slotColnames) > 6) {
            paste0(.collapse(head(slotColnames, 3)), ' ... ', .collapse(tail(slotColnames, 3)))
        } else {
            .collapse(slotColnames)
        }
    cat("  ", yellow$bold(slotString) %+% green(slotNamesString), '\n')
}

#' Show method for the LongTable class
#'
#' @examples
#' show(merckLongTable)
#'
#' @param object A `LongTable` object to print the results for.
#'
#' @return `invisible` Prints to console.
#'
#' @importFrom crayon %+% yellow red green blue cyan magenta
#' @import data.table
#' @export
setMethod('show', signature(object='LongTable'), function(object) {

    ## FIXME:: Function too long. Can I refactor to a helper that prints each slot?

    # ---- class descriptions
    cat(yellow$bold$italic(paste0("<", class(object)[1], ">"), '\n'))
    cat("  ", yellow$bold('dim: ', .collapse(dim(object)), '\n'))

    # --- assays slot
    assayLength <- length(assayNames(object))
    assaysString <- paste0('assays(', assayLength, '): ')
    assayNames <- assayNames(object)
    assayNamesString <- .collapse(assayNames(object))
    if (nchar(assayNamesString) > options("width")) {
        assayNamesString <- paste0(strwrap(assayNamesString), collapse="\n  ")
    }
    cat("  ", yellow$bold(assaysString) %+% red(assayNamesString), '\n')

    # --- rownames
    rows <- nrow(rowData(object))
    rowsString <- paste0('rownames(', rows, '): ')
    rowNames <- rownames(object)
    rownamesString <-
        if (length(rowNames) > 6) {
            paste0(.collapse(head(rowNames, 2)), ' ... ', .collapse(tail(rowNames, 2)))
        } else {
            .collapse(rowNames)
        }
    cat("  ", yellow$bold(rowsString) %+% green(rownamesString), '\n')

    # ---- rowData slot
    printSlot('rowData', rowData(object))

    # ---- colnames
    cols <- nrow(colData(object))
    colsString <- paste0('colnames(', cols, '): ')
    colnames <- colnames(object)
    colnamesString <-
        if (length(colnames) > 6) {
            paste0(.collapse(head(colnames, 3)), ' ... ', .collapse(tail(colnames, 3)))
        } else {
            .collapse(colnames)
        }
    cat("  ", yellow$bold(colsString) %+% green(colnamesString), '\n')

    # ---- colData slot
    printSlot('colData', colData(object))

    # --- metadata slot
    metadataString <- paste0('metadata(', length(metadata(object)), '): ')
    metadataNames <- names(metadata(object))
    metadataNamesString <-
        if (length(metadataNames) > 6) {
            paste0(.collapse(head(metadataNames, 3), ' ... ', .collapse(tail(metadataNames, 3))))
        } else if (length(metadataNames) >= 1) {
            .collapse(metadataNames)
        } else {
            'none'
        }
    cat("  ", yellow$bold(metadataString) %+% green(metadataNamesString), '\n')
})


# ==== LongTable Accessor Methods

#' Get the id column names for the rowData slot of a LongTable
#'
#' @examples
#' rowIDs(merckLongTable)
#'
#' @param object A `LongTable` to get the rowData id columns for.
#' @param data `logical` Should the rowData for the id columns be returned
#' instead of the column names? Default is FALSE.
#' @param key `logical` Should the key column also be returned?
#'
#' @return A `character` vector of rowData column names if data is FALSE,
#' otherwise a `data.table` with the data from the rowData id columns.
#'
#' @rdname LongTable-class
#' @family LongTable-class
#' @family LongTable-accessors
#'
#' @import data.table
#' @export
setMethod('rowIDs', signature(object='LongTable'),
        function(object, data=FALSE, key=FALSE) {
    cols <- mutable(getIntern(object, 'rowIDs'))
    if (key) cols <- c(cols, 'rowKey')
    if (data) rowData(object, key=key)[, ..cols] else cols
})

#' Get the id column names for the rowData slot of a LongTable
#'
#' @examples
#' rowMeta(merckLongTable)
#'
#' @describeIn LongTable Get the names of the non-id columns from rowData.
#'
#' @param object A `LongTable` to get the rowData metadata columns for.
#' @param data `logical` Should the rowData for the metadata columns be returned
#' instead of the column names? Default is FALSE.
#' @param key `logical` Should the key column also be returned? Default is FALSE
#'
#' @return A `character` vector of rowData column names if data is FALSE,
#' otherwise a `data.table` with the data from the rowData metadta columns.
#'
#' @import data.table
#' @export
setMethod('rowMeta', signature(object='LongTable'),
        function(object, data=FALSE, key=FALSE) {
    cols <- mutable(getIntern(object, 'rowMeta'))
    cols <- cols[!grepl('^\\.', cols)]
    if (key) cols <- c(cols, 'rowKey')
    if (data) rowData(object, key=key)[, ..cols] else cols
})

#' Get the id column names for the colData slot of a LongTable
#'
#' @examples
#' colIDs(merckLongTable)
#'
#' @describeIn LongTable Get the names of the columns in colData required to
#' uniquely identify each row.
#'
#' @param object A `LongTable` to get the colData id columns for.
#' @param data `logical` Should the colData for the id columns be returned
#' instead of the column names? Default is FALSE.
#' @param key `logical` Should the key column also be returned? Default is FALSE.
#'
#' @return A `character` vector of colData column names if data is FALSE,
#' otherwise a `data.table` with the data from the colData id columns.
#'
#' @import data.table
#' @export
setMethod('colIDs', signature(object='LongTable'),
        function(object, data=FALSE, key=FALSE) {

    cols <- mutable(getIntern(object, 'colIDs'))
    if (key) cols <- c(cols, 'colKey')
    if (data) colData(object, key=TRUE)[, ..cols] else cols

})

#' Get the id column names for the colData slot of a LongTable
#'
#' @examples
#' colMeta(merckLongTable)
#'
#' @describeIn LongTable Get the names of the non-id columns in the colData
#'   `data.table`.
#'
#' @param object A `LongTable` to get the colData metadata columns for.
#' @param data `logical` Should the colData for the metadata columns be returned
#'   instead of the column names? Default is FALSE.
#' @param key `logical` Should the key column also be returned?
#'
#' @return A `character` vector of colData column names if data is FALSE,
#'   otherwise a `data.table` with the data from the colData metadta columns.
#'
#' @import data.table
#' @export
setMethod('colMeta', signature(object='LongTable'),
    function(object, data=FALSE, key=FALSE) {

    cols <- mutable(getIntern(object, 'colMeta'))
    cols <- cols[!grepl('^\\.', cols)]
    if (key) cols <- c(cols, 'colKey')
    if (data) colData(object, key=TRUE)[, ..cols] else cols
})



#' Retrieve the unique identifier columns used for primary keys in rowData and
#'    colData.
#'
#' @describeIn LongTable Get the names of all id columns.
#'
#' @examples
#' idCols(merckLongTable)
#'
#' @param object `LongTable`
#'
#' @return `character` A character vector containing the unique rowIDs and
#'   colIDs in a LongTable object.
#'
#' @export
setMethod('idCols', signature('LongTable'),
    function(object) {
    return(unique(c(rowIDs(object), colIDs(object))))
})

#' Retrieve a copy of the assayIndex from the `@.intern` slot.
#'
#' @describeIn LongTable Get the assayIndex item from the objects internal metadata.
#'
#' @param `x` A `LongTable` or inheriting class.
#'
#' @return A `mutable` copy of the "assayIndex" for `x`
#'
#' @examples
#' assayIndex(nci_TRE_small)
#'
#' @aliases assayIndex,LongTable-method
#' @export
setMethod("assayIndex", signature("LongTable"), function(x) {
    mutable(getIntern(x, "assayIndex"))
})

#' Retrieve a copy of the assayKeys from the `@.intern` slot.
#'
#' @describeIn LongTable Get the assayKeys item from the objects internal metadata.
#'
#' @param `x` A `LongTable` or inheriting class.
#' @param `i` An optional valid assay name or index in `x`.
#'
#' @return A `mutable` copy of the "assyKeys" for `x`
#'
#' @examples
#' assayKeys(nci_TRE_small)
#' assayKeys(nci_TRE_small, "sensitivity")
#' assayKeys(nci_TRE_small, 1)
#'
#' @aliases assayKeys,LongTable-method
#' @export
setMethod("assayKeys", signature("LongTable"), function(x, i) {
    keys <- mutable(getIntern(x, "assayKeys"))
    # error handling occurs in `[[`
    if (!missing(i)) keys[[i]] else keys
})


#' Retrieve the value columns for the assays in a LongTable
#'
#' @examples
#' assayCols(merckLongTable)
#'
#' @describeIn LongTable Get a list of column names for each assay in the object.
#'
#' @param object `LongTable`
#' @param i Optional parameter specifying the `character` name or `integer`
#' index of the assay to get the column names for. If missing, returns a
#' list of value column names for all the assays.
#'
#' @return A `list` of `character` vectors containing the value column names for
#' each assay if i is missing, otherwise a `character` vector of value column
#' names for the selected assay.
#'
#' @import data.table
#' @export
setMethod('assayCols', signature(object='LongTable'),
        function(object, i) {
    if (!missing(i)) {
        stopifnot(is.numeric(i) || is.character(i))
        stopifnot(length(i) == 1)
        stopifnot(i %in% assayNames(object) ||
            i %in% seq_along(assayNames(object)))
    }
    keys <- assayKeys(object)
    assayColnames <- Map(setdiff,
        x=lapply(assays(object, raw=TRUE), FUN=colnames),
        y=as.list(paste0(".", assayNames(object)))
    )
    assayCols <- Map(c, keys, assayColnames)
    if (!missing(i)) assayCols[[i]] else assayCols
})
bhklab/CoreGx documentation built on March 14, 2024, 3:04 a.m.