#' Create a sampleMap from an experiment list and phenoData dataframe
#'
#' This function helps create a sampleMap in preparation of a
#' `MultiAssayExperiment` object. This especially useful when the
#' sample identifiers are not very different, as in the case of TCGA barcodes.
#' An `idConverter` function can be provided to truncate such sample
#' identifiers and obtain patient identifiers.
#'
#' @param experiments A named `list` of experiments compatible with the
#' `MultiAssayExperiment` API
#' @param colData A `data.frame` of clinical data with patient identifiers
#' as rownames
#' @param idConverter A function to be used against the sample or specimen
#' identifiers to match those in the rownames of the `colData`
#' (default NULL)
#' @param sampleCol A single string indicating the sample identifiers
#' column in the colData dataset
#' @param patientCol A single string indicating the patient identifiers
#' in colData, "row.names" extracts the colData row names
#' @param ... Additonal arguments to pass to the 'idConverter' function.
#'
#' @return A `DataFrame` class object of mapped samples and patient
#' identifiers including assays
#'
#' @author M. Ramos, M. Morgan, L. Schiffer
#'
#' @examples
#' ## Minimal example
#' expList <- list(assay1 = matrix(1:6, ncol = 2L,
#' dimnames = list(paste0("feature", 1:3), c("A-J", "B-J"))),
#' assay2 = matrix(1:4, ncol = 2,
#' dimnames = list(paste0("gene", 1:2), c("A-L", "B-L"))))
#'
#' ## Mock colData
#' myPheno <- data.frame(var1 = c("Yes", "No"), var2 = c("High", "Low"),
#' row.names = c("a", "b"))
#'
#' ## A look at the identifiers
#' vapply(expList, colnames, character(2L))
#' rownames(myPheno)
#'
#' ## Use 'idConverter' to correspond sample names to patient identifiers
#' generateMap(expList, myPheno,
#' idConverter = function(x) substr(tolower(x), 1L, 1L))
#'
#' @export generateMap
generateMap <- function(experiments, colData, idConverter = identity,
sampleCol, patientCol, ...) {
if (!is(experiments, "ExperimentList"))
experiments <- ExperimentList(experiments)
samps <- colnames(experiments)
expnames <- names(samps)
assay <- factor(rep(expnames, lengths(samps)), levels=expnames)
colname <- unlist(samps, use.names=FALSE)
if (!missing(sampleCol) && !missing(patientCol)) {
if (!S4Vectors::isSingleString(sampleCol) ||
!S4Vectors::isSingleString(patientCol))
stop("Provide character names in colData for mapping")
if (identical(patientCol, "row.names"))
pts <- rownames(colData)
else
pts <- colData[[patientCol]]
samples <- colData[[sampleCol]]
autoMap <- cbind.data.frame(assay = NA_character_, primary = pts,
colname = samples, stringsAsFactors = FALSE)
autoMap <- Map(function(cnames, i) {
submap <- autoMap[autoMap[["colname"]] %in% cnames, ]
if (nrow(submap)) {
submap[["assay"]] <- i
} else {
warning(
"'", i, "' assay dropped; 'colnames' not mappable",
call. = FALSE
)
}
submap
}, cnames = samps, i = names(samps))
autoMap <- do.call(function(...) {
rbind(..., make.row.names = FALSE)
}, autoMap)
autoMap[["assay"]] <- factor(autoMap[["assay"]])
} else {
matches <- match(idConverter(colname, ...), rownames(colData))
if (length(matches) && all(is.na(matches)))
stop("no way to map colData to ExperimentList")
primary <- rownames(colData)[matches]
autoMap <- S4Vectors::DataFrame(assay=assay,
primary=primary, colname=colname)
}
missingPrimary <- is.na(autoMap[["primary"]])
if (nrow(autoMap) && any(missingPrimary)) {
notFound <- autoMap[missingPrimary, ]
warning("Data from rows:",
sprintf("\n %s - %s", notFound[, "primary"],
notFound[, "colname"]),
"\ndropped due to missing phenotype data")
autoMap <- autoMap[!is.na(autoMap[["primary"]]), ]
}
autoMap
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.