#' @include ToxicoSet-accessors.R
NULL
#'`[`
#'
#' @examples
#' tSet <- TGGATESsmall[sampleNames(TGGATESsmall), treatmentNames(TGGATESsmall)[seq_len(3)]]
#'
#'@param x tSet
#'@param i Cell lines to keep in tSet
#'@param j Drugs to keep in tSet
#'@param ... further arguments
#'@param drop A boolean flag of whether to drop single dimensions or not
#'@return Returns the subsetted tSet
#'@export
setMethod(`[`, "ToxicoSet", function(x, i, j, ..., drop = FALSE){
if(is.character(i) && is.character(j)) {
return(subsetTo(x, cells=i, drugs=j, molecular.data.cells=i))
}
else if(is.numeric(i) && is.numeric(j) &&
(as.integer(i)==i) && (as.integer(j)==j)) {
return(subsetTo(x, cells=sampleNames(x)[i], drugs=treatmentNames(x)[j],
molecular.data.cells=sampleNames(x)[i]))
}
})
#### subsetTo ####
## FIXED? TODO:: Subset function breaks if it doesnt find cell line in sensitivity info
#' A function to subset a ToxicoSet to data containing only specified drugs, cells and genes
#'
#' This is the prefered method of subsetting a ToxicoSet. This function allows
#' abstraction of the data to the level of biologically relevant objects: drugs
#' and cells. The function will automatically go through all of the
#' combined data in the ToxicoSet and ensure only the requested radiations
#' and cell lines are found in any of the slots. This allows quickly picking out
#' all the experiments for a radiation or cell of interest, as well removes the need
#' to keep track of all the metadata conventions between different datasets.
#'
#' @examples
#' TGGATESDrugNames <- treatmentNames(TGGATESsmall)
#' TGGATESCells <- sampleNames(TGGATESsmall)
#' tSet <- subsetTo(TGGATESsmall,drugs = TGGATESDrugNames[1],
#' cells = TGGATESCells[1], duration = "2")
#'
#' @param object A \code{ToxicoSet} to be subsetted
#' @param cell_lines A list or vector of cell names as used in the dataset to which
#' the object will be subsetted. If left blank, then all cells will be left in
#' the dataset.
#' @param drugs A list or vector of drug names as used in the dataset to which
#' the object will be subsetted. If left blank, then all drugs will be left in
#' the dataset.
#' @param features A list or vector of feature names as used in the dataset from
#' which the object will be subsetted. If left blank that all features will
#' be left in.
#' @param molecular.data.cells A list or vector of cell names to keep in the
#' molecular data
#' @param duration A \code{list} or \code{vector} of the experimental durations
#' to include in the subset as strings. Defaults to all durations if parameter
#' is not specified.
#' @param ... Other arguments passed to other functions within the package
#'
#' @return A ToxicoSet with only the selected drugs and cells
#'
#' @importFrom CoreGx .unionList .message .warning .error
#' @export
## TODO:: Include dose parmater to subset on
subsetTo <- function(object, cell_lines = NULL,
drugs=NULL,
molecular.data.cells=NULL,
duration=NULL,
features=NULL,
...
) {
## TODO:: Remove this or add it to the function parameters?
drop = FALSE
####
# PARSING ARGUMENTS
####
adArgs = list(...)
if ("exps" %in% names(adArgs)) {
exps <- adArgs[["exps"]]
if(is(exps, "data.frame")) {
exps2 <- exps[[name(object)]]
names(exps2) <- rownames(exps)
exps <- exps2
} else{
exps <- exps[[name(object)]]
}
}else {
exps <- NULL
}
if ("dose" %in% names(adArgs)) {
## TODO:: Add subsetting on dose
stop("Due to the structure of tSets, subsetting on dose can only be done on
specific slots - not on the entire tSet")
}
## MISSING VALUE HANDLING FOR PARAMETERS
# Get named list of default values for missing parameters
argDefaultList <-
paramMissingHandler(funName = "subsetTo", tSet = object,
drugs = drugs, cell_lines = cell_lines,
features = features, duration = duration)
# Assign any missing parameter default values to function environment
if (length(argDefaultList) > 0) {
for (idx in seq_along(argDefaultList)) {
assign(names(argDefaultList)[idx], argDefaultList[[idx]])
}
}
# ERROR HANDLING FOR PARAMETERS
paramErrorChecker(funName = "subsetTo", tSet = object,
cell_lines = cell_lines,
drugs = drugs, features = features,
duration = duration)
##TODO:: Add a value to tSet which indicates the experimental design!
##FIXME:: Don't hard code object names!
if (name(object) %in% c("drugMatrix_rat", "EMEXP2458")) {
if (!('DMSO' %in% drugs)) {
drugs <- c(drugs, 'DMSO')
}
}
######
# SUBSETTING MOLECULAR PROFILES SLOT
######
### TODO:: implement strict subsetting at this level!!!!
### the function missing does not work as expected in the context below, because the arguments are passed to the anonymous
### function in lapply, so it does not recognize them as missing
molecularProfilesSlot(object) <-
lapply(molecularProfilesSlot(object),
function(SE, cell_lines, drugs, molecular.data.cells, duration, features){
if (!is.null(features)) {
SE <- SE[which(rownames(SummarizedExperiment::rowData(SE)) %in% features), ]
}
##FIXME:: Why is are all these if conditions being checked against length? Just use grepl?
molecular.data.type <-
ifelse(
length(grep("rna", S4Vectors::metadata(SE)$annotation) > 0),
"rna",
S4Vectors::metadata(SE)$annotation
)
if (length(grep(molecular.data.type, names(molecular.data.cells))) > 0) {
cell_lines <- molecular.data.cells[[molecular.data.type]]
}
column_indices <- NULL
if (length(cell_lines) == 0 && length(drugs) == 0) {
column_indices <- seq_len(ncol(SE))
}
if (length(cell_lines) == 0 && datasetType(object) == "sensitivity") {
column_indices <- seq_len(ncol(SE))
}
# Selecting indices which match the cells argument
cell_line_index <- NULL
if (length(cell_lines) != 0) {
if (!all(cell_lines %in% sampleNames(object))) {
stop("Some of the cell names passed to function did not match to names
in the ToxicoSet. Please ensure you are using cell names as
returned by the cellNames function")
}
cell_line_index <- which(SummarizedExperiment::colData(SE)[["sampleid"]] %in% cell_lines)
}
# Selecting indexes which match drugs arguement
drugs_index <- NULL
if (datasetType(object) == "perturbation" || datasetType(object) == "both") {
if (length(drugs) != 0) {
if (!all(drugs %in% treatmentNames(object))){
stop("Some of the drug names passed to function did not match to names in the ToxicoSet Please ensure you are using drug names as returned by the drugNames function")
}
drugs_index <- which(SummarizedExperiment::colData(SE)[["treatmentid"]] %in% drugs)
}
}
if (length(drugs_index) != 0 && length(cell_line_index) != 0) {
if (length(intersect(drugs_index, cell_line_index)) == 0) {
stop("This Drug - Cell Line combination was not tested together.")
}
column_indices <- intersect(drugs_index, cell_line_index)
} else {
if (length(drugs_index) != 0) {
column_indices <- drugs_index
}
if (length(cell_line_index) != 0) {
column_indices <- cell_line_index
}
}
# LOGIC TO SUBSET BASED ON DURATION
## TODO:: Determine if this works for other SE data types
if (!is.null(duration)){
if (all(!(duration %in% unique(SummarizedExperiment::colData(SE[, column_indices])$duration)))) {
# Error when other parameters are passed in
if ( !is.null(cell_lines) | !is.null(drugs) | !is.null(molecular.data.cells)) {
stop(paste0(
"There are no molecular profiles with duration of ",
duration, " in the tSet with the selected parameters."
))
} else { # Error when no other parameters are passed in
stop(paste0(
"There are no molecular profiles with duration of ",
duration, " in the tSet."
))
}
}
duration_indices <- which(SummarizedExperiment::colData(SE)$duration %in% duration)
column_indices <- intersect(column_indices, duration_indices)
}
row_indices <- seq_len(nrow(SummarizedExperiment::assay(SE, 1)))
# Final SE
SE <- SE[row_indices, column_indices]
return(SE)
}, cell_lines = cell_lines,
drugs = drugs,
molecular.data.cells = molecular.data.cells,
duration = duration,
features = features)
######
# SUBSET SENSITIVITY SLOT
######
# Logic if any "..." arguments are passed to subsetTo
if ((datasetType(object) == "sensitivity" | datasetType(object) == "both") & length(exps) != 0) {
sensitivityInfo(object) <- sensitivityInfo(object)[exps, , drop=drop]
rownames(sensitivityInfo(object)) <- names(exps)
if (length(sensitivityRaw(object)) > 0) {
sensitivityRaw(object) <- sensitivityRaw(object)[exps, , , drop=drop]
dimnames(sensitivityRaw(object))[[1]] <- names(exps)
}
sensitivityProfiles(object) <- sensitivityProfiles(object)[exps, , drop=drop]
rownames(sensitivityProfiles(object)) <- names(exps)
sensNumber(object) <- .summarizeSensitivityNumbers(object)
}
# Logic if drug or cell parameters are passed to subsetTo
else if (
(datasetType(object) == "sensitivity" | datasetType(object) == "both") &
(length(drugs) != 0 | length(cell_lines) != 0 | !is.null(duration) )
) {
drugs_index <- which(sensitivityInfo(object)[, "treatmentid"] %in% drugs)
cell_line_index <- which(sensitivityInfo(object)[,"sampleid"] %in% cell_lines)
if (length(drugs_index) !=0 & length(cell_line_index) !=0 ) {
if (length(intersect(drugs_index, cell_line_index)) == 0) {
stop("This Drug - Cell Line combination was not tested together.")
}
row_indices <- intersect(drugs_index, cell_line_index)
} else {
if(length(drugs_index)!=0 & length(cell_lines)==0) {
row_indices <- drugs_index
} else {
if(length(cell_line_index)!=0 & length(drugs)==0){
row_indices <- cell_line_index
} else {
# Includes all rows if cell or drug arguments are absent
row_indices <- seq_len(nrow(sensitivityInfo(object)))
}
}
}
# LOGIC TO SUBSET BASED ON DURATION
if(!is.null(duration)){
if(all(!(duration %in% unique(sensitivityInfo(object)[row_indices,]$duration_h)))) {
# Error when other parameters are passed in
if(!is.null(cell_lines) | !is.null(drugs) | !is.null(molecular.data.cells)) {
stop(paste0(
## TODO:: Is sample the correct way to refer to one treatment/duration combination in TGx experiments?
"There are no samples with duration of ",
duration, " in the tSet with the selected parameters."
))
} else { # Error when no other parameters are passed in
stop(paste0(
"There are no samples with duration of ",
duration, " in the tSet"
))
}
}
duration_indices <- which(sensitivityInfo(object)$duration_h %in% duration)
row_indices <- intersect(row_indices, duration_indices)
}
sensItemNames <- names(treatmentResponse(object))
sensitivityVals <-
lapply(sensItemNames, function(sensItemName, drop){
if (sensItemName == "n") {
sensItem <- treatmentResponse(object)[[sensItemName]]
if (!is.null(cell_lines)) {
sensItem[which(rownames(sensItem) %in% cell_lines),
which(colnames(sensItem) %in% drugs), drop = drop]
} else {
sensItem[ , which(colnames(sensItem) %in% drugs), drop = drop]
}
} else {
sensItem <- treatmentResponse(object)[[sensItemName]]
if (length(dim(sensItem)) == 3) {
sensItem[row_indices, , , drop = drop]
} else {
sensItem[row_indices, , drop = drop]
}
}
}, drop = drop)
names(sensitivityVals) <- sensItemNames
treatmentResponse(object) <- sensitivityVals
}
#####
# SUBSET DRUG SLOT
#####
if (length(drugs) == 0) {
if (datasetType(object) == "sensitivity" | datasetType(object) == "both"){
drugs <- unique(sensitivityInfo(object)[["treatmentid"]])
}
if(datasetType(object) == "perturbation" | datasetType(object) == "both"){
drugs <- union(drugs, na.omit(.unionList(lapply(molecularProfilesSlot(object), function(SE){unique(SummarizedExperiment::colData(SE)[["treatmentid"]])}))))
}
}
#####
# SUBSET CELLS SLOT
#####
if (length(cell_lines) == 0) {
cell_lines <- union(cell_lines, na.omit(.unionList(lapply(molecularProfilesSlot(object), function(SE){unique(SummarizedExperiment::colData(SE)[["sampleid"]])}))))
if (datasetType(object) == "sensitivity" | datasetType(object) == "both"){
cell_lines <- union(cell_lines, sensitivityInfo(object)[["sampleid"]])
}
}
#####
# ASSIGN SUBSETS BACK TO TOXICOSET OBJECT
#####
treatmentInfo(object) <- treatmentInfo(object)[drugs , , drop=drop]
sampleInfo(object) <- sampleInfo(object)[cell_lines , , drop=drop]
curation(object)$treatment <- curation(object)$treatment[drugs , , drop=drop]
curation(object)$sample <- curation(object)$sample[cell_lines , , drop=drop]
curation(object)$tissue <- curation(object)$tissue[cell_lines , , drop=drop]
return(object)
}
#
# END SUBSET TO FUNCTION
#
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.