#' Construct data of percent-spliced-in (PSI) matrices and "diagonal" for
#' heatmaps and scatter plots
#'
#' `make_matrix()` constructs a matrix of PSI values of the given alternative
#' splicing events (ASEs).\cr\cr
#' `make_diagonal()` constructs a table of "average" PSI values, with samples
#' grouped by two given conditions (e.g. "group A" and "group B") of a given
#' condition category (e.g. condition "treatment"). See details below.\cr\cr
#' @details
#' Note that this function takes the geometric mean of PSI, by first converting
#' all values to logit(PSI), taking the average logit(PSI) values of each
#' condition, and then converting back to PSI using inverse logit.
#'
#' Samples with low splicing coverage (either due to insufficient sequencing
#' depth or low gene expression) are excluded from calculation of mean PSIs.
#' The threshold can be set using `depth_threshold`. Excluding these samples is
#' appropriate because the uncertainty of PSI is high when the total included /
#' excluded count is low. Note that events where all samples in a condition is
#' excluded will return a value of `NaN`.
#'
#' Using logit-transformed PSI values is appropriate because PSI values are
#' bound to the (0,1) interval, and are often thought to be beta-distributed.
#' The link function often used with beta-distributed models is the logit
#' function, which is defined as `logit(x) = function(x) log(x / (1 - x))`,
#' and is equivalent to [stats::qlogis]. Its inverse is equivalent to
#' [stats::plogis].
#'
#' Users wishing to calculate arithmetic means of PSI are advised to use
#' [make_matrix], followed by [rowMeans] on subsetted sample columns.
#' @param se (Required) A \linkS4class{NxtSE} object generated by [MakeSE]
#' @param event_list A character vector containing the row names of ASE events
#' (as given by the `EventName` column of differential ASE results table
#' using `limma_ASE()` or `DESeq_ASE()`)
#' @param sample_list (default = `colnames(se)`) In `make_matrix()`, a list of
#' sample names in the given experiment to be included in the returned matrix
#' @param method In `make_matrix()`, rhe values to be returned
#' (default = "PSI"). It can
#' alternately be "logit" which returns logit-transformed PSI values, or
#' "Z-score" which returns Z-score-transformed PSI values
#' @param depth_threshold (default = 10) Samples with the number of reads
#' supporting either included or excluded isoforms below this values are
#' excluded
#' @param logit_max (default = 5) PSI values close to 0 or 1 are rounded up/down
#' to `plogis(-logit_max)` and `plogis(logit_max)`, respectively. See details.
#' @param na.percent.max (default = 0.1) The maximum proportion of values in
#' the given dataset that were transformed to `NA` because of low splicing
#' depth. ASE events where there are a higher proportion (default 10%) `NA`
#' values will be excluded from the final matrix. Most heatmap
#' functions will spring an error if there are too many NA values in any
#' given row. This option caps the number of NA values to avoid returning
#' this error.
#' @param condition The name of the column containing the condition values in
#' `colData(se)`
#' @param nom_DE The condition to be contrasted, e.g. `nom_DE = "treatment"`
#' @param denom_DE The condition to be contrasted against, e.g.
#' `denom_DE = "control"`
#' @return
#' For `make_matrix`: A matrix of PSI (or alternate) values, with
#' columns as samples and rows as ASE events.
#'
#' For `make_diagonal`: A 3 column data frame, with the first column containing
#' `event_list` list of ASE events, and the last 2 columns containing the
#' average PSI values of the nominator and denominator conditions.
#' @examples
#' se <- NxtIRF_example_NxtSE()
#'
#' colData(se)$treatment <- rep(c("A", "B"), each = 3)
#'
#' event_list <- rowData(se)$EventName
#'
#' mat <- make_matrix(se, event_list[1:10])
#'
#' diag_values <- make_diagonal(se, event_list,
#' condition = "treatment", nom_DE = "A", denom_DE = "B"
#' )
#' @name make_plot_data
#' @aliases make_matrix make_diagonal
#' @md
NULL
#' @describeIn make_plot_data constructs a matrix of PSI values of the given
#' alternative splicing events (ASEs)
#' @export
make_matrix <- function(
se,
event_list,
sample_list = colnames(se),
method = c("PSI", "logit", "Z-score"),
depth_threshold = 10, logit_max = 5, na.percent.max = 0.1
) {
if (!any(event_list %in% rownames(se))) .log(
"None of events in event_list matches those in the NxtSE object")
method <- match.arg(method)
inc <- as.matrix(assay(se, "Included")[
event_list, sample_list, drop = FALSE])
exc <- as.matrix(assay(se, "Excluded")[
event_list, sample_list, drop = FALSE])
mat <- inc / (inc + exc)
mat[inc + exc < depth_threshold] <- NA
mat <- mat[rowSums(is.na(mat)) < na.percent.max * ncol(mat), , drop = FALSE]
if (method == "PSI") {
# essentially M/Cov
return(mat)
} else if (method == "logit") {
mat <- qlogis(mat)
mat[mat > logit_max] <- logit_max
mat[mat < -logit_max] <- -logit_max
return(mat)
} else if (method == "Z-score") {
mat <- mat - rowMeans(mat)
mat <- mat / rowSds(mat)
return(mat)
}
}
#' @describeIn make_plot_data constructs a table of "average" PSI values
#' @export
make_diagonal <- function(
se, event_list = rownames(se),
condition, nom_DE, denom_DE,
depth_threshold = 10, logit_max = 5
) {
if (!any(event_list %in% rownames(se))) .log(
"None of events in event_list matches those in the NxtSE object")
inc <- assay(se, "Included")[event_list, ]
exc <- assay(se, "Excluded")[event_list, ]
mat <- inc / (inc + exc)
mat[inc + exc < depth_threshold] <- NA
# use logit method to calculate geometric mean
mat.nom <- qlogis(mat[, colData(se)[, condition] == nom_DE])
mat.denom <- qlogis(mat[, colData(se)[, condition] == denom_DE])
mat.nom[mat.nom > logit_max] <- logit_max
mat.denom[mat.denom > logit_max] <- logit_max
mat.nom[mat.nom < -logit_max] <- -logit_max
mat.denom[mat.denom < -logit_max] <- -logit_max
df <- data.frame(EventName = event_list,
nom = plogis(rowMeans(mat.nom, na.rm = TRUE)),
denom = plogis(rowMeans(mat.denom, na.rm = TRUE)))
return(df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.