#' re-export magrittr pipe operator
#'
#' @name %>%
#' @rdname pipe
#'
#' @importFrom magrittr %>%
#'
#' @export
NULL
# #' Intersect GRanges objects
# #'
# #' \code{intersectGR} intersect two GRanges objects and return ranges from
# #' \code{a} that were overlapped by ranges in \code{b}.
# #'
# #' @param a GRanges object.
# #' @param b GRanges object.
# #' @param ... other arguments internally passed to
# #' \code{\link[GenomicRanges]{findOverlaps}}.
# #'
# #' @return GRanges object which is a subset of \code{a}.
# #'
# intersectGR <- function(a, b, ...) {
# warning("deprecated! use IRanges::subsetByOverlaps instead") # TODO drop from the package
# stopifnot(is(a, "GRanges"))
# stopifnot(is(b, "GRanges"))
#
# IRanges::subsetByOverlaps(x = a, ranges = b, ...)
# }
#' Calculate regions coverage
#'
#' \code{getCoverage} calculates coverage of regions (rows in interaction
#' matrix) by features (columns). It is possible to specify features grouping
#' variable \code{gr} then coverage tells how many distinct groups the region
#' overlap with.
#'
#' @param mat dgCMatrix interaction matrix such as produced by
#' \code{\link{getInteractionMatrix}}.
#' @param gr factor specifying features groups. Must have length equal
#' to number of columns in \code{mat}.
#'
#' @return Numeric vector.
#'
#' @examples
#' data("remap_mini")
#' y <- colnames(remap_mini)
#'
#' # simple coverage
#' gr <- seq_along(y) %>% as.factor()
#' getCoverage(remap_mini, gr)
#'
#' # per cell type coverage
#' gr <- sub(".*\\.", "", y) %>% as.factor()
#' getCoverage(remap_mini, gr)
#'
#' @importFrom DelayedArray colsum
#'
#' @export
getCoverage <- function(mat, gr) {
stopifnot("mat must be an instance of class 'dgCMatrix'" = is(mat, "dgCMatrix"))
stopifnot("gr must be a factor" = is.factor(gr))
stopifnot("gr length must equal to number of columns in mat" = length(gr) == ncol(mat))
DelayedArray::colsum(x = mat, group = gr) %>%
`>=`(1) %>%
rowSums()
}
#' Simplify Interaction Matrix
#'
#' @param mat dgCMatrix interaction matrix such as produced by
#' \code{\link{getInteractionMatrix}}.
#' @param alpha Number between 0 and 1 specifying voting threshold. Eg. for 3
#' column matrix alpha 0.5 will give voting criteria >= 2.
#' @param colname character giving new column name.
#'
#' @return dgCMatrix
#'
#' @importFrom Matrix rowSums sparseMatrix
#'
simplifyInteractionMatrix <- function(mat, alpha = 0.5, colname = NA) {
stopifnot(is(mat, "dgCMatrix"))
stopifnot(is.numeric(alpha) && (length(alpha) == 1))
stopifnot((alpha > 0) && (alpha <= 1))
crit <- ceiling(ncol(mat) * alpha)
i <- which(Matrix::rowSums(mat) >= crit)
Matrix::sparseMatrix(i = i,
j = rep(1L, length(i)),
x = 1,
dims = c(nrow(mat), 1L),
dimnames = list(rownames(mat), colname))
}
#' Apply function over groups of columns
#'
#' Returns a array obtained by applying a function to rows of submatrices of the
#' input matrix, where the submatrices are divided into specified groups of
#' columns.
#'
#' @param mat a matrix.
#' @param groups a vector giving columns grouping.
#' @param f function to be applied.
#' @param ... optional arguments to \code{f}.
#'
#' @return a matrix of dimensions \code{nrow(mat) x nlevels(groups)}.
#'
applyOverColumnGroups <- function(mat, groups, f, ...) {
stopifnot(is.matrix(mat))
stopifnot(is.vector(groups) || is.factor(groups))
stopifnot(length(groups) == ncol(mat))
stopifnot(is.function(f))
if (is.vector(groups)) {
groups <- factor(groups)
}
group_f <- lapply(X = levels(groups),
FUN = function(g) {
apply(X = mat,
MARGIN = 1,
FUN = function(row) f(row[groups == g], ...))
})
new_mat <- do.call(cbind, group_f)
colnames(new_mat) <- levels(groups)
return(new_mat)
}
#' Estimate linear models goodness of fit statistic
#'
#' Estimate goodness of fit statistic of penalized linear regression models.
#' Works with different goodness of fit statistic functions.
#'
#' @inheritParams glmnet::glmnet
#' @param u offset vector as in \code{\link{glmnet}}. \code{"U"} experiment in
#' mae.
#' @param s user supplied lambda.
#' @param method currently only cross-validation is implemented.
#' @param nfold number of fold to use in cross-validation.
#' @param statistic function computing goodness of fit statistic. Should accept
#' \code{y}, \code{x}, \code{offset} arguments and return
#' a numeric vector of the same length. See \code{rsq}, \code{mse} for examples.
#' @param ... Other arguments that can be passed to glmnet.
#'
#' @return numeric vector of \code{statistic} estimates.
#'
#' @importFrom foreach foreach
#'
estimateStat <- function(x, y, u, s, method = "cv", nfold = 10, statistic = rsq, alpha = 0, ...) {
if (method == "cv") {
out <- c()
part <- sample(seq_len(nfold), size = length(y), replace = TRUE)
out <- foreach::foreach(p_ = seq_len(nfold), .combine = c) %do%
{
py <- y[part != p_]
px <- x[part != p_, ]
poffset <- u[part != p_, ]
mod <- glmnet::glmnet(x = px, y = py, offset = poffset, lambda = s, alpha = alpha, ...)
# evaluate on held-out fold
py <- y[part == p_]
px <- x[part == p_, ]
poffset <- u[part == p_, ]
yhat <- stats::predict(mod, newx = px, newoffset = poffset, s = s)
stat <- statistic(py, yhat, px, poffset)
out <- c(out, stat)
}
}
return(out)
}
# declare estimateStat foreach variables
utils::globalVariables("p_")
#' Calculate $R^2$
#'
#' @param y numeric vector of observed expression values.
#' @param yhat numeric vector of predicted expression values.
#' @param offset numeric vector giving basal expression level.
#'
#' @return numeric vector
#'
rsq <- function(y, yhat, px, offset) {
y <- y - offset
yhat <- yhat - offset
1 - (sum((y - yhat)^2) / (var(y) * (length(y) - 1)))
}
#' Calculate Mean Squared Error
#'
#' @param y numeric vector of observed expression values.
#' @param yhat numeric vector of predicted expression values.
#' @param ... not used.
#'
#' @return numeric vector
#'
mse <- function(y, yhat, ...) mean((y - yhat)^2)
#' Calculate Mean Absolute Error
#'
#' @param y numeric vector of observed expression values.
#' @param yhat numeric vector of predicted expression values.
#' @param ... not used.
#'
#' @return numeric vector
#'
mae <- function(y, yhat, ...) mean(abs(y - yhat))
#' Transform design matrix to factor
#'
#' @param design design matrix
#'
#' @return factor
#'
#' @examples
#' \dontrun{
#' design <- matrix(data = c(1, 1, 0, 0, 0, 0, 1, 1),
#' nrow = 4,
#' ncol = 2,
#' dimnames = list(c(paste("sample", 1:4)), c("gr1", "gr2")))
#' design2factor(design)
#' }
#'
design2factor <- function(design) {
# based on edgeR::designAsFactor, but jokes aside
groups <- factor(rowMeans(design * col(design) * ncol(design)))
samples_to_keep <- groups != 0
groups <- groups[samples_to_keep] # omit empty groups
groups <- droplevels(groups)
levels(groups) <- colnames(design)[colSums(design) > 0]
return(groups)
}
#' Check if argument is a binary flag
#'
#' @param x object to test
#'
#' @return binary flag
#'
isTRUEorFALSE <- function(x) {
(length(x) == 1) && is.logical(x) && (! is.na(x))
}
#' Apply function over selected column in list of data frames
#'
#' \code{applyOverDFList} operates on a list of data frames where all data frames
#' has the same size and columns. Column of interest is extracted from each data
#' frame and column binded in \code{groups}, next \code{fun} is applied over
#' rows. Final result is a matrix with result for each group on a separate column.
#' Function is parallelized over groups.
#'
#' @param list_of_df list of \code{data.frame}s.
#' @param col_name string specifying column in \code{data.frame}s to apply
#' \code{fun} on.
#' @param fun function to apply, should take a single vector as a argument.
#' @param groups factor defining how elements of \code{list_of_df} should be
#' grouped.
#'
#' @return matrix with \code{nrow(list_of_df[[1]])} rows and
#' \code{nlevels(groups)} columns.
#'
applyOverDFList <- function(list_of_df, col_name, fun, groups) {
stopifnot("all list_of_df names must be included in groups" = setequal(names(list_of_df), names(groups)))
stopifnot("groups must not have unused levels" = setdiff(levels(groups), groups) == character(0))
col_fun_mat <- foreach::foreach(gr_ = levels(groups), .combine = cbind) %dopar% # parallel version is bit faster
{
i <- groups == gr_
attr <- lapply(list_of_df[i], function(df) df[[col_name]])
attr <- do.call(cbind, attr)
matrix(
data = apply(X = attr, MARGIN = 1, FUN = fun),
ncol = 1L,
dimnames = list(rownames(attr), gr_)
)
}
return(col_fun_mat)
}
# declare applyOverDFList foreach variables
utils::globalVariables("gr_")
#' Subset keeping missing
#'
#' Subset matrix keeping unmatched rows as NA.
#'
#' @param mat matrix
#' @param rows character
#'
#' @return a matrix
#'
subsetWithMissing <- function(mat, rows) {
i <- match(x = rows, table = rownames(mat), nomatch = 0)
i[i == 0] <- NA
smat <- mat[i, ]
rownames(smat) <- rows
smat
}
#' Helper summarizing MAE object
#'
#' @param mae MultiAssayExperiment object.
#'
#' @return named list giving number of rows and columns, overall mean and
#' standard deviation in \code{mae}'s experiments.
#'
#' @importFrom MultiAssayExperiment experiments
#' @importFrom Matrix mean
#' @importFrom stats sd
#'
#'
maeSummary <- function(mae) {
lapply(MultiAssayExperiment::experiments(mae), function(x) c(dim(x), Matrix::mean(x), sd(x)))
}
#' Translate counts matrix rownames
#'
#' \code{translateCounts} renames counts matrix rownames according to supplied
#' \code{dict}ionary. Function can handle many to one assignments by taking a
#' sum or an average over \code{counts} rows. Other types of ambiguous
#' assignments are not supported.
#'
#' @param counts matrix of expression values.
#' @param dict named character vector mapping \code{counts} rownames to new
#' values. Values of vector should correspond to new desired rownames, and its
#' names to current rownames.
#'
#' @return matrix of expression values with new rownames.
#'
#' @examples
#' counts <- matrix(
#' data = c(5, 4, 3, 2),
#' nrow = 2,
#' dimnames = list(
#' c("ENSG00000130700", "ENSG00000089225"),
#' c("treatment", "control")
#' )
#' )
#' dict <- c(ENSG00000130700 = "GATA5", ENSG00000089225 = "TBX5")
#' translateCounts(counts, dict)
#'
#' @export
translateCounts <- function(counts, dict) {
stopifnot("counts must be an instance of class 'matrix'" = is(counts, "matrix"))
stopifnot("counts must have its rownames defined" = ! is.null(rownames(counts)))
stopifnot("counts rownames must be unique" = ! anyDuplicated(rownames(counts)))
stopifnot("dict must be an instance of class 'character'" = is(dict, "character"))
stopifnot("dict must be a named character vector" = ! is.null(names(dict)))
stopifnot("dict names must be unique" = ! anyDuplicated(names(dict)))
stopifnot("counts rownames must correspond to dict names" = any(rownames(counts) %in% names(dict)))
new_counts <- counts[rownames(counts) %in% names(dict), ]
new_counts <- rowsum(x = new_counts, group = dict[rownames(new_counts)]) # TODO a simple and useful extension would be to implement option to take an average instead of sum, this could be easly done by dividing by the groups size
return(new_counts)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.