# mistyR view composition functions
# Copyleft (ɔ) 2020 Jovan Tanevski <jovan.tanevski@uni-heidelberg.de>
#' \strong{Start here:} create a basic view composition with an intraview
#'
#' This function is the first one to be called when building a mistyR workflow,
#' starting from view composition. The initial view describes the intraview of
#' the sample.
#'
#' @param data A \code{data.frame} or a \code{tibble} containing expression
#' information for all markers of interest (in named columns) for each
#' spatial unit (in rows).
#' @param unique.id A \code{character} vector. Identifier of the current sample.
#' If not provided (\code{unique.id = NULL}) then an id is automatically
#' generated by calculating the md5 hash of \code{table}.
#'
#' @return An initial mistyR view composition containing an \emph{intraview} list
#' item named described with abbreviation "intra" and \emph{data} as provided
#' in \code{data} and a \emph{misty.uniqueid} list item containing the
#' provided or automatically calculated \code{unique.id}. A cache folder for
#' the sample will be automatically created in the working directory as a
#' subfolder of \file{.misty.temp/} with the same name as \code{unique.id}.
#'
#' @family view composition functions
#'
#' @examples
#' # Create an intrinsic view from the first sample in the dataset synthetic.
#'
#' library(dplyr)
#'
#' # get the expression data
#' data("synthetic")
#' expr <- synthetic[[1]] %>% select(-c(row, col, type))
#'
#' create_initial_view(expr)
#' @export
create_initial_view <- function(data, unique.id = NULL) {
init.list <- list(intraview = list(abbrev = "intra", data = data))
misty.uniqueid <- ifelse(is.null(unique.id),
digest::digest(data, "md5"),
unique.id
)
view <- append(init.list, list(misty.uniqueid = misty.uniqueid))
return(view)
}
# make a misty.view class?
#' Create a custom view
#'
#' Create a custom view from a \code{data.frame} or a \code{tibble}.
#'
#' Creating a custom view does not add it to the current view composition.
#'
#' @param name Name of the view. A character vector.
#' @param data A \code{data.frame} or a \code{tibble} with named variables in
#' columns and rows for each spatial unit ordered as in the intraview.
#' @param abbrev Abbreviated name. A character vector.
#'
#' @return A new mistyR view. A list with a single \code{name}d item described by
#' the provided \code{abbrev}iation and \emph{data} containing the provided
#' \code{data}.
#'
#' @seealso \code{\link{add_views}()} for adding created views
#' to a view composition.
#'
#' @family view composition functions
#'
#' @examples
#' # Create a view from the mean expression of the 10 nearest neighbors of
#' # each cell.
#'
#' library(dplyr)
#' library(purrr)
#' library(distances)
#'
#' # get the expression data
#' data("synthetic")
#' expr <- synthetic[[1]] %>% select(-c(row, col, type))
#' # get the coordinates for each cell
#' pos <- synthetic[[1]] %>% select(row, col)
#'
#' # find the 10 nearest neighbors
#' neighbors <- nearest_neighbor_search(distances(as.matrix(pos)), k = 11)[-1, ]
#'
#' # calculate the mean expression of the nearest neighbors for all markers
#' # for each cell in expr
#' nnexpr <- seq_len(nrow(expr)) %>%
#' map_dfr(~ expr %>%
#' slice(neighbors[, .x]) %>%
#' colMeans())
#'
#' create_view("nearest", nnexpr, "nn")
#' @export
create_view <- function(name, data, abbrev = name) {
new.list <- list(list(abbrev = abbrev, data = data))
names(new.list)[1] <- name
return(new.list)
}
#' Add custom views to the current view composition
#'
#' Add one or more custom views to the current view composition.
#'
#' @param current.views the current view composition.
#' @param new.views a view or a list of views created with
#' \code{\link{create_view}()} or otherwise.
#'
#' @return A mistyR view composition containing an union of views from
#' \code{current.views} and \code{new.views}.
#'
#' @seealso \code{\link{create_initial_view}()} for
#' starting a view composition, with an intraview,
#' \code{\link{create_view}()} for creating a custom view.
#'
#' @family view composition functions
#'
#' @examples
#'
#' # create random views
#' view1 <- data.frame(marker1 = rnorm(100, 10, 2), marker2 = rnorm(100, 15, 3))
#' view2 <- data.frame(marker1 = rnorm(100, 10, 5), marker2 = rnorm(100, 15, 5))
#'
#' misty.views <- create_initial_view(view1)
#'
#' new.view <- create_view("dummyname", view2, "dname")
#' add_views(misty.views, new.view)
#'
#' misty.views %>% add_views(create_view("dummyname", view2, "dname"))
#' @export
add_views <- function(current.views, new.views) {
assertthat::assert_that(length(current.views) >= 1,
!is.null(current.views[["intraview"]]),
!is.null(current.views[["misty.uniqueid"]]),
msg = "Intraview is missing."
)
assertthat::assert_that(is.list(new.views),
msg = "The new views are not in a list or vector."
)
assertthat::assert_that(length(new.views %>%
unlist(recursive = FALSE)) %% 2 == 0,
msg = "The new view is malformed. Consider using create_view()."
)
assertthat::assert_that(!any(names(new.views) %in% names(current.views)),
msg = "The list of new views contains a duplicate view name."
)
view.abbrev <- current.views %>%
rlist::list.remove(c("misty.uniqueid")) %>%
purrr::map_chr(~ .x[["abbrev"]])
new.view.abbrev <- new.views %>% purrr::map_chr(~ .x[["abbrev"]])
assertthat::assert_that(!any(new.view.abbrev %in% view.abbrev),
msg = "The list of new views contains a duplicate abbreviation."
)
new.views %>% purrr::walk(function(new.view) {
# check for naming of each element, abbreviation and existance of a table
assertthat::assert_that(is.list(new.view),
!is.null(new.view[["abbrev"]]),
is.character(new.view[["abbrev"]]),
!is.null(new.view[["data"]]),
(is.data.frame(new.view[["data"]]) | tibble::is_tibble(new.view[["data"]])),
msg = "The new view is malformed. Consider using create_view()."
)
# check for row compatibility
assertthat::assert_that(nrow(current.views[["intraview"]][["data"]]) ==
nrow(new.view[["data"]]),
msg = "The new view should have the same number of rows as the intraview."
)
})
# update
return(append(current.views, new.views))
}
#' Get neighbors from Delauney triangulation
#'
#' Helper function for \code{\link{add_juxtaview}()}.
#'
#' @param ddobj deldir object.
#' @param id cell/spatial unit id.
#'
#' @return a vector of neighbors of \code{id}.
#' @noRd
get_neighbors <- function(ddobj, id) {
dplyr::union(
ddobj$delsgs$ind1[which(ddobj$delsgs$ind2 == id)],
ddobj$delsgs$ind2[which(ddobj$delsgs$ind1 == id)]
)
}
#' Generate and add a juxtaview to the current view composition
#'
#' The juxtaview captures the expression of all markers within the immediate
#' neighborhood of a spatial unit.
#'
#' The neighborhood of each spatial unit is estimated by constructing a graph by
#' 2D Delaunay triangulation following by removal of edges with length larger than
#' \code{neighbor.thr}. For each spatial unit the juxtaview contains the sum of
#' expressions across its estimated neighbors for each marker.
#'
#' @param current.views the current view composition.
#' @param positions a \code{data.frame}, \code{tibble} or a \code{matrix}
#' with named coordinates in columns and rows for each spatial unit ordered
#' as in the intraview.
#' @param neighbor.thr a threshold value used to indicate the largest distance
#' between two spatial units that can be considered as neighboring.
#' @param prefix a prefix to add to the column names.
#' @param cached a \code{logical} indicating whether to cache the calculated view
#' after the first calculation and to reuse a previously cached view if it
#' already exists for this sample.
#' @param verbose a \code{logical} controlling the verbosity of the output of the
#' function during execution.
#'
#' @return A mistyR view composition with added juxtaview.
#'
#' @seealso \code{\link{create_initial_view}()} for
#' starting a view composition with an intraview only.
#'
#' @family view composition functions
#'
#' @examples
#' # Create a view composition of an intraview and a juxtaview.
#'
#' library(dplyr)
#'
#' # get the expression data
#' data("synthetic")
#' expr <- synthetic[[1]] %>% select(-c(row, col, type))
#' # get the coordinates for each cell
#' pos <- synthetic[[1]] %>% select(row, col)
#'
#' # compose
#' misty.views <- create_initial_view(expr) %>% add_juxtaview(pos, neighbor.thr = 1.5)
#'
#' # preview
#' str(misty.views[["juxtaview.1.5"]])
#' @export
add_juxtaview <- function(current.views, positions, neighbor.thr = 15,
prefix = "", cached = FALSE, verbose = TRUE) {
expr <- current.views[["intraview"]][["data"]]
cache.location <- R.utils::getAbsolutePath(paste0(
".misty.temp", .Platform$file.sep,
current.views[["misty.uniqueid"]]
))
juxta.cache.file <- paste0(
cache.location, .Platform$file.sep,
"juxta.view.", neighbor.thr, ".rds"
)
if (cached && !dir.exists(cache.location)) {
dir.create(cache.location, recursive = TRUE, showWarnings = TRUE)
}
if (cached & file.exists(juxta.cache.file)) {
juxta.view <- readr::read_rds(juxta.cache.file)
} else {
if (verbose) message("\nComputing triangulation")
delaunay <- deldir::deldir(as.data.frame(positions))
if (verbose) message("\nGenerating juxtaview")
juxta.view <- seq(nrow(expr)) %>% furrr::future_map_dfr(function(cid) {
alln <- get_neighbors(delaunay, cid)
# suboptimal placement of dists, but makes conflict if out of scope
# probably due to lazy evaluations
dists <- distances::distances(as.data.frame(positions))
actualn <- alln[which(dists[alln, cid] <= neighbor.thr)]
data.frame(t(colSums(expr[actualn, ])))
}, .progress = verbose)
if (cached) readr::write_rds(juxta.view, juxta.cache.file)
}
return(current.views %>% add_views(create_view(
paste0("juxtaview.", neighbor.thr),
juxta.view %>% dplyr::rename_with(~paste0(prefix, .x)),
paste0("juxta.", neighbor.thr)
)))
}
#' Sample a Nystrom approximated row
#'
#' Helper function for \code{\link{add_paraview}()}.
#'
#' Kumar et al. 2012. Sampling methods for the Nystrom method.
#' Journal of Machine Learning Research 13(1):981-1006
#' \url{https://www.jmlr.org/papers/volume13/kumar12a/kumar12a.pdf}
#'
#' @param K.approx a list containing elemnts \code{s} - indexes of sampled
#' columns, \code{C} - sample of columns \code{s} of the original matrix, and
#' \code{W.plus} - pseudo inverse of W (sample of rows \code{s} from \code{C}).
#' @param k row to be approximated.
#'
#' @return approximated values of row k.
#' @noRd
sample_nystrom_row <- function(K.approx, k) {
cw <- seq(ncol(K.approx$W.plus)) %>%
purrr::map_dbl(~ K.approx$C[k, ] %*% K.approx$W.plus[, .x])
cwct <- seq(nrow(K.approx$C)) %>%
purrr::map_dbl(~ cw %*% t(K.approx$C)[, .x])
cwct
}
#' Get weights for paraview
#'
#' Helper function for \code{\link{add_paraview}()}.
#'
#' @param family the family of distances.
#' @param distances the distances vecor/matrix object to calculate weights from.
#' @param parameter parameter of the family function.
#'
#' @return calculated weights with the same shape as \code{distances}.
#'
#' @noRd
get_weight <- function(family = c(
"gaussian", "exponential",
"linear", "constant"
),
distances, parameter, zoi) {
expr.family <- match.arg(family)
distances[distances < zoi] <- Inf
dim.orig <- dim(distances)
switch(expr.family,
"gaussian" = {
exp(-distances^2 / parameter^2)
},
"exponential" = {
exp(-distances / parameter)
},
"linear" = {
weights <- pmax(0, 1 - distances / parameter)
dim(weights) <- dim.orig
weights
},
"constant" = {
weights <- as.numeric(!is.infinite(distances))
dim(weights) <- dim.orig
weights
}
)
}
#' Generate and add a paraview to the current view composition
#'
#' The paraview captures the expression of all markers in the broader
#' tissue structure.
#'
#' The paraview is generated by weighted sum of the expression of all spatial
#' units for each marker. The weights for each spatial unit \var{i} are dependent
#' on the \code{family} which can be one of "gaussian", "exponential", "linear"
#' or "constant".
#'
#' If "gaussian" the weights are calculated based on the distance to the spatial
#' unit \var{j} and the parameter \code{l} using the radial basis function
#' \deqn{w_{ij} = e^{-\frac{d_{ij}^2}{l^2}}}{w(i,j) = exp(-d(i,j)^2/l^2)}
#'
#' The parameter \code{l} here denotes the "effective" radius of influence.
#'
#' If "exponential" the weights are calculated based on the distance to the spatial
#' unit \var{j} and the parameter \code{l} using the exponential function
#' \deqn{w_{ij} = e^{-\frac{d_{ij}}{l}}}{w(i,j) = exp(-d(i,j)/l)}
#'
#' The parameter \code{l} here denotes signaling length. For more information
#' consult Oyler-Yaniv et. al. Immunity 46(4) 2017.
#'
#' If "linear" the weights are calculated based on the distance to the spatial
#' unit \var{j} and the parameter \code{l} using the linear function
#' \deqn{w_{ij} = 1- d(i,j)/l}{w(i,j) = 1- d(i,j)/l}
#'
#' The parameter \code{l} here denotes the intersect of the linear function. For
#' distances larger than \code{l} the weight is equal to 0.
#'
#' If "constant" the weights are always 1. The parameter \code{l} here denotes
#' the number of nearest neighbors to take into account if \code{nn} is not
#' defined.
#'
#' Since the generation of the paraview requires the calculation of pairwise distances
#' of all spatial units it can take a significant amount of computation time. The
#' parameters \code{approx} and \code{nn} can be set to speed up the calculation
#' by approximation. The approximation can be achieved by using the Nyström
#' low-rank approximation method or by limiting the calculation of the paraview
#' to a number of nearest neighbors around each spatial unit.
#'
#' If the value of \code{approx} is between 0 and 1 it will be interpreted as
#' fraction of the number of spatial units. Discrete values above 1 will be
#' interpreted as the size of the approximation block. The number of nearest
#' neighbors \code{nn} around each spatial unit are determined using a fast
#' nearest neighbor search.
#'
#' If both \code{approx} and \code{nn} have non-null values, \code{nn}
#' has priority and an approximation based on fast nearest neighbor
#' search will be used to generate the paraview.
#'
#' @inheritParams add_juxtaview
#' @param l effective radius of influence of expression in the broader tissue
#' structure.
#' @param zoi spatial units with distance smaller than the zone of indifference
#' will not be taken into account when generating the paraview.
#' @param family the family f functions used to generate weights. (see Details)
#' @param approx rank of the Nyström approximation matrix. (see Details)
#' @param nn the number of spatial units to be used for approximating the
#' paraview using a fast nearest neighbor search. (see Details)
#'
#'
#' @return A mistyR view composition with added paraview with parameter \code{l}.
#'
#' @seealso \code{\link{create_initial_view}()} for
#' starting a view composition with an intraview only.
#'
#' @family view composition functions
#'
#' @examples
#' # Create a view composition of an intraview and a paraview with radius 10.
#'
#' library(dplyr)
#'
#' # get the expression data
#' data("synthetic")
#' expr <- synthetic[[1]] %>% select(-c(row, col, type))
#' # get the coordinates for each cell
#' pos <- synthetic[[1]] %>% select(row, col)
#'
#' # compose
#' misty.views <- create_initial_view(expr) %>% add_paraview(pos, l = 10)
#'
#' # preview
#' str(misty.views[["paraview.10"]])
#' @export
add_paraview <- function(current.views, positions, l, zoi = 0,
family = c(
"gaussian", "exponential",
"linear", "constant"
),
approx = 1, nn = NULL, prefix = "",
cached = FALSE, verbose = TRUE) {
dists <- distances::distances(as.data.frame(positions))
expr <- current.views[["intraview"]][["data"]]
cache.location <- R.utils::getAbsolutePath(paste0(
".misty.temp", .Platform$file.sep,
current.views[["misty.uniqueid"]]
))
para.cache.file <- paste0(
cache.location, .Platform$file.sep,
"para.view.", l, ".rds"
)
if (cached && !dir.exists(cache.location)) {
dir.create(cache.location, recursive = TRUE, showWarnings = TRUE)
}
if (match.arg(family) == "constant" & is.numeric(l) & is.null(nn)) {
nn <- round(l)
}
if (cached & file.exists(para.cache.file)) {
para.view <- readr::read_rds(para.cache.file)
} else {
if (is.null(nn)) {
if (approx == 1) {
if (verbose) message("\nGenerating paraview")
para.view <- seq(nrow(expr)) %>%
furrr::future_map_dfr(~ data.frame(t(colSums(expr[-.x, ] *
get_weight(family, dists[, .x][-.x], l, zoi)))),
.options = furrr::furrr_options(packages = "distances"),
.progress = verbose
)
} else {
if (approx < 1) approx <- base::round(approx * ncol(dists))
if (verbose) {
message("\nApproximating RBF matrix using the Nystrom method")
}
assertthat::assert_that(requireNamespace("MASS", quietly = TRUE),
msg = "The package MASS is required to approximate the paraview using
the Nystrom method."
)
# single Nystrom approximation expert, given RBF with parameter l
s <- sort(sample.int(n = ncol(dists), size = approx))
C <- get_weight(family, dists[, s], l, zoi)
# pseudo inverse of W
W.plus <- MASS::ginv(C[s, ])
# return Nystrom list
K.approx <- list(s = s, C = C, W.plus = W.plus)
para.view <- seq(nrow(expr)) %>%
furrr::future_map_dfr(~ data.frame(t(colSums(expr[-.x, ] *
sample_nystrom_row(K.approx, .x)[-.x]))),
.progress = verbose
)
}
} else {
if (verbose) {
message(
"\nGenerating paraview using ", nn,
" nearest neighbors per unit"
)
}
para.view <- seq(nrow(expr)) %>%
furrr::future_map_dfr(function(rowid) {
knn <- distances::nearest_neighbor_search(dists, nn + 1,
query_indices = rowid
)[-1, 1]
data.frame(t(colSums(expr[knn, ] *
get_weight(family, dists[knn, rowid], l, zoi))))
},
.options = furrr::furrr_options(packages = "distances"),
.progress = verbose
)
}
if (cached) readr::write_rds(para.view, para.cache.file)
}
return(current.views %>% add_views(create_view(
paste0("paraview.", l),
para.view %>% dplyr::rename_with(~paste0(prefix, .x)),
paste0("para.", l)
)))
}
#' Remove views from the current view composition
#'
#' Remove one or more views from the view composition.
#'
#' The intraview and the unique id cannot be removed with this function.
#'
#' @param current.views the current view composition.
#' @param view.names the names of one or more views to be removed.
#'
#' @return A mistyR view composition with \code{view.names} views removed.
#'
#' @family view composition functions
#'
#' @examples
#' library(dplyr)
#'
#' # get the expression data
#' data("synthetic")
#' expr <- synthetic[[1]] %>% select(-c(row, col, type))
#' # get the coordinates for each cell
#' pos <- synthetic[[1]] %>% select(row, col)
#'
#' # compose
#' misty.views <- create_initial_view(expr) %>%
#' add_juxtaview(pos, neighbor.thr = 1.5) %>%
#' add_paraview(pos, l = 10)
#'
#' # preview
#' str(misty.views)
#'
#' # remove juxtaview and preview
#' misty.views %>%
#' remove_views("juxtaview.1.5") %>%
#' str()
#'
#' # remove juxtaview and paraview and preview
#' misty.views %>%
#' remove_views(c("juxtaview.1.5", "paraview.10")) %>%
#' str()
#' @export
remove_views <- function(current.views, view.names) {
to.match <- !(view.names %in% c("intraview", "misty.uniqueid"))
view.indexes <- match(view.names[to.match], names(current.views))
if (length(view.indexes) > 0) {
current.views %>% rlist::list.remove(view.indexes)
} else {
current.views
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.