#' Find mutual nearest neighbour (MNN) anchors between all pairs of batches,
#' restricted to cells that belong to the same matched cluster.
#'
#' @param object.list List of Seurat objects, one per batch.
#' @param assay Optional character vector naming the assay to use in each
#'   object (one entry per object); defaults to each object's default assay.
#' @param batch.cluster.labels List (one element per batch) of named cluster
#'   labels; names must be the cell names of the corresponding object.
#' @param matched.clusters Cluster labels shared across batches within which
#'   anchors are searched.
#' @param reference Optional integer indices of reference datasets; if NULL,
#'   anchors are found between all pairwise combinations.
#' @param anchor.features Number of integration features to select, or (for
#'   SCT) the explicit feature vector from SelectIntegrationFeatures.
#' @param scale Scale the anchor features in each object before anchoring.
#' @param normalization.method Either "LogNormalize" or "SCT".
#' @param sct.clip.range Unused here; retained for interface compatibility.
#' @param reduction Dimensional reduction for anchor finding: "cca" or "rpca".
#' @param l2.norm Perform L2 normalization on the reduced embeddings.
#' @param dims Dimensions of the reduction to use.
#' @param k.anchor Number of neighbours used when picking anchors.
#' @param k.filter Number of neighbours used when filtering anchors (NA skips
#'   filtering; forced to NA for the rpca path).
#' @param k.score Number of neighbours used when scoring anchors (NA skips).
#' @param max.features Maximum number of features used for anchor filtering.
#' @param nn.method Nearest neighbour method, "rann" or "annoy".
#' @param eps Error bound for the approximate nearest neighbour search.
#' @param verbose Print progress messages.
#'
#' @return An \code{AnchorSet} object holding the anchors found.
#'
#' @importFrom pbapply pblapply
#' @importFrom future.apply future_lapply
#' @importFrom future nbrOfWorkers
#' @importFrom RANN nn2
#' @import Seurat
iSMNN_FindSMNNs <- function(
  object.list = NULL,
  assay = NULL,
  batch.cluster.labels = NULL,
  matched.clusters = NULL,
  reference = NULL,
  anchor.features = 2000,
  scale = TRUE,
  normalization.method = c("LogNormalize", "SCT"),
  sct.clip.range = NULL,
  reduction = c("cca", "rpca"),
  l2.norm = TRUE,
  dims = 1:30,
  k.anchor = 5,
  k.filter = 200,
  k.score = 30,
  max.features = 200,
  nn.method = "rann",
  eps = 0,
  verbose = TRUE
) {
  normalization.method <- match.arg(arg = normalization.method)
  reduction <- match.arg(arg = reduction)
  if (reduction == "rpca") {
    reduction <- "pca"
  }
  # Use a progress-bar lapply when running sequentially and verbosely,
  # otherwise a future-aware lapply. (if/else instead of ifelse(): selecting
  # a closure via ifelse() relies on a scalar fast path of recent R versions.)
  my.lapply <- if (verbose && nbrOfWorkers() == 1) {
    pblapply
  } else {
    future_lapply
  }
  object.ncells <- sapply(X = object.list, FUN = function(x) dim(x = x)[2])
  if (any(object.ncells <= max(dims))) {
    bad.obs <- which(x = object.ncells <= max(dims))
    stop("Max dimension too large: objects ", paste(bad.obs, collapse = ", "),
         " contain fewer than ", max(dims), " cells. \n Please specify a",
         " maximum dimension that is less than the number of cells in any ",
         "object (", min(object.ncells), ").")
  }
  if (!is.null(x = assay)) {
    if (length(x = assay) != length(x = object.list)) {
      stop("If specifying the assay, please specify one assay per object in the object.list")
    }
    # lapply (not sapply) so the result is always a plain list of objects
    object.list <- lapply(
      X = seq_along(object.list),
      FUN = function(x) {
        DefaultAssay(object = object.list[[x]]) <- assay[x]
        return(object.list[[x]])
      }
    )
  } else {
    assay <- sapply(X = object.list, FUN = DefaultAssay)
  }
  object.list <- CheckDuplicateCellNames(object.list = object.list)
  slot <- "data"
  if (normalization.method == "SCT") {
    slot <- "scale.data"
    scale <- FALSE
    if (is.numeric(x = anchor.features)) {
      stop("Please specify the anchor.features to be used. The expected ",
           "workflow for integrating assays produced by SCTransform is ",
           "SelectIntegrationFeatures -> PrepSCTIntegration -> ",
           "FindIntegrationAnchors.")
    }
    # validate every object was run through PrepSCTIntegration with the same
    # anchor features; called only for its side effect of stopping on error
    sct.check <- sapply(
      X = seq_along(object.list),
      FUN = function(x) {
        sct.cmd <- grep(
          pattern = 'PrepSCTIntegration',
          x = Command(object = object.list[[x]]),
          value = TRUE
        )
        # check assay has gone through PrepSCTIntegration
        if (!any(grepl(pattern = "PrepSCTIntegration", x = Command(object = object.list[[x]]))) ||
            Command(object = object.list[[x]], command = sct.cmd, value = "assay") != assay[x]) {
          stop("Object ", x, " assay - ", assay[x], " has not been processed ",
               "by PrepSCTIntegration. Please run PrepSCTIntegration prior to ",
               "FindIntegrationAnchors if using assays generated by SCTransform.", call. = FALSE)
        }
        # check that the correct features are being used
        if (all(Command(object = object.list[[x]], command = sct.cmd, value = "anchor.features") != anchor.features)) {
          stop("Object ", x, " assay - ", assay[x], " was processed using a ",
               "different feature set than in PrepSCTIntegration. Please rerun ",
               "PrepSCTIntegration with the same anchor.features for all objects in ",
               "the object.list.", call. = FALSE)
        }
      }
    )
  }
  if (is.numeric(x = anchor.features) && normalization.method != "SCT") {
    if (verbose) {
      message("Computing ", anchor.features, " integration features")
    }
    anchor.features <- SelectIntegrationFeatures(
      object.list = object.list,
      nfeatures = anchor.features,
      assay = assay
    )
  }
  if (scale) {
    if (verbose) {
      message("Scaling features for provided objects")
    }
    object.list <- my.lapply(
      X = object.list,
      FUN = function(object) {
        ScaleData(object = object, features = anchor.features, verbose = FALSE)
      }
    )
  }
  nn.reduction <- reduction
  # if using pca, only need to compute the internal neighborhood structure once
  # for each dataset
  internal.neighbors <- list()
  if (nn.reduction == "pca") {
    k.filter <- NA
    if (verbose) {
      message("Computing within dataset neighborhoods")
    }
    k.neighbor <- max(k.anchor, k.score)
    internal.neighbors <- my.lapply(
      X = seq_along(object.list),
      FUN = function(x) {
        NNHelper(
          data = Embeddings(object = object.list[[x]][[nn.reduction]])[, dims],
          k = k.neighbor + 1,
          method = nn.method,
          eps = eps
        )
      }
    )
  }
  # determine pairwise combinations (Var1 < Var2: each unordered pair once)
  combinations <- expand.grid(1:length(x = object.list), 1:length(x = object.list))
  combinations <- combinations[combinations$Var1 < combinations$Var2, , drop = FALSE]
  # determine the proper offsets for indexing anchors
  objects.ncell <- sapply(X = object.list, FUN = ncol)
  offsets <- as.vector(x = cumsum(x = c(0, objects.ncell)))[1:length(x = object.list)]
  if (is.null(x = reference)) {
    # case for all pairwise, leave the combinations matrix the same
    if (verbose) {
      message("Finding all pairwise anchors")
    }
  } else {
    reference <- unique(x = sort(x = reference))
    if (max(reference) > length(x = object.list)) {
      stop('Error: requested reference object ', max(reference), " but only ",
           length(x = object.list), " objects provided")
    }
    if (verbose) {
      message("Finding anchors between all query and reference datasets")
    }
    # modify the combinations matrix to retain only R-R and R-Q comparisons.
    # BUGFIX: this filtering used to run only when verbose = TRUE, so the
    # reference argument was silently ignored for verbose = FALSE.
    ok.rows <- (combinations$Var1 %in% reference) | (combinations$Var2 %in% reference)
    combinations <- combinations[ok.rows, ]
  }
  # determine all anchors, one combination (pair of datasets) at a time
  all.anchors <- my.lapply(
    X = seq_len(nrow(x = combinations)),
    FUN = function(row) {
      i <- combinations[row, 1]
      j <- combinations[row, 2]
      # slim both objects down to the relevant assay/features/reduction
      object.1 <- DietSeurat(
        object = object.list[[i]],
        assays = assay[i],
        features = anchor.features,
        counts = FALSE,
        scale.data = TRUE,
        dimreducs = reduction
      )
      object.2 <- DietSeurat(
        object = object.list[[j]],
        assays = assay[j],
        features = anchor.features,
        counts = FALSE,
        scale.data = TRUE,
        dimreducs = reduction
      )
      # rename both assays to "ToIntegrate" so RunCCA/merge treat them alike;
      # suppress key duplication warning
      suppressWarnings(object.1[["ToIntegrate"]] <- object.1[[assay[i]]])
      DefaultAssay(object = object.1) <- "ToIntegrate"
      if (reduction %in% Reductions(object = object.1)) {
        slot(object = object.1[[reduction]], name = "assay.used") <- "ToIntegrate"
      }
      object.1 <- DietSeurat(object = object.1, assays = "ToIntegrate", scale.data = TRUE, dimreducs = reduction)
      suppressWarnings(object.2[["ToIntegrate"]] <- object.2[[assay[j]]])
      DefaultAssay(object = object.2) <- "ToIntegrate"
      if (reduction %in% Reductions(object = object.2)) {
        slot(object = object.2[[reduction]], name = "assay.used") <- "ToIntegrate"
      }
      object.2 <- DietSeurat(object = object.2, assays = "ToIntegrate", scale.data = TRUE, dimreducs = reduction)
      object.pair <- switch(
        EXPR = reduction,
        'cca' = {
          object.pair <- RunCCA(
            object1 = object.1,
            object2 = object.2,
            assay1 = "ToIntegrate",
            assay2 = "ToIntegrate",
            features = anchor.features,
            num.cc = max(dims),
            renormalize = FALSE,
            rescale = FALSE,
            verbose = verbose
          )
          if (l2.norm) {
            object.pair <- L2Dim(object = object.pair, reduction = reduction)
            reduction <- paste0(reduction, ".l2")
            nn.reduction <- reduction
          }
          reduction.2 <- character()
          object.pair
        },
        'pca' = {
          common.features <- intersect(
            x = rownames(x = Loadings(object = object.1[["pca"]])),
            y = rownames(x = Loadings(object = object.2[["pca"]]))
          )
          object.pair <- merge(x = object.1, y = object.2, merge.data = TRUE)
          # project each dataset onto the other's PCA loadings
          projected.embeddings.1 <- t(x = GetAssayData(object = object.1, slot = "scale.data")[common.features, ]) %*%
            Loadings(object = object.2[["pca"]])[common.features, ]
          object.pair[['projectedpca.1']] <- CreateDimReducObject(
            embeddings = rbind(projected.embeddings.1, Embeddings(object = object.2[["pca"]])),
            assay = DefaultAssay(object = object.1),
            key = "projectedpca1_"
          )
          projected.embeddings.2 <- t(x = GetAssayData(object = object.2, slot = "scale.data")[common.features, ]) %*%
            Loadings(object = object.1[["pca"]])[common.features, ]
          object.pair[['projectedpca.2']] <- CreateDimReducObject(
            embeddings = rbind(projected.embeddings.2, Embeddings(object = object.1[["pca"]])),
            assay = DefaultAssay(object = object.2),
            key = "projectedpca2_"
          )
          object.pair[["pca"]] <- CreateDimReducObject(
            embeddings = rbind(
              Embeddings(object = object.1[["pca"]]),
              Embeddings(object = object.2[["pca"]])),
            assay = DefaultAssay(object = object.1),
            key = "pca_"
          )
          reduction <- "projectedpca.1"
          reduction.2 <- "projectedpca.2"
          if (l2.norm) {
            # standardise each embedding dimension before L2 normalisation.
            # NOTE(review): `Sweep` (capital S) is not base R's sweep(); it is
            # presumably Seurat's internal helper -- confirm it is in scope.
            slot(object = object.pair[["projectedpca.1"]], name = "cell.embeddings") <- Sweep(
              x = Embeddings(object = object.pair[["projectedpca.1"]]),
              MARGIN = 2,
              STATS = apply(X = Embeddings(object = object.pair[["projectedpca.1"]]), MARGIN = 2, FUN = sd),
              FUN = "/"
            )
            slot(object = object.pair[["projectedpca.2"]], name = "cell.embeddings") <- Sweep(
              x = Embeddings(object = object.pair[["projectedpca.2"]]),
              MARGIN = 2,
              STATS = apply(X = Embeddings(object = object.pair[["projectedpca.2"]]), MARGIN = 2, FUN = sd),
              FUN = "/"
            )
            object.pair <- L2Dim(object = object.pair, reduction = "projectedpca.1")
            object.pair <- L2Dim(object = object.pair, reduction = "projectedpca.2")
            reduction <- paste0(reduction, ".l2")
            reduction.2 <- paste0(reduction.2, ".l2")
          }
          object.pair
        },
        stop("Invalid reduction parameter. Please choose either cca or rpca")
      )
      internal.neighbors <- internal.neighbors[c(i, j)]
      anchors <- NULL
      for (cluster_i in matched.clusters) {
        # restrict the anchor search to cells of the current matched cluster
        cells1.index <- which(batch.cluster.labels[[i]] %in% cluster_i)
        cells2.index <- which(batch.cluster.labels[[j]] %in% cluster_i)
        cells1 <- names(batch.cluster.labels[[i]])[cells1.index]
        cells2 <- names(batch.cluster.labels[[j]])[cells2.index]
        message("Finding neighborhoods for ", cluster_i, " ...")
        anchors_i <- FindAnchors(
          object.pair = object.pair,
          assay = c("ToIntegrate", "ToIntegrate"),
          slot = slot,
          cells1 = cells1,
          cells2 = cells2,
          internal.neighbors = internal.neighbors,
          reduction = reduction,
          reduction.2 = reduction.2,
          nn.reduction = nn.reduction,
          dims = dims,
          k.anchor = k.anchor,
          k.filter = k.filter,
          k.score = k.score,
          max.features = max.features,
          nn.method = nn.method,
          eps = eps,
          verbose = verbose
        )
        # map within-cluster indices back to within-dataset cell indices
        anchors_i[, 1] <- cells1.index[c(anchors_i[, 1])]
        anchors_i[, 2] <- cells2.index[c(anchors_i[, 2])]
        anchors <- rbind(anchors, anchors_i)
      }
      # shift within-dataset indices into the global (concatenated) index space
      anchors[, 1] <- anchors[, 1] + offsets[i]
      anchors[, 2] <- anchors[, 2] + offsets[j]
      return(anchors)
    }
  )
  all.anchors <- do.call(what = 'rbind', args = all.anchors)
  # record each anchor pair in both directions
  all.anchors <- rbind(all.anchors, all.anchors[, c(2, 1, 3)])
  all.anchors <- AddDatasetID(anchor.df = all.anchors, offsets = offsets, obj.lengths = objects.ncell)
  command <- LogSeuratCommand(object = object.list[[1]], return.command = TRUE)
  anchor.set <- new(Class = "AnchorSet",
    object.list = object.list,
    reference.objects = reference %||% seq_along(object.list),
    anchors = all.anchors,
    offsets = offsets,
    anchor.features = anchor.features,
    command = command
  )
  return(anchor.set)
}
#' Find MNN anchors between batches after a round of correction, using the
#' features of the corrected "integrated" assay instead of re-selecting
#' integration features. Otherwise mirrors \code{iSMNN_FindSMNNs}.
#'
#' @param object.list List of Seurat objects, one per batch, each carrying an
#'   "integrated" assay from a previous correction step.
#' @param assay Optional character vector naming the assay to use in each
#'   object (one entry per object); defaults to each object's default assay.
#' @param batch.cluster.labels List (one element per batch) of named cluster
#'   labels; names must be the cell names of the corresponding object.
#' @param matched.clusters Cluster labels shared across batches within which
#'   anchors are searched.
#' @param reference Optional integer indices of reference datasets; if NULL,
#'   anchors are found between all pairwise combinations.
#' @param anchor.features Ignored: overwritten with the feature names of the
#'   first object's "integrated" assay (kept for interface compatibility).
#' @param scale,normalization.method,sct.clip.range Unused after correction;
#'   retained for interface compatibility.
#' @param reduction Dimensional reduction for anchor finding: "cca" or "rpca".
#' @param l2.norm Perform L2 normalization on the reduced embeddings.
#' @param dims Dimensions of the reduction to use.
#' @param k.anchor Number of neighbours used when picking anchors.
#' @param k.filter Number of neighbours used when filtering anchors (NA skips
#'   filtering; forced to NA for the rpca path).
#' @param k.score Number of neighbours used when scoring anchors (NA skips).
#' @param max.features Maximum number of features used for anchor filtering.
#' @param nn.method Nearest neighbour method, "rann" or "annoy".
#' @param eps Error bound for the approximate nearest neighbour search.
#' @param verbose Print progress messages.
#'
#' @return An \code{AnchorSet} object holding the anchors found.
#'
#' @importFrom pbapply pblapply
#' @importFrom future.apply future_lapply
#' @importFrom future nbrOfWorkers
#' @importFrom RANN nn2
#' @import Seurat
iSMNN_FindSMNNs_aftercorrection <- function(
  object.list = NULL,
  assay = NULL,
  batch.cluster.labels = NULL,
  matched.clusters = NULL,
  reference = NULL,
  anchor.features = 2000,
  scale = TRUE,
  normalization.method = c("LogNormalize", "SCT"),
  sct.clip.range = NULL,
  reduction = c("cca", "rpca"),
  l2.norm = TRUE,
  dims = 1:30,
  k.anchor = 5,
  k.filter = 200,
  k.score = 30,
  max.features = 200,
  nn.method = "rann",
  eps = 0,
  verbose = TRUE
) {
  # normalization.method is intentionally not resolved here: the data have
  # already been corrected, so the SCT-specific workflow does not apply.
  #normalization.method <- match.arg(arg = normalization.method)
  reduction <- match.arg(arg = reduction)
  if (reduction == "rpca") {
    reduction <- "pca"
  }
  # Use a progress-bar lapply when running sequentially and verbosely,
  # otherwise a future-aware lapply. (if/else instead of ifelse(): selecting
  # a closure via ifelse() relies on a scalar fast path of recent R versions.)
  my.lapply <- if (verbose && nbrOfWorkers() == 1) {
    pblapply
  } else {
    future_lapply
  }
  object.ncells <- sapply(X = object.list, FUN = function(x) dim(x = x)[2])
  if (any(object.ncells <= max(dims))) {
    bad.obs <- which(x = object.ncells <= max(dims))
    stop("Max dimension too large: objects ", paste(bad.obs, collapse = ", "),
         " contain fewer than ", max(dims), " cells. \n Please specify a",
         " maximum dimension that is less than the number of cells in any ",
         "object (", min(object.ncells), ").")
  }
  if (!is.null(x = assay)) {
    if (length(x = assay) != length(x = object.list)) {
      stop("If specifying the assay, please specify one assay per object in the object.list")
    }
    # lapply (not sapply) so the result is always a plain list of objects
    object.list <- lapply(
      X = seq_along(object.list),
      FUN = function(x) {
        DefaultAssay(object = object.list[[x]]) <- assay[x]
        return(object.list[[x]])
      }
    )
  } else {
    assay <- sapply(X = object.list, FUN = DefaultAssay)
  }
  object.list <- CheckDuplicateCellNames(object.list = object.list)
  slot <- "data"
  # after correction, anchors are found over the corrected feature space:
  # use the feature names of the first object's "integrated" assay
  anchor.features <- rownames(object.list[[1]]@assays$integrated@data)
  nn.reduction <- reduction
  # if using pca, only need to compute the internal neighborhood structure once
  # for each dataset
  internal.neighbors <- list()
  if (nn.reduction == "pca") {
    k.filter <- NA
    if (verbose) {
      message("Computing within dataset neighborhoods")
    }
    k.neighbor <- max(k.anchor, k.score)
    internal.neighbors <- my.lapply(
      X = seq_along(object.list),
      FUN = function(x) {
        NNHelper(
          data = Embeddings(object = object.list[[x]][[nn.reduction]])[, dims],
          k = k.neighbor + 1,
          method = nn.method,
          eps = eps
        )
      }
    )
  }
  # determine pairwise combinations (Var1 < Var2: each unordered pair once)
  combinations <- expand.grid(1:length(x = object.list), 1:length(x = object.list))
  combinations <- combinations[combinations$Var1 < combinations$Var2, , drop = FALSE]
  # determine the proper offsets for indexing anchors
  objects.ncell <- sapply(X = object.list, FUN = ncol)
  offsets <- as.vector(x = cumsum(x = c(0, objects.ncell)))[1:length(x = object.list)]
  if (is.null(x = reference)) {
    # case for all pairwise, leave the combinations matrix the same
    if (verbose) {
      message("Finding all pairwise anchors")
    }
  } else {
    reference <- unique(x = sort(x = reference))
    if (max(reference) > length(x = object.list)) {
      stop('Error: requested reference object ', max(reference), " but only ",
           length(x = object.list), " objects provided")
    }
    if (verbose) {
      message("Finding anchors between all query and reference datasets")
    }
    # modify the combinations matrix to retain only R-R and R-Q comparisons.
    # BUGFIX: this filtering used to run only when verbose = TRUE, so the
    # reference argument was silently ignored for verbose = FALSE.
    ok.rows <- (combinations$Var1 %in% reference) | (combinations$Var2 %in% reference)
    combinations <- combinations[ok.rows, ]
  }
  # determine all anchors, one combination (pair of datasets) at a time
  all.anchors <- my.lapply(
    X = seq_len(nrow(x = combinations)),
    FUN = function(row) {
      i <- combinations[row, 1]
      j <- combinations[row, 2]
      # slim both objects down to the relevant assay/features/reduction
      object.1 <- DietSeurat(
        object = object.list[[i]],
        assays = assay[i],
        features = anchor.features,
        counts = FALSE,
        scale.data = TRUE,
        dimreducs = reduction
      )
      object.2 <- DietSeurat(
        object = object.list[[j]],
        assays = assay[j],
        features = anchor.features,
        counts = FALSE,
        scale.data = TRUE,
        dimreducs = reduction
      )
      # rename both assays to "ToIntegrate" so RunCCA/merge treat them alike;
      # suppress key duplication warning
      suppressWarnings(object.1[["ToIntegrate"]] <- object.1[[assay[i]]])
      DefaultAssay(object = object.1) <- "ToIntegrate"
      if (reduction %in% Reductions(object = object.1)) {
        slot(object = object.1[[reduction]], name = "assay.used") <- "ToIntegrate"
      }
      object.1 <- DietSeurat(object = object.1, assays = "ToIntegrate", scale.data = TRUE, dimreducs = reduction)
      suppressWarnings(object.2[["ToIntegrate"]] <- object.2[[assay[j]]])
      DefaultAssay(object = object.2) <- "ToIntegrate"
      if (reduction %in% Reductions(object = object.2)) {
        slot(object = object.2[[reduction]], name = "assay.used") <- "ToIntegrate"
      }
      object.2 <- DietSeurat(object = object.2, assays = "ToIntegrate", scale.data = TRUE, dimreducs = reduction)
      object.pair <- switch(
        EXPR = reduction,
        'cca' = {
          object.pair <- RunCCA(
            object1 = object.1,
            object2 = object.2,
            assay1 = "ToIntegrate",
            assay2 = "ToIntegrate",
            features = anchor.features,
            num.cc = max(dims),
            renormalize = FALSE,
            rescale = FALSE,
            verbose = verbose
          )
          if (l2.norm) {
            object.pair <- L2Dim(object = object.pair, reduction = reduction)
            reduction <- paste0(reduction, ".l2")
            nn.reduction <- reduction
          }
          reduction.2 <- character()
          object.pair
        },
        'pca' = {
          common.features <- intersect(
            x = rownames(x = Loadings(object = object.1[["pca"]])),
            y = rownames(x = Loadings(object = object.2[["pca"]]))
          )
          object.pair <- merge(x = object.1, y = object.2, merge.data = TRUE)
          # project each dataset onto the other's PCA loadings
          projected.embeddings.1 <- t(x = GetAssayData(object = object.1, slot = "scale.data")[common.features, ]) %*%
            Loadings(object = object.2[["pca"]])[common.features, ]
          object.pair[['projectedpca.1']] <- CreateDimReducObject(
            embeddings = rbind(projected.embeddings.1, Embeddings(object = object.2[["pca"]])),
            assay = DefaultAssay(object = object.1),
            key = "projectedpca1_"
          )
          projected.embeddings.2 <- t(x = GetAssayData(object = object.2, slot = "scale.data")[common.features, ]) %*%
            Loadings(object = object.1[["pca"]])[common.features, ]
          object.pair[['projectedpca.2']] <- CreateDimReducObject(
            embeddings = rbind(projected.embeddings.2, Embeddings(object = object.1[["pca"]])),
            assay = DefaultAssay(object = object.2),
            key = "projectedpca2_"
          )
          object.pair[["pca"]] <- CreateDimReducObject(
            embeddings = rbind(
              Embeddings(object = object.1[["pca"]]),
              Embeddings(object = object.2[["pca"]])),
            assay = DefaultAssay(object = object.1),
            key = "pca_"
          )
          reduction <- "projectedpca.1"
          reduction.2 <- "projectedpca.2"
          if (l2.norm) {
            # standardise each embedding dimension before L2 normalisation.
            # NOTE(review): `Sweep` (capital S) is not base R's sweep(); it is
            # presumably Seurat's internal helper -- confirm it is in scope.
            slot(object = object.pair[["projectedpca.1"]], name = "cell.embeddings") <- Sweep(
              x = Embeddings(object = object.pair[["projectedpca.1"]]),
              MARGIN = 2,
              STATS = apply(X = Embeddings(object = object.pair[["projectedpca.1"]]), MARGIN = 2, FUN = sd),
              FUN = "/"
            )
            slot(object = object.pair[["projectedpca.2"]], name = "cell.embeddings") <- Sweep(
              x = Embeddings(object = object.pair[["projectedpca.2"]]),
              MARGIN = 2,
              STATS = apply(X = Embeddings(object = object.pair[["projectedpca.2"]]), MARGIN = 2, FUN = sd),
              FUN = "/"
            )
            object.pair <- L2Dim(object = object.pair, reduction = "projectedpca.1")
            object.pair <- L2Dim(object = object.pair, reduction = "projectedpca.2")
            reduction <- paste0(reduction, ".l2")
            reduction.2 <- paste0(reduction.2, ".l2")
          }
          object.pair
        },
        stop("Invalid reduction parameter. Please choose either cca or rpca")
      )
      internal.neighbors <- internal.neighbors[c(i, j)]
      anchors <- NULL
      for (cluster_i in matched.clusters) {
        # restrict the anchor search to cells of the current matched cluster
        cells1.index <- which(batch.cluster.labels[[i]] %in% cluster_i)
        cells2.index <- which(batch.cluster.labels[[j]] %in% cluster_i)
        cells1 <- names(batch.cluster.labels[[i]])[cells1.index]
        cells2 <- names(batch.cluster.labels[[j]])[cells2.index]
        message("Finding neighborhoods for ", cluster_i, " ...")
        anchors_i <- FindAnchors(
          object.pair = object.pair,
          assay = c("ToIntegrate", "ToIntegrate"),
          slot = slot,
          cells1 = cells1,
          cells2 = cells2,
          internal.neighbors = internal.neighbors,
          reduction = reduction,
          reduction.2 = reduction.2,
          nn.reduction = nn.reduction,
          dims = dims,
          k.anchor = k.anchor,
          k.filter = k.filter,
          k.score = k.score,
          max.features = max.features,
          nn.method = nn.method,
          eps = eps,
          verbose = verbose
        )
        # map within-cluster indices back to within-dataset cell indices
        anchors_i[, 1] <- cells1.index[c(anchors_i[, 1])]
        anchors_i[, 2] <- cells2.index[c(anchors_i[, 2])]
        anchors <- rbind(anchors, anchors_i)
      }
      # shift within-dataset indices into the global (concatenated) index space
      anchors[, 1] <- anchors[, 1] + offsets[i]
      anchors[, 2] <- anchors[, 2] + offsets[j]
      return(anchors)
    }
  )
  all.anchors <- do.call(what = 'rbind', args = all.anchors)
  # record each anchor pair in both directions
  all.anchors <- rbind(all.anchors, all.anchors[, c(2, 1, 3)])
  all.anchors <- AddDatasetID(anchor.df = all.anchors, offsets = offsets, obj.lengths = objects.ncell)
  command <- LogSeuratCommand(object = object.list[[1]], return.command = TRUE)
  anchor.set <- new(Class = "AnchorSet",
    object.list = object.list,
    reference.objects = reference %||% seq_along(object.list),
    anchors = all.anchors,
    offsets = offsets,
    anchor.features = anchor.features,
    command = command
  )
  return(anchor.set)
}
# Check a list of objects for duplicate cell names
#
# @param object.list List of Seurat objects (anything with colnames works
#   for the detection step)
# @param verbose Warn when cells are renamed
# @param stop Error out if any duplicate names exist
#
# @return Returns the list unchanged when all cell names are unique;
#   otherwise each object's cells are renamed with a "_<index>" suffix
#
CheckDuplicateCellNames <- function(object.list, verbose = TRUE, stop = FALSE) {
  cell.names <- unlist(x = lapply(X = object.list, FUN = colnames))
  if (any(duplicated(x = cell.names))) {
    if (stop) {
      stop("Duplicate cell names present across objects provided.")
    }
    if (verbose) {
      warning("Some cell names are duplicated across objects provided. Renaming to enforce unique cell names.")
    }
    # suffix each cell name with the index of the object it came from
    object.list <- lapply(
      X = seq_along(object.list),
      FUN = function(x) {
        return(RenameCells(
          object = object.list[[x]],
          new.names = paste0(Cells(x = object.list[[x]]), "_", x)
        ))
      }
    )
  }
  return(object.list)
}
### Find MNNs between batches
# Identify anchor (mutual nearest neighbour) pairs between cells1 and cells2
# within a merged pair object. Pipeline:
#   1. FindNN          - cross- and within-dataset neighbour graphs
#   2. FindAnchorPairs - keep mutual nearest neighbours
#   3. FilterAnchors   - optional filtering in feature space (skipped when
#                        k.filter is NA)
#   4. ScoreAnchors    - optional shared-neighbourhood scoring (skipped when
#                        k.score is NA)
# Returns the anchor matrix stored in the pair object's integration data.
FindAnchors <- function(
  object.pair,
  assay,
  slot,
  cells1,
  cells2,
  internal.neighbors,
  reduction,
  reduction.2 = character(),
  nn.reduction = reduction,
  dims = 1:10,
  k.anchor = 20,
  k.filter = 200,
  k.score = 30,
  max.features = 200,
  nn.method = "rann",
  eps = 0,
  projected = FALSE,
  verbose = TRUE
) {
  # One neighbourhood size covers both anchor picking and (optional) scoring,
  # so the neighbour graphs only have to be computed once.
  nbhd.size <- if (is.na(x = k.score)) {
    k.anchor
  } else {
    max(k.anchor, k.score)
  }
  object.pair <- FindNN(
    object = object.pair,
    cells1 = cells1,
    cells2 = cells2,
    internal.neighbors = internal.neighbors,
    dims = dims,
    reduction = reduction,
    reduction.2 = reduction.2,
    nn.reduction = nn.reduction,
    k = nbhd.size,
    nn.method = nn.method,
    eps = eps,
    verbose = FALSE
  )
  object.pair <- FindAnchorPairs(
    object = object.pair,
    cells1 = cells1,
    integration.name = "integrated",
    k.anchor = k.anchor,
    verbose = verbose
  )
  if (!is.na(x = k.filter)) {
    # Filter candidate anchors using the most informative reduction features
    filter.features <- TopDimFeatures(
      object = object.pair,
      reduction = reduction,
      dims = dims,
      features.per.dim = 100,
      max.features = max.features,
      projected = projected
    )
    object.pair <- FilterAnchors(
      object = object.pair,
      assay = assay,
      slot = slot,
      integration.name = 'integrated',
      features = filter.features,
      k.filter = k.filter,
      nn.method = nn.method,
      eps = eps,
      verbose = verbose
    )
  }
  if (!is.na(x = k.score)) {
    object.pair <- ScoreAnchors(
      object = object.pair,
      assay = DefaultAssay(object = object.pair),
      integration.name = "integrated",
      verbose = verbose,
      k.score = k.score
    )
  }
  return(GetIntegrationData(
    object = object.pair,
    integration.name = 'integrated',
    slot = 'anchors'
  ))
}
# Find nearest neighbors
#
# Builds the four neighbour graphs used for anchor finding between two cell
# groups inside one merged object, then stores them in the object's
# integration data under 'neighbors':
#   nnaa: cells1 vs cells1, nnbb: cells2 vs cells2 (within-dataset)
#   nnab: cells1 queried against cells2, nnba: cells2 queried against cells1
# The two groups come either from explicit cells1/cells2 vectors or from a
# two-level grouping variable (mutually exclusive).
#
FindNN <- function(
object,
cells1 = NULL,
cells2 = NULL,
internal.neighbors,
grouping.var = NULL,
dims = 1:10,
reduction = "cca.l2",
reduction.2 = character(),
nn.dims = dims,
nn.reduction = reduction,
k = 300,
nn.method = "rann",
eps = 0,
integration.name = 'integrated',
verbose = TRUE
) {
# cells1/cells2 must be given together, or not at all (then grouping.var)
if (xor(x = is.null(x = cells1), y = is.null(x = cells2))) {
stop("cells1 and cells2 must both be specified")
}
if (!is.null(x = cells1) && !is.null(x = cells2) && !is.null(x = grouping.var)) {
stop("Specify EITHER grouping.var or cells1/2.")
}
if (is.null(x = cells1) && is.null(x = cells2) && is.null(x = grouping.var)) {
stop("Please set either cells1/2 or grouping.var")
}
# derive the two cell groups from a 2-level grouping variable; the larger
# group becomes cells1
if (!is.null(x = grouping.var)) {
if (nrow(x = unique(x = object[[grouping.var]])) != 2) {
stop("Number of groups in grouping.var not equal to 2.")
}
groups <- names(x = sort(x = table(object[[grouping.var]]), decreasing = TRUE))
cells1 <- colnames(x = object)[object[[grouping.var]] == groups[[1]]]
cells2 <- colnames(x = object)[object[[grouping.var]] == groups[[2]]]
}
if (verbose) {
message("Finding neighborhoods")
}
# reuse precomputed within-dataset neighbourhoods when the caller supplied
# them; otherwise compute them here over nn.reduction/nn.dims
if (!is.null(x = internal.neighbors[[1]])) {
nnaa <- internal.neighbors[[1]]
nnbb <- internal.neighbors[[2]]
} else {
dim.data.self <- Embeddings(object = object[[nn.reduction]])[ ,nn.dims]
dims.cells1.self <- dim.data.self[cells1, ]
dims.cells2.self <- dim.data.self[cells2, ]
# k + 1 because each cell is its own nearest neighbour
nnaa <- NNHelper(
data = dims.cells1.self,
k = k + 1,
method = nn.method,
eps = eps
)
nnbb <- NNHelper(
data = dims.cells2.self,
k = k + 1,
method = nn.method,
eps = eps
)
}
# with a second (projected) reduction, query each group against the other
# group's embedding in the opposite projection space
if (length(x = reduction.2) > 0) {
nnab <- NNHelper(
data = Embeddings(object = object[[reduction.2]])[cells2, ],
query = Embeddings(object = object[[reduction.2]])[cells1, ],
k = k,
method = nn.method,
eps = eps
)
nnba <- NNHelper(
data = Embeddings(object = object[[reduction]])[cells1, ],
query = Embeddings(object = object[[reduction]])[cells2, ],
k = k,
method = nn.method,
eps = eps
)
} else {
# single shared reduction (e.g. CCA): cross-queries use the same space
dim.data.opposite <- Embeddings(object = object[[reduction]])[ ,dims]
dims.cells1.opposite <- dim.data.opposite[cells1, ]
dims.cells2.opposite <- dim.data.opposite[cells2, ]
nnab <- NNHelper(
data = dims.cells2.opposite,
query = dims.cells1.opposite,
k = k,
method = nn.method,
eps = eps
)
nnba <- NNHelper(
data = dims.cells1.opposite,
query = dims.cells2.opposite,
k = k,
method = nn.method,
eps = eps
)
}
# persist all four graphs plus the cell groupings for downstream steps
object <- SetIntegrationData(
object = object,
integration.name = integration.name,
slot = 'neighbors',
new.data = list('nnaa' = nnaa, 'nnab' = nnab, 'nnba' = nnba, 'nnbb' = nnbb, 'cells1' = cells1, 'cells2' = cells2)
)
return(object)
}
# Internal helper function to dispatch to various neighbor finding methods
#
# @param data Input data
# @param query Data to query against data
# @param k Number of nearest neighbors to compute
# @param method Nearest neighbor method to use: "rann", "annoy"
# @param ... additional parameters to specific neighbor finding method
#
NNHelper <- function(data, query = data, k, method, ...) {
# Capture this call's evaluated arguments from the current frame, append
# any extras from ..., then keep only those accepted by the chosen backend
# before dispatching via do.call.
args <- as.list(x = sys.frame(which = sys.nframe()))
args <- c(args, list(...))
return(
switch(
EXPR = method,
"rann" = {
# RANN::nn2 only receives arguments present in its formals
args <- args[intersect(x = names(x = args), y = names(x = formals(fun = nn2)))]
do.call(what = 'nn2', args = args)
},
"annoy" = {
# NOTE(review): AnnoyNN is presumably a Seurat internal -- confirm it is
# available in this package's namespace before using method = "annoy".
args <- args[intersect(x = names(x = args), y = names(x = formals(fun = AnnoyNN)))]
do.call(what = 'AnnoyNN', args = args)
},
stop("Invalid method. Please choose one of 'rann', 'annoy'")
)
)
}
# Find Anchor pairs
#
# Identifies mutual nearest neighbour (MNN) pairs from the neighbour graphs
# stored in the object's integration data: cell a (group 1) and cell b
# (group 2) form an anchor when each is among the other's k.anchor nearest
# neighbours. Anchors are written back with a placeholder score of 1.
FindAnchorPairs <- function(
object,
cells1 = NULL,
integration.name = 'integrated',
k.anchor = 5,
verbose = TRUE
) {
neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors')
# cap k.anchor at the number of neighbours actually available in the graphs
max.nn <- c(ncol(x = neighbors$nnab$nn.idx), ncol(x = neighbors$nnba$nn.idx))
if (any(k.anchor > max.nn)) {
message(paste0('warning: requested k.anchor = ', k.anchor, ', only ', min(max.nn), ' in dataset'))
k.anchor <- min(max.nn)
}
if (verbose) {
message("Finding anchors")
}
# convert cell name to neighbor index
# NOTE(review): the elementwise `cells1 == nn.cells1` comparison assumes the
# two vectors are in matching order -- verify against callers.
nn.cells1 <- neighbors$cells1
nn.cells2 <- neighbors$cells2
cell1.index <- suppressWarnings(which(cells1 == nn.cells1, arr.ind = TRUE))
ncell <- 1:nrow(x = neighbors$nnab$nn.idx)
ncell <- ncell[ncell %in% cell1.index]
anchors <- list()
# pre allocate vector (5 anchors per cell is an initial guess; R extends the
# vectors automatically if more are found, and the tail is trimmed below)
anchors$cell1 <- rep(x = 0, length(x = ncell) * 5)
anchors$cell2 <- anchors$cell1
anchors$score <- anchors$cell1 + 1
idx <- 0
for (cell in ncell) {
# group-2 neighbours of this group-1 cell
neighbors.ab <- neighbors$nnab$nn.idx[cell, 1:k.anchor]
# which of those also list this cell among their own k.anchor neighbours
mutual.neighbors <- which(
x = neighbors$nnba$nn.idx[neighbors.ab, 1:k.anchor, drop = FALSE] == cell,
arr.ind = TRUE
)[, 1]
for (i in neighbors.ab[mutual.neighbors]){
idx <- idx + 1
anchors$cell1[idx] <- cell
anchors$cell2[idx] <- i
anchors$score[idx] <- 1
}
}
# trim the preallocated vectors to the anchors actually found
# NOTE(review): if no anchors are found idx == 0 and 1:idx is c(1, 0) --
# this edge case is unchanged from the original code.
anchors$cell1 <- anchors$cell1[1:idx]
anchors$cell2 <- anchors$cell2[1:idx]
anchors$score <- anchors$score[1:idx]
# assemble into an (n x 3) matrix with columns cell1, cell2, score
anchors <- t(x = do.call(what = rbind, args = anchors))
anchors <- as.matrix(x = anchors)
object <- SetIntegrationData(
object = object,
integration.name = integration.name,
slot = 'anchors',
new.data = anchors
)
if (verbose) {
message(paste0("\tFound ", nrow(x = anchors), " anchors"))
}
return(object)
}
# Get top n features across given set of dimensions
#
# @param object Seurat object
# @param reduction Which dimension reduction to use
# @param dims Which dimensions to use
# @param features.per.dim How many features to consider per dimension
# @param max.features Number of features to return at most
# @param projected Use projected loadings
#
# @return Character vector of unique features drawn from the top loadings of
#   each requested dimension, kept below max.features in total
#
TopDimFeatures <- function(
  object,
  reduction,
  dims = 1:10,
  features.per.dim = 100,
  max.features = 200,
  projected = FALSE
) {
  dim.reduction <- object[[reduction]]
  # guarantee room for at least two features per requested dimension
  max.features <- max(length(x = dims) * 2, max.features)
  # number of unique features obtained when taking the top-y features
  # (balanced over both loading signs) from every dimension
  num.features <- vapply(
    X = seq_len(features.per.dim),
    FUN = function(y) {
      length(x = unique(x = as.vector(x = sapply(X = dims, FUN = function(x) {
        unlist(x = TopFeatures(object = dim.reduction, dim = x, nfeatures = y, balanced = TRUE, projected = projected))
      }))))
    },
    FUN.VALUE = integer(1)
  )
  # largest per-dimension cutoff whose unique-feature count stays below
  # max.features
  # NOTE(review): if every cutoff reaches max.features this index is empty,
  # as in the original code -- confirm callers never hit that case.
  max.per.pc <- which.max(x = num.features[num.features < max.features])
  features <- unique(x = as.vector(x = sapply(X = dims, FUN = function(x) {
    unlist(x = TopFeatures(object = dim.reduction, dim = x, nfeatures = max.per.pc, balanced = TRUE, projected = projected))
  })))
  return(features)
}
# Filter anchors by agreement in high-dimensional feature space
#
# For each anchor (cell1, cell2), keep it only if cell2 is among cell1's
# k.filter nearest neighbours in L2-normalised expression space over the
# supplied features. Anchors that fail the test are dropped from the
# object's integration data.
FilterAnchors <- function(
object,
assay = NULL,
slot = "data",
integration.name = 'integrated',
features = NULL,
k.filter = 200,
nn.method = "rann",
eps = 0,
verbose = TRUE
) {
if (verbose) {
message("Filtering anchors")
}
assay <- assay %||% DefaultAssay(object = object)
features <- features %||% VariableFeatures(object = object)
if (length(x = features) == 0) {
stop("No features provided and no VariableFeatures computed.")
}
features <- unique(x = features)
neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = 'neighbors')
nn.cells1 <- neighbors$cells1
nn.cells2 <- neighbors$cells2
# cells x features matrices, each row scaled to unit L2 norm
# (L2Norm is defined later in this file)
cn.data1 <- L2Norm(
mat = t(as.matrix(x = GetAssayData(object = object[[assay[1]]], slot = slot)[features, nn.cells1])),
MARGIN = 1)
cn.data2 <- L2Norm(
mat = t(as.matrix(x = GetAssayData(object = object[[assay[2]]], slot = slot)[features, nn.cells2])),
MARGIN = 1)
# k.filter nearest group-2 cells (in feature space) for each group-1 cell;
# the [nn.cellsN, ] subsets select by rowname and keep the graph ordering
# aligned with the anchor indices
nn <- NNHelper(
data = cn.data2[nn.cells2, ],
query = cn.data1[nn.cells1, ],
k = k.filter,
method = nn.method,
eps = eps
)
anchors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "anchors")
# position is NA when cell2 is not among cell1's k.filter feature-space
# neighbours; those anchors are removed
position <- sapply(X = 1:nrow(x = anchors), FUN = function(x) {
which(x = anchors[x, "cell2"] == nn$nn.idx[anchors[x, "cell1"], ])[1]
})
anchors <- anchors[!is.na(x = position), ]
if (verbose) {
message("\tRetained ", nrow(x = anchors), " anchors")
}
object <- SetIntegrationData(
object = object,
integration.name = integration.name,
slot = "anchors",
new.data = anchors
)
return(object)
}
# Set a default value if an object is null
#
# @param lhs An object to set if it's null
# @param rhs The value to provide if x is null
#
# @return rhs if lhs is null, else lhs
#
# @author Hadley Wickham
# @references https://adv-r.hadley.nz/functions.html#missing-arguments
#
`%||%` <- function(lhs, rhs) {
  if (is.null(x = lhs)) rhs else lhs
}
# Scales each row (or column) of a matrix to unit L2 norm
#
# Modified from PMA package
# @references Witten, Tibshirani, and Hastie, Biostatistics 2009
# @references \url{https://github.com/cran/PMA/blob/master/R/PMD.R}
#
# @param mat numeric matrix
# @param MARGIN 1 to normalise rows, 2 to normalise columns
#
# @return the matrix with each row/column scaled to unit L2 norm;
#   zero-norm rows/columns become all zeros
#
L2Norm <- function(mat, MARGIN = 1) {
  normalized <- sweep(
    x = mat,
    MARGIN = MARGIN,
    STATS = apply(
      X = mat,
      MARGIN = MARGIN,
      FUN = function(x){
        sqrt(x = sum(x ^ 2))
      }
    ),
    FUN = "/"
  )
  # Zero-norm rows/columns produce NaN/Inf after the division; zero them out.
  # BUGFIX: the original tested !is.infinite(), which is TRUE for every
  # *finite* entry and therefore zeroed the entire normalized matrix.
  normalized[!is.finite(x = normalized)] <- 0
  return(normalized)
}
# Score anchors by shared-neighbourhood overlap
#
# Each anchor's raw score is the number of neighbours (within- plus
# cross-dataset, top k.score each) shared by its two cells. Scores are then
# rescaled by the 0.01 and 0.9 quantiles and clamped to [0, 1].
# NOTE(review): do.cpp is accepted but never used in this implementation.
ScoreAnchors <- function(
object,
assay = NULL,
integration.name = 'integrated',
verbose = TRUE,
k.score = 30,
do.cpp = TRUE
) {
assay <- assay %||% DefaultAssay(object = object)
anchor.df <- as.data.frame(x = GetIntegrationData(object = object, integration.name = integration.name, slot = 'anchors'))
neighbors <- GetIntegrationData(object = object, integration.name = integration.name, slot = "neighbors")
# group-2 neighbour indices are shifted by offset so the combined neighbour
# sets of the two groups live in one shared index space
offset <- length(x = neighbors$cells1)
nbrsetA <- function(x) c(neighbors$nnaa$nn.idx[x, 1:k.score], neighbors$nnab$nn.idx[x, 1:k.score] + offset)
nbrsetB <- function(x) c(neighbors$nnba$nn.idx[x, 1:k.score], neighbors$nnbb$nn.idx[x, 1:k.score] + offset)
# score = number of shared neighbors
anchor.new <- data.frame(
'cell1' = anchor.df[, 1],
'cell2' = anchor.df[, 2],
'score' = mapply(
FUN = function(x, y) {
length(x = intersect(x = nbrsetA(x = x), nbrsetB(x = y)))},
anchor.df[, 1],
anchor.df[, 2]
)
)
# normalize the score: rescale by the 0.01/0.9 quantiles, clamp to [0, 1]
# NOTE(review): if max.score == min.score the division yields NaN/Inf;
# unchanged from the original behaviour.
max.score <- quantile(anchor.new$score, 0.9)
min.score <- quantile(anchor.new$score, 0.01)
anchor.new$score <- anchor.new$score - min.score
anchor.new$score <- anchor.new$score / (max.score - min.score)
anchor.new$score[anchor.new$score > 1] <- 1
anchor.new$score[anchor.new$score < 0] <- 0
anchor.new <- as.matrix(x = anchor.new)
object <- SetIntegrationData(
object = object,
integration.name = integration.name,
slot = 'anchors',
new.data = anchor.new
)
return(object)
}
# Convert global (offset) anchor indices back to within-dataset indices and
# annotate each anchor with the dataset its two cells came from.
#
# @param anchor.df Matrix/data.frame with columns cell1, cell2, score where
#   cell indices run over the concatenated cell space of all datasets
# @param offsets Starting offset of each dataset in the concatenated space
# @param obj.lengths Number of cells in each dataset
#
# @return data.frame with columns cell1, cell2, score, dataset1, dataset2
AddDatasetID <- function(
  anchor.df,
  offsets,
  obj.lengths
) {
  n.dataset <- length(x = offsets)
  # per-cell lookup tables over the concatenated cell index space
  cell.offset <- rep.int(x = offsets[seq_len(n.dataset)], times = obj.lengths)
  cell.dataset <- rep.int(x = seq_len(n.dataset), times = obj.lengths)
  idx1 <- anchor.df[, 1]
  idx2 <- anchor.df[, 2]
  data.frame(
    'cell1' = idx1 - cell.offset[idx1],
    'cell2' = idx2 - cell.offset[idx2],
    'score' = anchor.df[, 3],
    'dataset1' = cell.dataset[idx1],
    'dataset2' = cell.dataset[idx2]
  )
}
# Update slots in an object
#
# Rebuilds the object against the latest definition of its class: slot
# values that still exist are carried over, slots dropped from the class
# definition disappear, and newly-introduced length-zero vector slots are
# given a length-1 default.
#
# @param object An object to update
#
# @return \code{object} with the latest slot definitions
#
UpdateSlots <- function(object) {
# collect every current slot value; tryCatch returns NULL for slots the
# stored object no longer carries
object.list <- sapply(
X = slotNames(x = object),
FUN = function(x) {
return(tryCatch(
expr = slot(object = object, name = x),
error = function(...) {
return(NULL)
}
))
},
simplify = FALSE,
USE.NAMES = TRUE
)
# drop missing slots, then reconstruct via new(Class, ...)
object.list <- Filter(f = Negate(f = is.null), x = object.list)
object.list <- c('Class' = class(x = object)[1], object.list)
object <- do.call(what = 'new', args = object.list)
# slots introduced by the new class definition arrive with length 0;
# pad plain (non-list) vectors to length 1 so downstream code can index them
for (x in setdiff(x = slotNames(x = object), y = names(x = object.list))) {
xobj <- slot(object = object, name = x)
if (is.vector(x = xobj) && !is.list(x = xobj) && length(x = xobj) == 0) {
slot(object = object, name = x) <- vector(mode = class(x = xobj), length = 1L)
}
}
return(object)
}
# Get the names of objects within a Seurat object that are of a certain class
#
# @param object A Seurat object
# @param classes.keep A vector of names of classes to get
#
# @return A vector with the names of objects within the Seurat object that are of class \code{classes.keep}
#
FilterObjects <- function(object, classes.keep = c('Assay', 'DimReduc')) {
object <- UpdateSlots(object = object)
# candidate slots: list-valued, but not data.frames or package_version
# (both of which are lists underneath)
slots <- na.omit(object = Filter(
f = function(x) {
sobj <- slot(object = object, name = x)
return(is.list(x = sobj) && !is.data.frame(x = sobj) && !is.package_version(x = sobj))
},
x = slotNames(x = object)
))
# tools and misc hold arbitrary user data; never scan them
slots <- grep(pattern = 'tools', x = slots, value = TRUE, invert = TRUE)
slots <- grep(pattern = 'misc', x = slots, value = TRUE, invert = TRUE)
# names of every element stored in the remaining list slots
slots.objects <- unlist(
x = lapply(
X = slots,
FUN = function(x) {
return(names(x = slot(object = object, name = x)))
}
),
use.names = FALSE
)
# keep only elements inheriting from one of the requested classes
object.classes <- sapply(
X = slots.objects,
FUN = function(i) {
return(inherits(x = object[[i]], what = classes.keep))
}
)
object.classes <- which(x = object.classes, useNames = TRUE)
return(names(x = object.classes))
}
# NOTE(review): the two lines below are leftover footer text from a web
# scrape (rdrr.io-style page), not R code; commented out so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.