diClusters <- function(data.list, result.list, target, equiweight=TRUE, cluster.args=list(), pval.col="PValue", fc.col=NA, grid.length=21, iterations=4)
# Performs post-hoc clustering of significant bin pairs, to control the
# cluster-level FDR at 'target'. Adjusts the weights so that the contribution
# from each set of bin pairs is the same.
#
# written by Aaron Lun
# created 12 January 2016
# last modified by Aaron Lun 4 June 2019
# last modified 11 May 2024 to call local .weightedFDR
{
# Setting initial parameters.
if (missing(target)) {
target <- 0.05
warning("unspecified 'target' for the cluster-level FDR set to 0.05")
}
expanded.args <- as.list(match.call(clusterPairs, do.call(call, c("clusterPairs", cluster.args))))
if (is.null(expanded.args$tol)) {
cluster.args$tol <- 1
warning("'tol' for 'clusterPairs' set to a default of 1 bp")
}
# Checking inputs.
if (.notList(data.list)) {
data.list <- list(data.list)
}
if (.notList(result.list) || is.data.frame(result.list)) {
result.list <- list(result.list)
}
nset <- length(data.list)
if (nset!=length(result.list)) { stop("data list must have same length as result list") }
for (x in seq_len(nset)) {
if (length(data.list[[x]])!=nrow(result.list[[x]])) {
stop("corresponding entries of data and result lists must have same number of entries")
}
}
# Computing the adjusted FDR for all of these samples (with equi-weighting).
nentries <- lengths(data.list)
if (equiweight) {
weights <- rep(1/nentries, nentries)
} else {
weights <- rep(1, sum(nentries))
}
all.ps <- in.each.group <- vector("list", nset)
last <- 0L
for (x in seq_len(nset)) {
all.ps[[x]] <- result.list[[x]][,pval.col]
in.each.group[[x]] <- last + seq_len(nrow(result.list[[x]]))
last <- last + nrow(result.list[[x]])
}
all.ps <- unlist(all.ps)
adjp <- .weightedFDR(all.ps, weights)
# Getting the sign.
if (is.na(fc.col)) {
all.signs <- lapply(nentries, logical)
} else {
all.signs <- lapply(result.list, function(x) { x[,fc.col] > 0 })
}
# Controlling the cluster-level FDR.
FUN <- function(sig, index.only=TRUE) {
pos.data.list <- neg.data.list <- vector("list", nset)
for (x in seq_len(nset)) {
cur.sig <- sig[in.each.group[[x]]]
pos.data.list[[x]] <- data.list[[x]][cur.sig & all.signs[[x]],]
neg.data.list[[x]] <- data.list[[x]][cur.sig & !all.signs[[x]],]
}
pos.clust.all <- do.call(clusterPairs, c(pos.data.list, cluster.args, index.only=index.only))
neg.clust.all <- do.call(clusterPairs, c(neg.data.list, cluster.args, index.only=index.only))
# Extracting the indices.
if (index.only) {
pos.clust <- pos.clust.all
neg.clust <- neg.clust.all
additional <- max(sapply(pos.clust, function(x) {
keep <- !is.na(x)
if (!any(keep)) { return(0) }
return(max(x[keep]))
}))
} else {
pos.clust <- pos.clust.all$indices
neg.clust <- neg.clust.all$indices
additional <- length(pos.clust.all$interactions)
}
# Assembling it back into a single return value.
clust.indices <- vector("list", nset)
for (x in seq_len(nset)) {
cur.sig <- sig[in.each.group[[x]]]
cur.signs <- all.signs[[x]][cur.sig]
full.ids <- integer(sum(cur.sig))
full.ids[cur.signs] <- pos.clust[[x]]
full.ids[!cur.signs] <- neg.clust[[x]] + additional
clust.indices[[x]] <- full.ids
}
names(clust.indices) <- names(data.list)
if (index.only) { return(clust.indices) }
list(indices=clust.indices, interactions=c(pos.clust.all$interactions, neg.clust.all$interactions))
}
out <- controlClusterFDR(target=target, adjp=adjp, FUN=function(sig) {
unlist(FUN(sig))
}, weights=weights, grid.length=grid.length, iterations=iterations)
sig <- adjp <= out$threshold
clusters <- FUN(sig, index.only=FALSE)
# Cleaning up the output.
for (x in seq_len(nset)) {
full.ids <- rep(NA_integer_, nrow(data.list[[x]]))
full.ids[sig[in.each.group[[x]]]] <- clusters$indices[[x]]
clusters$indices[[x]] <- full.ids
}
clusters$FDR <- out$FDR
return(clusters)
}
.notList <- function(x) { (!is.list(x) && !is(x, "List")) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.