require(Biobase)
require(NMF)
require(cvxclustr)
#' Batch factor detection via DASC (Data-adaptive Shrinkage and Clustering-DASC)
#'
#' @param edata the normalized target matrix, a data.frame The row is gene,
#' the column is sample
#' @param pdata Phenotypic data summarizes information about the samples
#' @param factor A factor vector which controls the convex clustering
#' @param method Algorithm to use: 'admm' or 'ama'
#' @param type An integer indicating the norm used: 1 = 1-norm 2 = 2-norm
#' 3 = 2-norm^2
#' @param lambda A double number A regularization parameter in the convex
#' optimization
#' @param rank integer sequence
#' @param nrun the iteration numbers of Semi-NMF
#' @param spanning parameter is assigned as false
#' @param annotation An annotation of the dataset
#' @return outputs the result of semi-NMF. It classifies each sample to its
#' batch factor.
#' @details
#' The \code{DASC} function is the main function of our algorithm DASC
#' (Data-adaptive Shrinkage and Clustering-DASC) package. The DASC includes
#' two main steps
#' \itemize{ \item Data-adaptive shrinkage using convex clustering shrinkage
#' (Implemented by convex optimization.);
#' \item Extract batch factors using matrix factorization.
#' }
#'
#' @import Biobase
#' @import cvxclustr
#' @import NMF
#'
#' @export
#'
#' @author Haidong Yi, Ayush T. Raman
#'
#' @seealso \code{\link[cvxclustr]{cvxclust_path_ama}} and
#' \code{\link[cvxclustr]{cvxclust_path_admm}} for the detailed algorithm
#'
DASC <- function(edata, pdata, factor, method="ama", type=3, lambda, rank,
nrun, spanning=FALSE, countable=TRUE, annotation){
if (!is.null(type) && !(type %in% c(1, 2, 3))) {
stop("type must be '1', '2', '3', or NULL")
}
if (!is.null(method) && !(method %in% c("ama", "admm"))) {
stop("method must be 'ama', 'admm', or NULL")
}
edata <- as.matrix(edata)
Zero <- apply(edata, 1, sd)
Zero.num <- which(Zero < 0.001)
if (length(Zero.num) > 0) {
names(Zero.num) <- NULL
edata <- edata[-Zero.num, ]
}
if (countable == TRUE) {
edata = tryCatch({
log(1 + edata)
}, warning = function(e) {
stop("For non-countable dataset, please set parameter countable FALSE")
})
}
if (type == 3) {
Laplace <- trans_Laplace(as.factor(factor))
Udata <- edata %*% solve(diag(ncol(edata)) + lambda * Laplace)
Udata <- as.matrix(Udata)
Bdata <- edata - Udata
} else {
ADJ <- trans_ADJ(as.factor(factor))
if (spanning) {
ADJ <- Sptree(ADJ)
}
w <- adj2vector(ADJ, nrow(ADJ))
sol <- cvxclust(edata, w, lambda, method = method, type = type)
Bdata <- edata - sol$U[[1]]
}
Zero <- apply(Bdata, 1, sd)
Zero.num <- which(Zero < 0.001)
if (length(Zero.num) > 0) {
names(Zero.num) <- NULL
Bdata <- Bdata[-Zero.num, ]
}
data.snmf <- sNMF(Bdata, rank = rank, nrun = nrun)
data.snmf
}
#' Transform the adjacency matrix to a vector
#'
#' @param Adjacency the adjacency matrix of factor
#' @param n number of samples
#' @return \code{w} the vector of the adjacency matrix
#' @author Haidong Yi, Ayush T. Raman
#'
#' @export
#' @examples
#' W <- matrix(c(0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0),nrow=4)
#' w <- adj2vector(W,4)
#'
adj2vector <- function(Adjacency, n) {
w <- double(n * (n - 1) / 2)
iter <- 1
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
w[iter] <- Adjacency[i, j]
iter <- iter + 1
}
}
return(w)
}
#' Representing node in this subtype
#'
#' @param v the index of the node
#' @param X the saved vector with the information of the parent of every node
#' @return \code{r} the parent index of the node
#' @author Haidong Yi, Ayush T. Raman
#'
#' @export
#' @examples
#' nodes <- c(2,3,4,4)
#' get_father(2, nodes)
#'
get_father <- function(v, X) {
r <- v
while (X[r] != r) {
r <- X[r]
}
i <- v
j <- numeric()
while (i != r) {
j <- X[i]
X[i] <- r
i <- j
}
return(r)
}
#' Get Spanning tree from adjacency matrix
#'
#' @param ADJ the adjacency matrix of the factor
#' @return \code{ADJ} the spaning tree of the adjacency matrix
#' @author Haidong Yi, Ayush T. Raman
#'
#' @export
#' @examples
#' W <- matrix(c(0,1,1,1,1,0,1,1,1,1,0,1,1,1,1,0), nrow=4)
#' Sptree(W)
#'
Sptree <- function(ADJ) {
Rownum <- nrow(ADJ)
Colnum <- ncol(ADJ)
father <- numeric()
for (i in 1:Rownum) {
father[i] <- i
}
for (i in 2:Rownum) {
for (j in 1:(i - 1)) {
if (ADJ[i, j] > 0) {
father <- merge(i, j, father)
}
}
}
sum <- numeric()
pre <- numeric()
j <- 1
for (i in 1:Rownum) {
pre[i] <- get_father(i, father)
if (pre[i] == i) {
sum[j] <- i
j <- j + 1
}
}
ADJ <- matrix(0, Rownum, Colnum)
for (i in 1:Colnum) {
if (i != pre[i]) {
ADJ[i, pre[i]] <- 1
ADJ[pre[i], i] <- 1
}
}
ADJ
}
#' Combine two trees into one
#'
#' @param x the index of the node
#' @param y the index of the node
#' @param X the saved vector with the information of the parent of every node
#' @return \code{X} A updated X vector with updates on father of every node
#' @author Haidong Yi, Ayush T. Raman
#' @details
#'
#' During the traversal of the graph matrix, merge function joins two
#' disjoint sets into a single subset. It is a union step of Disjoint-set
#' algorithm by Bernard A. Galler and Michael J. Fischer. For further details,
#' please refer to:
#' \url{https://en.wikipedia.org/wiki/Disjoint-set_data_structure}
#'
merge <- function(x, y, X) {
fx <- get_father(x, X)
fy <- get_father(y, X)
if (fx < fy) {
X[fx] <- fy
} else {
X[fy] <- fx
}
return(X)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.