#' @name LRAcluster
#' @aliases LRAcluster
#' @title integrated analysis of cancer omics data by low-rank approximation
#' @description The LRAcluster function is the main interface of this package, it gets a list of matrix as input and outputs the coordinates of the samples in the reduced space and the explained potential.
#' @param data a list of data matrix,please keep the columns are the same order of samples
#' @param types a list of data types, can be binary, gaussian, or poisson
#' @param dimension the reduced dimension
#' @param names data names
#' @return A list contains the following component:
#'
#' \code{coordinate} A matrix of the coordinates of all the samples in the reduced space
#'
#' \code{potential} ratio of explained variance
#' @keywords internal
#' @author Dingming Wu, Dongfang Wang
#' @references Wu D, Wang D, Zhang MQ, Gu J (2015). Fast dimension reduction and integrative clustering of multi-omics data using low-rank approximation: application to cancer molecular classification. BMC Genomics, 16(1):1022.
LRAcluster <- function(data, types, dimension = 2, names = as.character(1:length(data)))
{
#--------#
# binary #
#--------#
epsilon.binary<-2.0
check.binary.row<-function(arr)
{
if (sum(!is.na(arr))==0)
{
return (F)
}
else
{
idx<-!is.na(arr)
if (sum(arr[idx])==0 || sum(arr[idx])==sum(idx))
{
return (F)
}
else
{
return (T)
}
}
}
check.binary<-function(mat,name)
{
index<-apply(mat,1,check.binary.row)
n<-sum(!index)
if (n>0)
{
w<-paste("Warning: ",name," have ",as.character(n)," invalid lines!",sep="")
warning(w)
}
mat_c<-mat[index,]
rownames(mat_c)<-rownames(mat)[index]
colnames(mat_c)<-colnames(mat)
return (mat_c)
}
base.binary.row<-function(arr)
{
idx<-!is.na(arr)
n<-sum(idx)
m<-sum(arr[idx])
return (log(m/(n-m)))
}
base.binary<-function(mat)
{
mat_b<-matrix(0,nrow(mat),ncol(mat))
ar_b<-apply(mat,1,base.binary.row)
mat_b[1:nrow(mat_b),]<-ar_b
return (mat_b)
}
update.binary<-function(mat,mat_b,mat_now,eps)
{
mat_p<-mat_b+mat_now
mat_u<-matrix(0,nrow(mat),ncol(mat))
idx1<-!is.na(mat) & mat==1
idx0<-!is.na(mat) & mat==0
index<-is.na(mat)
arr<-exp(mat_p)
mat_u[index]<-mat_now[index]
mat_u[idx1]<-mat_now[idx1]+eps*epsilon.binary/(1.0+arr[idx1])
mat_u[idx0]<-mat_now[idx0]-eps*epsilon.binary*arr[idx0]/(1.0+arr[idx0])
return (mat_u)
}
stop.binary<-function(mat,mat_b,mat_now,mat_u)
{
index<-!is.na(mat)
mn<-mat_b+mat_now
mu<-mat_b+mat_u
arn<-exp(mn)
aru<-exp(mu)
idx1<-!is.na(mat) & mat==1
idx0<-!is.na(mat) & mat==0
lgn<-sum(log(arn[idx1]/(1+arn[idx1])))+sum(log(1/(1+arn[idx0])))
lgu<-sum(log(aru[idx1]/(1+aru[idx1])))+sum(log(1/(1+aru[idx0])))
return (lgu-lgn)
}
LL.binary<-function(mat,mat_b,mat_u)
{
index<-!is.na(mat)
mu<-mat_b+mat_u
aru<-exp(mu)
idx1<-!is.na(mat) & mat==1
idx0<-!is.na(mat) & mat==0
lgu<-sum(log(aru[idx1]/(1+aru[idx1])))+sum(log(1/(1+aru[idx0])))
return (lgu)
}
LLmax.binary<-function(mat)
{
return (0)
}
LLmin.binary<-function(mat,mat_b)
{
index<-!is.na(mat)
aru<-exp(mat_b)
idx1<-!is.na(mat) & mat==1
idx0<-!is.na(mat) & mat==0
lgu<-sum(log(aru[idx1]/(1+aru[idx1])))+sum(log(1/(1+aru[idx0])))
return (lgu)
}
binary_type_base <- function( data,dimension=2 ,name="test")
{
data<-check.binary(data,name)
data_b<-base.binary(data)
data_now<-matrix(0,nrow(data),ncol(data))
data_u<-update.binary(data,data_b,data_now)
data_u<-nuclear_approximation(data_u,dimension)
while (T)
{
thr<-stop.binary(data,data_b,data_now,data_u)
message(thr)
if (thr<0.2)
{
break
}
data_now<-data_u
data_u<-update.binary(data,data_b,data_now)
data_u<-nuclear_approximation(data_u,dimension)
}
return (data_now)
}
#----------#
# gaussian #
#----------#
epsilon.gaussian=0.5
check.gaussian.row<-function(arr)
{
if (sum(!is.na(arr))==0)
{
return (F)
}
else
{
return (T)
}
}
check.gaussian<-function(mat,name)
{
index<-array(T,nrow(mat))
for(i in 1:nrow(mat))
{
if (sum(is.na(mat[i,])==ncol(mat)))
{
war<-paste("Warning: ",name,"'s ",as.character(i)," line is all NA. Delete this line",sep="")
warning(war)
index[i]<-F
}
}
mat_c<-mat[index,]
rownames(mat_c)<-rownames(mat)[index]
colnames(mat_c)<-colnames(mat)
return (mat_c)
}
base.gaussian.row<-function(arr)
{
idx<-!is.na(arr)
return (mean(arr[idx]))
}
base.gaussian<-function(mat)
{
mat_b<-matrix(0,nrow(mat),ncol(mat))
ar_b<-apply(mat,1,base.gaussian.row)
mat_b[1:nrow(mat_b),]<-ar_b
return (mat_b)
}
update.gaussian<-function(mat,mat_b,mat_now,eps)
{
mat_p<-mat_b+mat_now
mat_u<-matrix(0,nrow(mat),ncol(mat))
index<-!is.na(mat)
mat_u[index]<-mat_now[index]+eps*epsilon.gaussian*(mat[index]-mat_p[index])
index<-is.na(mat)
mat_u[index]<-mat_now[index]
return (mat_u)
}
stop.gaussian<-function(mat,mat_b,mat_now,mat_u)
{
index<-!is.na(mat)
mn<-mat_b+mat_now
mu<-mat_b+mat_u
ren<-mat[index]-mn[index]
reu<-mat[index]-mu[index]
lgn<- -0.5*sum(ren*ren)
lgu<- -0.5*sum(reu*reu)
return (lgu-lgn)
}
LL.gaussian<-function(mat,mat_b,mat_u)
{
index<-!is.na(mat)
mu<-mat_b+mat_u
reu<-mat[index]-mu[index]
lgu<- -0.5*sum(reu*reu)
return (lgu)
}
LLmax.gaussian<-function(mat)
{
return (0.0)
}
LLmin.gaussian<-function(mat,mat_b)
{
index<-!is.na(mat)
reu<-mat[index]-mat_b[index]
lgu<- -0.5*sum(reu*reu)
return (lgu)
}
gaussian_base<-function(data,dimension=2,name="test")
{
data<-check.gaussian(data,name)
data_b<-base.gaussian(data)
data_now<-matrix(0,nrow(data),ncol(data))
data_u<-update.gaussian(data,data_b,data_now)
data_u<-nuclear_approximation(data_u,dimension)
while(T)
{
thr<-stop.gaussian(data,data_b,data_now,data_u)
message(thr)
if (thr<0.2)
{
break
}
data_now<-data_u
data_u<-update.gaussian(data,data_b,data_now)
data_u<-nuclear_approximation(data_u,dimension)
}
return (data_now)
}
#---------#
# poisson #
#---------#
epsilon.poisson<-0.5
check.poisson.row<-function(arr)
{
if (sum(!is.na(arr))==0)
{
return (F)
}
else
{
idx<-!is.na(arr)
if (sum(arr[idx]<0)>0)
{
return (F)
}
else
{
return (T)
}
}
}
check.poisson<-function(mat,name)
{
w<-paste(name," is poisson type. Add 1 to all counts",sep="")
message(w)
index<-apply(mat,1,check.poisson.row)
n<-sum(!index)
if (n>0)
{
w<-paste("Warning: ",name," have ",as.character(n)," invalid lines!",sep="")
warning(w)
}
mat_c<-mat[index,]+1
rownames(mat_c)<-rownames(mat)[index]
colnames(mat_c)<-colnames(mat)
return (mat_c)
}
base.poisson.row<-function(arr)
{
idx<-!is.na(arr)
m<-sum(log(arr[idx]))
n<-sum(idx)
return(m/n)
}
base.poisson<-function(mat)
{
mat_b<-matrix(0,nrow(mat),ncol(mat))
ar_b<-apply(mat,1,base.poisson.row)
mat_b[1:nrow(mat_b),]<-ar_b
return (mat_b)
}
update.poisson<-function(mat,mat_b,mat_now,eps)
{
mat_p<-mat_b+mat_now
mat_u<-matrix(0,nrow(mat),ncol(mat))
index<-!is.na(mat)
mat_u[index]<-mat_now[index]+eps*epsilon.poisson*(log(mat[index])-mat_p[index])
index<-is.na(mat)
mat_u[index]<-mat_now[index]
return (mat_u)
}
stop.poisson<-function(mat,mat_b,mat_now,mat_u)
{
index<-!is.na(mat)
mn<-mat_b+mat_now
mu<-mat_b+mat_u
lgn<-sum(mat[index]*mn[index]-exp(mn[index]))
lgu<-sum(mat[index]*mu[index]-exp(mu[index]))
return (lgu-lgn)
}
LL.poisson<-function(mat,mat_b,mat_u)
{
index<-!is.na(mat)
mu<-mat_b+mat_u
lgu<-sum(mat[index]*mu[index]-exp(mu[index]))
return (lgu)
}
LLmax.poisson<-function(mat)
{
index<-!is.na(mat)
lgu<-sum(mat[index]*log(mat[index])-mat[index])
return (lgu)
}
LLmin.poisson<-function(mat,mat_b)
{
index<-!is.na(mat)
lgu<-sum(mat[index]*mat_b[index]-exp(mat_b[index]))
return (lgu)
}
poisson_type_base<-function(data,dimension=2,name="test")
{
data<-check.poisson(data,name)
data_b<-base.poisson(data)
data_now<-matrix(0,nrow(data),ncol(data))
data_u<-update.poisson(data,data_b,data_now)
data_u<-nuclear_approximation(data_u,dimension)
while(T)
{
thr<-stop.poisson(data,data_b,data_now,data_u)
message(thr)
if (thr<0.2)
{
break
}
data_now<-data_u
data_u<-update.poisson(data,data_b,data_now)
data_u<-nuclear_approximation(data_u,dimension)
}
return (data_now)
}
#----#
# na #
#----#
nuclear_approximation<-function(mat,dimension)
{
svd<-svd(mat,nu=0,nv=0)
if (dimension<length(svd$d))
{
lambda<-svd$d[dimension+1]
svd<-svd(mat,nu=dimension,nv=dimension)
indexh<-svd$d>lambda
indexm<-svd$d<lambda
dia<-array(svd$d,length(svd$d))
dia[indexh]<-dia[indexh]-lambda
dia[indexm]<-0
mat_low<-svd$u%*%diag(c(dia[1:dimension],0))[1:dimension,1:dimension]%*%t(svd$v)
}
else
{
mat_low<-mat
}
return (mat_low)
}
#------------#
# LRAcluster #
#------------#
check.matrix.element<-function(x)
{
if (!is.matrix(x))
{
return (T)
}
else
{
return (F)
}
}
ncol.element<-function(x)
{
return (ncol(x))
}
nrow.element<-function(x)
{
return (nrow(x))
}
check<-function(mat,type,name)
{
if (type=="binary")
{
return (check.binary(mat,name))
}
else if (type=="gaussian")
{
return (check.gaussian(mat,name))
}
else if (type=="poisson")
{
return (check.poisson(mat,name))
}
else
{
e<-paste("unknown type ",type,sep="")
stop(e)
}
}
eps<-0.0
if (!is.list(data))
{
stop("the input data must be a list!")
}
c<-sapply(data,check.matrix.element)
if (sum(c)>0)
{
stop("each element of input list must be a matrix!")
}
c<-sapply(data,ncol.element)
if (length(levels(factor(c)))>1)
{
stop("each element of input list must have the same column number!")
}
if (length(data)!=length(types))
{
stop("data and types must be the same length!")
}
nSample<-c[1]
loglmin<-0
loglmax<-0
loglu<-0.0
nData<-length(data)
for (i in 1:nData)
{
data[[i]]<-check(data[[i]],types[[i]],names[[i]])
}
nGeneArr<-sapply(data,nrow.element)
nGene<-sum(nGeneArr)
indexData<-list()
k=1
for(i in 1:nData)
{
indexData[[i]]<- (k):(k+nGeneArr[i]-1)
k<-k+nGeneArr[i]
}
base<-matrix(0,nGene,nSample)
now<-matrix(0,nGene,nSample)
update<-matrix(0,nGene,nSample)
thr<-array(0,nData)
for (i in 1:nData)
{
if (types[[i]]=="binary")
{
base[indexData[[i]],]<-base.binary(data[[i]])
loglmin<-loglmin+LLmin.binary(data[[i]],base[indexData[[i]],])
loglmax<-loglmax+LLmax.binary(data[[i]])
}
else if (types[[i]]=="gaussian")
{
base[indexData[[i]],]<-base.gaussian(data[[i]])
loglmin<-loglmin+LLmin.gaussian(data[[i]],base[indexData[[i]],])
loglmax<-loglmax+LLmax.gaussian(data[[i]])
}
else if (types[[i]]=="poisson")
{
base[indexData[[i]],]<-base.poisson(data[[i]])
loglmin<-loglmin+LLmin.poisson(data[[i]],base[indexData[[i]],])
loglmax<-loglmax+LLmax.poisson(data[[i]])
}
}
for (i in 1:nData)
{
if (types[[i]]=="binary")
{
update[indexData[[i]],]<-update.binary(data[[i]],base[indexData[[i]],],now[indexData[[i]],],exp(eps))
}
else if (types[[i]]=="gaussian")
{
update[indexData[[i]],]<-update.gaussian(data[[i]],base[indexData[[i]],],now[indexData[[i]],],exp(eps))
}
else if (types[[i]]=="poisson")
{
update[indexData[[i]],]<-update.poisson(data[[i]],base[indexData[[i]],],now[indexData[[i]],],exp(eps))
}
}
update<-nuclear_approximation(update,dimension)
nIter<-0
thres<-array(Inf,3)
epsN<-array(Inf,2)
while(T)
{
for (i in 1:nData)
{
if (types[[i]]=="binary")
{
thr[i]<-stop.binary(data[[i]],base[indexData[[i]],],now[indexData[[i]],],update[indexData[[i]],])
}
else if (types[[i]]=="gaussian")
{
thr[i]<-stop.gaussian(data[[i]],base[indexData[[i]],],now[indexData[[i]],],update[indexData[[i]],])
}
else if (types[[i]]=="poisson")
{
thr[i]<-stop.poisson(data[[i]],base[indexData[[i]],],now[indexData[[i]],],update[indexData[[i]],])
}
}
nIter<-nIter+1
thres[1]<-thres[2]
thres[2]<-thres[3]
thres[3]<-sum(thr)
epsN[1]<-epsN[2]
epsN[2]<-eps
if (nIter>5)
{
if (runif(1)<thres[1]*thres[3]/(thres[2]*thres[2]+thres[1]*thres[3]))
{
eps<-epsN[1]+0.05*runif(1)-0.025
}
else
{
eps<-epsN[2]+0.05*runif(1)-0.025
}
if (eps< -0.7)
{
eps<- 0
epsN<-c(0,0)
}
if (eps > 1.4)
{
eps<-0
epsN<-c(0,0)
}
}
if (sum(thr)<nData*0.2)
{
break
}
now<-update
for (i in 1:nData)
{
if (types[[i]]=="binary")
{
update[indexData[[i]],]<-update.binary(data[[i]],base[indexData[[i]],],now[indexData[[i]],],exp(eps))
}
else if (types[[i]]=="gaussian")
{
update[indexData[[i]],]<-update.gaussian(data[[i]],base[indexData[[i]],],now[indexData[[i]],],exp(eps))
}
else if (types[[i]]=="poisson")
{
update[indexData[[i]],]<-update.poisson(data[[i]],base[indexData[[i]],],now[indexData[[i]],],exp(eps))
}
}
update<-nuclear_approximation(update,dimension)
}
for (i in 1:nData)
{
if (types[[i]]=="binary")
{
loglu<-loglu+LL.binary(data[[i]],base[indexData[[i]],],update[indexData[[i]],])
}
else if (types[[i]]=="gaussian")
{
loglu<-loglu+LL.gaussian(data[[i]],base[indexData[[i]],],update[indexData[[i]],])
}
else if (types[[i]]=="poisson")
{
loglu<-loglu+LL.poisson(data[[i]],base[indexData[[i]],],update[indexData[[i]],])
}
}
sv<-svd(update,nu=0,nv=dimension)
coordinate<-diag(c(sv$d[1:dimension],0))[1:dimension,1:dimension]%*%t(sv$v)
colnames(coordinate)<-colnames(data[[1]])
rownames(coordinate)<-paste("PC ",as.character(1:dimension),sep="")
ratio<-(loglu-loglmin)/(loglmax-loglmin)
return (list("coordinate"=coordinate,"potential"=ratio))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.