R/CRANE.R

Defines functions isElist jutterDegree adjMatToElist alpacaObjectToDfList elistToAdjMat elistSort elistRemoveTags elistIsEdgeOrderEqual elistAddTags condorRun condorCreateObject alpacaGetMember alpacaComputeDifferentialScoreFromDWBM craneUnipartite craneBipartite alpacaCrane

Documented in adjMatToElist alpacaComputeDifferentialScoreFromDWBM alpacaCrane alpacaGetMember alpacaObjectToDfList condorCreateObject condorRun craneBipartite craneUnipartite elistAddTags elistIsEdgeOrderEqual elistRemoveTags elistSort elistToAdjMat isElist jutterDegree

#' Find the robust nodes in ALPACA community using CRANE
#'
#' @param input same input for alpaca: first column TF, second column Genes, third column edge weights from baseline condition, fourth column edge weights from disease condition.
#' @param alp alpca object in list format (output from alpaca package)
#' @param alpha alpha paramter perturbs each edge weights
#' @param beta beta parameter perturbs the strength of each node. Set this to 0 if you want nodes to have node strength identical to the orignal network.
#' @param iteration Number of CRANE distributions to create. Higher value leads to better ranking but longer runtime.
#' @param isParallel TRUE = use Multithread / FALSE = do not use Multithread
#'
#' @import foreach
#' @import doParallel
#' @import parallel
#'
#' @return list of data frames
#' @export
#' @examples
#' \dontrun{
#'
#' input=cbind(nonAng,ang[,3])
#' alp=alpaca(input,NULL,verbose = F)
#' alpListObject=alpacaCrane(input, alp, isParallel = T)
#'
#' }


alpacaCrane = function(input,alp,alpha=0.1,beta=0,iteration=30,isParallel=F){
  elist1=input[,1:3]
  tfcheck=paste(unique(elist1[,1]),"A",sep="_")
  gcheck=paste(unique(elist1[,2]),"B",sep="_")
  member=alpacaGetMember(alp)
  
  print("Converting to Adjecency Matrix...")
  A=elistToAdjMat(elist1,isBipartite=T)
  ridx=order(rownames(A))
  cidx=order(colnames(A))
  elist1=adjMatToElist(A[ridx,cidx])
  
  print("Generating CRANE Networks, this may take a long time (upto 10-20 minutes for Networks with 19000+ nodes) ...")
  if (isParallel){
    cores=detectCores()
    cl <- makeCluster(cores[1]-1)
    registerDoParallel(cl)
    exportFunctions=c("craneBipartite","elistRemoveTags","adjMatToElist","jutterDegree")
    elist.df=foreach(i=1:iteration,.export = exportFunctions, .combine='cbind', .packages=c("netZooR"),.verbose = F) %dopar% {
      craneBipartite(A,alpha=alpha,beta=beta)[,3]
    }
    stopCluster(cl)
  }else{
    elist.df=NULL
    for (i in 1:iteration){
      elist.df=cbind(elist.df,craneBipartite(A,alpha=alpha,beta=beta)[,3])
    }
  }
  gc()
  
  print("Creating Test Distribution...")
  elist1=elistAddTags(elist1)
  
  print("Detecting communities in control network...")
  ctrl.pos = elist1[elist1[,3]>=0,1:3]
  ctrl.elist = data.frame(red=ctrl.pos[,2],blue=ctrl.pos[,1],weights=ctrl.pos[,3])
  ctrl.condor = createCondorObject(ctrl.elist)
  ctrl.condor = condorCluster(ctrl.condor,project=F)
  ctrl.memb = c(ctrl.condor$red.memb[,2],ctrl.condor$blue.memb[,2])
  names(ctrl.memb) = c(as.character(ctrl.condor$red.memb[,1]),as.character(ctrl.condor$blue.memb[,1]))
  
  
  print("Comuting Differential Scores...")
  df=NULL
  for (i in 1:iteration){
    net.table=cbind(elist1,elist.df[,i])
    pos.table = net.table[intersect(which(net.table[,3]>=0),which(net.table[,4]>=0)),]
    pos.graph = graph.edgelist(as.matrix(pos.table[,1:2]),directed=T)
    
    if (length(setdiff(V(pos.graph)$name,names(ctrl.memb)))>0){
      uncounted <- setdiff(V(pos.graph)$name,names(ctrl.memb))
      unc.memb <- sample(1:max(ctrl.memb),length(uncounted),replace=T)
      names(unc.memb) <- uncounted
      ctrl.memb <- c(ctrl.memb,unc.memb)
    }
    if (i %% round(iteration/5)==0){
      print(paste("Computing Differential Score",(i/iteration)*100,"%"))
    }
    dwbm = alpacaComputeDWBMmatmScale(pos.table,ctrl.memb[V(pos.graph)$name])
    
    miss_tf=setdiff(tfcheck,rownames(dwbm))
    miss_g=setdiff(gcheck,colnames(dwbm))
    if (length(miss_tf)>0){
      temp=array(0,dim = c(length(miss_tf),ncol(dwbm)))
      rownames(temp)=miss_tf
      dwbm=rbind(dwbm,temp)
    }
    if (length(miss_g)>0){
      temp=array(0,dim = c(nrow(dwbm),length(miss_g)))
      colnames(temp)=miss_g
      dwbm=cbind(dwbm,temp)
    }
    
    temp=alpacaComputeDifferentialScoreFromDWBM(dwbm,member)
    
    if (i>1 & !identical(rownames(df),names(temp))){
      print("error")
      break
    }else{
      df=cbind(df,temp)
    }
  }
  
  isSig=c()
  tstat=c()
  if (identical(names(alp[[2]]),rownames(df))){
    for (i in 1:nrow(df)){
      if (rownames(df)[i]=="" || sd(df[i,], na.rm=TRUE)==0){
        isSig=c(isSig,NA)
        next
      }
      dist=df[i,]
      if (length(dist)>1){
        temp=t.test(dist, mu=alp[[2]][i], alternative = "less",conf.level=0.95)
        isSig=c(isSig,temp$p.value)
        temp2=temp$statistic[[1]]
        names(temp2)=rownames(df)[i]
        tstat=c(tstat,temp2)
      }else{
        temp.v=1
        names(temp.v)=rownames(df)[i]
        isSig=c(isSig,temp.v)
        temp.v[]=10e+200
        tstat=c(tstat,temp.v)
      }
    }
  }
  alp[[3]]=isSig
  alp[[4]]=tstat
  
  alp.out=alpacaObjectToDfList(alp)
  
  return(alp.out)
}


#' Pertrubs the bipartite network with fixed node strength
#'
#' @param df Adjacency Matrix or Edge list
#' @param alpha alpha paramter perturbs each edge weights
#' @param beta beta parameter perturbs the strength of each node. Set this to 0 if you want nodes to have node strength identical to the orignal network.
#' @param getAdj TRUE = this will return adjacency matrix instead of edge list
#' @param randomStart FALSE = initialize the first row with completely random edge weights.
#'
#' @return edge list
#' @export
#' @examples
#' \dontrun{
#'
#' # Using Edge list as input
#' elist=craneBipartite(nonAng)
#' elist=craneBipartite(nonAng,alpha=0.3)
#'
#' # Using Edge list as input and Adjcency Matrix as output
#' adjMatrix=craneBipartite(nonAng,alpha=0.1,getAdj=T)
#'
#' # Using Edge list as input and Adjcency Matrix as output
#' A=elistToAdjMat(nonAng)
#' elist=craneBipartite(A)
#'
#' }

craneBipartite= function(df,alpha=0.1,beta=0,getAdj=F,randomStart=F){
  if (isElist(df)){
    print("Converting Edgelist to Adj Matrix")
    A=elistToAdjMat(df,isBipartite=T)
  }else{
    A=df
  }
  
  print(paste("Applying Alpha =",alpha))
  rown=nrow(A)
  coln=ncol(A)
  
  # randomize nodes
  ridx=sample(1:rown,rown,replace = F)
  cidx=sample(1:coln,coln,replace = F)
  A=A[ridx,cidx]
  
  rlt_A=array(0,dim=dim(A))
  colnames(rlt_A)=colnames(A)
  rownames(rlt_A)=rownames(A)
  
  # beta implimentation: beta = 1 models subsample data
  if (beta!=0){
    print("Beta on TFs")
    tfD=rowSums(A)
    tfD.jit=jutterDegree(tfD,beta,beta_slope = T)
    tfD.delta=tfD.jit-tfD
    correction=tfD.delta/coln
    A=A+correction
    print("Beta on Genes")
    A=t(A)
    tfD=rowSums(A)
    tfD.jit=jutterDegree(tfD,beta,beta_slope = F)
    tfD.delta=tfD.jit-tfD
    correction=tfD.delta/rown
    A=t(A)
  }
  
  tfD=rowSums(A)
  gD=colSums(A)
  
  omin=min(A)
  omax=max(A)
  
  if (alpha >0){
    rlt_A[1,]=A[1,]
    if (randomStart){
      rlt_A[1,]=rnorm(length(A[1,]),mean(A[1,]),2)
    }
    print("Constructing Iteratuvely Perturbed Network")
    current_degree=rep(0,coln)
    for (i in 2:rown){
      if (i %% 100 ==0){
        print(i)
      }
      #Randomly add delta to previous the Rows by alpha
      temp=rlt_A[(i-1),]
      n=length(temp)
      avg=mean(temp)
      std=sd(temp)
      cur_min=min(temp)
      cur_max=max(temp)
      delta=rnorm(n,mean=0,sd=std)
      temp=temp+alpha*delta
      temp=(temp/sum(temp))*tfD[(i-1)]
      goal_degree=colSums(A[1:i,])
      k=0
      # Backtrack to Control min max
      while(1){
        if (k >150){
          break
        }
        temp_degree=current_degree+temp
        y=goal_degree-temp_degree
        
        lidx=which(y<omin)
        ridx=which(y>omax)
        idx=c(lidx,ridx)
        lcor=y[lidx]-omin
        rcor=y[ridx]-omax
        if(length(idx)==0){
          break
        }
        if (length(lidx)>0){
          temp[lidx]=temp[lidx]-abs(lcor)
        }
        if (length(ridx)>0){
          temp[ridx]=temp[ridx]+abs(rcor)
        }
        temp=(temp/sum(temp))*tfD[(i-1)]
        k=k+1
      }
      #Update Final Edges
      rlt_A[(i-1),]=temp
      current_degree=current_degree+temp
      y=goal_degree-current_degree
      rlt_A[i,]=y
    }
  }
  
  if(!isTRUE(all.equal(rowSums(rlt_A),tfD,check.attributes = F, use.names = F))){
    print("ROW ERROR Alpha limit has been reached")
    return(NULL)
  }
  if(!isTRUE(all.equal(colSums(rlt_A),gD,check.attributes = F, use.names = F))){
    print("COLUMN ERROR Alpha limit has been reached")
    return(NULL)
  }
  
  print("Sorting Nodes")
  ridx=order(rownames(rlt_A))
  cidx=order(colnames(rlt_A))
  if (getAdj){
    return(rlt_A[ridx,cidx])
  }
  elist=adjMatToElist(rlt_A[ridx,cidx])
  elist=elistRemoveTags(elist)
  return(elist)
}

#' Pertrubs the unipartite network with fixed node strength from adjacency matrix
#'
#' @param A Adjacency Matrix
#' @param alpha alpha paramter perturbs each edge weights
#' @param isSelfLoop TRUE/FALSE if self loop exists. co-expression matrix will have a self-loop of 1. Thus TRUE
#'
#' @return adjacency matrix
#' @export
#'
craneUnipartite = function(A,alpha=0.1,isSelfLoop=F){
  print(paste("Applying Alpha =",alpha))
  
  rown=nrow(A)
  coln=ncol(A)
  
  # randomize nodes
  ridx=sample(1:rown,rown,replace = F)
  A=A[ridx,ridx]
  
  rlt_A=array(0,dim=dim(A))
  colnames(rlt_A)=colnames(A)
  rownames(rlt_A)=rownames(A)
  
  Aorig=A
  oavg=mean(Aorig)
  diag(Aorig)=mean(Aorig)
  A[lower.tri(A)]=0
  
  rowD=rowSums(A)
  colD=colSums(A)
  gD=colSums(A)
  
  omin=min(A)
  oomin=min(A[A>0])
  omax=max(A)
  randn=rown-2
  if (alpha >0){
    rlt_A[1,]=A[1,]
    print("Constructing Iteratuvely Perturbed Network")
    current_degree=rep(0,coln)
    for (i in 2:(rown-1)){
      if (i %% 100 ==0){
        print(i)
      }
      #Randomly add delta to previous the Rows by alpha
      temp=rlt_A[(i-1),]
      n=randn
      avg=mean(temp[(i+1):rown])
      std=sd(Aorig[(i-1),])
      cur_min=min(temp[(i+1):rown])
      cur_max=max(temp[(i+1):rown])
      delta=rnorm(n,mean=0,sd=std)
      temp[(i+1):rown]=temp[(i+1):rown]+alpha*delta
      temp[(i+1):rown][is.na(temp[(i+1):rown])]=oomin
      if (any(temp[(i+1):rown]<omin)){
        temp[(i+1):rown]=temp[(i+1):rown]+abs(min(temp[(i+1):rown]))
      }
      temp[(i+1):rown]=(temp[(i+1):rown]/sum(temp[(i+1):rown]))*(rowD[(i-1)]-sum(temp[1:i]))
      goal_degree=colSums(A[1:i,])
      k=0
      # Backtrack to Control min max
      while(1){
        if (k >150){
          break
        }
        temp_degree=current_degree+temp
        y=goal_degree-temp_degree
        
        lidx=which(y<omin)
        ridx=which(y>omax)
        idx=c(lidx,ridx)
        lcor=y[lidx]-omin
        rcor=y[ridx]-omax
        if(length(idx)==0){
          break
        }
        if (length(lidx)>0){
          temp[lidx]=temp[lidx]-abs(lcor)
        }
        if (length(ridx)>0){
          temp[ridx]=temp[ridx]+abs(rcor)
        }
        temp[(i+1):rown][is.na(temp[(i+1):rown])]=oomin
        if (any(temp[(i+1):rown]<omin)){
          temp[(i+1):rown]=temp[(i+1):rown]+abs(min(temp[(i+1):rown]))+omin
        }
        temp[(i+1):rown]=(temp[(i+1):rown]/sum(temp[(i+1):rown]))*(rowD[(i-1)]-sum(temp[1:i]))
        k=k+1
      }
      #Final Update Last Edges
      rlt_A[(i-1),]=temp
      current_degree=current_degree+temp
      y=goal_degree-current_degree
      rlt_A[i,]=y
      randn=randn-1
    }
  }
  
  if (isSelfLoop){
    rlt_A[nrow(rlt_A),ncol(rlt_A)]=A[nrow(rlt_A),ncol(rlt_A)]
  }
  
  # Non-Iterative version of the algorithm (not recommended)
  if(!isTRUE(all.equal(rowSums(rlt_A),rowD,check.attributes = F, use.names = F))){
    print("ROW ERROR Alpha limit has been reached")
    return(NULL)
  }
  if(!isTRUE(all.equal(colSums(rlt_A),colD,check.attributes = F, use.names = F))){
    print("COLUMN ERROR Alpha limit has been reached")
    return(NULL)
  }
  
  for (i in 1:nrow(rlt_A)){
    for (j in 1:ncol(rlt_A)){
      rlt_A[j,i]=rlt_A[i,j]
    }
  }
  
  print("Sorting Nodes")
  ridx=order(rownames(rlt_A))
  cidx=order(colnames(rlt_A))
  
  return(rlt_A[ridx,cidx])
}

#' Compute Differential modularity score from differential modularity matrix
#'
#' This functions takes the precomputed differential modularity matrix and the genLouvain membership to compute the differential modularity score.
#' @param dwbm differential modularity matrix
#' @param louv.memb louvain community membership
#'
#' @return Vector of differntial modularity score
#'
alpacaComputeDifferentialScoreFromDWBM = function(dwbm,louv.memb){
  louv.Ascores <- NULL
  louv.Bscores <- NULL
  louv.Anames <- NULL
  louv.Bnames <- NULL
  for (i in seq_len(max(louv.memb)))
  {
    print(i)
    this.comm <- names(louv.memb)[louv.memb==i]
    this.tfs <- this.comm[grep("_A$",this.comm)]
    this.genes <- this.comm[grep("_B$",this.comm)]
    if (length(this.tfs)>0 && length(this.genes)>0)
    {
      dwbm.submat <- array(dwbm[this.tfs,this.genes],dim=c(length(this.tfs),length(this.genes)))
      tf.sums <- apply(dwbm.submat,1,sum)
      gene.sums <- apply(dwbm.submat,2,sum)
      this.denom <- sum(dwbm[this.tfs,this.genes])
      louv.Ascores <- c(louv.Ascores,tf.sums/this.denom)
      louv.Bscores <- c(louv.Bscores,gene.sums/this.denom)
      louv.Anames <- c(louv.Anames,this.tfs)
      louv.Bnames <- c(louv.Bnames,this.genes)
    }
  }
  
  louv.scores <- c(louv.Ascores,louv.Bscores)
  names(louv.scores) <- c(louv.Anames,louv.Bnames)
  
  return(louv.scores)
}

#' get the member vector from alpaca object
#'
#' @param alp alpaca object
#' @param target tf, gene, or all
#'
#' @return member vector
#'
alpacaGetMember = function(alp, target="all") {
  alp2 = alp[[1]]
  memb_vector = as.vector(alp2)
  names(memb_vector) = names(alp2)
  memb_vector=memb_vector[!is.na(memb_vector)]
  if (target =="tf"){
    return(memb_vector[grep("_A",names(memb_vector))])
  }else if (target == "gene"){
    return(memb_vector[grep("_B",names(memb_vector))])
  }else{
    return(memb_vector)
  }
}

#' creates condor object
#'
#' @param elist edge list
#'
#' @return condor object
condorCreateObject = function(elist){
  elist=elistAddTags(elist)
  elist=elist[elist[,3]>0,]
  elist=data.frame(red=elist[,2],blue=elist[,1],weights=elist[,3])
  elist=createCondorObject(elist,return.gcc=T)
  return(elist)
}

#' Run CONDOR clustering
#'
#' @param elist edge list
#' @param qscore TRUE = output qscore / FALSE = do not output qscore
#'
#' @return condor object
#'
condorRun = function(elist,qscore=F){
  cond.object=condorCreateObject(elist)
  cond.object=condorCluster(cond.object)
  if (qscore){
    cond.object=condorQscore(cond.object)
  }
  return(cond.object)
}

#' Adds "_A" to first column and "_B" to second column
#'
#' @param elist edge list
#'
#' @return edge list
#'
elistAddTags = function(elist){
  elist[,1]=paste(elist[,1],"A",sep="_")
  elist[,2]=paste(elist[,2],"B",sep="_")
  return(elist)
}

#' check if first two columns are identical
#'
#' @param elist1 edge list
#' @param elist2 edge list
#'
#' @return boolean
#'
elistIsEdgeOrderEqual=function(elist1,elist2){
  check1=identical(as.character(elist1[,1]),as.character(elist2[,1]))
  check2=identical(as.character(elist1[,2]),as.character(elist2[,2]))
  if (check1 & check2){
    return(TRUE)
  }else{
    return(FALSE)
  }
}

#' undo elistAddTags
#'
#' @param elist edge list
#'
#' @return edge list
#'
elistRemoveTags = function(elist){
  elist[,1]=gsub("_A","",elist[,1])
  elist[,2]=gsub("_B","",elist[,2])
  return(elist)
}

#' Sorts the edge list based on first two columns in alphabetical order
#'
#' @param elist edge list
#' @return edge list
#'
elistSort=function(elist){
  A=elistToAdjMat(elist,isBipartite=T)
  ridx=order(rownames(A))
  cidx=order(colnames(A))
  elist=adjMatToElist(A[ridx,cidx])
  return(elist)
}

#' Converts edge list to adjacency matrix
#'
#' @param elist edge list
#' @param isBipartite TRUE = for bipartite / FALSE = for unipartite
#'
#' @import igraph
#' @return Adjcency Matrix
#' @export
elistToAdjMat= function(elist,isBipartite=F){
  if (isTRUE(isBipartite)){
    elist=elistAddTags(elist)
  }
  colnames(elist)=c('from','to','weight')
  A_up = as.matrix(get.adjacency(graph.data.frame(elist),attr = 'weight')) # upper triangular matrix
  A = A_up[rownames(A_up) %in% elist[,1], colnames(A_up) %in% elist[,2]]
  return(A)
}

#' Converts alpaca output into list of data frames
#'
#' @param alp alpaca object
#'
#' @return list of data frames
#'
alpacaObjectToDfList= function(alp){
  alp.out=list()
  if (length(alp)<3){
    tf=alpacaGetMember(alp,target="tf")
    df=data.frame(tf,alp[[2]][names(tf)])
    colnames(df)=c("Membership","Differential Modularity Score")
    rownames(df)=gsub("_A","",rownames(df))
    alp.out[["TF"]]=df
    gene=alpacaGetMember(alp,target="gene")
    df=data.frame(gene,alp[[2]][names(gene)])
    colnames(df)=c("Membership","Differential Modularity Score")
    rownames(df)=gsub("_B","",rownames(df))
    alp.out[["Gene"]]=df
  }else{
    tf=alpacaGetMember(alp,target="tf")
    df=data.frame(tf,alp[[2]][names(tf)],alp[[3]][names(tf)],alp[[4]][names(tf)])
    colnames(df)=c("Membership","Differential Modularity Score","Pvalue","t-statistic")
    rownames(df)=gsub("_A","",rownames(df))
    alp.out[["TF"]]=df
    gene=alpacaGetMember(alp,target="gene")
    df=data.frame(gene,alp[[2]][names(gene)],alp[[3]][names(gene)],alp[[4]][names(gene)])
    colnames(df)=c("Membership","Differential Modularity Score","Pvalue","t-statistic")
    rownames(df)=gsub("_B","",rownames(df))
    alp.out[["Gene"]]=df
  }
  return(alp.out)
}

#' converts adjacency matrix to edge list
#'
#' @param adj_mat adjacency matrix
#'
#' @return edge list
#'
adjMatToElist = function(adj_mat){
  from=rep(row.names(adj_mat), ncol(adj_mat))
  to=rep(colnames(adj_mat), each=nrow(adj_mat))
  weight=as.numeric(unlist(c(adj_mat),use.names = F))
  elist=data.frame(from,to,weight)
  elist=elistRemoveTags(elist)
  return(elist)
}

#' CRANE Beta perturbation function. This function will add noice to the node strength sequence.
#'
#' @param nodeD Vector of node strength
#' @param beta beta
#' @param beta_slope TRUE=use predetermined slope to add noise / FALSE = use constant value for noise
#' @return vector with new strength distribution
#'
jutterDegree=function(nodeD,beta,beta_slope=T){
  if (beta==0){
    print("No BETA")
    return(nodeD)
  }
  print(paste("Applying Beta =",beta))
  if (beta<0){
    beta=abs(beta)
    correction=0.06
    const.sig=1
    yint=const.sig*beta
  }else{
    # Correction needed based on number of genes
    correction=0.025
    const.sig=400
    yint=const.sig*beta
  }
  if(beta_slope){
    beta.pos=beta*correction
    beta.neg=beta*correction*0.5
    print(paste("applying beta slope:",beta.pos))
    temp.sigma=nodeD
    temp.sigma[temp.sigma>=0]=abs(temp.sigma[temp.sigma>=0]*beta.pos)+yint
    temp.sigma[temp.sigma<0]=abs(temp.sigma[temp.sigma<0]*beta.neg)+yint
    temp.sigma=abs(temp.sigma)
  }else{
    print(paste("NO Slope Applied, Gene perturbation:", const.sig*beta))
    temp.sigma=const.sig*beta
  }
  nodeD=nodeD+rnorm(length(nodeD),0,temp.sigma)
  
  return(nodeD)
}

#' Check if data frame is an edge list
#'
#' @param df some data frame
#'
#' @return Boolean
#'
isElist = function(df){
  return(ncol(df)==3 & (is.factor(df[,1])|is.character(df[,1]))&(is.factor(df[,2])|is.character(df[,2]))&is.numeric(df[,3]) )
}
netZoo/netZooR documentation built on Oct. 16, 2024, 10:23 p.m.