intersectoR.kmeans <- function(
pSet1=NA, #a list for a set of patterns where each entry is a set of genes associated with a single pattern
pSet2=NA, #a list for a second set of patterns where each entry is a set of genes associated with a single pattern
pval=.05, #the maximum p-value considered significant
full=FALSE #logical indicating whether to return full data frame of signigicantly overlapping sets. Default is false will return summary matrix.
){
#match gene order
gns=unique(names(pSet1$cluster)[names(pSet1$cluster)%in%names(pSet2$cluster)])
ord01=match(gns,names(pSet1$cluster))
ord02=match(gns,names(pSet2$cluster))
pSet1$cluster<-pSet1$cluster[ord01]
pSet2$cluster<-pSet2$cluster[ord02]
overLPmtx=matrix(nrow=0,ncol=9)
colnames(overLPmtx)=c("pSet1","NpSet1","pSet2","NpSet2","NoverLP",
"OverLap%1","OverLap%2","pval","pval.REV")
for(i in sort(unique(pSet1$cluster))){
for(j in sort(unique(pSet2$cluster))){
pvalOLP=phyper(q=sum(pSet1$cluster==i&pSet2$cluster==j),m=sum(pSet1$cluster==i),
n=sum(pSet1$cluster!=i), k=sum(pSet2$cluster==j),lower.tail = FALSE, log.p = FALSE)
pvalOLP.rev=phyper(q=sum(pSet2$cluster==j&pSet1$cluster==i), m=sum(pSet2$cluster==j),
n=sum(pSet2$cluster!=j), k=sum(pSet1$cluster==i), lower.tail = FALSE, log.p = FALSE)
if(pvalOLP<=pval){overLPmtx=rbind(overLPmtx,c(i,sum(pSet1$cluster==i),j,
sum(pSet2$cluster==j),sum(pSet1$cluster==i&pSet2$cluster==j),
round(100*sum(pSet1$cluster==i&pSet2$cluster==j)/sum(pSet1$cluster==i),2),
round(100*sum(pSet1$cluster==i&pSet2$cluster==j)/sum(pSet2$cluster==j),2),
pvalOLP,pvalOLP.rev))}
}
}
message(paste(dim(overLPmtx)[1]," cluster pairs have overlap with p<",pval,":",sep=""))
if(full==FALSE){
return(overLPmtx)
} else if(full){
overLPindx<-overLPmtx[,c("pSet1","pSet2")]
overLPsets<-sapply(1:dim(overLPmtx)[1],function(x)
cbind("pSet1"=sort(names(pSet1$cluster[pSet1$cluster==overLPindx[x,1]&pSet2$cluster==overLPindx[x,2]])),
"pSet2"=sort(names(pSet2$cluster[pSet2$cluster==overLPindx[x,2]&pSet1$cluster==overLPindx[x,1]]))))
overLPall=cbind(pSet1$cluster,pSet2$cluster)
colnames(overLPall)=c("pSet1","pSet2")
rownames(overLPall)=names(pSet1$cluster)
ord03=order(pSet1$cluster,pSet2$cluster)
overLPall=overLPall[ord03,]
return(list(overLPmtx=overLPmtx,overLPindx=overLPindx,overLPsets=overLPsets,overLPall=overLPall))
}
}
#' @param full logical indicating whether to return full data frame of signigicantly overlapping sets. Default is false will return summary matrix.
#' @examples
#' ESepiGen4c1lmRNASeq <- p.ESepiGen4c1l$mRNA.Seq
#' rownames(ESepiGen4c1lmRNASeq) <- map.ESepiGen4c1l$GeneSymbols
#'
#' k.RNAseq6l3c3t<-kmeans(p.RNAseq6l3c3t,22)
#' k.ESepiGen4c1l<-kmeans(ESepiGen4c1lmRNASeq,10)
#' intersectoR(k.RNAseq6l3c3t, k.ESepiGen4c1l, pval=.05)
#' @rdname intersectoR-methods
#' @aliases intersectoR
setMethod("intersectoR",signature(pSet1 = "kmeans",pSet2 = "kmeans"),intersectoR.kmeans)
#######################################################################################################################################
intersectoR.hclust <- function(
pSet1=NA, #a hclust obj
pSet2=NA, #a second set hclust obj
pval=.05, # the maximum p-value considered significant
full=FALSE, #logical indicating whether to return full data frame of signigicantly overlapping sets. Default is false will return summary matrix.
k=NULL #numeric giving cut height for hclust objects, if vector arguments will be applied to pSet1 and pSet2 in that order
){
overLPmtx=matrix(nrow=0,ncol=9)
colnames(overLPmtx)=c("pSet1","NpSet1","pSet2","NpSet2","NoverLP",
"OverLap%1","OverLap%2","pval","pval.REV")
if(length(k)==1){cut1<-cutree(pSet1,k=k) ; cut2<-cutree(pSet2,k=k)}
if(length(k)==2){cut1<-cutree(pSet1,k=k[1]) ; cut2<-cutree(pSet2,k=k[2])}
#cut1<-cut1[names(cut1)%in%names(cut2)]
#cut2<-cut2[names(cut2)%in%names(cut1)]
gns=unique(names(cut1)[names(cut1)%in%names(cut2)])
ord01=match(gns,names(cut1))
ord02=match(gns,names(cut2))
cut1<-cut1[ord01]
cut2<-cut2[ord02]
for(i in sort(unique(cut1))){
for(j in sort(unique(cut2))){
pvalOLP=phyper(q=sum(cut1==i&cut2==j),m=sum(cut1==i),n=sum(cut1!=i),
k=sum(cut2==j), lower.tail = FALSE, log.p = FALSE)
pvalOLP.rev=phyper(q=sum(cut2==j&cut1==i), m=sum(cut2==j), n=sum(cut2!=j),
k=sum(cut1==i), lower.tail = FALSE, log.p = FALSE)
if(pvalOLP<=pval){overLPmtx=rbind(overLPmtx,c(i,sum(cut1==i),j,
sum(cut2==j),sum(cut1==i&cut2==j),sum(cut1==i&cut2==j)/sum(cut1==i),
sum(cut1==i&cut2==j)/sum(cut2==j),pvalOLP,pvalOLP.rev))}
}
}
print(paste(dim(overLPmtx)[1]," cluster pairs have overlap with p<",pval,":",sep=""))
if(full==FALSE){
return(overLPmtx) #return summary matrix
} else if(full){
overLPindx<-overLPmtx[,c("pSet1","pSet2")]
overLPsets<-sapply(1:dim(overLPmtx)[1],function(x)
cbind("pSet1"=sort(names(cut1[cut1==overLPindx[x,1]&cut1==overLPindx[x,2]])),
"pSet2"=sort(names(cut2[cut2==overLPindx[x,2]&cut2==overLPindx[x,1]]))))
overLPall=cbind(cut1,cut2)
colnames(overLPall)=c("pSet1","pSet2")
rownames(overLPall)=names(cut1)
ord03=order(cut1,cut2)
overLPall=overLPall[ord03,]
return(list(overLPmtx=overLPmtx,overLPindx=overLPindx,overLPsets=overLPsets,overLPall=overLPall))
}
}
#' @param k Numeric giving cut height for hclust objects, if a vector is given arguments will be applied to pSet1 and pSet2 in that order
#' @examples
#'
#' h.RNAseq6l3c3t<-hclust(as.dist(1-(cor(t(p.RNAseq6l3c3t)))))
#' h.ESepiGen4c1l<-hclust(as.dist(1-(cor(t(ESepiGen4c1lmRNASeq)))))
#' intersectoR(pSet1=h.ESepiGen4c1l, pSet2=h.RNAseq6l3c3t, pval=.05, k=c(3,4))
#'
#' @rdname intersectoR-methods
#' @aliases intersectoR
setMethod("intersectoR",signature(pSet1 = "hclust",pSet2 = "hclust"),intersectoR.hclust)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.