get.reads<-function(file,seqL=150,readformat="bam"){
shift=round(seqL/2)
if(readformat=="bam"){
reads<-readGAlignments(as.character(file))
reads1<-reads[strand(reads)=="+"]
reads2<-reads[strand(reads)=="-"]
res<-GRanges(seqnames=c(seqnames(reads1),seqnames(reads2)),ranges=c(IRanges(start=start(reads1)+shift,width=1),IRanges(end=end(reads2)-shift,width=1)),strand=c(strand(reads1),strand(reads2)))
}else{
reads<-read.table(as.character(file),sep="\t",col.names=c("chrom","position","strand"))
reads2<-split(reads,reads$strand)
res<-GRanges(seqnames=Rle(c(as.character(reads2[["+"]]$chrom),as.character(reads2[["-"]]$chrom))),ranges=c(IRanges(start=reads2[["+"]]$position+shift,width=1),IRanges(end=reads2[["-"]]$position-shift,width=seqL)),strand=Rle(c("+","-"),sapply(reads2,nrow)))
}
return(res)
}
get.motif<-function(file,extendL=300){
motif<-try(read.table(as.character(file),sep="\t"),silent=TRUE)
if(is.data.frame(motif)){
motifcenter=as.integer(rowMeans(motif[,3:4]))
motifGR<-GRanges(seqnames=Rle(as.character(motif[,2])),ranges=IRanges(start=(motifcenter-extendL),end=(motifcenter+extendL-1)))}
}
get.motifbin<-function(motifseg,bin=50){
chrtemp=as.character(seqnames(motifseg))
starttemp=start(motifseg)
endtemp=end(motifseg)
motiflocnum=length(chrtemp)
motifbinnum=(endtemp[1]-starttemp[1]+1)/bin
startmatrix=matrix(NA,nrow=motiflocnum,ncol=motifbinnum)
endmatrix=matrix(NA,nrow=motiflocnum,ncol=motifbinnum)
startmatrix[,1]=starttemp
endmatrix[,motifbinnum]=endtemp
if(motifbinnum>1){
for(i in 2:motifbinnum){
startmatrix[,i]=startmatrix[,(i-1)]+bin
endmatrix[,(motifbinnum-i+1)]=endmatrix[,(motifbinnum-i+2)]-bin
}
}
motifbinGR=GRanges(seqnames=Rle(rep(chrtemp,each=motifbinnum)),ranges=IRanges(start=c(t(startmatrix)),end=c(t(endmatrix))))
return(motifbinGR)
}
countmotifsegreads<-function(read,segmentfile,seqL=150,core=1,format="bam",histonemotifRFid,readsmem){
countmotifsegreadsv1<-function(listfile,segmentfile,seqL,core,format="bam",histonemotifRFid){
segnum=length(segmentfile)
markernum=length(listfile)
time=nrow(listfile[[1]])
result<-vector("list",markernum)
totalreads<-vector(length=markernum*time)
for(markcount in 1:markernum){
result[[markcount]] <- vector("list",segnum)
for(i in 1:segnum){
result[[markcount]][[i]]<-matrix(NA,nrow=length(segmentfile[[i]]),ncol=time)
}
for(i in 1:nrow(listfile[[markcount]])){
temp<-get.reads(listfile[[markcount]][i,],seqL,format)
totalreads[i+(markcount-1)*time]=length(temp)
temp1=vector("list",length=core)
if(core==1){
temp1[[1]]=temp
rm(temp)
}else{
readcount=totalreads[i+(markcount-1)*time]
readseg=round(readcount/core)
for(j in 1:(core-1)){
temp1[[j]]=temp[((j-1)*readseg+1):(j*readseg)]
}
temp1[[core]]=temp[((core-1)*readseg+1):readcount]
rm(temp)
}
for(j in 1:segnum){
countreads=function(x){
tempresult=try(countOverlaps(segmentfile[[j]],temp1[[x]]))
return(tempresult)
}
tempbincount=mclapply(1:core,countreads,mc.cores=core)
tempbincount1=tempbincount[[1]]
if(core>1){
for(k in 2:core){
tempbincount1=as.numeric(tempbincount1)+as.numeric(tempbincount[[k]])
}
}
rm(tempbincount)
result[[markcount]][[j]][,i]=tempbincount1
}
}
}
ratio=totalreads/mean(totalreads)
for(markcount in 1:markernum){
for(j in 1:segnum){
for(i in 1:time){
result[[markcount]][[j]][,i]=try(round(log2(result[[markcount]][[j]][,i]/ratio[i+(markcount-1)*time]+1),2))
}
if(nrow(result[[markcount]][[j]]>0))
{rownames(result[[markcount]][[j]])=histonemotifRFid[[j]]
} }}
return(list(result,ratio))
}
countmotifsegreadsv2<-function(reads,segmentfile,core,histonemotifRFid){
segnum=length(segmentfile)
markernum=length(reads)
result<-vector("list",markernum)
time<-length(reads[[1]])
totalreads<-vector(length=markernum*time)
for(markcount in 1:markernum){
result[[markcount]] <- vector("list",segnum)
for(i in 1:segnum){
result[[markcount]][[i]]<-matrix(NA,nrow=length(segmentfile[[i]]),ncol=time)
}
totalreads[((markcount-1)*time+1):(markcount*time)]<-sapply(reads[[markcount]],length)
for(i in 1:length(reads[[markcount]])){
temp1=vector("list",length=core)
if(core==1){
temp1[[1]]=reads[[markcount]][[i]]
}else{
readseg=round(totalreads[i+(markcount-1)*time]/core)
for(j in 1:(core-1)){
temp1[[j]]=reads[[markcount]][[i]][((j-1)*readseg+1):(j*readseg)]
}
temp1[[core]]=reads[[markcount]][[i]][((core-1)*readseg+1):totalreads[i+(markcount-1)*time]]
}
for(j in 1:segnum){
countreads=function(x){
tempresult=try(countOverlaps(segmentfile[[j]],temp1[[x]]))
return(tempresult)
}
tempbincount=mclapply(1:core,countreads,mc.cores=core)
tempbincount1=as.numeric(tempbincount[[1]])
if(core>1){
for(k in 2:core){
tempbincount1=as.numeric(tempbincount1)+as.numeric(tempbincount[[k]])
}
}
rm(tempbincount)
result[[markcount]][[j]][,i]=tempbincount1
}
rm(temp1)
gc()
}
}
ratio=totalreads/mean(totalreads)
for(markcount in 1:markernum){
for(j in 1:segnum){
for(i in 1:time){
result[[markcount]][[j]][,i]=try(round(log2(result[[markcount]][[j]][,i]/ratio[i+(markcount-1)*time]+1),2))
}
if(nrow(result[[markcount]][[j]]>0))
{rownames(result[[markcount]][[j]])=histonemotifRFid[[j]]
} }
}
return(list(result,ratio))
}
if(readsmem){
result=countmotifsegreadsv2(read,segmentfile,core,histonemotifRFid)
}else{
result=countmotifsegreadsv1(read,segmentfile,seqL,core,format,histonemotifRFid)
}
return(result)
}
motifbincount=function(motifbin,reads,core=1,seqL=150,format="bam",name,ratio){
segnum=length(motifbin)
if(is.list(reads)){
time=length(reads)
readtype=TRUE
}else{
time=nrow(reads)
readtype=FALSE
}
if(is.list(motifbin[[1]])){
motiftype=TRUE
}else{
motiftype=FALSE
}
for(i in 1:time){
if(readtype){
temp=reads[[i]]
}else{
temp<-get.reads(listfile[i,],seqL,format)
}
temp1=vector("list",length=core)
readcount=length(temp)
readseg=round(readcount/core)
if(core>1){
for(j in 1:(core-1)){
temp1[[j]]=temp[((j-1)*readseg+1):(j*readseg)]
}
temp1[[core]]=temp[((core-1)*readseg+1):readcount]
}else{
temp1[[1]]=temp
}
rm(temp)
for(j in 1:segnum){
if(motiftype){
for(k in 1:3){
if(length(motifbin[[j]][[i]][[k]])>0){
countreads=function(x){
tempresult=try(countOverlaps(motifbin[[j]][[i]][[k]],temp1[[x]]))
return(tempresult)
}
tempbincount=mclapply(1:core,countreads,mc.cores=core)
tempbincount1=tempbincount[[1]]
if(core>1){
for(l in 2:core){
tempbincount1=tempbincount1+tempbincount[[l]]}
}
rm(tempbincount)
tempbincount1=round(log2(tempbincount1/ratio[i]+1),2)
}else{tempbincount1=0}
write.table(tempbincount1,file=paste("motifbin_",name,"_",i,"_",j,"_",k,".txt",sep=""),quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE)
}
}else{
if(length(motifbin[[j]])>0){
countreads=function(x){
tempresult=try(countOverlaps(motifbin[[j]],temp1[[x]]))
return(tempresult)
}
tempbincount=mclapply(1:core,countreads,mc.cores=core)
tempbincount1=tempbincount[[1]]
if(core>1){
for(k in 2:core){
tempbincount1=tempbincount1+tempbincount[[k]]
}
}
rm(tempbincount)
tempbincount1=round(log2(tempbincount1/ratio[i]+1),2)
}else{tempbincount1=0}
write.table(tempbincount1,file=paste("motifbin_",name,"_",i,"_",j,".txt",sep=""),quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE)
}
}
rm(temp1)
gc()
}}
motifRFid=function(motifseg,histonemotifsegreads,motifpeakoverlap,motifseg500overlapid){
segnum=length(motifseg)
marknum=length(histonemotifsegreads)
result=vector("list",length=segnum)
time=ncol(histonemotifsegreads[[1]][[1]])
for(i in 1:segnum){
result[[i]]=vector("list",length=time)
for(j in 1:time){
result[[i]][[j]]=vector("list",length=3)
tempmarkreads=vector(length=nrow(histonemotifsegreads[[1]][[i]]))
temppeakoverlap=vector(length=length(motifseg[[i]]))
for(markcount in 1:marknum){
tempmarkreads=tempmarkreads+histonemotifsegreads[[markcount]][[i]][,j]
temppeakoverlap=temppeakoverlap+motifpeakoverlap[[markcount]][[i]][,j]
}
temp=cbind(1:length(motifseg[[i]]),temppeakoverlap)
if(nrow(temp)>200){
for(markcount in marknum:1){
if(nrow(temp[temp[,2]>=markcount,])>=250){
temppeak=temp[temp[,2]>=markcount,]
break
}
}
if(markcount==1){
temppeak=temp[temp[,2]>0,]
}
tempnopeak=temp[temp[,2]==0,]
peakvalue=cbind(motifseg500overlapid[[i]],tempmarkreads)
peakvalue1=peakvalue[peakvalue[,1]%in%temppeak[,1],]
if(is.matrix(temppeak)&is.matrix(tempnopeak)){
if(nrow(peakvalue1)>500){
peakvalue1=peakvalue1[order(peakvalue1[,2],decreasing=TRUE),]
if(nrow(tempnopeak)>=250){
temppeakid=sample(peakvalue1[1:round(nrow(peakvalue1)/2),1],250)
tempnopeakid=sample(tempnopeak[,1],250)
}else{
temppeakid=sample(peakvalue1[1:round(nrow(peakvalue1)/2),1],(500-nrow(tempnopeak)))
tempnopeakid=tempnopeak[,1]
}
}else if(nrow(peakvalue1)>250){
if(nrow(tempnopeak)>=250){
temppeakid=sample(peakvalue1[,1],250)
tempnopeakid=sample(tempnopeak[,1],250)
}else{
temppeakid=sample(peakvalue1[,1],min(500-nrow(tempnopeak),nrow(peakvalue1)))
tempnopeakid=tempnopeak[,1]
}
}else{
temppeakid=peakvalue1[,1]
tempnopeakid=sample(tempnopeak[,1],min(500-nrow(peakvalue1),nrow(tempnopeak)))
}
result[[i]][[j]][[1]]=temppeakid
result[[i]][[j]][[2]]=tempnopeakid
result[[i]][[j]][[3]]=sample(tempnopeak[,1],500,replace=TRUE)
}
}
}
}
return(result)
}
motifbinRFfdr=function(histonemotifRFid,motifsegs,markernum,binnum){
segnum=length(histonemotifRFid)
time=length(histonemotifRFid[[1]])
result=vector("list",length=segnum)
for(i in 1:segnum){
result[[i]]=matrix(nrow=length(motifsegs[[i]]),ncol=time)
}
result1=result
for(i in 1:time){
tempbincounttotal=vector("list",length=3)
temprfval=vector("list",length=segnum)
for(j in 1:segnum){
temprfval[[j]]=vector("list",length=4)
tempbincount1=vector("list",length=markernum)
for(markcount in 1:markernum){
tempbincount=read.table(paste("motifbin_",markcount,"_",i,"_",j,".txt",sep=""),sep="\t")
tempbincount1[[markcount]]=tempbincount[,1]
}
if(try(length(histonemotifRFid[[j]][[i]][[1]]))>0){
temppeakid=read.table(paste("motifbin_",1,"_",i,"_",j,"_",1,".txt",sep=""),sep="\t")
tempnopeakid=read.table(paste("motifbin_",1,"_",i,"_",j,"_",2,".txt",sep=""),sep="\t")
tempbgid=read.table(paste("motifbin_",1,"_",i,"_",j,"_",3,".txt",sep=""),sep="\t")
tempoutpeak=matrix(temppeakid[,1],byrow=TRUE,ncol=binnum)
tempoutnopeak=matrix(tempnopeakid[,1],byrow=TRUE,ncol=binnum)
bgtempbincount=matrix(tempbgid[,1],byrow=TRUE,ncol=binnum)
tempbincountset=matrix(tempbincount1[[1]],ncol=binnum,byrow=TRUE)
if(markernum>1){
for(markcount in 2:markernum){
temppeakid=read.table(paste("motifbin_",markcount,"_",i,"_",j,"_",1,".txt",sep=""),sep="\t")
tempnopeakid=read.table(paste("motifbin_",markcount,"_",i,"_",j,"_",2,".txt",sep=""),sep="\t")
tempbgid=read.table(paste("motifbin_",markcount,"_",i,"_",j,"_",3,".txt",sep=""),sep="\t")
tempoutpeak0=matrix(temppeakid[,1],byrow=TRUE,ncol=binnum)
tempoutnopeak0=matrix(tempnopeakid[,1],byrow=TRUE,ncol=binnum)
bgtempbincount0=matrix(tempbgid[,1],byrow=TRUE,ncol=binnum)
tempoutpeak=cbind(tempoutpeak,tempoutpeak0)
tempoutnopeak=cbind(tempoutnopeak,tempoutnopeak0)
bgtempbincount=cbind(bgtempbincount, bgtempbincount0)
tempbincountset=cbind(tempbincountset,matrix(tempbincount1[[markcount]],ncol=binnum,byrow=TRUE))
}
}
tempbincounttotal[[1]]=rbind(tempbincounttotal[[1]],tempoutpeak)
tempbincounttotal[[2]]=rbind(tempbincounttotal[[2]],tempoutnopeak)
tempbincounttotal[[3]]=rbind(tempbincounttotal[[3]],bgtempbincount)
trainset=rbind(tempoutpeak,tempoutnopeak)
trainresponse=c(rep(1,nrow(tempoutpeak)),rep(0,nrow(tempoutnopeak)))
motifbinrfmodel=randomForest(x=trainset,y=factor(trainresponse))
predictmotifbinrf=predict(motifbinrfmodel,newdata=tempbincountset,type="vote")
temprfval[[j]][[1]]=predictmotifbinrf[,2]
predictmotifbinbg=predict(motifbinrfmodel,newdata=bgtempbincount,type="vote")
temprfval[[j]][[2]]=predictmotifbinbg[,2]
}
}
temprow=min(10000,nrow(tempbincounttotal[[1]]),nrow(tempbincounttotal[[2]]))
trainset=rbind(as.matrix(tempbincounttotal[[1]][sample(nrow(tempbincounttotal[[1]]),temprow),]),as.matrix(tempbincounttotal[[2]][sample(nrow(tempbincounttotal[[2]]),temprow),]))
trainresponse=c(rep(1,temprow),rep(0,temprow))
motifbinrfmodeltotal=randomForest(x=trainset,y=factor(trainresponse))
for(j in 1:segnum){
tempbincount1=vector("list",length=markernum)
for(markcount in 1:markernum){
tempbincount=read.table(paste("motifbin_",markcount,"_",i,"_",j,".txt",sep=""),sep="\t")
tempbincount1[[markcount]]=tempbincount[,1]
}
if(try(length(histonemotifRFid[[j]][[i]][[1]]))>0){
tempbgid=read.table(paste("motifbin_",1,"_",i,"_",j,"_",3,".txt",sep=""),sep="\t")
bgtempbincount=matrix(tempbgid[,1],byrow=TRUE,ncol=binnum)
tempbincountset=matrix(tempbincount1[[1]],ncol=binnum,byrow=TRUE)
if(markernum>1){
for(markcount in 2:markernum){
tempbgid=read.table(paste("motifbin_",markcount,"_",i,"_",j,"_",3,".txt",sep=""),sep="\t")
bgtempbincount0=matrix(tempbgid[,1],byrow=TRUE,ncol=binnum)
bgtempbincount=cbind(bgtempbincount, bgtempbincount0)
tempbincountset=cbind(tempbincountset,matrix(tempbincount1[[markcount]],ncol=binnum,byrow=TRUE))
}
}
predictmotifbinrf=predict(motifbinrfmodeltotal,newdata=tempbincountset,type="vote")
temprfval[[j]][[3]]=predictmotifbinrf[,2]
predictmotifbinbg=predict(motifbinrfmodeltotal,newdata=bgtempbincount,type="vote")
temprfval[[j]][[4]]=predictmotifbinbg[,2]
motifbinbgecdf=ecdf((temprfval[[j]][[2]]+temprfval[[j]][[4]])/2)
predictmotifbinrfpval=1-motifbinbgecdf((temprfval[[j]][[1]]+temprfval[[j]][[3]])/2)
predictmotifbinrfpadj=p.adjust(predictmotifbinrfpval,method="fdr")
result[[j]][,i]=predictmotifbinrfpval
result1[[j]][,i]=predictmotifbinrfpadj
}
}
}
return(list(result,result1))
}
innerprocal=function(RFfdr,fdrcut=0.01,RFreads,name,binnum){
segnum=length(RFfdr)
time=ncol(RFfdr[[1]])
marknum=length(RFreads)
result=vector("list",length=segnum)
for(i in 1:segnum){
result[[i]]=matrix(nrow=nrow(RFfdr[[i]]),ncol=time)
}
for(i in 1:time){
tempbincounttotal=vector(length=binnum*length(name))
for(j in 1:segnum){
tempbincount1=matrix(nrow=nrow(RFfdr[[j]]))
for(namecount in 1:length(name)){
tempbincount=read.table(paste("motifbin_",namecount,"_",i,"_",j,".txt",sep=""),sep="\t")
tempbincount1=cbind(tempbincount1,matrix(tempbincount[,1],ncol=binnum,byrow=TRUE))
}
tempbincountset=tempbincount1[,2:ncol(tempbincount1)]
tempfdr=RFfdr[[j]][,i]
tempmarkreads=vector(length=nrow(RFfdr[[j]]))
for(markcount in 1:marknum){
tempmarkreads=tempmarkreads+RFreads[[markcount]][[j]][,i]
}
tempbincountset=cbind(tempbincountset,tempmarkreads)
tempbincountsetf=tempbincountset[tempfdr<fdrcut,]
tempbincountsetf= tempbincountsetf[order(tempbincountsetf[,ncol(tempbincountsetf)],decreasing=TRUE),]
tempbincountsetf=as.matrix(tempbincountsetf[,1:(ncol(tempbincountsetf)-1)])
if(nrow(tempbincountsetf)>100){
tempbincounttotal=rbind(tempbincounttotal, tempbincountsetf[sample(1:100,50),])
}else if(nrow(tempbincountsetf)>50){
tempbincounttotal=rbind(tempbincounttotal, tempbincountsetf[sample(1:nrow(tempbincountsetf),50),])
}else{
tempbincounttotal=rbind(tempbincounttotal, tempbincountsetf[,])
}
}
tempbincounttotal=tempbincounttotal[2:nrow(tempbincounttotal),]
tempbincounttotal=na.exclude(tempbincounttotal)
tempbincounttotal[is.infinite(tempbincounttotal)]=0
tempbinmean=colMeans(tempbincounttotal)
for(j in 1:segnum){
tempbincount1=matrix(nrow=nrow(RFfdr[[j]]))
for(namecount in 1:length(name)){
tempbincount=read.table(paste("motifbin_",namecount,"_",i,"_",j,".txt",sep=""),sep="\t")
tempbincount1=cbind(tempbincount1,matrix(tempbincount[,1],ncol=binnum,byrow=TRUE))
}
tempbincountset=as.matrix(tempbincount1[,2:ncol(tempbincount1)])
for(k in 1:nrow(tempbincountset)){
result[[j]][k,i]=sum(tempbinmean*tempbincountset[k,])
}
}
}
return(result)
}
clsnumcount<-function(data){
x <- 2:10
y <- sapply(x, function(k) {
set.seed(12345)
mean(sapply(1:10,function(d) {
clu <- kmeans(data,k,iter.max = 100)$cluster
cluSS <- sum(sapply(unique(clu),function(i) {
sum(rowSums(sweep(data[clu==i,,drop=F],2,colMeans(data[clu==i,,drop=F]),"-")^2))
}))
1-cluSS/sum((sweep(data,2,colMeans(data),"-"))^2)
}))
})
y <- c(0,y)
x <- c(1,x)
clunum <- x[which.min(sapply(x, function(i) {
x2 <- pmax(0,x-i)
sum(lm(y~x+x2)$residuals^2)
}))]
return(clunum)
}
motifclstest<-function(motifcls,motifname){
motiflen=length(motifname)
clsnum=length(table(motifcls[,ncol(motifcls)-2]))
motifclscount=matrix(nrow=motiflen,ncol=3*clsnum)
motifclstotal=table(motifcls[,ncol(motifcls)-2])
motifclssum=nrow(motifcls)
for(i in 1:motiflen){
if(length(motifcls[motifcls[,ncol(motifcls)-1]==motifname[i],1])>0){
motifclstemp=motifcls[motifcls[,ncol(motifcls)-1]==motifname[i],]
motifclstemptotal=nrow(motifclstemp)
for(j in 1:clsnum){
motifclscount[i,j]=length(motifclstemp[motifclstemp[,ncol(motifcls)-2]==j,1])
motifclscount[i,(j+2*clsnum)]=fisher.test(matrix(c(motifclscount[i,j],motifclstemptotal-motifclscount[i,j],motifclstotal[j],motifclssum-motifclstotal[j]),nrow=2),alternative="greater")$p.value
motifclscount[i,(j+clsnum)]=(motifclscount[i,j]/motifclstemptotal)/(motifclstotal[j]/motifclssum)
}
}
}
rownames(motifclscount)=as.character(motifname)
temp=matrix(p.adjust(as.vector(motifclscount[,(2*clsnum+1):(3*clsnum)]),"bonferroni"),ncol=clsnum)
motifclscount=cbind(motifclscount,temp)
return(motifclscount)
}
DynaMOs<-function(reads,peak,motifseg,core,readsmem,markernum,time,mode,format,motifbin,readlen,motifname){
motifnum=length(motifseg)
motifbinnum=(end(motifseg[[1]])[1]-start(motifseg[[1]])[1]+1)/motifbin
motifpeakoverlap=vector("list",length=markernum)
for(markcount in 1:markernum){
motifpeakoverlap[[markcount]]=vector("list",length=length(motifseg))
motifoverlap.multi=function(x){
temp=data.matrix(countOverlaps(motifseg[[x]],peak[[markcount]][[1]]))
if(time>1){
for(j in 2:time){
temp=cbind(temp,countOverlaps(motifseg[[x]],peak[[markcount]][[j]]))
}
}
return(temp)
}
motifpeakoverlap[[markcount]]=mclapply(1:motifnum,motifoverlap.multi,mc.cores=core)
}
print("motifpeakoverlap")
motifsegs=vector("list",length=motifnum)
motifid0=vector("list",length=motifnum)
if(mode=="l"){
motifsegs=motifseg
for(i in 1:motifnum){
motifid0[[i]]=1:length(motifsegs[[i]])
}
}else{
for(i in 1:motifnum){
temp=rowSums(motifpeakoverlap[[1]][[i]])
if(markernum>1){
for(motifcount in 2:markernum){
temp=temp+rowSums(motifpeakoverlap[[motifcount]][[i]])
}
}
motifid0[[i]]=which(temp>0)
motifsegs[[i]]=motifseg[[i]][motifid0[[i]]]
}
}
temp=countmotifsegreads(reads,motifsegs,readlen,core,format,motifid0,readsmem)
histonemotifsegreads=temp[[1]]
ratio=temp[[2]]
print("motifsegreadcount")
motifid1=motifRFid(motifseg,histonemotifsegreads,motifpeakoverlap,motifid0)
print("motifRFid")
get.motifbintemp<-function(motifseg,bin=motifbin){
temp=get.motifbin(motifseg,bin)
return(temp)
}
motifbintemp=mclapply(motifsegs,get.motifbintemp,mc.cores=core)
print("motifsegbin")
for(markcount in 1:markernum){
motifbincount(motifbintemp,reads[[markcount]],core,readlen,format,markcount,ratio[((markcount-1)*time):(markcount*time)])
}
print("motifbinreadcount")
rm(motifbintemp)
motifsegrf=vector("list",length=motifnum)
for(i in 1:motifnum){
motifsegrf[[i]]=vector("list",length=time)
for(j in 1:time){
motifsegrf[[i]][[j]]=vector("list",length=3)
for(k in 1:3){
motifsegrf[[i]][[j]][[k]]=motifseg[[i]][motifid1[[i]][[j]][[k]]]}
}
}
get.motifbinv2<-function(motifseg,bin=motifbin){
time=length(motifseg)
segnum=length(motifseg[[1]])
motifbinGR=vector("list",length=time)
for(i in 1:time){
motifbinGR[[i]]=vector("list",length=segnum)
for(timenum in 1:segnum){
if(length(motifseg[[i]][[timenum]])>0){
chrtemp=as.character(seqnames(motifseg[[i]][[timenum]]))
starttemp=start(motifseg[[i]][[timenum]])
endtemp=end(motifseg[[i]][[timenum]])
motiflocnum=length(chrtemp)
motifbinnum=(endtemp[1]-starttemp[1]+1)/bin
startmatrix=matrix(NA,nrow=motiflocnum,ncol=motifbinnum)
endmatrix=matrix(NA,nrow=motiflocnum,ncol=motifbinnum)
startmatrix[,1]=starttemp
endmatrix[,motifbinnum]=endtemp
if(motifbinnum > 1){
for(j in 2:motifbinnum){
startmatrix[,j]=startmatrix[,(j-1)]+bin
endmatrix[,(motifbinnum-j+1)]=endmatrix[,(motifbinnum-j+2)]-bin
}
}
motifbinGR[[i]][[timenum]]=GRanges(seqnames=Rle(rep(chrtemp,each=motifbinnum)),ranges=IRanges(start=c(t(startmatrix)),end=c(t(endmatrix))))}
}
}
return(motifbinGR)
}
motifbinrftemp=mclapply(motifsegrf,get.motifbinv2,mc.cores=core)
print("motifsegbinrf")
for(markcount in 1:markernum){
motifbincount(motifbinrftemp,reads[[markcount]],core,readlen,format,markcount,ratio[((markcount-1)*time):(markcount*time)])
}
print("motifbinreadcountrf")
motifrandomforestfdr=motifbinRFfdr(motifid1,motifsegs,markernum,motifbinnum)
print("motifrandomforestfdr")
for(i in 1:motifnum){
motifrandomforestfdr[[2]][[i]][is.na(motifrandomforestfdr[[2]][[i]])]=1
write.table(motifrandomforestfdr[[2]][[i]],paste("motif",motifname[i],"fdr.txt",sep="_"),quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE)
}
RFinnerprod=innerprocal(motifrandomforestfdr[[2]],0.01,histonemotifsegreads,1:markernum,motifbinnum)
print("RFinnerprod")
for(i in 1:motifnum){
write.table(RFinnerprod[[i]],paste("motif",motifname[i],"innerprod.txt",sep="_"),quote=FALSE,sep="\t",row.names=FALSE,col.names=FALSE)
}
for(i in 1:motifnum){
temp=t(scale(t(histonemotifsegreads[[1]][[i]])))
if(markernum>1){
for(markcount in 2:markernum){
temp=cbind(temp,t(scale(t(histonemotifsegreads[[markcount]][[i]]))))
}
}
write.table(round(temp,2),paste("motif_",motifname[i],"_readcount.txt",sep=""),sep="\t",row.names=TRUE,col.names=FALSE,quote=FALSE)
}
print("motifreadwrite")
}
DynaMO<-function(readlist,peak,motif,mode="l",core=1,readsmem=TRUE,readlen=150,readformat="bam",motiflen=300,motifbin=20,cluster=0,fdrcut=0.01,batch=TRUE){
markernum=length(readlist)
time=nrow(readlist[[1]])
motifnum=length(motif)
motifname=1:motifnum
if(readsmem){
reads=vector("list",length=markernum)
for(i in 1:markernum){
reads[[i]]=vector("list",length=time)
for(j in 1:time){
reads[[i]][[j]]=get.reads(readlist[[i]][j,1],readlen,readformat)
}
}
}else{
reads=readlist
}
motifseg=vector("list",length=motifnum)
get.motifmulti=function(x){
result=get.motif(motif[x],motiflen)
return(result)
}
motifseg=mclapply(1:motifnum,get.motifmulti,mc.cores=core)
if(motifnum>15&batch){
motifbatch=10
temp=motifnum%%motifbatch
if(temp>5){
motifround=motifnum%/%motifbatch+1
}else{
motifround=motifnum%/%motifbatch
}
for(i in 1:(motifround-1)){
motifsegtemp=motifseg[((i-1)*motifbatch+1):(i*motifbatch)]
motifnametemp=((i-1)*motifbatch+1):(i*motifbatch)
DynaMOs(reads,peak,motifsegtemp,core,readsmem,markernum,time,mode,readformat,motifbin,readlen,motifnametemp)
}
motifsegtemp=motifseg[((motifround-1)*motifbatch+1):motifnum]
motifnametemp=((motifround-1)*motifbatch+1):motifnum
DynaMOs(reads,peak,motifsegtemp,core,readsmem,markernum,time,mode,readformat,motifbin,readlen,motifnametemp)
}else{
DynaMOs(reads,peak,motifseg,core,readsmem,markernum,time,mode,readformat,motifbin,readlen,motifname)
}
if(time>1){
motiffdr=read.table("motif_1_fdr.txt",sep="\t")
numtest=apply(motiffdr,1,function(x){if(min(x)<=fdrcut){return(1)}else{return(0)}})
temp=try(read.table("motif_1_readcount.txt",sep="\t"))
temp1=try(temp[,2:ncol(temp)])
rownames(temp1)=paste(1,temp[,1],sep="_")
motiffdrread=temp1[numtest==1,]
if(motifnum>1){
for(i in 2:motifnum){
motiffdr=read.table(paste("motif",motifname[i],"fdr.txt",sep="_"),sep="\t")
numtest=apply(motiffdr,1,function(x){if(min(x)<=fdrcut){return(1)}else{return(0)}})
temp=try(read.table(paste("motif",i,"readcount.txt",sep="_"),sep="\t"))
temp1=try(temp[,2:ncol(temp)])
rownames(temp1)=paste(i,temp[,1],sep="_")
motiffdrread=rbind(motiffdrread,temp1[numtest==1,])
}
}
motiffdrread=na.exclude(motiffdrread)
if(cluster==0){
tempread=motiffdrread[sample(1:nrow(motiffdrread),min(c(nrow(motiffdrread),5000))),]
clsnum=clsnumcount(tempread)
}else{
clsnum=cluster
}
motifk=kmeans(motiffdrread,centers=clsnum,20,20)
motiffdrread1=cbind(motiffdrread,motifk$cluster)
tempname=matrix(as.numeric(unlist(strsplit(rownames(motiffdrread1),split="_"))),ncol=2,byrow=TRUE)
motiffdrread1=cbind(motiffdrread1,tempname)
colnames(motiffdrread1)=c(paste(rep(paste("marker",1:markernum,sep=""),each=time),1:time,sep="_"),"cluster","motifname","motiflocid")
write.table(motiffdrread1,"motif_filter_reads_cluster.txt",sep="\t",row.names=FALSE,quote=FALSE)
print("clustering finished")
motiffdrclstest=motifclstest(motiffdrread1,1:motifnum)
colnames(motiffdrclstest)=paste(rep(paste("cluster",1:clsnum)),rep(c("count","ratio","p-value","padj"),each=clsnum))
write.table(motiffdrclstest,"motif_cluster_enrichment.txt",sep="\t",quote=FALSE)
}
}
fitfunc <- function(expr,time) {
xseq <- seq(time[1],time[length(time)],length.out=1000)
predvalue <- predict(loess(expr~time),data.frame(time=xseq))
maximum <- xseq[which.max(predvalue)]
maxderivative <- xseq[which.max(diff(predvalue))]
list(maximum=maximum,maxderivative=maxderivative,fitted=data.frame(time=xseq,predict=predvalue),data=data.frame(time=time,expr=expr))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.