###############################################################################
##.assign2gene function assign cs.temp to reference
.assign2gene <- function(cs.temp,ref,upstream, upstreamOverlap, downstream, filterCluster){
##define variable as a NULL value
chr = strand = start.a = end.b = dis = dis.start = up = down = down.up = cluster = NULL
ref[, "subset" := paste0(seqnames,"_",strand)]
setkey(ref,subset)
cs.temp[, "subset" := paste0(chr,"_",strand)]
setkey(cs.temp,subset)
asn <- lapply(as.list(cs.temp[,unique(subset)]), function(x) {
cs <- cs.temp[list(x)]
cs[,subset:= NULL]
colnames(cs)[3:4] <- c("start.c","end.c")
gr <- makeGRangesFromDataFrame(cs, keep.extra.columns= TRUE, start.field = "dominant_tss", end.field = "dominant_tss")
ref_sub <- ref[list(x)]
ref_coding <- ref[list(x)]
if(!(x %in% ref[,unique(subset)])){
GenomicRanges::mcols(gr)[,"gene"] <- NA
if(filterCluster == TRUE){
GenomicRanges::mcols(gr)[,"inCoding"] <- NA
}
}else{
if(cs$strand[1] == "+"){
setorder(ref_sub,start)
ref_sub[,end.b:=data.table::shift(end, 1, fill = 0,type='lag')]
ref_sub[,width:=data.table::shift(width, 1, fill = 1000,type='lag')]
ref_sub[,dis := start - end.b]
## ref_sub[,dis:= ifelse(dis < 0, 0,dis)] ##Oct1
ref_sub[,up:= ifelse(dis > upstream, upstream,
ifelse(dis + width <= upstreamOverlap, dis + width -1,
ifelse(dis < upstreamOverlap, upstreamOverlap, dis)))]
##add down to solve overlap issue
ref_sub[,start.a:=data.table::shift(start, 1, fill = 1000,type='lead')]
ref_sub[,dis.start := start.a - start]
ref_sub[,down.up := data.table::shift(up, 1, fill = 1000,type='lead')]
ref_sub[,down:= ifelse(dis.start > downstream + down.up, downstream,
ifelse(dis.start < down.up, 0, dis.start-down.up))]
##
ref_sub[,end:= start + down] ##start - 1 -> start April10
ref_sub[,start := end - down - up +1]
}else{
setorder(ref_sub,end)
ref_sub[,end.b:=data.table::shift(start, 1, fill = 0,type='lead')]
ref_sub[(.N),end.b := end + 1000]
ref_sub[,width:=data.table::shift(width, 1, fill = 1000,type='lead')]
ref_sub[,dis := end.b - end] ##start -> end April10
## ref_sub[,dis:= ifelse(dis < 0,0,dis)]
ref_sub[,up:= ifelse(dis > upstream, upstream,
ifelse(dis + width <= upstreamOverlap, dis + width -1,
ifelse(dis < upstreamOverlap, upstreamOverlap, dis)))]
##add down to solve overlap issue
ref_sub[,start.a:=data.table::shift(end, 1, fill = 1000,type='lag')]
ref_sub[, dis.start := end -start.a]
ref_sub[, down.up := data.table::shift(up, 1, fill = 1000,type='lag')]
ref_sub[,down:= ifelse(dis.start >= downstream+ down.up, downstream,
ifelse(dis.start < down.up, 0,dis.start - down.up))]
##
ref_sub[,start:= end- down]##end + 1 -> end April10
ref_sub[,end:= start + down + up -1]
}
rownames(ref_sub) <- ref_sub$gene_id
ref_sub <- makeGRangesFromDataFrame(ref_sub, keep.extra.columns= FALSE)
##find overlap with promoter region
hits <- findOverlaps(gr,ref_sub)
################fix breakTies issue, Jan09,2020##############
hits <- as.data.frame(hits)
hits <- hits[!duplicated(hits$queryHits),]
gr1 <- gr[hits$queryHits]
gr2 <- gr[-hits$queryHits]
GenomicRanges::mcols(gr1)[,"gene"] <- names(ref_sub)[hits$subjectHits]
GenomicRanges::mcols(gr2)[,"gene"] <- NA
gr <- c(gr1,gr2)
# hits <- breakTies(hits, method = "first")
# hits <- methods::as(hits, "List")
# hits <- extractList(names(ref_sub), hits)
# hits <- as.character(hits)
# mcols(gr)[,"gene"] <- hits
#############################################################
if(filterCluster == TRUE){
##find overlap with coding regions
##coding
rownames(ref_coding) <- ref_coding$gene_id
setorder(ref_coding,start)
ref_coding <- makeGRangesFromDataFrame(ref_coding, keep.extra.columns = FALSE)
hits <- findOverlaps(gr,ref_coding)
################fix breakTies issue, Jan09,2020##############
hits <- as.data.frame(hits)
hits <- hits[!duplicated(hits$queryHits),]
gr1 <- gr[hits$queryHits]
gr2 <- gr[-hits$queryHits]
GenomicRanges::mcols(gr1)[,"inCoding"] <- names(ref_sub)[hits$subjectHits]
GenomicRanges::mcols(gr2)[,"inCoding"] <- NA
gr <- c(gr1,gr2)
# hits <- breakTies(hits, method = "first")
# hits <- methods::as(hits, "List")
# hits <- extractList(names(ref_coding), hits)
# hits <- as.character(hits)
# mcols(gr)[,"inCoding"] <- hits
#############################################################
}
}
gr <- as.data.frame(gr)
gr$dominant_tss <- gr$start
colnames(gr)[c(1,7,8)] <- c("chr","start","end")
gr <- gr[,c(6,1,7,8,5,ncol(gr),9:(ncol(gr)-1))]
setDT(gr)
setorder(gr, start)
return(gr)
})
setorder(do.call("rbind",asn), cluster)
}
###############################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.