#' Data class chromo.regs
#'
#' Class to store shattered regions and information produced by shattered.regions and shattered.regions.cnv functions
#'
#' @param regions.summary (list): a list of data.frames sumarizing the information of shattered regions found in each sample
#' @param high.density.regions (matrix): a numeric matrix representing high breakpoint density genomic bins in each sample (values 1 = high density break; 0 = normal)
#' @param high.density.regions.hc (matrix): a numeric matrix representing high breakpoint density genomic bins in each sample (values 1 = high density break; 0 = normal).
#' Only those bins that overlap with high confidence regions defined in regions.summary are set to = 1
#' @param cnv.brk.dens (matrix): a numeric matrix representing the number of CNV segmentation breakpoints found in at genomic bins in each sample
#' @param svc.brk.dens (matrix): a numeric matrix representing the number of SV breakpoints found at genomic bins in each sample
#' @param cnv.brk.common.dens (matrix): a numeric matrix representing the number of CNV breakpoints colocalizing SV breakpoints found at genomic bins in each sample
#' @param svc.brk.common.dens (matrix): a numeric matrix representing the number of SV breakpoints colocalizing CNV breakpoints found at genomic bins in each sample
#' @param cnvbrk (S4): on object generated by cnv.breaks function
#' @param svcbrk (S4): on object generated by svc.breaks function
#' @param common.brk (list): on object generated by match.breaks function
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv
#' @param svc (S4) an object of class svcnvio containing data type 'svc' validated by validate.svc
#' @param param (list): list of configuration parameters provided or set as default
#' @return an instance of the class 'chromo.regs' containing breakpoint mapping onto genes
#' @export
chromo.regs <- setClass("chromo.regs",
representation(
regions.summary = "list",
high.density.regions = "matrix",
high.density.regions.hc = "matrix",
cnv.brk.dens = "matrix",
svc.brk.dens = "matrix",
cnv.brk.common.dens = "matrix",
svc.brk.common.dens = "matrix",
cnvbrk = 'breaks',
svcbrk = "breaks",
common.brk = "list",
cnv = "svcnvio",
svc = "svcnvio",
param = "list"
))
setMethod("show","chromo.regs",function(object){
conf <- table(names(unlist(lapply(unname(object@regions.summary),function(x) table(x[,"conf"])))))
writeLines(paste("An object of class chromo.regs from svpluscnv containing the following stats:",
"\nNumber of samples tested=",nrow(object@high.density.regions),
"\nNumber of samples with high-breakpoint density bins=",length(which(apply(object@high.density.regions,1,sum) != 0)),
"\nNumber of samples with high-confidence shattered regions=",conf["HC"],
"\nNumber of samples with low-confidence shattered regions=",conf["lc"]))
})
#' Return the binary matrix containing high confidence high-breakpoint-densityregion definitions
#' @param object (chromo.regs) An object of class chromo.regs
#' @param conf (character) Either "hc" for high confidence HBD or else include all
#' @return an instance of the class 'chromo.regs' containing breakpoint mapping onto genes
#' @export
#' @docType methods
#' @rdname hbd.mat-methods
setGeneric("hbd.mat", function(object, conf="hc") standardGeneric("hbd.mat"))
#' @rdname hbd.mat-methods
#' @export
setMethod("hbd.mat", "chromo.regs", function(object, conf="hc"){
if(conf == "hc"){
object@high.density.regions.hc
}else{
object@high.density.regions
}
})
#' Return the genomicRanges object containing the genomic bins
#' @param object (chromo.regs) An object of class chromo.regs
#' @return an genomicRanges object with defined genomic bins
#' @export
#' @docType methods
#' @rdname extract.bins-methods
setGeneric("extract.bins", function(object) standardGeneric("extract.bins"))
#' @rdname extract.bins-methods
#' @export
setMethod("extract.bins", "chromo.regs", function(object){
binNames <- colnames(object@high.density.regions)
bindf <- remove.factors(data.frame(do.call(rbind,strsplit(binNames," "))))
bindf[,2] <- as.numeric(bindf[,2])
bindf[,3] <- as.numeric(bindf[,3])
colnames(bindf) <- c("chrom","start","end")
bingr <- with(bindf,GRanges(chrom, IRanges(start=start, end=end), name=binNames))
return(bingr)
})
#' Evaluate true catastrophic events
#' Evaluate shattered regions based on interleaved breaks and breakpoint dispersion parameters in order to identify true catastrophic chromosomal alterations
#'
#' @param chromo.regs.obj (chromo.regs) An object of class chromo.regs
#' @param interleaved.cut (numeric) the percentage of non interleaved structural variant calls
#' @param dist.iqm.cut (numeric) interquantile average of the distance between breakpoints within a shattered region
#' @param verbose (logical)
#' @return an instance of the class 'chromo.regs' containing breakpoint mapping onto genes
#' @export
#'
shattered.eval <- function(chromo.regs.obj,
interleaved.cut=0.5,
dist.iqm.cut=100000,
verbose=TRUE){
svcdat <- chromo.regs.obj@svc@data
cnvbrk <- chromo.regs.obj@cnvbrk
svcbrk <- chromo.regs.obj@svcbrk
if(verbose){
message(paste("Evaluating shattered regions in ",length(names(chromo.regs.obj@regions.summary))," samples",sep="") )
pb <- txtProgressBar(style=3)
cc <-0
tot <- length(chromo.regs.obj@regions.summary)
}
for(cl in names(chromo.regs.obj@regions.summary)){
if(verbose) cc <- cc+1
if(verbose) setTxtProgressBar(pb, cc/tot)
regions <- chromo.regs.obj@regions.summary[[cl]]
br1 <- cnvbrk@breaks[which(cnvbrk@breaks$sample == cl),2:3]
br2 <- svcbrk@breaks[which(svcbrk@breaks$sample == cl),2:3]
colnames(br1) <- colnames(br2) <- c("chrom","pos")
br1.gr <- with(br1, GRanges(chrom, IRanges(start=pos, end=pos)))
br2.gr <- with(br2, GRanges(chrom, IRanges(start=pos, end=pos)))
c.br1 <- chromo.regs.obj@common.brk$brk1_match[which(chromo.regs.obj@common.brk$brk1_match$sample == cl),2:3]
c.br2 <- chromo.regs.obj@common.brk$brk2_match[which(chromo.regs.obj@common.brk$brk2_match$sample == cl),2:3]
colnames(c.br1) <- colnames(c.br2) <- c("chrom","pos")
c.br1.gr <- with(c.br1, GRanges(chrom, IRanges(start=pos, end=pos)))
c.br2.gr <- with(c.br2, GRanges(chrom, IRanges(start=pos, end=pos)))
regions_gr <- with(regions, GRanges(chrom, IRanges(start=start, end=end)))
hits_1 = suppressWarnings(GenomicAlignments::findOverlaps(regions_gr,br1.gr))
hits_2 = suppressWarnings(GenomicAlignments::findOverlaps(regions_gr,br2.gr))
c.hits_1 = suppressWarnings(GenomicAlignments::findOverlaps(regions_gr,c.br1.gr))
c.hits_2 = suppressWarnings(GenomicAlignments::findOverlaps(regions_gr,c.br2.gr))
svdat_i <- svcdat[which(svcdat$sample == cl),]
sv_ranges_ori <- with(svdat_i, GRanges(chrom1, IRanges(start=pos1, end=pos1)))
sv_ranges_dest <- with(svdat_i, GRanges(chrom2, IRanges(start=pos2, end=pos2)))
hits_ori = suppressWarnings(GenomicAlignments::findOverlaps(regions_gr,sv_ranges_ori))
hits_dest = suppressWarnings(GenomicAlignments::findOverlaps(regions_gr,sv_ranges_dest))
# calculate interleaved score, n.breaks, dispersion, and median distance between consecutive breaks
start <- end <- reg.size <- dist.iqm.cnv <- dist.iqm.sv <- n.brk.cnv <- n.brk.sv <- n.orth.cnv <- n.orth.sv <- interleaved <- rep(0,nrow(regions))
conf <- rep("HC",nrow(regions))
for(i in 1:nrow(regions)){
sites1 <- sort(unique(br1[subjectHits(hits_1)[which(queryHits(hits_1) == i)],]$pos))
dist.iqm.cnv[i] <- IQM(sites1[2:length(sites1)] - sites1[1:(length(sites1)-1) ],lowQ = 0.2,upQ = 0.8)
n.brk.cnv[i] <- length(sites1)
sites2 <- sort(unique(br2[subjectHits(hits_2)[which(queryHits(hits_2) == i)],]$pos))
dist.iqm.sv[i] <- IQM(sites2[2:length(sites2)] - sites2[1:(length(sites2)-1) ],lowQ = 0.2,upQ = 0.8)
n.brk.sv[i] <- length(sites2)
c.sites1 <- sort(unique(c.br1[subjectHits(c.hits_1)[which(queryHits(c.hits_1) == i)],]$pos))
n.orth.cnv[i] <- length(c.sites1)
c.sites2 <- sort(unique(c.br2[subjectHits(c.hits_2)[which(queryHits(c.hits_2) == i)],]$pos))
n.orth.sv[i] <- length(c.sites2)
start[i] <- min(c(sites1,sites2))
end[i] <- max(c(sites1,sites2))
a<-svdat_i[subjectHits(hits_ori)[which(queryHits(hits_ori) == i)],]
b<-svdat_i[subjectHits(hits_dest)[which(queryHits(hits_dest) == i)],]
apos<-a$pos1
names(apos) <- rownames(a)
bpos<-b$pos2
names(bpos) <- rownames(b)
brkids <- names(sort(c(apos,bpos)))
reg.size <- regions$end-regions$start
ct<-0
for(x in 2:length(brkids)) if(brkids[x] == brkids[x-1]) ct<-ct+1
interleaved[i] <- ct/length(unique(brkids))
}
chrom <-regions[,"chrom"]
nbins<-regions[,"nbins"]
regions <- data.table(chrom,start,end,nbins)
# Find links between shattered regions
if(nrow(chromo.regs.obj@regions.summary[[cl]]) > 1 ){
record_mat<- matrix(nrow=0,ncol=2)
links <- rep("",nrow(regions))
for(i in 1:nrow(regions)){
for(j in 1:nrow(regions)){
region_a_hits <- subjectHits(hits_ori)[which(queryHits(hits_ori) == i)]
region_b_hits <- subjectHits(hits_dest)[which(queryHits(hits_dest) == j)]
if(length(intersect(region_a_hits,region_b_hits)) > 0 ){
record_mat <- rbind(record_mat,c(i,j),c(j,i))
}
}
}
for(i in 1:nrow(regions)){
links[i] <- paste(setdiff(as.character(sort(unique(c(i, record_mat[which(record_mat[,1] == i),2]) ))),as.character(i)),collapse=",")
if(length(grep("[0-9]",links[i])) == 0) links[i]<-"-"
}
}else{
links <- "-"
}
if(!is.null(interleaved.cut)) conf[which(interleaved >= interleaved.cut)] <- "lc"
if(!is.null(dist.iqm.cut)) conf[which(apply(cbind(dist.iqm.cnv,dist.iqm.sv),1,mean) < dist.iqm.cut)] <- "lc"
conf[which(links != "-")] <- "HC"
chromo.regs.obj@regions.summary[[cl]] <- data.table(regions,links,reg.size,dist.iqm.cnv,dist.iqm.sv,n.brk.cnv,n.brk.sv,n.orth.cnv,n.orth.sv,interleaved,conf)
}
if(verbose) close(pb)
bins <- data.table(do.call(rbind,strsplit(colnames(chromo.regs.obj@high.density.regions)," ")),colnames(chromo.regs.obj@high.density.regions))
colnames(bins) <- c("chrom","start","end","binid")
bins$start <- as.numeric(bins$start)
bins$end <- as.numeric(bins$end)
binsGR <- with(bins, GRanges(chrom, IRanges(start=start, end=end)))
highDensityRegionsHC <- chromo.regs.obj@high.density.regions
for(cl in names(chromo.regs.obj@regions.summary)){
lc <- chromo.regs.obj@regions.summary[[cl]][which(chromo.regs.obj@regions.summary[[cl]]$conf == "lc"),]
if(nrow(lc) > 0){
lcGR<- with(lc, GRanges(chrom, IRanges(start=start, end=end)))
hits = suppressWarnings(GenomicAlignments::findOverlaps(binsGR,lcGR))
highDensityRegionsHC[cl,bins$bins[unique(queryHits(hits)),]] <- 0
}
}
chromo.regs.obj@high.density.regions.hc <- highDensityRegionsHC
return(chromo.regs.obj)
}
#' Shattered region detection
#'
#' Caller for the identification of shattered genomic regions based on CNV and SVC data
#'
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
#' @param svc (S4) an object of class svcnvio containing data type 'svc' initialized by validate.svc
#' @param fc.pct (numeric) inherited from cnv.breaks(); copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents a fold change of 0.8 or 1.2
#' @param min.cnv.size (numeric) inherited from cnv.breaks(); The minimun segment size (in base pairs) to include in the analysis
#' @param min.num.probes (numeric) inherited from cnv.breaks(); The minimun number of probes per segment to include in the analysis
#' @param low.cov (data.frame) inherited from cnv.breaks(), svc.breaks() and match.breaks; a data.frame (chr, start, end) indicating low coverage regions to exclude from the analysis
#' @param clean.brk (numeric) inherited from cnv.breaks(); n cutoff for redundant breakpoints to filter out; if NULL, no filter will be applied
#' @param window.size (numeric) size in megabases of the genmome bin to compute break density
#' @param slide.size (numeric) size in megabases of the sliding genmome window
#' @param num.cnv.breaks (numeric) number of segmentation breakpoints per segments to be considered high-density break
#' @param num.cnv.sd (numeric) number of standard deviations above the sample average for num.cnv.breaks
#' @param num.svc.breaks (numeric) number of svc breakpoints per segments to be considered high-density break
#' @param num.svc.sd (numeric) number of standard deviations above the sample average for num.svc.breaks
#' @param num.common.breaks (numeric) number of common SV and segmentation breakpoints per segments to be considered high-density break
#' @param num.common.sd (numeric) number of standard deviations above the sample average for num.common.breaks
#' @param maxgap (numeric) inherited from match.breaks(); sets the maximum gap between co-localizing orthogonal breakpoints
#' @param chrlist (character) vector containing chromosomes to include in the analysis; if NULL all chromosomes available in the input will be included
#' @param chr.lim (data.frame) 3 column table (chrom, begin, end) indicating the chromosome most distal coordinates with coverage. Also returned by the function svpluscnv::chromosome.limit.coords.
#' @param interleaved.cut (numeric) 0-1 value indicating percentage of interleaved (non-contiguous) SV breakpoint pairs
#' @param dist.iqm.cut (numeric) interquantile average of the distance between breakpoints within a shattered region
#' @param verbose (logical)
#' @return an instance of the class 'chromo.regs' containing breakpoint mapping onto genes
#' @keywords chromothripsis, chromoplexy, chromosome shattering
#' @export
#' @examples
#'
#' ## validate input data.frames
#' cnv <- validate.cnv(segdat_lung_ccle)
#' svc <- validate.svc(svdat_lung_ccle)
#'
#' shattered.regions(cnv,svc)
shattered.regions <- function(cnv,
svc,
fc.pct = 0.2,
min.cnv.size = 0,
min.num.probes=0,
low.cov = NULL,
clean.brk=NULL,
window.size = 10,
slide.size = 2,
num.cnv.breaks = 6,
num.cnv.sd = 5,
num.svc.breaks = 6,
num.svc.sd = 5,
num.common.breaks = 3,
num.common.sd = 3,
maxgap=10000,
chrlist=NULL,
chr.lim=NULL,
interleaved.cut = 0.33,
dist.iqm.cut = 1e+05,
verbose=TRUE){
stopifnot(cnv@type == "cnv")
cnvdat <- cnv@data
stopifnot(svc@type == "svc")
svcdat <- svc@data
if(is.null(chr.lim)){
chr.lim <- chromosome.limit.coords(cnv)
}else{
stopifnot(ncol(chr.lim) == 3)
}
if(!is.null(chrlist)){
chr.lim <- chr.lim[which(chr.lim$chrom %in% chrlist)]
}
if(verbose) message("Finding 'cnv' breakpoints")
cnvbrk <- cnv.breaks(cnv = cnv,
fc.pct = fc.pct,
min.cnv.size = min.cnv.size,
min.num.probes = min.num.probes,
low.cov = low.cov,
clean.brk = clean.brk,
chrlist=chrlist,
verbose = verbose)
if(verbose) message("Finding 'svc' breakpoints")
svcbrk <- svc.breaks(svc = svc,
chrlist=chrlist,
low.cov = low.cov)
if(verbose) message("Matching 'cnv' and 'svc' breakpoints")
common.brk <- match.breaks(cnvbrk,svcbrk,
maxgap = maxgap,
verbose = verbose,
plot=FALSE)
if(is.null(chrlist)) chrlist <- unique(c(cnvdat$chrom,svcdat$chrom1,svcdat$chrom2))
chrlist <- chr.sort(chrlist)
if(verbose) message("Mapping 'cnv' breakpoints across the genome:")
cnvbrk.dens <- break.density(cnvbrk, chr.lim = chr.lim, window.size = window.size, slide.size = slide.size, verbose = verbose)
if(verbose) message("Mapping 'svc' breakpoints across the genome:")
svcbrk.dens <- break.density(svcbrk,chr.lim = chr.lim, window.size = window.size, slide.size = slide.size,verbose = verbose)
burden1 <- common.brk$restab$matched.brk1
burden2 <- common.brk$restab$matched.brk2
names(burden1) <- names(burden2) <- common.brk$restab$sample
common.brk1 <- breaks(breaks = common.brk$brk1_match,
burden=burden1,
param=list(datatype="combined"))
common.brk2 <- breaks(breaks = common.brk$brk2_match,
burden = burden2,
param=list(datatype="combined"))
if(verbose) message("Mapping 'cnv' validated breakpoints across the genome:")
cnvbrk.common.dens <- break.density(common.brk1, chr.lim = chr.lim, window.size = window.size, slide.size = slide.size,verbose = verbose)
if(verbose) message("Mapping 'svc' validated breakpoints across the genome:")
svcbrk.common.dens <- break.density(common.brk2,chr.lim = chr.lim, window.size = window.size, slide.size = slide.size,verbose = verbose)
commonSamples <- intersect(names(cnvbrk@burden),names(svcbrk@burden))
if(length(commonSamples) == 0) stop("There is no common samples between 'cnv' and 'svc' input datasets.")
# calculate inter quantile mean and standard deviation per sample
iqmdata1<- rep(0,length(commonSamples))
names(iqmdata1) <- commonSamples
sddata1 <- iqmdata2 <- sddata2<- iqmdata3<- sddata3 <- iqmdata1
iqmdata1[names(apply(cnvbrk.dens,1,IQM,lowQ=.1,upQ=.9))] <- apply(cnvbrk.dens,1,IQM,lowQ=.1,upQ=.9)
sddata1[names(apply(cnvbrk.dens,1,IQSD,lowQ=.1,upQ=.9))] <- apply(cnvbrk.dens,1,IQSD,lowQ=.1,upQ=.9)
iqmdata2[names(apply(svcbrk.dens,1,IQM,lowQ=.1,upQ=.9))] <- apply(svcbrk.dens,1,IQM,lowQ=.1,upQ=.9)
sddata2[names(apply(svcbrk.dens,1,IQSD,lowQ=.1,upQ=.9))] <- apply(svcbrk.dens,1,IQSD,lowQ=.1,upQ=.9)
iqmdata3[names(apply(cnvbrk.common.dens,1,IQM,lowQ=.1,upQ=.9))] <- apply(cnvbrk.common.dens,1,IQM,lowQ=.1,upQ=.9)
sddata3[names(apply(cnvbrk.common.dens,1,IQSD,lowQ=.1,upQ=.9))] <- apply(cnvbrk.common.dens,1,IQSD,lowQ=.1,upQ=.9)
a <- sapply(commonSamples, function(i) names(which(cnvbrk.dens[i,] > iqmdata1[i]+num.cnv.sd*sddata1[i] )),simplify=FALSE)
b <- sapply(commonSamples, function(i) names(which(cnvbrk.dens[i,] >= num.cnv.breaks)),simplify=FALSE)
c <- sapply(commonSamples, function(i) names(which(svcbrk.dens[i,] > iqmdata2[i]+num.svc.sd*sddata2[i] )),simplify=FALSE)
d <- sapply(commonSamples, function(i) names(which(svcbrk.dens[i,] >= num.svc.breaks)),simplify=FALSE)
e <- sapply(commonSamples, function(i) names(which(cnvbrk.common.dens[i,] > iqmdata3[i]+num.common.sd*sddata3[i] )),simplify=FALSE)
f <- sapply(commonSamples, function(i) names(which(cnvbrk.common.dens[i,] >= num.common.breaks)),simplify=FALSE)
# condition for chromothripsis: at least n=breaks > 6 (svc SND cnv) AND n-breaks > u+2*sd (svc AND cnv)
res <- sapply(commonSamples,function(i) Reduce(intersect, list(a[[i]],b[[i]],c[[i]],d[[i]],e[[i]],f[[i]])), simplify=FALSE)
highDensityRegions <- rbind(cnvbrk.dens[commonSamples,])
if(length(commonSamples == 1)) rownames(highDensityRegions) <- commonSamples
highDensityRegions[] <- 0
for(cl in commonSamples) highDensityRegions[cl,res[[cl]]] <- 1
#res <- res[which(unlist(lapply(res,length)) >0)]
if(verbose){
message("Locating shattered regions...")
pb <- txtProgressBar(style=3)
cc <-0
tot <- length(res)
}
restab <- list()
for(cl in names(res)){
if(verbose) cc <- cc+1
if(verbose) setTxtProgressBar(pb, cc/tot)
if(length(res[[cl]]) > 0){
tab <- data.table(do.call(rbind,strsplit(res[[cl]]," ")))
colnames(tab) <- c("chrom","start","end")
tab$start <- as.numeric(tab$start )
tab$end <- as.numeric(tab$end )
tabgr = with(tab, GRanges(chrom, IRanges(start=start, end=end)))
hits = as.data.frame(suppressWarnings(GenomicAlignments::findOverlaps(tabgr,tabgr)))
agg <- aggregate(subjectHits ~ queryHits, hits, paste,simplify=FALSE)
prev<-c(); cnum <- 0
agglist <- list()
for(x in agg$subjectHits){
if(length(intersect(x,prev) > 0)){
agglist[[cnum]] <- unique(c(x,prev))
prev <- agglist[[cnum]]
}else{
cnum <- cnum+1
agglist[[cnum]]<- x
prev <-agglist[[cnum]]
}
}
agglistUniq <- list()
for(i in 1:length(agglist)){
chrom <- as.character(unique(tab[as.numeric(agglist[[i]]),"chrom"]))
start<-min( tab[as.numeric(agglist[[i]]),"start"])
end<-max( tab[as.numeric(agglist[[i]]),"end"])
nbins <- length(agglist[[i]])
conf <- "lc"
agglistUniq[[i]] <- data.table(chrom,start,end,nbins,conf)
}
tabmerged <- data.table(do.call(rbind,agglistUniq))
restab[[cl]] <- tabmerged
}
}
if(verbose) close(pb)
results <- chromo.regs(
regions.summary = restab,
high.density.regions = highDensityRegions,
high.density.regions.hc = matrix(),
cnv.brk.dens = cnvbrk.dens,
svc.brk.dens = svcbrk.dens,
cnv.brk.common.dens = cnvbrk.common.dens,
svc.brk.common.dens = svcbrk.common.dens,
cnvbrk = cnvbrk,
svcbrk = svcbrk,
common.brk = common.brk,
cnv = cnv,
svc = svc,
param = list(fc.pct = fc.pct,
min.cnv.size = min.cnv.size,
min.num.probes=min.num.probes,
low.cov = low.cov,
clean.brk=clean.brk,
window.size = window.size,
slide.size = slide.size,
num.cnv.breaks = num.cnv.breaks,
num.cnv.sd = num.cnv.sd,
num.svc.breaks = num.svc.breaks,
num.svc.sd = num.svc.sd,
num.common.breaks = num.common.breaks,
num.common.sd = num.common.sd,
maxgap=maxgap,
chrlist=chrlist,
interleaved.cut = interleaved.cut,
dist.iqm.cut = dist.iqm.cut)
)
results <- shattered.eval(results, interleaved.cut = interleaved.cut, dist.iqm.cut = dist.iqm.cut, verbose = verbose)
return(results)
}
#' Generates a GenomicRanges objact containing genomic bins based on a given bin size. If a cnv (svcnvio) object is provided the chromosome limits
#' will be obtaind from mapped regions, otherwise chromosome limits will be obtained from the database (D3GB)
#'
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
#' @param genome.v (hg19 or hg38) reference genome version to generate genoic bins (ignored if cnv is not NULL)
#' @param window.size (numeric) size in megabases to generate genomic bins
#' @param slide.size (numeric) size in megabases of the sliding genomic window; slide.size must be <= 1
#' @return an instance of the class 'chromo.regs' containing information about shattered regions
#' @export
#'
get.chr.bins <- function(cnv = NULL, genome.v= "hg19", window.size=10, slide.size=2){
WS <- window.size * 1e+6
SS <- slide.size * 1e+6
offset <- round(window.size/slide.size)
stopifnot(WS >= SS)
if(is.null(cnv)){
chr.lim<- d3gb.chr.lim(genome.v=genome.v)
}else{
chr.lim<- chromosome.limit.coords(cnv)
}
df.list<-list()
for(chr in unique(chr.lim$chrom)){
binlimits <- seq(chr.lim$begin[which(chr.lim$chrom == chr)], chr.lim$end[which(chr.lim$chrom == chr)],SS)
chrom <- rep(chr, length(binlimits)-offset)
Begin <- binlimits[1:(length(binlimits)-offset)]+1
End <- binlimits[(offset+1):length(binlimits)]
Name <- paste(chrom,Begin,End,sep=" ")
df.list[[chr]] <- data.frame(chrom,Begin,End,Name)
}
bingr <- with(do.call(rbind,df.list), GRanges(chrom, IRanges(start=Begin, end=End),name=Name))
return(bingr)
}
#' Transforms a bed format data.frame containing genomic regions into a matrix of n samples versus m defined genomic bins where bins overlapping with bed segments take value = 1
#'
#' @param bed (data.frame) An data.frame
#' @param bingr (S4) a GenomicRanges object containing the
#' @param genome.v (hg19 or hg38) reference genome version to generate genoic bins (ignored if bingr is not NULL)
#' @param window.size (numeric) size in megabases to generate genomic bins
#' @param slide.size (numeric) size in megabases of the sliding genomic window; slide.size must be <= 1
#' @return an instance of the class 'chromo.regs' containing information about shattered regions
#' @export
#'
bed2chromo.reg <- function(bed, bingr=NULL, genome.v="hg19", window.size=10, slide.size=2){
if(!is.null(bingr)){
stopifnot(isS4(bingr))
}else{
bingr <- get.chr.bins(cnv=NULL,genome.v="hg19", window.size=window.size, slide.size=slide.size)
}
inbedGR <- with(bed,GRanges(chrom, IRanges(start=start, end=end)))
idNames <- unique(bed$sampleid)
tot <- length(unique(bed$sampleid))*length(binsgr$name)
hbd <- matrix(rep(0,),ncol=length(binsgr$name),nrow=length(idNames))
colnames(hbd) <- bingr$name
rownames(hbd) <- idNames
hits <- GenomicAlignments::findOverlaps(bingr,inbedGR)
for(i in idNames){
hbdbins_i <- bingr$name[queryHits(hits)[which(bed$sampleid[subjectHits(hits)] == i)]]
hbd[i,hbdbins_i] <- 1
}
return(hbd)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.