inst/doc/BubbleTree-vignette.R

## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()

## ----eval=FALSE---------------------------------------------------------------
#  head(snp.dat)
#  
#    CHROM    POS  ID REF ALT QUAL FILTER LT.rna.dp LN.rna.dp ON.rna.dp OT.rna.dp BT.wes.dp LT.wes.dp LN.wes.dp ON.wes.dp
#  1  chr1  54757 rs202000650   T   G    .   PASS        NA        NA        NA        NA        NA        NA        NA        NA
#  2  chr1 564636           .   C   T    .   PASS        NA        NA        NA        NA        NA        NA        NA        NA
#  3  chr1 564862   rs1988726   T   C    .   PASS        NA        NA        NA        NA        NA        NA        NA        NA
#  4  chr1 564868   rs1832728   T   C    .   PASS        NA        NA        NA        NA        NA        NA        NA        NA
#  5  chr1 565454   rs7349151   T   C    .   PASS        NA        NA        NA        NA        NA        NA        NA        NA
#  6  chr1 565464   rs6594030   T   C    .   PASS        NA        NA        NA        NA        NA        NA        NA        NA
#    OT.wes.dp LT.wgs.dp LN.wgs.dp ON.wgs.dp OT.wgs.dp LT.rna.freq LN.rna.freq ON.rna.freq OT.rna.freq BT.wes.freq LT.wes.freq
#  1        NA        25        24        27        19          NA          NA          NA          NA          NA          NA
#  2        NA        21        NA        NA        14          NA          NA          NA          NA          NA          NA
#  3        NA        10        15        55        13          NA          NA          NA          NA          NA          NA
#  4        NA        10        12        60        14          NA          NA          NA          NA          NA          NA
#  5        NA        21        14        26        24          NA          NA          NA          NA          NA          NA
#  6        NA        25        16        33        29          NA          NA          NA          NA          NA          NA
#    LN.wes.freq ON.wes.freq OT.wes.freq LT.wgs.freq LN.wgs.freq ON.wgs.freq OT.wgs.freq
#  1          NA          NA          NA      0.2400      0.1667      0.2222      0.3684
#  2          NA          NA          NA      0.0000          NA          NA      0.1429
#  3          NA          NA          NA      0.4000      0.5333      0.9091      0.7692
#  4          NA          NA          NA      0.5000      0.6667      0.9333      0.7857
#  5          NA          NA          NA      0.1429      0.3571      0.6538      0.6250
#  6          NA          NA          NA      0.2000      0.3750      0.7273      0.5862
#  

## ----eval=FALSE---------------------------------------------------------------
#  is.hetero <- function(x, a=0.3, b=0.7) {
#   (x - a)  *  (b - x) >= 0
#  }
#  
#  wgs.snp.ss <- subset(snp.dat, ! CHROM %in% c("chrX", "chrY") &
#                           LN.wgs.dp >= 15 &
#                           ON.wgs.dp >=15 &
#                           is.hetero(LN.wgs.freq, 0.4, 0.6) &
#                           is.hetero(ON.wgs.freq, 0.4, 0.6))

## ----eval=FALSE---------------------------------------------------------------
#  library(GenomicRanges)
#  wgs.hetero.grs <- list()
#  wgs.hetero.grs$lung <- with(wgs.snp.ss, GRanges(CHROM, IRanges(POS, POS), mcols=cbind(LT.wgs.dp, LT.wgs.freq)))
#  wgs.hetero.grs$ovary <- with(wgs.snp.ss, GRanges(CHROM, IRanges(POS, POS), mcols=cbind(OT.wgs.dp, OT.wgs.freq)))
#  names(elementMetadata(wgs.hetero.grs$lung)) <- names(elementMetadata(wgs.hetero.grs$ovary))  <- c("dp", "freq")

## ----eval=FALSE---------------------------------------------------------------
#  library(GenomicRanges)
#  min.num <- 10
#  cnv.gr <- with(subset(seg$output, num.mark >= min.num & ! chrom %in% c("chrX", "chrY")) , GRanges(chrom, IRanges(loc.start, loc.end), mcols=cbind(num.mark, seg.mean)))

## -----------------------------------------------------------------------------
suppressMessages(
    library(BubbleTree)
)

## -----------------------------------------------------------------------------
load(system.file("data", "allCall.lst.RData", package="BubbleTree"))
head(allCall.lst[[1]]@rbd)

## -----------------------------------------------------------------------------
# load sample files
load(system.file("data", "cnv.gr.rda", package="BubbleTree"))
load(system.file("data", "snp.gr.rda", package="BubbleTree"))

# load annotations
load(system.file("data", "centromere.dat.rda", package="BubbleTree"))
load(system.file("data", "cyto.gr.rda", package="BubbleTree"))
load(system.file("data", "cancer.genes.minus2.rda", package="BubbleTree"))
load(system.file("data", "vol.genes.rda", package="BubbleTree"))
load(system.file("data", "gene.uni.clean.gr.rda", package="BubbleTree"))


# initialize RBD object
r <- new("RBD", unimodal.kurtosis=-0.1)

# create new RBD object with GenomicRanges objects for SNPs and CNVs
rbd <- makeRBD(r, snp.gr, cnv.gr)
head(rbd)

# create a new prediction
btreepredictor <- new("BTreePredictor", rbd=rbd, max.ploidy=6, prev.grid=seq(0.2,1, by=0.01))
pred <- btpredict(btreepredictor)

# create rbd plot
btreeplotter <- new("BTreePlotter", max.ploidy=5, max.size=10)
btree <- drawBTree(btreeplotter, pred@rbd)
print(btree)

# create rbd.adj plot
btreeplotter <- new("BTreePlotter", branch.col="gray50")
btree <- drawBTree(btreeplotter, pred@rbd.adj)
print(btree)

# create a combined plot with rbd and rbd.adj that shows the arrows indicating change
btreeplotter <- new("BTreePlotter", max.ploidy=5, max.size=10)
arrows <- trackBTree(btreeplotter,
                     pred@rbd,
                     pred@rbd.adj,
                     min.srcSize=0.01, 
                     min.trtSize=0.01)

btree <- drawBTree(btreeplotter, pred@rbd) + arrows 
print(btree)


# create a plot with overlays of significant genes
btreeplotter <- new("BTreePlotter", branch.col="gray50")
annotator <- new("Annotate")

comm <- btcompare(vol.genes, cancer.genes.minus2)

sample.name <- "22_cnv_snv"

btree <- drawBTree(btreeplotter, pred@rbd.adj) + 
    ggplot2::labs(title=sprintf("%s (%s)", sample.name, info(pred)))

out <- pred@result$dist  %>% 
    filter(seg.size >= 0.1 ) %>% 
    arrange(gtools::mixedorder(as.character(seqnames)), start)

ann <- with(out, {
    annoByGenesAndCyto(annotator,
                       as.character(out$seqnames),
                       as.numeric(out$start),
                       as.numeric(out$end),
                       comm$comm,
                       gene.uni.clean.gr=gene.uni.clean.gr,
                       cyto.gr=cyto.gr)
})

out$cyto <- ann$cyto
out$genes <- ann$ann

btree <- btree + drawFeatures(btreeplotter, out)
print(btree)


# print out purity and ploidy values
info <- info(pred)
cat("\nPurity/Ploidy: ", info, "\n")

## -----------------------------------------------------------------------------
load(system.file("data", "cancer.genes.minus2.rda", package="BubbleTree")) 
load(system.file("data", "vol.genes.rda", package="BubbleTree"))
load(system.file("data", "gene.uni.clean.gr.rda", package="BubbleTree"))
load(system.file("data", "cyto.gr.rda", package="BubbleTree")) 
load(system.file("data", "centromere.dat.rda", package="BubbleTree")) 
load(system.file("data", "all.somatic.lst.RData", package="BubbleTree"))
load(system.file("data", "allHetero.lst.RData", package="BubbleTree")) 
load(system.file("data", "allCNV.lst.RData", package="BubbleTree")) 
load(system.file("data", "hg19.seqinfo.rda", package="BubbleTree")) 

## -----------------------------------------------------------------------------
# lists of 379 known cancer genes
head(cancer.genes.minus2)
# another list of 105 known cancer genes
head(vol.genes)

# full gene annotation Grange object
head(gene.uni.clean.gr)

# cytoband coordinate data
head(cyto.gr)

# centromere coordinate data
head(centromere.dat)

# SNV location data
head(all.somatic.lst, n=1L)

# sequence variants
head(allHetero.lst, n=1L)

# copy number variation data
head(allCNV.lst, n=1L)

# hg19 sequence data
hg19.seqinfo

## -----------------------------------------------------------------------------
btreeplotter <- new("BTreePlotter", max.ploidy=5, max.size=10)
print(btreeplotter@branches)

## -----------------------------------------------------------------------------
load(system.file("data", "allCall.lst.RData", package="BubbleTree"))
btreeplotter <- new("BTreePlotter", max.ploidy=5, max.size=10)
sample <- allCall.lst[["sam10"]]
rbd1 <- sample@rbd
rbd2 <- sample@rbd.adj
arrows <- trackBTree(btreeplotter, rbd1, rbd2, min.srcSize=0.01, min.trtSize=0.01)
btree <- drawBTree(btreeplotter, rbd1)
print(btree)

## -----------------------------------------------------------------------------
load(system.file("data", "allCall.lst.RData", package="BubbleTree"))
btreeplotter <- new("BTreePlotter", max.ploidy=5, max.size=10)
sample <- allCall.lst[["sam12"]]
rbd1 <- sample@rbd
rbd2 <- sample@rbd.adj
arrows <- trackBTree(btreeplotter, rbd1, rbd2, min.srcSize=0.01, min.trtSize=0.01)
btree <- drawBTree(btreeplotter, rbd1) + drawBubbles(btreeplotter, rbd2, "gray80") + arrows
print(btree)

## -----------------------------------------------------------------------------
load(system.file("data", "allCall.lst.RData", package="BubbleTree"))
btreeplotter <- new("BTreePlotter", max.ploidy=5, max.size=10)
sample <- allCall.lst[["sam12"]]
rbd1 <- sample@rbd
rbd2 <- sample@rbd.adj
arrows <- trackBTree(btreeplotter, rbd1, rbd2, min.srcSize=0.01, min.trtSize=0.01)
btree <- drawBTree(btreeplotter, rbd1) + arrows
print(btree)

## -----------------------------------------------------------------------------
load(system.file("data", "allRBD.lst.RData", package="BubbleTree"))
btreepredictor <- new("BTreePredictor")
btreepredictor@config$cutree.h <- 0.15

high.ploidy <- rep(TRUE, length(allRBD.lst))
high.purity <- rep(TRUE, length(allRBD.lst))

high.ploidy[c("sam6",
              "ovary.wgs",
              "ovary.wes",
              "TCGA-06-0145-01A-01W-0224-08",
              "TCGA-13-1500-01A-01D-0472-01",
              "TCGA-AO-A0JJ-01A-11W-A071-09")] <- FALSE

high.purity[c("sam6", "ovary.wgs", "ovary.wes")] <- FALSE

nn <- "sam13"
rbd <- allRBD.lst[[nn]]
btreepredictor@config$high.ploidy <- high.ploidy[nn]
btreepredictor@config$high.purity <- high.purity[nn]
btreepredictor <- loadRBD(btreepredictor, rbd)
btreepredictor@config$min.segSize <- ifelse(max(btreepredictor@rbd$seg.size, na.rm=TRUE) < 0.4, 0.1, 0.4)
btreepredictor <- btpredict(btreepredictor)

## -----------------------------------------------------------------------------
cat(info(btreepredictor), "\n")

names(allCall.lst) <- names(allRBD.lst)
results <- list()
for (name in names(allCall.lst)) {
    results[[name]] <- allCall.lst[[name]]@result$dist
}

## -----------------------------------------------------------------------------
load(system.file("data", "allCall.lst.RData", package="BubbleTree"))
load(system.file("data", "cancer.genes.minus2.rda", package="BubbleTree"))
load(system.file("data", "vol.genes.rda", package="BubbleTree"))
load(system.file("data", "gene.uni.clean.gr.rda", package="BubbleTree"))
load(system.file("data", "cyto.gr.rda", package="BubbleTree"))

# 77 common cancer genes merged from 2 sets
comm <- btcompare(vol.genes, cancer.genes.minus2)

btreeplotter <- new("BTreePlotter", branch.col="gray50")
annotator <- new("Annotate")

nn <- "sam13"
cc <- allCall.lst[[nn]]
z <- drawBTree(btreeplotter, cc@rbd.adj) + ggplot2::labs(title=sprintf("%s (%s)", nn, info(cc)))
out <- cc@result$dist  %>% filter(seg.size >= 0.1 ) %>% arrange(gtools::mixedorder(as.character(seqnames)), start)

ann <- with(out, {
    annoByGenesAndCyto(annotator,
                       as.character(out$seqnames),
                       as.numeric(out$start),
                       as.numeric(out$end),
                       comm$comm,
                       gene.uni.clean.gr=gene.uni.clean.gr,
                       cyto.gr=cyto.gr)
})

out$cyto <- ann$cyto
out$genes <- ann$ann
v <- z + drawFeatures(btreeplotter, out)
print(v)


## -----------------------------------------------------------------------------
load(system.file("data", "allCall.lst.RData", package="BubbleTree"))
load(system.file("data", "centromere.dat.rda", package="BubbleTree"))
load(system.file("data", "all.somatic.lst.RData", package="BubbleTree"))
load(system.file("data", "allHetero.lst.RData", package="BubbleTree"))
load(system.file("data", "allCNV.lst.RData", package="BubbleTree"))
load(system.file("data", "hg19.seqinfo.rda", package="BubbleTree"))

trackplotter <- new("TrackPlotter")

nn <- "sam12"
ymax <- ifelse(nn %in% c("lung.wgs", "lung.wes"), 9, 4.3)

p1 <- xyTrack(trackplotter,
              result.dat=allCall.lst[[nn]]@result,
              gr2=centromere.dat,
              ymax=ymax) + ggplot2::labs(title=nn)

p2 <- bafTrack(trackplotter,
               result.dat=allCall.lst[[nn]]@result,
               gr2=centromere.dat,
               somatic.gr=all.somatic.lst[[nn]])

t1 <- getTracks(p1, p2)

z1 <- heteroLociTrack(trackplotter, 
                      allCall.lst[[nn]]@result, 
                      centromere.dat, 
                      allHetero.lst[[nn]])

z2 <- RscoreTrack(trackplotter, 
                  allCall.lst[[nn]]@result, 
                  centromere.dat, 
                  allCNV.lst[[nn]])

t2 <- getTracks(z1, z2)

gridExtra::grid.arrange(t1,t2, ncol=1)

## -----------------------------------------------------------------------------
load(system.file("data", "allCall.lst.RData", package="BubbleTree"))
btp <- new("BTreePlotter", max.ploidy=5, max.size=10)

nn1 <- "HCC11.Primary.Tumor"
nn2 <- "HCC11.Recurrent.Tumor" 

rbd1 <- allCall.lst[[nn1]]@result$dist
rbd2 <- allCall.lst[[nn2]]@result$dist

srcSize <- 0.5
trtSize <- 1
minOver <- 1e7

arrows <- trackBTree(btp,
                     rbd1,
                     rbd2,
                     min.srcSize=srcSize,
                     min.trtSize=trtSize,
                     min.overlap=minOver)

z <- drawBTree(btp, rbd1)

if(!is.null(arrows)) {
    z <- z + arrows + ggplot2::labs(title=sprintf("%s -> %s", nn1, nn2))
}
print(z)

Try the BubbleTree package in your browser

Any scripts or data that you put into this service are public.

BubbleTree documentation built on Nov. 8, 2020, 8:22 p.m.