Nothing
## ----code, echo = FALSE-------------------------------------------------------
date = "`r doc_date()`"
pkg = "`r pkg_ver('CNEr')`"
## ----global_options, echo=FALSE-----------------------------------------------
short=TRUE #if short==TRUE, do not echo code chunks
debug=FALSE
knitr::opts_chunk$set(echo=!short, warning=debug, message=debug, error=FALSE,
cache.path = "cache/",
fig.path = "figures/")
## ----Axt, eval=TRUE, echo=TRUE------------------------------------------------
library(CNEr)
## These axt files are specially prepared for the region
## (chr6:24,000,000..27,000,000)
axtFilesHg38DanRer10 <- file.path(system.file("extdata", package="CNEr"),
"hg38.danRer10.net.axt")
axtFilesDanRer10Hg38 <- file.path(system.file("extdata", package="CNEr"),
"danRer10.hg38.net.axt")
## ----readAxt, eval=TRUE, echo=TRUE--------------------------------------------
axtHg38DanRer10 <- readAxt(axtFilesHg38DanRer10)
axtDanRer10Hg38 <- readAxt(axtFilesDanRer10Hg38)
## ----showAxt, eval=TRUE, echo=TRUE--------------------------------------------
## Axt class is shown in UCSC axt format
axtHg38DanRer10
axtDanRer10Hg38
## ----matchDistribution, eval=TRUE, echo=TRUE----------------------------------
## Distribution of matched alignments; Given an Axt alignment, plot a heatmap with percentage of each matched alignment
matchDistribution(axtHg38DanRer10)
matchDistribution(axtDanRer10Hg38)
## ----syntenyDotplot, eval=TRUE, echo=TRUE-------------------------------------
## Example of chr4 on hg19 and galGal3
## The synteny of human and zebrafish is not quite obvious on the dotplot.
library(BSgenome.Hsapiens.UCSC.hg19)
library(BSgenome.Ggallus.UCSC.galGal3)
fn <- file.path(system.file("extdata", package="CNEr"),
"chr4.hg19.galGal3.net.axt.gz")
axt <- readAxt(fn,
tAssemblyFn=file.path(system.file("extdata",
package="BSgenome.Hsapiens.UCSC.hg19"),
"single_sequences.2bit"),
qAssemblyFn=file.path(system.file("extdata",
package="BSgenome.Ggallus.UCSC.galGal3"),
"single_sequences.2bit"))
library(GenomeInfoDb)
syntenicDotplot(axt, firstChrs=c("chr4"), secondChrs="chr4", type="dot")
## ----UCSC, eval=FALSE, echo=TRUE----------------------------------------------
# ## To fetch rmsk table from UCSC
# library(rtracklayer)
# mySession <- browserSession("UCSC")
# genome(mySession) <- "hg38"
# hg38.rmsk <- getTable(ucscTableQuery(mySession, track="RepeatMasker",
# table="rmsk"))
# hg38.rmskGRanges <- GRanges(seqnames=hg38.rmsk$genoName,
# ## The UCSC coordinate is 0-based.
# ranges=IRanges(start=hg38.rmsk$genoStart+1,
# end=hg38.rmsk$genoEnd),
# strand=hg38.rmsk$strand)
# ## To fetch ensembl gene exons from BioMart
# library(biomaRt)
# ensembl <- useMart(biomart="ENSEMBL_MART_ENSEMBL",
# host="dec2015.archive.ensembl.org")
# ensembl <- useDataset("hsapiens_gene_ensembl",mart=ensembl)
# attributes <- listAttributes(ensembl)
# exons <- getBM(attributes=c("chromosome_name", "exon_chrom_start",
# "exon_chrom_end", "strand"),
# mart=ensembl)
# exonsRanges <- GRanges(seqnames=exons$chromosome_name,
# ranges=IRanges(start=exons$exon_chrom_start,
# end=exons$exon_chrom_end),
# strand=ifelse(exons$strand==1L, "+", "-")
# )
# seqlevelsStyle(exonsRanges) <- "UCSC"
# ## Use the existing Bioconductor annotation package for hg38
# library(TxDb.Hsapiens.UCSC.hg38.knownGene)
# exonsRanges <- exons(TxDb.Hsapiens.UCSC.hg38.knownGene)
## ----Bed, eval=TRUE, echo=TRUE------------------------------------------------
## Existing bed file for chr6:24,000,000..27,000,000 of Zebrafish danRer10
bedDanRer10Fn <- file.path(system.file("extdata", package="CNEr"),
"filter_regions.danRer10.bed")
danRer10Filter <- readBed(bedDanRer10Fn)
danRer10Filter
## Existing bed file for alignment region in Human hg38 against
## chr6:24,000,000..27,000,000 of danRer10
bedHg38Fn <- file.path(system.file("extdata", package="CNEr"),
"filter_regions.hg38.bed")
hg38Filter <- readBed(bedHg38Fn)
hg38Filter
## ----CNE, eval=TRUE, echo=TRUE------------------------------------------------
## Here we have the twoBit files from Bioconductor package
## BSgenome.Drerio.UCSC.danRer10 and BSgenome.Hsapiens.UCSC.hg38
cneDanRer10Hg38 <- CNE(
assembly1Fn=file.path(system.file("extdata",
package="BSgenome.Drerio.UCSC.danRer10"),
"single_sequences.2bit"),
assembly2Fn=file.path(system.file("extdata",
package="BSgenome.Hsapiens.UCSC.hg38"),
"single_sequences.2bit"),
axt12Fn=axtFilesDanRer10Hg38, axt21Fn=axtFilesHg38DanRer10,
cutoffs1=8L, cutoffs2=4L)
cneDanRer10Hg38
## ----CNEScan, eval=TRUE, echo=TRUE--------------------------------------------
identities <- c(45L, 48L, 49L)
windows <- c(50L, 50L, 50L)
## Here danRer10Filter is tFilter since danRer10 is assembly1
cneListDanRer10Hg38 <- ceScan(x=cneDanRer10Hg38, tFilter=danRer10Filter,
qFilter=hg38Filter,
window=windows, identity=identities)
## ----CNEScanHead, eval=TRUE, echo=TRUE----------------------------------------
## CNEs from the alignments with danRer10 as reference
CNE12(cneListDanRer10Hg38[["45_50"]])
## CNEs from the alignments with hg38 as reference
CNE21(cneListDanRer10Hg38[["45_50"]])
## ----CNEMerge, eval=TRUE, echo=TRUE-------------------------------------------
cneMergedListDanRer10Hg38 <- lapply(cneListDanRer10Hg38, cneMerge)
## ----CNERealignment, eval=FALSE, echo=TRUE------------------------------------
# cneFinalListDanRer10Hg38 <- lapply(cneMergedListDanRer10Hg38, blatCNE)
## ----saveCNE, eval=TRUE, echo=TRUE--------------------------------------------
## on individual tables
dbName <- tempfile()
data(cneFinalListDanRer10Hg38)
tableNames <- paste("danRer10", "hg38", names(cneFinalListDanRer10Hg38),
sep="_")
for(i in 1:length(cneFinalListDanRer10Hg38)){
saveCNEToSQLite(cneFinalListDanRer10Hg38[[i]], dbName, tableNames[i],
overwrite=TRUE)
}
## ----queryCNE, eval=TRUE, echo=TRUE-------------------------------------------
chr <- "chr6"
start <- 24000000L
end <- 27000000L
minLength <- 50L
tableName <- "danRer10_hg38_45_50"
fetchedCNERanges <- readCNERangesFromSQLite(dbName, tableName, chr,
start, end, whichAssembly="first",
minLength=minLength)
fetchedCNERanges
## ----CNEWidthDistribution, eval=TRUE, echo=TRUE-------------------------------
dbName <- file.path(system.file("extdata", package="CNEr"),
"danRer10CNE.sqlite")
tAssemblyFn <- file.path(system.file("extdata",
package="BSgenome.Drerio.UCSC.danRer10"),
"single_sequences.2bit")
qAssemblyFn <- file.path(system.file("extdata",
package="BSgenome.Hsapiens.UCSC.hg38"),
"single_sequences.2bit")
cneGRangePairs <- readCNERangesFromSQLite(dbName=dbName,
tableName="danRer10_hg38_45_50",
tAssemblyFn=tAssemblyFn,
qAssemblyFn=qAssemblyFn)
plotCNEWidth(cneGRangePairs)
## ----plotCNEDistribution, eval=TRUE, echo=TRUE--------------------------------
plotCNEDistribution(first(cneGRangePairs))
## ----outputBedBW, eval=FALSE, echo=TRUE---------------------------------------
# makeCNEDensity(cneGRangePairs[1:1000])
## ----queryUCSC, eval=FALSE, echo=TRUE, cache=TRUE-----------------------------
# library(Gviz)
# library(biomaRt)
# genome <- "danRer10"
# axisTrack <- GenomeAxisTrack()
# cpgIslands <- UcscTrack(genome=genome, chromosome=chr,
# track="cpgIslandExt", from=start, to=end,
# trackType="AnnotationTrack", start="chromStart",
# end="chromEnd", id="name", shape="box",
# showId=FALSE,
# fill="#006400", name="CpG",
# background.title="brown")
# refGenes <- UcscTrack(genome=genome, chromosome=chr,
# track="refGene", from=start, to=end,
# trackType="GeneRegionTrack", rstarts="exonStarts",
# rends="exonEnds", gene="name2", symbol="name2",
# transcript="name", strand="strand", fill="#8282d2",
# name="refSeq Genes", collapseTranscripts=TRUE,
# showId=TRUE, background.title="brown")
# ensembl <- useMart(biomart="ENSEMBL_MART_ENSEMBL",
# host="dec2015.archive.ensembl.org")
# ensembl <- useDataset("drerio_gene_ensembl",mart=ensembl)
# biomTrack <- BiomartGeneRegionTrack(genome=genome, chromosome=chr,
# biomart=ensembl,
# start=start , end=end, name="Ensembl Genes")
## ----loadAnnotation, eval=TRUE, echo=FALSE------------------------------------
data(axisTrack)
data(cpgIslands)
data(refGenes)
## ----plotAnnotation, eval=TRUE, echo=TRUE, fig.height=5, fig.width=7----------
library(Gviz)
plotTracks(list(axisTrack, cpgIslands, refGenes),
collapseTranscripts=TRUE, shape="arrow",
transcriptAnnotation="symbol")
## ----CNEDensity, eval=TRUE, echo=TRUE-----------------------------------------
dbName <- file.path(system.file("extdata", package="CNEr"),
"danRer10CNE.sqlite")
genome <- "danRer10"
windowSize <- 200L
minLength <- 50L
cneDanRer10Hg38_21_30 <-
CNEDensity(dbName=dbName,
tableName="danRer10_hg38_21_30",
whichAssembly="first", chr=chr, start=start,
end=end, windowSize=windowSize,
minLength=minLength)
cneDanRer10Hg38_45_50 <-
CNEDensity(dbName=dbName,
tableName="danRer10_hg38_45_50",
whichAssembly="first", chr=chr, start=start,
end=end, windowSize=windowSize,
minLength=minLength)
cneDanRer10Hg38_49_50 <-
CNEDensity(dbName=dbName,
tableName="danRer10_hg38_49_50",
whichAssembly="first", chr=chr, start=start,
end=end, windowSize=windowSize,
minLength=minLength)
cneDanRer10AstMex102_48_50 <-
CNEDensity(dbName=dbName,
tableName="AstMex102_danRer10_48_50",
whichAssembly="second", chr=chr, start=start,
end=end, windowSize=windowSize,
minLength=minLength)
cneDanRer10CteIde1_75_75 <-
CNEDensity(dbName=dbName,
tableName="cteIde1_danRer10_75_75",
whichAssembly="second", chr=chr, start=start,
end=end, windowSize=windowSize,
minLength=minLength)
## ----GvizDataTrack, eval=TRUE, echo=TRUE--------------------------------------
dTrack1 <- DataTrack(range=cneDanRer10Hg38_21_30,
genome=genome, type="horiz",
horizon.scale=max(cneDanRer10Hg38_21_30$score)/3,
fill.horizon=c("#B41414", "#E03231", "#F7A99C",
"yellow", "orange", "red"),
name="human 21/30", background.title="brown")
dTrack2 <- DataTrack(range=cneDanRer10Hg38_45_50,
genome=genome, type="horiz",
horizon.scale=max(cneDanRer10Hg38_45_50$score)/2,
fill.horizon=c("#B41414", "#E03231", "#F7A99C",
"yellow", "orange", "red"),
name="human 45/50", background.title="brown")
dTrack3 <- DataTrack(range=cneDanRer10Hg38_49_50,
genome=genome, type="horiz",
horizon.scale=max(cneDanRer10Hg38_21_30$score)/3,
fill.horizon=c("#B41414", "#E03231", "#F7A99C",
"yellow", "orange", "red"),
name="human 49/50", background.title="brown")
dTrack4 <- DataTrack(range=cneDanRer10AstMex102_48_50,
genome=genome, type="horiz",
horizon.scale=max(cneDanRer10Hg38_21_30$score)/3,
fill.horizon=c("#B41414", "#E03231", "#F7A99C",
"yellow", "orange", "red"),
name="blind cave fish 48/50", background.title="brown")
dTrack5 <- DataTrack(range=cneDanRer10CteIde1_75_75,
genome=genome, type="horiz",
horizon.scale=max(cneDanRer10CteIde1_75_75$score)/3,
fill.horizon=c("#B41414", "#E03231", "#F7A99C",
"yellow", "orange", "red"),
name="grass carp 75/75", background.title="brown")
## ----plotCNE, eval=TRUE, echo=TRUE, fig.height=10, fig.width=8----------------
ht <- HighlightTrack(trackList=list(refGenes, dTrack5, dTrack4,
dTrack1, dTrack2, dTrack3),
start=c(24200000, 25200000, 26200000),
end=c(25100000, 26150000, 27000000),
chromosome =chr)
plotTracks(list(axisTrack, cpgIslands, ht),
collapseTranscripts=TRUE, shape="arrow",
transcriptAnnotation="symbol",
from=24000000, to=27000000)
## ----sessionInfo, eval=TRUE, echo=TRUE----------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.