Nothing
## June 2008 Joern Toedling
## use Biostrings (version >= 2.0) for mapping reporters to the genome
## shown on the reporters of the example data.
### these functions were adopted from the GenomeSearching vignette of
### Biostrings and slightly modified
library("Ringo")
library("Biostrings")
library("BSgenome")
library("BSgenome.Hsapiens.UCSC.hg18")
#### FUNCTIONS:
## 1. for mapping with mismatches
doMapping <- function(stringSet, chromosomes, max.mismatch=1)
{
seqnames <- seqnames(Hsapiens)
if (!missing(chromosomes))
seqnames <- seqnames[match( chromosomes, seqnames)]
stopifnot(length(seqnames)!=0)
res <- vector("list", length(seqnames))
names(res) <- seqnames
for (seqname in seqnames) {
subject <- Hsapiens[[seqname]]
cat("\n>>> Finding all hits in chromosome", seqname, "...\n")
chrRes <- vector("list", length(stringSet))
for (i in seq_len(length(stringSet))) {
if (i %% 100 == 0) cat(i, " ")
patternID <- names(stringSet)[i]
pattern <- stringSet[[i]]
plus.matches <- matchPattern(pattern, subject, max.mismatch=max.mismatch)
rcpattern <- reverseComplement(pattern)
minus.matches <- matchPattern(rcpattern, subject, max.mismatch=max.mismatch)
nMatches <- length(plus.matches) + length(minus.matches)
patRes <- data.frame("CHROMOSOME"=rep.int(seqname, nMatches),
"PROBE_ID"=rep.int(patternID, nMatches),
"POSITION"=c(start(plus.matches), start(minus.matches)),
"END"=c(end(plus.matches), end(minus.matches)),
"STRAND"=rep(c("+","-"), c(length(plus.matches), length(minus.matches))),
stringsAsFactors=FALSE)
chrRes[[i]] <- patRes
}#for
chrRes <- do.call("rbind", chrRes)
res[[seqname]] <- chrRes
unload(Hsapiens, seqname)
}# for (seqname in seqnames)
res <- do.call("rbind",res)
stopifnot(all(res$END >= res$POSITION))
res$LENGTH <- res$END - res$POSITION + 1
res$END <- NULL
return(res)
}# doMapping
### 2. for only exact reporter to genome mappings. This function requires
## that all reporter sequences are of the same length and it does not
## allow for mismatches. Is is MUCH faster than the previous one.
doExactMapping <- function(stringSet, chromosomes)
{
seqnames <- seqnames(Hsapiens)
if (!missing(chromosomes))
seqnames <- seqnames[match( chromosomes, seqnames)]
stopifnot(length(seqnames)!=0)
res <- vector("list", length(seqnames))
names(res) <- seqnames
pdict <- PDict(stringSet)
mdict <- PDict(reverseComplement(stringSet))
for (seqname in seqnames) {
subject <- Hsapiens[[seqname]]
cat("\n>>> Finding all hits in chromosome", seqname, "...\n")
plus.mindex <- matchPDict(pdict, subject)
plus.matches <- extractAllMatches(subject=subject, plus.mindex)
minus.mindex <- matchPDict(mdict, subject)
minus.matches <- extractAllMatches(subject=subject, minus.mindex)
nMatches <- length(plus.matches) + length(minus.matches)
chrRes <- data.frame("CHROMOSOME"=rep.int(seqname, nMatches),
"PROBE_ID"=c(names(plus.matches), names(minus.matches)),
"POSITION"=c(start(plus.matches), start(minus.matches)),
"END"=c(end(plus.matches), end(minus.matches)),
"STRAND"=rep(c("+","-"), c(length(plus.matches), length(minus.matches))),
stringsAsFactors=FALSE)
}#for
res[[seqname]] <- chrRes
unload(Hsapiens, seqname)
res <- do.call("rbind",res)
stopifnot(all(res$END >= res$POSITION))
res$LENGTH <- res$END - res$POSITION + 1
res$END <- NULL
return(res)
}# doExactMapping
exDir <- system.file("exData", package="Ringo")
ndf <- read.delim(file.path(exDir,"MOD_2003-12-05_SUZ12_1in2.ndf"), header=TRUE, as.is=TRUE)
## get the reporter sequences and set their unique identifiers:
repseqs <- DNAStringSet(ndf$"PROBE_SEQUENCE")
stopifnot(!any(duplicated(ndf$"PROBE_ID")))
names(repseqs) <- ndf$"PROBE_ID"
### have a look at seqnames in BSgenome package for H. sapiens
seqnames(Hsapiens)
## this takes some hours:
# hits <- doMapping(repseqs, "chr9")
### alternative for only exact matches and length of all reporters equal:
hits <- doExactMapping(repseqs, "chr9")
### compare these hits to the ones supplied in the POS file
pos <- read.delim(file.path(ringoExampleDir,"MOD_2003-12-05_SUZ12_1in2.pos"), header=TRUE, as.is=TRUE)
chits <- with(hits, paste(PROBE_ID, CHROMOSOME, POSITION, LENGTH, sep="|"))
cpos <- with(pos, paste(PROBE_ID, CHROMOSOME, POSITION, LENGTH, sep="|"))
length(intersect(chits, cpos))
# [1] 660 ## not that great
## build a probeAnno object out of this data.frame
exProbeAnno <- posToProbeAnno(hits, genome="H.sapiens (hg18)", microarrayPlatform="NimbleGen MOD_2003-12-05_SUZ12_1in2")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.