setMethod("categoryToEntrezBuilder",
signature(p="KEGGHyperGParams"),
function(p) {
getKeggToEntrezMap(p)
})
setMethod("categoryToEntrezBuilder",
signature(p="GOHyperGParams"),
function(p) {
if (isDBDatPkg(p@datPkg))
getGoToEntrezMap_db(p)
else
getGoToEntrezMap(p)
})
setMethod("categoryToEntrezBuilder",
signature(p="PFAMHyperGParams"),
function(p) {
getPfamToEntrezMap(p)
})
## this is for OBO
setMethod("categoryToEntrezBuilder", "OBOHyperGParams",
function(p)
{
geneids <- geneIds(p@datPkg@geneSetCollection)
genesByGS <- lapply(geneids, intersect, universeGeneIds(p))
genesByGS[lengths(genesByGS) > 0]
})
getGoToEntrezMap_db <- function(p) {
keep.all <- switch(testDirection(p),
over=FALSE,
under=TRUE,
stop("Bad testDirection slot"))
#annPkgNS <- getNamespace(annotation(p))
#db <- get("db_conn", annPkgNS)
datPkg <- p@datPkg
if(datPkg@installed)
db <- do.call(paste0(datPkg@name, "_dbconn"), list())
else
db <- dbconn(datPkg@db)
univ <- unlist(universeGeneIds(p), use.names=FALSE)
## For over representation:
## Obtain all unique GO IDs from specified ontology that have at
## least one of the genes from geneIds(p) annotated at it.
##
## For under representation:
## Obtain all unique GO IDs from specified ontology that have at
## least one of the genes from univ annotated at it.
##
## These are the GO IDs that form the keys in our GO_to_Entrez map.
## First we need to handle the fact that different species have different
## mappings for their names. And treat installed vs bare OrgDbs differently
## also need to account for different names for NOSCHEMA OrgDbs
if(datPkg@installed){
if (is(datPkg, "YeastDatPkg") || is(datPkg, "Org.Sc.sgdDatPkg")) {
TABLENAME = "sgd"
GENEIDS = "systematic_name"
goColNames <- c("go_id","ontology")
} else {
TABLENAME = "genes"
if(dbmeta(db, "DBSCHEMA") == "NOSCHEMA_DB") {
GENEIDS <- dbListFields(db, TABLENAME)[2]
goColNames <- dbListFields(db, "go_all")[c(2,4)]
} else {
GENEIDS = "gene_id"
goColNames <- c("go_id","ontology")
}
}
} else {
if( is(datPkg, "YeastDatPkg") || is(datPkg, "Org.Sc.sgdDatPkg") ) {
TABLENAME = "sgd"
} else {
TABLENAME = "genes"
}
GENEIDS <- dbListFields(db, TABLENAME)[2]
goColNames <- dbListFields(db, "go_all")[c(2,4)]
}
## Not all DBs have e.g. go_bp_all, so we just get from go_all
SQL <- "SELECT DISTINCT %s
FROM go_all INNER JOIN %s USING (_id)
WHERE %s IN (%s) AND %s=%s"
inClause1 <- if (!keep.all)
geneIds(p)
else
univ
inClause1 <- toSQLStringSet(inClause1) # may get reused below
SQL <- sprintf(SQL, goColNames[1], TABLENAME, GENEIDS, inClause1, goColNames[2], toSQLStringSet(ontology(p)))
##Check to make sure that we have some geneIds(p) to actually get GO terms FOR...
##basically, the "gene universe" at this point, is only those genes that
##have representation for on this ontology, and none of your genes were in
##THAT list, so there are not going to be any GO terms to find in this query.
if(inClause1=="")
stop("genes being tested do not have corresponding GO terms")
wantedGO <- dbGetQuery(db, SQL)[[1]]
## Now collect the Entrez IDs annotated at our wantedGO IDs making
## sure to only keep those that are in the gene ID universe
## specified in p.
SQL <- "SELECT DISTINCT %s, %s
FROM go_all INNER JOIN %s USING (_id)
WHERE %s IN (%s) AND %s IN (%s)"
inClauseGO <- toSQLStringSet(wantedGO)
if (!keep.all) # avoid recomputing
inClause1 <- toSQLStringSet(univ)
SQL <- sprintf(SQL, GENEIDS, goColNames[1], TABLENAME, GENEIDS, inClause1,
goColNames[1], inClauseGO)
ans <- dbGetQuery(db, SQL)
if (nrow(ans) == 0)
list()
else
split(ans[[GENEIDS]], ans[[goColNames[1]]])
}
getGoToEntrezMap <- function(p) {
keep.all <- switch(testDirection(p),
over=FALSE,
under=TRUE,
stop("Bad testDirection slot"))
lib <- p@datPkg
ontology <- ontology(p)
## Return a list mapping GO ids to the Entrez Gene ids annotated
## at the GO id. Only those GO ids that are in the specified
## ontology and have at least one annotation in the set of
## Entrez Gene ids specified by 'selected' are included.
go2allprobes <- GO2AllProbes(lib, ontology)
probeAnnot <- getGoToProbeMap(go2allprobes, ontology)
## Map to Entrez Gene and flag GO ids that don't have any
## annotations in our selected set. No sense testing these.
probeToEntrezMapHelper(probeAnnot, geneIds(p), lib, universeGeneIds(p),
keep.all=keep.all)
}
getKeggToEntrezMap <- function(p) {
keep.all <- switch(testDirection(p),
over=FALSE,
under=TRUE,
stop("Bad testDirection slot"))
## lib <- annotation(p) ##not unless we want to dispatch on 'character'
## Need to instead get the value from datPkg like this:
lib <- p@datPkg
##kegg2allprobes <- revmap(getDataEnv("PATH", lib)) ## this means I should make a generic method for this step that will dispatch on the type and return something that can be "as.list()'ed"
kegg2allprobes <- KEGG2AllProbes(lib)
probeAnnot <- getKeggToProbeMap(kegg2allprobes)
probeToEntrezMapHelper(probeAnnot, geneIds(p), lib, universeGeneIds(p),
keep.all=keep.all)
}
.setKeytype <- function(p){
keytype <- switch(class(p@datPkg),
YeastDatPkg = "ORF",
Org.XX.egDatPkg = "ENTREZID",
"PROBEID")
keytype
}
## basically this function returns a list of PFAM IDs (names) where each element contains a vector of the entrez gene IDs that go with thos PFAM IDs.
getPfamToEntrezMap <- function(p) {
keep.all <- switch(testDirection(p),
over=FALSE,
under=TRUE,
stop("Bad testDirection slot"))
keytype <- .setKeytype(p)
##alteration:
obj <- get(paste0(annotation(p),'.db'))
probes <- keys(obj, keytype=keytype, column='PFAM')
suppressWarnings(
tab <- select(obj, keys=probes, columns='PFAM',keytype=keytype))
probeAnnot <- split(tab[[keytype]], f=as.factor(tab$PFAM))
probeToEntrezMapHelper(probeAnnot, geneIds(p), p@datPkg, universeGeneIds(p),
keep.all=keep.all)
}
probeToEntrezMapHelper <- function(probeAnnot, selected, lib, universe,
keep.all=FALSE) {
## Given a list 'probeAnnot' mapping category => probe, convert the probe
## ids to Entrez Gene ids (unless we are using YEAST, in which case we skip
## this step). Then reduce the Entrez Gene ids to the specified universe
## and only keep those entries in the list that have a non-empty
## intersection with the selected genes. If keep.all is TRUE, then we keep
## entries even if the list of gene IDs includes no gene ID from the
## selected list.
id2entrezEnv <- ID2EntrezID(lib)
egAnnot <- lapply(probeAnnot, function(x) {
z <- unique(unlist(mget(unique(x), id2entrezEnv)))
z <- intersect(z, universe)
## would be nice to have a short-circuiting way to do this
if (length(z) > 0 && (keep.all || any(selected %in% z))) {
return(z)
}
NULL
})
notNull <- sapply(egAnnot, function(x) !is.null(x))
egAnnot[notNull]
}
getGoToProbeMap <- function(go2allprobes, ontology, goids) {
## Return a list with one element for each GO id in the specified
## ontology that has at least one Probe probe id annotated at it.
## The elements are vectors of Probe ids. Names are GO ids.
##
probeAnnot = as.list(go2allprobes)
if (!missing(goids))
probeAnnot = probeAnnot[goids]
## Remove "all" artifact GO nodes
whAll = match("all", names(probeAnnot), 0)
## This is a tad faster than if (any(whAll > 0))
## because we stop early
for (el in whAll) {
if (el > 0) {
probeAnnot = probeAnnot[-whAll]
break
}
}
goids = names(probeAnnot)
## filter on desired GO ontology
inOnt <- filterGOByOntology(goids, ontology)
probeAnnot = probeAnnot[inOnt]
## remove any GO ids that don't map to an probe id (NA)
removeLengthZeroAndMissing(probeAnnot)
}
getKeggToProbeMap <- function(kegg2allprobes, keggIds) {
probeAnnot = as.list(kegg2allprobes)
if (!missing(keggIds))
probeAnnot = probeAnnot[keggIds]
removeLengthZeroAndMissing(probeAnnot)
}
getPfamToProbeMap <- function(pfam2allprobes, pfamIds) {
probeAnnot = as.list(pfam2allprobes)
if (!missing(pfamIds))
probeAnnot = probeAnnot[pfamIds]
removeLengthZeroAndMissing(probeAnnot)
}
removeLengthZeroAndMissing <- function(map) {
notNA = sapply(map, function(x) {
len = length(x)
!(len == 0 || (len == 1 && is.na(x)))
})
map <- map[notNA]
}
splitOrfByPfam <- function(ypfEnv){
## There should be a PFAM to ORF data set in addition to YEASTPFAM...a
## YEASTPFAM2PROBE, but for now we invert
probe2pfam <- as.list(ypfEnv)
probe2pfam <- removeLengthZeroAndMissing(probe2pfam)
pfamlen <- listLen(probe2pfam)
orf <- rep(names(probe2pfam), pfamlen)
pfam <- unlist(probe2pfam)
orfByPfam <- split(orf, pfam)
orfByPfam
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.