
## Defines functions: smartKeys, .keys, .noSchemaKeys, .deriveTableNameFromField,
## .legacyKeys, .queryForKeys, chooseCentralOrgPkgSymbol, .getCentralID,
## .checkForDeprecatedKeytype, .deprecatedColsMessage, .filterDeprecatedKeytypes,
## .listDeprecatedKeytypes

## Documented in: chooseCentralOrgPkgSymbol

### =========================================================================
### keys() and keytype() - related functions for gene-centric Dbs
### -------------------------------------------------------------------------

## Helpers for deprecating keytypes
## One to just list the bum keytypes
## Returns the character vector of keytypes that are deprecated.
.listDeprecatedKeytypes <- function(){
   c('CHR','CHRLOC','CHRLOCEND') ## Uncomment after the release
}
## Another for keytypes to remove unwanted keytypes
## Returns 'keytypes' with every deprecated keytype removed; the order of the
## remaining elements is unchanged.
.filterDeprecatedKeytypes <- function(keytypes){
    keytypes[!(keytypes %in% .listDeprecatedKeytypes())]
}

## this is a 'standard' warning for people who try to use keys or cols
## that are no longer valid due to being deprecated
## Issues the standard deprecation warning. The deprecated keytypes are
## spliced into the message as a quoted, comma separated list.
.deprecatedColsMessage <- function(){
    depCols <- paste(.listDeprecatedKeytypes(), collapse="','")
    warning(wmsg(paste0("Accessing gene location information via '",
                        depCols,"' is deprecated. Please use a range ",
                        "based accessor like genes(), or select() with ",
                        "columns values like TXCHROM and TXSTART ",
                        "on a TxDb or OrganismDb object instead.\n")))
}

## And one for keys and select to warn if the user tries to use them
## Warns (via .deprecatedColsMessage()) when any deprecated keytype occurs in
## 'keytype'; does nothing otherwise.
.checkForDeprecatedKeytype <- function(keytype){
    if(any(.listDeprecatedKeytypes() %in% keytype )){
        .deprecatedColsMessage()
    }
}

## Need an accessor for getting the central ID for a DB (when appropriate)
## Reads the CENTRALID entry from the metadata table of the DB behind 'x'.
## NOTE(review): the as.character() coercion is assumed so that the result
## can be compared against strings such as "EG" -- confirm.
.getCentralID <- function(x){
  as.character(dbQuery(dbconn(x),
                       "SELECT value FROM metadata WHERE name='CENTRALID'"))
}

## Sometimes we need to translate a centralID into a central keytype.
## Maps the central ID of 'x' to the matching keytype:
##   "EG" -> "ENTREZID", "TAIR" -> "TAIR", "ORF" -> "ORF", "GID" -> "GID".
## Any other central ID falls through the switch() and yields NULL.
chooseCentralOrgPkgSymbol <- function(x){
  centralID <- .getCentralID(x)
  keytype <- switch(EXPR = centralID,
                    "EG" = "ENTREZID",
                    "TAIR" = "TAIR",
                    "ORF" = "ORF",
                    "GID" = "GID")
  keytype
}

## keys methods return the possible primary keys.  So for EG based packages,
## this will be the viable entrez gene IDs.
## Must use SELECT DISTINCT for now because some packages like ag.db
## (Arabidopsis) have repeated probe ids in the probes table (those are the
## probe ids that hit multiple genes).
## TODO: When 'x' has the new slot containing the package name, use
## dbUniqueVals() (defined in SQL.R) and pass pkgname:::datacache to it.
## dbUniqueVals() is what's used behind the scene by the Lkeys/Rkeys/keys
## methods for AnnDbBimap objects so the "keys" methods below will give a
## consistent answer (and will take advantage of the cache).
## helper to get keys
## Looks up the table and field that hold 'keytype' (via .getDBLocs()) and
## returns the distinct values of that field as a plain vector.
.queryForKeys <- function(x, keytype){
  ## NOTE(review): the swap to the org package is assumed to apply to ChipDb
  ## objects only, as in the other "switcheroo" helpers of this file -- confirm.
  if(class(x) == "ChipDb"){
    x <- .getOrgPkg(x)
  }
  table <- .getDBLocs(x, keytype)
  field <- .getDBLocs(x, keytype, value="field")
  sql <- paste("SELECT DISTINCT",field,"FROM",table)
  res <- dbQuery(dbconn(x), sql)
  ## presumably a one column table when no column index is passed to
  ## dbQuery(); flatten it so callers always receive a vector
  as.vector(t(res))
}

## Keys for the schema based (legacy) packages.
## The central keytypes of each object class are read straight from their
## home table; every other keytype is delegated to .queryForKeys().
## Returns the keys as a vector with NA values removed.
.legacyKeys <- function(x, keytype){
  ## have to swap keytype
  ## keytype <- .swapSymbolExceptions(x, keytype)
  keytype <- .simplifyCols(x, keytype)
  ## Some org packages may have entrez genes in weird places...
  centralID <- .getCentralID(x)
  EGgeneTable <- character()
  if(centralID == "EG" || centralID == "ORF"){
    EGgeneTable <- "genes"
  }else if(centralID == "TAIR"){
    EGgeneTable <- "entrez_genes"
  }
  ## now decide
  if(class(x) == "OrgDb" && species(x) != "Plasmodium falciparum"){
    res <- switch(EXPR = keytype,
                  "ENTREZID" = dbQuery(dbconn(x),
                    paste("SELECT gene_id FROM", EGgeneTable), 1L),
                  "TAIR" = dbQuery(dbconn(x),
                    "SELECT gene_id FROM genes", 1L),
                  "ORF" = dbQuery(dbconn(x),
                    "SELECT systematic_name FROM sgd", 1L),
                  "PROBEID" =
                     stop("PROBEID is not supported for Organism packages"),
                  .queryForKeys(x, keytype))
  }
  ## Plasmodium falciparum takes its ORF keys from the gene table instead
  if(class(x) == "OrgDb" && species(x) == "Plasmodium falciparum"){
    res <-  switch(EXPR = keytype,
                   "ORF" = dbQuery(dbconn(x),
                     paste("SELECT gene_id FROM", EGgeneTable), 1L),
                   .queryForKeys(x, keytype))
  }
  if(class(x) == "ChipDb"){
    res <- switch(EXPR = keytype,
                  "ENTREZID" = dbQuery(dbconn(x),
                    "SELECT gene_id FROM probes", 1L),
                  "PROBEID" =  dbQuery(dbconn(x),
                    "SELECT DISTINCT probe_id FROM probes", 1L),
                  .queryForKeys(x, keytype))
  }
  if(class(x) == "GODb"){
    res <- switch(EXPR = keytype,
                  "GOID" =  dbQuery(dbconn(x),
                    "SELECT DISTINCT go_id FROM go_term", 1L),
                  .queryForKeys(x, keytype))
  }
  ## NA is never a usable key, so drop it before returning
  res[!is.na(res)]
}

## special functions for newer NOSCHEMA_DB's
## Finds the single table of the DB behind 'x' that owns the column 'field'
## and returns its name. Stops unless exactly one table matches.
.deriveTableNameFromField <- function(field, x){
    ## NOTE(review): the swap below is assumed to apply to ChipDb objects only;
    ## the exists("y") test further down shows that it is conditional.
    if(class(x) == "ChipDb"){
        y <- x ## Switcheroo
        x <- .getOrgPkg(x)
        try(.attachDB(x,y), silent=TRUE) ## not a disaster if we fail
    }
    con <- dbconn(x)
    tables <- .getDataTables(con)
    ## 'y' only exists when the swap above happened; in that case the probes
    ## table of the attached DB is searched as well
    if(exists("y", inherits=FALSE)){
        tables <- c("c.probes", tables)
    }
    colTabs <- sapply(tables, FUN=dbListFields, con=con, simplify = FALSE)
    ## ignore the tables whose name matches go_[mcb].+
    colTabs <- colTabs[grep("go_[mcb].+", names(colTabs), invert = TRUE)]
    ## one entry per column, named after its table; non-NA where the column
    ## name equals 'field'
    m <- unlist2(sapply(colTabs, match, field, simplify = FALSE))  ## cannot ever be repeated
    tab <- names(m)[!is.na(m)]
    if(length(tab)!=1){stop("Two fields in the source DB have the same name.")}
    tab
}

## Keys for the NOSCHEMA style packages: the keytype is itself the name of
## a column, so the owning table is looked up and that column is selected.
## Returns the keys as a vector with NA values removed.
.noSchemaKeys <- function(x, keytype){
    tab <- .deriveTableNameFromField(field=keytype, x)
    ## So now we know table name (tab) and field (keytype)
    ## NOTE(review): the swap below is assumed to apply to ChipDb objects
    ## only -- confirm.
    if(class(x) == "ChipDb"){
        y <- x ## Switcheroo
        x <- .getOrgPkg(x)
        try(.attachDB(x,y), silent=TRUE) ## not a disaster if we fail
    }
    sql <- paste("SELECT",keytype,"FROM",tab)
    res <- dbQuery(dbconn(x), sql, 1L)
    ## NA is never a usable key, so drop it before returning
    res[!is.na(res)]
}

## general keys function
## General keys function: validates 'keytype', reads the DBSCHEMA entry of
## the metadata of 'x' and dispatches to the schema-less or to the legacy
## implementation accordingly.
.keys <- function(x, keytype){
    testForValidKeytype(x, keytype)
    schema <- metadata(x)[metadata(x)$name=="DBSCHEMA",]$value
    if(schema=="NOSCHEMA_DB" || schema=="NOCHIPSCHEMA_DB"){
        .noSchemaKeys(x, keytype)
    }else{
        .legacyKeys(x, keytype)
    }
}

## So the new idea is that each place where I want to "enhance" keys,
## I should just be able to use a helper to wrap up the actual keys
## method...

## And we need a master helper to tie it all together
## Master helper behind the keys() methods.
##   x       - annotation object, handed to FUN and to select()
##   keytype - the keytype whose keys are wanted
##   ...     - passed on to grep()/agrep() when 'pattern' is supplied
##   pattern - optional pattern; matched against the keys themselves, or
##             against the values of 'column' when 'column' is given too
##   column  - optional column; only keys with a (matching) value in that
##             column are kept
##   fuzzy   - TRUE matches with agrep(), FALSE (the default) with grep()
##   FUN     - the base keys function, called as FUN(x, keytype)
## Returns the selected keys with duplicates removed.
smartKeys <-
    function(x, keytype, ..., pattern, column, fuzzy=FALSE, FUN)
{
    ## check args, then...

    ## FUN is the base keys method
    .keys <- FUN
    ## So 1st we need helpers for other "keys" situations
    ## keys0 is for when we have a pattern we want to match in the keys
    .keys0 <- function(x, keytype, ..., pattern, fuzzy=FALSE)
        {   ## assumes 'pattern' present
            FUN <- if (fuzzy) agrep else grep
            FUN(pattern, .keys(x, keytype), value=TRUE, ...)
        }
    ## keys1 is for when we have a column but no pattern
    ## so we want to filter by column
    .keys1 <- function(x, keytype, ..., column)
        {   ## column acts as filter
            k <- suppressWarnings(select(x, as.character(.keys(x, keytype)),
                                         column, keytype))
            k[[keytype]][ !is.na(k[[column]]) ]
        }
    ## keys2 is for when we have a column, and a pattern to match on that
    ## column, and we want all the keys of a particular keytype that match
    ## that column.
    .keys2 <- function(x, keytype, ..., pattern, column, fuzzy=FALSE)
        {   ## assumes 'pattern', 'column' present
            FUN <- if (fuzzy) agrep else grep
            k <- suppressWarnings(select(x, as.character(.keys(x, keytype)),
                                         column, keytype))
            k[[keytype]][ FUN(pattern, k[[column]], ...) ]
        }

    ## Now decide which function to call...
    if (missing(pattern) && missing(column))
        k <- .keys(x, keytype)
    else if (missing(column))
        k <- .keys0(x, keytype, ..., pattern=pattern, fuzzy=fuzzy)
    else if (missing(pattern))
        k <- .keys1(x, keytype, ..., column=column)
    else
        k <- .keys2(x, keytype, ..., pattern=pattern, column=column,
                    fuzzy=fuzzy)
    ## the same key can come back more than once (e.g. presumably one row per
    ## key/column value pair from select()), so report each key only once
    unique(k)
}

## TODO: don't fail to document all the new arguments (pattern, column and fuzzy)
## keys() for OrgDb objects. When 'keytype' is not supplied it defaults to
## the keytype that corresponds to the central ID of the package; a keytype
## given by the caller is used as is.
setMethod("keys", "OrgDb",
    function(x, keytype, ...){
      if(missing(keytype)){
        keytype <- chooseCentralOrgPkgSymbol(x)
      }
      smartKeys(x=x, keytype=keytype, ..., FUN=.keys)
    }
)

## keys() for ChipDb objects; 'keytype' defaults to "PROBEID".
setMethod("keys", "ChipDb",
    function(x, keytype, ...){
      if(missing(keytype)) keytype <- "PROBEID"
      smartKeys(x=x, keytype=keytype, ..., FUN=.keys)
    }
)

## keys() for GODb objects; 'keytype' defaults to "GOID".
setMethod("keys", "GODb",
    function(x, keytype, ...){
      if(missing(keytype)) keytype <- "GOID"
      smartKeys(x=x, keytype=keytype, ..., FUN=.keys)
    }
)

## keys() for OrthologyDb objects; all the work is delegated to .ontoKeys().
setMethod("keys", "OrthologyDb",
    function(x, keytype, ...){
      .ontoKeys(x, keytype, ...)
    }
)

## new uses for keys:
## now TERM is a real key? (TODO: someone tell the keytypes)
## head(keys(GO.db, keytype="TERM"))

## get TERM keys that match a particular pattern
## head(keys(GO.db, keytype="TERM", pattern="mitochondrion"))

## get GOIDs where a TERM exists.
## head(keys(GO.db, keytype="GOID", column="TERM"))

## get keys of type GOID that go with a pattern match in TERM
## head(keys(GO.db, keytype="GOID", pattern="mitochondrion", column="TERM"))
## select(GO.db, keys =head(keys(GO.db, keytype="GOID", pattern="mitochondrion", column="TERM")), cols=c("GOID","TERM"))

## do the above but use fuzzy matching
## head(keys(GO.db, keytype="GOID", pattern="mitochondrion", column="TERM", fuzzy=TRUE))
## select(GO.db, keys = head(keys(GO.db, keytype="GOID", pattern="mitochondrion", column="TERM", fuzzy=TRUE)), cols=c("GOID","TERM"))

## Can just get keys (straight up)
## head(keys(org.Hs.eg.db, keytype="SYMBOL"))

## keys1 situation works fine (and smartKeys is called twice.)
## Can filter by column (only return keys where there is a value for "PATH"
## length(keys(org.Hs.eg.db, keytype="ENTREZID", column="PATH"))
## is shorter than:
## length(keys(org.Hs.eg.db, keytype="ENTREZID"))

## debug(AnnotationDbi:::smartKeys)

## Can just get keys that match a pattern
## keys(org.Hs.eg.db, keytype="SYMBOL", pattern="BRCA")

## Can get a key that matches a pattern on some other column
## head(keys(org.Hs.eg.db,keytype="ENTREZID",pattern="MSX",column="SYMBOL"))

## keytypes method is to allow the user to specify what kind of keytype is
## passed in to either keys or the select methods.
## Temporarily, this method will be VERY unsophisticated.

## TODO: would like to find a way to restore these blacklisted types to being
## able to be used, but I need a way around the lack of an Rkeys() method etc.

## keytypesBlackList <- c("CHRLOCEND","CHRLOC","PFAM","PROSITE",
##                        "DESCRIPTION", "GENENAME")
## .filterKeytypes <- function(x, baseType, keytypesBlackList){
##   res <- .cols(x, baseType=baseType)
##   res <- res[!res %in% keytypesBlackList]
##   ## append the centralID (if not already present)
##   centralID <- .getCentralID(x)
##   if(centralID == "EG"){ centralID <- "ENTREZID" }
##   res <- c(res, centralID)
##   unique(res)
## }

## keytypes() for OrgDb objects: the columns of 'x' (base type "ENTREZID")
## minus the deprecated keytypes.
## NOTE(review): the filtering step is assumed from the otherwise unused
## .filterDeprecatedKeytypes() helper -- confirm.
setMethod("keytypes", "OrgDb",
    ## function(x) .filterKeytypes(x, baseType="ENTREZID", keytypesBlackList)
    function(x){
        kts <- .cols(x, baseType="ENTREZID")
        .filterDeprecatedKeytypes(kts)
    }
)

## keytypes() for ChipDb objects: the columns of 'x' minus the deprecated
## keytypes. The base type is "PROBEID", in line with the default keytype of
## the keys() method for ChipDb and with the legacy call kept below.
## NOTE(review): the filtering step is assumed from the otherwise unused
## .filterDeprecatedKeytypes() helper -- confirm.
setMethod("keytypes", "ChipDb",
    ## function(x) .filterKeytypes(x, baseType="PROBEID", keytypesBlackList) 
    function(x){
        kts <- .cols(x, baseType="PROBEID")
        .filterDeprecatedKeytypes(kts)
    }
)

## keytypes() for GODb objects: a fixed set of four keytypes.
setMethod("keytypes", "GODb",
    function(x) return(c("GOID","TERM","ONTOLOGY","DEFINITION")) ## only one type makes sense
)

## keytypes() for OrthologyDb objects: the first column of the 'names' table
## of the DB, passed through .justFirstUpper().
setMethod("keytypes", "OrthologyDb",
    function(x){
        .justFirstUpper(dbGetQuery(dbconn(x), "select name from names;")[,1])
    }
)

