Nothing
## Inparanoid select methods:
.getBaseSpecies <- function(x){
meta <- metadata(x)
## TODO: switch to helper function:
baseSpecies <- sub(" ","_",meta[meta$name=="ORGANISM","value"])
baseSpecies
}
## match a "set" of cols
.getInpTables <- function(x, cols){
res <- .getTableNames(x)
res <- res[res %in% cols]
unique(res)
}
## the mapping of 5 letter codes to the short names.
.makeFiveLetterMapping <- function(){
fields = c(
"Acyrthosiphon_pisum" = "ACYPI",
"Aedes_aegypti" = "AEDAE",
"Anopheles_gambiae" = "ANOGA",
"Apis_mellifera" = "APIME",
"Arabidopsis_thaliana" = "ARATH",
"Aspergillus_fumigatus" = "ASPFU",
"Batrachochytrium_dendrobatidis" = "BATDE",
"Bombyx_mori" = "BOMMO",
"Bos_taurus" = "BOSTA",
"Branchiostoma_floridae" = "BRAFL",
"Brugia_malayi" = "BRUMA",
"Caenorhabditis_brenneri" = "CAEBRE",
"Caenorhabditis_briggsae" = "CAEBR",
"Caenorhabditis_elegans" = "CAEEL",
"Caenorhabditis_japonica" = "CAEJA",
"Caenorhabditis_remanei" = "CAERE",
"Candida_albicans" = "CANAL",
"Candida_glabrata" = "CANGL",
"Canis_familiaris" = "CANFA",
"Capitella_spI" = "CAPSP",
"Cavia_porcellus" = "CAVPO",
"Chlamydomonas_reinhardtii" = "CHLRE",
"Ciona_intestinalis" = "CIOIN",
"Ciona_savignyi" = "CIOSA",
"Coccidioides_immitis" = "COCIM",
"Coprinopsis_cinereus" = "COPCI",
"Cryptococcus_neoformans" = "CRYNE",
"Cryptosporidium_hominis" = "CRYHO",
"Cryptosporidium_parvum" = "CRYPA",
"Culex_pipiens" = "CULPI",
"Cyanidioschyzon_merolae" = "CYAME",
"Danio_rerio" = "DANRE",
"Daphnia_pulex" = "DAPPU",
"Debaryomyces_hansenii" = "DEBHA",
"Dictyostelium_discoideum" = "DICDI",
"Drosophila_ananassae" = "DROAN",
"Drosophila_grimshawi" = "DROGR",
"Drosophila_melanogaster" = "DROME",
"Drosophila_mojavensis" = "DROMO",
"Drosophila_pseudoobscura" = "DROPS",
"Drosophila_virilis" = "DROVI",
"Drosophila_willistoni" = "DROWI",
"Entamoeba_histolytica" = "ENTHI",
"Equus_caballus" = "EQUCA",
"Escherichia_coliK12" = "ESCCO",
"Fusarium_graminearum" = "FUSGR",
"Gallus_gallus" = "GALGA",
"Gasterosteus_aculeatus" = "GASAC",
"Giardia_lamblia" = "GIALA",
"Helobdella_robusta" = "HELRO",
"Homo_sapiens" = "HOMSA",
"Ixodes_scapularis" = "IXOSC",
"Kluyveromyces_lactis" = "KLULA",
"Leishmania_major" = "LEIMA",
"Lottia_gigantea" = "LOTGI",
"Macaca_mulatta" = "MACMU",
"Magnaporthe_grisea" = "MAGGR",
"Monodelphis_domestica" = "MONDO",
"Monosiga_brevicollis" = "MONBR",
"Mus_musculus" = "MUSMU",
"Nasonia_vitripennis" = "NASVI",
"Nematostella_vectensis" = "NEMVE",
"Neurospora_crassa" = "NEUCR",
"Ornithorhynchus_anatinus" = "ORNAN",
"Oryza_sativa" = "ORYSA",
"Oryzias_latipes" = "ORYLA",
"Ostreococcus_tauri" = "OSTTA",
"Pan_troglodytes" = "PANTR",
"Pediculus_humanus" = "PEDPA",
"Physcomitrella_patens" = "PHYPA",
"Phytophthora_ramorum" = "PHYRA",
"Phytophthora_sojae" = "PHYSO",
"Plasmodium_falciparum" = "PLAFA",
"Plasmodium_vivax" = "PLAVI",
"Pongo_pygmaeus" = "PONPY",
"Populus_trichocarpa" = "POPTR",
"Pristionchus_pacificus" = "PRIPA",
"Puccinia_graminis" = "PUCGR",
"Rattus_norvegicus" = "RATNO",
"Rhizopus_oryzae" = "RHIOR",
"Saccharomyces_cerevisiae" = "SACCE",
"Schistosoma_mansoni" = "SCHMA",
"Schizosaccharomyces_pombe" = "SCHPO",
"Sclerotinia_sclerotiorum" = "SCLSC",
"Sorghum_bicolor" = "SORBI",
"Stagonospora_nodorum" = "STANO",
"Strongylocentrotus_purpuratus" = "STRPU",
"Takifugu_rubripes" = "TAKRU",
"Tetrahymena_thermophila" = "TETTH",
"Tetraodon_nigroviridis" = "TETNI",
"Thalassiosira_pseudonana" = "THAPS",
"Theileria_annulata" = "THEAN",
"Theileria_parva" = "THEPA",
"Tribolium_castaneum" = "TRICA",
"Trichomonas_vaginalis" = "TRIVA",
"Trichoplax_adhaerens" = "TRIAD",
"Trypanosoma_cruzi" = "TRYCR",
"Ustilago_maydis" = "USTMA",
"Xenopus_tropicalis" = "XENTR",
"Yarrowia_lipolytica" = "YARLI"
)
fields
}
.getBaseFiveCode <- function(baseSpecies){
fiveMap <- .makeFiveLetterMapping()
baseFiveCode <- fiveMap[names(fiveMap) %in% baseSpecies]
}
## helpers
.getLCcolnames <- function(x){
con <- dbconn(x)
list <- dbListTables(con)
## drop unwanted tables
unwanted <- c("map_counts","map_metadata","metadata")
list <- list[!list %in% unwanted]
## Add baseSpecies to the front...
list <- c(.getBaseSpecies(x), list)
}
.inpCols <- function(x, type="all"){
list <- .getLCcolnames(x)
## Then just to format things in the usual way
list <- toupper(list)
if(type=="noBase"){
baseSpecies <- .getBaseSpecies(x)
list <- list[!(list %in% toupper(baseSpecies))]
}
list
}
setMethod("columns", "InparanoidDb", function(x){.inpCols(x)})
## currently, I think the base type CAN be a valid keytype.
setMethod("keytypes", "InparanoidDb", function(x){.inpCols(x)})
## setMethod("keytypes", "InparanoidDb", function(x){.inpCols(x,type="noBase")})
## new helper to map Table names to UC names used by cols and keytypes
.getTableNames <- function(x){
LC <- .getLCcolnames(x)
UC <- .inpCols(x)
names(UC) <- LC
UC
}
.keysInp <- function(x, keytype){
## argument checking
testForValidKeytype(x, keytype)
## translate keytype back to table name
tabNames <- .getTableNames(x)
lckeytype <- names(tabNames[tabNames %in% keytype])
## get the baseSpecies
baseSpecies <- .getBaseSpecies(x)
baseFiveCode <- .getBaseFiveCode(baseSpecies)
## get connection to DB
con <- dbconn(x)
if(baseSpecies != lckeytype){
sql <- paste("SELECT inp_id FROM", lckeytype,
paste0("WHERE species!='",baseFiveCode,"'"))
res <- dbGetQuery(con, sql)
res <- as.vector(t(res))
}else{
res <- character()
fiveMap <- .makeFiveLetterMapping()
## remove the baseSpecies from fiveMap names to get all real tables
tables <- names(fiveMap)[!(names(fiveMap) %in% baseSpecies)]
for(i in seq_len(length(tables))){
sql <- paste("SELECT inp_id FROM", tables[i],
paste0("WHERE species=='",baseFiveCode,"'"))
rs <- dbGetQuery(con, sql)
rs <- as.vector(t(rs))
res <- unique(c(res, rs)) ## should not be too bad
}
}
as.character(res)
}
setMethod("keys", "InparanoidDb",
function(x, keytype, ...){
if(missing(keytype)){stop("Please supply a keytype argument.")}
#.keysInp(x, keytype)
smartKeys(x=x, keytype=keytype, ..., FUN=.keysInp)
}
)
## usage:
## head(keys(hom.Hs.inp.db, keytype="HOMO_SAPIENS"))
## head(keys(hom.Hs.inp.db, keytype="HOMO_SAPIENS", pattern="4$"))
## k = head(keys(hom.Hs.inp.db, keytype="HOMO_SAPIENS", pattern="4$", column="RATTUS_NORVEGICUS"))
## select(hom.Hs.inp.db, keys=k, cols=c("HOMO_SAPIENS","RATTUS_NORVEGICUS"), keytype="HOMO_SAPIENS")
## length(keys(hom.Hs.inp.db, keytype="HOMO_SAPIENS", column="APIS_MELLIFERA"))
## length(keys(hom.Hs.inp.db, keytype="HOMO_SAPIENS"))
##############################################################################
## Select is more complicated, but I should be able to implement it similar to
## how I did it for Reactome.db
.UCToStandard <- function(strVec){
strVec <- tolower(strVec)
firstLett <- toupper(substr(strVec,1,1))
rest <- substr(strVec,2,nchar(strVec))
paste0(firstLett, rest)
}
## This is for making simple queries
## SELECT base.inp_id,alt.inp_id FROM
## (SELECT * FROM Mus_musculus WHERE species='HOMSA' AND seed_status='100%')
## AS base,
## (SELECT * FROM Mus_musculus WHERE species='MUSMU' AND seed_status='100%')
## AS alt WHERE base.clust_id=alt.clust_id
## AND alt.inp_id IN ('ENSMUSP00000097561','ENSMUSP00000051825','ENSMUSP00000074773','ENSMUSP00000074340','ENSMUSP00000093245','ENSMUSP00000095433');
## The trouble is that this query seems extremely slow... Addding an index
## helps, but it is still awful.
## .extractWithSimpleInpQuery <- function(x, table, keys, keytype,
## baseFiveCode, fiveMap){
## fiveCode <- fiveMap[names(fiveMap) %in% table]
## subQueryBase <- paste0("(SELECT * FROM ", table,
## " WHERE species='",baseFiveCode,
## "' AND seed_status='100%')")
## subQueryAlt <- paste0("(SELECT * FROM ", table,
## " WHERE species='",fiveCode,
## "' AND seed_status='100%')")
## ## base or not? - based on the keytype (always one of two options)
## inTableClause <- character()
## fiveMap <- .makeFiveLetterMapping()
## if(keytype==names(fiveMap)[fiveMap %in% baseFiveCode]){
## inTableClause <- 'base.inp_id'
## }else{
## inTableClause <- 'alt.inp_id'
## }
## ## generate a simple query for each table
## sql <- paste("SELECT base.inp_id,alt.inp_id FROM ",
## subQueryBase, "AS base,", subQueryAlt,
## "AS alt WHERE base.clust_id=alt.clust_id",
## "AND",inTableClause,"IN",
## paste0("('",paste(keys, collapse="','"),"')"))
## ## then extract
## res <- dbQuery(dbconn(x), sql)
## }
###########################################################################
## New idea:
## It might actually be faster to just get the subqueries for each in another
## set of helpers, and then to merge them in R on the clust_ids...
## This is indeed about a billion times faster.
.extractWithSimpleInpQuery <- function(x, table, keys, keytype,
baseFiveCode, fiveMap){
fiveCode <- fiveMap[names(fiveMap) %in% table]
## Base query for Alt portion
subQueryAlt <- paste0("SELECT * FROM ", table,
" AS alt WHERE species='",fiveCode,
"' AND seed_status='100%'")
## Base query for base portion
subQueryBase <- paste0("SELECT * FROM ", table,
" AS base WHERE species='",baseFiveCode,
"' AND seed_status='100%'")
## Clause to append to whichever of these matches the keytype
inClause <- paste( "AND inp_id IN",
paste0("('",paste(keys, collapse="','"),"')"))
## base or not? WHO gets the inClause?
fiveMap <- .makeFiveLetterMapping()
if(keytype==names(fiveMap)[fiveMap %in% baseFiveCode]){
subQueryBase <- paste(subQueryBase, inClause)
}else{
subQueryAlt <- paste(subQueryAlt, inClause)
}
## then extract
resBase <- dbQuery(dbconn(x), subQueryBase)
resAlt <- dbQuery(dbconn(x), subQueryAlt)
## then merge as an inner join on clust_id for each
res <- merge(resBase, resAlt, by.x="clust_id", by.y="clust_id")
res <- res[,c("inp_id.x","inp_id.y")]
colnames(res) <- c("base.inp_id",fiveCode)
res
}
## This calls .extractWithSimpleInpQuery for each table and merges results
.collateInpQueryResults <- function(x, tables, keys, keytype, fiveMap,
baseFiveCode, baseSpecies){
mergeID <- "base.inp_id"
res <- data.frame()
for(i in seq_len(length(tables))){
if(i==1){
if(tables[i]==baseSpecies){
## This means that my keytype == baseSpecies
res <- keys(x, keytype=toupper(baseSpecies))
res <- res[res %in% keys]
res <- as.data.frame(res)
colnames(res) <- mergeID
}else{ ## Otherwise, we must query the DB
res <- .extractWithSimpleInpQuery(x, tables[i], keys, keytype,
baseFiveCode, fiveMap)
}
}else{
if(i==2){ ## on 2nd pass, set up these vals
mergeKeys <- res[[mergeID]]
mkeytype <- baseSpecies
}
if(tables[i] == baseSpecies){
## This means that one of later cols == baseSpecies
res <- cbind(res, res[mergeID])
colnames(res)[dim(res)[2]] <- baseFiveCode
}else{
res <- merge(res,
.extractWithSimpleInpQuery(x, tables[i], mergeKeys,
mkeytype, baseFiveCode,
fiveMap),
by.x=mergeID, by.y=mergeID,
all.x=TRUE, all.y=TRUE)
}
}
}
## last thing is to put baseFiveCode instead of "base.inp_id"
colnames(res)[colnames(res) == "base.inp_id"] <- baseFiveCode
res
}
## function for making select happen
.selectInp <- function(x, keys, cols, keytype){
## Some argument checking
testSelectArgs(x, keys=keys, cols=cols, keytype=keytype)
## filter out keys that are not legit (just from the DB query)
ktKeys = keys(x, keytype=keytype)
qkeys <- keys[keys %in% ktKeys]
## now I need to go through each table, and for each I want to extract the
## missing piece with a SIMPLE query (basically get ONE thing), and then
## append it onto the results
baseSpecies <- .getBaseSpecies(x)
fiveMap <- .makeFiveLetterMapping()
baseFiveCode <- .getBaseFiveCode(baseSpecies)
## collate possible types (type must ALWAYS be in front)
tables <-unique(c(keytype,cols[!(cols %in% keytype)]))
res <- .collateInpQueryResults(x, .UCToStandard(tables), qkeys, keytype,
fiveMap, baseFiveCode, baseSpecies)
## Setup to call resort_base
## reqCols must have exactly the same stuff as in tables, but in same format
## as header from res
reqCols <- fiveMap[toupper(names(fiveMap)) %in% tables]
## colType is the table abbreviation that matches the initial keytype.
colType <- fiveMap[toupper(names(fiveMap)) %in% keytype]
## now drop any unrequested cols
res <- res[,reqCols,drop=FALSE]
## And then resort/tidy etc.
res <- resort_base(res, keys, jointype=colType, reqCols=reqCols)
## Then match to the colnames
colnames(res) <- toupper(names(fiveMap)[match(colnames(res),fiveMap)])
## return results
res
}
## select method
setMethod("select", "InparanoidDb",
function(x, keys, columns, keytype, ...){
## if (missing(keytype)) keytype <- "ENTREZID"
kt <- "ENTREZID"
.selectWarnInp(x, keys, columns, keytype,kt=kt, ...)
## .selectInp(x, keys, columns, keytype)
}
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.