###########################################################################/**
# @RdocClass DChipGenomeInformation
#
# @title "The DChipGenomeInformation class"
#
# \description{
# @classhierarchy
#
# This class represents dChip genome information files, which typically
# contains information about chromosomal locations of the units.
# }
#
# @synopsis
#
# \arguments{
# \item{...}{Arguments passed to @see "GenomeInformation".}
# }
#
# \section{Fields and Methods}{
# @allmethods "public"
# }
#
# \details{
# The dChip genome information files for various chip types can be
# downloaded from \url{https://sites.google.com/site/dchipsoft/}.
# Put each file in a
# directory named identically as the corresponding chip type under the
# \emph{annotations/} directory, e.g.
# \emph{annotations/Mapping50K_Hind240/50k hind genome info AfAm
# june 05 hg17.xls}.
# Note that dChip changes the filename and file format slightly between
# chip types, but currently the @seemethod "byChipType" basically searches
# for files with names consisting of \code{"genome info"} or
# \code{"genome_info"}. At least for the most common chip types, there
# is no need to rename the files in order for this class to recognize them.
# }
#
# @author "HB"
#*/###########################################################################
setConstructorS3("DChipGenomeInformation", function(...) {
this <- extend(GenomeInformation(...), "DChipGenomeInformation")
if (isFile(this)) verify(this)
this
})
setMethodS3("findByChipType", "DChipGenomeInformation", function(static, chipType, version=NULL, ...) {
# Argument 'version':
if (is.null(version))
version <- ".*"
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Search in annotationData/chipTypes/<chipType>/
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
pattern <- sprintf("^.*( |_)genome( |_)info(| |_).*%s[.](txt|xls)$",
version)
pathname <- findAnnotationDataByChipType(chipType, pattern)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# As a backup search the "old" style
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (is.null(pathname)) {
path <- filePath("annotations", chipType)
path <- Arguments$getReadablePath(path, mustExist=FALSE)
if (isDirectory(path)) {
pathnames <- list.files(path=path, pattern=pattern, full.names=TRUE)
nfiles <- length(pathnames)
if (nfiles > 1) {
pathnames <- sort(pathnames)
warning("Found more than one matching dChip genome information file, but returning only the last one: ", paste(pathnames, collapse=", "))
pathnames <- rev(pathnames)
pathname <- pathnames[1]
}
}
}
pathname
}, static=TRUE, protected=TRUE)
###########################################################################/**
# @RdocMethod byChipType
#
# @title "Defines a DChipGenomeInformation object by chip type"
#
# \description{
# @get "title".
# }
#
# @synopsis
#
# \arguments{
# \item{chipType}{A @character string.}
# \item{version}{An optional @character string specifying the version
# string, if more than one version is available.}
# \item{...}{Not used.}
# \item{verbose}{See @see "R.utils::Verbose".}
# }
#
# \value{
# Returns an @see "DChipGenomeInformation" object.
# If no file was not found, an error is thrown.
# }
#
# \seealso{
# @seeclass
# }
#
# @keyword IO
# @keyword programming
#*/###########################################################################
setMethodS3("byChipType", "DChipGenomeInformation", function(static, chipType, version=NULL, ..., verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Defining ", class(static)[1], " from chip type")
verbose && cat(verbose, "Chip type: ", chipType)
verbose && cat(verbose, "Version: ", version)
# Search for the genome information file
pathname <- findByChipType(static, chipType, version=version, ..., verbose=verbose)
verbose && cat(verbose, "Located pathname: ", pathname)
if (is.null(pathname))
throw("Failed to located dChip genome information: ", chipType)
verbose && enter(verbose, "Instantiating ", class(static)[1])
res <- newInstance(static, pathname)
verbose && print(verbose, res)
verbose && exit(verbose)
verbose && exit(verbose)
res
})
setMethodS3("verify", "DChipGenomeInformation", function(this, ...) {
tryCatch({
df <- readDataFrame(this, nrow=10)
}, error = function(ex) {
throw("File format error of the dChip genome information file (",
ex$message, "): ", getPathname(this))
})
invisible(TRUE)
}, private=TRUE)
setMethodS3("readDataFrame", "DChipGenomeInformation", function(this, units=NULL, ..., .orderByUnits=FALSE) {
readFcns <- list(
"^GenomeWideSNP" = readGenomeWideSNP,
"^Mapping10K" = read50KHg17,
"^Mapping50K" = read50KHg17,
"^Mapping250K" = read250KHg17,
"^Mouse430" = readMouse430,
"^Mouse430KS" = readMouse430KenHardwired
)
# Get the chip type; this requires that there is a CDF too.
tryCatch({
chipType <- getChipType(this)
}, error = function(ex) {
print(ex)
})
# Validate 'units'?
if (!is.null(units)) {
# Locate the CDF
cdf <- AffymetrixCdfFile$byChipType(chipType)
units <- Arguments$getIndices(units, max=nbrOfUnits(cdf))
}
# Try to read with the designated read function.
res <- NULL
for (kk in seq_along(readFcns)) {
pattern <- names(readFcns)[kk]
if (regexpr(pattern, chipType) != -1) {
readFcn <- readFcns[[kk]]
tryCatch({
res <- readFcn(this, ...)
}, error=function(ex) {
print(ex)
})
}
}
# If failed, re-try using all read functions.
if (is.null(res)) {
for (kk in seq_along(readFcns)) {
readFcn <- readFcns[[kk]]
tryCatch({
res <- readFcn(this, ...)
}, error = function(ex) {
print(ex)
})
if (!is.null(res))
break
}
}
if (is.null(res)) {
throw("Cannot read dChip annotation data. No predefined read function available for this chip type: ", chipType)
}
# Extract units of interest?
if (.orderByUnits) {
cdf <- AffymetrixCdfFile$byChipType(chipType)
units <- 1:nbrOfUnits(cdf)
}
if (!is.null(units)) {
cdf <- AffymetrixCdfFile$byChipType(chipType)
unitNames <- getUnitNames(cdf, units=units)
idxs <- match(unitNames, res[,1])
# Not needed anymore
unitNames <- NULL
res <- res[idxs,,drop=FALSE]
# Not needed anymore
idxs <- NULL
}
res
})
setMethodS3("readGenomeWideSNP", "DChipGenomeInformation", function(this, ..., exclude=c("^Strand$", "^dbSNP RS ID$")) {
colClasses <- c(
"^Probe Set ID$"="character",
"^Chromosome$"="character",
"^Physical Position$"="integer",
"^Strand$"="character",
"^dbSNP RS ID$"="character",
"^$"="NULL" # Remove empty columns
)
readTableInternal(this, pathname=getPathname(this), colClasses=colClasses, exclude=exclude, isPatterns=TRUE, na.strings=c("", "---"), ...)
}, private=TRUE)
setMethodS3("read250KHg17", "DChipGenomeInformation", function(this, ..., exclude=c("Expr1002", "Allele A", "dbSNP RS ID")) {
colClasses <- c(
"Probe Set ID"="character",
"Chromosome"="character",
"Expr1002"="integer",
"Physical Position"="integer",
"Allele A"="character",
"dbSNP RS ID"="character"
)
readTableInternal(this, pathname=getPathname(this), colClasses=colClasses, exclude=exclude, ...)
}, private=TRUE)
setMethodS3("read50KHg17", "DChipGenomeInformation", function(this, ..., exclude=c("Genetic Map", "Strand", "Allele A", "dbSNP RS ID", "Freq AfAm", "heterrate")) {
colClasses <- c(
"Probe Set ID"="character",
"Chromosome"="character",
"Physical Position"="integer",
"Genetic Map"="", # Often empty?!
"Strand"="character",
"Allele A"="character",
"dbSNP RS ID"="character",
"Freq AfAm"="double",
"heterrate"="double" # Often empty?!
)
readTableInternal(this, pathname=getPathname(this), colClasses=colClasses, exclude=exclude, ...)
}, private=TRUE)
setMethodS3("readMouse430KenHardwired", "DChipGenomeInformation", function(this, ...) {
colClasses <- c(
"Probe Set"="character",
"chromosome"="character",
"Physical Position"="integer",
"End"="integer",
"Strand"="character",
"Cytoband"="character"
)
tableData <- readTableInternal(this, pathname=getPathname(this),
colClasses=colClasses, ...)
# ad hoc fix: remove "chr" from chromosome
tableData[,"chromosome"] <- gsub("chr", "", tableData[,"chromosome"])
tableData[,"chromosome"] <- gsub("random", "", tableData[,"chromosome"])
tableData
}, private=TRUE)
# @author "KS"
setMethodS3("readMouse430", "DChipGenomeInformation", function(this, ...) {
colClasses <- c(
"Probe Set"="character",
"chromosome"="character",
"Start"="integer",
"End"="integer",
"Strand"="character",
"Cytoband"="character"
)
tableData <- readTableInternal(this, pathname=getPathname(this),
colClasses=colClasses, ...)
colnames <- colnames(tableData)
colnames <- gsub("^Start", "Physical Position", colnames)
colnames(tableData) <- colnames
# Remove the "chr" prefix from the "chromosome" elements /KS
chr <- tableData[,"chromosome"]
chr <- gsub("chr", "", chr)
# Ad hoc fix: exclude all chrN_random items /KS
chr[grep("_random$", chr)] <- NA
tableData[,"chromosome"] <- chr
tableData
}, private=TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.