########################################################################/**
# @RdocFunction parseDatHeaderString
#
# @title "Parses a DAT header string"
#
# \description{
# @get "title".
# }
#
# @synopsis
#
# \arguments{
# \item{header}{A @character string.}
# \item{timeFormat}{The format string used to parse the timestamp.
# For more details, see \code{\link[base:strptime]{strptime}()}.
# If @NULL, no parsing is done.}
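# \item{...}{Additional arguments passed to
# \code{\link[base:strptime]{strptime}()} when \code{timeFormat} is
# not @NULL.}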
# }
#
# \value{
# Returns a named @list structure.
# }
#
# \seealso{
# @see "readCelHeader".
# }
#
# @author "HB"
#
# @keyword programming
# @keyword file
# @keyword IO
# @keyword internal
#**/#######################################################################
parseDatHeaderString <- function(header, timeFormat="%m/%d/%y %H:%M:%S", ...) {
  # Parses a DAT header string into a named list.
  #
  # Arguments:
  #  header:     A character string.
  #  timeFormat: Format string handed to strptime() for the timestamp.
  #              If NULL, the timestamp is returned as a character string.
  #  ...:        Passed on to strptime().
  #
  # Value:
  #  A named list with elements nbrOfPixelColumns, nbrOfPixelRows,
  #  pixelWidth, pixelHeight, scanSpeed, temperature, laserPower,
  #  timestamp and chipType.  Elements that cannot be located in the
  #  header are NA.

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Local functions
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # Strips leading and trailing tabs, newlines, form feeds, carriage
  # returns and spaces.
  trim <- function(s, ...) {
    s <- as.character(s);
    s <- sub("^[\t\n\f\r ]*", "", s);
    s <- sub("[\t\n\f\r ]*$", "", s);
    s;
  } # trim()

  # <quote>
  # There are several sub-fields in this field. The first sub field is the
  # scanner ID, sometimes followed by a number, followed by three spaces.
  # If the scanner ID is absent, the field consists of four spaces.
  # Next are 10 structured comment fields. Each field is preceded by the
  # delimiter 0x14 and a space. The field is followed by a space and 0x14.
  # Only field two is valid, the other 9 fields are obsolete. Field 2
  # contains the probe array type, followed by .1sq. The 1sq extension is
  # also obsolete.
  # Next (after the last structured field) there is the chip orientation
  # preceded by a space.
  # The rest of the field is filled with nulls (zeros)
  # </quote>

  # Split by ASCII 0x14 delimiter (octal \024) surrounded by spaces.
  parts <- strsplit(header, split=" \024 ", fixed=TRUE)[[1]];
  field <- parts[1];
  commentFields <- parts[-1];

  res <- list();

  # Capture groups: 1=CLS, 2=RWS, 3=XIN, 4=YIN, 5=VE, 6=temperature,
  # 7=laser power.
  # NOTE(review): groups 6 and 7 are the last 11 characters of the first
  # part of the header; confirm this matches where the temperature and
  # laser power fields sit in real DAT headers.
  pattern <- ".*CLS=(.{5})RWS=(.{5})XIN=(.{3})YIN=(.{3})VE=(.{3}).*(.{7})(.{4})";

  # Without a match, sub() would hand back the input unchanged, so only
  # extract groups when the pattern is known to match; otherwise yield NA.
  hasFields <- isTRUE(grepl(pattern, field));
  getField <- function(idx) {
    if (!hasFields) {
      return(NA_character_);
    }
    sub(pattern, paste0("\\", idx), field);
  } # getField()

  # Number of pixels per row (padded with spaces), preceded with "CLS=".
  res$nbrOfPixelColumns <- as.integer(getField(1L));

  # Number of rows in the image (padded with spaces), preceded with "RWS=".
  res$nbrOfPixelRows <- as.integer(getField(2L));

  # Pixel width in micrometers (padded with spaces), preceded with "XIN="
  res$pixelWidth <- as.double(getField(3L));

  # Pixel height in micrometers (padded with spaces), preceded with "YIN="
  res$pixelHeight <- as.double(getField(4L));

  # Scan speed in millimeters per second (padded with spaces),
  # preceded with "VE=".
  res$scanSpeed <- as.double(getField(5L));

  # Temperature in degrees Celsius (padded with spaces). If no temperature was
  # set then the entire field is empty.
  res$temperature <- as.double(getField(6L));

  # Laser power in milliwatts or microwatts (padded with spaces).
  # This is the seventh capture group (not the sixth, which is temperature).
  res$laserPower <- as.double(getField(7L));

  # Extract the date timestamp ("mm/dd/yy HH:MM:SS") from the full header.
  # Entries without a recognizable timestamp become NA instead of echoing
  # the header back.
  pattern <- ".*([01][0-9]/[0-3][0-9]/[0-9][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]).*";
  timestamp <- rep(NA_character_, times=length(header));
  hasTimestamp <- grepl(pattern, header);
  timestamp[hasTimestamp] <- trim(sub(pattern, "\\1", header[hasTimestamp]));

  # Parse the identified timestamp; strptime() returns a POSIXlt object.
  if (!is.null(timeFormat)) {
    timestamp <- strptime(timestamp, format=timeFormat, ...);
    # If no timestamp value is available at all, return NA.
    if (length(as.character(timestamp)) == 0) {
      timestamp <- as.POSIXct(NA);
    }
  }
  res$timestamp <- timestamp;

  # The chip type is the second structured comment field, with the obsolete
  # ".1sq" extension dropped.
  res$chipType <- trim(gsub("[.]1sq", "", commentFields[2]));

  res;
} # parseDatHeaderString()
############################################################################
# HISTORY:
# 2009-09-21
# o Created from internal code in aroma.affymetrix.
############################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.