#########################################################################/**
# @RdocFunction updateCel
#
# @title "Updates a CEL file"
#
# @synopsis
#
# \description{
# @get "title".
# }
#
# \arguments{
# \item{filename}{The filename of the CEL file.}
# \item{indices}{A @numeric @vector of cell (probe) indices specifying
# which cells to updated. If @NULL, all indices are considered.}
# \item{intensities}{A @numeric @vector of intensity values to be stored.
# Alternatively, it can also be a named @data.frame or @matrix (or @list)
# where the named columns (elements) are the fields to be updated.}
# \item{stdvs}{A optional @numeric @vector.}
# \item{pixels}{A optional @numeric @vector.}
# \item{writeMap}{An optional write map.}
# \item{...}{Not used.}
# \item{verbose}{An @integer specifying how much verbose details are
# outputted.}
# }
#
# \value{
# Returns (invisibly) the pathname of the file updated.
# }
#
# \details{
# Currently only binary (v4) CEL files are supported.
# The current version of the method does not make use of the Fusion SDK,
# but its own code to navigate and update the CEL file.
# }
#
# @examples "../incl/updateCel.Rex"
#
# @author "HB"
#
# @keyword "file"
# @keyword "IO"
#*/#########################################################################
updateCel <- function(filename, indices=NULL, intensities=NULL, stdvs=NULL, pixels=NULL, writeMap=NULL, ..., verbose=0) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'filename':
if (!file.exists(filename)) {
stop("Cannot update CEL file. File not found: ", filename);
}
header <- readCelHeader(filename);
version <- header$version;
if (!version %in% 4) {
stop("Updating CEL v", version, " files is not supported: ", filename);
}
nbrOfCells <- header$total;
# Argument 'indices':
if (is.null(indices)) {
nbrOfIndices <- nbrOfCells;
} else {
# A CEL file has one-based indices
r <- range(indices);
if (r[1] < 1 || r[2] > nbrOfCells) {
stop("Argument 'indices' is out of range [1,", nbrOfCells, "]: ",
"[", r[1], ",", r[2], "]");
}
nbrOfIndices <- length(indices);
}
# Argument 'intensities':
if (is.matrix(intensities)) {
intensities <- as.data.frame(intensities);
}
if (is.list(intensities) || is.data.frame(intensities)) {
if (is.list(intensities)) {
fields <- names(intensities);
} else {
fields <- colnames(intensities);
}
if (is.null(stdvs) && ("stdvs" %in% fields)) {
stdvs <- intensities[["stdvs"]];
}
if (is.null(pixels) && ("pixels" %in% fields)) {
pixels <- intensities[["pixels"]];
}
if ("intensities" %in% fields) {
intensities <- intensities[["intensities"]];
}
}
# Argument 'intensities':
if (!is.null(intensities)) {
if (!is.double(intensities))
intensities <- as.double(intensities);
if (length(intensities) != nbrOfIndices) {
stop("Number of 'intensities' values does not match the number of cell indices: ", length(intensities), " != ", nbrOfIndices);
}
}
# Argument 'stdvs':
if (!is.null(stdvs)) {
if (!is.double(stdvs))
stdvs <- as.double(stdvs);
if (length(stdvs) != nbrOfIndices) {
stop("Number of 'stdvs' values does not match the number of cell indices: ", length(stdvs), " != ", nbrOfIndices);
}
}
# Argument 'pixels':
if (!is.null(pixels)) {
if (!is.integer(pixels))
pixels <- as.integer(pixels);
if (length(pixels) != nbrOfIndices) {
stop("Number of 'pixels' values does not match the number of cell indices: ", length(pixels), " != ", nbrOfIndices);
}
}
# Argument 'writeMap':
if (!is.null(writeMap)) {
writeMap <- .assertMap(writeMap, nbrOfCells);
}
# Argument 'verbose':
if (length(verbose) != 1)
stop("Argument 'verbose' must be a single integer.");
verbose <- as.integer(verbose);
if (!is.finite(verbose))
stop("Argument 'verbose' must be an integer: ", verbose);
# Nothing to do?
if (nbrOfIndices == 0) {
return(invisible(filename));
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Reorder data such that it is written in optimal order
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reorder <- TRUE;
if (is.null(indices)) {
# Has write map?
if (is.null(writeMap)) {
indices <- 1:nbrOfIndices;
reorder <- FALSE;
} else {
indices <- writeMap;
}
}
if (reorder) {
if (verbose >= 2)
cat("Re-ordering data for optimal write order...");
o <- order(indices);
indices <- indices[o];
if (!is.null(intensities))
intensities <- intensities[o];
if (!is.null(stdvs))
stdvs <- stdvs[o];
if (!is.null(pixels))
pixels <- pixels[o];
o <- NULL; # Not needed anymore
if (verbose >= 2)
cat("done.\n");
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Write data to file
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
version <- header$version;
if (version == 4) {
# Open CEL file
con <- file(filename, open="r+b");
on.exit(close(con));
# Skip CEL header
if (verbose >= 2)
cat("Skipping to beginging of data section...");
h <- .readCelHeaderV4(con);
# "Cell entries - this consists of an intensity value, standard
# deviation value and pixel count for each cell in the array.
# The values are stored by row then column starting with the X=0,
# Y=0 cell. As an example, the first five entries are for cells
# defined by XY coordinates: (0,0), (1,0), (2,0), (3,0), (4,0).
# Type: (float, float, short) = 4 + 4 + 2 = 10 bytes / cell
# cellData <- c(readFloat(con), readFloat(con), readShort(con));
sizeOfCell <- 10;
# Current file position
dataOffset <- seek(con, origin="start", rw="read");
if (verbose >= 2)
cat("done.\n");
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Update in chunks
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
CHUNK.SIZE <- 2^19; # = 524288 indices == 5.2Mb
CHUNK.SIZE <- 2^20; # = 1048576 indices == 10.5Mb
# Work with zero-based indices
indices <- indices - 1;
count <- 1;
offset <- dataOffset;
nbrOfChunks <- ceiling(length(indices) / CHUNK.SIZE);
while (length(indices) > 0) {
if (verbose >= 1) {
cat(sprintf("Number of indices left: %d\n", length(indices)));
cat(sprintf("Updating chunk #%d of %d...\n", count, nbrOfChunks));
}
# Recall: All indices are ordered!
# Shift offset to the first index.
firstIndex <- indices[1];
offset <- offset + sizeOfCell*firstIndex;
# Shift indices such that first index is zero.
indices <- indices - firstIndex;
# Get largest index
maxIndex <- indices[length(indices)];
# verbose && cat(verbose, "Largest index: ", maxIndex, "\n");
# Identify the indices to update such no more than CHUNK.SIZE cells
# are read/updated.
n <- which.max(indices >= CHUNK.SIZE);
if (n == 1)
n <- length(indices);
subset <- 1:n;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Read the data section of the CEL file
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (verbose >= 1)
cat("Reading chunk data section...");
seek(con, origin="start", where=offset, rw="read");
rawAll <- readBin(con=con, what="raw", n=sizeOfCell*(indices[n]+1));
if (verbose >= 1)
cat("done.\n");
# Common to all fields
raw <- NULL;
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Update 'intensities'
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (!is.null(intensities)) {
if (verbose >= 1)
cat("Updating 'intensities'...");
# Write floats (size=4) to raw vector
raw <- raw(length=4*n);
raw <- writeBin(con=raw, intensities[subset], size=4, endian="little");
intensities <- intensities[-subset]; # Not needed anymore
# Updated 'rawAll' accordingly
idx <- rep(sizeOfCell*indices[subset], each=4) + 1:4;
rawAll[idx] <- raw;
idx <- NULL; # Not needed anymore
if (verbose >= 1)
cat("done.\n");
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Update 'stdvs'
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (!is.null(stdvs)) {
if (verbose >= 1)
cat("Updating 'stdvs'...");
# Write floats (size=4) to raw vector
if (length(raw) != 4*n)
raw <- raw(length=4*n);
raw <- writeBin(con=raw, stdvs[subset], size=4, endian="little");
stdvs <- stdvs[-subset]; # Not needed anymore
# Updated 'rawAll' accordingly
idx <- rep(sizeOfCell*indices[subset], each=4) + 5:8;
rawAll[idx] <- raw;
idx <- NULL; # Not needed anymore
if (verbose >= 1)
cat("done.\n");
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Update 'pixels'
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (!is.null(pixels)) {
raw <- NULL; # Not needed anymore
if (verbose >= 1)
cat("Updating 'pixels'...");
# Write short integers (size=2) to raw vector
raw <- raw(length=2*n);
raw <- writeBin(con=raw, pixels[subset], size=2, endian="little");
pixels <- pixels[-subset]; # Not needed anymore
# Updated 'rawAll' accordingly
idx <- rep(sizeOfCell*indices[subset], each=2) + 9:10;
rawAll[idx] <- raw;
idx <- NULL; # Not needed anymore
if (verbose >= 1)
cat("done.\n");
}
raw <- NULL; # Not needed anymore
# Remove updated indices
indices <- indices[-subset];
subset <- NULL; # Not needed anymore
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Write raw data back to file
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (verbose >= 1)
cat("Writing chunk to data section...");
seek(con, origin="start", where=offset, rw="write");
writeBin(con=con, rawAll);
if (verbose >= 1)
cat("done.\n");
rawAll <- NULL; # Not needed anymore
if (verbose >= 1)
cat(sprintf("Updating chunk #%d of %d...done\n", count, nbrOfChunks));
count <- count + 1;
} # while (...)
} # if (version ...)
invisible(filename);
}
############################################################################
# HISTORY:
# 2007-01-04
# o Added argument 'writeMap'.
# 2006-08-19
# o BUG FIX: Wow wow wow. This one was tricky to find. If not specifying
# the 'rw' argument in seek() it defaults to "", which is not "read" as
# I naively though (because I did not read the inner details of ?seek),
# but the latest call to seek. In other words, since I at the end of
# every "chunk" loop call seek(..., rw="write") the seek(..., [rw=""])
# was equal to a seek(..., rw="write"), but I wanted seek(..., rw="read")!
# That made updateCel() do funny things and write to the wrongs parts
# of the file etc.
# 2006-08-18
# o BUG FIX: The new implementation where data was written to raw vectors
# was incorrect. A lot of extra zeros was written. The "eastern egg" in
# the updated example contains an image from http://tpo.berkeley.edu/.
# 2006-08-14
# o BUG FIX: updateCel() would in some cases give "Error: subscript out of
# bounds" when writing the last chunk.
# 2006-07-22
# o Update updateCel() to update data in chunks, because updating the
# complete data section is expensive. For example, a 500K chip has
# 6553600 cells each of size 10 bytes, i.e. >65Mb or raw memory. With
# copying etc it costs >100-200Mb of memory to update a CEL file if only
# the first *and* the last cell is updated. Now it should only be of
# the order of 10-20Mb per chunk.
# o Added verbose output to updateCel().
# o Now updateCel() deallocates objects as soon as possible in order to
# free up as much memory as possible. Had memory problems with the 500K's.
# 2006-07-21
# o updateCel() was really slow when updating a large number of cells.
# Now the idea is to write to raw vectors stored in memory. By reading
# the chunk of the CEL data section that is going to be updated as a raw
# data vector and then updating this in memory first, and the re-write
# that chuck of raw data to file, things are much faster.
# o BUG FIX: updateCel(..., indices=NULL) would generate an error, because
# we tried to reorder by order(indices).
# 2006-06-19
# o Replace 'data' argument with arguments 'intensities', 'stdvs', and
# 'pixels'. /HB
# 2006-06-18
# o First version can update CEL v4 (binary) cell entries. Note that this
# code does not make use of the Fusion SDK library. This may updated in
# the future, but for now we just want something that works.
# o Created. /HB
############################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.