Nothing
setMethodS3("readGeneAssignments", "AffymetrixNetAffxCsvFile", function(this, ..., unique=TRUE, parse=TRUE, fields=NULL, flatten=TRUE, na.rm=TRUE, verbose=FALSE) {
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Validate arguments
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Argument 'unique':
unique <- Arguments$getLogical(unique)
# Argument 'unique':
parse <- Arguments$getLogical(parse)
knownFields <- c("accession", "geneSymbol", "geneTitle", "cytoband", "entrezGeneId")
if (!is.null(fields)) {
fields <- match.arg(fields, choices=knownFields, several.ok=TRUE)
}
# Argument 'flatten':
flatten <- Arguments$getLogical(flatten)
# Argument 'na.rm':
na.rm <- Arguments$getLogical(na.rm)
na.rm <- (na.rm || flatten)
# Argument 'verbose':
verbose <- Arguments$getVerbose(verbose)
if (verbose) {
pushState(verbose)
on.exit(popState(verbose))
}
verbose && enter(verbose, "Reading gene assignments from ", class(this)[1])
verbose && print(verbose, this)
# Force unique?
if (flatten && !unique) {
unique <- TRUE
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Read data
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
verbose && enter(verbose, "Reading annotation data")
colClasses <- c("*"="NULL", "(transcriptClusterId|probesetId|geneAssignment)"="character")
colClasses <- c("*"="NULL", "(transcriptClusterId|probesetId|unigene)"="character")
map <- readDataFrame(this, colClasses=colClasses, ...)
verbose && cat(verbose, "Number of entries : ", nrow(map))
verbose && cat(verbose, "Number of unique probesetIds: ", length(unique(map$probesetId)))
verbose && cat(verbose, "Number of unique transcriptClusterIds: ", length(unique(map$transcriptClusterId)))
verbose && exit(verbose)
if (unique) {
verbose && enter(verbose, "Dropping duplicated entries")
n0 <- nrow(map)
map <- unique(map)
n <- nrow(map)
if (n < n0) {
verbose && printf(verbose, "Dropped %d (%.2f%%) out of %d duplicated entries\n", n-n0, 100*(n-n0)/n0, n0)
verbose && cat(verbose, "Number of unique entries: ", nrow(map))
} else {
verbose && printf(verbose, "All %d entries are unique.\n", n)
}
verbose && exit(verbose)
}
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Parse
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (parse) {
verbose && enter(verbose, "Parsing entries")
rows <- seq_len(nrow(map))
pairs <- map$geneAssignment
verbose && cat(verbose, "Number of entries: ", length(rows))
pairs <- strsplit(pairs, split=" /// ", fixed=TRUE)
if (unique) {
verbose && enter(verbose, "Dropping duplicates")
pairs <- lapply(pairs, FUN=unique)
verbose && cat(verbose, "Number of entries: ", length(rows))
verbose && exit(verbose)
}
ok <- !sapply(pairs, FUN=function(x) any(is.na(x)))
nok <- sum(ok)
verbose && printf(verbose, "Number of entries with annotations: %d (%.2f%%) out of %d\n", nok, 100*nok/length(ok), length(ok))
if (na.rm) {
verbose && enter(verbose, "Dropping entries without annotation")
rows <- rows[ok]
pairs <- pairs[ok]
# Update
ok <- !sapply(pairs, FUN=function(x) any(is.na(x)))
# Sanity check
stopifnot(all(ok))
verbose && cat(verbose, "Number of entries: ", length(rows))
verbose && cat(verbose, "Number of unique probesetIds: ", length(unique(map$probesetId[rows])))
verbose && cat(verbose, "Number of unique transcriptClusterIds: ", length(unique(map$transcriptClusterId[rows])))
verbose && exit(verbose)
}
# Valid number of annotation fields per entry
nFields <- c(2L, 5L)
# Infer number of fields from data?
if (length(nFields) > 1) {
verbose && enter(verbose, "Inferring number of annotation fields")
x <- pairs[ok][[1]]
x <- strsplit(x, split=" // ", fixed=TRUE)
ns <- sapply(x, FUN=length)
n <- unique(ns)
# Sanity check
stopifnot(length(n) == 1)
stopifnot(is.element(n, nFields))
nFields <- n
verbose && cat(verbose, "Number of annotation fields: ", nFields)
verbose && exit(verbose)
}
knownFields <- knownFields[1:nFields]
verbose && cat(verbose, "Annotation fields: ", hpaste(knownFields, maxHead=Inf))
verbose && enter(verbose, "Parsing annotation field")
pairs[ok] <- lapply(pairs[ok], FUN=function(x) {
x <- strsplit(x, split=" // ", fixed=TRUE)
ns <- sapply(x, FUN=length)
# Sanity check
stopifnot(all(ns == nFields))
x <- unlist(x, use.names=FALSE)
dimNA(x) <- c(nFields,NA)
t(x)
})
verbose && exit(verbose)
# Remove any duplicates?
if (unique) {
verbose && enter(verbose, "Removing duplicates")
pairs[ok] <- lapply(pairs[ok], FUN=unique)
verbose && exit(verbose)
}
verbose && enter(verbose, "Adding column names")
verbose && cat(verbose, "Column names: ", hpaste(knownFields, maxHead=Inf))
# [1] HuGene-1_0-st-v1.na31.AFFX_README.NetAffx-CSV-Files.txt
ok <- !sapply(pairs, FUN=function(x) any(is.na(x)))
pairs[ok] <- lapply(pairs[ok], FUN=function(x) {
colnames(x) <- knownFields
x
})
verbose && exit(verbose)
# Check argument 'fields' again
if (is.null(fields)) {
fields <- knownFields
}
# Extract a subset of fields?
if (!all(fields == knownFields)) {
verbose && enter(verbose, "Extracting fields of interest")
verbose && cat(verbose, "Fields: ", hpaste(fields, maxHead=Inf))
ok <- !sapply(pairs, FUN=function(x) any(is.na(x)))
pairs[ok] <- lapply(pairs[ok], FUN=function(x) {
x[,fields,drop=FALSE]
})
if (unique) {
verbose && enter(verbose, "Dropping duplicated entries")
pairs[ok] <- lapply(pairs[ok], FUN=unique)
verbose && exit(verbose)
}
verbose && exit(verbose)
}
# Sanity check
stopifnot(length(pairs) == length(rows))
# Flatten?
if (flatten) {
verbose && enter(verbose, "Flattens list to table")
verbose && cat(verbose, "Number of entries: ", length(rows))
# Sanity check
stopifnot(all(ok))
verbose && enter(verbose, "Identifying blocks of unique sizes")
ns <- sapply(pairs, FUN=NROW, simplify=TRUE, USE.NAMES=FALSE)
uns <- sort(unique(ns))
verbose && print(verbose, uns)
verbose && exit(verbose)
verbose && enter(verbose, "Stacking by block size")
ids <- map$transcriptClusterId
ids <- map$probesetId
ids <- ids[rows]
verbose && cat(verbose, "Number of entries: ", length(rows))
verbose && cat(verbose, "Number of ids: ", length(ids))
verbose && cat(verbose, "Number of unique ids: ", length(unique(ids)))
unitNames <- idxs <- c()
for (ii in seq_along(uns)) {
n <- uns[ii]
verbose && enter(verbose, sprintf("Size %d (n=%d) of %d", ii, n, length(uns)))
keep <- which(ns == n)
verbose && str(verbose, keep)
nII <- length(keep)
unitNamesII <- ids[keep]
unitNamesII <- rep(unitNamesII, each=n)
idxsII <- rep(seq_len(n), times=nII)
unitNames <- c(unitNames, unitNamesII)
idxs <- c(idxs, idxsII)
verbose && exit(verbose)
} # for (ii ...)
# Sanity checks
stopifnot(length(unitNames) == length(idxs))
stopifnot(length(idxs) == sum(ns))
verbose && exit(verbose)
verbose && enter(verbose, "Stacking unit entries")
dataList <- pairs
verbose && cat(verbose, "Number of entries: ", length(dataList))
chunkSizes <- c(3000, rep(5, times=100))
while (length(dataList) > 1) {
len <- length(dataList)
chunkSize <- chunkSizes[1]
starts <- seq(from=1, to=len, by=chunkSize)
ends <- starts + chunkSize - 1L
ends[length(ends)] <- len
dataList2 <- vector("list", length(starts))
for (kk in seq_along(starts)) {
verbose && enter(verbose, sprintf("Chunk #%d of %d", kk, length(starts)))
idxs <- starts[kk]:ends[kk]
dataKK <- Reduce(rbind, dataList[idxs])
dataList2[[kk]] <- dataKK
verbose && exit(verbose)
} # for (kk ...)
dataList <- dataList2
chunkSizes <- chunkSizes[-1]
} # for (chunkSize ...)
data <- dataList[[1]]
rownames(data) <- NULL
verbose && exit(verbose)
verbose && enter(verbose, "Building final table")
# Sanity check
stopifnot(length(unitNames) == nrow(data))
data <- cbind(unitName=unitNames, index=idxs, data)
rownames(data) <- NULL
verbose && exit(verbose)
verbose && enter(verbose, "Coercing to data frame")
data <- as.data.frame(data, stringsAsFactors=FALSE)
verbose && exit(verbose)
pairs <- data
verbose && exit(verbose)
} # if (flatten)
res <- pairs
verbose && exit(verbose)
} # if (parse)
verbose && exit(verbose)
res
}) # readGeneAssignments()
setMethodS3("getHeaderAttributes", "AffymetrixNetAffxCsvFile", function(this, ...) {
hdr <- getHeader(this, ...)
comments <- hdr$comments
attrs <- grep("^#%", comments, value=TRUE)
attrs <- gsub("^#%", "", attrs)
pattern <- "([^=]*)=(.*)"
keys <- gsub(pattern, "\\1", attrs)
values <- gsub(pattern, "\\2", attrs)
values <- trim(values)
names(values) <- keys
attrs <- as.list(values)
attrs
}) # getHeaderAttributes()
setMethodS3("getGenomeBuild", "AffymetrixNetAffxCsvFile", function(this, ...) {
attrs <- getHeaderAttributes(this, ...)
attrs <- attrs[grep("^genome-version", names(attrs))]
if (length(attrs) == 0) {
return(NULL)
}
keys <- names(attrs)
if (is.element("genome-version", keys)) {
res <- attrs[["genome-version"]]
} else if (is.element("genome-version-ucsc", keys)) {
res <- attrs[["genome-version-ucsc"]]
} else {
res <- attrs[[1]]
}
res
})
setMethodS3("getNetAffxBuild", "AffymetrixNetAffxCsvFile", function(this, ...) {
attrs <- getHeaderAttributes(this, ...)
res <- attrs[["netaffx-annotation-netaffx-build"]]
res
})
setMethodS3("getNetAffxDate", "AffymetrixNetAffxCsvFile", function(this, ...) {
attrs <- getHeaderAttributes(this, ...)
res <- attrs[["netaffx-annotation-date"]]
res <- as.Date(res)
res
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.