### =========================================================================
### randomSparseArray() and poissonSparseArray()
### -------------------------------------------------------------------------
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### randomSparseArray()
###
### Returns an SVT_SparseArray object of type "double".
randomSparseArray <- function(dim, density=0.05, dimnames=NULL)
{
if (!is.numeric(dim))
stop(wmsg("'dim' must be an integer vector"))
if (!is.integer(dim))
dim <- as.integer(dim)
if (!isSingleNumber(density) || density < 0 || density > 1)
stop(wmsg("'density' must be a number >= 0 and <= 1"))
## Start with an empty sparse array.
ans <- new_SVT_SparseArray(dim, dimnames=dimnames, type="double")
## Add the nonzero values to it.
ans_len <- length(ans)
nzcount <- as.integer(ans_len * density)
Lindex <- sample.int(ans_len, nzcount)
nzvals <- signif(rnorm(nzcount), 2)
if (nzcount <= .Machine$integer.max) {
## Using an Mindex seems to be slightly faster (4%-5%) than using an
## Lindex but we can only do this when the resulting Mindex matrix
## has < 2^31 rows.
ans[Lindex2Mindex(Lindex, dim(ans))] <- nzvals
} else {
ans[Lindex] <- nzvals
}
ans
}
randomSparseMatrix <- function(nrow=1L, ncol=1L, density=0.05, dimnames=NULL)
{
if (!isSingleNumber(nrow) || !isSingleNumber(ncol))
stop(wmsg("'nrow' and 'ncol' must be single integers"))
randomSparseArray(c(nrow, ncol), density=density, dimnames=dimnames)
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### poissonSparseArray()
###
### Like stats::rpois() but slightly faster and implementation is much
### simpler. Only for 0 <= 'lambda' <= 4.
### NOT exported.
simple_rpois <- function(n, lambda)
SparseArray.Call("C_simple_rpois", n, lambda)
### Returns an SVT_SparseArray object of type "integer".
### Density of the returned object is expected to be about '1 - exp(-lambda)'.
### Default for 'lambda' is set to -log(0.95) which should produce an object
### with an expected density of 0.05.
poissonSparseArray <- function(dim, lambda=-log(0.95), density=NA,
dimnames=NULL)
{
dim <- S4Arrays:::normarg_dim(dim)
if (!missing(lambda) && !identical(density, NA))
stop(wmsg("only one of 'lambda' and 'density' can be specified"))
if (!missing(lambda)) {
if (!isSingleNumber(lambda) || lambda < 0)
stop(wmsg("'lambda' must be a non-negative number"))
} else if (!identical(density, NA)) {
if (!isSingleNumber(density) || density < 0 || density >= 1)
stop(wmsg("'density' must be a number >= 0 and < 1"))
lambda <- -log(1 - density)
}
ans_SVT <- SparseArray.Call("C_poissonSparseArray", dim, lambda)
new_SVT_SparseArray(dim, dimnames=dimnames, type="integer", SVT=ans_SVT,
check=FALSE)
}
### Replacement for rpois() when 'n' is big and 'lambda' is small.
### For example:
### .sparse_rpois(3e9, 0.005) # takes about 1 min. and uses < 1G of RAM
### rpois(3e9, 0.005) # takes about 55 sec. and uses 12G of RAM
.sparse_rpois <- function(n, lambda, chunksize=5e6L)
{
if (n == 0L)
return(list(integer(0), integer(0)))
nzidx_env <- new.env(parent=emptyenv())
nzvals_env <- new.env(parent=emptyenv())
offset <- 0 # double to avoid integer overflow when n >= 2^31
k <- 1L
while (offset < n) {
nn <- n - offset
if (nn > chunksize)
nn <- chunksize
vals <- rpois(nn, lambda)
nzidx <- nzwhich(vals)
key <- sprintf("%04d", k)
assign(key, offset + nzidx, envir=nzidx_env)
assign(key, vals[nzidx], envir=nzvals_env)
offset <- offset + nn
k <- k + 1L
}
nzidx <- as.list(nzidx_env, all.names=TRUE, sorted=TRUE)
nzidx <- unlist(nzidx, recursive=FALSE, use.names=FALSE)
nzvals <- as.list(nzvals_env, all.names=TRUE, sorted=TRUE)
nzvals <- unlist(nzvals, recursive=FALSE, use.names=FALSE)
list(nzidx, nzvals)
}
### Solution based on .sparse_rpois(). Equivalent to poissonSparseArray()
### but slower and uses more memory e.g.
###
### poissonSparseArray2(c(1e5, 2e4), density=0.02)
###
### is about 3x slower uses about 2.5x more memory than
###
### poissonSparseArray(c(1e5, 2e4), density=0.02)
###
### NOT exported
poissonSparseArray2 <- function(dim, lambda=-log(0.95), density=NA)
{
dim <- S4Arrays:::normarg_dim(dim)
if (!missing(lambda) && !identical(density, NA))
stop(wmsg("only one of 'lambda' and 'density' can be specified"))
if (!missing(lambda)) {
if (!isSingleNumber(lambda) || lambda < 0)
stop(wmsg("'lambda' must be a non-negative number"))
} else {
if (!isSingleNumber(density) || density < 0 || density >= 1)
stop(wmsg("'density' must be a number >= 0 and < 1"))
lambda <- -log(1 - density)
}
## Start with an empty sparse array.
ans <- new_SVT_SparseArray(dim, type="integer")
## Add the nonzero values to it.
ans_len <- length(ans)
srp <- .sparse_rpois(ans_len, lambda)
ans[srp[[1L]]] <- srp[[2L]]
ans
}
poissonSparseMatrix <- function(nrow=1L, ncol=1L, lambda=-log(0.95), density=NA,
dimnames=NULL)
{
if (!isSingleNumber(nrow) || !isSingleNumber(ncol))
stop(wmsg("'nrow' and 'ncol' must be single integers"))
if (missing(lambda)) {
poissonSparseArray(c(nrow, ncol), density=density,
dimnames=dimnames)
} else {
poissonSparseArray(c(nrow, ncol), lambda=lambda, density=density,
dimnames=dimnames)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.