### =========================================================================
### Grouping objects
### -------------------------------------------------------------------------
###
### We call "grouping" an arbitrary mapping from a collection of NO objects
### to a collection of NG groups, or, more formally, a bipartite graph
### between integer sets [1, NO] and [1, NG]. Objects mapped to a given group
### are said to belong to, or to be assigned to, or to be in that group.
### Additionally, the objects in each group are ordered. So for example the
### 2 following groupings are considered different:
###
### Grouping 1: NG = 3, NO = 5
### group objects
### 1 : 4, 2
### 2 :
### 3 : 4
###
### Grouping 2: NG = 3, NO = 5
### group objects
### 1 : 2, 4
### 2 :
### 3 : 4
###
### There are no restriction on the mapping e.g. any object can be mapped
### to 0, 1, or more groups, and can be mapped twice to the same group. Also
### some or all the groups can be empty.
###
### The Grouping class is a virtual class that formalizes the most general
### kind of grouping. More specific groupings (e.g. many-to-one mappings)
### are formalized via specific Grouping subclasses.
setClass("Grouping", contains="IntegerList", representation("VIRTUAL"))
setGeneric("nobj", function(x) standardGeneric("nobj"))
setGeneric("grouplengths", signature="x",
function(x, i=NULL) standardGeneric("grouplengths")
)
.subset_by_integer <- function(x, i=NULL)
{
if (is.null(i))
return(x)
if (!is.numeric(i))
stop(wmsg("subscript must be NULL or an integer vector"))
if (!is.integer(i))
i <- as.integer(i)
x_len <- length(x)
if (S4Vectors:::anyMissingOrOutside(i, -x_len, x_len))
stop(wmsg("subscript contains NAs or out of bounds indices"))
x[i]
}
setMethod("grouplengths", "Grouping",
function(x, i=NULL)
{
x_grouplens <- elementNROWS(x)
.subset_by_integer(x_grouplens, i)
}
)
setMethod("show", "Grouping",
function(object)
{
NG <- length(object)
NO <- nobj(object)
cat(class(object), " with ",
NG, ifelse(NG == 1, " group ", " groups "),
"and ", NO, ifelse(NO == 1, " object\n", " objects\n"),
sep="")
if (NG == 0L)
return(invisible(NULL))
empty_groups <- which(grouplengths(object) == 0L)
cat("Nb of empty groups: ", length(empty_groups),
" (", 100.00 * length(empty_groups) / NG, "%)\n", sep="")
}
)
### -------------------------------------------------------------------------
### ManyToOneGrouping objects
### -------------------------
### A ManyToOneGrouping object represents a grouping where every object in
### the collection belongs to exactly one group.
setClass("ManyToOneGrouping", contains="Grouping", representation("VIRTUAL"))
setMethod("nobj", "ManyToOneGrouping", function(x) sum(grouplengths(x)))
setGeneric("members", signature="x",
function(x, i) standardGeneric("members")
)
setMethod("members", "ManyToOneGrouping",
function(x, i)
{
if (!is.numeric(i))
stop(wmsg("subscript 'i' must be a vector of integers"))
if (!is.integer(i))
i <- as.integer(i)
sort(unlist(sapply(i, function(ii) x[[ii]])))
}
)
setGeneric("vmembers", signature="x",
function(x, L) standardGeneric("vmembers")
)
setMethod("vmembers", "ManyToOneGrouping",
function(x, L)
{
if (!is.list(L))
stop(wmsg("'L' must be a list of integer vectors"))
lapply(L, function(i) members(x, i))
}
)
setGeneric("togroup", signature="x",
function(x, j=NULL) standardGeneric("togroup")
)
### Works on any ManyToOneGrouping object 'x' for which unlist() and
### elementNROWS() work.
setMethod("togroup", "ManyToOneGrouping",
function(x, j=NULL)
{
x_togroup <- unlist(x, use.names=FALSE)
x_eltNROWS <- elementNROWS(x)
x_togroup[x_togroup] <- rep.int(seq_along(x_eltNROWS), x_eltNROWS)
.subset_by_integer(x_togroup, j)
}
)
setGeneric("togrouplength", signature="x",
function(x, j=NULL) standardGeneric("togrouplength")
)
setMethod("togrouplength", "ManyToOneGrouping",
function(x, j=NULL) grouplengths(x, togroup(x, j))
)
### -------------------------------------------------------------------------
### ManyToManyGrouping objects
### -------------------------
### A ManyToManyGrouping object represents a grouping where objects
### can map to any number of groups.
setClass("ManyToManyGrouping", contains="Grouping", representation("VIRTUAL"))
### -------------------------------------------------------------------------
### BiIndexGrouping objects
### -----------------------
#setClass("BiIndexGrouping",
# contains="ManyToOneGrouping",
# representation(
# group2object="list",
# object2group="integer"
# )
#)
#setMethod("length", "BiIndexGrouping", function(x) length(x@group2object))
#setMethod("nobj", "BiIndexGrouping", function(x) length(x@object2group))
### -------------------------------------------------------------------------
### H2LGrouping and Dups objects
### ----------------------------
###
### High-to-Low Index ManyToOneGrouping objects.
###
setClass("H2LGrouping",
contains="ManyToOneGrouping",
representation(
high2low="integer",
low2high="list"
)
)
### For storing the grouping implicitly defined by the "duplicated"
### relationship between elements of an arbitrary vector.
setClass("Dups", contains="H2LGrouping")
### Two additional accessors for H2LGrouping objects.
setGeneric("high2low", function(x) standardGeneric("high2low"))
setMethod("high2low", "H2LGrouping", function(x) x@high2low)
setGeneric("low2high", function(x) standardGeneric("low2high"))
setMethod("low2high", "H2LGrouping", function(x) x@low2high)
### 'length(x)' and 'nobj(x)' are the same.
setMethod("length", "H2LGrouping", function(x) length(x@low2high))
setMethod("nobj", "H2LGrouping", function(x) length(x@high2low))
setMethod("getListElement", "H2LGrouping",
function(x, i, exact=TRUE)
{
i <- normalizeDoubleBracketSubscript(i, x, exact=exact)
if (is.na(x@high2low[i]))
c(i, x@low2high[[i]])
else
integer()
}
)
### Should be more efficient than the default method for ManyToOneGrouping
### objects.
setMethod("grouplengths", "H2LGrouping",
function(x, i=NULL)
{
x_grouplens <- elementNROWS(x@low2high) + 1L
x_grouplens[!is.na(x@high2low)] <- 0L
.subset_by_integer(x_grouplens, i)
}
)
setMethod("members", "H2LGrouping",
function(x, i)
{
if (!is.numeric(i))
stop(wmsg("subscript 'i' must be a vector of integers"))
if (!is.integer(i))
i <- as.integer(i)
## NAs and "subscript out of bounds" are checked at the C level
.Call2("C_members_H2LGrouping", x, i, PACKAGE="IRanges")
}
)
setMethod("vmembers", "H2LGrouping",
function(x, L)
{
if (!is.list(L))
stop(wmsg("'L' must be a list of integer vectors"))
.Call2("C_vmembers_H2LGrouping", x, L, PACKAGE="IRanges")
}
)
setMethod("togroup", "H2LGrouping",
function(x, j=NULL)
{
x_togroup <- x@high2low
x_togroup[is.na(x_togroup)] <- which(is.na(x_togroup))
.subset_by_integer(x_togroup, j)
}
)
### The default method should be as good (if not better) as this.
#setMethod("togrouplength", "H2LGrouping",
# function(x)
# {
# ans <- rep.int(1L, length(x))
# mapped_lows <- setdiff(unique(x@high2low), NA)
# for (low in mapped_lows) {
# ii <- as.integer(c(low, x@low2high[[low]]))
# ans[ii] <- length(ii)
# }
# ans
# }
#)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### More operations on H2LGrouping objects. These operations are NOT part of
### the core Grouping API.
###
### The rank of group G_i is the number of non-empty groups that are before
### G_i plus one. Or, equivalently, it's the number of non-empty groups with
### an index <= i.
setGeneric("grouprank", signature="x",
function(x, i=NULL) standardGeneric("grouprank")
)
setMethod("grouprank", "H2LGrouping",
function(x, i=NULL)
{
ans <- cumsum(is.na(high2low(x)))
if (!is.null(i))
ans <- ans[i]
return(ans)
}
)
### togrouprank() returns the mapping from objects to group ranks.
### An important property of togrouprank() is that:
### togrouprank(x, neg_idx)
### and
### seq_along(neg_idx)
### are identical, where 'neg_idx' is the vector of the indices of
### the non-empty groups i.e.
### neg_idx <- which(grouplengths(x) != 0L)
setGeneric("togrouprank", signature="x",
function(x, j=NULL) standardGeneric("togrouprank")
)
setMethod("togrouprank", "H2LGrouping",
function(x, j=NULL)
{
to_group <- togroup(x)
group_rank <- grouprank(x)
ans <- group_rank[to_group]
if (!is.null(j))
ans <- ans[j]
return(ans)
}
)
setReplaceMethod("length", "H2LGrouping",
function(x, value)
{
if (!isSingleNumber(value))
stop(wmsg("length must be a single integer"))
if (!is.integer(value))
value <- as.integer(value)
if (value < 0L)
stop(wmsg("length cannot be negative"))
if (value > length(x))
stop(wmsg("cannot make a ", class(x), " instance longer"))
length(x@high2low) <- value
x@low2high <- S4Vectors:::reverseSelfmatchMapping(x@high2low)
x
}
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Validity.
###
.valid.H2LGrouping <- function(x)
{
if (!is.integer(x@high2low))
return("the 'high2low' slot must contain an integer vector")
if (!all(x@high2low >= 1L, na.rm=TRUE))
return("the 'high2low' slot must contain integer values >= 1")
if (!all(x@high2low < seq_along(x@high2low), na.rm=TRUE)) {
problem <- c("when mapped, elements in the 'high2low' slot must be mapped ",
"to elements at a lower position")
return(paste(problem, collapse=""))
}
if (!all(is.na(x@high2low[x@high2low]))) {
problem <- c("when mapped, elements in the 'high2low' slot must be mapped ",
"to unmapped elements")
return(paste(problem, collapse=""))
}
if (!is.list(x@low2high))
return("the 'low2high' slot must contain a list")
if (length(x@high2low) != length(x@low2high))
return("the 'high2low' and 'low2high' slots must have the same length")
if (!identical(S4Vectors:::reverseSelfmatchMapping(x@high2low),
x@low2high))
{
problem <- c("the 'low2high' slot must contain the reverse mapping ",
"of the 'high2low' slot")
return(paste(problem, collapse=""))
}
NULL
}
setValidity("H2LGrouping",
function(object)
{
problems <- .valid.H2LGrouping(object)
if (is.null(problems)) TRUE else problems
}
)
### For Dups objects only.
.duplicated.Dups <- function(x, incomparables=FALSE)
{
if (!identical(incomparables, FALSE))
stop(wmsg("\"duplicated\" method for Dups objects ",
"only accepts 'incomparables=FALSE'"))
!is.na(high2low(x))
}
setMethod("duplicated", "Dups", .duplicated.Dups)
### For Dups objects only.
setMethod("show", "Dups",
function(object)
{
percentage <- 100.00 * sum(duplicated(object)) / length(object)
cat(class(object), " of length ", length(object),
" (", percentage, "% of duplicates)\n", sep="")
}
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Constructors.
###
.newH2LGrouping <- function(Class, high2low)
{
if (!is.numeric(high2low))
stop(wmsg("'high2low' must be a vector of integers"))
if (!is.integer(high2low))
high2low <- as.integer(high2low)
new2(Class, high2low=high2low,
low2high=S4Vectors:::reverseSelfmatchMapping(high2low),
check=FALSE)
}
H2LGrouping <- function(high2low=integer())
.newH2LGrouping("H2LGrouping", high2low)
Dups <- function(high2low=integer())
.newH2LGrouping("Dups", high2low)
setMethod("high2low", "ANY",
function(x)
{
ans <- selfmatch(x)
ans[ans == seq_along(x)] <- NA_integer_
ans
}
)
### -------------------------------------------------------------------------
### GroupingRanges objects
### ----------------------
###
### A GroupingRanges object represents a "block-grouping", that is, a
### grouping where each group is a block of adjacent elements in the original
### collection of objects. GroupingRanges objects support the IPosRanges
### API (e.g. start/end/width) in addition to the Grouping API.
###
setClass("GroupingRanges",
## We put IPosRanges before Grouping so GroupingRanges objects inherit
## the "show" method for IPosRanges objects instead of the method for
## Grouping objects.
contains=c("IPosRanges", "Grouping"),
representation("VIRTUAL")
)
### Overwrite default method with optimized method for GroupingRanges objects.
setMethod("grouplengths", "GroupingRanges",
function(x, i=NULL)
{
x_width <- width(x)
.subset_by_integer(x_width, i)
}
)
setClass("GroupingIRanges", contains=c("IRanges", "GroupingRanges"))
### -------------------------------------------------------------------------
### Partitioning objects
### --------------------
###
### A Partitioning object is a GroupingRanges object where the blocks fully
### cover the original collection of objects and don't overlap. This makes
### them many-to-one groupings. Furthermore, the blocks must be ordered by
### ascending position on the original collection of objects.
### Note that for a Partitioning object 'x', 'togroup(x)' is sorted in
### increasing order (not necessarily strictly increasing).
###
### The Partitioning class is virtual with 2 concrete direct subclasses:
### PartitioningByEnd and PartitioningByWidth.
###
setClass("Partitioning",
contains=c("GroupingRanges", "ManyToOneGrouping"),
representation(
"VIRTUAL",
NAMES="character_OR_NULL" # R doesn't like @names !!
),
prototype(
NAMES=NULL
)
)
setMethod("parallel_slot_names", "Partitioning",
function(x) c("NAMES", callNextMethod())
)
setMethod("extractROWS", "Partitioning",
function(x, i)
{
i <- normalizeSingleBracketSubscript(i, x)
if (!isStrictlySorted(i))
stop(wmsg("Partitioning objects only support subsetting by a ",
"strictly sorted subscript that drops empty partitions"))
x_nobj <- nobj(x)
ans <- callNextMethod()
if (nobj(ans) != x_nobj)
stop(wmsg("Partitioning objects only support subsetting by a ",
"strictly sorted subscript that drops empty partitions"))
ans
}
)
setMethod("bindROWS", "Partitioning",
function(x, objects=list(), use.names=TRUE, ignore.mcols=FALSE, check=TRUE)
stop(wmsg("Partitioning objects don't support concatenation"))
)
### The default methods below assume that the "length + start/end/width" API
### is already implemented.
setMethod("getListElement", "Partitioning",
function(x, i, exact=TRUE)
{
i <- normalizeDoubleBracketSubscript(i, x, exact=exact)
## The purpose of the code below is to extract 'start(x)[i] - 1'
## (stored in 'ans_shift') and 'width(x)[i]' (stored in 'ans_len')
## in the fastest possible way. Looks like a convoluted way to
## extract those 2 values but it is actually 1000x faster than the
## naive way.
ans_shift <- 0L
ans_len <- end(x)[i]
if (i >= 2L) {
ans_shift <- end(x)[i - 1L]
ans_len <- ans_len - ans_shift
}
seq_len(ans_len) + ans_shift
}
)
### Overwrite method for ManyToOneGrouping objects with optimized method for
### Partitioning objects.
setMethod("togroup", "Partitioning",
function(x, j=NULL)
{
x_width <- width(x)
x_togroup <- rep.int(seq_along(x_width), x_width)
.subset_by_integer(x_togroup, j)
}
)
setMethod("names", "Partitioning", function(x) x@NAMES)
setReplaceMethod("names", "Partitioning", set_IRanges_names)
setMethod("NSBS", "Partitioning",
function(i, x, exact=TRUE, strict.upper.bound=TRUE, allow.NAs=FALSE)
{
i <- range(i)
callNextMethod()
})
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### PartitioningByEnd uses a compact internal representation that allows
### fast mapping from groups to objects. However, it is not efficient for
### mapping from objects to groups.
###
setClass("PartitioningByEnd",
contains="Partitioning",
representation(
end="integer"
),
prototype(
end=integer()
)
)
setMethod("parallel_slot_names", "PartitioningByEnd",
function(x) c("end", callNextMethod())
)
setMethod("end", "PartitioningByEnd", function(x) x@end)
### Overwrite method for Ranges objects with optimized method for
### PartitioningByEnd objects.
setMethod("length", "PartitioningByEnd", function(x) length(end(x)))
### Overwrite method for ManyToOneGrouping objects with optimized method for
### PartitioningByEnd objects.
setMethod("nobj", "PartitioningByEnd",
function(x) S4Vectors:::last_or(end(x), 0L)
)
setMethod("start", "PartitioningByEnd",
function(x)
{
x_end <- end(x)
if (length(x_end) == 0L)
return(integer())
c(1L, x_end[-length(x_end)] + 1L)
}
)
setMethod("width", "PartitioningByEnd",
function(x) S4Vectors:::diffWithInitialZero(end(x))
)
.valid.PartitioningByEnd <- function(x)
{
if (!is.integer(end(x)))
return("the ends must be integers")
if (length(x) == 0L)
return(NULL)
if (S4Vectors:::anyMissing(end(x)))
return("the ends cannot be NAs")
if (S4Vectors:::isNotSorted(end(x)))
return("the ends must be sorted")
if (end(x)[1L] < 0L)
return("the ends cannot be negative")
if (!is.null(names(end(x))))
return("the ends should not be named")
NULL
}
setValidity2("PartitioningByEnd", .valid.PartitioningByEnd)
.numeric2end <- function(x=integer(0), NG=NULL)
{
if (!is.integer(x))
x <- as.integer(x)
if (S4Vectors:::anyMissingOrOutside(x, 0L))
stop(wmsg("when 'x' is an integer vector, ",
"it cannot contain NAs or negative values"))
if (S4Vectors:::isNotSorted(x))
stop(wmsg("when 'x' is an integer vector, ",
"it must be sorted"))
if (is.null(NG))
return(x)
## When 'NG' (number of groups) is supplied, then 'x' is considered
## to represent the group assignment of a collection of 'length(x)'
## objects. Therefore the values in 'x' must be >= 1 and <= 'NG'.
## ADDITIONALLY, 'x' must be *sorted* (not strictly) so it can be
## reconstructed from the object returned by PartitioningByEnd()
## by doing togroup() on that object.
if (!isSingleNumber(NG))
stop(wmsg("'NG' must be either NULL or a single integer"))
if (!is.integer(NG))
NG <- as.integer(NG)
NO <- length(x) # nb of objects
if (NG == 0L) {
if (NO != 0L)
stop(wmsg("when 'NG' is 0, 'x' must be of length 0"))
} else {
## 'x' is expected to be non-decreasing and with values >= 1
## and <= 'NG'.
x <- cumsum(tabulate(x, nbins=NG))
## 'x[NG]' is guaranteed to be <= 'NO'.
if (x[NG] != NO)
stop(wmsg("when 'NG' is supplied, values in 'x' must ",
"be >= 1 and <= 'NG'"))
}
x
}
.prepare_Partitioning_names <- function(names, ans_len, NG, x_names)
{
if (!is.null(names)) {
if (!is.character(names) || length(names) != ans_len)
stop(wmsg("'names' must be either NULL or a character vector ",
"of length 'NG' (if supplied) or 'length(x)' ",
"(if 'NG' is not supplied)"))
return(names)
}
if (is.null(NG))
return(x_names) # should be of length 'ans_len'
NULL
}
PartitioningByEnd <- function(x=integer(0), NG=NULL, names=NULL)
{
if (is(x, "List") || is.list(x)) {
if (!is.null(NG))
warning(wmsg("'NG' argument is ignored when 'x' ",
"is a list-like object"))
if (is(x, "CompressedList")) {
## Behaves like a getter for the 'partitioning' slot.
ans <- x@partitioning
if (!is.null(names))
names(ans) <- names
return(ans)
}
if (is(x, "PartitioningByEnd")) {
if (!is.null(names))
names(x) <- names
return(x)
}
x_names <- names(x)
if (length(x) == 0L) {
ans_end <- integer(0)
} else {
x_NROWS <- elementNROWS(x)
ans_end <- suppressWarnings(cumsum(x_NROWS))
if (is.na(ans_end[[length(ans_end)]]))
stop(wmsg(class(x)[[1L]], " object 'x' is too big (the ",
"cumulated length of its list elements is >= 2^32)"))
}
} else {
if (!is.numeric(x))
stop(wmsg("'x' must be either a list-like object or ",
"a sorted vector of non-NA non-negative integers"))
x_names <- names(x)
ans_end <- .numeric2end(x, NG)
}
ans_names <- .prepare_Partitioning_names(names, length(ans_end),
NG, x_names)
new2("PartitioningByEnd", end=unname(ans_end), NAMES=ans_names,
check=FALSE)
}
setAs("IntegerRanges", "PartitioningByEnd",
function(from)
{
ans <- PartitioningByEnd(end(from), names=names(from))
if (!identical(start(ans), start(from)))
stop(wmsg("the IntegerRanges object to coerce does not ",
"represent a partitioning"))
ans
}
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### PartitioningByWidth uses a compact internal representation too. Storing
### the widths instead of the ends would allow the total number of objects
### (nobj(x)) to be greater than 2^31-1 but note that some methods will break
### when this happens, e.g. nobj, end, etc...
###
setClass("PartitioningByWidth",
contains="Partitioning",
representation(
width="integer"
),
prototype(
width=integer()
)
)
setMethod("parallel_slot_names", "PartitioningByWidth",
function(x) c("width", callNextMethod())
)
setMethod("width", "PartitioningByWidth", function(x) x@width)
### Overwrite method for Ranges objects with optimized method for
### PartitioningByWidth objects.
setMethod("length", "PartitioningByWidth", function(x) length(width(x)))
setMethod("end", "PartitioningByWidth", function(x) cumsum(width(x)))
setMethod("start", "PartitioningByWidth",
function(x)
{
x_width <- width(x)
if (length(x_width) == 0L)
return(integer())
c(1L, cumsum(x_width[-length(x_width)]) + 1L)
}
)
.valid.PartitioningByWidth <- function(x)
{
if (!is.integer(width(x)))
return("the widths must be integers")
if (length(x) == 0L)
return(NULL)
if (S4Vectors:::anyMissingOrOutside(width(x), 0L))
return("the widths cannot be NAs or negative")
if (!is.null(names(width(x))))
return("the widths should not be named")
NULL
}
setValidity2("PartitioningByWidth", .valid.PartitioningByWidth)
.numeric2width <- function(x=integer(0), NG=NULL)
{
if (!is.integer(x))
x <- as.integer(x)
if (S4Vectors:::anyMissingOrOutside(x, 0L))
stop(wmsg("when 'x' is an integer vector, ",
"it cannot contain NAs or negative values"))
if (is.null(NG))
return(x)
## When 'NG' (number of groups) is supplied, then 'x' is considered
## to represent the group assignment of a collection of 'length(x)'
## objects. Therefore the values in 'x' must be >= 1 and <= 'NG'.
## ADDITIONALLY, 'x' must be *sorted* (not strictly) so it can be
## reconstructed from the object returned by PartitioningByWidth()
## by doing togroup() on that object.
if (S4Vectors:::isNotSorted(x))
stop(wmsg("when 'x' is an integer vector, it must be sorted"))
if (!isSingleNumber(NG))
stop(wmsg("'NG' must be either NULL or a single integer"))
if (!is.integer(NG))
NG <- as.integer(NG)
NO <- length(x) # nb of objects
if (NG == 0L) {
if (NO != 0L)
stop(wmsg("when 'NG' is 0, 'x' must be of length 0"))
} else {
## 'x' is expected to be non-decreasing and with values >= 1
## and <= 'NG'.
x <- tabulate(x, nbins=NG)
## 'sum(x)' is guaranteed to be <= 'NO'.
if (sum(x) != NO)
stop(wmsg("when 'NG' is supplied, values in 'x' must ",
"be >= 1 and <= 'NG'"))
}
x
}
PartitioningByWidth <- function(x=integer(0), NG=NULL, names=NULL)
{
if (is(x, "List") || is.list(x)) {
if (!is.null(NG))
warning(wmsg("'NG' argument is ignored when 'x' ",
"is a list-like object"))
if (is(x, "PartitioningByWidth")) {
if (!is.null(names))
names(x) <- names
return(x)
}
x_names <- names(x)
ans_width <- elementNROWS(x)
} else {
if (!is.numeric(x))
stop(wmsg("'x' must be either a list-like object or ",
"a vector of non-NA non-negative integers"))
x_names <- names(x)
ans_width <- .numeric2width(x, NG)
}
ans_names <- .prepare_Partitioning_names(names, length(ans_width),
NG, x_names)
new2("PartitioningByWidth", width=unname(ans_width), NAMES=ans_names,
check=FALSE)
}
setAs("IntegerRanges", "PartitioningByWidth",
function(from)
{
ans <- PartitioningByWidth(width(from), names(from))
if (!identical(start(ans), start(from)))
stop(wmsg("the IntegerRanges object to coerce does not ",
"represent a partitioning"))
ans
}
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### PartitioningMap contains PartitioningByEnd and one additional slot,
### 'mapOrder', to specify a different order. This object is used by the
### pack() function in GenomicFiles and is put in @partitioning of a
### GRangesList of pack()ed ranges. 'mapOrder' records the order of the
### unpacked() ranges.
###
setClass("PartitioningMap",
contains="PartitioningByEnd",
representation(
mapOrder="integer"
),
prototype(
mapOrder=integer()
)
)
setGeneric("mapOrder", function(x) standardGeneric("mapOrder"))
setMethod("mapOrder", "PartitioningMap", function(x) x@mapOrder)
.valid.PartitioningMap <- function(x)
{
if (length(x) == 0L)
return(NULL)
if (S4Vectors:::anyMissing(mapOrder(x)))
return("mapOrder cannot contain NA values")
if (any(mapOrder(x) < 0L))
return("mapOrder values cannot be negative")
if (!is.null(names(mapOrder(x))))
return("the mapOrder should not be named")
if (length(maporder <- mapOrder(x))) {
maxorder <- max(maporder)
if (max(maporder) > max(end(x)))
return("max mapOrder value must be == max(end(object))")
}
NULL
}
setValidity2("PartitioningMap", .valid.PartitioningMap)
PartitioningMap <- function(x=integer(), mapOrder=integer(), ...)
new("PartitioningMap", PartitioningByEnd(x=x), mapOrder=mapOrder, ...)
setAs("PartitioningByEnd", "PartitioningMap",
function(from)
new("PartitioningMap", from, mapOrder=numeric())
)
setMethod("show", "PartitioningMap",
function(object)
{
cat(class(object), " of length ", length(object), "\n")
cat("mapOrder: ", mapOrder(object), "\n")
print(PartitioningByEnd(object))
}
)
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### findOverlaps()
###
### A simple findOverlaps method that doesn't use NCList but works only
### on a subject with *adjacent* ranges sorted non-decreasingly.
### Can be 30% faster or more than the real findOverlaps() (NCList-based)
### when 'query' is such that 'start(query)' and 'end(query)' are also sorted
### non-decreasingly (which is the case if for example 'query' is a
### Partitioning object).
### TODO: Add a "findOverlaps" method for Partitioning,Partitioning in the
### findOverlaps-methods.R file that calls this.
findOverlaps_IntegerRanges_Partitioning <-
function(query, subject, hit.empty.query.ranges=FALSE,
hit.empty.subject.ranges=FALSE)
{
if (!is(query, "IntegerRanges"))
stop(wmsg("'query' must be an IntegerRanges object"))
if (!is(subject, "Partitioning"))
stop(wmsg("'subject' must be a Partitioning object"))
if (!isTRUEorFALSE(hit.empty.query.ranges) ||
!isTRUEorFALSE(hit.empty.subject.ranges))
stop(wmsg("'hit.empty.query.ranges' and 'hit.empty.subject.ranges' ",
"must be TRUE or FALSE"))
q_len <- length(query)
q_start <- start(query)
q_end <- end(query)
s_len <- length(subject)
s_end <- end(subject)
if (!hit.empty.query.ranges) {
q_idx <- which(width(query) != 0L)
q_start <- q_start[q_idx]
q_end <- q_end[q_idx]
}
if (!hit.empty.subject.ranges) {
s_idx <- which(width(subject) != 0L)
s_end <- s_end[s_idx]
}
vec <- c(0L, s_end) + 0.5
q_start2subject <- findInterval(q_start, vec)
q_end2subject <- findInterval(q_end, vec)
q_hits <- rep.int(seq_along(q_start),
q_end2subject - q_start2subject + 1L)
s_hits <- sequence(q_end2subject - q_start2subject + 1L, q_start2subject)
## If 'query' is a Partitioning object, all hits are guaranteed to be
## valid.
if (!is(query, "Partitioning")) {
## Remove invalid hits.
is_valid <- 1L <= s_hits & s_hits <= length(s_end)
q_hits <- q_hits[is_valid]
s_hits <- s_hits[is_valid]
}
## Remap hits to original query/subject.
if (!hit.empty.query.ranges)
q_hits <- q_idx[q_hits]
if (!hit.empty.subject.ranges)
s_hits <- s_idx[s_hits]
## Make and return Hits object.
Hits(q_hits, s_hits, q_len, s_len, sort.by.query=TRUE)
}
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Old stuff (deprecated & defunct)
###
setMethod("togroup", "ANY",
function(x, j=NULL)
{
msg <- wmsg(
"Using togroup() on a ", class(x), " object is defunct. ",
"Please use togroup(PartitioningByWidth(...)) instead."
)
.Defunct(msg=msg)
}
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.