Skip to content

range segment exclusion using range endpoints

11 messages · Ben quant, jim holtman, Steve Lianoglou +1 more

#
Here is an example of how you might do it.  It uses a technique of
counting how many items are in a queue based on their arrival times;
it can be used to also find areas of overlap.

Note that it would be best to use a list for the 's' end points

================================
+     queue <- rbind(queue
+                 , c(get(i)[1], 1)  # enter queue
+                 , c(get(i)[2], -1)  # exit queue
+                 )
+ }
[,1] [,2] [,3]
 [1,] -100.00    1    1
 [2,]  -25.50    1    2
 [3,]    0.77    1    3
 [4,]   10.00   -1    2
 [5,]   25.00    1    3
 [6,]   30.00   -1    2
 [7,]   35.00   -1    1
 [8,]   70.00    1    2
 [9,]   80.30   -1    1
[10,]   90.00    1    2
[11,]   95.00   -1    1
[12,]  100.00    1    2
+     cat("start:", queue[i, 1L], '  end:', queue[i + 1L, 1L], "\n")
+ }
start: -100   end: -25.5
start: 35   end: 70
start: 80.3   end: 90
start: 95   end: 100
=========================================
On Sat, May 12, 2012 at 1:54 PM, Ben quant <ccquant at gmail.com> wrote:

  
    
#
Here is some code that I've been fiddling with for years
(since I wanted to provide evidence that our main office
needed more modems and wanted to show how often
both of them were busy).  It does set operations and a
bit more on collections of half-open intervals.  (Hence
it drops zero-length intervals).

Several of the functions could be defined as methods
of standard set operators.

To see what it does try

   r1 <- as.Ranges(bottoms=c(1,3,5,7), tops=c(2, 4, 9, 8))
   r2 <- as.Ranges(bottoms=c(1.5,4,6,7), tops=c(1.7,5,7,9))
   setdiffRanges( as.Ranges(1, 5), as.Ranges(c(2, 3.5), c(3, 4.5)) )
   plot(r1, r2, setdiffRanges(r1,r2), intersectRanges(r1,r2), 
            unionRanges(r1,r2), c(r1,r2), inNIntervals(c(r1,r2), n=2))

You can use Date and POSIXct objects for the endpoints of
the intervals as well.

Bill Dunlap
Spotfire, TIBCO Software
wdunlap tibco.com

# An object of S3-class "Ranges" is a 2-column
# data.frame(bottoms, tops), describing a
# set of half-open intervals, (bottoms[i], tops[i]].
# inRanges is the only function that cares about
# the direction of the half-openness of those intervals,
# but the other rely on half-openness (so 0-width intervals
# are not allowed).

# Use as.Ranges to create a Ranges object from
#   * a matrix whose rows are intervals
#   * a data.frame whose rows are intervals
#   * a vector of interval starts and a vector of interval ends
# The endpoints must be of a class which supports the comparison (<,<=)
# operators and which can be concatenated with the c() function.
# That class must also be able to be in a data.frame and be subscriptable.
# That covers at least numeric, Data, and POSIXct.
# (The plot method only works for numeric endpoints).
# You may input a zero-width interval (with bottoms[i]==tops[i]),
# but the constructors will silently remove it.
as.Ranges <- function(x, ...) UseMethod("as.Ranges")

as.Ranges.matrix <- function(x, ...) {
    # each row of x is an interval
    stopifnot(ncol(x)==2, all(x[,1] <= x[,2]))
    x <- x[x[,1] < x[,2], , drop=FALSE]
    Ranges <- data.frame(bottoms = x[,1], tops = x[,2])
    class(Ranges) <- c("Ranges", class(Ranges))
    Ranges
}

as.Ranges.data.frame <- function(x, ...) {
    # each row of x is an interval
    stopifnot(ncol(x)==2, all(x[,1] <= x[,2]))
    x <- x[x[,1] < x[,2], , drop=FALSE]
    Ranges <- data.frame(bottoms = x[,1], tops = x[,2])
    class(Ranges) <- c("Ranges", class(Ranges))
    Ranges
}

as.Ranges.default <- function(bottoms, tops, ...) {
    # vectors of bottoms and tops of intervals
    stopifnot(all(bottoms <= tops))
    Ranges <- data.frame(bottoms=bottoms, tops=tops)[bottoms < tops, , drop=FALSE]
    class(Ranges) <- c("Ranges", class(Ranges))
    Ranges
}

c.Ranges <- function(x, ...) {
    # combine several Ranges objects into one which lists all the intervals.
    RangesList <- list(x=x, ...)
    Ranges <- x
    for (r in list(...)) {
        Ranges <- rbind(Ranges, r)
    }
    class(Ranges) <- unique(c("Ranges", class(Ranges)))
    Ranges
}

inNIntervals <- function(Ranges, n)
{
    # return Ranges object that describes points that are
    # in at least n intervals in the input Ranges object
    stopifnot(n>0)
    u <- c(Ranges[,1], Ranges[,2])
    o <- order(u)
    u <- u[o]
    jumps <- rep(c(+1L,-1L), each=nrow(Ranges))[o]
    val <- cumsum(jumps)
    as.Ranges(u[val==n & jumps==1], u[val==n-1 & jumps==-1])
}

unionIntervals <- function(Ranges) {
    # combine overlapping and adjacent intervals to create a
    # possibly smaller and simpler, but equivalent, Ranges object
    inNIntervals(Ranges, 1)
}

intersectIntervals <- function(Ranges) {
    # return 0- or 1-row Ranges object containing describing points
    # that are in all the intervals in input Ranges object.
    u <- unname(c(Ranges[,1], Ranges[,2]))
    o <- order(u)
    u <- u[o]
    jumps <- rep(c(+1L,-1L), each=nrow(Ranges))[o]
    val <- cumsum(jumps)
    as.Ranges(u[val==nrow(Ranges) & jumps==1], u[val==nrow(Ranges)-1 & jumps==-1])
}

unionRanges <- function(x, ...) {
    unionIntervals(rbind(x, ...))
}

setdiffRanges <- function (x, y) 
{
    # set difference: return Ranges object describing points that are in x but not y
    x <- unionIntervals(x)
    y <- unionIntervals(y)
    nx <- nrow(x)
    ny <- nrow(y)
    u <- c(x[, 1], y[, 1], x[, 2], y[, 2])
    o <- order(u)
    u <- u[o]
    vx <- cumsum(jx <- rep(c(1, 0, -1, 0), c(nx, ny, nx, ny))[o])
    vy <- cumsum(jy <- rep(c(0, -1, 0, 1), c(nx, ny, nx, ny))[o])
    as.Ranges(u[vx == 1 & vy == 0], u[(vx == 1 & jy == -1) | (jx == -1 & vy == 0)])
}

intersectRanges <- function(x, y)
{
    # return Ranges object describing points that are in both x and y
    x <- unionIntervals(x)
    y <- unionIntervals(y)
    nx <- nrow(x)
    ny <- nrow(y)
    u <- c(x[, 1], y[, 1], x[, 2], y[, 2])
    o <- order(u)
    u <- u[o]
    vx <- cumsum(jx <- rep(c(1, 0, -1, 0), c(nx, ny, nx, ny))[o])
    vy <- cumsum(jy <- rep(c(0, 1, 0, -1), c(nx, ny, nx, ny))[o])
    as.Ranges(u[vx == 1 & vy == 1], u[(vx == 1 & jy == -1) | (jx == -1 & vy == 1)])
}

inRanges <- function(x, Ranges)
{
    if (length(x) == 1) {
        any(x > Ranges[,1] & x <= Ranges[,2])
    } else {
        Ranges <- unionIntervals(Ranges)
        (findInterval(-x, rev(-as.vector(t(Ranges)))) %% 2) == 1
    }
}

plot.Ranges <- function(x, ...)
{
    # mainly for debugging - no plotting controls, all ... must be Ranges objects.
    RangesList <- list(x=x, ...)
    labels <- vapply(as.list(substitute(list(x, ...)))[-1], function(x)deparse(x)[1], "")
    oldmar <- par(mar = replace(par("mar"), 2, max(nchar(labels)/2, 10)))
    on.exit(par(oldmar))
    xlim <- do.call("range", c(unlist(RangesList, recursive=FALSE), list(finite=TRUE)))
    ylim <-  c(0, length(RangesList)+1)
    plot(type="n", xlim, ylim, xlab="", ylab="", axes=FALSE)
    grid(ny=0)
    axis(side=1)
    axis(side=2, at=seq_along(RangesList), lab=labels, las=1, tck=0)
    box()
    incr <- 0.45 / max(vapply(RangesList, nrow, 0))
    xr <- par("usr")[1:2] # for intervals that extend to -Inf or Inf.
    for(i in seq_along(RangesList)) {
        r <- RangesList[[i]]
        if (nrow(r)>0) {
            y <- i + seq(0, by=incr, len=nrow(r))
            r <- r[order(r[,1]),,drop=FALSE]
            segments(pmax(r[,1], xr[1]), y, pmin(r[,2], xr[2]), y)
        }
    }
}
1 day later
#
To the list of function I sent, add another that converts a list of intervals
into a Ranges object:
  as.Ranges.list <- function (x, ...) {
      stopifnot(nargs() == 1, all(vapply(x, length, 0) == 2))
      # use c() instead of unlist() because c() doesn't mangle POSIXct and Date objects
      x <- unname(do.call(c, x))
      odd <- seq(from = 1, to = length(x), by = 2)
      as.Ranges(bottoms = x[odd], tops = x[odd + 1])
  }
Then stop using get() and assign() all over the place and instead make lists of
related intervals and convert them to Ranges objects:
  > x <- as.Ranges(list(x_rng))
  > s <- as.Ranges(list(s1_rng, s2_rng, s3_rng, s4_rng, s5_rng))
  > x
    bottoms tops
  1    -100  100
  > s
    bottoms tops
  1 -250.50 30.0
  2    0.77 10.0
  3   25.00 35.0
  4   70.00 80.3
  5   90.00 95.0
and then compute the difference between the sets x and s (i.e., describe
the points in x but not s as a union of intervals):
  > setdiffRanges(x, s)
    bottoms tops
  1    35.0   70
  2    80.3   90
  3    95.0  100
and for a graphical check do
  > plot(x, s, setdiffRanges(x, s))
Are those the numbers you want?

I find it easier to use standard functions and data structures for this than
to adapt the cumsum/order idiom to different situations.

Bill Dunlap
Spotfire, TIBCO Software
wdunlap tibco.com
#
Hi all,

Nice code samples presented all around.

Just wanted to point out that I think the stuff found in the
`intervals` package might also be helpful:

http://cran.at.r-project.org/web/packages/intervals/index.html

HTH,
-steve
On Mon, May 14, 2012 at 2:54 PM, Ben quant <ccquant at gmail.com> wrote: