Andrew Piskorski <atp at piskorski.com>
http://www.piskorski.com/
--k1lZvvs/B4yU6o8G
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="all-equal-patch-20051009.txt"
Index: all.equal.R
===================================================================
RCS file: /home/cvsroot/dtk/Splus/patches/all.equal.R,v
retrieving revision 1.1.1.1
retrieving revision 1.4
diff -u -r1.1.1.1 -r1.4
--- all.equal.R 1 Oct 2005 06:51:06 -0000 1.1.1.1
+++ all.equal.R 1 Oct 2005 13:10:25 -0000 1.4
@@ -1,4 +1,75 @@
-all.equal <- function(target, current, ...) UseMethod("all.equal")
+#
+# This is a copy of "src/library/base/R/all.equal.R" from
+# "R-beta_2005-09-24_r35666.tar.gz", plus our modifications. (The
+# all.equal.R in that tarball seems to be unchanged at least as far
+# back as R 2.1.0.)
+#
+# Further detail is in the comments in each function, but basically,
+# all modifications here involve either of two sorts of improvements:
+#
+# 1. Check names! Stock R all.equal() (unlike S-Plus) ignores names
+# completely on some objects. I consider this bogus, if the names
+# are different, the object is NOT "the same".
+#
+# 2. When the object is different, return more output to help the user
+# understand just WHAT is different.
+#
+# Note: Here in our patches package, we purposely CVS import and than
+# override ALL the base all.equal() methods, NOT just the ones we're
+# actually modifying. At first I tried only overriding some of them,
+# but in that case, even though package:patches was earlier on the
+# search path than package:base, base methods appeared to
+# preferentially call the original base versions, rather than the
+# patches versions that I wanted. So, big hammer it, override
+# everything - which will probably make it easier to contribute these
+# improvements back to stock R anyway.
+#
+# --atp at piskorski.com, 2005/10/01 02:29 EDT
+#
+# $Id: all.equal.R,v 1.4 2005/10/01 13:10:25 andy Exp $
+
+
+# In S-Plus, all.equal() prefers to index objects by name, while in
+# stock R, it prefers to index by position. IMO, *NEITHER* of those
+# behaviors are fully correct. What we really want is to compare
+# things BOTH by name and by position.
+#
+# Here's ONE example of the effect of these R patches:
+#
+## S-Plus 6.2, no patches to all.equal():
+#> all.equal(list(a=2,2,x=3,zap=1,foo=42,"NA"=T) ,list(b=1,2,y=4,foo=7,zap=1,"NA"=F))
+#[1] "Names: 4 string mismatches"
+#[2] "Components not in target: b, y"
+#[3] "Components not in current: a, x"
+#[4] "Component foo: Mean relative difference: 0.8333333"
+#[5] "Component NA: Mean relative difference: 1"
+#
+## R 2.1.0, no patches to all.equal():
+#> all.equal(list(a=2,2,x=3,zap=1,foo=42,"NA"=T) ,list(b=1,2,y=4,foo=7,zap=1,"NA"=F))
+#[1] "Names: 4 string mismatches"
+#[2] "Component 1: Mean relative difference: 0.5"
+#[3] "Component 3: Mean relative difference: 0.3333333"
+#[4] "Component 4: Mean relative difference: 6"
+#[5] "Component 5: Mean relative difference: 0.9761905"
+#[6] "Component 6: Mean relative difference: 1"
+#
+## R 2.1.0 with our patches here:
+#> all.equal(list(a=2,2,x=3,zap=1,foo=42,"NA"=T) ,list(b=1,2,y=4,foo=7,zap=1,"NA"=F))
+# [1] "Names: 4 string mismatches"
+# [2] "Components not in target: b, y"
+# [3] "Components not in current: a, x"
+# [4] "Component foo: Mean relative difference: 0.8333333"
+# [5] "Component NA: Mean relative difference: 1"
+# [6] "Component 1: Mean relative difference: 0.5"
+# [7] "Component 3: Mean relative difference: 0.3333333"
+# [8] "Component 4: Mean relative difference: 6"
+# [9] "Component 5: Mean relative difference: 0.9761905"
+#[10] "Component 6: Mean relative difference: 1"
+
+
+#all.equal.original.fcn <- get("all.equal" ,pos="package:base")
+all.equal <- function(target, current, ... ,debug.p=FALSE) UseMethod("all.equal")
+
## NO: is.*(x) should be like S4 is(x, *) ! -- use isTRUE(all.equal(*))
## is.all.equal <- function(target, current, ...)
@@ -32,23 +103,32 @@
function(target, current, tolerance = .Machine$double.eps ^ .5,
scale=NULL, ...)
{
- if(data.class(target) != data.class(current))
- return(paste("target is ", data.class(target), ", current is ",
- data.class(current), sep = ""))
+ msg <- attr.all.equal(target, current ,...)
+ if(data.class(target) != data.class(current)) {
+ msg <- c(msg ,paste("target is ", data.class(target), ", current is ",
+ data.class(current), sep = ""))
+ return(msg)
+ }
+
lt <- length(target)
lc <- length(current)
cplx <- is.complex(target)
- if(lt != lc)
- return(paste(if(cplx)"Complex" else "Numeric",
- ": lengths (", lt, ", ", lc, ") differ", sep = ""))
+ if(lt != lc) {
+ msg <- c(msg ,paste(if(cplx)"Complex" else "Numeric",
+ ": lengths (", lt, ", ", lc, ") differ", sep = ""))
+ return(msg)
+ }
target <- as.vector(target)
current <- as.vector(current)
out <- is.na(target)
- if(any(out != is.na(current)))
- return(paste("`is.NA' value mismatches:", sum(is.na(current)),
- "in current,", sum(out), " in target"))
+ if(any(out != is.na(current))) {
+ msg <- c(msg ,paste("`is.NA' value mismatches:", sum(is.na(current)),
+ "in current,", sum(out), " in target"))
+ return(msg)
+ }
out <- out | target == current
- if(all(out)) return(TRUE)
+ if(all(out)) { if (is.null(msg)) return(TRUE) else return(msg) }
+
target <- target[!out]
current <- current[!out]
xy <- mean((if(cplx)Mod else abs)(target - current))
@@ -63,29 +143,37 @@
xy <- xy/scale
"scaled"
}
+
if(is.na(xy) || xy > tolerance)
- paste("Mean", what, if(cplx)"Mod", "difference:", format(xy)) else TRUE
+ msg <- c(msg ,paste("Mean", what, if(cplx)"Mod", "difference:", format(xy)))
+
+ if(is.null(msg)) TRUE else msg
}
all.equal.character <- function(target, current, ...)
{
- if(data.class(target) != data.class(current))
- return(paste("target is ", data.class(target), ", current is ",
- data.class(current), sep = ""))
+ msg <- attr.all.equal(target, current ,...)
+ if(data.class(target) != data.class(current)) {
+ msg <- c(msg ,paste("target is ", data.class(target), ", current is ",
+ data.class(current), sep = ""))
+ return(msg)
+ }
lt <- length(target)
lc <- length(current)
if(lt != lc) {
- msg <- paste("Lengths (", lt, ", ", lc,
+ msg <- c(msg ,paste("Lengths (", lt, ", ", lc,
") differ (string compare on first ", ll <- min(lt, lc),
- ")", sep = "")
+ ")", sep = ""))
ll <- seq(length = ll)
target <- target[ll]
current <- current[ll]
- } else msg <- NULL
+ }
nas <- is.na(target)
- if (any(nas != is.na(current)))
- return(paste("`is.NA' value mismatches:", sum(is.na(current)),
+ if (any(nas != is.na(current))) {
+ msg <- c(msg ,paste("`is.NA' value mismatches:", sum(is.na(current)),
"in current,", sum(nas), " in target"))
+ return(msg)
+ }
ne <- !nas & (target != current)
if(!any(ne) && is.null(msg)) TRUE
else if(any(ne)) c(msg, paste(sum(ne), "string mismatches"))
@@ -141,76 +229,205 @@
if(is.null(msg)) TRUE else msg
}
-all.equal.list <- function(target, current, ...)
-{
- msg <- attr.all.equal(target, current, ...)
-# nt <- names(target)
- nc <- names(current)
- iseq <-
- ## <FIXME>
- ## Commenting this eliminates PR#674, and assumes that lists are
- ## regarded as generic vectors, so that they are equal iff they
- ## have identical names attributes and all components are equal.
- ## if(length(nt) && length(nc)) {
- ## if(any(not.in <- (c.in.t <- match(nc, nt, 0)) == 0))
- ## msg <- c(msg, paste("Components not in target:",
- ## paste(nc[not.in], collapse = ", ")))
- ## if(any(not.in <- match(nt, nc, 0) == 0))
- ## msg <- c(msg, paste("Components not in current:",
- ## paste(nt[not.in], collapse = ", ")))
- ## nt[c.in.t]
- ## } else
- ## </FIXME>
- if(length(target) == length(current)) {
- seq(along = target)
- } else {
- nc <- min(length(target), length(current))
- msg <- c(msg, paste("Length mismatch: comparison on first",
- nc, "components"))
- seq(length = nc)
- }
- for(i in iseq) {
- mi <- all.equal(target[[i]], current[[i]], ...)
- if(is.character(mi))
- msg <- c(msg, paste("Component ", i, ": ", mi, sep=""))
- }
- if(is.null(msg)) TRUE else msg
+all.equal.list <- function
+(target, current, ..., by.name="auto", by.pos=TRUE, debug.p=FALSE) {
+ # This is copied from "R-beta_2005-09-24_r35666.tar.gz" (which
+ # seems to be unchangd since at least R 2.1.0) and then
+ # modified.
+ #
+ # The stock implementation checked names (and other attributes)
+ # on the list itself but NOT on the components of the list! We
+ # fix that below. Furthermore, when reporting differences, it
+ # is much more helpful to report the name-indexed as well as
+ # position-indexed differences, so do that too.
+ #
+ # See also:
+ # http://r-bugs.biostat.ku.dk/cgi-bin/R/Language-fixed?id=674
+ # https://stat.ethz.ch/pipermail/r-devel/2000-October/thread.html#21323
+ #
+ # --atp at piskorski.com, 2005/09/27 20:32 EDT
+
+ # - by.name: When set to "auto" (and by.pos=T), we display the
+ # by.name=T messages if and only if the by.pos=T checks found
+ # that one or more names differ. This is particularly useful for
+ # data frames, as it immediately disambiguates the "columns are
+ # just in different orders" vs. "columns of the same name really
+ # do have different values" cases.
+
+ msg <- attr.all.equal(target, current, ... ,debug.p=debug.p)
+
+ recurse <- function(by.which ,show.other.p) {
+ msg <- c()
+ if (by.which == "name") {
+ by.name <- TRUE ; by.pos <- FALSE
+ } else {
+ by.pos <- TRUE
+ if (by.name != "auto") by.name <- FALSE
+ }
+
+ for (i in iseq) {
+ other.str <- ""
+ mi <- all.equal(target[[i]], current[[i]], ...
+ ,by.name=by.name ,by.pos=by.pos ,debug.p=debug.p)
+ if (is.character(mi)) {
+ names.differ.p <- F
+ if (by.which == "pos") {
+ name.c <- nc[[i]] ; name.t <- nt[[i]]
+ tmp <- (name.c == name.t)
+ if (length(name.c)==0 && length(name.t)==0) {
+ # No names at all, show nothing.
+ } else if (length(tmp) && !is.na(tmp) && tmp) {
+ # Names are the same, only show one:
+ other.str <- paste(" (" ,name.c ,")" ,sep="")
+ } else {
+ # Current and Target names differ, show both:
+ other.str <- paste(" (" ,name.c ," / " ,name.t ,")" ,sep="")
+ names.differ.p <- T
+ }
+ }
+ msg <- c(msg, paste("Component ", i, other.str, ": ", mi, sep=""))
+
+ if (by.which == "pos" && names.differ.p && by.name == "auto") {
+ tmp <- which(name.c == names(target))
+ if (length(tmp)==0) {
+ ## This is redundant with the "names not in Target:"
+ ## message we already printed out:
+ #msg <- c(msg, paste("Component ", name.c, ": ", "Not in Target.", sep=""))
+ } else {
+ if (length(tmp) > 1) {
+ msg <- c(msg, paste("Warning:" ,length(tmp) ,"components in Target w/ name:" ,name.c))
+ # The code below will check only the first named component:
+ }
+ mi <- all.equal(target[[name.c]] ,current[[name.c]] ,...
+ ,by.name=TRUE ,by.pos=FALSE ,debug.p=debug.p)
+ if (is.character(mi))
+ msg <- c(msg, paste("Component ", name.c, ": ", mi, sep=""))
+ else
+ msg <- c(msg, paste("Component ", name.c, ": ", "[same]", sep=""))
+ }
+ }
+ }
+ }
+ return(msg)
+ }
+
+ if (by.name==FALSE && !by.pos)
+ stop("Cannot have both by.pos and by.name False!")
+ if (!is.logical(by.name) && by.name != "auto")
+ stop(paste("Invalid value for by.name:" ,by.name))
+ nt <- names(target) ; nc <- names(current)
+
+ if (by.name == TRUE && length(nt) > 0 && length(nc) > 0) {
+ ## These "Components not in" messages are redundant with the
+ ## "names not in" messages we've already printed out:
+ ## --atp at piskorski.com, 2005/10/01 08:05 EDT
+ if(any(not.in <- (c.in.t <- match(nc, nt, 0)) == 0)) {
+ #msg <- c(msg, paste("Components not in target:", paste(nc[not.in], collapse = ", ")))
+ }
+ #if(any(not.in <- match(nt, nc, 0) == 0)) {
+ # msg <- c(msg, paste("Components not in current:", paste(nt[not.in], collapse = ", ")))
+ #}
+ iseq <- nt[c.in.t]
+ msg <- c(msg ,recurse(by.which="name"))
+ }
+
+ if (by.pos) {
+ iseq <-
+ (if(length(target) == length(current)) {
+ seq(along = target)
+ } else {
+ tmp <- min(length(target), length(current))
+ msg <- c(msg, paste("Length mismatch: comparison on first",
+ tmp, "components"))
+ seq(length = tmp)
+ })
+ msg <- c(msg ,recurse(by.which="pos"))
+ }
+
+ if(is.null(msg)) TRUE else msg
}
+
-attr.all.equal <- function(target, current, ...)
-{
- ##--- "all.equal(.)" for attributes ---
- ##--- Auxiliary in all.equal(.) methods --- return NULL or character()
- msg <- NULL
- if(mode(target) != mode(current))
- msg <- paste("Modes: ", mode(target), ", ", mode(current), sep = "")
- if(length(target) != length(current))
- msg <- c(msg, paste("Lengths: ", length(target), ", ",
- length(current), sep = ""))
- ax <- attributes(target)
- ay <- attributes(current)
- nx <- names(target)
- ny <- names(current)
- if((lx <- length(nx)) | (ly <- length(ny))) {
- ## names() treated now; hence NOT with attributes()
- ax$names <- ay$names <- NULL
- if(lx && ly) {
- if(is.character(m <- all.equal.character(nx, ny)))
- msg <- c(msg, paste("Names:", m))
- } else if(lx)
- msg <- c(msg, "names for target but not for current")
- else msg <- c(msg, "names for current but not for target")
- }
- if(length(ax) || length(ay)) {# some (more) attributes
- ## order by names before comparison:
- nx <- names(ax)
- ny <- names(ay)
- if(length(nx)) ax <- ax[order(nx)]
- if(length(ny)) ay <- ay[order(ny)]
- tt <- all.equal(ax, ay, ...)
- if(is.character(tt)) msg <- c(msg, paste("Attributes: <", tt, ">"))
- }
- msg # NULL or character
+attr.all.equal <- function(target, current, ... ,debug.p=FALSE) {
+ # Based on stock "R-beta_2005-09-24_r35666.tar.gz". Differences are:
+ # - Also report WHICH names differ.
+ # - Do same checks on row.names and dimnames (if present) as on
+ # names.
+ # --atp at piskorski.com, 2005/10/01 01:16 EDT
+
+ ##--- "all.equal(.)" for attributes ---
+ ##--- Auxiliary in all.equal(.) methods --- return NULL or character()
+ msg <- NULL
+ if(mode(target) != mode(current))
+ msg <- paste("Modes: ", mode(target), ", ", mode(current), sep = "")
+ if(length(target) != length(current))
+ msg <- c(msg, paste("Lengths: ", length(target), ", ",
+ length(current), sep = ""))
+ ax <- attributes(target)
+ ay <- attributes(current)
+
+ local.compare.names <- function() {
+ msg <- c()
+ if(lx && ly) {
+ if(is.character(m <- all.equal.character(nx, ny ,debug.p=debug.p))) {
+ msg <- c(msg, paste("Names:", m))
+ not.in <- setdiff(ny ,nx)
+ if (length(not.in) > 0)
+ msg <- c(msg, paste(length(not.in) ,name.type.pretty
+ ,"not in Target:"
+ ,paste(not.in,collapse=", ")))
+ not.in <- setdiff(nx ,ny)
+ if (length(not.in) > 0)
+ msg <- c(msg, paste(length(not.in) ,name.type.pretty
+ ,"not in Current:"
+ ,paste(not.in,collapse=", ")))
+ }
+ } else if(lx) {
+ msg <- c(msg ,name.type.pretty ,"for Target but not for Current")
+ } else { msg <- c(msg ,name.type.pretty ,"for Current but not for Target") }
+ return(msg)
+ }
+
+ nx <- names(target) ; ny <- names(current)
+ if((lx <- length(nx)) | (ly <- length(ny))) {
+ ## names() treated now; hence NOT with attributes()
+ ax$names <- ay$names <- NULL
+ name.type.pretty <- "names"
+ msg <- c(msg ,local.compare.names())
+ }
+ if (any(names(ax) == "row.names") && any(names(ay) == "row.names")) {
+ nx <- row.names(target) ; ny <- row.names(current)
+ if((lx <- length(nx)) | (ly <- length(ny))) {
+ ## row.names() treated now; hence NOT with attributes():
+ ax$row.names <- ay$row.names <- NULL
+ name.type.pretty <- "row.names"
+ msg <- c(msg ,local.compare.names())
+ }
+ }
+ if (any(names(ax) == "dimnames") && any(names(ay) == "dimnames")) {
+ # We destructively remove dimnames, so loop from highest to lowest:
+ for (dim.i in length(dimnames(target)):1) {
+ nx <- dimnames(target)[[dim.i]] ; ny <- dimnames(current)[[dim.i]]
+ if((lx <- length(nx)) | (ly <- length(ny))) {
+ ## dimnames()[[dim.i]] treated now; hence NOT with attributes():
+ ax$dimnames[[dim.i]] <- ay$dimnames[[dim.i]] <- NULL
+ name.type.pretty <- paste("dimnames[[" ,dim.i ,"]]" ,sep="")
+ msg <- c(msg ,local.compare.names())
+ }
+ }
+ }
+
+ if(length(ax) || length(ay)) {# some (more) attributes
+ ## order by names before comparison:
+ nx <- names(ax)
+ ny <- names(ay)
+ if(length(nx)) ax <- ax[order(nx)]
+ if(length(ny)) ay <- ay[order(ny)]
+ tt <- all.equal(ax, ay, ... ,debug.p=debug.p)
+ if(is.character(tt)) msg <- c(msg, paste("Attributes: <", tt, ">"))
+ }
+
+ msg # NULL or character
}
--k1lZvvs/B4yU6o8G--