On Tue, Jul 15, 2008 at 09:29:22AM +0100, Prof Brian Ripley wrote:
On Tue, 15 Jul 2008, Martin Maechler wrote:
Hi Kurt,
"KVD" == Kurt Van Dijck <kurt.van.dijck at skynet.be> on Wed, 09 Jul 2008 10:05:39 +0200 writes:
KVD> Hi all, I got no feedback at all concerning the merge KVD> of this patch in the source tree. Am I supposed to do KVD> this myself? How should I do this (do I have subversion KVD> commit access)? Is this patch acceptable at all? Is it KVD> being tested? I don't know if it's being tested. It's vacation and traveling time, also for the R core team.
Indeed. This is on my TODO list, but I've been away (and unexpectedly mainly offline) for the last two weeks, and will be again until Friday. Hopefully I will have a chance to take a look next week, but we do need at least one example file. (I could generate SPSS examples, but they may not be what you are trying to test.)
The foreign package source is kept in svn-archive https://svn.r-project.org/R-packages/trunk/foreign/ and I have tried to apply your patch (from July 2) to the sources but patch -p0 < K_Van_Dijck_patch patching file src/sfm-read.c Hunk #1 FAILED at 188. Hunk #2 FAILED at 420. Hunk #3 FAILED at 590. Hunk #4 FAILED at 1559. 4 out of 4 hunks FAILED -- saving rejects to file src/sfm-read.c.rej patching file src/var.h.in Hunk #1 FAILED at 41. Hunk #2 FAILED at 232. Hunk #3 FAILED at 306. Hunk #4 FAILED at 377. 4 out of 4 hunks FAILED -- saving rejects to file src/var.h.in.rej Could you provide a patch against the development code from the above url ? (after installing 'subversion', you get the development directory by svn co https://svn.r-project.org/R-packages/trunk/foreign/ )
I had problems with whitespace in the patch file, I attached a new one
KVD> I got some personal reactions on my post, proving there KVD> is general interest in getting rid of the inconvenience KVD> of importing long labels from SPSS files. My problem is that I cannot do much testing apart from the tests already present in foreign/tests/spss.R Could you provide a new small *.sav file and a corresponding read.spss() call which exhibits the problems and is fixed by your patch? Thank you in advance for your contribution! Best regards,
I ran the spss.R in tests/, it worked fine. Be sure to clean all object
files before compiling.
Ilse made me a test .sav file (attached) with 2 variables (varialbe1 &
variable2), 3 records.
This piece of R code shows the problem:
# to resolve locale problem
Sys.setlocale (locale="C");
# read spss datafile
library(foreign);
data = read.spss("spss_long.sav", to.data.frame=TRUE);
# to.data.frame not necessary, but gives nicer output
# commands to show the data, the variable names and labels
data;
names(data);
attr(data, "variable.labels");
# result in unpatched version:
# both variable names are in shortened form
# (max 8 characters; provided in SPSS-file)
#> data;
# VARIABLE V2_A
#1 1 1
#2 2 1
#3 2 3
#
#> names(data);
#[1] "VARIABLE" "V2_A"
#
#> attr(data,"variable.labels");
# VARIABLE V2_A
#"variable1" "variable2"
# and in patched version:
# variable names are the full names as originally defined in the SPSS-file
#> data;
# variable1 variable2
#1 1 1
#2 2 1
#3 2 3
#> names(data);
#[1] "variable1" "variable2"
#> attr(data, "variable.labels");
# variable1 variable2
#"variable1" "variable2"
Kind regards,
Kurt & Ilse
Index: src/sfm-read.c
===================================================================
--- src/sfm-read.c (revision 5175)
+++ src/sfm-read.c (working copy)
@@ -188,6 +188,8 @@
static int read_variables (struct file_handle * h, struct variable *** var_by_index);
static int read_machine_int32_info (struct file_handle * h, int size, int count, int *encoding);
static int read_machine_flt64_info (struct file_handle * h, int size, int count);
+static int read_long_var_names (struct file_handle * h, struct dictionary *
+ , unsigned long size, unsigned int count);
static int read_documents (struct file_handle * h);
/* Displays the message X with corrupt_msg, then jumps to the lossage
@@ -418,11 +420,15 @@
break;
case 7: /* Multiple-response sets (later versions of SPSS). */
- case 13: /* long variable names. PSPP now has code for these
- that could be ported if someone is interested. */
skip = 1;
break;
+ case 13: /* long variable names. PSPP now has code for these
+ that could be ported if someone is interested. */
+ if (!read_long_var_names(h, ext->dict, data.size, data.count))
+ goto lossage;
+ break;
+
case 16: /* See http://www.nabble.com/problem-loading-SPSS-15.0-save-files-t2726500.html */
skip = 1;
break;
@@ -584,14 +590,72 @@
return 0;
}
+/* Read record type 7, subtype 13.
+ * long variable names
+ */
static int
+read_long_var_names (struct file_handle * h, struct dictionary * dict
+ , unsigned long size, unsigned int count)
+{
+ char * data;
+ unsigned int j;
+ struct variable ** lp;
+ struct variable ** end;
+ char * p;
+ char * endp;
+ char * val;
+ if ((1 != size)||(0 == count)) {
+ warning("%s: strange record info seen, size=%u, count=%u"
+ ", ignoring long variable names"
+ , h->fn, size, count);
+ return 0;
+ }
+ size *= count;
+ data = Calloc (size +1, char);
+ bufread(h, data, size, 0);
+ /* parse */
+ end = &dict->var[dict->nvar];
+ p = data;
+ do {
+ if (0 != (endp = strchr(p, '\t')))
+ *endp = 0; /* put null terminator */
+ if (0 == (val = strchr(p, '='))) {
+ warning("%s: no long variable name for variable '%s'", h->fn, p);
+ } else {
+ *val = 0;
+ ++val;
+ /* now, p is key, val is long name */
+ for (lp = dict->var; lp < end; ++lp) {
+ if (!strcmp(lp[0]->name, p)) {
+ strncpy(lp[0]->name, val, sizeof(lp[0]->name));
+ break;
+ }
+ }
+ if (lp >= end) {
+ warning("%s: long variable name mapping '%s' to '%s'"
+ "for variable which does not exist"
+ , h->fn, p, val);
+ }
+ }
+ p = &endp[1]; /* put to next */
+ } while (endp);
+
+ free(data);
+ return 1;
+
+lossage:
+ free(data);
+ return 0;
+}
+
+static int
read_header (struct file_handle * h, struct sfm_read_info * inf)
{
struct sfm_fhuser_ext *ext = h->ext; /* File extension strcut. */
struct sysfile_header hdr; /* Disk buffer. */
struct dictionary *dict; /* File dictionary. */
char prod_name[sizeof hdr.prod_name + 1]; /* Buffer for product name. */
- int skip_amt = 0; /* Amount of product name to omit. */
+ int skip_amt = 0; /* Amount of product name to omit. */
int i;
/* Create the dictionary. */
@@ -1495,7 +1559,7 @@
/* Reads one case from system file H into the value array PERM
according to the instructions given in associated dictionary DICT,
which must have the get.* elements appropriately set. Returns
- nonzero only if successful. */
+ nonzero only if successful. */
int
sfm_read_case (struct file_handle * h, union value * perm, struct dictionary * dict)
{
Index: src/var.h.in
===================================================================
--- src/var.h.in (revision 5175)
+++ src/var.h.in (working copy)
@@ -41,6 +41,10 @@
#error MAX_SHORT_STRING must be less than 8.
#endif
+/* VAR_NAME_LEN: the length of a variable.
+ * SPSS supports names of 64 long
+ */
+#define VAR_NAME_LEN 64
/* Special values. */
#define SYSMIS (-DBL_MAX)
#define LOWEST second_lowest_double_val()
@@ -228,7 +232,7 @@
/* MODIFY VARS private data. */
struct modify_vars_proc
{
- char new_name[9]; /* Variable's new name. */
+ char new_name[VAR_NAME_LEN +1]; /* Variable's new name. */
int drop_this_var; /* 0=keep this var, 1=drop this var. */
struct variable *next; /* Next in linked list. */
};
@@ -302,7 +306,7 @@
struct variable
{
/* Required by parse_variables() to be in this order. */
- char name[9]; /* As a string. */
+ char name[VAR_NAME_LEN +1]; /* As a string. */
int index; /* Index into its dictionary's var[]. */
int type; /* NUMERIC or ALPHA. */
int foo; /* Used for temporary storage. */
@@ -373,9 +377,9 @@
int weight_index; /* `value' index of $WEIGHT, or -1 if none.
Call update_weighting() before using! */
- char weight_var[9]; /* Name of WEIGHT variable. */
+ char weight_var[VAR_NAME_LEN];/* Name of WEIGHT variable. */
- char filter_var[9]; /* Name of FILTER variable. */
+ char filter_var[VAR_NAME_LEN];/* Name of FILTER variable. */
/* Do not make another field the last field! or see
temporary.c:restore_dictionary() before doing so! */
};