Dear all,
I discovered that read.table (RW1.8.0, i.e. R 1.8.0 for Windows) leaves out
data when reading records that span multiple lines.
The replication code is at the end.
Best regards
Jens Oehlschlägel
# Replication, part 1: write a small data frame to CSV and read it back.
# Target file for the round trip (hard-coded Windows path from the report).
filename <- "c:/tmp/c2.csv"
# Column a: the second value contains an embedded newline ("e\nnewline").
# Column b: the second value contains literal double quotes.
# The literal is kept on a single line so that no unintended newline ends up
# inside "quoted simpleline" (a wrapped line would change the test data).
data <- data.frame(a=c("c", "e\nnewline"), b=c("d", '"quoted simpleline"'))
# look at the data (no file argument, so it is printed to the console)
write.table(data, sep=",", row.names=FALSE)
# write it out
write.table(data, sep=",", row.names=FALSE, file=filename)
# reading it back in: a line is missing from the result
read.csv(filename)
a b
1 e\nnewline \\quoted simpleline\\
# Open the file written above as a read-mode connection so the lower-level
# calls can be replayed by hand.
fc <- file(filename, open="r")
# the problem seems to be that
# readTableHead erroneously counts 3 lines as 4
# NOTE(review): the arguments after the connection are presumably the number of
# lines to read, the comment character and blank.lines.skip; confirm against
# the read.table source of the R version under test, since .Internal
# signatures are not a stable interface.
lines <- .Internal(readTableHead(fc, 4, "", TRUE))
# print what readTableHead returned
lines
_
platform i386-pc-mingw32
arch i386
os mingw32
system i386, mingw32
status
major 1
minor 8.0
year 2003
month 10
day 08
language R
# Replication script for the report: write a small data frame containing a
# multi-line field to CSV, read it back with read.csv, then replay the
# suspected lower-level steps (readTableHead, pushBack, scan) on the connection.
# The order of the statements matters for reproducing the problem.

# Target file for the round trip (hard-coded Windows path).
filename <- "c:/tmp/c2.csv"
# Column a: the second value contains an embedded newline ("e\nnewline").
# Column b: the second value contains literal double quotes.
data <- data.frame(a=c("c", "e\nnewline"), b=c("d", '"quoted simpleline"'))
# look at the data (no file argument, so it is printed to the console)
write.table(data, sep=",", row.names=FALSE)
# write it out
write.table(data, sep=",", row.names=FALSE, file=filename)
# reading it back in: a line is missing from the result
read.csv(filename)
# Open the same file as a read-mode connection for the step-by-step replay.
fc <- file(filename, open="r")
# the problem seems to be that
# readTableHead erroneously counts 3 lines as 4
# NOTE(review): the arguments after the connection are presumably the number of
# lines to read, the comment character and blank.lines.skip; confirm against
# the read.table source of the R version under test.
lines <- .Internal(readTableHead(fc, 4, "", TRUE))
# print what readTableHead returned
lines
# pushing the lines back twice is fine in itself
pushBack(c(lines,lines), fc)
# but nlines tells us we had 4 lines, when in fact there are only 3
nlines <- length(lines)
nlines
# and the first scan (limited to nlines lines) eats up more than the first
# pushed-back copy
scan(fc, what="string", sep=",", nlines=nlines)
# thus the real scan of the remainder misses data
scan(fc, what="string", sep=",")
# release the connection
close(fc)
# print the R version/platform information for the report
version
--