ff package authors, hello! I'm using ff_2.2-2.tar.gz from CRAN.
I've written my data to disk using ffdf() and as.ff(). The ffdf
object returned when I write out the files seems correct. However,
when I try to read the ff data files back in a fresh R session,
sometimes my ffdf() fails with this error:
number of rows don't match, recycling not yet implemented
The problem there is that my "pit.p" column is somehow ending up as
length 258460 rather than 258458. However, when I wrote this data to
disk, ffdf() definitely believed that all columns had length 258458.
Could you give me advice please on what the problem might be and how I
should fix it? Thanks in advance for your help!
------------------------------------------------------------
# Attempt to read ffdf from disk, this sometimes fails
# Each column is re-opened from its existing file with ff(filename=, vmode=),
# and the four resulting ff vectors are combined into one ffdf.
# No length= argument is passed to any of the ff() calls, so each column's
# length is whatever ff() determines on its own when opening the file.
# NOTE(review): presumably ff() derives that length from the file size and the
# vmode's storage width -- confirm against the ff() documentation. The pit.p
# file is 64615 bytes, and 64615 * 4 = 258460, which matches the bad length.
ffdf( "date.time" = ff(filename=col.files["date.time"] ,vmode="double" ,ramclass="POSIXct")
,"price" = ff(filename=col.files["price" ] ,vmode="double" )
,"volume" = ff(filename=col.files["volume" ] ,vmode="double" )
,"pit.p" = ff(filename=col.files["pit.p" ] ,vmode="logical") )
# This is how I write my ffdf files:
# td.ff is the ffdf being accumulated (NULL until the first batch is written);
# td.xts supplies the rows to add (presumably an xts object -- its index() is
# stored as the date.time column).
if (is.null(td.ff)) {
# Insert the first day's worth of rows:
# every column is created with as.ff() at the file path given by col.files.
td.ff <-
ffdf( "date.time" = as.ff(index(td.xts) ,filename=col.files["date.time"] ,vmode="double" ,ramclass="POSIXct")
,"price" = as.ff(as.vector(td.xts[,"price" ]) ,filename=col.files["price" ] ,vmode="double")
,"volume" = as.ff(as.vector(td.xts[,"volume"]) ,filename=col.files["volume" ] ,vmode="double")
,"pit.p" = as.ff(as.vector(td.xts[,"pit.p" ]) ,filename=col.files["pit.p" ] ,vmode="logical") )
} else {
# Append rows for day 2 and later:
# Look in read.table.ffdf() for an example of how to append rows to an ffdf object.
nr.old <- nrow(td.ff) ; nr.add <- nrow(td.xts)
# Grow the ffdf first so the new row positions exist before they are assigned.
nrow(td.ff) <- nr.old + nr.add
# Index covering only the newly added rows: nr.old+1 through nr.old+nr.add.
pos <- hi((nr.old +1L) ,(nr.old + nr.add))
# Fill the new rows column by column from td.xts.
td.ff[pos ,"date.time"] <- index(td.xts)
td.ff[pos ,"price" ] <- as.vector(td.xts[,"price" ])
td.ff[pos ,"volume" ] <- as.vector(td.xts[,"volume"])
td.ff[pos ,"pit.p" ] <- as.vector(td.xts[,"pit.p" ])
}
# File sizes of each column on disk:
$ du -b ff_XG.201012*
2067664 ff_XG.201012_date.time
64615 ff_XG.201012_pit.p
2067664 ff_XG.201012_price
2067664 ff_XG.201012_volume
# Truncated result of running str() on the ff object:
# Bad object, read from scratch:
$ pit.p : list()
..- attr(*, "physical")=Class 'ff_pointer' <externalptr>
.. ..- attr(*, "vmode")= chr "logical"
.. ..- attr(*, "maxlength")= int 258460
.. ..- attr(*, "pattern")= chr "/data1/nobackup/andy/ff/"
.. ..- attr(*, "filename")= chr "/data1/nobackup/andy/ff/ff_XG.201012_pit.p"
.. ..- attr(*, "pagesize")= int 65536
.. ..- attr(*, "finalizer")= chr "close"
.. ..- attr(*, "finonexit")= logi TRUE
.. ..- attr(*, "readonly")= logi FALSE
.. ..- attr(*, "caching")= chr "mmnoflush"
..- attr(*, "virtual")= list()
.. ..- attr(*, "Length")= int 258460
.. ..- attr(*, "Symmetric")= logi FALSE
.. - attr(*, "class") = chr [1:2] "ff_vector" "ff"
# Good object, returned when initially writing to disk:
.. $ pit.p : list()
.. ..- attr(*, "physical")=Class 'ff_pointer' <externalptr>
.. .. ..- attr(*, "vmode")= chr "logical"
.. .. ..- attr(*, "maxlength")= int 258458
.. .. ..- attr(*, "pattern")= chr "clone"
.. .. ..- attr(*, "filename")= chr "/data1/nobackup/andy/ff/ff_XG.201012_pit.p"
.. .. ..- attr(*, "pagesize")= int 65536
.. .. ..- attr(*, "finalizer")= chr "close"
.. .. ..- attr(*, "finonexit")= logi TRUE
.. .. ..- attr(*, "readonly")= logi FALSE
.. .. ..- attr(*, "caching")= chr "mmnoflush"
.. ..- attr(*, "virtual")= list()
.. .. ..- attr(*, "Length")= int 258458
.. .. ..- attr(*, "Symmetric")= logi FALSE
.. .. ..- attr(*, "class")= chr "virtual"
.. .. - attr(*, "class") = chr [1:2] "ff_vector" "ff"
--
Andrew Piskorski <atp at piskorski.com>
http://www.piskorski.com/