Skip to content
Prev 367388 / 398502 Next

Reading S-plus data in R

You should be looking for foreign::data.restore, not data.dump nor read.S.

In any case, I think that foreign::data.restore does not recognize S-version4
data.dump files, ones whose first line is
  ## Dump S Version 4 Dump ##
Here is a quickly written and barely tested function that should read
data.frames
and other simple S+ objects in SV4 data.dump files.  It stores the
objects it reads
from the file 'file' in the environment 'env'.

data.restore4 <- function(file, print = FALSE, verbose = FALSE, env =
.GlobalEnv)
{
    if (!inherits(file, "connection")) {
        file <- file(file, "r")
        on.exit(close(file))
    }
    lineNo <- 0
    nextLine <- function(n = 1) {
        lineNo <<- lineNo + n
        readLines(file, n = n)
    }
    Message <- function(...) {
        if (verbose) {
            message(simpleMessage(paste("(line ", lineNo, ") ",
paste(..., collapse = " "), sep = ""), sys.call(-1)))
        }
    }
    Stop <- function(...) {
        stop(simpleError(paste(paste(..., collapse = " "), sep = "",
            " (file ", deparse(summary(file)$description), ", line ",
lineNo, ")"), sys.call(-1)))
    }
    txt <- nextLine()
    stopifnot(txt == "## Dump S Version 4 Dump ##")
    .data.restore4 <- function()
    {
        class <- nextLine()
        mode <- nextLine()
        length <- as.numeric(tmp <- nextLine())
        if (is.na(length) || length%%1 != 0 || length < 0) {
            Stop("Expected nonnegative integer 'length' at line ",
lineNo, " but got ", deparse(tmp))
        }
        if (mode == "character") {
            nextLine(length)
        } else if (mode == "logical") {
            txt <- nextLine(length)
            lglVector <- rep(NA, length)
            lglVector[txt != "N"] <- as.logical(as.integer(txt[txt != "N"]))
            lglVector
        } else if (mode %in% c("integer", "single", "numeric")) {
            txt <- nextLine(length)
            txt[txt == "M"] <- "NaN"
            txt[txt == "I"] <- "Inf"
            txt[txt == "J"] <- "-Inf"
            atomicVector <- rep(as(NA, mode), length)
            atomicVector[txt != "N"] <- as(txt[txt != "N"], mode)
            atomicVector
        } else if (mode == "complex") {
            txt <- nextLine(length)
            txt <- gsub("M", "NaN", txt)
            txt <- gsub("\\<I\\>", "Inf", txt)
            txt <- gsub("\\<J\\>", "-Inf", txt)
            atomicVector <- rep(as(NA, mode), length)
            atomicVector[txt != "N"] <- as(txt[txt != "N"], mode)
            atomicVector
        } else if (mode == "list") {
            vectors <- lapply(seq_len(length), function(i).data.restore4())
            vectors
        } else if (mode == "NULL") {
            NULL
        } else if (mode == "structure") {
            vectors <- lapply(seq_len(length), function(i).data.restore4())
            if (class == ".named_I" || class == "named") {
                if (length != 2) {
                    Stop("expected length of '.named_I' component is
2, but got ", length)
                } else if (length(vectors[[1]]) != length(vectors[[2]])) {
                    Stop("expected lengths of '.named_I' components to
be the same, but got ", length(vectors[[1]]), " and ",
length(vectors[[2]]))
                } else if (!is.character(vectors[[2]])) {
                    Stop("expected second component of '.named_I' to
be character, but got ", deparse(mode(vectors[[2]])))
                }
                names(vectors[[1]]) <- vectors[[2]]
                if (identical(vectors[[2]][1], ".Data")) { # a hack -
really want to know if vectors[[1] had mode "structure" or not
                    do.call(structure, vectors[[1]], quote = TRUE)
                } else {
                    vectors[[1]]
                }
            } else {
                vectors # TODO: is this ok?  It assumes that is within
a .Named_I/structure
            }
        } else if (mode == "name") {
            if (length != 1) {
                Stop("expected length of 'name' objects is 1, but got", length)
            }
            as.name(nextLine())
        } else if (mode == "call") {
            callList <- lapply(seq_len(length), function(i).data.restore4())
            as.call(callList)
        } else {
            Stop("Unimplemented mode: ", deparse(mode))
        }
    }
    while (length(objName <- nextLine()) == 1) {
        Message(objName, ": ")
        obj <- .data.restore4()
        Message("class ", deparse(class(obj)), ", size=",
object.size(obj), "\n")
        assign(objName, obj, envir=env)
    }
}



Bill Dunlap
TIBCO Software
wdunlap tibco.com


On Sun, Feb 26, 2017 at 4:28 AM, roslinazairimah zakaria
<roslinaump at gmail.com> wrote:
-------------- next part --------------
data.restore4 <- function(file, print = FALSE, verbose = FALSE, env = .GlobalEnv)
{
    if (!inherits(file, "connection")) {
        file <- file(file, "r")
        on.exit(close(file))
    }
    lineNo <- 0
    nextLine <- function(n = 1) {
        lineNo <<- lineNo + n
        readLines(file, n = n)
    }
    Message <- function(...) {
        if (verbose) {
            message(simpleMessage(paste("(line ", lineNo, ") ", paste(..., collapse = " "), sep = ""), sys.call(-1)))
        }
    }
    Stop <- function(...) {
        stop(simpleError(paste(paste(..., collapse = " "), sep = "",
            " (file ", deparse(summary(file)$description), ", line ", lineNo, ")"), sys.call(-1)))
    }
    txt <- nextLine()
    stopifnot(txt == "## Dump S Version 4 Dump ##")
    .data.restore4 <- function()
    {
        class <- nextLine()
        mode <- nextLine()
        length <- as.numeric(tmp <- nextLine())
        if (is.na(length) || length%%1 != 0 || length < 0) {
            Stop("Expected nonnegative integer 'length' at line ", lineNo, " but got ", deparse(tmp))
        }
        if (mode == "character") {
            nextLine(length)
        } else if (mode == "logical") {
            txt <- nextLine(length)
            lglVector <- rep(NA, length)
            lglVector[txt != "N"] <- as.logical(as.integer(txt[txt != "N"]))
            lglVector
        } else if (mode %in% c("integer", "single", "numeric")) {
            txt <- nextLine(length)
            txt[txt == "M"] <- "NaN"
            txt[txt == "I"] <- "Inf"
            txt[txt == "J"] <- "-Inf"
            atomicVector <- rep(as(NA, mode), length)
            atomicVector[txt != "N"] <- as(txt[txt != "N"], mode)
            atomicVector
        } else if (mode == "complex") {
            txt <- nextLine(length)
            txt <- gsub("M", "NaN", txt)
            txt <- gsub("\\<I\\>", "Inf", txt)
            txt <- gsub("\\<J\\>", "-Inf", txt)
            atomicVector <- rep(as(NA, mode), length)
            atomicVector[txt != "N"] <- as(txt[txt != "N"], mode)
            atomicVector
        } else if (mode == "list") {
            vectors <- lapply(seq_len(length), function(i).data.restore4())
            vectors
        } else if (mode == "NULL") {
            NULL
        } else if (mode == "structure") {
            vectors <- lapply(seq_len(length), function(i).data.restore4())
            if (class == ".named_I" || class == "named") {
                if (length != 2) {
                    Stop("expected length of '.named_I' component is 2, but got ", length)
                } else if (length(vectors[[1]]) != length(vectors[[2]])) {
                    Stop("expected lengths of '.named_I' components to be the same, but got ", length(vectors[[1]]), " and ", length(vectors[[2]]))
                } else if (!is.character(vectors[[2]])) {
                    Stop("expected second component of '.named_I' to be character, but got ", deparse(mode(vectors[[2]])))
                }
                names(vectors[[1]]) <- vectors[[2]]
                if (identical(vectors[[2]][1], ".Data")) { # a hack - really want to know if vectors[[1] had mode "structure" or not
                    do.call(structure, vectors[[1]], quote = TRUE)
                } else {
                    vectors[[1]]
                }
            } else {
                vectors # TODO: is this ok?  It assumes that is within a .Named_I/structure 
            }
        } else if (mode == "name") {
            if (length != 1) {
                Stop("expected length of 'name' objects is 1, but got", length)
            }
            as.name(nextLine())
        } else if (mode == "call") {
            callList <- lapply(seq_len(length), function(i).data.restore4())
            as.call(callList)
        } else {
            Stop("Unimplemented mode: ", deparse(mode))
        }
    }
    while (length(objName <- nextLine()) == 1) {
        Message(objName, ": ")
        obj <- .data.restore4()
        Message("class ", deparse(class(obj)), ", size=", object.size(obj), "\n")
        assign(objName, obj, envir=env)
    }
}