PATCH: Add fields argument to installed.packages and available.packages

Hi all,

The write_PACKAGES function has a 'fields' argument that allows a user
generating a PACKAGES file to specify additional fields to include.
For symmetry, it would be nice for the available.packages function to
be able to read those extra fields when specified.

Similarly, it would be useful for installed.packages to have a
'fields' argument.  This would allow a user to query the set of
installed packages for other fields in the DESCRIPTION file.

One use for this would be for repository hosters to include the
License field in their PACKAGES file.  In this way, end users could
query a repository and only download packages that they have a license
to use.

Below is a patch against svn 38996 that attempts to implement this.

+ seth


Index: src/library/utils/R/packages.R
===================================================================

--- src/library/utils/R/packages.R	(revision 38996)
+++ src/library/utils/R/packages.R	(working copy)
@@ -1,5 +1,6 @@
 available.packages <-
-    function(contriburl = contrib.url(getOption("repos")), method)
+    function(contriburl = contrib.url(getOption("repos")), method,
+             fields = NULL)
 {
     .checkRversion <- function(x) {
         if(is.na(xx <- x["Depends"])) return(TRUE)
@@ -9,10 +10,14 @@
         else TRUE
     }
 
-    flds <- c("Package", "Version", "Priority", "Bundle",
-              "Depends", "Imports", "Suggests", "Contains")
-    res <- matrix(as.character(NA), 0, length(flds) + 1)
-    colnames(res) <- c(flds, "Repository")
+    requiredFields <- c("Package", "Version", "Priority", "Bundle",
+                        "Depends", "Imports", "Suggests", "Contains")
+    if (!is.null(fields) && is.character(fields))
+      fields <- unique(c(requiredFields, fields))
+    else
+      fields <- requiredFields
+    res <- matrix(as.character(NA), 0, length(fields) + 1)
+    colnames(res) <- c(fields, "Repository")
     for(repos in contriburl) {
         localcran <- length(grep("^file:", repos)) > 0
         if(localcran) {
@@ -23,7 +28,7 @@
                 if(length(grep("[A-Za-z]:", tmpf)))
                     tmpf <- substring(tmpf, 2)
             }
-            res0 <- read.dcf(file = tmpf, fields = flds)
+            res0 <- read.dcf(file = tmpf)
             if(length(res0)) rownames(res0) <- res0[, "Package"]
         } else {
             dest <- file.path(tempdir(),
@@ -57,7 +62,7 @@
                             call. = FALSE, immediate. = TRUE, domain = NA)
                     next
                 }
-                res0 <- read.dcf(file = tmpf, fields = flds)
+                res0 <- read.dcf(file = tmpf)
                 if(length(res0)) rownames(res0) <- res0[, "Package"]
                 .saveRDS(res0, dest, compress = TRUE)
                 unlink(tmpf)
@@ -65,7 +70,14 @@
             } # end of download vs cached
         } # end of localcran vs online
         if (length(res0)) {
-            res0 <- cbind(res0, Repository = repos)
+            missingFields <- fields[!(fields %in% colnames(res0))]
+            if (length(missingFields)) {
+                toadd <- matrix(as.character(NA), nrow=nrow(res0),
+                                ncol=length(missingFields),
+                                dimnames=list(NULL, missingFields))
+                res0 <- cbind(res0, toadd)
+            }
+            res0 <- cbind(res0[, fields], Repository = repos)
             res <- rbind(res, res0)
         }
     }
@@ -307,22 +319,29 @@
 }
 
 installed.packages <-
-    function(lib.loc = NULL, priority = NULL,  noCache = FALSE)
+    function(lib.loc = NULL, priority = NULL,  noCache = FALSE,
+             fields = NULL)
 {
     if(is.null(lib.loc))
         lib.loc <- .libPaths()
-    pkgFlds <- c("Version", "Priority", "Bundle", "Contains", "Depends",
-                 "Suggests", "Imports", "Built")
     if(!is.null(priority)) {
         if(!is.character(priority))
             stop("'priority' must be character or NULL")
         if(any(b <- priority %in% "high"))
             priority <- c(priority[!b], "recommended","base")
     }
-    retval <- matrix("", 0, 2+length(pkgFlds))
+    requiredFields <- c("Version", "Priority", "Bundle", "Contains",
+                        "Depends", "Suggests", "Imports", "Built")
+    if (!is.null(fields) && is.character(fields))
+      fields <- unique(c(requiredFields, fields))
+    else
+      fields <- requiredFields
+    retval <- matrix("", 0, 2+length(fields))
     for(lib in lib.loc) {
         dest <- file.path(tempdir(),
-                          paste("libloc_", URLencode(lib, TRUE), ".rds",
+                          paste("libloc_", URLencode(lib, TRUE),
+                                paste(fields, collapse=","),
+                                ".rds",
                                 sep=""))
         if(!noCache && file.exists(dest) &&
             file.info(dest)$mtime > file.info(lib.loc)$mtime) {
@@ -332,12 +351,12 @@
             ## this excludes packages without DESCRIPTION files
             pkgs <- .packages(all.available = TRUE, lib.loc = lib)
             for(p in pkgs){
-                desc <- packageDescription(p, lib = lib, fields = pkgFlds,
+                desc <- packageDescription(p, lib = lib, fields = fields,
                                            encoding = NA)
                 ## this gives NA if the package has no Version field
                 if (is.logical(desc)) {
-                    desc <- rep(as.character(NA), length(pkgFlds))
-                    names(desc) <- pkgFlds
+                    desc <- rep(as.character(NA), length(fields))
+                    names(desc) <- fields
                 } else {
                     desc <- unlist(desc)
                     Rver <- strsplit(strsplit(desc["Built"], ";")[[1]][1],
@@ -352,7 +371,7 @@
             }
         }
     }
-    colnames(retval) <- c("Package", "LibPath", pkgFlds)
+    colnames(retval) <- c("Package", "LibPath", fields)
     if(length(retval) && !is.null(priority)) {
         keep <- !is.na(pmatch(retval[,"Priority"], priority,
                               duplicates.ok = TRUE))
Index: src/library/utils/man/installed.packages.Rd
===================================================================
--- src/library/utils/man/installed.packages.Rd	(revision 38996)
+++ src/library/utils/man/installed.packages.Rd	(working copy)
@@ -7,7 +7,7 @@
 }
 \usage{
 installed.packages(lib.loc = NULL, priority = NULL,
-                   noCache = FALSE)
+                   noCache = FALSE, fields = NULL)
 }
 \arguments{
   \item{lib.loc}{
@@ -21,6 +21,11 @@
     assigned priority use \code{priority = "NA"}.
   }
   \item{noCache}{Do not use cached information.}
+
+  \item{fields}{a character vector giving the fields to extract from
+    each package's \code{DESCRIPTION} file in addition to the default
+    ones, or \code{NULL} (default).  Unavailable fields result in
+    \code{NA} values.}
 }
 \details{
   \code{installed.packages} scans the \file{DESCRIPTION} files of each
@@ -31,18 +36,21 @@
   for versioned installs with the name under which the package is
   installed, in the style \code{mypkg_1.3-7}.
 
-  The information found is cached (by library) for the \R session,
-  and updated only if the top-level library directory has been altered,
-  for example by installing or removing a package.  If the cached
-  information becomes confused, it can be refreshed by running
-  \code{installed.packages(noCache = TRUE)}.
+  The information found is cached (by library) for the \R session and
+  specified \code{fields} argument, and updated only if the top-level
+  library directory has been altered, for example by installing or
+  removing a package.  If the cached information becomes confused, it
+  can be refreshed by running \code{installed.packages(noCache =
+  TRUE)}.
 }
 \value{
   A matrix with one row per package, row names the package names and
   column names \code{"Package"}, \code{"LibPath"}, \code{"Version"},
-  \code{"Priority"}, \code{"Bundle"}, \code{"Contains"}, \code{"Depends"},
-  \code{"Suggests"}, \code{"Imports"} and \code{"Built"}
-  (the \R version the package was built under).
+  \code{"Priority"}, \code{"Bundle"}, \code{"Contains"},
+  \code{"Depends"}, \code{"Suggests"}, \code{"Imports"} and
+  \code{"Built"} (the \R version the package was built under).
+  Additional columns can be specified using the \code{fields}
+  argument.
 }
 \seealso{
   \code{\link{update.packages}}, \code{\link{INSTALL}}, \code{\link{REMOVE}}.
@@ -50,5 +58,6 @@
 \examples{
 str(ip <- installed.packages(priority = "high"))
 ip[, c(1,3:5)]
+plic <- installed.packages(priotiry = "high", fields="License")
 }
 \keyword{utilities}
Index: src/library/utils/man/update.packages.Rd
===================================================================
--- src/library/utils/man/update.packages.Rd	(revision 38996)
+++ src/library/utils/man/update.packages.Rd	(working copy)
@@ -20,7 +20,7 @@
                 type = getOption("pkgType"))
 
 available.packages(contriburl = contrib.url(getOption("repos")),
-                   method)
+                   method, fields = NULL)
 
 old.packages(lib.loc = NULL, repos = getOption("repos"),
              contriburl = contrib.url(repos),
@@ -157,7 +157,14 @@
     This is sometimes used to perform additional operations at the end
     of the package installation in addition to removing intermediate files.
   }
+  \item{fields}{a character vector giving the fields to extract from
+    the \code{PACKAGES} file(s) in addition to the default ones, or
+    \code{NULL} (default).  Unavailable fields result in \code{NA}
+    values.
+  }
 }
+
+}
 \details{
   All of these functions work with the names of a package or bundle (and
   not the component packages of a bundle, except for
@@ -252,7 +259,8 @@
   with one row per package/bundle, row names the package names and
   column names \code{"Package"}, \code{"Version"}, \code{"Priority"},
   \code{"Bundle"}, \code{"Depends"}, \code{"Imports"}, \code{"Suggests"}
-  \code{"Contains"} and \code{"Repository"}.
+  \code{"Contains"} and \code{"Repository"}.  Additional columns can
+  be specified using the \code{fields} argument.
 
   For \code{old.packages}, \code{NULL} or a matrix with one row per
   package/bundle, row names the package names and column names