Skip to content

HTTP User-Agent header

10 messages · James P. Howard, II, Brian Ripley, Henrik Bengtsson +2 more

#
[moved from R-help to R-devel]

Prof Brian Ripley <ripley at stats.ox.ac.uk> writes:
I have a rough draft patch, see below, that adds a User-Agent header
to HTTP requests made in R via download.file.  If there is interest, I
will polish it.

Why have R identify itself?  Well, I think it is reasonable behavior
for legitimate "browsers" to identify themselves.  It will help a user
whose institution has a rather harsh web proxy policy (however, silly
it may be).  It will also be of use in tracking use of R, versions,
and OSes on CRAN mirrors.

Here is an example of what the user-agent string will be for an R
running on OSX:

    R (2.4.0 powerpc-apple-darwin8.7.0 powerpc darwin8.7.0)

And here is the patch...

+ seth

Index: src/include/R_ext/R-ftp-http.h
===================================================================
--- src/include/R_ext/R-ftp-http.h	(revision 38709)
+++ src/include/R_ext/R-ftp-http.h	(working copy)
@@ -36,7 +36,7 @@
 int   R_FTPRead(void *ctx, char *dest, int len);
 void  R_FTPClose(void *ctx);
 
-void *	RxmlNanoHTTPOpen(const char *URL, char **contentType, int cacheOK);
+void *	RxmlNanoHTTPOpen(const char *URL, char **contentType, const char *headers, int cacheOK);
 int	RxmlNanoHTTPRead(void *ctx, void *dest, int len);
 void	RxmlNanoHTTPClose(void *ctx);
 int 	RxmlNanoHTTPReturnCode(void *ctx);
Index: src/include/Rmodules/Rinternet.h
===================================================================
--- src/include/Rmodules/Rinternet.h	(revision 38709)
+++ src/include/Rmodules/Rinternet.h	(working copy)
@@ -9,7 +9,7 @@
 typedef Rconnection (*R_NewUrlRoutine)(char *description, char *mode);
 typedef Rconnection (*R_NewSockRoutine)(char *host, int port, int server, char *mode); 
 
-typedef void * (*R_HTTPOpenRoutine)(const char *url, const int cacheOK);
+typedef void * (*R_HTTPOpenRoutine)(const char *url, const char *headers, const int cacheOK);
 typedef int    (*R_HTTPReadRoutine)(void *ctx, char *dest, int len);
 typedef void   (*R_HTTPCloseRoutine)(void *ctx);
 	      
Index: src/main/names.c
===================================================================
--- src/main/names.c	(revision 38709)
+++ src/main/names.c	(working copy)
@@ -885,7 +885,7 @@
 {"sockSelect",do_sockselect,0,	11,     3,      {PP_FUNCALL, PREC_FN,	0}},
 {"getAllConnections",do_getallconnections,0,11, 0,      {PP_FUNCALL, PREC_FN,	0}},
 {"summary.connection",do_sumconnection,0,11,    1,      {PP_FUNCALL, PREC_FN,	0}},
-{"download", 	do_download,	0,      11,     5,      {PP_FUNCALL, PREC_FN,	0}},
+{"download", 	do_download,	0,      11,     6,      {PP_FUNCALL, PREC_FN,	0}},
 {"nsl", 	do_nsl,		0,      11,     1,      {PP_FUNCALL, PREC_FN,	0}},
 {"gzcon", 	do_gzcon,	0,      11,     3,      {PP_FUNCALL, PREC_FN,	0}},
 
Index: src/main/memory.c
===================================================================
--- src/main/memory.c	(revision 38709)
+++ src/main/memory.c	(working copy)
@@ -2478,8 +2478,11 @@
 SEXP (STRING_ELT)(SEXP x, int i) {
 #ifdef USE_TYPE_CHECKING
     if(TYPEOF(x) != STRSXP)
+      x = 1/(1-1);
+    /*
 	error("%s() can only be applied to a '%s', not a '%s'", 
 	      "STRING_ELT", "character vector", type2char(TYPEOF(x)));
+    */
 #endif
     return STRING_ELT(x, i);
 }
Index: src/main/internet.c
===================================================================
--- src/main/internet.c	(revision 38709)
+++ src/main/internet.c	(working copy)
@@ -129,7 +129,7 @@
 {
     if(!initialized) internet_Init();
     if(initialized > 0)
-	return (*ptr->HTTPOpen)(url, 0);
+	return (*ptr->HTTPOpen)(url, NULL, 0);
     else {
 	error(_("internet routines cannot be loaded"));
 	return NULL;
Index: src/library/utils/R/unix/download.file.R
===================================================================
--- src/library/utils/R/unix/download.file.R	(revision 38709)
+++ src/library/utils/R/unix/download.file.R	(working copy)
@@ -1,5 +1,5 @@
-download.file <- function(url, destfile, method,
-                          quiet = FALSE, mode = "w", cacheOK = TRUE)
+download.file <- function(url, destfile, method, quiet = FALSE, mode = "w",
+                          cacheOK = TRUE, headers = NULL)
 {
     method <- if (missing(method))
         ifelse(!is.null(getOption("download.file.method")),
@@ -7,6 +7,8 @@
                "auto")
     else
         match.arg(method, c("auto", "internal", "wget", "lynx"))
+    if (is.null(headers))
+      headers <- httpUserAgent()
 
     if(method == "auto") {
         if(capabilities("http/ftp"))
@@ -22,7 +24,8 @@
             stop("no download method found")
     }
     if(method == "internal")
-        status <- .Internal(download(url, destfile, quiet, mode, cacheOK))
+      status <- .Internal(download(url, destfile, quiet, mode, headers,
+                                   cacheOK))
     else if(method == "wget") {
         extra <- if(quiet) " --quiet" else ""
         if(!cacheOK) extra <- paste(extra, "--cache=off")
Index: src/library/utils/R/windows/download.file.R
===================================================================
--- src/library/utils/R/windows/download.file.R	(revision 38709)
+++ src/library/utils/R/windows/download.file.R	(working copy)
@@ -1,5 +1,5 @@
-download.file <- function(url, destfile, method,
-                          quiet = FALSE, mode = "w", cacheOK = TRUE)
+download.file <- function(url, destfile, method, quiet = FALSE, mode = "w",
+                          cacheOK = TRUE, headers = NULL)
 {
     method <- if (missing(method))
         ifelse(!is.null(getOption("download.file.method")),
@@ -7,6 +7,8 @@
                "auto")
     else
         match.arg(method, c("auto", "internal", "wget", "lynx"))
+    if (is.null(headers))
+      headers <- httpUserAgent()
 
     if(method == "auto") {
         if(capabilities("http/ftp"))
@@ -22,7 +24,8 @@
             stop("no download method found")
     }
     if(method == "internal")
-        status <- .Internal(download(url, destfile, quiet, mode, cacheOK))
+        status <- .Internal(download(url, destfile, quiet, mode, headers,
+                                     cacheOK))
     else if(method == "wget") {
         extra <- if(quiet) " --quiet" else ""
         if(!cacheOK) extra <- paste(extra, "--cache=off")
Index: src/library/utils/R/readhttp.R
===================================================================
--- src/library/utils/R/readhttp.R	(revision 38709)
+++ src/library/utils/R/readhttp.R	(working copy)
@@ -6,3 +6,15 @@
         stop("transfer failure")
     file.show(file, delete.file = delete.file, title = title, ...)
 }
+
+
+httpUserAgent <- function(agent)
+{
+    if (missing(agent)) {
+        Rver <- paste(R.version$major, R.version$minor, sep=".")
+        Rdetails <- paste(Rver, R.version$platform, R.version$arch,
+                          R.version$os)
+        agent <- paste("R (", Rdetails, ")", sep="")
+    }
+    paste("User-Agent: ", agent, "\r\n", sep="")
+}
Index: src/library/utils/man/download.file.Rd
===================================================================
--- src/library/utils/man/download.file.Rd	(revision 38709)
+++ src/library/utils/man/download.file.Rd	(working copy)
@@ -29,6 +29,13 @@
 
   \item{cacheOK}{logical.  Is a server-side cached value acceptable?
     Implemented for the \code{"internal"} and \code{"wget"} methods.}
+
+  \item{headers}{character. Headers to be used in the HTTP request.
+  This should be a character vector of length one formatted correctly
+  for use in the HTTP header.  The default value of \code{NULL}
+  results in a standard user agent header to be added to the HTTP
+  request that identified R as \code{User-Agent: R (X.Y.Z platform
+  arch os)}.  }
 }
 \details{
   The function \code{download.file} can be used to download a single
Index: src/modules/internet/internet.c
===================================================================
--- src/modules/internet/internet.c	(revision 38709)
+++ src/modules/internet/internet.c	(working copy)
@@ -28,7 +28,7 @@
 #include <Rconnections.h>
 #include <R_ext/R-ftp-http.h>
 
-static void *in_R_HTTPOpen(const char *url, const int cacheOK);
+static void *in_R_HTTPOpen(const char *url, const char *headers, const int cacheOK);
 static int   in_R_HTTPRead(void *ctx, char *dest, int len);
 static void  in_R_HTTPClose(void *ctx);
 
@@ -70,7 +70,7 @@
 
     switch(type) {
     case HTTPsh:
-	ctxt = in_R_HTTPOpen(url, 0);
+	ctxt = in_R_HTTPOpen(url, NULL, 0);
 	if(ctxt == NULL) {
 	  /* if we call error() we get a connection leak*/
 	  /* so do_url has to raise the error*/
@@ -238,14 +238,14 @@
 }
 #endif
 
-/* download(url, destfile, quiet, mode, cacheOK) */
+/* download(url, destfile, quiet, mode, headers, cacheOK) */
 
 #define CPBUFSIZE 65536
 #define IBUFSIZE 4096
 static SEXP in_do_download(SEXP call, SEXP op, SEXP args, SEXP env)
 {
-    SEXP ans, scmd, sfile, smode;
-    char *url, *file, *mode;
+    SEXP ans, scmd, sfile, smode, sheaders;
+    char *url, *file, *mode, *headers;
     int quiet, status = 0, cacheOK;
 
     checkArity(op, args);
@@ -268,6 +268,14 @@
     if(!isString(smode) || length(smode) != 1)
 	error(_("invalid '%s' argument"), "mode");
     mode = CHAR(STRING_ELT(smode, 0));
+    sheaders = CAR(args); args = CDR(args);
+    if(TYPEOF(sheaders) == NILSXP)
+        headers = NULL;
+    else {
+        if(!isString(sheaders) || length(sheaders) != 1)
+            error(_("invalid '%s' argument"), "headers");
+        headers = CHAR(STRING_ELT(sheaders, 0));
+    }
     cacheOK = asLogical(CAR(args));
     if(cacheOK == NA_LOGICAL)
 	error(_("invalid '%s' argument"), "cacheOK");
@@ -319,7 +327,7 @@
 #ifdef Win32
 	R_FlushConsole();
 #endif
-	ctxt = in_R_HTTPOpen(url, cacheOK);
+	ctxt = in_R_HTTPOpen(url, headers, cacheOK);
 	if(ctxt == NULL) status = 1;
 	else {
 	    if(!quiet) REprintf(_("opened URL\n"), url);
@@ -473,7 +481,7 @@
 
 #if defined(SUPPORT_LIBXML) && !defined(USE_WININET)
 
-void *in_R_HTTPOpen(const char *url, int cacheOK)
+void *in_R_HTTPOpen(const char *url, const char *headers, const int cacheOK)
 {
     inetconn *con;
     void *ctxt;
@@ -484,15 +492,15 @@
     if(timeout == NA_INTEGER || timeout <= 0) timeout = 60;
 
     RxmlNanoHTTPTimeout(timeout);
-    ctxt = RxmlNanoHTTPOpen(url, NULL, cacheOK);
+    ctxt = RxmlNanoHTTPOpen(url, NULL, headers, cacheOK);
     if(ctxt != NULL) {
 	int rc = RxmlNanoHTTPReturnCode(ctxt);
 	if(rc != 200) {
 	    char *msg;
-	    RxmlNanoHTTPClose(ctxt);
 	    /* bug work-around: it will crash on OS X if passed directly */
 	    msg = _("cannot open: HTTP status was '%d %s'");
 	    warning(msg, rc, RxmlNanoHTTPStatusMsg(ctxt));
+	    RxmlNanoHTTPClose(ctxt);
 	    return NULL;
 	} else {
 	    type = RxmlNanoHTTPContentType(ctxt);
Index: src/modules/internet/nanohttp.c
===================================================================
--- src/modules/internet/nanohttp.c	(revision 38709)
+++ src/modules/internet/nanohttp.c	(working copy)
@@ -1034,6 +1034,9 @@
  * @contentType:  if available the Content-Type information will be
  *                returned at that location
  *
+ * @headers: headers to be used in the HTTP request.  These must be name/value
+ *           pairs separated by ':', each on their own line.
+ *
  * This function try to open a connection to the indicated resource
  * via HTTP GET.
  *
@@ -1042,10 +1045,11 @@
  */
 
 void*
-RxmlNanoHTTPOpen(const char *URL, char **contentType, int cacheOK)
+RxmlNanoHTTPOpen(const char *URL, char **contentType, const char *headers,
+                 int cacheOK)
 {
     if (contentType != NULL) *contentType = NULL;
-    return RxmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL, cacheOK);
+    return RxmlNanoHTTPMethod(URL, NULL, NULL, contentType, headers, cacheOK);
 }
 
 /**
#
On 7/28/06, Seth Falcon <sfalcon at fhcrc.org> wrote:

            
It looks right, but I am running under Windows without a compiler.
#
I wonder if it would not be better to make the user agent string 
something that is configurable (at the time R is built) rather than at 
run time. This would make Seth's patch about 1% as long. Or this could 
be handled as an option. The patches are pretty extensive and allow for 
setting the agent header by setting parameters in function calls (eg 
download.files). I am not sure there is a good use case for that level 
of flexibility and the additional code is substantial.


The issue that I think arises is that there are potentially other 
systems that will be unhappy with R's identification of itself and so 
some users may also need to turn it off.

Any strong opinions?
James P. Howard, II wrote:

  
    
#
On Fri, 28 Jul 2006, Robert Gentleman wrote:

            
I also thought that there was no need for this level of complexity. 
(BTW, some of the patch is changes Seth has made for other purposes, e.g. 
that to memory.c, so please no one apply all of it.)

I'd be happy for R to just identify itself as 'R', which seems allowed:
(http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html).  But I am a bit 
concerned that sites may not just require the field but also require a 
particular format (even though W3C does not).

  
    
#
Prof Brian Ripley wrote:
For those of use that want to monitor downloads and get an idea of 
the size of the user base for different platforms (which helps to 
allocate resources) I think that we should try to include a bit more 
information.
  I could probably live with as little as R version, but would like to 
have OS there as well...

   best wishes
    Robert

  
    
#
On 7/28/06, Robert Gentleman <rgentlem at fhcrc.org> wrote:
Actually two:

1) If you wish to pull down (read extract from HTML or similar) live
data from the web, you might want to be able to "immitate" a certain
browser.  For instance, if you tell some webserver you're a simple
"mobile phone" or "lynx", you might be able get back very clean data.
Some servers might also block unknown web browsers.

2) If the webserver of a package reprocitory decided to make use of
the user-agent string to decide what version of the reprocitory it
should deliver, I would like to be able to trick the server.  Why?
Many times I found myself working on a system where I do not have the
rights to update to the latest or the developers version of R.
However, although I have not the very latest version of R you can do
work.  For instance, in Bioconductor the biocLite() & co gives you
either the stable or the developers of Bioconductor depending on your
R version, but looking into the biocLite() code and beyond, you find
that you actually can install a Bioconductor v1.9 package in R v2.3.1.
 It can be risky business, but if you know what you're doing, it can
save your day (or week).

Cheers

Henrik
#
OK, that suggests setting at the options level would solve both of your 
problems and that seems like the best approach. I don't really want to 
pass this around as a parameter through the maze of functions that might 
actually download something if we don't have to.

I think we can provide something early next week on R-devel for folks to 
test. But I suspect that as Henrik also does, the set of sites that will 
refuse us with a User-Agent header will be much larger than those that 
James has found that refuse us without it.

best wishes
   Robert
Henrik Bengtsson wrote:

  
    
#
Prof Brian Ripley <ripley at stats.ox.ac.uk> writes:
*blush* sorry about that.  I made the final diff from a src tree on
another machine and it was dirty.  memory.c should NOT have been
touched by my patch.
As long as it is going to identify itself, I think there is value in
having it provide version, platform, OS info.

Given the concern that some sites that currently work may stop working
(example?), perhaps making this a global option is a good compromise.
The option could be httpRequestHeader and the default value would be
as proposed in my patch.  A NULL value would result in the current
behavior, no extra header info in the request.
1 day later
#
Robert Gentleman <rgentlem at fhcrc.org> writes:
I have an updated patch that adds an HTTPUserAgent option.  The
default is a string like:

    R (2.4.0 x86_64-unknown-linux-gnu x86_64 linux-gnu)

If the HTTPUserAgent option is NULL, no user agent header is added to
HTTP requests (this is the current behavior).  This option allows R to
use an arbitrary user agent header.

The patch adds two non-exported functions to utils: 
   1) defaultUserAgent - returns a string like above
   2) makeUserAgent - formats content of HTTPUserAgent option for use
      as part of an HTTP request header.

I've tested on OSX and Linux, but not on Windows.  When USE_WININET is
defined, a user agent string of "R" was already being used.  With this
patch, the HTTPUserAgent options is used.  I'm unsure if NULL is
allowed.

Also, in src/main/internet.c there is a comment:
  "Next 6 are for use by libxml, only"
and then a definition for R_HTTPOpen.  Not sure how/when these get
used.  The user agent for these calls remains unspecified with this
patch.

+ seth


Patch summary:
 src/include/R_ext/R-ftp-http.h   |    2 +-
 src/include/Rmodules/Rinternet.h |    2 +-
 src/library/base/man/options.Rd  |    5 +++++
 src/library/utils/R/readhttp.R   |   25 +++++++++++++++++++++++++
 src/library/utils/R/zzz.R        |    3 ++-
 src/main/internet.c              |    2 +-
 src/modules/internet/internet.c  |   37 +++++++++++++++++++++++++------------
 src/modules/internet/nanohttp.c  |    8 ++++++--
 8 files changed, 66 insertions(+), 18 deletions(-)



Index: src/include/R_ext/R-ftp-http.h
===================================================================
--- src/include/R_ext/R-ftp-http.h	(revision 38715)
+++ src/include/R_ext/R-ftp-http.h	(working copy)
@@ -36,7 +36,7 @@
 int   R_FTPRead(void *ctx, char *dest, int len);
 void  R_FTPClose(void *ctx);
 
-void *	RxmlNanoHTTPOpen(const char *URL, char **contentType, int cacheOK);
+void *	RxmlNanoHTTPOpen(const char *URL, char **contentType, const char *headers, int cacheOK);
 int	RxmlNanoHTTPRead(void *ctx, void *dest, int len);
 void	RxmlNanoHTTPClose(void *ctx);
 int 	RxmlNanoHTTPReturnCode(void *ctx);
Index: src/include/Rmodules/Rinternet.h
===================================================================
--- src/include/Rmodules/Rinternet.h	(revision 38715)
+++ src/include/Rmodules/Rinternet.h	(working copy)
@@ -9,7 +9,7 @@
 typedef Rconnection (*R_NewUrlRoutine)(char *description, char *mode);
 typedef Rconnection (*R_NewSockRoutine)(char *host, int port, int server, char *mode); 
 
-typedef void * (*R_HTTPOpenRoutine)(const char *url, const int cacheOK);
+typedef void * (*R_HTTPOpenRoutine)(const char *url, const char *headers, const int cacheOK);
 typedef int    (*R_HTTPReadRoutine)(void *ctx, char *dest, int len);
 typedef void   (*R_HTTPCloseRoutine)(void *ctx);
 	      
Index: src/main/internet.c
===================================================================
--- src/main/internet.c	(revision 38715)
+++ src/main/internet.c	(working copy)
@@ -129,7 +129,7 @@
 {
     if(!initialized) internet_Init();
     if(initialized > 0)
-	return (*ptr->HTTPOpen)(url, 0);
+	return (*ptr->HTTPOpen)(url, NULL, 0);
     else {
 	error(_("internet routines cannot be loaded"));
 	return NULL;
Index: src/library/utils/R/zzz.R
===================================================================
--- src/library/utils/R/zzz.R	(revision 38715)
+++ src/library/utils/R/zzz.R	(working copy)
@@ -9,7 +9,8 @@
              internet.info = 2,
              pkgType = .Platform$pkgType,
              str = list(strict.width = "no"),
-             example.ask = "default")
+             example.ask = "default",
+             HTTPUserAgent = defaultUserAgent())
     extra <-
         if(.Platform$OS.type == "windows") {
             list(mailer = "none",
Index: src/library/utils/R/readhttp.R
===================================================================
--- src/library/utils/R/readhttp.R	(revision 38715)
+++ src/library/utils/R/readhttp.R	(working copy)
@@ -6,3 +6,28 @@
         stop("transfer failure")
     file.show(file, delete.file = delete.file, title = title, ...)
 }
+
+
+
+defaultUserAgent <- function()
+{
+    Rver <- paste(R.version$major, R.version$minor, sep=".")
+    Rdetails <- paste(Rver, R.version$platform, R.version$arch,
+                      R.version$os)
+    paste("R (", Rdetails, ")", sep="")
+}
+
+
+makeUserAgent <- function(format = TRUE) {
+    agent <- getOption("HTTPUserAgent")
+    if (is.null(agent)) {
+        return(NULL)
+    }
+    if (length(agent) != 1)
+      stop(sQuote("HTTPUserAgent"),
+           " option must be a length one character vector or NULL")
+    if (format)
+      paste("User-Agent: ", agent[1], "\r\n", sep = "")
+    else
+      agent[1]
+}
Index: src/library/base/man/options.Rd
===================================================================
--- src/library/base/man/options.Rd	(revision 38715)
+++ src/library/base/man/options.Rd	(working copy)
@@ -368,6 +368,11 @@
     \item{\code{help.try.all.packages}:}{default for an argument of
       \code{\link{help}}.}
 
+    \item{\code{HTTPUserAgent}:}{string used as the user agent in HTTP
+      requests.  If \code{NULL}, HTTP requests will be made without a
+      user agent header.  The default is \code{R (<version> <platform>
+      <arch> <os>)}}
+
     \item{\code{internet.info}:}{The minimum level of information to be
       printed on URL downloads etc.  Default is 2, for failure causes.
       Set to 1 or 0 to get more information.}
Index: src/modules/internet/internet.c
===================================================================
--- src/modules/internet/internet.c	(revision 38715)
+++ src/modules/internet/internet.c	(working copy)
@@ -28,7 +28,7 @@
 #include <Rconnections.h>
 #include <R_ext/R-ftp-http.h>
 
-static void *in_R_HTTPOpen(const char *url, const int cacheOK);
+static void *in_R_HTTPOpen(const char *url, const char *headers, const int cacheOK);
 static int   in_R_HTTPRead(void *ctx, char *dest, int len);
 static void  in_R_HTTPClose(void *ctx);
 
@@ -70,7 +70,7 @@
 
     switch(type) {
     case HTTPsh:
-	ctxt = in_R_HTTPOpen(url, 0);
+	ctxt = in_R_HTTPOpen(url, NULL, 0);
 	if(ctxt == NULL) {
 	  /* if we call error() we get a connection leak*/
 	  /* so do_url has to raise the error*/
@@ -238,14 +238,14 @@
 }
 #endif
 
-/* download(url, destfile, quiet, mode, cacheOK) */
+/* download(url, destfile, quiet, mode, headers, cacheOK) */
 
 #define CPBUFSIZE 65536
 #define IBUFSIZE 4096
 static SEXP in_do_download(SEXP call, SEXP op, SEXP args, SEXP env)
 {
-    SEXP ans, scmd, sfile, smode;
-    char *url, *file, *mode;
+    SEXP ans, scmd, sfile, smode, sheaders, agentFun;
+    char *url, *file, *mode, *headers;
     int quiet, status = 0, cacheOK;
 
     checkArity(op, args);
@@ -271,6 +271,17 @@
     cacheOK = asLogical(CAR(args));
     if(cacheOK == NA_LOGICAL)
 	error(_("invalid '%s' argument"), "cacheOK");
+#ifdef USE_WININET
+    PROTECT(agentFun = lang2(install("makeUserAgent"), ScalarLogical(0)));
+#else
+    PROTECT(agentFun = lang1(install("makeUserAgent")));
+#endif
+    PROTECT(sheaders = eval(agentFun, R_FindNamespace(mkString("utils"))));
+    UNPROTECT(1);
+    if(TYPEOF(sheaders) == NILSXP)
+        headers = NULL;
+    else 
+        headers = CHAR(STRING_ELT(sheaders, 0));
 #ifdef Win32
     if (!pbar.wprog) {
 	pbar.wprog = newwindow(_("Download progress"), rect(0, 0, 540, 100),
@@ -319,7 +330,7 @@
 #ifdef Win32
 	R_FlushConsole();
 #endif
-	ctxt = in_R_HTTPOpen(url, cacheOK);
+	ctxt = in_R_HTTPOpen(url, headers, cacheOK);
 	if(ctxt == NULL) status = 1;
 	else {
 	    if(!quiet) REprintf(_("opened URL\n"), url);
@@ -466,14 +477,14 @@
 
     PROTECT(ans = allocVector(INTSXP, 1));
     INTEGER(ans)[0] = status;
-    UNPROTECT(1);
+    UNPROTECT(2);
     return ans;
 }
 
 
 #if defined(SUPPORT_LIBXML) && !defined(USE_WININET)
 
-void *in_R_HTTPOpen(const char *url, int cacheOK)
+void *in_R_HTTPOpen(const char *url, const char *headers, const int cacheOK)
 {
     inetconn *con;
     void *ctxt;
@@ -484,7 +495,7 @@
     if(timeout == NA_INTEGER || timeout <= 0) timeout = 60;
 
     RxmlNanoHTTPTimeout(timeout);
-    ctxt = RxmlNanoHTTPOpen(url, NULL, cacheOK);
+    ctxt = RxmlNanoHTTPOpen(url, NULL, headers, cacheOK);
     if(ctxt != NULL) {
 	int rc = RxmlNanoHTTPReturnCode(ctxt);
 	if(rc != 200) {
@@ -605,7 +616,8 @@
 }
 #endif /* USE_WININET_ASYNC */
 
-static void *in_R_HTTPOpen(const char *url, const int cacheOK)
+static void *in_R_HTTPOpen(const char *url, const char *headers, 
+                           const int cacheOK)
 {
     WIctxt  wictxt;
     DWORD status, d1 = 4, d2 = 0, d3 = 100;
@@ -622,7 +634,7 @@
     wictxt->length = -1;
     wictxt->type = NULL;
     wictxt->hand =
-	InternetOpen("R", INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL,
+	InternetOpen(headers, INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL,
 #ifdef USE_WININET_ASYNC
 		     INTERNET_FLAG_ASYNC
 #else
@@ -870,7 +882,8 @@
 #endif
 
 #ifndef HAVE_INTERNET
-static void *in_R_HTTPOpen(const char *url, const int cacheOK)
+static void *in_R_HTTPOpen(const char *url, const char *headers, 
+                           const int cacheOK)
 {
     return NULL;
 }
Index: src/modules/internet/nanohttp.c
===================================================================
--- src/modules/internet/nanohttp.c	(revision 38715)
+++ src/modules/internet/nanohttp.c	(working copy)
@@ -1034,6 +1034,9 @@
  * @contentType:  if available the Content-Type information will be
  *                returned at that location
  *
+ * @headers: headers to be used in the HTTP request.  These must be name/value
+ *           pairs separated by ':', each on their own line.
+ *
  * This function try to open a connection to the indicated resource
  * via HTTP GET.
  *
@@ -1042,10 +1045,11 @@
  */
 
 void*
-RxmlNanoHTTPOpen(const char *URL, char **contentType, int cacheOK)
+RxmlNanoHTTPOpen(const char *URL, char **contentType, const char *headers,
+                 int cacheOK)
 {
     if (contentType != NULL) *contentType = NULL;
-    return RxmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL, cacheOK);
+    return RxmlNanoHTTPMethod(URL, NULL, NULL, contentType, headers, cacheOK);
 }
 
 /**
1 day later
#
should appear at an R-devel near you...
thanks Seth
Seth Falcon wrote: