Robert Gentleman <[EMAIL PROTECTED]> writes: > OK, that suggests setting at the options level would solve both of your > problems and that seems like the best approach. I don't really want to > pass this around as a parameter through the maze of functions that might > actually download something if we don't have to.
I have an updated patch that adds an HTTPUserAgent option. The default is a string like: R (2.4.0 x86_64-unknown-linux-gnu x86_64 linux-gnu) If the HTTPUserAgent option is NULL, no user agent header is added to HTTP requests (this is the current behavior). This option allows R to use an arbitrary user agent header. The patch adds two non-exported functions to utils: 1) defaultUserAgent - returns a string like above 2) makeUserAgent - formats content of HTTPUserAgent option for use as part of an HTTP request header. I've tested on OSX and Linux, but not on Windows. When USE_WININET is defined, a user agent string of "R" was already being used. With this patch, the HTTPUserAgent options is used. I'm unsure if NULL is allowed. Also, in src/main/internet.c there is a comment: "Next 6 are for use by libxml, only" and then a definition for R_HTTPOpen. Not sure how/when these get used. The user agent for these calls remains unspecified with this patch. + seth Patch summary: src/include/R_ext/R-ftp-http.h | 2 +- src/include/Rmodules/Rinternet.h | 2 +- src/library/base/man/options.Rd | 5 +++++ src/library/utils/R/readhttp.R | 25 +++++++++++++++++++++++++ src/library/utils/R/zzz.R | 3 ++- src/main/internet.c | 2 +- src/modules/internet/internet.c | 37 +++++++++++++++++++++++++------------ src/modules/internet/nanohttp.c | 8 ++++++-- 8 files changed, 66 insertions(+), 18 deletions(-) Index: src/include/R_ext/R-ftp-http.h =================================================================== --- src/include/R_ext/R-ftp-http.h (revision 38715) +++ src/include/R_ext/R-ftp-http.h (working copy) @@ -36,7 +36,7 @@ int R_FTPRead(void *ctx, char *dest, int len); void R_FTPClose(void *ctx); -void * RxmlNanoHTTPOpen(const char *URL, char **contentType, int cacheOK); +void * RxmlNanoHTTPOpen(const char *URL, char **contentType, const char *headers, int cacheOK); int RxmlNanoHTTPRead(void *ctx, void *dest, int len); void RxmlNanoHTTPClose(void *ctx); int RxmlNanoHTTPReturnCode(void *ctx); Index: src/include/Rmodules/Rinternet.h =================================================================== --- src/include/Rmodules/Rinternet.h (revision 38715) +++ src/include/Rmodules/Rinternet.h (working copy) @@ -9,7 +9,7 @@ typedef Rconnection (*R_NewUrlRoutine)(char *description, char *mode); typedef Rconnection (*R_NewSockRoutine)(char *host, int port, int server, char *mode); -typedef void * (*R_HTTPOpenRoutine)(const char *url, const int cacheOK); +typedef void * (*R_HTTPOpenRoutine)(const char *url, const char *headers, const int cacheOK); typedef int (*R_HTTPReadRoutine)(void *ctx, char *dest, int len); typedef void (*R_HTTPCloseRoutine)(void *ctx); Index: src/main/internet.c =================================================================== --- src/main/internet.c (revision 38715) +++ src/main/internet.c (working copy) @@ -129,7 +129,7 @@ { if(!initialized) internet_Init(); if(initialized > 0) - return (*ptr->HTTPOpen)(url, 0); + return (*ptr->HTTPOpen)(url, NULL, 0); else { error(_("internet routines cannot be loaded")); return NULL; Index: src/library/utils/R/zzz.R =================================================================== --- src/library/utils/R/zzz.R (revision 38715) +++ src/library/utils/R/zzz.R (working copy) @@ -9,7 +9,8 @@ internet.info = 2, pkgType = .Platform$pkgType, str = list(strict.width = "no"), - example.ask = "default") + example.ask = "default", + HTTPUserAgent = defaultUserAgent()) extra <- if(.Platform$OS.type == "windows") { list(mailer = "none", Index: src/library/utils/R/readhttp.R =================================================================== --- src/library/utils/R/readhttp.R (revision 38715) +++ src/library/utils/R/readhttp.R (working copy) @@ -6,3 +6,28 @@ stop("transfer failure") file.show(file, delete.file = delete.file, title = title, ...) } + + + +defaultUserAgent <- function() +{ + Rver <- paste(R.version$major, R.version$minor, sep=".") + Rdetails <- paste(Rver, R.version$platform, R.version$arch, + R.version$os) + paste("R (", Rdetails, ")", sep="") +} + + +makeUserAgent <- function(format = TRUE) { + agent <- getOption("HTTPUserAgent") + if (is.null(agent)) { + return(NULL) + } + if (length(agent) != 1) + stop(sQuote("HTTPUserAgent"), + " option must be a length one character vector or NULL") + if (format) + paste("User-Agent: ", agent[1], "\r\n", sep = "") + else + agent[1] +} Index: src/library/base/man/options.Rd =================================================================== --- src/library/base/man/options.Rd (revision 38715) +++ src/library/base/man/options.Rd (working copy) @@ -368,6 +368,11 @@ \item{\code{help.try.all.packages}:}{default for an argument of \code{\link{help}}.} + \item{\code{HTTPUserAgent}:}{string used as the user agent in HTTP + requests. If \code{NULL}, HTTP requests will be made without a + user agent header. The default is \code{R (<version> <platform> + <arch> <os>)}} + \item{\code{internet.info}:}{The minimum level of information to be printed on URL downloads etc. Default is 2, for failure causes. Set to 1 or 0 to get more information.} Index: src/modules/internet/internet.c =================================================================== --- src/modules/internet/internet.c (revision 38715) +++ src/modules/internet/internet.c (working copy) @@ -28,7 +28,7 @@ #include <Rconnections.h> #include <R_ext/R-ftp-http.h> -static void *in_R_HTTPOpen(const char *url, const int cacheOK); +static void *in_R_HTTPOpen(const char *url, const char *headers, const int cacheOK); static int in_R_HTTPRead(void *ctx, char *dest, int len); static void in_R_HTTPClose(void *ctx); @@ -70,7 +70,7 @@ switch(type) { case HTTPsh: - ctxt = in_R_HTTPOpen(url, 0); + ctxt = in_R_HTTPOpen(url, NULL, 0); if(ctxt == NULL) { /* if we call error() we get a connection leak*/ /* so do_url has to raise the error*/ @@ -238,14 +238,14 @@ } #endif -/* download(url, destfile, quiet, mode, cacheOK) */ +/* download(url, destfile, quiet, mode, headers, cacheOK) */ #define CPBUFSIZE 65536 #define IBUFSIZE 4096 static SEXP in_do_download(SEXP call, SEXP op, SEXP args, SEXP env) { - SEXP ans, scmd, sfile, smode; - char *url, *file, *mode; + SEXP ans, scmd, sfile, smode, sheaders, agentFun; + char *url, *file, *mode, *headers; int quiet, status = 0, cacheOK; checkArity(op, args); @@ -271,6 +271,17 @@ cacheOK = asLogical(CAR(args)); if(cacheOK == NA_LOGICAL) error(_("invalid '%s' argument"), "cacheOK"); +#ifdef USE_WININET + PROTECT(agentFun = lang2(install("makeUserAgent"), ScalarLogical(0))); +#else + PROTECT(agentFun = lang1(install("makeUserAgent"))); +#endif + PROTECT(sheaders = eval(agentFun, R_FindNamespace(mkString("utils")))); + UNPROTECT(1); + if(TYPEOF(sheaders) == NILSXP) + headers = NULL; + else + headers = CHAR(STRING_ELT(sheaders, 0)); #ifdef Win32 if (!pbar.wprog) { pbar.wprog = newwindow(_("Download progress"), rect(0, 0, 540, 100), @@ -319,7 +330,7 @@ #ifdef Win32 R_FlushConsole(); #endif - ctxt = in_R_HTTPOpen(url, cacheOK); + ctxt = in_R_HTTPOpen(url, headers, cacheOK); if(ctxt == NULL) status = 1; else { if(!quiet) REprintf(_("opened URL\n"), url); @@ -466,14 +477,14 @@ PROTECT(ans = allocVector(INTSXP, 1)); INTEGER(ans)[0] = status; - UNPROTECT(1); + UNPROTECT(2); return ans; } #if defined(SUPPORT_LIBXML) && !defined(USE_WININET) -void *in_R_HTTPOpen(const char *url, int cacheOK) +void *in_R_HTTPOpen(const char *url, const char *headers, const int cacheOK) { inetconn *con; void *ctxt; @@ -484,7 +495,7 @@ if(timeout == NA_INTEGER || timeout <= 0) timeout = 60; RxmlNanoHTTPTimeout(timeout); - ctxt = RxmlNanoHTTPOpen(url, NULL, cacheOK); + ctxt = RxmlNanoHTTPOpen(url, NULL, headers, cacheOK); if(ctxt != NULL) { int rc = RxmlNanoHTTPReturnCode(ctxt); if(rc != 200) { @@ -605,7 +616,8 @@ } #endif /* USE_WININET_ASYNC */ -static void *in_R_HTTPOpen(const char *url, const int cacheOK) +static void *in_R_HTTPOpen(const char *url, const char *headers, + const int cacheOK) { WIctxt wictxt; DWORD status, d1 = 4, d2 = 0, d3 = 100; @@ -622,7 +634,7 @@ wictxt->length = -1; wictxt->type = NULL; wictxt->hand = - InternetOpen("R", INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, + InternetOpen(headers, INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, #ifdef USE_WININET_ASYNC INTERNET_FLAG_ASYNC #else @@ -870,7 +882,8 @@ #endif #ifndef HAVE_INTERNET -static void *in_R_HTTPOpen(const char *url, const int cacheOK) +static void *in_R_HTTPOpen(const char *url, const char *headers, + const int cacheOK) { return NULL; } Index: src/modules/internet/nanohttp.c =================================================================== --- src/modules/internet/nanohttp.c (revision 38715) +++ src/modules/internet/nanohttp.c (working copy) @@ -1034,6 +1034,9 @@ * @contentType: if available the Content-Type information will be * returned at that location * + * @headers: headers to be used in the HTTP request. These must be name/value + * pairs separated by ':', each on their own line. + * * This function try to open a connection to the indicated resource * via HTTP GET. * @@ -1042,10 +1045,11 @@ */ void* -RxmlNanoHTTPOpen(const char *URL, char **contentType, int cacheOK) +RxmlNanoHTTPOpen(const char *URL, char **contentType, const char *headers, + int cacheOK) { if (contentType != NULL) *contentType = NULL; - return RxmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL, cacheOK); + return RxmlNanoHTTPMethod(URL, NULL, NULL, contentType, headers, cacheOK); } /** ______________________________________________ R-devel@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-devel