only superficial comments: On 6/6/23 13:22, Richard W.M. Jones wrote: > Some servers do not support HEAD for requesting the headers. If the > HEAD request fails, fallback to using the GET method, abandoning the > transfer as soon as possible after the headers have been received. > > Fixes: https://github.com/kubevirt/containerized-data-importer/issues/2737 > --- > tests/Makefile.am | 23 +++++ > plugins/curl/pool.c | 49 ++++++++++- > tests/test-curl-cookie-script.c | 2 +- > tests/test-curl-head-forbidden.c | 134 ++++++++++++++++++++++++++++++ > tests/test-curl-header-script.c | 2 +- > tests/test-curl.c | 2 +- > tests/test-gzip-curl.c | 2 +- > tests/test-retry-request-mirror.c | 2 +- > tests/test-tar-gzip-curl.c | 2 +- > tests/test-tar-xz-curl.c | 2 +- > tests/test-xz-curl.c | 2 +- > tests/web-server.c | 51 +++++++++++- > tests/web-server.h | 9 +- > .gitignore | 1 + > 14 files changed, 271 insertions(+), 12 deletions(-) > > diff --git a/tests/Makefile.am b/tests/Makefile.am > index 165d61aff..6694e409e 100644 > --- a/tests/Makefile.am > +++ b/tests/Makefile.am > @@ -655,6 +655,7 @@ EXTRA_DIST += \ > $(NULL) > LIBGUESTFS_TESTS += test-curl > LIBNBD_TESTS += \ > + test-curl-head-forbidden \ > test-curl-header-script \ > test-curl-cookie-script \ > $(NULL) > @@ -683,6 +684,28 @@ test_curl_LDADD = \ > $(LIBGUESTFS_LIBS) \ > $(NULL) > > +test_curl_head_forbidden_SOURCES = \ > + test-curl-head-forbidden.c \ > + web-server.c \ > + web-server.h \ > + $(NULL) > +test_curl_head_forbidden_CPPFLAGS = \ > + -I$(top_srcdir)/common/include \ > + -I$(top_srcdir)/common/utils \ > + $(NULL) > +test_curl_head_forbidden_CFLAGS = \ > + $(WARNINGS_CFLAGS) \ > + $(LIBNBD_CFLAGS) \ > + $(PTHREAD_CFLAGS) \ > + $(NULL) > +test_curl_head_forbidden_LDFLAGS = \ > + $(top_builddir)/common/utils/libutils.la \ > + $(PTHREAD_LIBS) \ > + $(NULL) > +test_curl_head_forbidden_LDADD = \ > + $(LIBNBD_LIBS) \ > + $(NULL) > + > test_curl_header_script_SOURCES = \ > test-curl-header-script.c \ > web-server.c \ > diff --git a/plugins/curl/pool.c b/plugins/curl/pool.c > index 2af5461a2..c73d52293 100644 > --- a/plugins/curl/pool.c > +++ b/plugins/curl/pool.c > @@ -76,7 +76,9 @@ static int debug_cb (CURL *handle, curl_infotype type, > static size_t write_cb (char *ptr, size_t size, size_t nmemb, void *opaque); > static size_t read_cb (void *ptr, size_t size, size_t nmemb, void *opaque); > static int get_content_length_accept_range (struct curl_handle *ch); > +static bool try_fallback_GET_method (struct curl_handle *ch); > static size_t header_cb (void *ptr, size_t size, size_t nmemb, void *opaque); > +static size_t error_cb (char *ptr, size_t size, size_t nmemb, void *opaque); > > /* This lock protects access to the curl_handles vector below. */ > static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; > @@ -443,6 +445,7 @@ static int > get_content_length_accept_range (struct curl_handle *ch) > { > CURLcode r; > + long code; > #ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T > curl_off_t o; > #else > @@ -469,7 +472,20 @@ get_content_length_accept_range (struct curl_handle *ch) > display_curl_error (ch, r, > "problem doing HEAD request to fetch size of URL > [%s]", > url); > - return -1; > + > + /* Get the HTTP status code, if available. */ > + r = curl_easy_getinfo (ch->c, CURLINFO_RESPONSE_CODE, &code); > + if (r == CURLE_OK) > + nbdkit_debug ("HTTP status code: %ld", code); > + else > + code = -1; > + > + /* S3 servers can return 403 Forbidden for HEAD but still respond > + * to GET, so we give it a second chance in that case. > + * > https://github.com/kubevirt/containerized-data-importer/issues/2737#issuecomment-1577786849 > + */ > + if (code != 403 || !try_fallback_GET_method (ch)) > + return -1; > } > > /* Get the content length. */ > @@ -520,6 +536,27 @@ get_content_length_accept_range (struct curl_handle *ch) > return 0; > } > > +static bool > +try_fallback_GET_method (struct curl_handle *ch) > +{ > + CURLcode r; > + > + nbdkit_debug ("attempting to fetch headers using GET method"); > + > + curl_easy_setopt (ch->c, CURLOPT_HTTPGET, 1L); > + curl_easy_setopt (ch->c, CURLOPT_HEADERFUNCTION, header_cb); > + curl_easy_setopt (ch->c, CURLOPT_HEADERDATA, ch); > + curl_easy_setopt (ch->c, CURLOPT_WRITEFUNCTION, error_cb); > + curl_easy_setopt (ch->c, CURLOPT_WRITEDATA, ch); > + r = curl_easy_perform (ch->c); > + > + /* We expect CURLE_WRITE_ERROR here, but CURLE_OK is possible too > + * (eg if the remote has zero length). Other errors might happen > + * but we ignore them since it is a fallback path. > + */ > + return r == CURLE_OK || r == CURLE_WRITE_ERROR; > +} > + > static size_t > header_cb (void *ptr, size_t size, size_t nmemb, void *opaque) > { > @@ -552,3 +589,13 @@ header_cb (void *ptr, size_t size, size_t nmemb, void > *opaque) > > return realsize; > } > + > +static size_t > +error_cb (char *ptr, size_t size, size_t nmemb, void *opaque) > +{ > +#ifdef CURL_WRITEFUNC_ERROR > + return CURL_WRITEFUNC_ERROR; > +#else > + return 0; /* in older curl, any size < requested will also be an error */ > +#endif > +} > diff --git a/tests/test-curl-cookie-script.c b/tests/test-curl-cookie-script.c > index 005e73ed0..95b602849 100644 > --- a/tests/test-curl-cookie-script.c > +++ b/tests/test-curl-cookie-script.c > @@ -82,7 +82,7 @@ main (int argc, char *argv[]) > exit (77); > #endif > > - sockpath = web_server ("disk", check_request); > + sockpath = web_server ("disk", check_request, false); > if (sockpath == NULL) { > fprintf (stderr, "%s: could not start web server thread\n", > program_name); > exit (EXIT_FAILURE); > diff --git a/tests/test-curl-head-forbidden.c > b/tests/test-curl-head-forbidden.c > new file mode 100644 > index 000000000..16b1f0533 > --- /dev/null > +++ b/tests/test-curl-head-forbidden.c > @@ -0,0 +1,134 @@ > +/* nbdkit > + * Copyright Red Hat > + * > + * Redistribution and use in source and binary forms, with or without > + * modification, are permitted provided that the following conditions are > + * met: > + * > + * * Redistributions of source code must retain the above copyright > + * notice, this list of conditions and the following disclaimer. > + * > + * * Redistributions in binary form must reproduce the above copyright > + * notice, this list of conditions and the following disclaimer in the > + * documentation and/or other materials provided with the distribution. > + * > + * * Neither the name of Red Hat nor the names of its contributors may be > + * used to endorse or promote products derived from this software without > + * specific prior written permission. > + * > + * THIS SOFTWARE IS PROVIDED BY RED HAT AND CONTRIBUTORS ''AS IS'' AND > + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, > + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A > + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RED HAT OR > + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, > + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT > + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF > + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND > + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, > + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT > + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF > + * SUCH DAMAGE. > + */ > + > +/* Test curl plugin against a simulated webserver which responds with > + * 403 Forbidden to HEAD requests, but allows the GET method. > + */ > + > +#include <config.h> > + > +#include <stdio.h> > +#include <stdlib.h> > +#include <stdbool.h> > +#include <stdint.h> > +#include <inttypes.h> > +#include <string.h> > +#include <unistd.h> > +#include <errno.h> > +#include <sys/stat.h> > + > +#include <libnbd.h> > + > +#include "cleanup.h" > +#include "web-server.h" > + > +#include "test.h" > + > +static char buf[1024]; > + > +int > +main (int argc, char *argv[]) > +{ > + struct stat statbuf; > + const char *sockpath; > + struct nbd_handle *nbd; > + CLEANUP_FREE char *usp_param = NULL; > + int64_t size; > + > +#ifndef HAVE_CURLOPT_UNIX_SOCKET_PATH > + fprintf (stderr, "%s: curl does not support CURLOPT_UNIX_SOCKET_PATH\n", > + program_name); > + exit (77); > +#endif > + > + if (stat ("disk", &statbuf) == -1) { > + if (errno == ENOENT) { > + fprintf (stderr, "%s: test skipped because \"disk\" is missing\n", > + program_name); > + exit (77); > + } > + perror ("disk"); > + exit (EXIT_FAILURE); > + } > + > + sockpath = web_server ("disk", NULL, true); > + if (sockpath == NULL) { > + fprintf (stderr, "%s: could not start web server thread\n", > program_name); > + exit (EXIT_FAILURE); > + } > + > + nbd = nbd_create (); > + if (nbd == NULL) { > + fprintf (stderr, "%s\n", nbd_get_error ()); > + exit (EXIT_FAILURE); > + } > + > + /* Start nbdkit. */ > + if (asprintf (&usp_param, "unix-socket-path=%s", sockpath) == -1) { > + perror ("asprintf"); > + exit (EXIT_FAILURE); > + } > + if (nbd_connect_command (nbd, > + (char *[]) { > + "nbdkit", "-s", "--exit-with-parent", "-v", > + "curl", > + "-D", "curl.verbose=1", > + "http://localhost/disk", > + usp_param, /* unix-socket-path=... */ > + NULL }) == -1) { > + fprintf (stderr, "%s\n", nbd_get_error ()); > + exit (EXIT_FAILURE); > + } > + > + /* Check the size is expected. */ > + size = nbd_get_size (nbd); > + if (size == -1) { > + fprintf (stderr, "%s\n", nbd_get_error ()); > + exit (EXIT_FAILURE); > + } > + if (size != statbuf.st_size) { > + fprintf (stderr, "%s: incorrect export size, " > + "expected: %" PRIu64 " actual: %" PRIi64 "\n", > + program_name, > + (uint64_t) statbuf.st_size, size); > + exit (EXIT_FAILURE); > + } > + > + /* Make a request. */ > + if (nbd_pread (nbd, buf, sizeof buf, 0, 0) == -1) { > + fprintf (stderr, "%s\n", nbd_get_error ()); > + exit (EXIT_FAILURE); > + } > + > + nbd_close (nbd); > + exit (EXIT_SUCCESS); > +}
I take it this is a small customization (more precisely, lightly modified copy) of another (existent) test. The --find-copies-harder git formatting option is good for presenting new code in such terms, i.e. as a copy of another file + diffs on top. I figure the only relevant difference would be the last, "true", argument, for web_server(). > diff --git a/tests/test-curl-header-script.c b/tests/test-curl-header-script.c > index 335c96494..e1eb4dacb 100644 > --- a/tests/test-curl-header-script.c > +++ b/tests/test-curl-header-script.c > @@ -104,7 +104,7 @@ main (int argc, char *argv[]) > exit (77); > #endif > > - sockpath = web_server ("disk", check_request); > + sockpath = web_server ("disk", check_request, false); > if (sockpath == NULL) { > fprintf (stderr, "%s: could not start web server thread\n", > program_name); > exit (EXIT_FAILURE); > diff --git a/tests/test-curl.c b/tests/test-curl.c > index ac267a175..e6134d78d 100644 > --- a/tests/test-curl.c > +++ b/tests/test-curl.c > @@ -82,7 +82,7 @@ main (int argc, char *argv[]) > exit (77); > #endif > > - sockpath = web_server ("disk", check_request); > + sockpath = web_server ("disk", check_request, false); > if (sockpath == NULL) { > fprintf (stderr, "%s: could not start web server thread\n", > program_name); > exit (EXIT_FAILURE); > diff --git a/tests/test-gzip-curl.c b/tests/test-gzip-curl.c > index a4133aee5..a3fcb88bf 100644 > --- a/tests/test-gzip-curl.c > +++ b/tests/test-gzip-curl.c > @@ -68,7 +68,7 @@ main (int argc, char *argv[]) > exit (77); > #endif > > - sockpath = web_server (disk, NULL); > + sockpath = web_server (disk, NULL, false); > if (sockpath == NULL) { > fprintf (stderr, "test-gzip-curl: could not start web server thread\n"); > exit (EXIT_FAILURE); > diff --git a/tests/test-retry-request-mirror.c > b/tests/test-retry-request-mirror.c > index 276460b1f..cf42c5964 100644 > --- a/tests/test-retry-request-mirror.c > +++ b/tests/test-retry-request-mirror.c > @@ -72,7 +72,7 @@ main (int argc, char *argv[]) > exit (77); > } > > - sockpath = web_server ("disk" /* not used but must be set */, NULL); > + sockpath = web_server ("disk" /* not used but must be set */, NULL, false); > if (sockpath == NULL) { > fprintf (stderr, "%s: could not start web server thread\n", argv[0]); > exit (EXIT_FAILURE); > diff --git a/tests/test-tar-gzip-curl.c b/tests/test-tar-gzip-curl.c > index ec0035b80..bbfa02f93 100644 > --- a/tests/test-tar-gzip-curl.c > +++ b/tests/test-tar-gzip-curl.c > @@ -68,7 +68,7 @@ main (int argc, char *argv[]) > exit (77); > #endif > > - sockpath = web_server (disk, NULL); > + sockpath = web_server (disk, NULL, false); > if (sockpath == NULL) { > fprintf (stderr, "test-tar-gzip-curl: could not start web server > thread\n"); > exit (EXIT_FAILURE); > diff --git a/tests/test-tar-xz-curl.c b/tests/test-tar-xz-curl.c > index ff92dda28..01d548bb5 100644 > --- a/tests/test-tar-xz-curl.c > +++ b/tests/test-tar-xz-curl.c > @@ -68,7 +68,7 @@ main (int argc, char *argv[]) > exit (77); > #endif > > - sockpath = web_server (disk, NULL); > + sockpath = web_server (disk, NULL, false); > if (sockpath == NULL) { > fprintf (stderr, "test-tar-xz-curl: could not start web server > thread\n"); > exit (EXIT_FAILURE); > diff --git a/tests/test-xz-curl.c b/tests/test-xz-curl.c > index 52b37a0c7..af81cbac7 100644 > --- a/tests/test-xz-curl.c > +++ b/tests/test-xz-curl.c > @@ -68,7 +68,7 @@ main (int argc, char *argv[]) > exit (77); > #endif > > - sockpath = web_server (disk, NULL); > + sockpath = web_server (disk, NULL, false); > if (sockpath == NULL) { > fprintf (stderr, "test-xz-curl: could not start web server thread\n"); > exit (EXIT_FAILURE); > diff --git a/tests/web-server.c b/tests/web-server.c > index dbc8bc845..ba85e3205 100644 > --- a/tests/web-server.c > +++ b/tests/web-server.c > @@ -67,16 +67,19 @@ static int fd = -1; > static struct stat statbuf; > static char request[16384]; > static check_request_t check_request; > +static bool head_fails_with_403 = false; > > static void *start_web_server (void *arg); > static void handle_requests (int s); > static void handle_file_request (int s, enum method method); > static void handle_mirror_redirect_request (int s); > static void handle_mirror_data_request (int s, enum method method, char > byte); > +static void send_403_forbidden (int s); > static void send_404_not_found (int s); > static void send_405_method_not_allowed (int s); > static void send_500_internal_server_error (int s); > static void xwrite (int s, const char *buf, size_t len); > +static void xwrite_allow_epipe (int s, const char *buf, size_t len); > static void xpread (char *buf, size_t count, off_t offset); > > static void > @@ -99,7 +102,8 @@ ignore_sigpipe (void) > } > > const char * > -web_server (const char *filename, check_request_t _check_request) > +web_server (const char *filename, check_request_t _check_request, > + bool _head_fails_with_403) We shouldn't start identifiers with an underscore. > { > struct sockaddr_un addr; > pthread_t thread; > @@ -108,6 +112,7 @@ web_server (const char *filename, check_request_t > _check_request) > ignore_sigpipe (); > > check_request = _check_request; > + head_fails_with_403 = _head_fails_with_403; > > /* Open the file. */ > fd = open (filename, O_RDONLY|O_CLOEXEC); > @@ -250,6 +255,11 @@ handle_requests (int s) > } > memcpy (path, &request[5], n); > path[n] = '\0'; > + if (head_fails_with_403) { > + send_403_forbidden (s); > + eof = true; > + break; > + } I'm not a fan of the pre-existent pattern where we both set "eof = true" and break out of the loop. It would have to be cleaned up first, in a different patch, but I'm not sure if we care enough. > } > else if (strncmp (request, "GET ", 4) == 0) { > method = GET; > @@ -361,7 +371,14 @@ handle_file_request (int s, enum method method) > } > > xpread (data, length, offset); > - xwrite (s, data, length); > + if (!head_fails_with_403 || offset != 0) > + xwrite (s, data, length); > + else > + /* In the special case where we are testing the fallback from HEAD > + * request case, the curl plugin will issue a GET for the whole > + * data, but not read it all. Ignore EPIPE errors here. > + */ > + xwrite_allow_epipe (s, data, length); > > free (data); > } > @@ -449,6 +466,16 @@ handle_mirror_data_request (int s, enum method method, > char byte) > free (data); > } > > +static void > +send_403_forbidden (int s) > +{ > + const char response[] = > + "HTTP/1.1 403 Forbidden\r\n" > + "Connection: close\r\n" > + "\r\n"; > + xwrite (s, response, strlen (response)); > +} > + > static void > send_404_not_found (int s) > { > @@ -498,6 +525,26 @@ xwrite (int s, const char *buf, size_t len) > } > } > > +static void > +xwrite_allow_epipe (int s, const char *buf, size_t len) > +{ > + ssize_t r; > + > + while (len > 0) { > + r = write (s, buf, len); > + if (r == -1) { > + if (errno != EPIPE) { > + perror ("write"); > + exit (EXIT_FAILURE); > + } > + else > + return; > + } exit() doesn't return, so I suggest removing the "else" (i.e., unnesting the "return"). > + buf += r; > + len -= r; > + } > +} > + > static void > xpread (char *buf, size_t count, off_t offset) > { > diff --git a/tests/web-server.h b/tests/web-server.h > index 36f0ea1e1..0961f4c5d 100644 > --- a/tests/web-server.h > +++ b/tests/web-server.h > @@ -43,6 +43,8 @@ > #ifndef NBDKIT_WEB_SERVER_H > #define NBDKIT_WEB_SERVER_H > > +#include <stdbool.h> > + > /* Starts a web server in a background thread. The web server will > * serve 'filename' (only) - the URL in requests is ignored. > * > @@ -57,10 +59,15 @@ > * The optional check_request function is called when the request is > * received (note: not in the main thread) and can be used to perform > * checks for example that particular headers were sent. > + * > + * If head_fails_with_403 == true then we simulate a server that > + * responds to GET but fails HEAD with 403 errors, see: > + * > https://github.com/kubevirt/containerized-data-importer/issues/2737#issuecomment-1577786849 > */ > typedef void (*check_request_t) (const char *request); > extern const char *web_server (const char *filename, > - check_request_t check_request) > + check_request_t check_request, > + bool head_fails_with_403) > __attribute__ ((__nonnull__ (1))); > > #endif /* NBDKIT_WEB_SERVER_H */ I'd prefer the introduction of this new parameter to be a separate patch. That would isolate much the boilerplate from the relevant feature addition. > diff --git a/.gitignore b/.gitignore > index e62c23d8b..49af3998f 100644 > --- a/.gitignore > +++ b/.gitignore > @@ -122,6 +122,7 @@ plugins/*/*.3 > /tests/test-curl > /tests/test-curl-cookie-script > /tests/test-curl-header-script > +/tests/test-curl-head-forbidden > /tests/test-data > /tests/test-delay > /tests/test-exit-with-parent Reviewed-by: Laszlo Ersek <ler...@redhat.com> _______________________________________________ Libguestfs mailing list Libguestfs@redhat.com https://listman.redhat.com/mailman/listinfo/libguestfs