This is an automated email from the ASF dual-hosted git repository. xiaoxiang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-nuttx-apps.git
commit 44e80ac972cdff22456993b220d7832215751f31 Author: YAMAMOTO Takashi <yamam...@midokura.com> AuthorDate: Mon Mar 7 12:48:59 2022 +0900 webclient: Implement chunked transfer (receiving side) This is a requirement for HTTP 1.1. Tested against: * http://httpbin.org/stream/10 * Docker API --- include/netutils/webclient.h | 3 + netutils/webclient/webclient.c | 451 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 406 insertions(+), 48 deletions(-) diff --git a/include/netutils/webclient.h b/include/netutils/webclient.h index 1d69f71..e352d44 100644 --- a/include/netutils/webclient.h +++ b/include/netutils/webclient.h @@ -133,6 +133,9 @@ * buflen - A pointer to the length of the buffer. If the callee wishes * to change the size of the buffer, it may write to buflen. * arg - User argument passed to callback. + * + * Note: changing buffer address and/or size is only allowed for HTTP 1.0. + * It's not allowed for HTTP 1.1. */ typedef void (*wget_callback_t)(FAR char **buffer, int offset, diff --git a/netutils/webclient/webclient.c b/netutils/webclient/webclient.c index fe5ffc1..3191add 100644 --- a/netutils/webclient/webclient.c +++ b/netutils/webclient/webclient.c @@ -154,6 +154,11 @@ enum webclient_state_e WEBCLIENT_STATE_STATUSLINE, WEBCLIENT_STATE_HEADERS, WEBCLIENT_STATE_DATA, + WEBCLIENT_STATE_CHUNKED_HEADER, + WEBCLIENT_STATE_CHUNKED_DATA, + WEBCLIENT_STATE_CHUNKED_ENDDATA, + WEBCLIENT_STATE_CHUNKED_TRAILER, + WEBCLIENT_STATE_WAIT_CLOSE, WEBCLIENT_STATE_CLOSE, WEBCLIENT_STATE_DONE, }; @@ -174,7 +179,8 @@ struct conn_s /* flags for wget_s::internal_flags */ -#define WGET_FLAG_GOT_CONTENT_LENGTH 1 +#define WGET_FLAG_GOT_CONTENT_LENGTH 1U +#define WGET_FLAG_CHUNKED 2U struct wget_s { @@ -202,6 +208,9 @@ struct wget_s uintmax_t expected_resp_body_len; uintmax_t received_body_len; + uintmax_t chunk_len; + uintmax_t chunk_received; + #ifdef CONFIG_WEBCLIENT_GETMIMETYPE char mimetype[CONFIG_WEBCLIENT_MAXMIMESIZE]; #endif @@ -226,13 +235,14 @@ struct wget_s * 
Private Data ****************************************************************************/ -static const char g_http10[] = "HTTP/1.0"; -static const char g_http11[] = "HTTP/1.1"; +static const char g_http10[] = "HTTP/1.0"; +static const char g_http11[] = "HTTP/1.1"; #ifdef CONFIG_WEBCLIENT_GETMIMETYPE -static const char g_httpcontenttype[] = "content-type: "; +static const char g_httpcontenttype[] = "content-type: "; #endif -static const char g_httphost[] = "host: "; -static const char g_httplocation[] = "location: "; +static const char g_httphost[] = "host: "; +static const char g_httplocation[] = "location: "; +static const char g_httptransferencoding[] = "transfer-encoding: "; static const char g_httpuseragentfields[] = "Connection: close\r\n" @@ -413,13 +423,13 @@ static char *wget_urlencode_strcpy(char *dest, const char *src) * Name: wget_parseint ****************************************************************************/ -static int wget_parseint(const char *cp, uintmax_t *resultp) +static int wget_parseint(const char *cp, uintmax_t *resultp, int base) { char *ep; uintmax_t val; errno = 0; - val = strtoumax(cp, &ep, 10); + val = strtoumax(cp, &ep, base); if (cp == ep) { return -EINVAL; /* not a number */ @@ -459,7 +469,7 @@ static inline int wget_parsestatus(struct webclient_context *ctx, { bool got_nl; - ws->line[ndx] = ws->buffer[offset]; + ws->line[ndx] = ws->buffer[offset++]; got_nl = ws->line[ndx] == ISO_NL; if (got_nl || ndx == CONFIG_WEBCLIENT_MAXHTTPLINE - 1) { @@ -540,12 +550,12 @@ static inline int wget_parsestatus(struct webclient_context *ctx, */ ws->state = WEBCLIENT_STATE_HEADERS; + ws->internal_flags &= ~WGET_FLAG_CHUNKED; ndx = 0; break; } else { - offset++; ndx++; } } @@ -614,7 +624,7 @@ static inline int wget_parseheaders(struct webclient_context *ctx, { bool got_nl; - ws->line[ndx] = ws->buffer[offset]; + ws->line[ndx] = ws->buffer[offset++]; got_nl = ws->line[ndx] == ISO_NL; if (got_nl || ndx == CONFIG_WEBCLIENT_MAXHTTPLINE - 1) { @@ 
-649,7 +659,16 @@ static inline int wget_parseheaders(struct webclient_context *ctx, * actual data. */ - ws->state = WEBCLIENT_STATE_DATA; + if ((ws->internal_flags & WGET_FLAG_CHUNKED) != 0) + { + ws->state = WEBCLIENT_STATE_CHUNKED_HEADER; + ndx = 0; + } + else + { + ws->state = WEBCLIENT_STATE_DATA; + } + goto exit; } @@ -730,7 +749,7 @@ static inline int wget_parseheaders(struct webclient_context *ctx, if (got_nl) { ret = wget_parseint(ws->line + strlen(g_httpcontsize), - &ws->expected_resp_body_len); + &ws->expected_resp_body_len, 10); if (ret != 0) { goto exit; @@ -742,6 +761,24 @@ static inline int wget_parseheaders(struct webclient_context *ctx, ws->expected_resp_body_len); } } + else if (strncasecmp(ws->line, g_httptransferencoding, + strlen(g_httptransferencoding)) == 0) + { + /* Accept only the "chunked" transfer coding. + */ + + FAR const char *encodings = + ws->line + strlen(g_httptransferencoding); + + if (strcasecmp(encodings, "chunked")) + { + nerr("unknown encodings: '%s'\n", encodings); + return -EPROTO; + } + + ninfo("transfer encodings: '%s'\n", encodings); + ws->internal_flags |= WGET_FLAG_CHUNKED; + } } if (found && !got_nl) @@ -769,12 +806,229 @@ static inline int wget_parseheaders(struct webclient_context *ctx, { ndx++; } - - offset++; } exit: - ws->offset = ++offset; + ws->offset = offset; + ws->ndx = ndx; + return ret; +} + +/**************************************************************************** + * Name: wget_parsechunkheader + ****************************************************************************/ + +static inline int wget_parsechunkheader(struct webclient_context *ctx, + struct wget_s *ws) +{ + int offset; + int ndx; + int ret = OK; + + offset = ws->offset; + ndx = ws->ndx; + + while (offset < ws->datend) + { + bool got_nl; + + ws->line[ndx] = ws->buffer[offset++]; + got_nl = ws->line[ndx] == ISO_NL; + if (got_nl || ndx == CONFIG_WEBCLIENT_MAXHTTPLINE - 1) + { + bool found_extension = false; + + /* We have an entire 
header line in ws->line, or + * our buffer is already full, so we start parsing it. + */ + + if (ndx > 0) /* Should always be true */ + { + FAR char *semicolon; + + ninfo("Got chunk header line%s: %.*s\n", + got_nl ? "" : " (truncated)", + ndx - 1, &ws->line[0]); + + if (ws->line[0] == ISO_CR) + { + nerr("ERROR: empty chunk header\n"); + ret = -EPROTO; + break; + } + + /* Truncate the trailing \r\n */ + + if (got_nl) + { + ndx--; + if (ws->line[ndx] != ISO_CR) + { + nerr("ERROR: unexpected EOL from the server\n"); + ret = -EPROTO; + break; + } + } + + ws->line[ndx] = '\0'; + + semicolon = strchr(ws->line, ';'); + if (semicolon != NULL) + { + found_extension = true; + ninfo("Ignoring extentions in chunk header\n"); + *semicolon = 0; + } + } + + if (!got_nl && !found_extension) + { + /* We found something we might care about, + * but we couldn't process it correctly. + */ + + nerr("ERROR: truncated a header due to " + "small CONFIG_WEBCLIENT_MAXHTTPLINE\n"); + ret = -E2BIG; + break; + } + + ret = wget_parseint(ws->line, &ws->chunk_len, 16); + if (ret != 0) + { + break; + } + + if (ws->chunk_len != 0) + { + ninfo("Receiving a chunk with %ju bytes\n", ws->chunk_len); + ws->state = WEBCLIENT_STATE_CHUNKED_DATA; + ws->chunk_received = 0; + } + else + { + ws->state = WEBCLIENT_STATE_CHUNKED_TRAILER; + } + + ndx = 0; + break; + } + else + { + ndx++; + } + } + + ws->offset = offset; + ws->ndx = ndx; + return ret; +} + +/**************************************************************************** + * Name: wget_parsechunkenddata + ****************************************************************************/ + +static inline int wget_parsechunkenddata(struct webclient_context *ctx, + struct wget_s *ws) +{ + int offset; + int ndx; + int ret = OK; + + offset = ws->offset; + ndx = ws->ndx; + + while (offset < ws->datend) + { + ws->line[ndx] = ws->buffer[offset++]; + if (ws->line[ndx] == ISO_NL) + { + if (ndx == 0) + { + ret = -EPROTO; + break; + } + + if (ws->line[ndx - 1] != ISO_CR) 
+ { + ret = -EPROTO; + break; + } + + if (ndx != 1) + { + ret = -EPROTO; + break; + } + + if (ws->chunk_len == 0) + { + ws->state = WEBCLIENT_STATE_CHUNKED_TRAILER; + } + else + { + ws->state = WEBCLIENT_STATE_CHUNKED_HEADER; + } + + ndx = 0; + break; + } + + ndx++; + } + + ws->offset = offset; + ws->ndx = ndx; + return ret; +} + +/**************************************************************************** + * Name: wget_parsechunktrailer + ****************************************************************************/ + +static inline int wget_parsechunktrailer(struct webclient_context *ctx, + struct wget_s *ws) +{ + int offset; + int ndx; + int ret = OK; + + offset = ws->offset; + ndx = ws->ndx; + + while (offset < ws->datend) + { + ws->line[ndx] = ws->buffer[offset++]; + if (ws->line[ndx] == ISO_NL) + { + if (ndx == 0) + { + ret = -EPROTO; + break; + } + + if (ws->line[ndx - 1] != ISO_CR) + { + ret = -EPROTO; + break; + } + + if (ndx != 1) + { + /* Ignore all non empty lines. */ + + ndx = 0; + continue; + } + + ws->state = WEBCLIENT_STATE_WAIT_CLOSE; + break; + } + + ndx++; + } + + ws->offset = offset; ws->ndx = ndx; return ret; } @@ -1293,46 +1547,60 @@ int webclient_perform(FAR struct webclient_context *ctx) if (ws->state == WEBCLIENT_STATE_STATUSLINE || ws->state == WEBCLIENT_STATE_HEADERS || - ws->state == WEBCLIENT_STATE_DATA) + ws->state == WEBCLIENT_STATE_DATA || + ws->state == WEBCLIENT_STATE_CHUNKED_HEADER || + ws->state == WEBCLIENT_STATE_CHUNKED_DATA) { for (; ; ) { - ws->datend = conn_recv(ctx, conn, ws->buffer, ws->buflen); - if (ws->datend < 0) + if (ws->datend - ws->offset == 0) { - ret = ws->datend; - nerr("ERROR: recv failed: %d\n", -ret); - goto errout_with_errno; - } - else if (ws->datend == 0) - { - if (ws->state != WEBCLIENT_STATE_DATA) + ssize_t ssz; + + ninfo("Reading new data\n"); + ssz = conn_recv(ctx, conn, ws->buffer, ws->buflen); + if (ssz < 0) { - nerr("Connection lost unexpectedly\n"); - ret = -ECONNABORTED; + ret = ssz; + 
nerr("ERROR: recv failed: %d\n", -ret); goto errout_with_errno; } - - if ((ws->internal_flags & - WGET_FLAG_GOT_CONTENT_LENGTH) != 0 && - ws->expected_resp_body_len != ws->received_body_len) + else if (ssz == 0) { - nerr("Unexpected response body length: %ju != %ju\n", - ws->expected_resp_body_len, - ws->received_body_len); - ret = -EPROTO; - goto errout_with_errno; + if (ws->state != WEBCLIENT_STATE_DATA && + ws->state != WEBCLIENT_STATE_WAIT_CLOSE) + { + nerr("Connection lost unexpectedly\n"); + ret = -ECONNABORTED; + goto errout_with_errno; + } + + if ((ws->internal_flags & + WGET_FLAG_GOT_CONTENT_LENGTH) != 0 && + ws->expected_resp_body_len != + ws->received_body_len) + { + nerr("Unexpected response body length: " + "%ju != %ju\n", + ws->expected_resp_body_len, + ws->received_body_len); + ret = -EPROTO; + goto errout_with_errno; + } + + ninfo("Connection lost\n"); + ws->state = WEBCLIENT_STATE_CLOSE; + ws->redirected = 0; + break; } - ninfo("Connection lost\n"); - ws->state = WEBCLIENT_STATE_CLOSE; - ws->redirected = 0; - break; + ninfo("Got %zd bytes data\n", ssz); + ws->offset = 0; + ws->datend = ssz; } /* Handle initial parsing of the status line */ - ws->offset = 0; if (ws->state == WEBCLIENT_STATE_STATUSLINE) { ret = wget_parsestatus(ctx, ws); @@ -1353,29 +1621,88 @@ int webclient_perform(FAR struct webclient_context *ctx) } } + /* Parse the chunk header */ + + if (ws->state == WEBCLIENT_STATE_CHUNKED_HEADER) + { + ret = wget_parsechunkheader(ctx, ws); + if (ret < 0) + { + goto errout_with_errno; + } + } + + if (ws->state == WEBCLIENT_STATE_CHUNKED_ENDDATA) + { + ret = wget_parsechunkenddata(ctx, ws); + if (ret < 0) + { + goto errout_with_errno; + } + } + + if (ws->state == WEBCLIENT_STATE_CHUNKED_TRAILER) + { + ret = wget_parsechunktrailer(ctx, ws); + if (ret < 0) + { + goto errout_with_errno; + } + } + + if (ws->state == WEBCLIENT_STATE_WAIT_CLOSE) + { + uintmax_t received = ws->datend - ws->offset; + if (received != 0) + { + nerr("Unexpected %ju bytes 
data received", received); + ret = -EPROTO; + goto errout_with_errno; + } + } + /* Dispose of the data payload */ - if (ws->state == WEBCLIENT_STATE_DATA) + if (ws->state == WEBCLIENT_STATE_DATA || + ws->state == WEBCLIENT_STATE_CHUNKED_DATA) { if (ws->httpstatus != HTTPSTATUS_MOVED) { + uintmax_t received = ws->datend - ws->offset; + FAR char *orig_buffer = ws->buffer; + int orig_buflen = ws->buflen; + + if (ws->state == WEBCLIENT_STATE_CHUNKED_DATA) + { + uintmax_t chunk_left = + ws->chunk_len - ws->chunk_received; + + if (received > chunk_left) + { + received = chunk_left; + } + + ws->chunk_received += received; + } + ninfo("Processing resp body %ju - %ju\n", ws->received_body_len, - ws->received_body_len + ws->datend - ws->offset); - ws->received_body_len += ws->datend - ws->offset; + ws->received_body_len + received); + ws->received_body_len += received; /* Let the client decide what to do with the * received file. */ - if (ws->offset == ws->datend) + if (received == 0) { /* We don't have data to give to the client yet. */ } else if (ctx->sink_callback) { ret = ctx->sink_callback(&ws->buffer, ws->offset, - ws->datend, &ws->buflen, + ws->offset + received, + &ws->buflen, ctx->sink_callback_arg); if (ret != 0) { @@ -1384,9 +1711,37 @@ int webclient_perform(FAR struct webclient_context *ctx) } else { - ctx->callback(&ws->buffer, ws->offset, ws->datend, + ctx->callback(&ws->buffer, ws->offset, + ws->offset + received, &ws->buflen, ctx->sink_callback_arg); } + + ws->offset += received; + + /* The buffer swapping API doesn't work for + * HTTP 1.1 chunked transfer because the buffer here + * might already contain the next chunk header. 
+ */ + + if (ctx->protocol_version == + WEBCLIENT_PROTOCOL_VERSION_HTTP_1_1) + { + if (orig_buffer != ws->buffer || + orig_buflen != ws->buflen) + { + ret = -EINVAL; + goto errout_with_errno; + } + } + + if (ws->state == WEBCLIENT_STATE_CHUNKED_DATA) + { + if (ws->chunk_len == ws->chunk_received) + { + ws->state = WEBCLIENT_STATE_CHUNKED_ENDDATA; + ws->ndx = 0; + } + } } else {