Hi,
Thanks for comments.
The updated patch fixes the RFC incompatibilities: it now simply ignores
chunk extensions and the trailer.
The patch also exposes the "chunked" filter to user space, so it can now be
used with any stream (see the test in the patch).
If the user opens an HTTP stream and the server responds with a
"Transfer-Encoding: chunked" header, PHP applies the chunked filter
automatically and does not pass the "Transfer-Encoding" header on to the user.
The automatic decoding may be disabled through the stream context:
stream_context_create(array("http"=>array("auto_decode"=>0)));
Any objections against applying it into 5.3?
I think it would be very easy to add automatic decompression of HTTP responses
that carry "Content-Encoding: gzip" (and other) headers using a similar
patch (except that the filters are already implemented, so the patch would
only need to update http_fopen_wrapper.c).
Thanks. Dmitry.
Sara Golemon wrote:
The attached patch implements automatic decoding of chunked
transfer-encoding.
Any objections against committing the patch into PHP_5_3?
I didn't have objections when I offered this filter several years ago,
and I still don't. I do recall Andi (or perhaps it was someone else)
saying it was adding unnecessary complexity to the http wrapper and
such things should be left for cURL though, and this is why it didn't
go into 5.1
Maybe someone has ideas about patch improvements?
I'd add a context option to disable automatic application of the
filter, and register the filter with the streams layer so that it can
be applied manually. (And of course, I'd second Stas' comment wrt
chunk-header length)
The issue wrt 1.0 versus 1.1 is fairly moot. If the caller doesn't
override the version then the server won't send chunked encoding.
They are capable of doing so however, and they may already be doing it
(e.g. for a streaming client). So the concern about potential BC
breakage for apps still exists, it's just less severe (since I doubt
many apps *are* explicitly setting the version to 1.1)
-Sara
Index: ext/standard/filters.c
===================================================================
RCS file: /repository/php-src/ext/standard/filters.c,v
retrieving revision 1.44.2.6.2.4.2.3
diff -u -p -d -r1.44.2.6.2.4.2.3 filters.c
--- ext/standard/filters.c 31 Dec 2008 11:15:45 -0000 1.44.2.6.2.4.2.3
+++ ext/standard/filters.c 15 Apr 2009 11:35:07 -0000
@@ -1897,6 +1897,220 @@ php_stream_filter_factory consumed_filte
/* }}} */
+/* {{{ chunked filter implementation */
+typedef enum _php_chunked_filter_state { /* parser states of php_dechunk() */
+ CHUNK_SIZE_START, /* at the first character of a chunk-size line; chunk_size is reset to 0 */
+ CHUNK_SIZE, /* at least one hex digit read; accumulating further digits */
+ CHUNK_SIZE_EXT_START, /* first non-hex character after the size; must be ';', CR or LF */
+ CHUNK_SIZE_EXT, /* skipping the rest of the size line up to CR or LF */
+ CHUNK_SIZE_CR, /* optional CR terminating the size line */
+ CHUNK_SIZE_LF, /* LF required to end the size line; a zero size moves to CHUNK_TRAILER */
+ CHUNK_BODY, /* copying chunk_size bytes of chunk data to the output */
+ CHUNK_BODY_CR, /* optional CR after the chunk data */
+ CHUNK_BODY_LF, /* LF required after the chunk data; then back to CHUNK_SIZE_START */
+ CHUNK_TRAILER, /* zero-size last chunk seen; all remaining input is discarded */
+ CHUNK_ERROR /* malformed input; remaining bytes are passed through undecoded */
+} php_chunked_filter_state;
+
+typedef struct _php_chunked_filter_data { /* decoder state kept in the filter's abstract pointer */
+ php_chunked_filter_state state; /* where php_dechunk() resumes on its next call */
+ int chunk_size; /* size parsed from the current chunk-size line; reduced to the bytes still expected when a body spans buffers */
+ int persistent; /* allocation flag given to pecalloc() at creation and to pefree() in the destructor */
+} php_chunked_filter_data;
+
+/*
+ * Map one hexadecimal digit to its value.
+ *
+ * Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f', or -1 for any other byte.
+ */
+static int php_dechunk_hex_digit(char c)
+{
+	if (c >= '0' && c <= '9') {
+		return c - '0';
+	}
+	if (c >= 'A' && c <= 'F') {
+		return c - 'A' + 10;
+	}
+	if (c >= 'a' && c <= 'f') {
+		return c - 'a' + 10;
+	}
+	return -1;
+}
+
+/*
+ * Decode "chunked" transfer-coding in place.
+ *
+ * buf  - input bytes; the decoded payload is compacted to the front of it
+ * len  - number of valid bytes in buf
+ * data - parser state carried from one call to the next, so the chunk
+ *        framing may be split across buffers at any byte
+ *
+ * Returns the number of decoded bytes now stored at the start of buf.
+ *
+ * Line endings may be CRLF or a bare LF. Chunk extensions are skipped and
+ * everything after the zero-size last chunk (the trailer) is discarded.
+ * Once malformed framing is seen the parser stays in CHUNK_ERROR and hands
+ * the remaining bytes through unchanged.
+ */
+static int php_dechunk(char *buf, int len, php_chunked_filter_data *data)
+{
+	/* Largest chunk_size that can still absorb one more hex digit without
+	 * exceeding 0x7FFFFFFF; anything bigger would overflow the signed int
+	 * and could reach memmove() as a negative (i.e. huge) length. */
+	enum { CHUNK_SIZE_SHIFT_LIMIT = 0x07FFFFFF };
+	char *p = buf;
+	char *end = p + len;
+	char *out = buf;
+	int out_len = 0;
+
+	while (p < end) {
+		switch (data->state) {
+			case CHUNK_SIZE_START:
+				data->chunk_size = 0;
+				/* fall through */
+			case CHUNK_SIZE:
+				while (p < end) {
+					int digit = php_dechunk_hex_digit(*p);
+
+					if (digit < 0) {
+						/* a size line needs at least one hex digit */
+						data->state = (data->state == CHUNK_SIZE_START) ? CHUNK_ERROR : CHUNK_SIZE_EXT_START;
+						break;
+					}
+					if (data->chunk_size > CHUNK_SIZE_SHIFT_LIMIT) {
+						/* chunk size does not fit into an int */
+						data->state = CHUNK_ERROR;
+						break;
+					}
+					data->chunk_size = (data->chunk_size * 16) + digit;
+					data->state = CHUNK_SIZE;
+					p++;
+				}
+				if (data->state == CHUNK_ERROR) {
+					continue;
+				} else if (p == end) {
+					return out_len;
+				}
+				/* fall through */
+			case CHUNK_SIZE_EXT_START:
+				if (*p == ';' || *p == '\r' || *p == '\n') {
+					data->state = CHUNK_SIZE_EXT;
+				} else {
+					data->state = CHUNK_ERROR;
+					continue;
+				}
+				/* fall through */
+			case CHUNK_SIZE_EXT:
+				/* skip extension */
+				while (p < end && *p != '\r' && *p != '\n') {
+					p++;
+				}
+				if (p == end) {
+					return out_len;
+				}
+				/* fall through */
+			case CHUNK_SIZE_CR:
+				if (*p == '\r') {
+					p++;
+					if (p == end) {
+						data->state = CHUNK_SIZE_LF;
+						return out_len;
+					}
+				}
+				/* fall through */
+			case CHUNK_SIZE_LF:
+				if (*p == '\n') {
+					p++;
+					if (data->chunk_size == 0) {
+						/* last chunk */
+						data->state = CHUNK_TRAILER;
+						continue;
+					} else if (p == end) {
+						data->state = CHUNK_BODY;
+						return out_len;
+					}
+				} else {
+					data->state = CHUNK_ERROR;
+					continue;
+				}
+				/* fall through */
+			case CHUNK_BODY:
+				if (end - p >= data->chunk_size) {
+					/* the whole (rest of the) chunk is in this buffer */
+					if (p != out) {
+						memmove(out, p, data->chunk_size);
+					}
+					out += data->chunk_size;
+					out_len += data->chunk_size;
+					p += data->chunk_size;
+					if (p == end) {
+						data->state = CHUNK_BODY_CR;
+						return out_len;
+					}
+				} else {
+					/* the chunk continues in the next buffer */
+					if (p != out) {
+						memmove(out, p, end - p);
+					}
+					data->chunk_size -= end - p;
+					/* We may have arrived here by falling through from the
+					 * size line; record that the next call resumes inside
+					 * the body, otherwise the rest of the chunk would be
+					 * parsed as part of the size line. */
+					data->state = CHUNK_BODY;
+					out_len += end - p;
+					return out_len;
+				}
+				/* fall through */
+			case CHUNK_BODY_CR:
+				if (*p == '\r') {
+					p++;
+					if (p == end) {
+						data->state = CHUNK_BODY_LF;
+						return out_len;
+					}
+				}
+				/* fall through */
+			case CHUNK_BODY_LF:
+				if (*p == '\n') {
+					p++;
+					data->state = CHUNK_SIZE_START;
+					continue;
+				} else {
+					data->state = CHUNK_ERROR;
+					continue;
+				}
+			case CHUNK_TRAILER:
+				/* ignore trailer */
+				p = end;
+				continue;
+			case CHUNK_ERROR:
+				/* pass the undecodable remainder through as-is */
+				if (p != out) {
+					memmove(out, p, end - p);
+				}
+				out_len += end - p;
+				return out_len;
+		}
+	}
+	return out_len;
+}
+
+/*
+ * Stream filter callback for the "chunked" filter.
+ *
+ * Takes every bucket off buckets_in, decodes its buffer in place with
+ * php_dechunk() using the state stored in thisfilter->abstract, and appends
+ * the (possibly shortened) bucket to buckets_out. When bytes_consumed is
+ * non-NULL it receives the total length of the buckets before decoding.
+ * stream and flags are not used. Always returns PSFS_PASS_ON.
+ */
+static php_stream_filter_status_t php_chunked_filter(
+	php_stream *stream,
+	php_stream_filter *thisfilter,
+	php_stream_bucket_brigade *buckets_in,
+	php_stream_bucket_brigade *buckets_out,
+	size_t *bytes_consumed,
+	int flags
+	TSRMLS_DC)
+{
+	php_chunked_filter_data *decoder = (php_chunked_filter_data *) thisfilter->abstract;
+	size_t raw_bytes = 0;
+	php_stream_bucket *cur;
+
+	for (cur = buckets_in->head; cur != NULL; cur = buckets_in->head) {
+		cur = php_stream_bucket_make_writeable(cur TSRMLS_CC);
+		/* count the encoded length before php_dechunk() shrinks it */
+		raw_bytes += cur->buflen;
+		cur->buflen = php_dechunk(cur->buf, cur->buflen, decoder);
+		php_stream_bucket_append(buckets_out, cur TSRMLS_CC);
+	}
+
+	if (bytes_consumed != NULL) {
+		*bytes_consumed = raw_bytes;
+	}
+
+	return PSFS_PASS_ON;
+}
+
+/*
+ * Filter destructor: frees the php_chunked_filter_data stored in
+ * thisfilter->abstract, using the persistence flag recorded in it.
+ * Does nothing when thisfilter or its abstract pointer is NULL.
+ */
+static void php_chunked_dtor(php_stream_filter *thisfilter TSRMLS_DC)
+{
+	php_chunked_filter_data *ctx;
+
+	if (!thisfilter || !thisfilter->abstract) {
+		return;
+	}
+
+	ctx = (php_chunked_filter_data *) thisfilter->abstract;
+	pefree(ctx, ctx->persistent);
+}
+
+static php_stream_filter_ops chunked_filter_ops = { /* operations of the "chunked" filter; handed to php_stream_filter_alloc() by chunked_filter_create() */
+ php_chunked_filter, /* decodes the buckets passed through the filter */
+ php_chunked_dtor, /* frees the php_chunked_filter_data kept in ->abstract */
+ "chunked" /* same name that chunked_filter_create() accepts */
+};
+
+/*
+ * Factory callback for the "chunked" filter.
+ *
+ * Returns NULL unless filtername equals "chunked" (compared without regard
+ * to case). Otherwise allocates a zeroed php_chunked_filter_data with the
+ * requested persistence, starts it in CHUNK_SIZE_START and wraps it in a
+ * filter bound to chunked_filter_ops. filterparams is not used. A failed
+ * allocation raises an E_WARNING and returns NULL.
+ */
+static php_stream_filter *chunked_filter_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC)
+{
+	php_chunked_filter_data *ctx;
+
+	if (strcasecmp(filtername, "chunked") != 0) {
+		return NULL;
+	}
+
+	ctx = (php_chunked_filter_data *) pecalloc(1, sizeof(php_chunked_filter_data), persistent);
+	if (ctx == NULL) {
+		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Failed allocating %zd bytes", sizeof(php_chunked_filter_data));
+		return NULL;
+	}
+
+	/* remember how the block was allocated so the destructor frees it the same way */
+	ctx->persistent = persistent;
+	ctx->chunk_size = 0;
+	ctx->state = CHUNK_SIZE_START;
+
+	return php_stream_filter_alloc(&chunked_filter_ops, ctx, persistent);
+}
+
+static php_stream_filter_factory chunked_filter_factory = { /* factory listed next to chunked_filter_ops in the filter table below */
+ chunked_filter_create /* builds a filter instance when asked for "chunked" */
+};
+/* }}} */
+
static const struct {
php_stream_filter_ops *ops;
php_stream_filter_factory *factory;
@@ -1907,6 +2121,7 @@ static const struct {
{ &strfilter_strip_tags_ops, &strfilter_strip_tags_factory },
{ &strfilter_convert_ops, &strfilter_convert_factory },
{ &consumed_filter_ops, &consumed_filter_factory },
+ { &chunked_filter_ops, &chunked_filter_factory },
/* additional filters to go here */
{ NULL, NULL }
};
Index: ext/standard/http_fopen_wrapper.c
===================================================================
RCS file: /repository/php-src/ext/standard/http_fopen_wrapper.c,v
retrieving revision 1.99.2.12.2.9.2.12
diff -u -p -d -r1.99.2.12.2.9.2.12 http_fopen_wrapper.c
--- ext/standard/http_fopen_wrapper.c 31 Dec 2008 11:15:45 -0000
1.99.2.12.2.9.2.12
+++ ext/standard/http_fopen_wrapper.c 15 Apr 2009 11:35:07 -0000
@@ -111,6 +111,7 @@ php_stream *php_stream_url_wrap_http_ex(
char *user_headers = NULL;
int header_init = ((flags & HTTP_WRAPPER_HEADER_INIT) != 0);
int redirected = ((flags & HTTP_WRAPPER_REDIRECTED) != 0);
+ php_stream_filter *transfer_encoding = NULL;
tmp_line[0] = '\0';
@@ -597,6 +598,25 @@ php_stream *php_stream_url_wrap_http_ex(
} else if (!strncasecmp(http_header_line,
"Content-Length: ", 16)) {
file_size = atoi(http_header_line + 16);
php_stream_notify_file_size(context, file_size,
http_header_line, 0);
+ } else if (!strncasecmp(http_header_line,
"Transfer-Encoding: chunked", sizeof("Transfer-Encoding: chunked"))) {
+
+ /* create filter to decode response body */
+ if (!(options & STREAM_ONLY_GET_HEADERS)) {
+ long decode = 1;
+
+ if (context &&
php_stream_context_get_option(context, "http", "auto_decode", &tmpzval) ==
SUCCESS) {
+ SEPARATE_ZVAL(tmpzval);
+ convert_to_boolean(*tmpzval);
+ decode = Z_LVAL_PP(tmpzval);
+ }
+ if (decode) {
+ transfer_encoding =
php_stream_filter_create("chunked", NULL, php_stream_is_persistent(stream)
TSRMLS_CC);
+ if (transfer_encoding) {
+ /* don't store transfer-encoding header */
+ continue;
+ }
+ }
+ }
}
if (http_header_line[0] == '\0') {
@@ -740,6 +760,11 @@ out:
* the stream */
stream->position = 0;
+ if (transfer_encoding) {
+ php_stream_filter_append(&stream->readfilters,
transfer_encoding);
+ }
+ } else if (transfer_encoding) {
+ php_stream_filter_free(transfer_encoding TSRMLS_CC);
}
return stream;
Index: ext/standard/tests/filters/chunked_001.phpt
===================================================================
RCS file: ext/standard/tests/filters/chunked_001.phpt
diff -N ext/standard/tests/filters/chunked_001.phpt
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ ext/standard/tests/filters/chunked_001.phpt 15 Apr 2009 11:35:07 -0000
@@ -0,0 +1,33 @@
+--TEST--
+Chunked encoding
+--SKIPIF--
+<?php
+$filters = stream_get_filters();
+if(! in_array( "chunked", $filters )) die( "skip chunked filter not available." ); // output must begin with "skip" for the runner to skip the test
+?>
+--FILE--
+<?php
+$streams = array(
+	"data://text/plain,0\r\n", // only the zero-size last chunk
+	"data://text/plain,2\r\nte\r\n2\r\nst\r\n0\r\n", // CRLF line endings
+	"data://text/plain,2\nte\n2\nst\n0\n", // bare LF line endings
+	"data://text/plain,2;a=1\nte\n2;a=2;b=3\r\nst\n0\n", // chunk extensions are ignored
+	"data://text/plain,2\nte\n2\nst\n0\na=b\r\nc=d\n\r\n", // trailer after the last chunk is ignored
+	"data://text/plain,1f\n0123456789abcdef0123456789abcde\n1\nf\n0\n", // lower-case hex size
+	"data://text/plain,1E\n0123456789abcdef0123456789abcd\n2\nef\n0\n", // upper-case hex size
+);
+foreach ($streams as $name) {
+	$fp = fopen($name, "r");
+	stream_filter_append($fp, "chunked", STREAM_FILTER_READ);
+	var_dump(stream_get_contents($fp));
+	fclose($fp);
+}
+?>
+--EXPECT--
+string(0) ""
+string(4) "test"
+string(4) "test"
+string(4) "test"
+string(4) "test"
+string(32) "0123456789abcdef0123456789abcdef"
+string(32) "0123456789abcdef0123456789abcdef"
--
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php