Hi,

The attached patch implements automatic decoding of chunked transfer-encoding.

It fixes http://bugs.php.net/bug.php?id=47021 but also affects all PHP stream functions (e.g. file_get_contents("http://...")).

Some PHP applications that check for the Transfer-Encoding HTTP header and perform manual decoding might break.

Any objections against committing the patch into PHP_5_3?

Maybe someone has ideas for improving the patch?

Thanks. Dmitry.
Index: ext/standard/http_fopen_wrapper.c
===================================================================
RCS file: /repository/php-src/ext/standard/http_fopen_wrapper.c,v
retrieving revision 1.99.2.12.2.9.2.12
diff -u -p -d -r1.99.2.12.2.9.2.12 http_fopen_wrapper.c
--- ext/standard/http_fopen_wrapper.c   31 Dec 2008 11:15:45 -0000      
1.99.2.12.2.9.2.12
+++ ext/standard/http_fopen_wrapper.c   14 Apr 2009 14:40:12 -0000
@@ -84,6 +84,8 @@
 #define HTTP_WRAPPER_HEADER_INIT    1
 #define HTTP_WRAPPER_REDIRECTED     2
 
+static void php_add_chunked_filter(php_stream *stream TSRMLS_DC);
+
 php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, char 
*path, char *mode, int options, char **opened_path, php_stream_context 
*context, int redirect_max, int flags STREAMS_DC TSRMLS_DC)
 {
        php_stream *stream = NULL;
@@ -111,6 +113,7 @@ php_stream *php_stream_url_wrap_http_ex(
        char *user_headers = NULL;
        int header_init = ((flags & HTTP_WRAPPER_HEADER_INIT) != 0);
        int redirected = ((flags & HTTP_WRAPPER_REDIRECTED) != 0);
+       int chunked = 0;
 
        tmp_line[0] = '\0';
 
@@ -597,6 +600,8 @@ php_stream *php_stream_url_wrap_http_ex(
                        } else if (!strncasecmp(http_header_line, 
"Content-Length: ", 16)) {
                                file_size = atoi(http_header_line + 16);
                                php_stream_notify_file_size(context, file_size, 
http_header_line, 0);
+                       } else if (!strncasecmp(http_header_line, 
"Transfer-Encoding: chunked", sizeof("Transfer-Encoding: chunked"))) {
+                               chunked = 1;
                        }
 
                        if (http_header_line[0] == '\0') {
@@ -740,6 +745,9 @@ out:
                 * the stream */
                stream->position = 0;
 
+               if (chunked) {
+                       php_add_chunked_filter(stream TSRMLS_CC);
+               }
        }
 
        return stream;
@@ -780,6 +788,193 @@ PHPAPI php_stream_wrapper php_stream_htt
        1 /* is_url */
 };
 
+/*
+ * States of the incremental "Transfer-Encoding: chunked" decoder.
+ * The decoder is fed arbitrary slices of the response body, so every state
+ * must be resumable at a buffer boundary.
+ */
+typedef enum _php_chunked_filter_state {
+       CHUNKED_HEADER,         /* start of a chunk-size line, no digit seen yet */
+       CHUNKED_HEADER_DIGITS,  /* inside the hexadecimal chunk size */
+       CHUNKED_HEADER_EXT,     /* skipping a chunk extension up to the line end */
+       CHUNKED_HEADER_R,       /* optional CR terminating the size line */
+       CHUNKED_HEADER_N,       /* LF terminating the size line */
+       CHUNKED_BODY,           /* copying payload; chunk_size bytes still expected */
+       CHUNKED_BODY_R,         /* optional CR after the chunk payload */
+       CHUNKED_BODY_N,         /* LF after the chunk payload */
+       CHUNKED_TRAILER,        /* zero-size chunk seen; everything else is dropped */
+       CHUNKED_ERROR           /* malformed input; remaining bytes pass through raw */
+} php_chunked_filter_state;
+
+/* Decoder state kept between php_dechunk() calls for one stream filter. */
+typedef struct _php_chunked_filter_data {
+       php_chunked_filter_state state;
+       size_t chunk_size;      /* payload bytes of the current chunk not yet emitted */
+       int persistent;         /* allocation flag used when the state is freed */
+} php_chunked_filter_data;
+
+/*
+ * Decode chunked transfer-encoding in place.
+ *
+ * buf/len  - one slice of the raw body; decoded payload is compacted to the
+ *            front of buf.
+ * data     - decoder state carried from the previous slice; callers start
+ *            with state == CHUNKED_HEADER.
+ *
+ * Returns the number of decoded bytes now at the start of buf (<= len).
+ *
+ * A chunk size of any number of hex digits is accepted as long as it fits in
+ * size_t, chunk extensions are skipped, and both the size field and the chunk
+ * payload may be split across calls. After the terminating zero-size chunk
+ * the trailer section is discarded. On malformed input the decoder switches
+ * to CHUNKED_ERROR and hands the remaining bytes through unmodified.
+ */
+static int php_dechunk(char *buf, int len, php_chunked_filter_data *data)
+{
+       char *p = buf;
+       char *end = p + len;
+       char *out = buf;
+       int out_len = 0;
+
+       while (p < end) {
+               switch (data->state) {
+                       case CHUNKED_HEADER:
+                               data->chunk_size = 0;
+                               /* fallthrough */
+                       case CHUNKED_HEADER_DIGITS:
+                               while (p < end) {
+                                       int digit;
+
+                                       if (*p >= '0' && *p <= '9') {
+                                               digit = *p - '0';
+                                       } else if (*p >= 'A' && *p <= 'F') {
+                                               digit = *p - 'A' + 10;
+                                       } else if (*p >= 'a' && *p <= 'f') {
+                                               digit = *p - 'a' + 10;
+                                       } else {
+                                               break;
+                                       }
+                                       if (data->chunk_size > ((size_t) -1) / 16) {
+                                               /* one more digit would overflow the size */
+                                               data->state = CHUNKED_ERROR;
+                                               break;
+                                       }
+                                       data->chunk_size = (data->chunk_size * 16) + (size_t) digit;
+                                       data->state = CHUNKED_HEADER_DIGITS;
+                                       p++;
+                               }
+                               if (data->state == CHUNKED_HEADER) {
+                                       /* not a hex number */
+                                       data->state = CHUNKED_ERROR;
+                               }
+                               if (data->state == CHUNKED_ERROR) {
+                                       continue;
+                               }
+                               if (p == end) {
+                                       /* size field may continue in the next buffer */
+                                       return out_len;
+                               }
+                               data->state = CHUNKED_HEADER_EXT;
+                               /* fallthrough */
+                       case CHUNKED_HEADER_EXT:
+                               /* ignore ";name=value" chunk extensions */
+                               while (p < end && *p != '\r' && *p != '\n') {
+                                       p++;
+                               }
+                               if (p == end) {
+                                       return out_len;
+                               }
+                               /* fallthrough */
+                       case CHUNKED_HEADER_R:
+                               if (*p == '\r') {
+                                       p++;
+                                       if (p == end) {
+                                               data->state = CHUNKED_HEADER_N;
+                                               return out_len;
+                                       }
+                               }
+                               /* fallthrough */
+                       case CHUNKED_HEADER_N:
+                               if (*p != '\n') {
+                                       data->state = CHUNKED_ERROR;
+                                       continue;
+                               }
+                               p++;
+                               if (data->chunk_size == 0) {
+                                       /* last chunk: only the trailer follows */
+                                       data->state = CHUNKED_TRAILER;
+                                       continue;
+                               }
+                               data->state = CHUNKED_BODY;
+                               if (p == end) {
+                                       return out_len;
+                               }
+                               /* fallthrough */
+                       case CHUNKED_BODY:
+                               {
+                                       size_t avail = (size_t) (end - p);
+
+                                       if (avail < data->chunk_size) {
+                                               /* chunk continues in the next buffer: remember what is left */
+                                               if (p != out) {
+                                                       memmove(out, p, avail);
+                                               }
+                                               data->chunk_size -= avail;
+                                               out_len += (int) avail;
+                                               return out_len;
+                                       }
+                                       if (p != out) {
+                                               memmove(out, p, data->chunk_size);
+                                       }
+                                       out += data->chunk_size;
+                                       out_len += (int) data->chunk_size;
+                                       p += data->chunk_size;
+                                       data->chunk_size = 0;
+                                       data->state = CHUNKED_BODY_R;
+                                       if (p == end) {
+                                               return out_len;
+                                       }
+                               }
+                               /* fallthrough */
+                       case CHUNKED_BODY_R:
+                               if (*p == '\r') {
+                                       p++;
+                                       if (p == end) {
+                                               data->state = CHUNKED_BODY_N;
+                                               return out_len;
+                                       }
+                               }
+                               /* fallthrough */
+                       case CHUNKED_BODY_N:
+                               if (*p == '\n') {
+                                       p++;
+                                       data->state = CHUNKED_HEADER;
+                               } else {
+                                       data->state = CHUNKED_ERROR;
+                               }
+                               continue;
+                       case CHUNKED_TRAILER:
+                               /* trailer headers and the final CRLF are not payload */
+                               return out_len;
+                       case CHUNKED_ERROR:
+                               /* give up decoding: pass the rest of the buffer through as-is */
+                               if (p != out) {
+                                       memmove(out, p, end - p);
+                               }
+                               out_len += (int) (end - p);
+                               return out_len;
+               }
+       }
+       return out_len;
+}
+
+/*
+ * Stream-filter callback for the "chunked" filter.
+ *
+ * Takes every bucket currently queued in buckets_in, makes it writeable,
+ * decodes its contents in place with php_dechunk() (which returns the new
+ * payload length), and appends it to buckets_out. The raw (pre-decoding)
+ * byte count is reported through bytes_consumed when that pointer is given.
+ * The stream and flags arguments are not used. Always returns PSFS_PASS_ON.
+ */
+static php_stream_filter_status_t php_chunked_filter(
+       php_stream *stream,
+       php_stream_filter *thisfilter,
+       php_stream_bucket_brigade *buckets_in,
+       php_stream_bucket_brigade *buckets_out,
+       size_t *bytes_consumed,
+       int flags
+       TSRMLS_DC)
+{
+       php_chunked_filter_data *decoder = (php_chunked_filter_data *) thisfilter->abstract;
+       php_stream_bucket *cur;
+       size_t raw_total = 0;
+
+       /* NOTE(review): the loop relies on the head of buckets_in advancing once a
+        * bucket has been made writeable and appended elsewhere - confirm against
+        * the bucket API. */
+       for (cur = buckets_in->head; cur != NULL; cur = buckets_in->head) {
+               cur = php_stream_bucket_make_writeable(cur TSRMLS_CC);
+               raw_total += cur->buflen;
+
+               cur->buflen = php_dechunk(cur->buf, cur->buflen, decoder);
+
+               php_stream_bucket_append(buckets_out, cur TSRMLS_CC);
+       }
+
+       if (bytes_consumed != NULL) {
+               *bytes_consumed = raw_total;
+       }
+
+       return PSFS_PASS_ON;
+}
+
+/*
+ * Filter destructor: releases the decoder state stored in
+ * thisfilter->abstract, using the persistence flag recorded inside it.
+ * Does nothing when the filter or its state pointer is NULL.
+ */
+static void php_chunked_dtor(php_stream_filter *thisfilter TSRMLS_DC)
+{
+       php_chunked_filter_data *decoder;
+
+       if (!thisfilter || !thisfilter->abstract) {
+               return;
+       }
+
+       decoder = (php_chunked_filter_data *) thisfilter->abstract;
+       pefree(decoder, decoder->persistent);
+}
+
+static php_stream_filter_ops php_chunked_ops = { /* ops table passed to php_stream_filter_alloc() by php_add_chunked_filter() */
+       php_chunked_filter, /* presumably the per-brigade filter callback slot - confirm against php_stream_filter_ops */
+       php_chunked_dtor, /* presumably the destructor slot - confirm against php_stream_filter_ops */
+       "chunked" /* presumably the filter's label - confirm against php_stream_filter_ops */
+};
+
+/*
+ * Attach a chunked-decoding read filter to the given stream.
+ *
+ * Decoder state is allocated with the same persistence as the stream and
+ * wrapped in a filter built from php_chunked_ops. On success the state is
+ * initialised to CHUNKED_HEADER and the filter is appended to
+ * stream->readfilters. If the filter cannot be allocated, the state is freed
+ * and the stream is left without the filter.
+ */
+static void php_add_chunked_filter(php_stream *stream TSRMLS_DC)
+{
+       int is_persistent = php_stream_is_persistent(stream);
+       php_chunked_filter_data *decoder;
+       php_stream_filter *filter;
+
+       decoder = pemalloc(sizeof(php_chunked_filter_data), is_persistent);
+       filter = php_stream_filter_alloc(&php_chunked_ops, decoder, is_persistent);
+
+       if (!filter) {
+               pefree(decoder, is_persistent);
+               return;
+       }
+
+       decoder->state = CHUNKED_HEADER;
+       decoder->chunk_size = 0;
+       decoder->persistent = is_persistent;
+       php_stream_filter_append(&stream->readfilters, filter);
+}
+
+
+
 /*
  * Local variables:
  * tab-width: 4

-- 
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to