Hi,
The attached patch implements automatic decoding of chunked
transfer-encoding.
It fixes http://bugs.php.net/bug.php?id=47021 but also affects all php
stream functions (e.g. file_get_contents("http://...");)
Some PHP applications that check for the Transfer-Encoding HTTP header and
perform manual decoding might break.
Any objections against committing the patch into PHP_5_3?
Maybe someone has ideas for improving the patch?
Thanks. Dmitry.
Index: ext/standard/http_fopen_wrapper.c
===================================================================
RCS file: /repository/php-src/ext/standard/http_fopen_wrapper.c,v
retrieving revision 1.99.2.12.2.9.2.12
diff -u -p -d -r1.99.2.12.2.9.2.12 http_fopen_wrapper.c
--- ext/standard/http_fopen_wrapper.c 31 Dec 2008 11:15:45 -0000
1.99.2.12.2.9.2.12
+++ ext/standard/http_fopen_wrapper.c 14 Apr 2009 14:40:12 -0000
@@ -84,6 +84,8 @@
#define HTTP_WRAPPER_HEADER_INIT 1
#define HTTP_WRAPPER_REDIRECTED 2
+static void php_add_chunked_filter(php_stream *stream TSRMLS_DC);
+
php_stream *php_stream_url_wrap_http_ex(php_stream_wrapper *wrapper, char
*path, char *mode, int options, char **opened_path, php_stream_context
*context, int redirect_max, int flags STREAMS_DC TSRMLS_DC)
{
php_stream *stream = NULL;
@@ -111,6 +113,7 @@ php_stream *php_stream_url_wrap_http_ex(
char *user_headers = NULL;
int header_init = ((flags & HTTP_WRAPPER_HEADER_INIT) != 0);
int redirected = ((flags & HTTP_WRAPPER_REDIRECTED) != 0);
+ int chunked = 0;
tmp_line[0] = '\0';
@@ -597,6 +600,8 @@ php_stream *php_stream_url_wrap_http_ex(
} else if (!strncasecmp(http_header_line,
"Content-Length: ", 16)) {
file_size = atoi(http_header_line + 16);
php_stream_notify_file_size(context, file_size,
http_header_line, 0);
+ } else if (!strncasecmp(http_header_line,
"Transfer-Encoding: chunked", sizeof("Transfer-Encoding: chunked"))) {
+ chunked = 1;
}
if (http_header_line[0] == '\0') {
@@ -740,6 +745,9 @@ out:
* the stream */
stream->position = 0;
+ if (chunked) {
+ php_add_chunked_filter(stream TSRMLS_CC);
+ }
}
return stream;
@@ -780,6 +788,193 @@ PHPAPI php_stream_wrapper php_stream_htt
1 /* is_url */
};
+/*
+ * Parser states of the chunked transfer-encoding decoder (php_dechunk()).
+ *
+ * The declaration order is significant: php_dechunk() advances through the
+ * header states with data->state++ (one step per hex digit) and compares
+ * against CHUNKED_HEADER_R, so the values must stay consecutive and in
+ * this order.
+ */
+typedef enum _php_chunked_filter_state {
+ /* at the start of a chunk-size line; chunk_size is reset to 0 here */
+ CHUNKED_HEADER,
+ /* one, two and three hex digits of the chunk size consumed so far */
+ CHUNKED_HEADER_1,
+ CHUNKED_HEADER_2,
+ CHUNKED_HEADER_3,
+ /* size digits done; an optional '\r' may follow */
+ CHUNKED_HEADER_R,
+ /* the '\n' that ends the chunk-size line is expected */
+ CHUNKED_HEADER_N,
+ /* inside the chunk payload */
+ CHUNKED_BODY,
+ /* payload done; an optional '\r' may follow */
+ CHUNKED_BODY_R,
+ /* the '\n' that ends the chunk is expected; afterwards back to CHUNKED_HEADER */
+ CHUNKED_BODY_N,
+ /* malformed input, or a zero-size chunk was seen: the rest of the input is
+ * copied to the output unchanged */
+ CHUNKED_ERROR
+} php_chunked_filter_state;
+
+/* Per-filter decoder state, stored in the filter's `abstract` pointer. */
+typedef struct _php_chunked_filter_data {
+ /* current position in the chunk grammar; carried across php_dechunk() calls */
+ php_chunked_filter_state state;
+ /* chunk size accumulated from the hex digits of the chunk-size line */
+ int chunk_size;
+ /* allocation flag recorded at creation time and handed to pefree() in the destructor */
+ int persistent;
+} php_chunked_filter_data;
+
+/*
+ * Decode a piece of a "Transfer-Encoding: chunked" body in place.
+ *
+ * buf/len  - input bytes; the decoded payload is written back to the start
+ *            of the same buffer (decoded data is never longer than input).
+ * data     - decoder state; it is updated so that the next call can resume
+ *            exactly where this buffer ended (inside a size line, inside a
+ *            payload, or between the CR and LF of a line ending).
+ *
+ * Returns the number of decoded bytes now stored at the start of buf.
+ *
+ * Known limitations of this state machine:
+ *  - at most four hex digits of chunk size are accepted (one state per
+ *    digit), so chunks larger than 0xFFFF put the decoder into CHUNKED_ERROR;
+ *  - chunk extensions are not understood: anything but CR/LF after the size
+ *    is an error;
+ *  - after the terminating zero-size chunk the decoder switches to
+ *    CHUNKED_ERROR, i.e. any bytes that follow (the final CRLF, trailers)
+ *    are copied to the output unchanged.
+ */
+static int php_dechunk(char *buf, int len, php_chunked_filter_data *data)
+{
+	char *p = buf;
+	char *end = p + len;
+	char *out = buf;
+	int out_len = 0; /* byte count, not a pointer */
+
+	while (p < end) {
+		switch (data->state) {
+			case CHUNKED_HEADER:
+				data->chunk_size = 0;
+				/* fallthrough */
+			case CHUNKED_HEADER_1:
+			case CHUNKED_HEADER_2:
+			case CHUNKED_HEADER_3:
+				/* one state step per hex digit, up to CHUNKED_HEADER_R */
+				while (p < end && data->state < CHUNKED_HEADER_R) {
+					if (*p >= '0' && *p <= '9') {
+						data->chunk_size = (data->chunk_size * 16) + (*p - '0');
+					} else if (*p >= 'A' && *p <= 'F') {
+						data->chunk_size = (data->chunk_size * 16) + (*p - 'A' + 10);
+					} else if (*p >= 'a' && *p <= 'f') {
+						data->chunk_size = (data->chunk_size * 16) + (*p - 'a' + 10);
+					} else {
+						break;
+					}
+					p++;
+					data->state++;
+				}
+				if (data->state == CHUNKED_HEADER) {
+					/* not a hex number */
+					data->state = CHUNKED_ERROR;
+					continue;
+				} else if (p == end) {
+					/* Buffer ended inside (or right after) the size digits.
+					 * Keep the per-digit state so that digits arriving in the
+					 * next buffer are still accepted. */
+					return out_len;
+				}
+				data->state = CHUNKED_HEADER_R;
+				/* fallthrough */
+			case CHUNKED_HEADER_R:
+				if (*p == '\r') {
+					p++;
+					if (p == end) {
+						data->state = CHUNKED_HEADER_N;
+						return out_len;
+					}
+				}
+				/* fallthrough */
+			case CHUNKED_HEADER_N:
+				if (*p == '\n') {
+					p++;
+					if (data->chunk_size == 0) {
+						/* EOF */
+						data->state = CHUNKED_ERROR;
+						continue;
+					}
+					data->state = CHUNKED_BODY;
+					if (p == end) {
+						return out_len;
+					}
+				} else {
+					data->state = CHUNKED_ERROR;
+					continue;
+				}
+				/* fallthrough */
+			case CHUNKED_BODY:
+				if (end - p >= data->chunk_size) {
+					/* the rest of this chunk is fully available */
+					if (p != out) {
+						memmove(out, p, data->chunk_size);
+					}
+					out += data->chunk_size;
+					out_len += data->chunk_size;
+					p += data->chunk_size;
+					data->state = CHUNKED_BODY_R;
+					if (p == end) {
+						return out_len;
+					}
+				} else {
+					/* Only part of the chunk is in this buffer: emit it and
+					 * remember how much is still outstanding, so the next
+					 * call resumes in the payload instead of re-parsing it. */
+					if (p != out) {
+						memmove(out, p, end - p);
+					}
+					data->chunk_size -= (int) (end - p);
+					data->state = CHUNKED_BODY;
+					out_len += (int) (end - p);
+					return out_len;
+				}
+				/* fallthrough */
+			case CHUNKED_BODY_R:
+				if (*p == '\r') {
+					p++;
+					if (p == end) {
+						data->state = CHUNKED_BODY_N;
+						return out_len;
+					}
+				}
+				/* fallthrough */
+			case CHUNKED_BODY_N:
+				if (*p == '\n') {
+					p++;
+					data->state = CHUNKED_HEADER;
+					continue;
+				} else {
+					data->state = CHUNKED_ERROR;
+					continue;
+				}
+			case CHUNKED_ERROR:
+				/* pass the remaining bytes through unchanged */
+				if (p != out) {
+					memmove(out, p, end - p);
+				}
+				out_len += (int) (end - p);
+				return out_len;
+		}
+	}
+	return out_len;
+}
+
+/*
+ * Stream filter callback: de-chunks every bucket of buckets_in in place and
+ * moves it to buckets_out.
+ *
+ * thisfilter->abstract holds the php_chunked_filter_data decoder state, so
+ * chunk boundaries that span several buckets (or several calls) are handled
+ * by php_dechunk().
+ *
+ * bytes_consumed, when non-NULL, receives the total number of input bytes
+ * taken from buckets_in (the size before decoding). `stream` and `flags`
+ * are not used. Always returns PSFS_PASS_ON.
+ */
+static php_stream_filter_status_t php_chunked_filter(
+	php_stream *stream,
+	php_stream_filter *thisfilter,
+	php_stream_bucket_brigade *buckets_in,
+	php_stream_bucket_brigade *buckets_out,
+	size_t *bytes_consumed,
+	int flags
+	TSRMLS_DC)
+{
+	php_stream_bucket *bucket;
+	size_t consumed = 0;
+	php_chunked_filter_data *data = (php_chunked_filter_data *) thisfilter->abstract;
+
+	/* NOTE(review): relies on php_stream_bucket_make_writeable() detaching the
+	 * bucket from buckets_in, otherwise this loop would not advance - confirm. */
+	while (buckets_in->head) {
+		bucket = php_stream_bucket_make_writeable(buckets_in->head TSRMLS_CC);
+		consumed += bucket->buflen;
+
+		/* decode in place; the bucket shrinks to the decoded length */
+		bucket->buflen = php_dechunk(bucket->buf, bucket->buflen, data);
+
+		php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
+	}
+
+	if (bytes_consumed) {
+		*bytes_consumed = consumed;
+	}
+
+	return PSFS_PASS_ON;
+}
+
+/*
+ * Filter destructor: frees the decoder state attached to the filter.
+ * The state remembers whether it was allocated persistently, and that flag
+ * selects the matching deallocator. Does nothing when there is no filter or
+ * no attached state.
+ */
+static void php_chunked_dtor(php_stream_filter *thisfilter TSRMLS_DC)
+{
+	php_chunked_filter_data *state;
+
+	if (!thisfilter || !thisfilter->abstract) {
+		return;
+	}
+
+	state = (php_chunked_filter_data *) thisfilter->abstract;
+	pefree(state, state->persistent);
+}
+
+/*
+ * Operations table handed to php_stream_filter_alloc() for the de-chunking
+ * read filter. The initializer is positional; presumably the slots are the
+ * filter callback, the destructor and the filter label, in that order -
+ * confirm against the php_stream_filter_ops declaration.
+ */
+static php_stream_filter_ops php_chunked_ops = {
+ php_chunked_filter,
+ php_chunked_dtor,
+ "chunked"
+};
+
+/*
+ * Attach a de-chunking filter to the read side of `stream`.
+ *
+ * Decoder state and filter are allocated with the same persistence as the
+ * stream. If the filter cannot be created, the state is released again and
+ * the stream is left without the filter.
+ */
+static void php_add_chunked_filter(php_stream *stream TSRMLS_DC)
+{
+	int is_persistent = php_stream_is_persistent(stream);
+	php_chunked_filter_data *state = pemalloc(sizeof(php_chunked_filter_data), is_persistent);
+	php_stream_filter *filter = php_stream_filter_alloc(&php_chunked_ops, state, is_persistent);
+
+	if (!filter) {
+		/* nothing owns the state yet, so free it here */
+		pefree(state, is_persistent);
+		return;
+	}
+
+	/* start at the beginning of a chunk-size line */
+	state->state = CHUNKED_HEADER;
+	state->chunk_size = 0;
+	state->persistent = is_persistent;
+	php_stream_filter_append(&stream->readfilters, filter);
+}
+
+
+
/*
* Local variables:
* tab-width: 4
--
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php