libxml2's charset encoding auto-detection mode is broken with the push parser in current versions of libxml2, as I recently found:
http://bugzilla.gnome.org/show_bug.cgi?id=162613 but trying to force it can trigger infinite loops in libxml2, which is what happens in http://bugs.php.net/?id=32001. So I think it's best not to force this mode. Future versions of libxml2 will set parser->charset to XML_CHAR_ENCODING_NONE by default with the push parser and will hence work as desired with no explicit setting of parser->charset required.

Is this patch OK?

http://www.apache.org/~jorton/php_xmlenc.diff

Index: ext/xml/compat.c
===================================================================
RCS file: /repository/php-src/ext/xml/compat.c,v
retrieving revision 1.32.2.7
diff -u -r1.32.2.7 compat.c
--- ext/xml/compat.c 17 Dec 2004 12:21:34 -0000 1.32.2.7
+++ ext/xml/compat.c 17 Feb 2005 11:12:08 -0000
@@ -379,8 +379,6 @@
 	}
 	if (encoding != NULL) {
 		parser->parser->encoding = xmlStrdup(encoding);
-	} else {
-		parser->parser->charset = XML_CHAR_ENCODING_NONE;
 	}
 	parser->parser->replaceEntities = 1;
 	parser->parser->wellFormed = 0;
Index: ext/xml/tests/bug32001.phpt
===================================================================
RCS file: ext/xml/tests/bug32001.phpt
diff -N ext/xml/tests/bug32001.phpt
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ ext/xml/tests/bug32001.phpt 17 Feb 2005 11:12:08 -0000
@@ -0,0 +1,40 @@
+--TEST--
+Bug #32001 (infinite loop in libxml character encoding detection)
+--FILE--
+<?php
+$myparser = xml_parser_create('');
+$simple = "<para><note>simple note</note></para>";
+xml_parse_into_struct($myparser, $simple, $myvals, $mytags);
+var_dump($myvals);
+--EXPECT--
+array(3) {
+  [0]=>
+  array(3) {
+    ["tag"]=>
+    string(4) "PARA"
+    ["type"]=>
+    string(4) "open"
+    ["level"]=>
+    int(1)
+  }
+  [1]=>
+  array(4) {
+    ["tag"]=>
+    string(4) "NOTE"
+    ["type"]=>
+    string(8) "complete"
+    ["level"]=>
+    int(2)
+    ["value"]=>
+    string(11) "simple note"
+  }
+  [2]=>
+  array(3) {
+    ["tag"]=>
+    string(4) "PARA"
+    ["type"]=>
+    string(5) "close"
+    ["level"]=>
+    int(1)
+  }
+}

--
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php