Attached is a patch with my initial cut at unicode support for XML (made against the /ext directory).
I started with XMLReader since it was the smallest.
The code can probably be optimized a bit, but I want to make sure this is the right approach, because the changes made here are the same changes that will be needed for the rest of the XML-based extensions (simplexml, xsl, xmlwriter, and, to a point, xml).

It includes the following:
Macros defined in php_libxml.h (names can be changed if anyone has a problem with them).
       ZVAL_XML_STRING(z, s, flags)
       RETVAL_XML_STRING(s, flags)
These are used to take the UTF-8 output from libxml2 functions and return the correct string type (UTF-16 when running in unicode mode, or UTF-8 when not).

   XMLReader:
In order to maintain BC with PHP 5, it accepts both unicode and binary strings (UTF-8, as in PHP 5) as parameters. The parameters can be mixed (some unicode and some binary), so strings are converted to UTF-8 as needed to work with libxml2.

In order to only require 1 hash table for properties, the following is used in MINIT: zend_u_hash_init(&xmlreader_prop_handlers, 0, NULL, NULL, 1, (zend_bool)zend_ini_long("unicode.semantics", sizeof("unicode.semantics"), 1));

      Tests have been updated for unicode mode.

Let me know if anyone sees any problems with these changes.

Rob
Index: libxml/php_libxml.h
===================================================================
RCS file: /repository/php-src/ext/libxml/php_libxml.h,v
retrieving revision 1.19
diff -u -r1.19 php_libxml.h
--- libxml/php_libxml.h 13 Jun 2006 13:12:18 -0000      1.19
+++ libxml/php_libxml.h 17 Jul 2006 17:55:47 -0000
@@ -102,6 +102,35 @@
 PHP_LIBXML_API void php_libxml_initialize();
 PHP_LIBXML_API void php_libxml_shutdown();
 
+/*
+#define ZVAL_XML_STRING(z, s, flags) {                                         
                \
+       UConverter *conv = NULL;                                                
                                \
+                                                                               
                                                        \
+       if (UG(unicode)) {                                                      
                                        \
+               UErrorCode status = U_ZERO_ERROR;                               
                                                \
+               conv = ucnv_open("UTF-8", &status);                             
                        \
+               if (U_FAILURE(status)) {                                        
                                \
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not 
create converter for 'UTF-8' encoding"); \                                      
                                                 \
+               }                                                               
                                                        \
+               zend_set_converter_error_mode(conv, ZEND_TO_UNICODE, flags);    
\
+       }       \
+\
+       ZVAL_U_STRING(conv, z, s, flags);       \
+       if (conv) {     \
+               ucnv_close(conv);       \
+       }       \
+}
+*/
+#define ZVAL_XML_STRING(z, s, flags) {                                         
                \
+       UConverter *libxml_utf_conv = NULL;                                     
                        \
+       if (UG(unicode)) {                                                      
                                        \
+               libxml_utf_conv = UG(utf8_conv);                                
                        \
+       }                                                                       
                                                        \
+       ZVAL_U_STRING(libxml_utf_conv, z, s, flags);                            
        \
+}
+
+#define RETVAL_XML_STRING(s, flags)                    
ZVAL_XML_STRING(return_value, s, flags)
+
 #ifdef ZTS
 #define LIBXML(v) TSRMG(libxml_globals_id, zend_libxml_globals *, v)
 #else
Index: xmlreader/php_xmlreader.c
===================================================================
RCS file: /repository/php-src/ext/xmlreader/php_xmlreader.c,v
retrieving revision 1.33
diff -u -r1.33 php_xmlreader.c
--- xmlreader/php_xmlreader.c   10 May 2006 11:58:56 -0000      1.33
+++ xmlreader/php_xmlreader.c   17 Jul 2006 17:55:49 -0000
@@ -92,9 +92,9 @@
        switch (hnd->type) {
                case IS_STRING:
                        if (retchar) {
-                               ZVAL_STRING(*retval, (xmlChar *) retchar, 1);
+                               ZVAL_XML_STRING(*retval, (char *) retchar, 
ZSTR_DUPLICATE);
                        } else {
-                               ZVAL_EMPTY_STRING(*retval);
+                               ZVAL_EMPTY_TEXT(*retval);
                        }
                        break;
                case IS_BOOL:
@@ -121,17 +121,17 @@
        zend_object_handlers *std_hnd;
        int ret = FAILURE;
 
-       if (member->type != IS_STRING) {
+       if (member->type != IS_STRING && member->type != IS_UNICODE) {
                tmp_member = *member;
                zval_copy_ctor(&tmp_member);
-               convert_to_string(&tmp_member);
+               convert_to_text(&tmp_member);
                member = &tmp_member;
        }
 
        obj = (xmlreader_object *)zend_objects_get_address(object TSRMLS_CC);
 
        if (obj->prop_handler != NULL) {
-               ret = zend_hash_find(obj->prop_handler, Z_STRVAL_P(member), 
Z_STRLEN_P(member)+1, (void **) &hnd);
+               ret = zend_u_hash_find(obj->prop_handler, Z_TYPE_P(member), 
Z_UNIVAL_P(member), Z_UNILEN_P(member)+1, (void **) &hnd);
        }
        if (ret == FAILURE) {
                std_hnd = zend_get_std_object_handlers();
@@ -155,10 +155,10 @@
        zend_object_handlers *std_hnd;
        int ret;
 
-       if (member->type != IS_STRING) {
+       if (member->type != IS_STRING && member->type != IS_UNICODE) {
                tmp_member = *member;
                zval_copy_ctor(&tmp_member);
-               convert_to_string(&tmp_member);
+               convert_to_text(&tmp_member);
                member = &tmp_member;
        }
 
@@ -166,7 +166,7 @@
        obj = (xmlreader_object *)zend_objects_get_address(object TSRMLS_CC);
 
        if (obj->prop_handler != NULL) {
-               ret = zend_hash_find(obj->prop_handler, Z_STRVAL_P(member), 
Z_STRLEN_P(member)+1, (void **) &hnd);
+               ret = zend_u_hash_find(obj->prop_handler, Z_TYPE_P(member), 
Z_UNIVAL_P(member), Z_UNILEN_P(member)+1, (void **) &hnd);
        }
        if (ret == SUCCESS) {
                ret = xmlreader_property_reader(obj, hnd, &retval TSRMLS_CC);
@@ -197,10 +197,10 @@
        zend_object_handlers *std_hnd;
        int ret;
 
-       if (member->type != IS_STRING) {
+       if (member->type != IS_STRING && member->type != IS_UNICODE) {
                tmp_member = *member;
                zval_copy_ctor(&tmp_member);
-               convert_to_string(&tmp_member);
+               convert_to_text(&tmp_member);
                member = &tmp_member;
        }
 
@@ -208,7 +208,7 @@
        obj = (xmlreader_object *)zend_objects_get_address(object TSRMLS_CC);
 
        if (obj->prop_handler != NULL) {
-               ret = zend_hash_find((HashTable *)obj->prop_handler, 
Z_STRVAL_P(member), Z_STRLEN_P(member)+1, (void **) &hnd);
+               ret = zend_u_hash_find(obj->prop_handler, Z_TYPE_P(member), 
Z_UNIVAL_P(member), Z_UNILEN_P(member)+1, (void **) &hnd);
        }
        if (ret == SUCCESS) {
                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot write to 
read-only property");
@@ -233,8 +233,8 @@
        int isFileUri = 0;
 
        uri = xmlCreateURI();
-       escsource = xmlURIEscapeStr(source, ":");
-       xmlParseURIReference(uri, escsource);
+       escsource = xmlURIEscapeStr((xmlChar *) source, (xmlChar *) ":");
+       xmlParseURIReference(uri, (char *)escsource);
        xmlFree(escsource);
 
        if (uri->scheme != NULL) {
@@ -420,13 +420,14 @@
 static void php_xmlreader_string_arg(INTERNAL_FUNCTION_PARAMETERS, 
xmlreader_read_one_char_t internal_function) {
        zval *id;
        int name_len = 0;
-       char *retchar = NULL;
+       xmlChar *retchar = NULL;
        xmlreader_object *intern;
-       char *name;
+       void *name;
+    zend_uchar type;
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, 
&name_len) == FAILURE) {
-               return;
-       }
+    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t", &name, 
&name_len, &type) == FAILURE) {
+        return;
+    }
 
        if (!name_len) {
                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Argument cannot be 
an empty string");
@@ -437,10 +438,29 @@
 
        intern = (xmlreader_object *)zend_object_store_get_object(id TSRMLS_CC);
        if (intern && intern->ptr) {
-               retchar = internal_function(intern->ptr, name);
+               if (type == IS_UNICODE) {
+                       char *utf8_name;
+                       UErrorCode err = U_ZERO_ERROR;
+                       int32_t u8TargetLength =0;
+                       int32_t u8DestLen =0;
+                       u8TargetLength = name_len * 3 + 1;
+
+                       utf8_name = emalloc (sizeof(char *) * u8TargetLength);
+
+                       u_strToUTF8(utf8_name,u8TargetLength, &u8DestLen, name, 
name_len,&err);
+                       if (U_FAILURE(err)) {
+                               efree(utf8_name);
+                               php_error_docref(NULL TSRMLS_CC, E_NOTICE, 
"Error converting from Unicode to UTF-8: %s", u_errorName(err));
+                               RETURN_FALSE;
+                       }
+                       retchar = internal_function(intern->ptr, (xmlChar 
*)utf8_name);
+                       efree(utf8_name);
+               } else {
+                  retchar = internal_function(intern->ptr, (xmlChar *)name);
+               }
        }
        if (retchar) {
-               RETVAL_STRING(retchar, 1);
+               RETVAL_XML_STRING((char *)retchar, ZSTR_DUPLICATE);
                xmlFree(retchar);
                return;
        } else {
@@ -472,7 +492,7 @@
 /* {{{ php_xmlreader_no_arg_string */
 static void php_xmlreader_no_arg_string(INTERNAL_FUNCTION_PARAMETERS, 
xmlreader_read_char_t internal_function) {
        zval *id;
-       char *retchar = NULL;
+       xmlChar *retchar = NULL;
        xmlreader_object *intern;
 
        id = getThis();
@@ -482,7 +502,7 @@
                retchar = internal_function(intern->ptr);
        }
        if (retchar) {
-               RETVAL_STRING(retchar, 1);
+               RETVAL_XML_STRING((char *)retchar, ZSTR_DUPLICATE);
                xmlFree(retchar);
                return;
        } else {
@@ -498,9 +518,10 @@
        int source_len = 0, retval = -1;
        xmlreader_object *intern;
        xmlRelaxNGPtr schema = NULL;
-       char *source;
+       void *source;
+    zend_uchar ctype;
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s!", &source, 
&source_len) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t!", &source, 
&source_len, &ctype) == FAILURE) {
                return;
        }
 
@@ -514,7 +535,27 @@
        intern = (xmlreader_object *)zend_object_store_get_object(id TSRMLS_CC);
        if (intern && intern->ptr) {
                if (source) {
-                       schema =  _xmlreader_get_relaxNG(source, source_len, 
type, NULL, NULL TSRMLS_CC);
+                       if (ctype == IS_UNICODE) {
+                               char *utf8_name;
+                               UErrorCode err = U_ZERO_ERROR;
+                               int32_t u8TargetLength =0;
+                               int32_t u8DestLen =0;
+                               u8TargetLength = source_len * 3 + 1;
+
+                               utf8_name = emalloc (sizeof(char *) * 
u8TargetLength);
+
+                               u_strToUTF8(utf8_name,u8TargetLength, 
&u8DestLen, source, source_len,&err);
+                               if (U_FAILURE(err)) {
+                                       efree(utf8_name);
+                                       php_error_docref(NULL TSRMLS_CC, 
E_NOTICE, "Error converting from Unicode to UTF-8: %s", u_errorName(err));
+                                       RETURN_FALSE;
+                               }
+                               schema =  _xmlreader_get_relaxNG(utf8_name, 
u8DestLen, type, NULL, NULL TSRMLS_CC);
+                               efree(utf8_name);
+                       } else {
+                          schema =  _xmlreader_get_relaxNG(source, source_len, 
type, NULL, NULL TSRMLS_CC);
+                       }
+                       
                        if (schema) {
                                retval = 
xmlTextReaderRelaxNGSetSchema(intern->ptr, schema);
                        }
@@ -577,7 +618,7 @@
 {
        zval *id;
        long attr_pos;
-       char *retchar = NULL;
+       xmlChar *retchar = NULL;
        xmlreader_object *intern;
 
        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &attr_pos) == 
FAILURE) {
@@ -591,11 +632,11 @@
                retchar = xmlTextReaderGetAttributeNo(intern->ptr,attr_pos);
        }
        if (retchar) {
-               RETVAL_STRING(retchar, 1);
+               RETVAL_XML_STRING((char *)retchar, ZSTR_DUPLICATE);
                xmlFree(retchar);
                return;
        } else {
-               RETURN_EMPTY_STRING();
+               RETURN_EMPTY_TEXT();
        }
 }
 /* }}} */
@@ -607,12 +648,21 @@
        zval *id;
        int name_len = 0, ns_uri_len = 0;
        xmlreader_object *intern;
-       char *name, *ns_uri, *retchar = NULL;
+       xmlChar *retchar = NULL;
+       void *name, *ns_uri;
+       zend_uchar name_type, ns_type;
+       UConverter *orig_runtime_conv;
+
+       orig_runtime_conv = ZEND_U_CONVERTER(UG(runtime_encoding_conv));
+       UG(runtime_encoding_conv) = UG(utf8_conv);
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss", &name, 
&name_len, &ns_uri, &ns_uri_len) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "TT", &name, 
&name_len, &name_type, &ns_uri, &ns_uri_len, &ns_type) == FAILURE) {
+               UG(runtime_encoding_conv) = orig_runtime_conv;
                return;
        }
 
+       UG(runtime_encoding_conv) = orig_runtime_conv;
+
        if (name_len == 0 || ns_uri_len == 0) {
                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Attribute Name and 
Namespace URI cannot be empty");
                RETURN_FALSE;
@@ -622,14 +672,44 @@
 
        intern = (xmlreader_object *)zend_object_store_get_object(id TSRMLS_CC);
        if (intern && intern->ptr) {
-               retchar = xmlTextReaderGetAttributeNs(intern->ptr, name, 
ns_uri);
+               if (name_type == IS_UNICODE) {
+                       char *utf8_buffer, *utf8_name, *utf8_ns;
+                       UErrorCode err = U_ZERO_ERROR;
+                       int32_t u8TargetLength =0;
+                       int32_t u8DestLen =0;
+                       /* create buffer large enough for all strings */
+                       u8TargetLength = (name_len + ns_uri_len) * 3 + 2;
+
+                       utf8_buffer = emalloc (sizeof(char *) * u8TargetLength);
+                       utf8_name = utf8_buffer;
+
+                       u_strToUTF8(utf8_name, u8TargetLength, &u8DestLen, 
name, name_len, &err);
+                       if (U_FAILURE(err)) {
+                               efree(utf8_buffer);
+                               php_error_docref(NULL TSRMLS_CC, E_NOTICE, 
"Error converting from Unicode to UTF-8: %s", u_errorName(err));
+                               RETURN_FALSE;
+                       }
+
+                       utf8_ns = &utf8_buffer[u8DestLen+1];
+                       u_strToUTF8(utf8_ns, u8TargetLength, &u8DestLen, 
ns_uri, ns_uri_len, &err);
+                       if (U_FAILURE(err)) {
+                               efree(utf8_buffer);
+                               php_error_docref(NULL TSRMLS_CC, E_NOTICE, 
"Error converting from Unicode to UTF-8: %s", u_errorName(err));
+                               RETURN_FALSE;
+                       }
+
+                       retchar = xmlTextReaderGetAttributeNs(intern->ptr, 
(xmlChar *)utf8_name, (xmlChar *)utf8_ns);
+                       efree(utf8_buffer);
+               } else {
+                       retchar = xmlTextReaderGetAttributeNs(intern->ptr, 
(xmlChar *)name, (xmlChar *)ns_uri);
+               }
        }
        if (retchar) {
-               RETVAL_STRING(retchar, 1);
+               RETVAL_XML_STRING((char *)retchar, ZSTR_DUPLICATE);
                xmlFree(retchar);
                return;
        } else {
-               RETURN_EMPTY_STRING();
+               RETURN_EMPTY_TEXT();
        }
 }
 /* }}} */
@@ -687,9 +767,10 @@
        zval *id;
        int name_len = 0, retval;
        xmlreader_object *intern;
-       char *name;
+       void *name;
+    zend_uchar type;
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, 
&name_len) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t", &name, 
&name_len, &type) == FAILURE) {
                return;
        }
 
@@ -702,7 +783,27 @@
 
        intern = (xmlreader_object *)zend_object_store_get_object(id TSRMLS_CC);
        if (intern && intern->ptr) {
-               retval = xmlTextReaderMoveToAttribute(intern->ptr, name);
+               if (type == IS_UNICODE) {
+                       char *utf8_name;
+                       UErrorCode err = U_ZERO_ERROR;
+                       int32_t u8TargetLength =0;
+                       int32_t u8DestLen =0;
+                       u8TargetLength = name_len * 3 + 1;
+
+                       utf8_name = emalloc (sizeof(char *) * u8TargetLength);
+
+                       u_strToUTF8(utf8_name,u8TargetLength, &u8DestLen, name, 
name_len,&err);
+                       if (U_FAILURE(err)) {
+                               efree(utf8_name);
+                               php_error_docref(NULL TSRMLS_CC, E_NOTICE, 
"Error converting from Unicode to UTF-8: %s", u_errorName(err));
+                               RETURN_FALSE;
+                       }
+                       retval = xmlTextReaderMoveToAttribute(intern->ptr, 
(xmlChar *)utf8_name);
+                       efree(utf8_name);
+               } else {
+                  retval = xmlTextReaderMoveToAttribute(intern->ptr, (xmlChar 
*)name);
+               }
+               
                if (retval == 1) {
                        RETURN_TRUE;
                }
@@ -748,12 +849,20 @@
        zval *id;
        int name_len=0, ns_uri_len=0, retval;
        xmlreader_object *intern;
-       char *name, *ns_uri;
+       void *name, *ns_uri;
+       zend_uchar name_type, ns_type;
+       UConverter *orig_runtime_conv;
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss", &name, 
&name_len, &ns_uri, &ns_uri_len) == FAILURE) {
+       orig_runtime_conv = ZEND_U_CONVERTER(UG(runtime_encoding_conv));
+       UG(runtime_encoding_conv) = UG(utf8_conv);
+
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "TT", &name, 
&name_len, &name_type, &ns_uri, &ns_uri_len, &ns_type) == FAILURE) {
+               UG(runtime_encoding_conv) = orig_runtime_conv;
                return;
        }
 
+       UG(runtime_encoding_conv) = orig_runtime_conv;
+
        if (name_len == 0 || ns_uri_len == 0) {
                php_error_docref(NULL TSRMLS_CC, E_WARNING, "Attribute Name and 
Namespace URI cannot be empty");
                RETURN_FALSE;
@@ -763,7 +872,37 @@
 
        intern = (xmlreader_object *)zend_object_store_get_object(id TSRMLS_CC);
        if (intern && intern->ptr) {
-               retval = xmlTextReaderMoveToAttributeNs(intern->ptr, name, 
ns_uri);
+               if (name_type == IS_UNICODE) {
+                       char *utf8_buffer, *utf8_name, *utf8_ns;
+                       UErrorCode err = U_ZERO_ERROR;
+                       int32_t u8TargetLength =0;
+                       int32_t u8DestLen =0;
+                       /* create buffer large enough for all strings */
+                       u8TargetLength = (name_len + ns_uri_len) * 3 + 2;
+
+                       utf8_buffer = emalloc (sizeof(char *) * u8TargetLength);
+                       utf8_name = utf8_buffer;
+
+                       u_strToUTF8(utf8_name, u8TargetLength, &u8DestLen, 
name, name_len, &err);
+                       if (U_FAILURE(err)) {
+                               efree(utf8_buffer);
+                               php_error_docref(NULL TSRMLS_CC, E_NOTICE, 
"Error converting from Unicode to UTF-8: %s", u_errorName(err));
+                               RETURN_FALSE;
+                       }
+
+                       utf8_ns = &utf8_buffer[u8DestLen+1];
+                       u_strToUTF8(utf8_ns, u8TargetLength, &u8DestLen, 
ns_uri, ns_uri_len, &err);
+                       if (U_FAILURE(err)) {
+                               efree(utf8_buffer);
+                               php_error_docref(NULL TSRMLS_CC, E_NOTICE, 
"Error converting from Unicode to UTF-8: %s", u_errorName(err));
+                               RETURN_FALSE;
+                       }
+
+                       retval = xmlTextReaderMoveToAttributeNs(intern->ptr, 
(xmlChar *)utf8_name, (xmlChar *)utf8_ns);
+                       efree(utf8_buffer);
+               } else {
+                       retval = xmlTextReaderMoveToAttributeNs(intern->ptr, 
(xmlChar *)name, (xmlChar *)ns_uri);
+               }
                if (retval == 1) {
                        RETURN_TRUE;
                }
@@ -829,9 +968,10 @@
        zval *id;
        int retval, name_len=0;
        xmlreader_object *intern;
-       char *name = NULL;
+       void *name = NULL;
+       zend_uchar name_type;
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, 
&name_len) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|t", &name, 
&name_len, &name_type) == FAILURE) {
                return;
        }
 
@@ -845,11 +985,35 @@
                } else
 #endif
                retval = xmlTextReaderNext(intern->ptr);
-               while (name != NULL && retval == 1) {
-                       if 
(xmlStrEqual(xmlTextReaderConstLocalName(intern->ptr), name)) {
-                               RETURN_TRUE;
+               if (name != NULL && retval == 1) {
+                       char *utf8_name;
+
+                       if (name_type == IS_UNICODE) {
+                               UErrorCode err = U_ZERO_ERROR;
+                               int32_t u8TargetLength =0;
+                               int32_t u8DestLen =0;
+                               u8TargetLength = name_len * 3 + 1;
+
+                               utf8_name = emalloc (sizeof(char *) * 
u8TargetLength);
+
+                               u_strToUTF8(utf8_name,u8TargetLength, 
&u8DestLen, name, name_len,&err);
+                               if (U_FAILURE(err)) {
+                                       efree(utf8_name);
+                                       php_error_docref(NULL TSRMLS_CC, 
E_NOTICE, "Error converting from Unicode to UTF-8: %s", u_errorName(err));
+                                       RETURN_FALSE;
+                               }
+                       } else {
+                               utf8_name = name;
+                       }
+                       while (retval == 1) {
+                               if 
(xmlStrEqual(xmlTextReaderConstLocalName(intern->ptr), (xmlChar *)utf8_name)) {
+                                       RETURN_TRUE;
+                               }
+                               retval = xmlTextReaderNext(intern->ptr); 
+                       }
+                       if (name_type == IS_UNICODE) {
+                               efree(utf8_name);
                        }
-                       retval = xmlTextReaderNext(intern->ptr); 
                }
                if (retval == -1) {
                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "An Error 
Occured while reading");
@@ -872,15 +1036,24 @@
        int source_len = 0, encoding_len = 0;
        long options = 0;
        xmlreader_object *intern = NULL;
-       char *source, *valid_file = NULL;
-       char *encoding = NULL;
+       char *utf8_buffer=NULL;
+       char *utf8_source, *utf8_encoding = NULL, *valid_file = NULL;
+       void *source, *encoding = NULL;
        char resolved_path[MAXPATHLEN + 1];
        xmlTextReaderPtr reader = NULL;
+       zend_uchar source_type, encoding_type;
+       UConverter *orig_runtime_conv;
+
+       orig_runtime_conv = ZEND_U_CONVERTER(UG(runtime_encoding_conv));
+       UG(runtime_encoding_conv) = UG(utf8_conv);
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!l", &source, 
&source_len, &encoding, &encoding_len, &options) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "T|T!l", &source, 
&source_len, &source_type, &encoding, &encoding_len, &encoding_type, &options) 
== FAILURE) {
+               UG(runtime_encoding_conv) = orig_runtime_conv;
                return;
        }
 
+       UG(runtime_encoding_conv) = orig_runtime_conv;
+
        id = getThis();
        if (id != NULL) {
                if (! instanceof_function(Z_OBJCE_P(id), xmlreader_class_entry 
TSRMLS_CC)) {
@@ -896,10 +1069,45 @@
                RETURN_FALSE;
        }
 
-       valid_file = _xmlreader_get_valid_file_path(source, resolved_path, 
MAXPATHLEN  TSRMLS_CC);
+       if (source_type == IS_UNICODE) {
+               UErrorCode err = U_ZERO_ERROR;
+               int32_t u8TargetLength =0;
+               int32_t u8DestLen =0;
+               /* create buffer large enough for all strings */
+               u8TargetLength = (source_len + encoding_len) * 3 + 2;
+
+               utf8_buffer = emalloc (sizeof(char *) * u8TargetLength);
+               utf8_source = utf8_buffer;
+
+               u_strToUTF8(utf8_source, u8TargetLength, &u8DestLen, source, 
source_len, &err);
+               if (U_FAILURE(err)) {
+                       efree(utf8_buffer);
+                       php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error 
converting from Unicode to UTF-8: %s", u_errorName(err));
+                       RETURN_FALSE;
+               }
+
+               if (encoding_len > 0) {
+                       utf8_encoding = &utf8_buffer[u8DestLen+1];
+                       u_strToUTF8(utf8_encoding, u8TargetLength, &u8DestLen, 
encoding, encoding_len, &err);
+                       if (U_FAILURE(err)) {
+                               efree(utf8_buffer);
+                               php_error_docref(NULL TSRMLS_CC, E_NOTICE, 
"Error converting from Unicode to UTF-8: %s", u_errorName(err));
+                               RETURN_FALSE;
+                       }
+               }
+       } else {
+               utf8_source = (char *)source;
+               utf8_encoding = (char *)encoding;
+       }
+
+       valid_file = _xmlreader_get_valid_file_path(utf8_source, resolved_path, 
MAXPATHLEN  TSRMLS_CC);
 
        if (valid_file) {
-               reader = xmlReaderForFile(valid_file, encoding, options);
+               reader = xmlReaderForFile(valid_file, utf8_encoding, options);
+       }
+
+       if (utf8_buffer != NULL) {
+               efree(utf8_buffer);
        }
 
        if (reader == NULL) {
@@ -961,9 +1169,10 @@
        zval *id;
        int source_len = 0, retval = -1;
        xmlreader_object *intern;
-       char *source;
+       void *source;
+    zend_uchar type;
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s!", &source, 
&source_len) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "t!", &source, 
&source_len, &type) == FAILURE) {
                return;
        }
 
@@ -976,8 +1185,26 @@
 
        intern = (xmlreader_object *)zend_object_store_get_object(id TSRMLS_CC);
        if (intern && intern->ptr) {
-               retval = xmlTextReaderSchemaValidate(intern->ptr, source);
-
+               if (type == IS_UNICODE) {
+                       char *utf8_name;
+                       UErrorCode err = U_ZERO_ERROR;
+                       int32_t u8TargetLength =0;
+                       int32_t u8DestLen =0;
+                       u8TargetLength = source_len * 3 + 1;
+
+                       utf8_name = emalloc (sizeof(char *) * u8TargetLength);
+
+                       u_strToUTF8(utf8_name,u8TargetLength, &u8DestLen, 
source, source_len,&err);
+                       if (U_FAILURE(err)) {
+                               efree(utf8_name);
+                               php_error_docref(NULL TSRMLS_CC, E_NOTICE, 
"Error converting from Unicode to UTF-8: %s", u_errorName(err));
+                               RETURN_FALSE;
+                       }
+                       retval = xmlTextReaderSchemaValidate(intern->ptr, 
utf8_name);
+                       efree(utf8_name);
+               } else {
+                  retval = xmlTextReaderSchemaValidate(intern->ptr, source);
+               }
                if (retval == 0) {
                        RETURN_TRUE;
                }
@@ -1055,16 +1282,26 @@
        int source_len = 0, encoding_len = 0;
        long options = 0;
        xmlreader_object *intern = NULL;
-       char *source, *uri = NULL, *encoding = NULL;
        int resolved_path_len;
+       char *utf8_buffer=NULL, *uri = NULL;
+       char *utf8_source, *utf8_encoding = NULL;
+       void *source, *encoding = NULL;
        char *directory=NULL, resolved_path[MAXPATHLEN];
        xmlParserInputBufferPtr inputbfr;
-       xmlTextReaderPtr reader;
+       xmlTextReaderPtr reader = NULL;
+       zend_uchar source_type, encoding_type;
+       UConverter *orig_runtime_conv;
+
+       orig_runtime_conv = ZEND_U_CONVERTER(UG(runtime_encoding_conv));
+       UG(runtime_encoding_conv) = UG(utf8_conv);
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!l", &source, 
&source_len, &encoding, &encoding_len, &options) == FAILURE) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "T|T!l", &source, 
&source_len, &source_type, &encoding, &encoding_len, &encoding_type, &options) 
== FAILURE) {
+               UG(runtime_encoding_conv) = orig_runtime_conv;
                return;
        }
 
+       UG(runtime_encoding_conv) = orig_runtime_conv;
+
        id = getThis();
        if (id != NULL && ! instanceof_function(Z_OBJCE_P(id), 
xmlreader_class_entry TSRMLS_CC)) {
                id = NULL;
@@ -1079,7 +1316,44 @@
                RETURN_FALSE;
        }
 
-       inputbfr = xmlParserInputBufferCreateMem(source, source_len, 
XML_CHAR_ENCODING_NONE);
+       if (source_type == IS_UNICODE) {
+               UErrorCode err = U_ZERO_ERROR;
+               int32_t u8TargetLength =0;
+               int32_t u8DestLen =0;
+               /* create buffer large enough for all strings */
+               u8TargetLength = (source_len + encoding_len) * 3 + 2;
+
+               utf8_buffer = emalloc (sizeof(char *) * u8TargetLength);
+               utf8_source = utf8_buffer;
+
+               u_strToUTF8(utf8_source, u8TargetLength, &u8DestLen, source, 
source_len, &err);
+               if (U_FAILURE(err)) {
+                       efree(utf8_buffer);
+                       php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Error 
converting from Unicode to UTF-8: %s", u_errorName(err));
+                       RETURN_FALSE;
+               }
+
+               source_len = u8DestLen;
+
+               if (encoding_len > 0) {
+                       utf8_encoding = &utf8_buffer[u8DestLen+1];
+                       u_strToUTF8(utf8_encoding, u8TargetLength, &u8DestLen, 
encoding, encoding_len, &err);
+                       if (U_FAILURE(err)) {
+                               efree(utf8_buffer);
+                               php_error_docref(NULL TSRMLS_CC, E_NOTICE, 
"Error converting from Unicode to UTF-8: %s", u_errorName(err));
+                               RETURN_FALSE;
+                       }
+               }
+       } else {
+               utf8_source = (char *)source;
+               utf8_encoding = (char *)encoding;
+       }
+
+       inputbfr = xmlParserInputBufferCreateMem(utf8_source, source_len, 
XML_CHAR_ENCODING_NONE);
+
+       if (utf8_buffer != NULL) {
+               efree(utf8_buffer);
+       }
 
     if (inputbfr != NULL) {
 /* Get the URI of the current script so that we can set the base directory in 
libxml */
@@ -1210,7 +1484,8 @@
        ce.create_object = xmlreader_objects_new;
        xmlreader_class_entry = zend_register_internal_class(&ce TSRMLS_CC);
 
-       zend_hash_init(&xmlreader_prop_handlers, 0, NULL, NULL, 1);
+       zend_u_hash_init(&xmlreader_prop_handlers, 0, NULL, NULL, 1, 
(zend_bool)zend_ini_long("unicode.semantics", sizeof("unicode.semantics"), 1));
+
        xmlreader_register_prop_handler(&xmlreader_prop_handlers, 
"attributeCount", xmlTextReaderAttributeCount, NULL, IS_LONG TSRMLS_CC);
        xmlreader_register_prop_handler(&xmlreader_prop_handlers, "baseURI", 
NULL, xmlTextReaderConstBaseUri, IS_STRING TSRMLS_CC);
        xmlreader_register_prop_handler(&xmlreader_prop_handlers, "depth", 
xmlTextReaderDepth, NULL, IS_LONG TSRMLS_CC);
Index: xmlreader/tests/012.phpt
===================================================================
RCS file: /repository/php-src/ext/xmlreader/tests/012.phpt,v
retrieving revision 1.1
diff -u -r1.1 012.phpt
--- xmlreader/tests/012.phpt    30 Mar 2006 21:45:27 -0000      1.1
+++ xmlreader/tests/012.phpt    17 Jul 2006 17:55:49 -0000
@@ -67,3 +67,14 @@
 string(0) ""
 string(0) ""
 ===DONE===
+--UEXPECT--
+unicode(0) ""
+NULL
+unicode(0) ""
+unicode(0) ""
+===FILE===
+unicode(0) ""
+NULL
+unicode(0) ""
+unicode(0) ""
+===DONE===
Index: xmlreader/tests/013.phpt
===================================================================
RCS file: /repository/php-src/ext/xmlreader/tests/013.phpt,v
retrieving revision 1.1
diff -u -r1.1 013.phpt
--- xmlreader/tests/013.phpt    31 Mar 2006 20:50:29 -0000      1.1
+++ xmlreader/tests/013.phpt    17 Jul 2006 17:55:49 -0000
@@ -50,3 +50,10 @@
 
 Warning: XMLReader::read(): Element 'foo': %s
 ===DONE===
+--UEXPECTF--
+unicode(3) "123"
+unicode(3) "456"
+===FAIL===
+
+Warning: XMLReader::read(): Element 'foo': %s
+===DONE===

-- 
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to