When serializing binary strings in PHP 6, we have to escape non-ASCII characters and then unescape them on unserialization. This patch adds the matching unescaping support to PHP 5.2, to make it easier to exchange data between PHP 5 and 6. If no one objects, I will commit it soon.

-Andrei

Index: ext/standard/var_unserializer.c
===================================================================
RCS file: /repository/php-src/ext/standard/var_unserializer.c,v
retrieving revision 1.70.2.4
diff -u -r1.70.2.4 var_unserializer.c
--- ext/standard/var_unserializer.c     1 Jan 2006 12:50:16 -0000       1.70.2.4
+++ ext/standard/var_unserializer.c     1 Dec 2006 21:39:39 -0000
@@ -1,4 +1,4 @@
-/* Generated by re2c 0.9.11 on Sun Jan  1 14:39:32 2006 */
+/* Generated by re2c 0.9.12 on Fri Dec  1 13:39:29 2006 */
 #line 1 "ext/standard/var_unserializer.re"
 /*
   +----------------------------------------------------------------------+
@@ -18,7 +18,7 @@
   +----------------------------------------------------------------------+
 */
 
-/* $Id: var_unserializer.c,v 1.70.2.4 2006/01/01 12:50:16 sniper Exp $ */
+/* $Id: var_unserializer.re,v 1.52.2.2 2006/01/01 12:26:08 sniper Exp $ */
 
 #include "php.h"
 #include "ext/standard/php_var.h"
@@ -140,6 +140,38 @@
 
 /* }}} */
 
+static char *unserialize_str(const unsigned char **p, int len)
+{
+       int i, j;
+       char *str = emalloc(len+1);
+
+       for (i = 0; i < len; i++) {
+               if (**p != '\\') {
+                       str[i] = (char)**p;
+               } else {
+                       unsigned char ch = 0;
+
+                       for (j = 0; j < 2; j++) {
+                               (*p)++;
+                               if (**p >= '0' && **p <= '9') {
+                                       ch = (ch << 4) + (**p -'0');
+                               } else if (**p >= 'a' && **p <= 'f') {
+                                       ch = (ch << 4) + (**p -'a'+10);
+                               } else if (**p >= 'A' && **p <= 'F') {
+                                       ch = (ch << 4) + (**p -'A'+10);
+                               } else {
+                                       efree(str);
+                                       return NULL;
+                               }
+                       }
+                       str[i] = (char)ch;
+               }
+               (*p)++;
+       }
+       str[i] = 0;
+       return str;
+}
+
 #define YYFILL(n) do { } while (0)
 #define YYCTYPE unsigned char
 #define YYCURSOR cursor
@@ -147,7 +179,7 @@
 #define YYMARKER marker
 
 
-#line 155 "ext/standard/var_unserializer.re"
+#line 187 "ext/standard/var_unserializer.re"
 
 
 
@@ -390,7 +422,7 @@
          0,   0,   0,   0,   0,   0,   0,   0, 
        };
 
-#line 394 "ext/standard/var_unserializer.c"
+#line 426 "ext/standard/var_unserializer.c"
 {
        YYCTYPE yych;
        unsigned int yyaccept = 0;
@@ -418,9 +450,9 @@
        if(yych == ':') goto yy87;
        goto yy3;
 yy3:
-#line 626 "ext/standard/var_unserializer.re"
+#line 659 "ext/standard/var_unserializer.re"
 { return 0; }
-#line 424 "ext/standard/var_unserializer.c"
+#line 456 "ext/standard/var_unserializer.c"
 yy4:   yyaccept = 0;
        yych = *(YYMARKER = ++YYCURSOR);
        if(yych == ':') goto yy81;
@@ -459,13 +491,13 @@
 yy13:  ++YYCURSOR;
        goto yy14;
 yy14:
-#line 620 "ext/standard/var_unserializer.re"
+#line 653 "ext/standard/var_unserializer.re"
 {
        /* this is the case where we have less data than planned */
        php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Unexpected end of serialized data");
        return 0; /* not sure if it should be 0 or 1 here? */
 }
-#line 469 "ext/standard/var_unserializer.c"
+#line 501 "ext/standard/var_unserializer.c"
 yy15:  yych = *++YYCURSOR;
        goto yy3;
 yy16:  yych = *++YYCURSOR;
@@ -498,7 +530,7 @@
 yy22:  ++YYCURSOR;
        goto yy23;
 yy23:
-#line 508 "ext/standard/var_unserializer.re"
+#line 541 "ext/standard/var_unserializer.re"
 {
        size_t len, len2, len3, maxlen;
        long elements;
@@ -610,7 +642,7 @@
 
        return object_common2(UNSERIALIZE_PASSTHRU, elements);
 }
-#line 614 "ext/standard/var_unserializer.c"
+#line 646 "ext/standard/var_unserializer.c"
 yy24:  yych = *++YYCURSOR;
        if(yych <= ','){
                if(yych != '+') goto yy17;
@@ -639,7 +671,7 @@
 yy29:  ++YYCURSOR;
        goto yy30;
 yy30:
-#line 500 "ext/standard/var_unserializer.re"
+#line 533 "ext/standard/var_unserializer.re"
 {
 
        INIT_PZVAL(*rval);
@@ -647,7 +679,7 @@
        return object_common2(UNSERIALIZE_PASSTHRU,
                        object_common1(UNSERIALIZE_PASSTHRU, ZEND_STANDARD_CLASS_DEF_PTR));
 }
-#line 651 "ext/standard/var_unserializer.c"
+#line 683 "ext/standard/var_unserializer.c"
 yy31:  yych = *++YYCURSOR;
        if(yych == '+') goto yy32;
        if(yych <= '/') goto yy17;
@@ -671,7 +703,7 @@
 yy36:  ++YYCURSOR;
        goto yy37;
 yy37:
-#line 478 "ext/standard/var_unserializer.re"
+#line 511 "ext/standard/var_unserializer.re"
 {
        long elements = parse_iv(start + 2);
        /* use iv() not uiv() in order to check data range */
@@ -693,7 +725,7 @@
 
        return finish_nested_data(UNSERIALIZE_PASSTHRU);
 }
-#line 697 "ext/standard/var_unserializer.c"
+#line 729 "ext/standard/var_unserializer.c"
 yy38:  yych = *++YYCURSOR;
        if(yych == '+') goto yy39;
        if(yych <= '/') goto yy17;
@@ -717,7 +749,7 @@
 yy43:  ++YYCURSOR;
        goto yy44;
 yy44:
-#line 450 "ext/standard/var_unserializer.re"
+#line 482 "ext/standard/var_unserializer.re"
 {
        size_t len, maxlen;
        char *str;
@@ -729,11 +761,12 @@
                return 0;
        }
 
-       str = (char*)YYCURSOR;
-
-       YYCURSOR += len;
+       if ((str = unserialize_str(&YYCURSOR, len)) == NULL) {
+               return 0;
+       }
 
        if (*(YYCURSOR) != '"') {
+               efree(str);
                *p = YYCURSOR;
                return 0;
        }
@@ -742,10 +775,10 @@
        *p = YYCURSOR;
 
        INIT_PZVAL(*rval);
-       ZVAL_STRINGL(*rval, str, len, 1);
+       ZVAL_STRINGL(*rval, str, len, 0);
        return 1;
 }
-#line 749 "ext/standard/var_unserializer.c"
+#line 782 "ext/standard/var_unserializer.c"
 yy45:  yych = *++YYCURSOR;
        if(yych <= '/'){
                if(yych <= ','){
@@ -834,14 +867,14 @@
 yy55:  ++YYCURSOR;
        goto yy56;
 yy56:
-#line 443 "ext/standard/var_unserializer.re"
+#line 475 "ext/standard/var_unserializer.re"
 {
        *p = YYCURSOR;
        INIT_PZVAL(*rval);
        ZVAL_DOUBLE(*rval, zend_strtod((const char *)start + 2, NULL));
        return 1;
 }
-#line 845 "ext/standard/var_unserializer.c"
+#line 878 "ext/standard/var_unserializer.c"
 yy57:  yych = *++YYCURSOR;
        if(yych <= ','){
                if(yych != '+') goto yy17;
@@ -901,7 +934,7 @@
 yy66:  ++YYCURSOR;
        goto yy67;
 yy67:
-#line 428 "ext/standard/var_unserializer.re"
+#line 460 "ext/standard/var_unserializer.re"
 {
        *p = YYCURSOR;
        INIT_PZVAL(*rval);
@@ -916,7 +949,7 @@
 
        return 1;
 }
-#line 920 "ext/standard/var_unserializer.c"
+#line 953 "ext/standard/var_unserializer.c"
 yy68:  yych = *++YYCURSOR;
        if(yych == 'N') goto yy65;
        goto yy17;
@@ -945,14 +978,14 @@
 yy73:  ++YYCURSOR;
        goto yy74;
 yy74:
-#line 421 "ext/standard/var_unserializer.re"
+#line 453 "ext/standard/var_unserializer.re"
 {
        *p = YYCURSOR;
        INIT_PZVAL(*rval);
        ZVAL_LONG(*rval, parse_iv(start + 2));
        return 1;
 }
-#line 956 "ext/standard/var_unserializer.c"
+#line 989 "ext/standard/var_unserializer.c"
 yy75:  yych = *++YYCURSOR;
        if(yych <= '/') goto yy17;
        if(yych >= '2') goto yy17;
@@ -963,25 +996,25 @@
 yy77:  ++YYCURSOR;
        goto yy78;
 yy78:
-#line 414 "ext/standard/var_unserializer.re"
+#line 446 "ext/standard/var_unserializer.re"
 {
        *p = YYCURSOR;
        INIT_PZVAL(*rval);
        ZVAL_BOOL(*rval, parse_iv(start + 2));
        return 1;
 }
-#line 974 "ext/standard/var_unserializer.c"
+#line 1007 "ext/standard/var_unserializer.c"
 yy79:  ++YYCURSOR;
        goto yy80;
 yy80:
-#line 407 "ext/standard/var_unserializer.re"
+#line 439 "ext/standard/var_unserializer.re"
 {
        *p = YYCURSOR;
        INIT_PZVAL(*rval);
        ZVAL_NULL(*rval);
        return 1;
 }
-#line 985 "ext/standard/var_unserializer.c"
+#line 1018 "ext/standard/var_unserializer.c"
 yy81:  yych = *++YYCURSOR;
        if(yych <= ','){
                if(yych != '+') goto yy17;
@@ -1007,7 +1040,7 @@
 yy85:  ++YYCURSOR;
        goto yy86;
 yy86:
-#line 384 "ext/standard/var_unserializer.re"
+#line 416 "ext/standard/var_unserializer.re"
 {
        long id;
 
@@ -1030,7 +1063,7 @@
        
        return 1;
 }
-#line 1034 "ext/standard/var_unserializer.c"
+#line 1067 "ext/standard/var_unserializer.c"
 yy87:  yych = *++YYCURSOR;
        if(yych <= ','){
                if(yych != '+') goto yy17;
@@ -1056,7 +1089,7 @@
 yy91:  ++YYCURSOR;
        goto yy92;
 yy92:
-#line 363 "ext/standard/var_unserializer.re"
+#line 395 "ext/standard/var_unserializer.re"
 {
        long id;
 
@@ -1077,10 +1110,10 @@
        
        return 1;
 }
-#line 1081 "ext/standard/var_unserializer.c"
+#line 1114 "ext/standard/var_unserializer.c"
 }
 }
-#line 628 "ext/standard/var_unserializer.re"
+#line 661 "ext/standard/var_unserializer.re"
 
 
        return 0;
Index: ext/standard/var_unserializer.re
===================================================================
RCS file: /repository/php-src/ext/standard/var_unserializer.re,v
retrieving revision 1.52.2.2
diff -u -r1.52.2.2 var_unserializer.re
--- ext/standard/var_unserializer.re    1 Jan 2006 12:26:08 -0000       1.52.2.2
+++ ext/standard/var_unserializer.re    1 Dec 2006 21:39:39 -0000
@@ -138,6 +138,38 @@
 
 /* }}} */
 
+static char *unserialize_str(const unsigned char **p, int len)
+{
+       int i, j;
+       char *str = emalloc(len+1);
+
+       for (i = 0; i < len; i++) {
+               if (**p != '\\') {
+                       str[i] = (char)**p;
+               } else {
+                       unsigned char ch = 0;
+
+                       for (j = 0; j < 2; j++) {
+                               (*p)++;
+                               if (**p >= '0' && **p <= '9') {
+                                       ch = (ch << 4) + (**p -'0');
+                               } else if (**p >= 'a' && **p <= 'f') {
+                                       ch = (ch << 4) + (**p -'a'+10);
+                               } else if (**p >= 'A' && **p <= 'F') {
+                                       ch = (ch << 4) + (**p -'A'+10);
+                               } else {
+                                       efree(str);
+                                       return NULL;
+                               }
+                       }
+                       str[i] = (char)ch;
+               }
+               (*p)++;
+       }
+       str[i] = 0;
+       return str;
+}
+
 #define YYFILL(n) do { } while (0)
 #define YYCTYPE unsigned char
 #define YYCURSOR cursor
@@ -458,11 +490,12 @@
                return 0;
        }
 
-       str = (char*)YYCURSOR;
-
-       YYCURSOR += len;
+       if ((str = unserialize_str(&YYCURSOR, len)) == NULL) {
+               return 0;
+       }
 
        if (*(YYCURSOR) != '"') {
+               efree(str);
                *p = YYCURSOR;
                return 0;
        }
@@ -471,7 +504,7 @@
        *p = YYCURSOR;
 
        INIT_PZVAL(*rval);
-       ZVAL_STRINGL(*rval, str, len, 1);
+       ZVAL_STRINGL(*rval, str, len, 0);
        return 1;
 }
 

-- 
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to