While it's technically "safe" to include user-supplied data in
json_encode() serialized values, the fact that characters such as <>&'
remain as is means there is room for some as-yet unidentified problem,
either in the browser's rendering or (more likely) elsewhere in one's
codebase, for this data to get into the wrong context and be executed.
To that end, the attached patch allows the caller to be paranoid about
their data and stipulate that <>&' should be encoded to hex references
instead. This doesn't stop a web developer from dropping that content
into an innerHTML of course, but it's one more rope holding the ship
together.
Obviously, since this adds five characters per pedantically escaped
character, it's not something you'd want on by default, so the normal
behavior would be to leave them alone.
echo json_encode("<foo>");
"<foo>"
echo json_encode("<foo>", JSON_HEX_TAG);
"\u003Cfoo\u003E"
echo json_encode("<foo bar='baz'>", JSON_HEX_TAG | JSON_HEX_APOS);
"\u003Cfoo bar=\u0027baz\u0027\u003E"
If no one objects, I'll commit this in a week along with an MFH for 5.3
-Sara
Index: json.c
===================================================================
RCS file: /repository/pecl/json/json.c,v
retrieving revision 1.31
diff -u -p -r1.31 json.c
--- json.c 1 Oct 2007 15:25:01 -0000 1.31
+++ json.c 29 Nov 2007 19:01:34 -0000
@@ -32,6 +32,10 @@
static const char digits[] = "0123456789abcdef";
+#define PHP_JSON_HEX_TAG (1<<0)
+#define PHP_JSON_HEX_AMP (1<<1)
+#define PHP_JSON_HEX_APOS (1<<2)
+
/* {{{ json_functions[]
*
* Every user visible function must have an entry in json_functions[].
@@ -43,6 +47,18 @@ const function_entry json_functions[] =
};
/* }}} */
+/* {{{ MINIT */
+static PHP_MINIT_FUNCTION(json)
+{
+ REGISTER_LONG_CONSTANT("JSON_HEX_TAG", PHP_JSON_HEX_TAG, CONST_CS |
CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("JSON_HEX_AMP", PHP_JSON_HEX_AMP, CONST_CS |
CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("JSON_HEX_APOS", PHP_JSON_HEX_APOS, CONST_CS |
CONST_PERSISTENT);
+
+ return SUCCESS;
+}
+/* }}} */
+
+
/* {{{ json_module_entry
*/
zend_module_entry json_module_entry = {
@@ -51,7 +67,7 @@ zend_module_entry json_module_entry = {
#endif
"json",
json_functions,
- NULL,
+ PHP_MINIT(json),
NULL,
NULL,
NULL,
@@ -78,8 +94,8 @@ PHP_MINFO_FUNCTION(json)
}
/* }}} */
-static void json_encode_r(smart_str *buf, zval *val TSRMLS_DC);
-static void json_escape_string(smart_str *buf, zstr s, int len,
zend_uchar type);
+static void json_encode_r(smart_str *buf, zval *val, int options
TSRMLS_DC);
+static void json_escape_string(smart_str *buf, zstr s, int len,
zend_uchar type, int options);
static int json_determine_array_type(zval **val TSRMLS_DC) /* {{{ */
{
@@ -115,7 +131,7 @@ static int json_determine_array_type(zva
}
/* }}} */
-static void json_encode_array(smart_str *buf, zval **val TSRMLS_DC) /*
{{{ */
+static void json_encode_array(smart_str *buf, zval **val, int options
TSRMLS_DC) /* {{{ */
{
int i, r;
HashTable *myht;
@@ -172,7 +188,7 @@ static void json_encode_array(smart_str
need_comma = 1;
}
- json_encode_r(buf, *data TSRMLS_CC);
+ json_encode_r(buf, *data, options
TSRMLS_CC);
} else if (r == 1) {
if (i == HASH_KEY_IS_STRING ||
i == HASH_KEY_IS_UNICODE) {
@@ -187,10 +203,10 @@ static void json_encode_array(smart_str
need_comma = 1;
}
- json_escape_string(buf, key,
key_len - 1,
(i==HASH_KEY_IS_UNICODE)?IS_UNICODE:IS_STRING);
+ json_escape_string(buf, key,
key_len - 1,
(i==HASH_KEY_IS_UNICODE)?IS_UNICODE:IS_STRING, options);
smart_str_appendc(buf, ':');
- json_encode_r(buf, *data
TSRMLS_CC);
+ json_encode_r(buf, *data,
options TSRMLS_CC);
} else {
if (need_comma) {
smart_str_appendc(buf,
',');
@@ -203,7 +219,7 @@ static void json_encode_array(smart_str
smart_str_appendc(buf, '"');
smart_str_appendc(buf, ':');
- json_encode_r(buf, *data
TSRMLS_CC);
+ json_encode_r(buf, *data,
options TSRMLS_CC);
}
}
@@ -227,7 +243,7 @@ static void json_encode_array(smart_str
#define REVERSE16(us) (((us & 0xf) << 12) | (((us >> 4) & 0xf) << 8) |
(((us >> 8) & 0xf) << 4) | ((us >> 12) & 0xf))
-static void json_escape_string(smart_str *buf, zstr s, int len,
zend_uchar type) /* {{{ */
+static void json_escape_string(smart_str *buf, zstr s, int len,
zend_uchar type, int options) /* {{{ */
{
int pos = 0;
unsigned short us;
@@ -305,6 +321,42 @@ static void json_escape_string(smart_str
smart_str_appendl(buf, "\\t", 2);
}
break;
+ case '<':
+ {
+ if (options & PHP_JSON_HEX_TAG) {
+ smart_str_appendl(buf,
"\\u003C", 6);
+ } else {
+ smary_str_appendc(buf, '<');
+ }
+ }
+ break;
+ case '>':
+ {
+ if (options & PHP_JSON_HEX_TAG) {
+ smart_str_appendl(buf,
"\\u003E", 6);
+ } else {
+ smary_str_appendc(buf, '>');
+ }
+ }
+ break;
+ case '&':
+ {
+ if (options & PHP_JSON_HEX_AMP) {
+ smart_str_appendl(buf,
"\\u0026", 6);
+ } else {
+ smary_str_appendc(buf, '&');
+ }
+ }
+ break;
+ case '\'':
+ {
+ if (options & PHP_JSON_HEX_APOS) {
+ smart_str_appendl(buf,
"\\u0027", 6);
+ } else {
+ smary_str_appendc(buf, '\'');
+ }
+ }
+ break;
default:
{
if (us >= ' ' && (us & 127) == us)
@@ -337,7 +389,7 @@ static void json_escape_string(smart_str
}
/* }}} */
-static void json_encode_r(smart_str *buf, zval *val TSRMLS_DC) /* {{{ */
+static void json_encode_r(smart_str *buf, zval *val, int options
TSRMLS_DC) /* {{{ */
{
switch (Z_TYPE_P(val)) {
case IS_NULL:
@@ -374,11 +426,11 @@ static void json_encode_r(smart_str *buf
break;
case IS_STRING:
case IS_UNICODE:
- json_escape_string(buf, Z_UNIVAL_P(val),
Z_UNILEN_P(val),
Z_TYPE_P(val));
+ json_escape_string(buf, Z_UNIVAL_P(val),
Z_UNILEN_P(val),
Z_TYPE_P(val), options);
break;
case IS_ARRAY:
case IS_OBJECT:
- json_encode_array(buf, &val TSRMLS_CC);
+ json_encode_array(buf, &val, options TSRMLS_CC);
break;
default:
zend_error(E_WARNING, "[json] (json_encode_r) type is
unsupported,
encoded as null.");
@@ -396,12 +448,13 @@ PHP_FUNCTION(json_encode)
{
zval *parameter;
smart_str buf = {0};
+ long options = 0;
- if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &parameter)
== FAILURE) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z|l",
&parameter, &options) == FAILURE) {
return;
}
- json_encode_r(&buf, parameter TSRMLS_CC);
+ json_encode_r(&buf, parameter, options TSRMLS_CC);
/*
* Return as binary string, since the result is 99% likely to be just
--
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php