On 01/27/2015 12:24 AM, Noah Misch wrote:
For the moment, maybe I could commit the fix for the non U+0000 case for
escape_json, and we could think some more about detecting and warning about
the problem strings.
+1 for splitting development that way. Fixing the use of escape_json() is
objective, but the choices around the warning are more subtle.
OK, so something like this patch? I'm mildly concerned that you and I
are the only ones commenting on this.
cheers
andrew
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 3c137ea..8d2b38f 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -94,6 +94,8 @@ static void datum_to_json(Datum val, bool is_null, StringInfo result,
static void add_json(Datum val, bool is_null, StringInfo result,
Oid val_type, bool key_scalar);
static text *catenate_stringinfo_string(StringInfo buffer, const char *addon);
+static void escape_json_work(StringInfo buf, const char *str,
+ bool jsonb_unicode);
/* the null action object used for pure validation */
static JsonSemAction nullSemAction =
@@ -2356,6 +2358,18 @@ json_object_two_arg(PG_FUNCTION_ARGS)
void
escape_json(StringInfo buf, const char *str)
{
+ escape_json_work( buf, str, false);
+}
+
+void
+escape_jsonb(StringInfo buf, const char *str)
+{
+ escape_json_work( buf, str, true);
+}
+
+static void
+escape_json_work(StringInfo buf, const char *str, bool jsonb_unicode)
+{
const char *p;
appendStringInfoCharMacro(buf, '\"');
@@ -2398,7 +2412,9 @@ escape_json(StringInfo buf, const char *str)
* unicode escape that should be present is \u0000, all the
* other unicode escapes will have been resolved.
*/
- if (p[1] == 'u' &&
+ if (jsonb_unicode && strncmp(p+1, "u0000", 5) == 0)
+ appendStringInfoCharMacro(buf, *p);
+ else if (!jsonb_unicode && p[1] == 'u' &&
isxdigit((unsigned char) p[2]) &&
isxdigit((unsigned char) p[3]) &&
isxdigit((unsigned char) p[4]) &&
diff --git a/src/backend/utils/adt/jsonb.c b/src/backend/utils/adt/jsonb.c
index 644ea6d..22e1263 100644
--- a/src/backend/utils/adt/jsonb.c
+++ b/src/backend/utils/adt/jsonb.c
@@ -305,7 +305,7 @@ jsonb_put_escaped_value(StringInfo out, JsonbValue *scalarVal)
appendBinaryStringInfo(out, "null", 4);
break;
case jbvString:
- escape_json(out, pnstrdup(scalarVal->val.string.val, scalarVal->val.string.len));
+ escape_jsonb(out, pnstrdup(scalarVal->val.string.val, scalarVal->val.string.len));
break;
case jbvNumeric:
appendStringInfoString(out,
diff --git a/src/include/utils/json.h b/src/include/utils/json.h
index 6f69403..661d7bd 100644
--- a/src/include/utils/json.h
+++ b/src/include/utils/json.h
@@ -42,7 +42,13 @@ extern Datum json_build_array_noargs(PG_FUNCTION_ARGS);
extern Datum json_object(PG_FUNCTION_ARGS);
extern Datum json_object_two_arg(PG_FUNCTION_ARGS);
+/*
+ * escape_jsonb is more strict about unicode escapes, and will only not
+ * escape \u0000, as that is the only unicode escape that should still be
+ * present.
+ */
extern void escape_json(StringInfo buf, const char *str);
+extern void escape_jsonb(StringInfo buf, const char *str);
extern Datum json_typeof(PG_FUNCTION_ARGS);
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index aa5686f..b5457c4 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -324,11 +324,20 @@ select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
(1 row)
COMMIT;
--- unicode escape - backslash is not escaped
+-- unicode null - backslash not escaped
+-- note: using to_jsonb here bypasses the jsonb input routine.
+select jsonb '"\u0000"', to_jsonb(text '\u0000');
+ jsonb | to_jsonb
+----------+----------
+ "\u0000" | "\u0000"
+(1 row)
+
+-- any other unicode-looking escape - backslash is escaped
+-- all unicode characters should have been resolved
select to_jsonb(text '\uabcd');
- to_jsonb
-----------
- "\uabcd"
+ to_jsonb
+-----------
+ "\\uabcd"
(1 row)
-- any other backslash is escaped
@@ -338,6 +347,14 @@ select to_jsonb(text '\abcd');
"\\abcd"
(1 row)
+-- doubled backslash should be reproduced
+-- this isn't a unicode escape, it's an escaped backslash followed by 'uxxxx'
+select jsonb '"\\uabcd"';
+ jsonb
+-----------
+ "\\uabcd"
+(1 row)
+
--jsonb_agg
CREATE TEMP TABLE rows AS
SELECT x, 'txt' || x as y
diff --git a/src/test/regress/expected/jsonb_1.out b/src/test/regress/expected/jsonb_1.out
index 687ae63..10aaac8 100644
--- a/src/test/regress/expected/jsonb_1.out
+++ b/src/test/regress/expected/jsonb_1.out
@@ -324,11 +324,20 @@ select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
(1 row)
COMMIT;
--- unicode escape - backslash is not escaped
+-- unicode null - backslash not escaped
+-- note: using to_jsonb here bypasses the jsonb input routine.
+select jsonb '"\u0000"', to_jsonb(text '\u0000');
+ jsonb | to_jsonb
+----------+----------
+ "\u0000" | "\u0000"
+(1 row)
+
+-- any other unicode-looking escape - backslash is escaped
+-- all unicode characters should have been resolved
select to_jsonb(text '\uabcd');
- to_jsonb
-----------
- "\uabcd"
+ to_jsonb
+-----------
+ "\\uabcd"
(1 row)
-- any other backslash is escaped
@@ -338,6 +347,14 @@ select to_jsonb(text '\abcd');
"\\abcd"
(1 row)
+-- doubled backslash should be reproduced
+-- this isn't a unicode escape, it's an escaped backslash followed by 'uxxxx'
+select jsonb '"\\uabcd"';
+ jsonb
+-----------
+ "\\uabcd"
+(1 row)
+
--jsonb_agg
CREATE TEMP TABLE rows AS
SELECT x, 'txt' || x as y
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index a846103..bfda1c2 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -73,7 +73,13 @@ SET LOCAL TIME ZONE -8;
select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
COMMIT;
--- unicode escape - backslash is not escaped
+-- unicode null - backslash not escaped
+-- note: using to_jsonb here bypasses the jsonb input routine.
+
+select jsonb '"\u0000"', to_jsonb(text '\u0000');
+
+-- any other unicode-looking escape - backslash is escaped
+-- all unicode characters should have been resolved
select to_jsonb(text '\uabcd');
@@ -81,6 +87,11 @@ select to_jsonb(text '\uabcd');
select to_jsonb(text '\abcd');
+-- doubled backslash should be reproduced
+-- this isn't a unicode escape, it's an escaped backslash followed by 'uxxxx'
+
+select jsonb '"\\uabcd"';
+
--jsonb_agg
CREATE TEMP TABLE rows AS
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers