On 01/27/2015 12:24 AM, Noah Misch wrote:
For the moment, maybe I could commit the fix for the non U+0000 case for
escape_json, and we could think some more about detecting and warning about
the problem strings.
+1 for splitting development that way.  Fixing the use of escape_json() is
objective, but the choices around the warning are more subtle.



OK, so something like this patch? I'm mildly concerned that you and I are the only ones commenting on this.

cheers

andrew


diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 3c137ea..8d2b38f 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -94,6 +94,8 @@ static void datum_to_json(Datum val, bool is_null, StringInfo result,
 static void add_json(Datum val, bool is_null, StringInfo result,
 		 Oid val_type, bool key_scalar);
 static text *catenate_stringinfo_string(StringInfo buffer, const char *addon);
+static void  escape_json_work(StringInfo buf, const char *str,
+							  bool jsonb_unicode);
 
 /* the null action object used for pure validation */
 static JsonSemAction nullSemAction =
@@ -2356,6 +2358,18 @@ json_object_two_arg(PG_FUNCTION_ARGS)
 void
 escape_json(StringInfo buf, const char *str)
 {
+	escape_json_work( buf, str, false);
+}
+
+void
+escape_jsonb(StringInfo buf, const char *str)
+{
+	escape_json_work( buf, str, true);
+}
+
+static void
+escape_json_work(StringInfo buf, const char *str, bool jsonb_unicode)
+{
 	const char *p;
 
 	appendStringInfoCharMacro(buf, '\"');
@@ -2398,7 +2412,9 @@ escape_json(StringInfo buf, const char *str)
 				 * unicode escape that should be present is \u0000, all the
 				 * other unicode escapes will have been resolved.
 				 */
-				if (p[1] == 'u' &&
+				if (jsonb_unicode && strncmp(p+1, "u0000", 5) == 0)
+					appendStringInfoCharMacro(buf, *p);
+				else if (!jsonb_unicode && p[1] == 'u' &&
 					isxdigit((unsigned char) p[2]) &&
 					isxdigit((unsigned char) p[3]) &&
 					isxdigit((unsigned char) p[4]) &&
diff --git a/src/backend/utils/adt/jsonb.c b/src/backend/utils/adt/jsonb.c
index 644ea6d..22e1263 100644
--- a/src/backend/utils/adt/jsonb.c
+++ b/src/backend/utils/adt/jsonb.c
@@ -305,7 +305,7 @@ jsonb_put_escaped_value(StringInfo out, JsonbValue *scalarVal)
 			appendBinaryStringInfo(out, "null", 4);
 			break;
 		case jbvString:
-			escape_json(out, pnstrdup(scalarVal->val.string.val, scalarVal->val.string.len));
+			escape_jsonb(out, pnstrdup(scalarVal->val.string.val, scalarVal->val.string.len));
 			break;
 		case jbvNumeric:
 			appendStringInfoString(out,
diff --git a/src/include/utils/json.h b/src/include/utils/json.h
index 6f69403..661d7bd 100644
--- a/src/include/utils/json.h
+++ b/src/include/utils/json.h
@@ -42,7 +42,13 @@ extern Datum json_build_array_noargs(PG_FUNCTION_ARGS);
 extern Datum json_object(PG_FUNCTION_ARGS);
 extern Datum json_object_two_arg(PG_FUNCTION_ARGS);
 
+/*
+ * escape_jsonb is more strict about unicode escapes, and will only not
+ * escape \u0000, as that is the only unicode escape that should still be
+ * present.
+ */
 extern void escape_json(StringInfo buf, const char *str);
+extern void escape_jsonb(StringInfo buf, const char *str);
 
 extern Datum json_typeof(PG_FUNCTION_ARGS);
 
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index aa5686f..b5457c4 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -324,11 +324,20 @@ select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
 (1 row)
 
 COMMIT;
--- unicode escape - backslash is not escaped
+-- unicode null - backslash not escaped
+-- note: using to_jsonb here bypasses the jsonb input routine.
+select jsonb '"\u0000"', to_jsonb(text '\u0000');
+  jsonb   | to_jsonb 
+----------+----------
+ "\u0000" | "\u0000"
+(1 row)
+
+-- any other unicode-looking escape - backslash is escaped
+-- all unicode characters should have been resolved
 select to_jsonb(text '\uabcd');
- to_jsonb 
-----------
- "\uabcd"
+ to_jsonb  
+-----------
+ "\\uabcd"
 (1 row)
 
 -- any other backslash is escaped
@@ -338,6 +347,14 @@ select to_jsonb(text '\abcd');
  "\\abcd"
 (1 row)
 
+-- doubled backslash should be reproduced
+-- this isn't a unicode escape, it's an escaped backslash followed by 'uxxxx'
+select jsonb '"\\uabcd"';
+   jsonb   
+-----------
+ "\\uabcd"
+(1 row)
+
 --jsonb_agg
 CREATE TEMP TABLE rows AS
 SELECT x, 'txt' || x as y
diff --git a/src/test/regress/expected/jsonb_1.out b/src/test/regress/expected/jsonb_1.out
index 687ae63..10aaac8 100644
--- a/src/test/regress/expected/jsonb_1.out
+++ b/src/test/regress/expected/jsonb_1.out
@@ -324,11 +324,20 @@ select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
 (1 row)
 
 COMMIT;
--- unicode escape - backslash is not escaped
+-- unicode null - backslash not escaped
+-- note: using to_jsonb here bypasses the jsonb input routine.
+select jsonb '"\u0000"', to_jsonb(text '\u0000');
+  jsonb   | to_jsonb 
+----------+----------
+ "\u0000" | "\u0000"
+(1 row)
+
+-- any other unicode-looking escape - backslash is escaped
+-- all unicode characters should have been resolved
 select to_jsonb(text '\uabcd');
- to_jsonb 
-----------
- "\uabcd"
+ to_jsonb  
+-----------
+ "\\uabcd"
 (1 row)
 
 -- any other backslash is escaped
@@ -338,6 +347,14 @@ select to_jsonb(text '\abcd');
  "\\abcd"
 (1 row)
 
+-- doubled backslash should be reproduced
+-- this isn't a unicode escape, it's an escaped backslash followed by 'uxxxx'
+select jsonb '"\\uabcd"';
+   jsonb   
+-----------
+ "\\uabcd"
+(1 row)
+
 --jsonb_agg
 CREATE TEMP TABLE rows AS
 SELECT x, 'txt' || x as y
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index a846103..bfda1c2 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -73,7 +73,13 @@ SET LOCAL TIME ZONE -8;
 select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
 COMMIT;
 
--- unicode escape - backslash is not escaped
+-- unicode null - backslash not escaped
+-- note: using to_jsonb here bypasses the jsonb input routine.
+
+select jsonb '"\u0000"', to_jsonb(text '\u0000');
+
+-- any other unicode-looking escape - backslash is escaped
+-- all unicode characters should have been resolved
 
 select to_jsonb(text '\uabcd');
 
@@ -81,6 +87,11 @@ select to_jsonb(text '\uabcd');
 
 select to_jsonb(text '\abcd');
 
+-- doubled backslash should be reproduced
+-- this isn't a unicode escape, it's an escaped backslash followed by 'uxxxx'
+
+select jsonb '"\\uabcd"';
+
 --jsonb_agg
 
 CREATE TEMP TABLE rows AS
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to