details: https://hg.nginx.org/njs/rev/82ca4be8b357 branches: changeset: 992:82ca4be8b357 user: Alexander Borisov <alexander.bori...@nginx.com> date: Tue May 28 20:51:24 2019 +0300 description: Improved processing of invalid surrogate pairs in JSON strings.
Previously, an exception was thrown on invalid surrogate pairs. Now, all such pairs are converted to the replacement character. diffstat: njs/njs_json.c | 19 ++++++++++++------- njs/test/njs_unit_test.c | 13 +++++++++++-- 2 files changed, 23 insertions(+), 9 deletions(-) diffs (62 lines): diff -r 96dc9de9f92c -r 82ca4be8b357 njs/njs_json.c --- a/njs/njs_json.c Tue May 28 20:49:58 2019 +0300 +++ b/njs/njs_json.c Tue May 28 20:51:24 2019 +0300 @@ -806,8 +806,8 @@ njs_json_parse_string(njs_json_parse_ctx /* Surrogate pair. */ if (utf > 0xdbff || p[0] != '\\' || p[1] != 'u') { - njs_json_parse_exception(ctx, "Invalid Unicode char", p); - return NULL; + s = nxt_utf8_encode(s, NXT_UTF8_REPLACEMENT); + continue; } p += 2; @@ -815,12 +815,17 @@ njs_json_parse_string(njs_json_parse_ctx utf_low = njs_json_unicode(p); p += 4; - if (nxt_slow_path(utf_low < 0xdc00 || utf_low > 0xdfff)) { - njs_json_parse_exception(ctx, "Invalid surrogate pair", p); - return NULL; + if (nxt_fast_path(utf_low >= 0xdc00 && utf_low <= 0xdfff)) { + utf = njs_string_surrogate_pair(utf, utf_low); + + } else if (utf_low >= 0xd800 && utf_low <= 0xdbff) { + utf = NXT_UTF8_REPLACEMENT; + s = nxt_utf8_encode(s, NXT_UTF8_REPLACEMENT); + + } else { + utf = utf_low; + s = nxt_utf8_encode(s, NXT_UTF8_REPLACEMENT); } - - utf = njs_string_surrogate_pair(utf, utf_low); } s = nxt_utf8_encode(s, utf); diff -r 96dc9de9f92c -r 82ca4be8b357 njs/test/njs_unit_test.c --- a/njs/test/njs_unit_test.c Tue May 28 20:49:58 2019 +0300 +++ b/njs/test/njs_unit_test.c Tue May 28 20:51:24 2019 +0300 @@ -11799,10 +11799,19 @@ static njs_unit_test_t njs_test[] = nxt_string("SyntaxError: Unknown escape char at position 2") }, { nxt_string("JSON.parse('\"\\\\uDC01\"')"), - nxt_string("SyntaxError: Invalid Unicode char at position 7") }, + nxt_string("�") }, { nxt_string("JSON.parse('\"\\\\uD801\\\\uE000\"')"), - nxt_string("SyntaxError: Invalid surrogate pair at position 13") }, + nxt_string("�") }, + + { 
nxt_string("JSON.parse('\"\\\\uD83D\"')"), + nxt_string("�") }, + + { nxt_string("JSON.parse('\"\\\\uD800\\\\uDB00\"')"), + nxt_string("��") }, + + { nxt_string("JSON.parse('\"\\\\ud800[\"')"), + nxt_string("�[") }, { nxt_string("JSON.parse('{')"), nxt_string("SyntaxError: Unexpected end of input at position 1") }, _______________________________________________ nginx-devel mailing list nginx-devel@nginx.org http://mailman.nginx.org/mailman/listinfo/nginx-devel