On Thu, May 23, 2024 at 8:24 AM David Rowley <dgrowle...@gmail.com> wrote:
> Other things I considered were if doing 16 bytes at a time is too much
> as it puts quite a bit of work into byte-at-a-time processing if just
> 1 special char exists in a 16-byte chunk. I considered doing SWAR [1]
> processing to do the job of vector8_has_le() and vector8_has() byte
> maybe with just uint32s. It might be worth doing that. However, I've
> not done it yet as it raises the bar for this patch quite a bit. SWAR
> vector processing is pretty much write-only code. Imagine trying to
> write comments for the code in [2] so that the average person could
> understand what's going on!?

Sorry to resurrect this thread, but I recently saw something that made
me think of this commit (as well as the similar one 0a8de93a48c):

https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/

I don't find this use of SWAR that bad for readability, and there's
only one obtuse clever part that merits a comment. Plus, it seems json
escapes are pretty much set in stone? I gave this a spin with

https://www.postgresql.org/message-id/attachment/163406/json_bench.sh.txt

master:

Test 1
tps = 321.522667 (without initial connection time)
tps = 315.070985 (without initial connection time)
tps = 331.070054 (without initial connection time)
Test 2
tps = 35.107257 (without initial connection time)
tps = 34.977670 (without initial connection time)
tps = 35.898471 (without initial connection time)
Test 3
tps = 33.575570 (without initial connection time)
tps = 32.383352 (without initial connection time)
tps = 31.876192 (without initial connection time)
Test 4
tps = 810.676116 (without initial connection time)
tps = 745.948518 (without initial connection time)
tps = 747.651923 (without initial connection time)

swar patch:

Test 1
tps = 291.919004 (without initial connection time)
tps = 294.446640 (without initial connection time)
tps = 307.670464 (without initial connection time)
Test 2
tps = 30.984440 (without initial connection time)
tps = 31.660630 (without initial connection time)
tps = 32.538174 (without initial connection time)
Test 3
tps = 29.828546 (without initial connection time)
tps = 30.332913 (without initial connection time)
tps = 28.873059 (without initial connection time)
Test 4
tps = 748.676688 (without initial connection time)
tps = 768.798734 (without initial connection time)
tps = 766.924632 (without initial connection time)

While noisy, this test seems a bit faster with SWAR, and it's more
portable to boot. I'm not sure where I'd put the new function so both
call sites can see it, but that's a small detail...

--
John Naylor
Amazon Web Services

diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 51452755f58..8f832dacd40 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -19,7 +19,6 @@
 #include "funcapi.h"
 #include "libpq/pqformat.h"
 #include "miscadmin.h"
-#include "port/simd.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/date.h"
@@ -1621,6 +1620,22 @@ escape_json(StringInfo buf, const char *str)
  */
 #define ESCAPE_JSON_FLUSH_AFTER 512
 
+static inline bool
+has_json_escapable_byte(const char *str)
+{
+	uint64 x;
+
+	memcpy(&x, str, sizeof(uint64));
+
+	uint64 is_ascii = 0x8080808080808080ULL & ~x;
+	uint64 xor2 = x ^ 0x0202020202020202ULL;
+	uint64 lt32_or_eq34 = xor2 - 0x2121212121212121ULL;
+	uint64 sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL;
+	uint64 eq92 = (sub92 - 0x0101010101010101ULL);
+
+	return ((lt32_or_eq34 | eq92) & is_ascii) != 0;
+}
+
 /*
  * escape_json_with_len
  *		Produce a JSON string literal, properly escaping the possibly not
@@ -1645,7 +1660,7 @@ escape_json_with_len(StringInfo buf, const char *str, int len)
 	 * Figure out how many bytes to process using SIMD. Round 'len' down to
 	 * the previous multiple of sizeof(Vector8), assuming that's a power-of-2.
 	 */
-	vlen = len & (int) (~(sizeof(Vector8) - 1));
+	vlen = len & (int) (~(sizeof(uint64) - 1));
 
 	appendStringInfoCharMacro(buf, '"');
 
@@ -1661,19 +1676,13 @@ escape_json_with_len(StringInfo buf, const char *str, int len)
 		 * string byte-by-byte. This optimization assumes that most chunks of
 		 * sizeof(Vector8) bytes won't contain any special characters.
 		 */
-		for (; i < vlen; i += sizeof(Vector8))
+		for (; i < vlen; i += sizeof(uint64))
 		{
-			Vector8 chunk;
-
-			vector8_load(&chunk, (const uint8 *) &str[i]);
-
 			/*
 			 * Break on anything less than ' ' or if we find a '"' or '\\'.
 			 * Those need special handling. That's done in the per-byte loop.
 			 */
-			if (vector8_has_le(chunk, (unsigned char) 0x1F) ||
-				vector8_has(chunk, (unsigned char) '"') ||
-				vector8_has(chunk, (unsigned char) '\\'))
+			if (has_json_escapable_byte(&str[i]))
 				break;
 
 #ifdef ESCAPE_JSON_FLUSH_AFTER
@@ -1706,7 +1715,7 @@ escape_json_with_len(StringInfo buf, const char *str, int len)
 		 * Per-byte loop for Vector8s containing special chars and for
 		 * processing the tail of the string.
 		 */
-		for (int b = 0; b < sizeof(Vector8); b++)
+		for (int b = 0; b < sizeof(uint64); b++)
 		{
 			/* check if we've finished */
 			if (i == len)
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index 7dad4da65f6..6803ccbbc29 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -19,7 +19,6 @@
 
 #include "common/jsonapi.h"
 #include "mb/pg_wchar.h"
-#include "port/pg_lfind.h"
 
 #ifdef JSONAPI_USE_PQEXPBUFFER
 #include "pqexpbuffer.h"
@@ -1949,6 +1948,23 @@ json_lex(JsonLexContext *lex)
 	return JSON_SUCCESS;
 }
 
+static inline
+bool
+has_json_escapable_byte(const char *str)
+{
+	uint64 x;
+
+	memcpy(&x, str, sizeof(uint64));
+
+	uint64 is_ascii = 0x8080808080808080ULL & ~x;
+	uint64 xor2 = x ^ 0x0202020202020202ULL;
+	uint64 lt32_or_eq34 = xor2 - 0x2121212121212121ULL;
+	uint64 sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL;
+	uint64 eq92 = (sub92 - 0x0101010101010101ULL);
+
+	return ((lt32_or_eq34 | eq92) & is_ascii) != 0;
+}
+
 /*
  * The next token in the input stream is known to be a string; lex it.
  *
@@ -2166,11 +2182,9 @@ json_lex_string(JsonLexContext *lex)
 			 * Skip to the first byte that requires special handling, so we
 			 * can batch calls to jsonapi_appendBinaryStringInfo.
 			 */
-			while (p < end - sizeof(Vector8) &&
-				   !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
-				   !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
-				   !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
-				p += sizeof(Vector8);
+			while (p < end - sizeof(uint64) &&
+				   !has_json_escapable_byte(p))
+				p += sizeof(uint64);
 
 			for (; p < end; p++)
 			{