On Thu, May 23, 2024 at 8:24 AM David Rowley <dgrowle...@gmail.com> wrote:
> Other things I considered were if doing 16 bytes at a time is too much
> as it puts quite a bit of work into byte-at-a-time processing if just
> 1 special char exists in a 16-byte chunk. I considered doing SWAR [1]
> processing to do the job of vector8_has_le() and vector8_has() byte
> maybe with just uint32s.  It might be worth doing that. However, I've
> not done it yet as it raises the bar for this patch quite a bit.  SWAR
> vector processing is pretty much write-only code. Imagine trying to
> write comments for the code in [2] so that the average person could
> understand what's going on!?

Sorry to resurrect this thread, but I recently saw something that made
me think of this commit (as well as the similar one 0a8de93a48c):

https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/

I don't find this use of SWAR that bad for readability, and there's
only one obtuse clever part that merits a comment. Plus, it seems json
escapes are pretty much set in stone? I gave this a spin with

https://www.postgresql.org/message-id/attachment/163406/json_bench.sh.txt

master:

Test 1
tps = 321.522667 (without initial connection time)
tps = 315.070985 (without initial connection time)
tps = 331.070054 (without initial connection time)
Test 2
tps = 35.107257 (without initial connection time)
tps = 34.977670 (without initial connection time)
tps = 35.898471 (without initial connection time)
Test 3
tps = 33.575570 (without initial connection time)
tps = 32.383352 (without initial connection time)
tps = 31.876192 (without initial connection time)
Test 4
tps = 810.676116 (without initial connection time)
tps = 745.948518 (without initial connection time)
tps = 747.651923 (without initial connection time)

swar patch:

Test 1
tps = 291.919004 (without initial connection time)
tps = 294.446640 (without initial connection time)
tps = 307.670464 (without initial connection time)
Test 2
tps = 30.984440 (without initial connection time)
tps = 31.660630 (without initial connection time)
tps = 32.538174 (without initial connection time)
Test 3
tps = 29.828546 (without initial connection time)
tps = 30.332913 (without initial connection time)
tps = 28.873059 (without initial connection time)
Test 4
tps = 748.676688 (without initial connection time)
tps = 768.798734 (without initial connection time)
tps = 766.924632 (without initial connection time)

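While noisy, this test seems a bit faster with SWAR (though note that,
as labeled above, the swar patch shows lower tps than master in tests
1-3 and about the same in test 4, so the labels are worth
double-checking), and it's more portable to boot. I'm not sure where
I'd put the new function so both call sites can see it, but that's a
small detail...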


--
John Naylor
Amazon Web Services
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 51452755f58..8f832dacd40 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -19,7 +19,6 @@
 #include "funcapi.h"
 #include "libpq/pqformat.h"
 #include "miscadmin.h"
-#include "port/simd.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/date.h"
@@ -1621,6 +1620,22 @@ escape_json(StringInfo buf, const char *str)
  */
 #define ESCAPE_JSON_FLUSH_AFTER 512
 
+static inline bool
+has_json_escapable_byte(const char *str)
+{
+	uint64		x;
+	/* Report whether any of the 8 bytes at str is below 0x20, is '"' (0x22) or is '\\' (0x5C); all 8 bytes are read. */
+	memcpy(&x, str, sizeof(uint64));	/* NOTE(review): the declarations after this statement need C99 mixed declarations; confirm project style allows it */
+	/* Each constant repeats one byte value 8 times, so every step acts on all 8 bytes at once; a borrow only leaves a byte that already matched. */
+	uint64		is_ascii = 0x8080808080808080ULL & ~x;	/* 0x80 in each byte whose top bit is clear */
+	uint64		xor2 = x ^ 0x0202020202020202ULL;	/* 0x22 becomes 0x20; bytes below 0x20 stay below 0x20 */
+	uint64		lt32_or_eq34 = xor2 - 0x2121212121212121ULL;	/* for bytes below 0x80: top bit set iff the xor2 byte is below 0x21 */
+	uint64		sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL;	/* zero byte wherever the input byte is 0x5C */
+	uint64		eq92 = (sub92 - 0x0101010101010101ULL);	/* a zero byte wraps around, which sets its top bit */
+	/* Mask with is_ascii: bytes >= 0x80 can also end up with the top bit set above and must not be reported. */
+	return ((lt32_or_eq34 | eq92) & is_ascii) != 0;
+}
+
 /*
  * escape_json_with_len
  *		Produce a JSON string literal, properly escaping the possibly not
@@ -1645,7 +1660,7 @@ escape_json_with_len(StringInfo buf, const char *str, int len)
 	 * Figure out how many bytes to process using SIMD.  Round 'len' down to
 	 * the previous multiple of sizeof(Vector8), assuming that's a power-of-2.
 	 */
-	vlen = len & (int) (~(sizeof(Vector8) - 1));
+	vlen = len & (int) (~(sizeof(uint64) - 1));
 
 	appendStringInfoCharMacro(buf, '"');
 
@@ -1661,19 +1676,13 @@ escape_json_with_len(StringInfo buf, const char *str, int len)
 		 * string byte-by-byte.  This optimization assumes that most chunks of
 		 * sizeof(Vector8) bytes won't contain any special characters.
 		 */
-		for (; i < vlen; i += sizeof(Vector8))
+		for (; i < vlen; i += sizeof(uint64))
 		{
-			Vector8		chunk;
-
-			vector8_load(&chunk, (const uint8 *) &str[i]);
-
 			/*
 			 * Break on anything less than ' ' or if we find a '"' or '\\'.
 			 * Those need special handling.  That's done in the per-byte loop.
 			 */
-			if (vector8_has_le(chunk, (unsigned char) 0x1F) ||
-				vector8_has(chunk, (unsigned char) '"') ||
-				vector8_has(chunk, (unsigned char) '\\'))
+			if (has_json_escapable_byte(&str[i]))
 				break;
 
 #ifdef ESCAPE_JSON_FLUSH_AFTER
@@ -1706,7 +1715,7 @@ escape_json_with_len(StringInfo buf, const char *str, int len)
 		 * Per-byte loop for Vector8s containing special chars and for
 		 * processing the tail of the string.
 		 */
-		for (int b = 0; b < sizeof(Vector8); b++)
+		for (int b = 0; b < sizeof(uint64); b++)
 		{
 			/* check if we've finished */
 			if (i == len)
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index 7dad4da65f6..6803ccbbc29 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -19,7 +19,6 @@
 
 #include "common/jsonapi.h"
 #include "mb/pg_wchar.h"
-#include "port/pg_lfind.h"
 
 #ifdef JSONAPI_USE_PQEXPBUFFER
 #include "pqexpbuffer.h"
@@ -1949,6 +1948,23 @@ json_lex(JsonLexContext *lex)
 		return JSON_SUCCESS;
 }
 
+static inline
+bool
+has_json_escapable_byte(const char *str)
+{
+	uint64		x;
+	/* Report whether any of the 8 bytes at str is below 0x20, is '"' (0x22) or is '\\' (0x5C); all 8 bytes are read. */
+	memcpy(&x, str, sizeof(uint64));	/* NOTE(review): the declarations after this statement need C99 mixed declarations; confirm project style allows it */
+	/* Each constant repeats one byte value 8 times, so every step acts on all 8 bytes at once; a borrow only leaves a byte that already matched. */
+	uint64		is_ascii = 0x8080808080808080ULL & ~x;	/* 0x80 in each byte whose top bit is clear */
+	uint64		xor2 = x ^ 0x0202020202020202ULL;	/* 0x22 becomes 0x20; bytes below 0x20 stay below 0x20 */
+	uint64		lt32_or_eq34 = xor2 - 0x2121212121212121ULL;	/* for bytes below 0x80: top bit set iff the xor2 byte is below 0x21 */
+	uint64		sub92 = x ^ 0x5C5C5C5C5C5C5C5CULL;	/* zero byte wherever the input byte is 0x5C */
+	uint64		eq92 = (sub92 - 0x0101010101010101ULL);	/* a zero byte wraps around, which sets its top bit */
+	/* Mask with is_ascii: bytes >= 0x80 can also end up with the top bit set above and must not be reported. */
+	return ((lt32_or_eq34 | eq92) & is_ascii) != 0;
+}
+
 /*
  * The next token in the input stream is known to be a string; lex it.
  *
@@ -2166,11 +2182,9 @@ json_lex_string(JsonLexContext *lex)
 			 * Skip to the first byte that requires special handling, so we
 			 * can batch calls to jsonapi_appendBinaryStringInfo.
 			 */
-			while (p < end - sizeof(Vector8) &&
-				   !pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
-				   !pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
-				   !pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
-				p += sizeof(Vector8);
+			while (p < end - sizeof(uint64) &&
+				   !has_json_escapable_byte(p))
+				p += sizeof(uint64);
 
 			for (; p < end; p++)
 			{

Reply via email to