On Sat, Jun 25, 2022 at 8:05 AM Andres Freund <and...@anarazel.de> wrote:
> I tried your patch with:
>
> DROP TABLE IF EXISTS json_as_text;
> CREATE TABLE json_as_text AS SELECT (SELECT json_agg(row_to_json(pd)) as t
> FROM pg_description pd) FROM generate_series(1, 100);
> VACUUM FREEZE json_as_text;
>
> SELECT 1 FROM json_as_text WHERE jsonb_typeof(t::jsonb) = 'not me';
>
> Which the patch improves from 846ms to 754ms (best of three). A bit smaller
> than your improvement, but still nice.
>
> I think your patch doesn't quite go far enough - we still end up looping for
> each character, have the added complication of needing to flush the
> "buffer". I'd be surprised if a "dedicated" loop to see until where the
> string lasts isn't faster. That then obviously could be SIMDified.
>
> A naive implementation (attached) of that gets me down to 706ms.

Taking this a step further, I modified json_lex and json_lex_string to use
a const end pointer instead of maintaining the length (0001). The observed
speedup is small enough that it might not be real, but the code is simpler
this way, and it makes 0002 and 0003 easier to reason about. Then I
modified your patch to do the same (0002). Hackish SSE2 support is in 0003.

To exercise the SIMD code a bit, I added a second test:

DROP TABLE IF EXISTS long_json_as_text;
CREATE TABLE long_json_as_text AS
with long as (select repeat(description, 10) from pg_description pd)
SELECT (select json_agg(row_to_json(long)) as t from long)
from generate_series(1, 100);
VACUUM FREEZE long_json_as_text;

SELECT 1 FROM long_json_as_text WHERE jsonb_typeof(t::jsonb) = 'not me';

With this, I get (turbo disabled, min of 3):

short test:
master: 769ms
0001: 751ms
0002: 703ms
0003: 701ms

long test:
master: 939ms
0001: 883ms
0002: 537ms
0003: 439ms

I think 0001/2 are mostly in committable shape. With 0003, I'd want to make
the portability check a bit nicer and more centralized.
I'm thinking of modifying the CRC check to report that the host
cpu/compiler understands SSE4.2 x86 intrinsics, and then the compile-time
SSE2 check can piggyback on top of that without a runtime check. This is
conceptually easy but a bit of work to not look like a hack (which probably
means the ARM CRC check should look more generic somehow...). The
regression tests will likely need some work as well.

> Separately, it seems pretty awful efficiency / code density wise to have the
> NULL checks for ->strval all over. Might be worth forcing json_lex() and
> json_lex_string() to be inlined, with a constant parameter deciding whether
> ->strval is expected. That'd likely be enough to get the compiler specialize
> the code for us.

I had a look at this but it's a bit more invasive than I want to devote
time to at this point.

-- 
John Naylor
EDB: http://www.enterprisedb.com
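Not for this series, but since the suggestion came up: outside the json code, the specialization idea could be sketched as a single always-inline worker taking the "build a string?" decision as a constant parameter, with thin wrappers for each case. All names below are invented for illustration; nothing here is from a posted patch.

```c
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

/*
 * Hypothetical sketch: because need_strval is a compile-time constant at
 * each call site, the compiler can delete the dead branch, so the
 * no-strval specialization carries no per-character check.  The real
 * change would apply this shape to json_lex()/json_lex_string() and
 * lex->strval.
 */
static inline size_t
lex_plain_run(const char *s, const char *end, char *dst, const bool need_strval)
{
	size_t		n = 0;

	while (s + n < end &&
		   s[n] != '\\' && s[n] != '"' && (unsigned char) s[n] >= 32)
	{
		if (need_strval)		/* folded away in the "false" specialization */
			dst[n] = s[n];
		n++;
	}
	return n;
}

static size_t
lex_plain_run_skip(const char *s, const char *end)
{
	return lex_plain_run(s, end, NULL, false);
}

static size_t
lex_plain_run_copy(const char *s, const char *end, char *dst)
{
	return lex_plain_run(s, end, dst, true);
}
```

Whether the compiler actually specializes as hoped would need checking in the generated assembly, which is part of why it's more work than it first appears.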
From 1484b7541b9ed3f0c476e31f99340d36895bc629 Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Tue, 5 Jul 2022 18:57:37 +0700
Subject: [PATCH v3 3/3] Use vectorized lookahead in json_lex_string on x86

---
 src/common/jsonapi.c | 48 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index ad4858c623..978f18b129 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -24,6 +24,12 @@
 #include "miscadmin.h"
 #endif
 
+/* WIP: put somewhere sensible and consider removing CRC from the names */
+#if (defined (__x86_64__) || defined(_M_AMD64)) && (defined(USE_SSE42_CRC32C) || defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK))
+#include <nmmintrin.h>
+#define USE_SSE2
+#endif
+
 /*
  * The context of the parser is maintained by the recursive descent
  * mechanism, but is passed explicitly to the error reporting routine
@@ -851,12 +857,54 @@ json_lex_string(JsonLexContext *lex)
 		}
 		else if (lex->strval != NULL)
 		{
+#ifdef USE_SSE2
+			__m128i		block,
+						has_backslash,
+						has_doublequote,
+						control,
+						has_control,
+						error_cum = _mm_setzero_si128();
+			const __m128i backslash = _mm_set1_epi8('\\');
+			const __m128i doublequote = _mm_set1_epi8('"');
+			const __m128i max_control = _mm_set1_epi8(0x1F);
+#endif
 			/* start lookahead at next byte */
 			char	   *p = s + 1;
 
 			if (hi_surrogate != -1)
 				return JSON_UNICODE_LOW_SURROGATE;
 
+#ifdef USE_SSE2
+			while (p < end - sizeof(__m128i))
+			{
+				block = _mm_loadu_si128((const __m128i *) p);
+
+				/* direct comparison to quotes and backslashes */
+				has_backslash = _mm_cmpeq_epi8(block, backslash);
+				has_doublequote = _mm_cmpeq_epi8(block, doublequote);
+
+				/*
+				 * use saturation arithmetic to check for <= highest control
+				 * char
+				 */
+				control = _mm_subs_epu8(block, max_control);
+				has_control = _mm_cmpeq_epi8(control, _mm_setzero_si128());
+
+				/*
+				 * set bits in error_cum where the corresponding lanes in
+				 * has_* are set
+				 */
+				error_cum = _mm_or_si128(error_cum, has_backslash);
+				error_cum = _mm_or_si128(error_cum, has_doublequote);
+				error_cum = _mm_or_si128(error_cum, has_control);
+
+				if (_mm_movemask_epi8(error_cum))
+					break;
+
+				p += sizeof(__m128i);
+			}
+#endif							/* USE_SSE2 */
+
 			while (p < end)
 			{
 				if (*p == '\\' || *p == '"' || (unsigned char) *p < 32)
-- 
2.36.1
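For anyone who wants to play with the 0003 technique outside the tree, the same scan can be distilled into a self-contained function. The name and interface are invented for illustration; the intrinsics mirror the patch (and, like it, this only builds on x86 with SSE2 — emmintrin.h is the SSE2 header, so no SSE4.2 check is needed for a standalone demo).

```c
#include <assert.h>
#include <emmintrin.h>			/* SSE2 intrinsics */
#include <stddef.h>

/*
 * Hypothetical demo: return the offset of the first byte that is a
 * backslash, a double quote, or a control character (< 0x20), or len if
 * there is none.  As in the patch, the SIMD loop only narrows the search
 * to a 16-byte block; the scalar tail pinpoints the byte.
 */
static size_t
first_special(const char *s, size_t len)
{
	const char *p = s;
	const char *end = s + len;
	const __m128i backslash = _mm_set1_epi8('\\');
	const __m128i doublequote = _mm_set1_epi8('"');
	const __m128i max_control = _mm_set1_epi8(0x1F);

	while (p + sizeof(__m128i) <= end)
	{
		__m128i		block = _mm_loadu_si128((const __m128i *) p);
		__m128i		hits;

		/* direct comparison to quotes and backslashes */
		hits = _mm_or_si128(_mm_cmpeq_epi8(block, backslash),
							_mm_cmpeq_epi8(block, doublequote));
		/* bytes <= 0x1F saturate to zero under unsigned subtraction */
		hits = _mm_or_si128(hits,
							_mm_cmpeq_epi8(_mm_subs_epu8(block, max_control),
										   _mm_setzero_si128()));

		if (_mm_movemask_epi8(hits))
			break;				/* scalar loop below finds the exact byte */
		p += sizeof(__m128i);
	}

	while (p < end && *p != '\\' && *p != '"' && (unsigned char) *p >= 32)
		p++;
	return p - s;
}
```

One design note visible here: accumulating all three conditions into one mask and testing once per block keeps the hot loop to a handful of instructions.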
From 03d3be083ba60d272e848b3ec96db3c6d47a3b06 Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Fri, 1 Jul 2022 17:28:20 +0700
Subject: [PATCH v3 2/3] Build json strings in larger chunks during lexing

Add lookahead loop to json_lex_string. This way, we can batch calls to
appendBinaryStringInfo.

Jelte Fennema and Andres Freund, with some adjustments by me

Discussion: https://www.postgresql.org/message-id/CAGECzQQuXbies_nKgSiYifZUjBk6nOf2%3DTSXqRjj2BhUh8CTeA%40mail.gmail.com
Discussion: https://www.postgresql.org/message-id/flat/pr3pr83mb0476f098cbcf68af7a1ca89ff7...@pr3pr83mb0476.eurprd83.prod.outlook.com
---
 src/common/jsonapi.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index eeedc0645a..ad4858c623 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -851,10 +851,26 @@ json_lex_string(JsonLexContext *lex)
 		}
 		else if (lex->strval != NULL)
 		{
+			/* start lookahead at next byte */
+			char	   *p = s + 1;
+
 			if (hi_surrogate != -1)
 				return JSON_UNICODE_LOW_SURROGATE;
 
-			appendStringInfoChar(lex->strval, *s);
+			while (p < end)
+			{
+				if (*p == '\\' || *p == '"' || (unsigned char) *p < 32)
+					break;
+				p++;
+			}
+
+			appendBinaryStringInfo(lex->strval, s, p - s);
+
+			/*
+			 * s will be incremented at the top of the loop, so set it to just
+			 * behind our lookahead position
+			 */
+			s = p - 1;
 		}
 	}
-- 
2.36.1
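As a sanity check on 0002's approach, the lookahead-and-batch loop can be modeled as a free function: it copies exactly the bytes the old one-appendStringInfoChar-per-iteration code would have produced, but with a single bulk copy. Here memcpy stands in for appendBinaryStringInfo, and the function itself is invented for illustration.

```c
#include <assert.h>
#include <string.h>

/*
 * Hypothetical model of the 0002 loop: *s is already known to be an
 * ordinary byte, so lookahead starts at s + 1.  Returns the length of
 * the plain run copied into dst; the lexer's main loop would resume at
 * s + result - 1 (matching the patch's "s = p - 1").
 */
static size_t
copy_plain_run(const char *s, const char *end, char *dst)
{
	const char *p = s + 1;

	while (p < end)
	{
		if (*p == '\\' || *p == '"' || (unsigned char) *p < 32)
			break;
		p++;
	}
	memcpy(dst, s, p - s);
	return p - s;
}
```

The win comes from replacing per-character function-call overhead (and StringInfo bookkeeping) with one length computation and one bulk append per run.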
From 3d8b39ff1c1a4abf9effc45323b293b62551770a Mon Sep 17 00:00:00 2001
From: John Naylor <john.nay...@postgresql.org>
Date: Wed, 6 Jul 2022 08:35:24 +0700
Subject: [PATCH v3 1/3] Simplify json lexing state

Instead of updating the length as we go, use a const pointer to the end
of the input, which we know already at the start.
---
 src/common/jsonapi.c | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index 98e4ef0942..eeedc0645a 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -519,26 +519,23 @@ JsonParseErrorType
 json_lex(JsonLexContext *lex)
 {
 	char	   *s;
-	int			len;
+	char	   *const end = lex->input + lex->input_length;
 	JsonParseErrorType result;
 
 	/* Skip leading whitespace. */
 	s = lex->token_terminator;
-	len = s - lex->input;
-	while (len < lex->input_length &&
-		   (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
+	while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
 	{
 		if (*s++ == '\n')
 		{
 			++lex->line_number;
 			lex->line_start = s;
 		}
-		len++;
 	}
 	lex->token_start = s;
 
 	/* Determine token type. */
-	if (len >= lex->input_length)
+	if (s >= end)
 	{
 		lex->token_start = NULL;
 		lex->prev_token_terminator = lex->token_terminator;
@@ -623,7 +620,7 @@ json_lex(JsonLexContext *lex)
 				 * the whole word as an unexpected token, rather than just
 				 * some unintuitive prefix thereof.
 				 */
-				for (p = s; p - s < lex->input_length - len && JSON_ALPHANUMERIC_CHAR(*p); p++)
+				for (p = s; p < end && JSON_ALPHANUMERIC_CHAR(*p); p++)
 					 /* skip */ ;
 
 				/*
@@ -672,7 +669,7 @@ static inline JsonParseErrorType
 json_lex_string(JsonLexContext *lex)
 {
 	char	   *s;
-	int			len;
+	char	   *const end = lex->input + lex->input_length;
 	int			hi_surrogate = -1;
 
 	if (lex->strval != NULL)
@@ -680,13 +677,11 @@ json_lex_string(JsonLexContext *lex)
 	Assert(lex->input_length > 0);
 	s = lex->token_start;
-	len = lex->token_start - lex->input;
 	for (;;)
 	{
 		s++;
-		len++;
 		/* Premature end of the string. */
-		if (len >= lex->input_length)
+		if (s >= end)
 		{
 			lex->token_terminator = s;
 			return JSON_INVALID_TOKEN;
@@ -704,8 +699,7 @@ json_lex_string(JsonLexContext *lex)
 		{
 			/* OK, we have an escape character. */
 			s++;
-			len++;
-			if (len >= lex->input_length)
+			if (s >= end)
 			{
 				lex->token_terminator = s;
 				return JSON_INVALID_TOKEN;
@@ -718,8 +712,7 @@ json_lex_string(JsonLexContext *lex)
 			for (i = 1; i <= 4; i++)
 			{
 				s++;
-				len++;
-				if (len >= lex->input_length)
+				if (s >= end)
 				{
 					lex->token_terminator = s;
 					return JSON_INVALID_TOKEN;
-- 
2.36.1
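The 0001 idiom in miniature, for reference: compute a const end pointer once and compare pointers directly, instead of carrying a separate len that has to be incremented in lockstep with s. skip_ws below is an invented stand-in for the whitespace loop at the top of json_lex.

```c
#include <assert.h>
#include <stddef.h>

/*
 * Hypothetical stand-in: return the offset of the first non-whitespace
 * byte.  The end pointer is fixed at entry, so the loop has one piece of
 * moving state (s) rather than two (s and len).
 */
static size_t
skip_ws(const char *input, size_t input_length)
{
	const char *const end = input + input_length;	/* known at the start */
	const char *s = input;

	while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
		s++;
	return s - input;
}
```

Besides being simpler, keeping a single induction variable is what makes the later lookahead patches easier to reason about.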