On 12/16/24 8:39 AM, Peter Eisentraut wrote:
I'll leave it at this for now and wait for some reviews.

I really like this work since it makes the code cleaner to read on top of paving the way for threading.

Reviewed the patches and found a couple of issues.

- Shouldn't yyext in syncrep_scanner_init() be allocated on the heap? Or at least on the stack but by the caller?

- I think you have flipped the parameters of replication_yyerror(), see attached fixup patch.

- Some white space issues fixed in an attached fixup patch.

- Also fixed the static remaining variables in the replication parser in an attached patch.

- There seems to be a lot left to do to make the plpgsql scanner actually re-entrant so I do not think it would makes sense to commit the patch which sets the re-entrant option before that is done.

Andreas
From 6cada3e9f2d8929e8646bc2e3894ad74ca10eb43 Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andr...@proxel.se>
Date: Tue, 17 Dec 2024 00:32:23 +0100
Subject: [PATCH v2 20/22] fixup! replication parser: pure parser and reentrant
 scanner

Fix argument order.
---
 src/backend/replication/repl_scanner.l | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 56f93e681ed..6776d3a85a8 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -152,7 +152,7 @@ UPLOAD_MANIFEST		{ return K_UPLOAD_MANIFEST; }
 					uint32	hi,
 							lo;
 					if (sscanf(yytext, "%X/%X", &hi, &lo) != 2)
-						replication_yyerror("invalid streaming start location", yyscanner);
+						replication_yyerror(yyscanner, "invalid streaming start location");
 					yylval->recptr = ((uint64) hi) << 32 | lo;
 					return RECPTR;
 				}
@@ -209,7 +209,7 @@ UPLOAD_MANIFEST		{ return K_UPLOAD_MANIFEST; }
 					return yytext[0];
 				}
 
-<xq,xd><<EOF>>	{ replication_yyerror("unterminated quoted string", yyscanner); }
+<xq,xd><<EOF>>	{ replication_yyerror(yyscanner, "unterminated quoted string"); }
 
 
 <<EOF>>			{
-- 
2.45.2

From a05de695de6c73878e1fe6a74c24936d8c69f05a Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andr...@proxel.se>
Date: Tue, 17 Dec 2024 00:32:47 +0100
Subject: [PATCH v2 21/22] fixup! replication parser: pure parser and reentrant
 scanner

Fix whitespace.
---
 src/backend/replication/repl_scanner.l | 1 -
 src/backend/replication/walsender.c    | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 6776d3a85a8..6388024a598 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -252,7 +252,6 @@ replication_yyerror(yyscan_t yyscanner, const char *message)
 			 errmsg_internal("%s", message)));
 }
 
-
 void
 replication_scanner_init(const char *str, yyscan_t *yyscannerp)
 {
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 4971bcfb765..dc25dd6af91 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -1951,7 +1951,7 @@ WalSndWaitForWal(XLogRecPtr loc)
 bool
 exec_replication_command(const char *cmd_string)
 {
-	yyscan_t    scanner;
+	yyscan_t	scanner;
 	int			parse_rc;
 	Node	   *cmd_node;
 	const char *cmdtag;
-- 
2.45.2

From 676ed65db9cac70189c1f33269fbbac519f59ca9 Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andr...@proxel.se>
Date: Mon, 16 Dec 2024 23:13:36 +0100
Subject: [PATCH v2 22/22] replication parser: Use flex yyextra

Use flex yyextra to handle context information, instead of global
variables.  This complements the earlier patch to make the scanner
reentrant.
---
 src/backend/replication/repl_scanner.l | 75 +++++++++++++++-----------
 1 file changed, 45 insertions(+), 30 deletions(-)

diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 6388024a598..1c4fc302df0 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -38,16 +38,20 @@ fprintf_to_ereport(const char *fmt, const char *msg)
 	ereport(ERROR, (errmsg_internal("%s", msg)));
 }
 
-/* Pushed-back token (we only handle one) */
-static int	repl_pushed_back_token;	/* FIXME */
+struct repl_yy_extra_type
+{
+	/* Pushed-back token (we only handle one) */
+	int	repl_pushed_back_token;
 
-/* Work area for collecting literals */
-static StringInfoData litbuf;		/* FIXME */
+	/* Work area for collecting literals */
+	StringInfoData litbuf;
+};
+#define YY_EXTRA_TYPE struct repl_yy_extra_type *
 
-static void startlit(void);
-static char *litbufdup(void);
-static void addlit(char *ytext, int yleng);
-static void addlitchar(unsigned char ychar);
+static void startlit(yyscan_t yyscanner);
+static char *litbufdup(yyscan_t yyscanner);
+static void addlit(char *ytext, int yleng, yyscan_t yyscanner);
+static void addlitchar(unsigned char ychar, yyscan_t yyscanner);
 
 /* LCOV_EXCL_START */
 
@@ -110,11 +114,11 @@ identifier		{ident_start}{ident_cont}*
 	/* This code is inserted at the start of replication_yylex() */
 
 	/* If we have a pushed-back token, return that. */
-	if (repl_pushed_back_token)
+	if (yyextra->repl_pushed_back_token)
 	{
-		int			result = repl_pushed_back_token;
+		int			result = yyextra->repl_pushed_back_token;
 
-		repl_pushed_back_token = 0;
+		yyextra->repl_pushed_back_token = 0;
 		return result;
 	}
 %}
@@ -159,27 +163,27 @@ UPLOAD_MANIFEST		{ return K_UPLOAD_MANIFEST; }
 
 {xqstart}		{
 					BEGIN(xq);
-					startlit();
+					startlit(yyscanner);
 				}
 
 <xq>{quotestop}	{
 					yyless(1);
 					BEGIN(INITIAL);
-					yylval->str = litbufdup();
+					yylval->str = litbufdup(yyscanner);
 					return SCONST;
 				}
 
 <xq>{xqdouble}	{
-					addlitchar('\'');
+					addlitchar('\'', yyscanner);
 				}
 
 <xq>{xqinside}  {
-					addlit(yytext, yyleng);
+					addlit(yytext, yyleng, yyscanner);
 				}
 
 {xdstart}		{
 					BEGIN(xd);
-					startlit();
+					startlit(yyscanner);
 				}
 
 <xd>{xdstop}	{
@@ -187,14 +191,14 @@ UPLOAD_MANIFEST		{ return K_UPLOAD_MANIFEST; }
 
 					yyless(1);
 					BEGIN(INITIAL);
-					yylval->str = litbufdup();
+					yylval->str = litbufdup(yyscanner);
 					len = strlen(yylval->str);
 					truncate_identifier(yylval->str, len, true);
 					return IDENT;
 				}
 
 <xd>{xdinside}  {
-					addlit(yytext, yyleng);
+					addlit(yytext, yyleng, yyscanner);
 				}
 
 {identifier}	{
@@ -220,28 +224,37 @@ UPLOAD_MANIFEST		{ return K_UPLOAD_MANIFEST; }
 
 /* LCOV_EXCL_STOP */
 
+/*
+ * Arrange access to yyextra for subroutines of the main yylex() function.
+ * We expect each subroutine to have a yyscanner parameter.  Rather than
+ * use the yyget_xxx functions, which might or might not get inlined by the
+ * compiler, we cheat just a bit and cast yyscanner to the right type.
+ */
+#undef yyextra
+#define yyextra  (((struct yyguts_t *) yyscanner)->yyextra_r)
+
 static void
-startlit(void)
+startlit(yyscan_t yyscanner)
 {
-	initStringInfo(&litbuf);
+	initStringInfo(&yyextra->litbuf);
 }
 
 static char *
-litbufdup(void)
+litbufdup(yyscan_t yyscanner)
 {
-	return litbuf.data;
+	return yyextra->litbuf.data;
 }
 
 static void
-addlit(char *ytext, int yleng)
+addlit(char *ytext, int yleng, yyscan_t yyscanner)
 {
-	appendBinaryStringInfo(&litbuf, ytext, yleng);
+	appendBinaryStringInfo(&yyextra->litbuf, ytext, yleng);
 }
 
 static void
-addlitchar(unsigned char ychar)
+addlitchar(unsigned char ychar, yyscan_t yyscanner)
 {
-	appendStringInfoChar(&litbuf, ychar);
+	appendStringInfoChar(&yyextra->litbuf, ychar);
 }
 
 void
@@ -256,16 +269,18 @@ void
 replication_scanner_init(const char *str, yyscan_t *yyscannerp)
 {
 	yyscan_t	yyscanner;
+	struct repl_yy_extra_type *yyext = palloc(sizeof(struct repl_yy_extra_type));
 
 	if (yylex_init(yyscannerp) != 0)
 		elog(ERROR, "yylex_init() failed: %m");
 
 	yyscanner = *yyscannerp;
 
-	yy_scan_string(str, yyscanner);
+	yyset_extra(yyext, yyscanner);
+
+	yyext->repl_pushed_back_token = 0;
 
-	/* Make sure we start in proper state */
-	repl_pushed_back_token = 0;
+	yy_scan_string(str, yyscanner);
 }
 
 void
@@ -301,7 +316,7 @@ replication_scanner_is_replication_command(yyscan_t yyscanner)
 		case K_UPLOAD_MANIFEST:
 		case K_SHOW:
 			/* Yes; push back the first token so we can parse later. */
-			repl_pushed_back_token = first_token;
+			yyextra->repl_pushed_back_token = first_token;
 			return true;
 		default:
 			/* Nope; we don't bother to push back the token. */
-- 
2.45.2

Reply via email to