On 12/16/24 8:39 AM, Peter Eisentraut wrote:
I'll leave it at this for now and wait for some reviews.
I really like this work since it makes the code cleaner to read on top
of paving the way for threading.
Reviewed the patches and found a couple of issues.
- Shouldn't yyext in syncrep_scanner_init() be allocated on the heap? Or
at least on the stack but by the caller?
- I think you have flipped the parameters of replication_yyerror(), see
attached fixup patch.
- Some white space issues fixed in an attached fixup patch.
- Also fixed the static remaining variables in the replication parser in
an attached patch.
- There seems to be a lot left to do to make the plpgsql scanner
actually re-entrant so I do not think it would makes sense to commit the
patch which sets the re-entrant option before that is done.
Andreas
From 6cada3e9f2d8929e8646bc2e3894ad74ca10eb43 Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andr...@proxel.se>
Date: Tue, 17 Dec 2024 00:32:23 +0100
Subject: [PATCH v2 20/22] fixup! replication parser: pure parser and reentrant
scanner
Fix argument order.
---
src/backend/replication/repl_scanner.l | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 56f93e681ed..6776d3a85a8 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -152,7 +152,7 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; }
uint32 hi,
lo;
if (sscanf(yytext, "%X/%X", &hi, &lo) != 2)
- replication_yyerror("invalid streaming start location", yyscanner);
+ replication_yyerror(yyscanner, "invalid streaming start location");
yylval->recptr = ((uint64) hi) << 32 | lo;
return RECPTR;
}
@@ -209,7 +209,7 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; }
return yytext[0];
}
-<xq,xd><<EOF>> { replication_yyerror("unterminated quoted string", yyscanner); }
+<xq,xd><<EOF>> { replication_yyerror(yyscanner, "unterminated quoted string"); }
<<EOF>> {
--
2.45.2
From a05de695de6c73878e1fe6a74c24936d8c69f05a Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andr...@proxel.se>
Date: Tue, 17 Dec 2024 00:32:47 +0100
Subject: [PATCH v2 21/22] fixup! replication parser: pure parser and reentrant
scanner
Fix whitespace.
---
src/backend/replication/repl_scanner.l | 1 -
src/backend/replication/walsender.c | 2 +-
2 files changed, 1 insertion(+), 2 deletions(-)
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 6776d3a85a8..6388024a598 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -252,7 +252,6 @@ replication_yyerror(yyscan_t yyscanner, const char *message)
errmsg_internal("%s", message)));
}
-
void
replication_scanner_init(const char *str, yyscan_t *yyscannerp)
{
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 4971bcfb765..dc25dd6af91 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -1951,7 +1951,7 @@ WalSndWaitForWal(XLogRecPtr loc)
bool
exec_replication_command(const char *cmd_string)
{
- yyscan_t scanner;
+ yyscan_t scanner;
int parse_rc;
Node *cmd_node;
const char *cmdtag;
--
2.45.2
From 676ed65db9cac70189c1f33269fbbac519f59ca9 Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andr...@proxel.se>
Date: Mon, 16 Dec 2024 23:13:36 +0100
Subject: [PATCH v2 22/22] replication parser: Use flex yyextra
Use flex yyextra to handle context information, instead of global
variables. This complements the earlier patch to make the scanner
reentrant.
---
src/backend/replication/repl_scanner.l | 75 +++++++++++++++-----------
1 file changed, 45 insertions(+), 30 deletions(-)
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 6388024a598..1c4fc302df0 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -38,16 +38,20 @@ fprintf_to_ereport(const char *fmt, const char *msg)
ereport(ERROR, (errmsg_internal("%s", msg)));
}
-/* Pushed-back token (we only handle one) */
-static int repl_pushed_back_token; /* FIXME */
+struct repl_yy_extra_type
+{
+ /* Pushed-back token (we only handle one) */
+ int repl_pushed_back_token;
-/* Work area for collecting literals */
-static StringInfoData litbuf; /* FIXME */
+ /* Work area for collecting literals */
+ StringInfoData litbuf;
+};
+#define YY_EXTRA_TYPE struct repl_yy_extra_type *
-static void startlit(void);
-static char *litbufdup(void);
-static void addlit(char *ytext, int yleng);
-static void addlitchar(unsigned char ychar);
+static void startlit(yyscan_t yyscanner);
+static char *litbufdup(yyscan_t yyscanner);
+static void addlit(char *ytext, int yleng, yyscan_t yyscanner);
+static void addlitchar(unsigned char ychar, yyscan_t yyscanner);
/* LCOV_EXCL_START */
@@ -110,11 +114,11 @@ identifier {ident_start}{ident_cont}*
/* This code is inserted at the start of replication_yylex() */
/* If we have a pushed-back token, return that. */
- if (repl_pushed_back_token)
+ if (yyextra->repl_pushed_back_token)
{
- int result = repl_pushed_back_token;
+ int result = yyextra->repl_pushed_back_token;
- repl_pushed_back_token = 0;
+ yyextra->repl_pushed_back_token = 0;
return result;
}
%}
@@ -159,27 +163,27 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; }
{xqstart} {
BEGIN(xq);
- startlit();
+ startlit(yyscanner);
}
<xq>{quotestop} {
yyless(1);
BEGIN(INITIAL);
- yylval->str = litbufdup();
+ yylval->str = litbufdup(yyscanner);
return SCONST;
}
<xq>{xqdouble} {
- addlitchar('\'');
+ addlitchar('\'', yyscanner);
}
<xq>{xqinside} {
- addlit(yytext, yyleng);
+ addlit(yytext, yyleng, yyscanner);
}
{xdstart} {
BEGIN(xd);
- startlit();
+ startlit(yyscanner);
}
<xd>{xdstop} {
@@ -187,14 +191,14 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; }
yyless(1);
BEGIN(INITIAL);
- yylval->str = litbufdup();
+ yylval->str = litbufdup(yyscanner);
len = strlen(yylval->str);
truncate_identifier(yylval->str, len, true);
return IDENT;
}
<xd>{xdinside} {
- addlit(yytext, yyleng);
+ addlit(yytext, yyleng, yyscanner);
}
{identifier} {
@@ -220,28 +224,37 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; }
/* LCOV_EXCL_STOP */
+/*
+ * Arrange access to yyextra for subroutines of the main yylex() function.
+ * We expect each subroutine to have a yyscanner parameter. Rather than
+ * use the yyget_xxx functions, which might or might not get inlined by the
+ * compiler, we cheat just a bit and cast yyscanner to the right type.
+ */
+#undef yyextra
+#define yyextra (((struct yyguts_t *) yyscanner)->yyextra_r)
+
static void
-startlit(void)
+startlit(yyscan_t yyscanner)
{
- initStringInfo(&litbuf);
+ initStringInfo(&yyextra->litbuf);
}
static char *
-litbufdup(void)
+litbufdup(yyscan_t yyscanner)
{
- return litbuf.data;
+ return yyextra->litbuf.data;
}
static void
-addlit(char *ytext, int yleng)
+addlit(char *ytext, int yleng, yyscan_t yyscanner)
{
- appendBinaryStringInfo(&litbuf, ytext, yleng);
+ appendBinaryStringInfo(&yyextra->litbuf, ytext, yleng);
}
static void
-addlitchar(unsigned char ychar)
+addlitchar(unsigned char ychar, yyscan_t yyscanner)
{
- appendStringInfoChar(&litbuf, ychar);
+ appendStringInfoChar(&yyextra->litbuf, ychar);
}
void
@@ -256,16 +269,18 @@ void
replication_scanner_init(const char *str, yyscan_t *yyscannerp)
{
yyscan_t yyscanner;
+ struct repl_yy_extra_type *yyext = palloc(sizeof(struct repl_yy_extra_type));
if (yylex_init(yyscannerp) != 0)
elog(ERROR, "yylex_init() failed: %m");
yyscanner = *yyscannerp;
- yy_scan_string(str, yyscanner);
+ yyset_extra(yyext, yyscanner);
+
+ yyext->repl_pushed_back_token = 0;
- /* Make sure we start in proper state */
- repl_pushed_back_token = 0;
+ yy_scan_string(str, yyscanner);
}
void
@@ -301,7 +316,7 @@ replication_scanner_is_replication_command(yyscan_t yyscanner)
case K_UPLOAD_MANIFEST:
case K_SHOW:
/* Yes; push back the first token so we can parse later. */
- repl_pushed_back_token = first_token;
+ yyextra->repl_pushed_back_token = first_token;
return true;
default:
/* Nope; we don't bother to push back the token. */
--
2.45.2