Erik Wienhold <[email protected]> writes:
> On 2024-04-07 06:33 +0200, Tom Lane wrote:
>> I suspect it'd be much more robust if we could remove the comment from
>> the expr->query string. No idea how hard that is.
> I slept on it and I think this can be fixed by tracking the end of the
> last token before THEN and use that instead of yylloc in the call to
> plpgsql_append_source_text. We already already track the token length
> in plpgsql_yyleng but don't make it available outside pl_scanner.c yet.
> Attached v2 tries to do that. But it breaks other test cases, probably
> because the calculation of endlocation is off. I'm missing something
> here.
I poked at this and found that the failures occur when the patched
code decides to trim an expression like "_r.v" to just "_r", naturally
breaking the semantics completely. That happens because when
plpgsql_yylex recognizes a compound token, it doesn't bother to
adjust the token length to include the additional word(s). I vaguely
remember having thought about that when writing the lookahead logic,
and deciding that it wasn't worth the trouble -- but now it is.
Up to now, the only thing we did with plpgsql_yyleng was to set the
cutoff point for text reported by plpgsql_yyerror. Extending the
token length changes reports like this:
regression=# do $$ declare r record; r.x$$;
ERROR: syntax error at or near "r"
LINE 1: do $$ declare r record; r.x$$;
^
to this:
regression=# do $$ declare r record; r.x$$;
ERROR: syntax error at or near "r.x"
LINE 1: do $$ declare r record; r.x$$;
^
which seems like strictly an improvement to me (the syntax error is
premature EOF, which is after the "x"); but in any case it's minor
enough to not be worth worrying about.
Looking around, I noticed that we *have* had a similar case in the
past, which 4adead1d2 noticed and worked around by suppressing the
whitespace-trimming action in read_sql_construct. We could probably
reach a near-one-line fix for the current problem by passing
trim=false in the CASE calls, but TBH that discovery just reinforces
my feeling that we need a cleaner fix. The attached v3 reverts
the make-trim-optional hack that 4adead1d2 added, since we don't
need or want the manual trimming anymore.
With this in mind, I find the other manual whitespace trimming logic,
in make_execsql_stmt(), quite scary; but it looks somewhat nontrivial
to get rid of it. (The problem is that parsing of an INTO clause
will leave us with a pushed-back token as next, and then we don't
know where the end of the token before that is.) Since we don't
currently do anything as crazy as combining execsql statements,
I think the problem is only latent, but still...
Anyway, the attached works for me.
regards, tom lane
diff --git a/src/pl/plpgsql/src/expected/plpgsql_control.out b/src/pl/plpgsql/src/expected/plpgsql_control.out
index 328bd48586..ccd4f54704 100644
--- a/src/pl/plpgsql/src/expected/plpgsql_control.out
+++ b/src/pl/plpgsql/src/expected/plpgsql_control.out
@@ -681,3 +681,20 @@ select case_test(13);
other
(1 row)
+-- test line comment between WHEN and THEN
+create or replace function case_comment(int) returns text as $$
+begin
+ case $1
+ when 1 -- comment before THEN
+ then return 'one';
+ else
+ return 'other';
+ end case;
+end;
+$$ language plpgsql immutable;
+select case_comment(1);
+ case_comment
+--------------
+ one
+(1 row)
+
diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y
index bef33d58a2..a29d2dfacd 100644
--- a/src/pl/plpgsql/src/pl_gram.y
+++ b/src/pl/plpgsql/src/pl_gram.y
@@ -66,7 +66,6 @@ static PLpgSQL_expr *read_sql_construct(int until,
RawParseMode parsemode,
bool isexpression,
bool valid_sql,
- bool trim,
int *startloc,
int *endtoken);
static PLpgSQL_expr *read_sql_expression(int until,
@@ -895,7 +894,7 @@ stmt_perform : K_PERFORM
*/
new->expr = read_sql_construct(';', 0, 0, ";",
RAW_PARSE_DEFAULT,
- false, false, true,
+ false, false,
&startloc, NULL);
/* overwrite "perform" ... */
memcpy(new->expr->query, " SELECT", 7);
@@ -981,7 +980,7 @@ stmt_assign : T_DATUM
plpgsql_push_back_token(T_DATUM);
new->expr = read_sql_construct(';', 0, 0, ";",
pmode,
- false, true, true,
+ false, true,
NULL, NULL);
$$ = (PLpgSQL_stmt *) new;
@@ -1474,7 +1473,6 @@ for_control : for_variable K_IN
RAW_PARSE_DEFAULT,
true,
false,
- true,
&expr1loc,
&tok);
@@ -1879,7 +1877,7 @@ stmt_raise : K_RAISE
expr = read_sql_construct(',', ';', K_USING,
", or ; or USING",
RAW_PARSE_PLPGSQL_EXPR,
- true, true, true,
+ true, true,
NULL, &tok);
new->params = lappend(new->params, expr);
}
@@ -2016,7 +2014,7 @@ stmt_dynexecute : K_EXECUTE
expr = read_sql_construct(K_INTO, K_USING, ';',
"INTO or USING or ;",
RAW_PARSE_PLPGSQL_EXPR,
- true, true, true,
+ true, true,
NULL, &endtoken);
new = palloc(sizeof(PLpgSQL_stmt_dynexecute));
@@ -2055,7 +2053,7 @@ stmt_dynexecute : K_EXECUTE
expr = read_sql_construct(',', ';', K_INTO,
", or ; or INTO",
RAW_PARSE_PLPGSQL_EXPR,
- true, true, true,
+ true, true,
NULL, &endtoken);
new->params = lappend(new->params, expr);
} while (endtoken == ',');
@@ -2640,7 +2638,7 @@ read_sql_expression(int until, const char *expected)
{
return read_sql_construct(until, 0, 0, expected,
RAW_PARSE_PLPGSQL_EXPR,
- true, true, true, NULL, NULL);
+ true, true, NULL, NULL);
}
/* Convenience routine to read an expression with two possible terminators */
@@ -2650,7 +2648,7 @@ read_sql_expression2(int until, int until2, const char *expected,
{
return read_sql_construct(until, until2, 0, expected,
RAW_PARSE_PLPGSQL_EXPR,
- true, true, true, NULL, endtoken);
+ true, true, NULL, endtoken);
}
/* Convenience routine to read a SQL statement that must end with ';' */
@@ -2659,7 +2657,7 @@ read_sql_stmt(void)
{
return read_sql_construct(';', 0, 0, ";",
RAW_PARSE_DEFAULT,
- false, true, true, NULL, NULL);
+ false, true, NULL, NULL);
}
/*
@@ -2672,7 +2670,6 @@ read_sql_stmt(void)
* parsemode: raw_parser() mode to use
* isexpression: whether to say we're reading an "expression" or a "statement"
* valid_sql: whether to check the syntax of the expr
- * trim: trim trailing whitespace
* startloc: if not NULL, location of first token is stored at *startloc
* endtoken: if not NULL, ending token is stored at *endtoken
* (this is only interesting if until2 or until3 isn't zero)
@@ -2685,7 +2682,6 @@ read_sql_construct(int until,
RawParseMode parsemode,
bool isexpression,
bool valid_sql,
- bool trim,
int *startloc,
int *endtoken)
{
@@ -2693,6 +2689,7 @@ read_sql_construct(int until,
StringInfoData ds;
IdentifierLookup save_IdentifierLookup;
int startlocation = -1;
+ int endlocation = -1;
int parenlevel = 0;
PLpgSQL_expr *expr;
@@ -2743,6 +2740,8 @@ read_sql_construct(int until,
expected),
parser_errposition(yylloc)));
}
+ /* Remember end+1 location of last accepted token */
+ endlocation = yylloc + plpgsql_token_length();
}
plpgsql_IdentifierLookup = save_IdentifierLookup;
@@ -2753,7 +2752,7 @@ read_sql_construct(int until,
*endtoken = tok;
/* give helpful complaint about empty input */
- if (startlocation >= yylloc)
+ if (startlocation >= endlocation)
{
if (isexpression)
yyerror("missing expression");
@@ -2761,14 +2760,14 @@ read_sql_construct(int until,
yyerror("missing SQL statement");
}
- plpgsql_append_source_text(&ds, startlocation, yylloc);
-
- /* trim any trailing whitespace, for neatness */
- if (trim)
- {
- while (ds.len > 0 && scanner_isspace(ds.data[ds.len - 1]))
- ds.data[--ds.len] = '\0';
- }
+ /*
+ * We save only the text from startlocation to endlocation-1. This
+ * suppresses the "until" token as well as any whitespace or comments
+ * following the last accepted token. (We used to strip such trailing
+ * whitespace by hand, but that causes problems if there's a "-- comment"
+ * in front of said whitespace.)
+ */
+ plpgsql_append_source_text(&ds, startlocation, endlocation);
expr = palloc0(sizeof(PLpgSQL_expr));
expr->query = pstrdup(ds.data);
@@ -3933,16 +3932,12 @@ read_cursor_args(PLpgSQL_var *cursor, int until)
* Read the value expression. To provide the user with meaningful
* parse error positions, we check the syntax immediately, instead of
* checking the final expression that may have the arguments
- * reordered. Trailing whitespace must not be trimmed, because
- * otherwise input of the form (param -- comment\n, param) would be
- * translated into a form where the second parameter is commented
- * out.
+ * reordered.
*/
item = read_sql_construct(',', ')', 0,
",\" or \")",
RAW_PARSE_PLPGSQL_EXPR,
true, true,
- false, /* do not trim */
NULL, &endtoken);
argv[argpos] = item->query;
diff --git a/src/pl/plpgsql/src/pl_scanner.c b/src/pl/plpgsql/src/pl_scanner.c
index 101804d506..9407da51ef 100644
--- a/src/pl/plpgsql/src/pl_scanner.c
+++ b/src/pl/plpgsql/src/pl_scanner.c
@@ -184,6 +184,8 @@ plpgsql_yylex(void)
tok1 = T_DATUM;
else
tok1 = T_CWORD;
+ /* Adjust token length to include A.B.C */
+ aux1.leng = aux5.lloc - aux1.lloc + aux5.leng;
}
else
{
@@ -197,6 +199,8 @@ plpgsql_yylex(void)
tok1 = T_DATUM;
else
tok1 = T_CWORD;
+ /* Adjust token length to include A.B */
+ aux1.leng = aux3.lloc - aux1.lloc + aux3.leng;
}
}
else
@@ -210,6 +214,8 @@ plpgsql_yylex(void)
tok1 = T_DATUM;
else
tok1 = T_CWORD;
+ /* Adjust token length to include A.B */
+ aux1.leng = aux3.lloc - aux1.lloc + aux3.leng;
}
}
else
@@ -298,6 +304,17 @@ plpgsql_yylex(void)
return tok1;
}
+/*
+ * Return the length of the token last returned by plpgsql_yylex().
+ *
+ * In the case of compound tokens, the length includes all the parts.
+ */
+int
+plpgsql_token_length(void)
+{
+ return plpgsql_yyleng;
+}
+
/*
* Internal yylex function. This wraps the core lexer and adds one feature:
* a token pushback stack. We also make a couple of trivial single-token
diff --git a/src/pl/plpgsql/src/plpgsql.h b/src/pl/plpgsql/src/plpgsql.h
index 40056bb851..50c3b28472 100644
--- a/src/pl/plpgsql/src/plpgsql.h
+++ b/src/pl/plpgsql/src/plpgsql.h
@@ -1317,6 +1317,7 @@ extern void plpgsql_dumptree(PLpgSQL_function *func);
*/
extern int plpgsql_base_yylex(void);
extern int plpgsql_yylex(void);
+extern int plpgsql_token_length(void);
extern void plpgsql_push_back_token(int token);
extern bool plpgsql_token_is_unreserved_keyword(int token);
extern void plpgsql_append_source_text(StringInfo buf,
diff --git a/src/pl/plpgsql/src/sql/plpgsql_control.sql b/src/pl/plpgsql/src/sql/plpgsql_control.sql
index ed7231134f..8e007c51dc 100644
--- a/src/pl/plpgsql/src/sql/plpgsql_control.sql
+++ b/src/pl/plpgsql/src/sql/plpgsql_control.sql
@@ -486,3 +486,17 @@ select case_test(1);
select case_test(2);
select case_test(12);
select case_test(13);
+
+-- test line comment between WHEN and THEN
+create or replace function case_comment(int) returns text as $$
+begin
+ case $1
+ when 1 -- comment before THEN
+ then return 'one';
+ else
+ return 'other';
+ end case;
+end;
+$$ language plpgsql immutable;
+
+select case_comment(1);
diff --git a/src/test/regress/expected/plpgsql.out b/src/test/regress/expected/plpgsql.out
index 0637256676..074af8f33a 100644
--- a/src/test/regress/expected/plpgsql.out
+++ b/src/test/regress/expected/plpgsql.out
@@ -2390,11 +2390,9 @@ select namedparmcursor_test7();
ERROR: division by zero
CONTEXT: SQL expression "42/0 AS p1, 77 AS p2"
PL/pgSQL function namedparmcursor_test7() line 6 at OPEN
--- check that line comments work correctly within the argument list (there
--- is some special handling of this case in the code: the newline after the
--- comment must be preserved when the argument-evaluating query is
--- constructed, otherwise the comment effectively comments out the next
--- argument, too)
+-- check that line comments work correctly within the argument list
+-- (this used to require a special hack in the code; it no longer does,
+-- but let's keep the test anyway)
create function namedparmcursor_test8() returns int4 as $$
declare
c1 cursor (p1 int, p2 int) for
diff --git a/src/test/regress/sql/plpgsql.sql b/src/test/regress/sql/plpgsql.sql
index 9ca9449a50..18c91572ae 100644
--- a/src/test/regress/sql/plpgsql.sql
+++ b/src/test/regress/sql/plpgsql.sql
@@ -2047,11 +2047,9 @@ begin
end $$ language plpgsql;
select namedparmcursor_test7();
--- check that line comments work correctly within the argument list (there
--- is some special handling of this case in the code: the newline after the
--- comment must be preserved when the argument-evaluating query is
--- constructed, otherwise the comment effectively comments out the next
--- argument, too)
+-- check that line comments work correctly within the argument list
+-- (this used to require a special hack in the code; it no longer does,
+-- but let's keep the test anyway)
create function namedparmcursor_test8() returns int4 as $$
declare
c1 cursor (p1 int, p2 int) for