Patches 0001 - 0006 are same as the previous set.
0007 fixes all the problems you have reported so far, as well as the one I
found. The commit message describes the fixes in detail.
Hi,
I found a potential bug in the parsing of the left and right arrows.
They can be broken up into < - and - > respectively (that is, with
whitespace between the two characters). Does the SQL/PGQ standard really
allow this?
I found this while working on a patch of our own and I was trying to
figure out how you guys had solved this very same problem that we ran
into, and if you had done so in a better way. The fundamental problem is
that parsing the left arrow as one token is a bit tricky due to how
PostgreSQL treats operators ending with minus or plus.
I have attached our very ugly solution for it (broken out from our
patch) in case it helps you. Feel free to use it or ignore it. We do not
plan to work on this right now since you are already working on the same
problem.
I especially dislike the static variable in our patch. As far as I
understand it, you can avoid the static by changing the lexer to use a
push parser, so that it can emit multiple terminal tokens from one scanned
token. However, I have not looked into push parsers and have no idea how
this would affect performance.
https://www.gnu.org/software/bison/manual/html_node/Push-Decl.html
Examples:
# SELECT count(*) FROM GRAPH_TABLE (g1 MATCH ()-[]->() COLUMNS (1 as one));
count
-------
32
(1 row)
# SELECT count(*) FROM GRAPH_TABLE (g1 MATCH ()-[]- >() COLUMNS (1 as one));
count
-------
32
(1 row)
# SELECT * FROM GRAPH_TABLE (myshop MATCH (o IS orders)<-[IS
customer_orders]-(c IS customers) COLUMNS (c.name, o.ordered_when));
name | ordered_when
-----------+--------------
customer1 | 2024-01-01
customer2 | 2024-01-02
(2 rows)
# SELECT * FROM GRAPH_TABLE (myshop MATCH (o IS orders)< -[IS
customer_orders]-(c IS customers) COLUMNS (c.name, o.ordered_when));
name | ordered_when
-----------+--------------
customer1 | 2024-01-01
customer2 | 2024-01-02
(2 rows)
Andreas
From 07c3e1908de413db9fa383165eac78df0a80ab50 Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andr...@proxel.se>
Date: Tue, 29 Oct 2024 20:23:24 +0100
Subject: [PATCH] Broken out tokenization of arrows
---
src/backend/parser/gram.y | 20 ++++++++++++++---
src/backend/parser/scan.l | 37 +++++++++++++++++++++++++++++++
src/fe_utils/psqlscan.l | 5 +++++
src/interfaces/ecpg/preproc/pgc.l | 34 ++++++++++++++++++++++++++++
src/pl/plpgsql/src/pl_gram.y | 1 +
5 files changed, 94 insertions(+), 3 deletions(-)
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index dd458182f02..8f07a1c8c0c 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -677,6 +677,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%token <ival> ICONST PARAM
%token TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
%token LESS_EQUALS GREATER_EQUALS NOT_EQUALS
+%token LEFT_ARROW_LESS LEFT_ARROW_MINUS RIGHT_ARROW
/*
* If you want to make any keyword changes, update the keyword table in
@@ -817,7 +818,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%left AND
%right NOT
%nonassoc IS ISNULL NOTNULL /* IS sets precedence for IS NULL, etc */
-%nonassoc '<' '>' '=' LESS_EQUALS GREATER_EQUALS NOT_EQUALS
+%nonassoc '<' LEFT_ARROW_LESS '>' '=' LESS_EQUALS GREATER_EQUALS NOT_EQUALS
%nonassoc BETWEEN IN_P LIKE ILIKE SIMILAR NOT_LA
%nonassoc ESCAPE /* ESCAPE must be just above LIKE/ILIKE/SIMILAR */
@@ -870,8 +871,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%nonassoc UNBOUNDED NESTED /* ideally would have same precedence as IDENT */
%nonassoc IDENT PARTITION RANGE ROWS GROUPS PRECEDING FOLLOWING CUBE ROLLUP
SET KEYS OBJECT_P SCALAR VALUE_P WITH WITHOUT PATH
-%left Op OPERATOR /* multi-character ops and user-defined operators */
-%left '+' '-'
+%left Op OPERATOR RIGHT_ARROW /* multi-character ops and user-defined operators */
+%left '+' '-' LEFT_ARROW_MINUS
%left '*' '/' '%'
%left '^'
/* Unary Operators */
@@ -14842,6 +14843,8 @@ a_expr: c_expr { $$ = $1; }
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", NULL, $2, @1); }
| '-' a_expr %prec UMINUS
{ $$ = doNegate($2, @1); }
+ | LEFT_ARROW_MINUS a_expr %prec UMINUS
+ { $$ = doNegate($2, @1); }
| a_expr '+' a_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", $1, $3, @2); }
| a_expr '-' a_expr
@@ -14856,6 +14859,8 @@ a_expr: c_expr { $$ = $1; }
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "^", $1, $3, @2); }
| a_expr '<' a_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
+ | a_expr LEFT_ARROW_LESS a_expr
+ { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
| a_expr '>' a_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $3, @2); }
| a_expr '=' a_expr
@@ -14866,6 +14871,8 @@ a_expr: c_expr { $$ = $1; }
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $3, @2); }
| a_expr NOT_EQUALS a_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<>", $1, $3, @2); }
+ | a_expr RIGHT_ARROW a_expr
+ { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "->", $1, $3, @2); }
| a_expr qual_Op a_expr %prec Op
{ $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, $3, @2); }
@@ -15335,6 +15342,8 @@ b_expr: c_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "^", $1, $3, @2); }
| b_expr '<' b_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
+ | b_expr LEFT_ARROW_LESS b_expr
+ { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
| b_expr '>' b_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $3, @2); }
| b_expr '=' b_expr
@@ -15345,6 +15354,8 @@ b_expr: c_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $3, @2); }
| b_expr NOT_EQUALS b_expr
{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<>", $1, $3, @2); }
+ | b_expr RIGHT_ARROW b_expr
+ { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "->", $1, $3, @2); }
| b_expr qual_Op b_expr %prec Op
{ $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, $3, @2); }
| qual_Op b_expr %prec Op
@@ -16503,16 +16514,19 @@ all_Op: Op { $$ = $1; }
MathOp: '+' { $$ = "+"; }
| '-' { $$ = "-"; }
+ | LEFT_ARROW_MINUS { $$ = "-"; }
| '*' { $$ = "*"; }
| '/' { $$ = "/"; }
| '%' { $$ = "%"; }
| '^' { $$ = "^"; }
| '<' { $$ = "<"; }
+ | LEFT_ARROW_LESS { $$ = "<"; }
| '>' { $$ = ">"; }
| '=' { $$ = "="; }
| LESS_EQUALS { $$ = "<="; }
| GREATER_EQUALS { $$ = ">="; }
| NOT_EQUALS { $$ = "<>"; }
+ | RIGHT_ARROW { $$ = "->"; }
;
qual_Op: Op
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 994ed9995ac..d7ed04011b0 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -366,6 +366,7 @@ less_equals "<="
greater_equals ">="
less_greater "<>"
not_equals "!="
+right_arrow "->"
/*
* "self" is the set of chars that should be returned as single-character
@@ -454,6 +455,10 @@ other .
%%
+%{
+ static bool inleftarrow = false;
+%}
+
{whitespace} {
/* ignore */
}
@@ -892,8 +897,18 @@ other .
return NOT_EQUALS;
}
+{right_arrow} {
+ SET_YYLLOC();
+ return RIGHT_ARROW;
+ }
+
{self} {
SET_YYLLOC();
+ if (yytext[0] == '-' && inleftarrow)
+ {
+ inleftarrow = false;
+ return LEFT_ARROW_MINUS;
+ }
return yytext[0];
}
@@ -919,6 +934,26 @@ other .
if (slashstar)
nchars = slashstar - yytext;
+ if (nchars == 2 && yytext[0] == '<' && yytext[1] == '-')
+ {
+ /* Strip the unwanted chars from the token */
+ yyless(1);
+
+ inleftarrow = true;
+
+ return LEFT_ARROW_LESS;
+ }
+
+ if (nchars == 1 && yytext[0] == '-' && inleftarrow)
+ {
+ /* Strip the unwanted chars from the token */
+ if (nchars < yyleng)
+ yyless(nchars);
+
+ inleftarrow = false;
+ return LEFT_ARROW_MINUS;
+ }
+
/*
* For SQL compatibility, '+' and '-' cannot be the
* last char of a multi-char operator unless the operator
@@ -989,6 +1024,8 @@ other .
return NOT_EQUALS;
if (yytext[0] == '!' && yytext[1] == '=')
return NOT_EQUALS;
+ if (yytext[0] == '-' && yytext[1] == '>')
+ return RIGHT_ARROW;
}
}
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index 8e8b049e15f..6f8fd7cd258 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -302,6 +302,7 @@ less_equals "<="
greater_equals ">="
less_greater "<>"
not_equals "!="
+right_arrow "->"
/*
* "self" is the set of chars that should be returned as single-character
@@ -661,6 +662,10 @@ other .
ECHO;
}
+{right_arrow} {
+ ECHO;
+ }
+
/*
* These rules are specific to psql --- they implement parenthesis
* counting and detection of command-ending semicolon. These must
diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l
index f3c03482aec..d1fd12fe5af 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -335,6 +335,7 @@ less_equals "<="
greater_equals ">="
less_greater "<>"
not_equals "!="
+right_arrow "->"
/*
* "self" is the set of chars that should be returned as single-character
@@ -463,6 +464,8 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
%%
%{
+ static bool inleftarrow = false;
+
/* code to execute during start of each call of yylex() */
char *newdefsymbol = NULL;
@@ -854,6 +857,10 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
return NOT_EQUALS;
}
+{right_arrow} {
+ return RIGHT_ARROW;
+ }
+
{informix_special} {
/* are we simulating Informix? */
if (INFORMIX_MODE)
@@ -871,6 +878,11 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
*/
if (yytext[0] == ';' && struct_level == 0)
BEGIN(C);
+ if (yytext[0] == '-' && inleftarrow)
+ {
+ inleftarrow = false;
+ return LEFT_ARROW_MINUS;
+ }
return yytext[0];
}
@@ -896,6 +908,26 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
if (slashstar)
nchars = slashstar - yytext;
+ if (nchars == 2 && yytext[0] == '<' && yytext[1] == '-')
+ {
+ /* Strip the unwanted chars from the token */
+ yyless(1);
+
+ inleftarrow = true;
+
+ return LEFT_ARROW_LESS;
+ }
+
+ if (nchars == 1 && yytext[0] == '-' && inleftarrow)
+ {
+ /* Strip the unwanted chars from the token */
+ if (nchars < yyleng)
+ yyless(nchars);
+
+ inleftarrow = false;
+ return LEFT_ARROW_MINUS;
+ }
+
/*
* For SQL compatibility, '+' and '-' cannot be the
* last char of a multi-char operator unless the operator
@@ -968,6 +1000,8 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
return NOT_EQUALS;
if (yytext[0] == '!' && yytext[1] == '=')
return NOT_EQUALS;
+ if (yytext[0] == '-' && yytext[1] == '>')
+ return RIGHT_ARROW;
}
}
diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y
index 8182ce28aa1..c5cea379554 100644
--- a/src/pl/plpgsql/src/pl_gram.y
+++ b/src/pl/plpgsql/src/pl_gram.y
@@ -237,6 +237,7 @@ static void check_raise_parameters(PLpgSQL_stmt_raise *stmt);
%token <ival> ICONST PARAM
%token TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
%token LESS_EQUALS GREATER_EQUALS NOT_EQUALS
+%token LEFT_ARROW_LESS LEFT_ARROW_MINUS RIGHT_ARROW
/*
* Other tokens recognized by plpgsql's lexer interface layer (pl_scanner.c).
--
2.45.2