On 10/29/24 8:55 PM, Andreas Karlsson wrote:
I especially dislike the static variable in our patch. As far as I understand it, you can avoid the static variable by changing the lexer to use a push parser, so that it can emit multiple terminal tokens from one parsed token, but I have not looked into push parsers and have no idea how this would affect performance.
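Updated the patch to remove the static variable from the backend scanner; the flag now lives in core_yy_extra_type instead (the ecpg lexer, pgc.l, still uses a static). No clue why I thought that one was necessary.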
Andreas
From 79e9474fd02fab7210bed6a5a3db3bc57d725193 Mon Sep 17 00:00:00 2001 From: Andreas Karlsson <andr...@proxel.se> Date: Tue, 29 Oct 2024 20:23:24 +0100 Subject: [PATCH v2] Broken out tokenization of arrows --- src/backend/parser/gram.y | 20 +++++++++++++++--- src/backend/parser/scan.l | 35 +++++++++++++++++++++++++++++++ src/fe_utils/psqlscan.l | 5 +++++ src/include/parser/scanner.h | 1 + src/interfaces/ecpg/preproc/pgc.l | 34 ++++++++++++++++++++++++++++++ src/pl/plpgsql/src/pl_gram.y | 1 + 6 files changed, 93 insertions(+), 3 deletions(-) diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 67eb96396af..179069e0299 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -681,6 +681,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %token <ival> ICONST PARAM %token TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER %token LESS_EQUALS GREATER_EQUALS NOT_EQUALS +%token LEFT_ARROW_LESS LEFT_ARROW_MINUS RIGHT_ARROW /* * If you want to make any keyword changes, update the keyword table in @@ -821,7 +822,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %left AND %right NOT %nonassoc IS ISNULL NOTNULL /* IS sets precedence for IS NULL, etc */ -%nonassoc '<' '>' '=' LESS_EQUALS GREATER_EQUALS NOT_EQUALS +%nonassoc '<' LEFT_ARROW_LESS '>' '=' LESS_EQUALS GREATER_EQUALS NOT_EQUALS %nonassoc BETWEEN IN_P LIKE ILIKE SIMILAR NOT_LA %nonassoc ESCAPE /* ESCAPE must be just above LIKE/ILIKE/SIMILAR */ @@ -874,8 +875,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %nonassoc UNBOUNDED NESTED /* ideally would have same precedence as IDENT */ %nonassoc IDENT PARTITION RANGE ROWS GROUPS PRECEDING FOLLOWING CUBE ROLLUP SET KEYS OBJECT_P SCALAR VALUE_P WITH WITHOUT PATH -%left Op OPERATOR /* multi-character ops and user-defined operators */ -%left '+' '-' +%left Op OPERATOR RIGHT_ARROW /* multi-character ops and user-defined operators */ +%left 
'+' '-' LEFT_ARROW_MINUS %left '*' '/' '%' %left '^' /* Unary Operators */ @@ -14893,6 +14894,8 @@ a_expr: c_expr { $$ = $1; } { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", NULL, $2, @1); } | '-' a_expr %prec UMINUS { $$ = doNegate($2, @1); } + | LEFT_ARROW_MINUS a_expr %prec UMINUS + { $$ = doNegate($2, @1); } | a_expr '+' a_expr { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", $1, $3, @2); } | a_expr '-' a_expr @@ -14907,6 +14910,8 @@ a_expr: c_expr { $$ = $1; } { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "^", $1, $3, @2); } | a_expr '<' a_expr { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); } + | a_expr LEFT_ARROW_LESS a_expr + { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); } | a_expr '>' a_expr { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $3, @2); } | a_expr '=' a_expr @@ -14917,6 +14922,8 @@ a_expr: c_expr { $$ = $1; } { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $3, @2); } | a_expr NOT_EQUALS a_expr { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<>", $1, $3, @2); } + | a_expr RIGHT_ARROW a_expr + { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "->", $1, $3, @2); } | a_expr qual_Op a_expr %prec Op { $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, $3, @2); } @@ -15386,6 +15393,8 @@ b_expr: c_expr { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "^", $1, $3, @2); } | b_expr '<' b_expr { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); } + | b_expr LEFT_ARROW_LESS b_expr + { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); } | b_expr '>' b_expr { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $3, @2); } | b_expr '=' b_expr @@ -15396,6 +15405,8 @@ b_expr: c_expr { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $3, @2); } | b_expr NOT_EQUALS b_expr { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<>", $1, $3, @2); } + | b_expr RIGHT_ARROW b_expr + { $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "->", $1, $3, @2); } | b_expr qual_Op b_expr %prec Op { $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, $3, @2); } | qual_Op b_expr %prec 
Op @@ -16554,16 +16565,19 @@ all_Op: Op { $$ = $1; } MathOp: '+' { $$ = "+"; } | '-' { $$ = "-"; } + | LEFT_ARROW_MINUS { $$ = "-"; } | '*' { $$ = "*"; } | '/' { $$ = "/"; } | '%' { $$ = "%"; } | '^' { $$ = "^"; } | '<' { $$ = "<"; } + | LEFT_ARROW_LESS { $$ = "<"; } | '>' { $$ = ">"; } | '=' { $$ = "="; } | LESS_EQUALS { $$ = "<="; } | GREATER_EQUALS { $$ = ">="; } | NOT_EQUALS { $$ = "<>"; } + | RIGHT_ARROW { $$ = "->"; } ; qual_Op: Op diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 72404e72fff..a17e42d0ef1 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -366,6 +366,7 @@ less_equals "<=" greater_equals ">=" less_greater "<>" not_equals "!=" +right_arrow "->" /* * "self" is the set of chars that should be returned as single-character @@ -892,8 +893,18 @@ other . return NOT_EQUALS; } +{right_arrow} { + SET_YYLLOC(); + return RIGHT_ARROW; + } + {self} { SET_YYLLOC(); + if (yytext[0] == '-' && yyextra->inleftarrow) + { + yyextra->inleftarrow = false; + return LEFT_ARROW_MINUS; + } return yytext[0]; } @@ -919,6 +930,26 @@ other . if (slashstar) nchars = slashstar - yytext; + if (nchars == 2 && yytext[0] == '<' && yytext[1] == '-') + { + /* Strip the unwanted chars from the token */ + yyless(1); + + yyextra->inleftarrow = true; + + return LEFT_ARROW_LESS; + } + + if (nchars == 1 && yytext[0] == '-' && yyextra->inleftarrow) + { + /* Strip the unwanted chars from the token */ + if (nchars < yyleng) + yyless(nchars); + + yyextra->inleftarrow = false; + return LEFT_ARROW_MINUS; + } + /* * For SQL compatibility, '+' and '-' cannot be the * last char of a multi-char operator unless the operator @@ -989,6 +1020,8 @@ other . return NOT_EQUALS; if (yytext[0] == '!' 
&& yytext[1] == '=') return NOT_EQUALS; + if (yytext[0] == '-' && yytext[1] == '>') + return RIGHT_ARROW; } } @@ -1294,6 +1327,8 @@ scanner_init(const char *str, yyext->literalbuf = (char *) palloc(yyext->literalalloc); yyext->literallen = 0; + yyext->inleftarrow = false; + return scanner; } diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l index 8e8b049e15f..6f8fd7cd258 100644 --- a/src/fe_utils/psqlscan.l +++ b/src/fe_utils/psqlscan.l @@ -302,6 +302,7 @@ less_equals "<=" greater_equals ">=" less_greater "<>" not_equals "!=" +right_arrow "->" /* * "self" is the set of chars that should be returned as single-character @@ -661,6 +662,10 @@ other . ECHO; } +{right_arrow} { + ECHO; + } + /* * These rules are specific to psql --- they implement parenthesis * counting and detection of command-ending semicolon. These must diff --git a/src/include/parser/scanner.h b/src/include/parser/scanner.h index d6293b1e878..61647be928c 100644 --- a/src/include/parser/scanner.h +++ b/src/include/parser/scanner.h @@ -105,6 +105,7 @@ typedef struct core_yy_extra_type int state_before_str_stop; /* start cond. before end quote */ int xcdepth; /* depth of nesting in slash-star comments */ char *dolqstart; /* current $foo$ quote start string */ + bool inleftarrow; /* are we parsing a <- operator? 
*/ YYLTYPE save_yylloc; /* one-element stack for PUSH_YYLLOC() */ /* first part of UTF16 surrogate pair for Unicode escapes */ diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l index 82708013ee6..5ea88117aa1 100644 --- a/src/interfaces/ecpg/preproc/pgc.l +++ b/src/interfaces/ecpg/preproc/pgc.l @@ -335,6 +335,7 @@ less_equals "<=" greater_equals ">=" less_greater "<>" not_equals "!=" +right_arrow "->" /* * "self" is the set of chars that should be returned as single-character @@ -463,6 +464,8 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ %% %{ + static bool inleftarrow = false; + /* code to execute during start of each call of yylex() */ char *newdefsymbol = NULL; @@ -854,6 +857,10 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ return NOT_EQUALS; } +{right_arrow} { + return RIGHT_ARROW; + } + {informix_special} { /* are we simulating Informix? */ if (INFORMIX_MODE) @@ -871,6 +878,11 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ */ if (yytext[0] == ';' && struct_level == 0) BEGIN(C); + if (yytext[0] == '-' && inleftarrow) + { + inleftarrow = false; + return LEFT_ARROW_MINUS; + } return yytext[0]; } @@ -896,6 +908,26 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ if (slashstar) nchars = slashstar - yytext; + if (nchars == 2 && yytext[0] == '<' && yytext[1] == '-') + { + /* Strip the unwanted chars from the token */ + yyless(1); + + inleftarrow = true; + + return LEFT_ARROW_LESS; + } + + if (nchars == 1 && yytext[0] == '-' && inleftarrow) + { + /* Strip the unwanted chars from the token */ + if (nchars < yyleng) + yyless(nchars); + + inleftarrow = false; + return LEFT_ARROW_MINUS; + } + /* * For SQL compatibility, '+' and '-' cannot be the * last char of a multi-char operator unless the operator @@ -968,6 +1000,8 @@ cppline {space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+ 
return NOT_EQUALS; if (yytext[0] == '!' && yytext[1] == '=') return NOT_EQUALS; + if (yytext[0] == '-' && yytext[1] == '>') + return RIGHT_ARROW; } } diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y index 8182ce28aa1..c5cea379554 100644 --- a/src/pl/plpgsql/src/pl_gram.y +++ b/src/pl/plpgsql/src/pl_gram.y @@ -237,6 +237,7 @@ static void check_raise_parameters(PLpgSQL_stmt_raise *stmt); %token <ival> ICONST PARAM %token TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER %token LESS_EQUALS GREATER_EQUALS NOT_EQUALS +%token LEFT_ARROW_LESS LEFT_ARROW_MINUS RIGHT_ARROW /* * Other tokens recognized by plpgsql's lexer interface layer (pl_scanner.c). -- 2.45.2