On 10/29/24 8:55 PM, Andreas Karlsson wrote:
> I especially dislike the static variable in our patch. And as far as I understand it you can avoid the static by changing the lexer to use the push parser so it can emit multiple terminal tokens from one parsed token, but I have not looked into push parsers and have no idea how this would affect performance.

Updated the patch to remove the static variable. No clue why I thought that one was necessary.

Andreas
From 79e9474fd02fab7210bed6a5a3db3bc57d725193 Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andr...@proxel.se>
Date: Tue, 29 Oct 2024 20:23:24 +0100
Subject: [PATCH v2] Broken out tokenization of arrows

---
 src/backend/parser/gram.y         | 20 +++++++++++++++---
 src/backend/parser/scan.l         | 35 +++++++++++++++++++++++++++++++
 src/fe_utils/psqlscan.l           |  5 +++++
 src/include/parser/scanner.h      |  1 +
 src/interfaces/ecpg/preproc/pgc.l | 34 ++++++++++++++++++++++++++++++
 src/pl/plpgsql/src/pl_gram.y      |  1 +
 6 files changed, 93 insertions(+), 3 deletions(-)

diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 67eb96396af..179069e0299 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -681,6 +681,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %token <ival>	ICONST PARAM
 %token			TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
 %token			LESS_EQUALS GREATER_EQUALS NOT_EQUALS
+%token			LEFT_ARROW_LESS LEFT_ARROW_MINUS RIGHT_ARROW
 
 /*
  * If you want to make any keyword changes, update the keyword table in
@@ -821,7 +822,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %left		AND
 %right		NOT
 %nonassoc	IS ISNULL NOTNULL	/* IS sets precedence for IS NULL, etc */
-%nonassoc	'<' '>' '=' LESS_EQUALS GREATER_EQUALS NOT_EQUALS
+%nonassoc	'<' LEFT_ARROW_LESS '>' '=' LESS_EQUALS GREATER_EQUALS NOT_EQUALS
 %nonassoc	BETWEEN IN_P LIKE ILIKE SIMILAR NOT_LA
 %nonassoc	ESCAPE			/* ESCAPE must be just above LIKE/ILIKE/SIMILAR */
 
@@ -874,8 +875,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %nonassoc	UNBOUNDED NESTED /* ideally would have same precedence as IDENT */
 %nonassoc	IDENT PARTITION RANGE ROWS GROUPS PRECEDING FOLLOWING CUBE ROLLUP
 			SET KEYS OBJECT_P SCALAR VALUE_P WITH WITHOUT PATH
-%left		Op OPERATOR		/* multi-character ops and user-defined operators */
-%left		'+' '-'
+%left		Op OPERATOR RIGHT_ARROW	/* multi-character ops and user-defined operators */
+%left		'+' '-' LEFT_ARROW_MINUS
 %left		'*' '/' '%'
 %left		'^'
 /* Unary Operators */
@@ -14893,6 +14894,8 @@ a_expr:		c_expr									{ $$ = $1; }
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", NULL, $2, @1); }
 			| '-' a_expr					%prec UMINUS
 				{ $$ = doNegate($2, @1); }
+			| LEFT_ARROW_MINUS a_expr		%prec UMINUS
+				{ $$ = doNegate($2, @1); }
 			| a_expr '+' a_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", $1, $3, @2); }
 			| a_expr '-' a_expr
@@ -14907,6 +14910,8 @@ a_expr:		c_expr									{ $$ = $1; }
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "^", $1, $3, @2); }
 			| a_expr '<' a_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
+			| a_expr LEFT_ARROW_LESS a_expr
+				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
 			| a_expr '>' a_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $3, @2); }
 			| a_expr '=' a_expr
@@ -14917,6 +14922,8 @@ a_expr:		c_expr									{ $$ = $1; }
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $3, @2); }
 			| a_expr NOT_EQUALS a_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<>", $1, $3, @2); }
+			| a_expr RIGHT_ARROW a_expr
+				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "->", $1, $3, @2); }
 
 			| a_expr qual_Op a_expr				%prec Op
 				{ $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, $3, @2); }
@@ -15386,6 +15393,8 @@ b_expr:		c_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "^", $1, $3, @2); }
 			| b_expr '<' b_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
+			| b_expr LEFT_ARROW_LESS b_expr
+				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
 			| b_expr '>' b_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $3, @2); }
 			| b_expr '=' b_expr
@@ -15396,6 +15405,8 @@ b_expr:		c_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $3, @2); }
 			| b_expr NOT_EQUALS b_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<>", $1, $3, @2); }
+			| b_expr RIGHT_ARROW b_expr
+				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "->", $1, $3, @2); }
 			| b_expr qual_Op b_expr				%prec Op
 				{ $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, $3, @2); }
 			| qual_Op b_expr					%prec Op
@@ -16554,16 +16565,19 @@ all_Op:		Op										{ $$ = $1; }
 
 MathOp:		 '+'									{ $$ = "+"; }
 			| '-'									{ $$ = "-"; }
+			| LEFT_ARROW_MINUS						{ $$ = "-"; }
 			| '*'									{ $$ = "*"; }
 			| '/'									{ $$ = "/"; }
 			| '%'									{ $$ = "%"; }
 			| '^'									{ $$ = "^"; }
 			| '<'									{ $$ = "<"; }
+			| LEFT_ARROW_LESS						{ $$ = "<"; }
 			| '>'									{ $$ = ">"; }
 			| '='									{ $$ = "="; }
 			| LESS_EQUALS							{ $$ = "<="; }
 			| GREATER_EQUALS						{ $$ = ">="; }
 			| NOT_EQUALS							{ $$ = "<>"; }
+			| RIGHT_ARROW							{ $$ = "->"; }
 		;
 
 qual_Op:	Op
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 72404e72fff..a17e42d0ef1 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -366,6 +366,7 @@ less_equals		"<="
 greater_equals	">="
 less_greater	"<>"
 not_equals		"!="
+right_arrow		"->"
 
 /*
  * "self" is the set of chars that should be returned as single-character
@@ -892,8 +893,18 @@ other			.
 					return NOT_EQUALS;
 				}
 
+{right_arrow}	{
+					SET_YYLLOC();
+					return RIGHT_ARROW;
+				}
+
 {self}			{
 					SET_YYLLOC();
+					if (yytext[0] == '-' && yyextra->inleftarrow)
+					{
+						yyextra->inleftarrow = false;
+						return LEFT_ARROW_MINUS;
+					}
 					return yytext[0];
 				}
 
@@ -919,6 +930,26 @@ other			.
 					if (slashstar)
 						nchars = slashstar - yytext;
 
+					if (nchars == 2 && yytext[0] == '<' && yytext[1] == '-')
+					{
+						/* Strip the unwanted chars from the token */
+						yyless(1);
+
+						yyextra->inleftarrow = true;
+
+						return LEFT_ARROW_LESS;
+					}
+
+					if (nchars == 1 && yytext[0] == '-' && yyextra->inleftarrow)
+					{
+						/* Strip the unwanted chars from the token */
+						if (nchars < yyleng)
+							yyless(nchars);
+
+						yyextra->inleftarrow = false;
+						return LEFT_ARROW_MINUS;
+					}
+
 					/*
 					 * For SQL compatibility, '+' and '-' cannot be the
 					 * last char of a multi-char operator unless the operator
@@ -989,6 +1020,8 @@ other			.
 								return NOT_EQUALS;
 							if (yytext[0] == '!' && yytext[1] == '=')
 								return NOT_EQUALS;
+							if (yytext[0] == '-' && yytext[1] == '>')
+								return RIGHT_ARROW;
 						}
 					}
 
@@ -1294,6 +1327,8 @@ scanner_init(const char *str,
 	yyext->literalbuf = (char *) palloc(yyext->literalalloc);
 	yyext->literallen = 0;
 
+	yyext->inleftarrow = false;
+
 	return scanner;
 }
 
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index 8e8b049e15f..6f8fd7cd258 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -302,6 +302,7 @@ less_equals		"<="
 greater_equals	">="
 less_greater	"<>"
 not_equals		"!="
+right_arrow     "->"
 
 /*
  * "self" is the set of chars that should be returned as single-character
@@ -661,6 +662,10 @@ other			.
 					ECHO;
 				}
 
+{right_arrow}	{
+					ECHO;
+				}
+
 	/*
 	 * These rules are specific to psql --- they implement parenthesis
 	 * counting and detection of command-ending semicolon.  These must
diff --git a/src/include/parser/scanner.h b/src/include/parser/scanner.h
index d6293b1e878..61647be928c 100644
--- a/src/include/parser/scanner.h
+++ b/src/include/parser/scanner.h
@@ -105,6 +105,7 @@ typedef struct core_yy_extra_type
 	int			state_before_str_stop;	/* start cond. before end quote */
 	int			xcdepth;		/* depth of nesting in slash-star comments */
 	char	   *dolqstart;		/* current $foo$ quote start string */
+	bool		inleftarrow;	/* split a "<-" operator; its '-' is pending? */
 	YYLTYPE		save_yylloc;	/* one-element stack for PUSH_YYLLOC() */
 
 	/* first part of UTF16 surrogate pair for Unicode escapes */
diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l
index 82708013ee6..5ea88117aa1 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -335,6 +335,7 @@ less_equals		"<="
 greater_equals	">="
 less_greater	"<>"
 not_equals		"!="
+right_arrow		"->"
 
 /*
  * "self" is the set of chars that should be returned as single-character
@@ -463,6 +464,8 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 %%
 
 %{
+		static bool inleftarrow = false;
+
 		/* code to execute during start of each call of yylex() */
 		char *newdefsymbol = NULL;
 
@@ -854,6 +857,10 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					return NOT_EQUALS;
 				}
 
+{right_arrow}	{
+					return RIGHT_ARROW;
+				}
+
 {informix_special} {
 					/* are we simulating Informix? */
 					if (INFORMIX_MODE)
@@ -871,6 +878,11 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					 */
 					if (yytext[0] == ';' && struct_level == 0)
 						BEGIN(C);
+					if (yytext[0] == '-' && inleftarrow)
+					{
+						inleftarrow = false;
+						return LEFT_ARROW_MINUS;
+					}
 					return yytext[0];
 				}
 
@@ -896,6 +908,26 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					if (slashstar)
 						nchars = slashstar - yytext;
 
+					if (nchars == 2 && yytext[0] == '<' && yytext[1] == '-')
+					{
+						/* Strip the unwanted chars from the token */
+						yyless(1);
+
+						inleftarrow = true;
+
+						return LEFT_ARROW_LESS;
+					}
+
+					if (nchars == 1 && yytext[0] == '-' && inleftarrow)
+					{
+						/* Strip the unwanted chars from the token */
+						if (nchars < yyleng)
+							yyless(nchars);
+
+						inleftarrow = false;
+						return LEFT_ARROW_MINUS;
+					}
+
 					/*
 					 * For SQL compatibility, '+' and '-' cannot be the
 					 * last char of a multi-char operator unless the operator
@@ -968,6 +1000,8 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 								return NOT_EQUALS;
 							if (yytext[0] == '!' && yytext[1] == '=')
 								return NOT_EQUALS;
+							if (yytext[0] == '-' && yytext[1] == '>')
+								return RIGHT_ARROW;
 						}
 					}
 
diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y
index 8182ce28aa1..c5cea379554 100644
--- a/src/pl/plpgsql/src/pl_gram.y
+++ b/src/pl/plpgsql/src/pl_gram.y
@@ -237,6 +237,7 @@ static	void			check_raise_parameters(PLpgSQL_stmt_raise *stmt);
 %token <ival>	ICONST PARAM
 %token			TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
 %token			LESS_EQUALS GREATER_EQUALS NOT_EQUALS
+%token			LEFT_ARROW_LESS LEFT_ARROW_MINUS RIGHT_ARROW
 
 /*
  * Other tokens recognized by plpgsql's lexer interface layer (pl_scanner.c).
-- 
2.45.2

Reply via email to