Here's what I will push unless there's something important I missed.

I split the regression tests between create_operator.sql and
polymorphism.sql, because => is really "named argument syntax" rather
than an operator as such, and polymorphism.sql is where the existing
tests were for that.

I tried to keep the three lexers as closely matched as possible, even
though psqlscan.l doesn't actually need some of the changes.

-- 
Andrew (irc:RhodiumToad)

>From 276f9c626f187aafd70185c13caf48cdf5afcf98 Mon Sep 17 00:00:00 2001
From: Andrew Gierth <and...@tao146.riddles.org.uk>
Date: Thu, 23 Aug 2018 16:35:33 +0100
Subject: [PATCH 1/2] Reduce an unnecessary O(N^3) loop in lexer.

The lexer's handling of operators contained an O(N^3) hazard when
dealing with long strings of + or - characters; it seems hard to
prevent this case from being O(N^2), but the additional N multiplier
was not needed.

Backpatch all the way since this has been there since 7.x, and it
presents at least a mild hazard in that trying to do Bind, PREPARE or
EXPLAIN on a hostile query could take excessive time (without
honouring cancels or timeouts) even if the query was never executed.
---
 src/backend/parser/scan.l         | 28 +++++++++++++++++++++-------
 src/fe_utils/psqlscan.l           | 28 +++++++++++++++++++++-------
 src/interfaces/ecpg/preproc/pgc.l | 30 ++++++++++++++++++++++--------
 3 files changed, 64 insertions(+), 22 deletions(-)

diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 0cd782827a..8ee50d85ec 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -885,20 +885,34 @@ other			.
 					 * to forbid operator names like '?-' that could not be
 					 * sequences of SQL operators.
 					 */
-					while (nchars > 1 &&
-						   (yytext[nchars - 1] == '+' ||
-							yytext[nchars - 1] == '-'))
+					if (nchars > 1 &&
+						(yytext[nchars - 1] == '+' ||
+						 yytext[nchars - 1] == '-'))
 					{
 						int			ic;
 
 						for (ic = nchars - 2; ic >= 0; ic--)
 						{
-							if (strchr("~!@#^&|`?%", yytext[ic]))
+							char c = yytext[ic];
+							if (c == '~' || c == '!' || c == '@' ||
+								c == '#' || c == '^' || c == '&' ||
+								c == '|' || c == '`' || c == '?' ||
+								c == '%')
 								break;
 						}
-						if (ic >= 0)
-							break; /* found a char that makes it OK */
-						nchars--; /* else remove the +/-, and check again */
+						if (ic < 0)
+						{
+							/*
+							 * didn't find a qualifying character, so remove
+							 * all trailing [+-]
+							 */
+							for (nchars--;
+								 nchars > 1 &&
+								 (yytext[nchars - 1] == '+' ||
+								  yytext[nchars - 1] == '-');
+								 nchars--)
+								continue;
+						}
 					}
 
 					SET_YYLLOC();
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index 1cc587be34..ab0d56bcd2 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -817,20 +817,34 @@ other			.
 					 * to forbid operator names like '?-' that could not be
 					 * sequences of SQL operators.
 					 */
-					while (nchars > 1 &&
-						   (yytext[nchars - 1] == '+' ||
-							yytext[nchars - 1] == '-'))
+					if (nchars > 1 &&
+						(yytext[nchars - 1] == '+' ||
+						 yytext[nchars - 1] == '-'))
 					{
 						int			ic;
 
 						for (ic = nchars - 2; ic >= 0; ic--)
 						{
-							if (strchr("~!@#^&|`?%", yytext[ic]))
+							char c = yytext[ic];
+							if (c == '~' || c == '!' || c == '@' ||
+								c == '#' || c == '^' || c == '&' ||
+								c == '|' || c == '`' || c == '?' ||
+								c == '%')
 								break;
 						}
-						if (ic >= 0)
-							break; /* found a char that makes it OK */
-						nchars--; /* else remove the +/-, and check again */
+						if (ic < 0)
+						{
+							/*
+							 * didn't find a qualifying character, so remove
+							 * all trailing [+-]
+							 */
+							for (nchars--;
+								 nchars > 1 &&
+								 (yytext[nchars - 1] == '+' ||
+								  yytext[nchars - 1] == '-');
+								 nchars--)
+								continue;
+						}
 					}
 
 					if (nchars < yyleng)
diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l
index 405dee73b0..5d351481f8 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -690,20 +690,34 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 						 * to forbid operator names like '?-' that could not be
 						 * sequences of SQL operators.
 						 */
-						while (nchars > 1 &&
-							   (yytext[nchars-1] == '+' ||
-								yytext[nchars-1] == '-'))
+						if (nchars > 1 &&
+							(yytext[nchars - 1] == '+' ||
+							 yytext[nchars - 1] == '-'))
 						{
 							int		ic;
 
-							for (ic = nchars-2; ic >= 0; ic--)
+							for (ic = nchars - 2; ic >= 0; ic--)
 							{
-								if (strchr("~!@#^&|`?%", yytext[ic]))
+								char c = yytext[ic];
+								if (c == '~' || c == '!' || c == '@' ||
+									c == '#' || c == '^' || c == '&' ||
+									c == '|' || c == '`' || c == '?' ||
+									c == '%')
 									break;
 							}
-							if (ic >= 0)
-								break; /* found a char that makes it OK */
-							nchars--; /* else remove the +/-, and check again */
+							if (ic < 0)
+							{
+								/*
+								 * didn't find a qualifying character, so remove
+								 * all trailing [+-]
+								 */
+								for (nchars--;
+									 nchars > 1 &&
+									 (yytext[nchars - 1] == '+' ||
+									  yytext[nchars - 1] == '-');
+									 nchars--)
+								continue;
+							}
 						}
 
 						if (nchars < yyleng)
-- 
2.11.1

>From fde0c119db017be1279a8c1f2f3ee601607ca998 Mon Sep 17 00:00:00 2001
From: Andrew Gierth <and...@tao146.riddles.org.uk>
Date: Thu, 23 Aug 2018 18:29:18 +0100
Subject: [PATCH 2/2] Fix lexing of standard multi-character operators in edge
 cases.

Commits c6b3c939b (which fixed the precedence of >=, <=, <> operators)
and 865f14a2d (which added support for the standard => notation for
named arguments) created a class of lexer tokens which look like
multi-character operators but which have their own token IDs distinct
from Op. However, longest-match rules meant that following any of
these tokens with another operator character, as in (1<>-1), would
cause them to be incorrectly returned as Op.

The error here isn't immediately obvious, because the parser would
usually still find the correct operator via the Op token, but there
were more subtle problems:

1. If immediately followed by a comment or +-, >= <= <> would be given
   the old precedence of Op rather than the correct new precedence;

2. If followed by a comment, != would be returned as Op rather than as
   NOT_EQUAL, causing it not to be found at all;

3. If followed by a comment or +-, the => token for named arguments
   would be lexed as Op, causing the argument to be mis-parsed as a
   simple expression, usually causing an error.

Fix by explicitly checking for the operators in the {operator} code
block in addition to all the existing special cases there.

Backpatch to 9.5 where the problem was introduced.

Analysis and patch by me; review by Tom Lane.
Discussion: https://postgr.es/m/87va851ppl....@news-spur.riddles.org.uk
---
 src/backend/parser/scan.l                     | 28 ++++++++++
 src/fe_utils/psqlscan.l                       |  9 ++++
 src/interfaces/ecpg/preproc/pgc.l             | 28 ++++++++++
 src/test/regress/expected/create_operator.out | 74 +++++++++++++++++++++++++++
 src/test/regress/expected/polymorphism.out    | 36 +++++++++++++
 src/test/regress/sql/create_operator.sql      | 31 +++++++++++
 src/test/regress/sql/polymorphism.sql         | 15 ++++++
 7 files changed, 221 insertions(+)

diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 8ee50d85ec..98083844aa 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -339,6 +339,15 @@ identifier		{ident_start}{ident_cont}*
 typecast		"::"
 dot_dot			\.\.
 colon_equals	":="
+
+/*
+ * These operator-like tokens (unlike the above ones) also match the {operator}
+ * rule, which means that they might be overridden by a longer match if they
+ * are followed by a comment start or a + or - character. Accordingly, if you
+ * add to this list, you must also add corresponding code to the {operator}
+ * block to return the correct token in such cases. (This is not needed in
+ * psqlscan.l since the token value is ignored there.)
+ */
 equals_greater	"=>"
 less_equals		"<="
 greater_equals	">="
@@ -930,6 +939,25 @@ other			.
 						if (nchars == 1 &&
 							strchr(",()[].;:+-*/%^<>=", yytext[0]))
 							return yytext[0];
+						/*
+						 * Likewise, if what we have left is two chars, and
+						 * those match the tokens ">=", "<=", "=>", "<>" or
+						 * "!=", then we must return the appropriate token
+						 * rather than the generic Op.
+						 */
+						if (nchars == 2)
+						{
+							if (yytext[0] == '=' && yytext[1] == '>')
+								return EQUALS_GREATER;
+							if (yytext[0] == '>' && yytext[1] == '=')
+								return GREATER_EQUALS;
+							if (yytext[0] == '<' && yytext[1] == '=')
+								return LESS_EQUALS;
+							if (yytext[0] == '<' && yytext[1] == '>')
+								return NOT_EQUALS;
+							if (yytext[0] == '!' && yytext[1] == '=')
+								return NOT_EQUALS;
+						}
 					}
 
 					/*
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index ab0d56bcd2..2c5e079a68 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -298,6 +298,15 @@ identifier		{ident_start}{ident_cont}*
 typecast		"::"
 dot_dot			\.\.
 colon_equals	":="
+
+/*
+ * These operator-like tokens (unlike the above ones) also match the {operator}
+ * rule, which means that they might be overridden by a longer match if they
+ * are followed by a comment start or a + or - character. Accordingly, if you
+ * add to this list, you must also add corresponding code to the {operator}
+ * block to return the correct token in such cases. (This is not needed in
+ * psqlscan.l since the token value is ignored there.)
+ */
 equals_greater	"=>"
 less_equals		"<="
 greater_equals	">="
diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l
index 5d351481f8..0a89223004 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -245,6 +245,15 @@ array			({ident_cont}|{whitespace}|[\[\]\+\-\*\%\/\(\)\>\.])*
 typecast		"::"
 dot_dot			\.\.
 colon_equals	":="
+
+/*
+ * These operator-like tokens (unlike the above ones) also match the {operator}
+ * rule, which means that they might be overridden by a longer match if they
+ * are followed by a comment start or a + or - character. Accordingly, if you
+ * add to this list, you must also add corresponding code to the {operator}
+ * block to return the correct token in such cases. (This is not needed in
+ * psqlscan.l since the token value is ignored there.)
+ */
 equals_greater	"=>"
 less_equals		"<="
 greater_equals	">="
@@ -733,6 +742,25 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 							if (nchars == 1 &&
 								strchr(",()[].;:+-*/%^<>=", yytext[0]))
 								return yytext[0];
+							/*
+							 * Likewise, if what we have left is two chars, and
+							 * those match the tokens ">=", "<=", "=>", "<>" or
+							 * "!=", then we must return the appropriate token
+							 * rather than the generic Op.
+							 */
+							if (nchars == 2)
+							{
+								if (yytext[0] == '=' && yytext[1] == '>')
+									return EQUALS_GREATER;
+								if (yytext[0] == '>' && yytext[1] == '=')
+									return GREATER_EQUALS;
+								if (yytext[0] == '<' && yytext[1] == '=')
+									return LESS_EQUALS;
+								if (yytext[0] == '<' && yytext[1] == '>')
+									return NOT_EQUALS;
+								if (yytext[0] == '!' && yytext[1] == '=')
+									return NOT_EQUALS;
+							}
 						}
 
 						base_yylval.str = mm_strdup(yytext);
diff --git a/src/test/regress/expected/create_operator.out b/src/test/regress/expected/create_operator.out
index 77237f4850..54e8b79159 100644
--- a/src/test/regress/expected/create_operator.out
+++ b/src/test/regress/expected/create_operator.out
@@ -45,6 +45,80 @@ CREATE OPERATOR => (
 ERROR:  syntax error at or near "=>"
 LINE 1: CREATE OPERATOR => (
                         ^
+-- lexing of <=, >=, <>, != has a number of edge cases
+-- (=> is tested elsewhere)
+-- this is legal because ! is not allowed in sql ops
+CREATE OPERATOR !=- (
+   leftarg = int8,		-- right unary
+   procedure = numeric_fac
+);
+SELECT 2 !=-;
+ ?column? 
+----------
+        2
+(1 row)
+
+-- make sure lexer returns != as <> even in edge cases
+SELECT 2 !=/**/ 1, 2 !=/**/ 2;
+ ?column? | ?column? 
+----------+----------
+ t        | f
+(1 row)
+
+SELECT 2 !=-- comment to be removed by psql
+  1;
+ ?column? 
+----------
+ t
+(1 row)
+
+DO $$ -- use DO to protect -- from psql
+  declare r boolean;
+  begin
+    execute $e$ select 2 !=-- comment
+      1 $e$ into r;
+    raise info 'r = %', r;
+  end;
+$$;
+INFO:  r = t
+-- check that <= etc. followed by more operator characters are returned
+-- as the correct token with correct precedence
+SELECT true<>-1 BETWEEN 1 AND 1;  -- BETWEEN has prec. above <> but below Op
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT false<>/**/1 BETWEEN 1 AND 1;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT false<=-1 BETWEEN 1 AND 1;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT false>=-1 BETWEEN 1 AND 1;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT 2<=/**/3, 3>=/**/2, 2<>/**/3;
+ ?column? | ?column? | ?column? 
+----------+----------+----------
+ t        | t        | t
+(1 row)
+
+SELECT 3<=/**/2, 2>=/**/3, 2<>/**/2;
+ ?column? | ?column? | ?column? 
+----------+----------+----------
+ f        | f        | f
+(1 row)
+
 -- Should fail. CREATE OPERATOR requires USAGE on SCHEMA
 BEGIN TRANSACTION;
 CREATE ROLE regress_rol_op1;
diff --git a/src/test/regress/expected/polymorphism.out b/src/test/regress/expected/polymorphism.out
index 67e70c8c14..986417a188 100644
--- a/src/test/regress/expected/polymorphism.out
+++ b/src/test/regress/expected/polymorphism.out
@@ -1478,6 +1478,42 @@ select dfunc('a'::text, 'b', flag => true); -- mixed notation
  a
 (1 row)
 
+-- this tests lexer edge cases around =>
+select dfunc(a =>-1);
+ dfunc 
+-------
+    -1
+(1 row)
+
+select dfunc(a =>+1);
+ dfunc 
+-------
+     1
+(1 row)
+
+select dfunc(a =>/**/1);
+ dfunc 
+-------
+     1
+(1 row)
+
+select dfunc(a =>--comment to be removed by psql
+  1);
+ dfunc 
+-------
+     1
+(1 row)
+
+-- need DO to protect the -- from psql
+do $$
+  declare r integer;
+  begin
+    select dfunc(a=>-- comment
+      1) into r;
+    raise info 'r = %', r;
+  end;
+$$;
+INFO:  r = 1
 -- check reverse-listing of named-arg calls
 CREATE VIEW dfview AS
    SELECT q1, q2,
diff --git a/src/test/regress/sql/create_operator.sql b/src/test/regress/sql/create_operator.sql
index 625e9b9748..8b6fd0bb43 100644
--- a/src/test/regress/sql/create_operator.sql
+++ b/src/test/regress/sql/create_operator.sql
@@ -45,6 +45,37 @@ CREATE OPERATOR => (
    procedure = numeric_fac
 );
 
+-- lexing of <=, >=, <>, != has a number of edge cases
+-- (=> is tested elsewhere)
+
+-- this is legal because ! is not allowed in sql ops
+CREATE OPERATOR !=- (
+   leftarg = int8,		-- right unary
+   procedure = numeric_fac
+);
+SELECT 2 !=-;
+-- make sure lexer returns != as <> even in edge cases
+SELECT 2 !=/**/ 1, 2 !=/**/ 2;
+SELECT 2 !=-- comment to be removed by psql
+  1;
+DO $$ -- use DO to protect -- from psql
+  declare r boolean;
+  begin
+    execute $e$ select 2 !=-- comment
+      1 $e$ into r;
+    raise info 'r = %', r;
+  end;
+$$;
+
+-- check that <= etc. followed by more operator characters are returned
+-- as the correct token with correct precedence
+SELECT true<>-1 BETWEEN 1 AND 1;  -- BETWEEN has prec. above <> but below Op
+SELECT false<>/**/1 BETWEEN 1 AND 1;
+SELECT false<=-1 BETWEEN 1 AND 1;
+SELECT false>=-1 BETWEEN 1 AND 1;
+SELECT 2<=/**/3, 3>=/**/2, 2<>/**/3;
+SELECT 3<=/**/2, 2>=/**/3, 2<>/**/2;
+
 -- Should fail. CREATE OPERATOR requires USAGE on SCHEMA
 BEGIN TRANSACTION;
 CREATE ROLE regress_rol_op1;
diff --git a/src/test/regress/sql/polymorphism.sql b/src/test/regress/sql/polymorphism.sql
index 2f65f0f97d..03606671d9 100644
--- a/src/test/regress/sql/polymorphism.sql
+++ b/src/test/regress/sql/polymorphism.sql
@@ -785,6 +785,21 @@ select dfunc('a'::text, 'b', flag => false); -- mixed notation
 select dfunc('a'::text, 'b', true); -- full positional notation
 select dfunc('a'::text, 'b', flag => true); -- mixed notation
 
+-- this tests lexer edge cases around =>
+select dfunc(a =>-1);
+select dfunc(a =>+1);
+select dfunc(a =>/**/1);
+select dfunc(a =>--comment to be removed by psql
+  1);
+-- need DO to protect the -- from psql
+do $$
+  declare r integer;
+  begin
+    select dfunc(a=>-- comment
+      1) into r;
+    raise info 'r = %', r;
+  end;
+$$;
 
 -- check reverse-listing of named-arg calls
 CREATE VIEW dfview AS
-- 
2.11.1

Reply via email to