From c99abc8ef3d9367bc7442252afdf1e301d71111a Mon Sep 17 00:00:00 2001
From: "Garen J. Torikian" <gjtorikian@users.noreply.github.com>
Date: Tue, 3 Jan 2023 13:51:10 -0500
Subject: [PATCH] Expand character set for ltree labels

This patch expands the character set for ltree labels to include one
additional character: the hyphen.

Furthermore, the maximum label length is raised from 255 to 511 characters
to account for longer labels.
---
 contrib/ltree/expected/ltree.out | 48 ++++++++++++++++++++------------
 contrib/ltree/ltree.h            |  9 ++++--
 contrib/ltree/ltree_io.c         | 10 +++----
 contrib/ltree/ltxtquery_io.c     |  4 +--
 contrib/ltree/sql/ltree.sql      | 15 ++++++----
 doc/src/sgml/ltree.sgml          |  9 +++---
 6 files changed, 57 insertions(+), 38 deletions(-)

diff --git a/contrib/ltree/expected/ltree.out b/contrib/ltree/expected/ltree.out
index b95be71c78..0c61f4fffc 100644
--- a/contrib/ltree/expected/ltree.out
+++ b/contrib/ltree/expected/ltree.out
@@ -25,6 +25,12 @@ SELECT '1.2'::ltree;
  1.2
 (1 row)

+SELECT '1.2.-3'::ltree;
+ ltree
+--------
+ 1.2.-3
+(1 row)
+
 SELECT '1.2._3'::ltree;
  ltree
 --------
@@ -45,15 +51,15 @@ ERROR:  ltree syntax error
 LINE 1: SELECT '1.2.'::ltree;
                ^
 DETAIL:  Unexpected end of input.
-SELECT repeat('x', 255)::ltree;
-                                                                                                                             repeat
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+SELECT repeat('x', 511)::ltree;
+                                                                                                                                                                                                                                                             repeat
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 (1 row)

-SELECT repeat('x', 256)::ltree;
+SELECT repeat('x', 512)::ltree;
 ERROR:  label string is too long
-DETAIL:  Label length is 256, must be at most 255, at character 257.
+DETAIL:  Label length is 512, must be at most 511, at character 513.
 SELECT ltree2text('1.2.3.34.sdf');
   ltree2text
 --------------
@@ -531,24 +537,24 @@ SELECT '1.2.3|@.4'::lquery;
 ERROR:  lquery syntax error at character 7
 LINE 1: SELECT '1.2.3|@.4'::lquery;
                ^
-SELECT (repeat('x', 255) || '*@@*')::lquery;
-                                                                                                                              lquery
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx@*
+SELECT (repeat('x', 511) || '*@@*')::lquery;
+                                                                                                                                                                                                                                                              lquery
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx@*
 (1 row)

-SELECT (repeat('x', 256) || '*@@*')::lquery;
+SELECT (repeat('x', 512) || '*@@*')::lquery;
 ERROR:  label string is too long
-DETAIL:  Label length is 256, must be at most 255, at character 257.
-SELECT ('!' || repeat('x', 255))::lquery;
-                                                                                                                              lquery
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- !xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+DETAIL:  Label length is 512, must be at most 511, at character 513.
+SELECT ('!' || repeat('x', 511))::lquery;
+                                                                                                                                                                                                                                                              lquery
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ !xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 (1 row)

-SELECT ('!' || repeat('x', 256))::lquery;
+SELECT ('!' || repeat('x', 512))::lquery;
 ERROR:  label string is too long
-DETAIL:  Label length is 256, must be at most 255, at character 258.
+DETAIL:  Label length is 512, must be at most 511, at character 514.
 SELECT nlevel('1.2.3.4');
  nlevel
 --------
@@ -1195,6 +1201,12 @@ SELECT 'tree & aw_qw%*'::ltxtquery;
  tree & aw_qw%*
 (1 row)

+SELECT 'tree & aw-qw%*'::ltxtquery;
+   ltxtquery
+----------------
+ tree & aw-qw%*
+(1 row)
+
 SELECT 'ltree.awdfg'::ltree @ '!tree & aWdf@*'::ltxtquery;
  ?column?
 ----------
diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h
index 2a80a02495..cfcdaba7e9 100644
--- a/contrib/ltree/ltree.h
+++ b/contrib/ltree/ltree.h
@@ -12,10 +12,10 @@

 /*
  * We want the maximum length of a label to be encoding-independent, so
- * set it somewhat arbitrarily at 255 characters (not bytes), while using
+ * set it somewhat arbitrarily at 511 characters (not bytes), while using
  * uint16 fields to hold the byte length.
  */
-#define LTREE_LABEL_MAX_CHARS 255
+#define LTREE_LABEL_MAX_CHARS 511

 /*
  * LOWER_NODE used to be defined in the Makefile via the compile flags.
@@ -126,7 +126,10 @@ typedef struct

 #define LQUERY_HASNOT		0x01

-#define ISALNUM(x)	( t_isalnum(x) || t_iseq(x, '_') )
+#define ISALNUM(x)	( t_isalnum(x) )	/* alphanumeric character */
+#define ISDASH(x)	( t_iseq(x, '_') || t_iseq(x, '-') )	/* underscore or hyphen */
+
+#define ISVALID(x) ( ISALNUM(x) || ISDASH(x) )	/* character allowed in a label */

 /* full text query */

diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c
index f0dd3df511..cd94265352 100644
--- a/contrib/ltree/ltree_io.c
+++ b/contrib/ltree/ltree_io.c
@@ -74,7 +74,7 @@ parse_ltree(const char *buf, struct Node *escontext)
 		switch (state)
 		{
 			case LTPRS_WAITNAME:
-				if (ISALNUM(ptr))
+				if (ISVALID(ptr))
 				{
 					lptr->start = ptr;
 					lptr->wlen = 0;
@@ -92,7 +92,7 @@ parse_ltree(const char *buf, struct Node *escontext)
 					lptr++;
 					state = LTPRS_WAITNAME;
 				}
-				else if (!ISALNUM(ptr))
+				else if (!ISVALID(ptr))
 					UNCHAR;
 				break;
 			default:
@@ -316,7 +316,7 @@ parse_lquery(const char *buf, struct Node *escontext)
 		switch (state)
 		{
 			case LQPRS_WAITLEVEL:
-				if (ISALNUM(ptr))
+				if (ISVALID(ptr))
 				{
 					GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1));
 					lptr->start = ptr;
@@ -339,7 +339,7 @@ parse_lquery(const char *buf, struct Node *escontext)
 					UNCHAR;
 				break;
 			case LQPRS_WAITVAR:
-				if (ISALNUM(ptr))
+				if (ISVALID(ptr))
 				{
 					lptr++;
 					lptr->start = ptr;
@@ -385,7 +385,7 @@ parse_lquery(const char *buf, struct Node *escontext)
 					state = LQPRS_WAITLEVEL;
 					curqlevel = NEXTLEV(curqlevel);
 				}
-				else if (ISALNUM(ptr))
+				else if (ISVALID(ptr))
 				{
 					/* disallow more chars after a flag */
 					if (lptr->flag)
diff --git a/contrib/ltree/ltxtquery_io.c b/contrib/ltree/ltxtquery_io.c
index a16e577303..96a9de0bd9 100644
--- a/contrib/ltree/ltxtquery_io.c
+++ b/contrib/ltree/ltxtquery_io.c
@@ -80,7 +80,7 @@ gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint
 					(state->buf)++;
 					return OPEN;
 				}
-				else if (ISALNUM(state->buf))
+				else if (ISVALID(state->buf))
 				{
 					state->state = INOPERAND;
 					*strval = state->buf;
@@ -93,7 +93,7 @@ gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint
 							 errmsg("operand syntax error")));
 				break;
 			case INOPERAND:
-				if (ISALNUM(state->buf))
+				if (ISVALID(state->buf))
 				{
 					if (*flag)
 						ereturn(state->escontext, ERR,
diff --git a/contrib/ltree/sql/ltree.sql b/contrib/ltree/sql/ltree.sql
index eabef4f851..2f2fda88ee 100644
--- a/contrib/ltree/sql/ltree.sql
+++ b/contrib/ltree/sql/ltree.sql
@@ -8,6 +8,7 @@ WHERE opc.oid >= 16384 AND NOT amvalidate(opc.oid);
 SELECT ''::ltree;
 SELECT '1'::ltree;
 SELECT '1.2'::ltree;
+SELECT '1.2.-3'::ltree;
 SELECT '1.2._3'::ltree;

 -- empty labels not allowed
@@ -15,8 +16,8 @@ SELECT '.2.3'::ltree;
 SELECT '1..3'::ltree;
 SELECT '1.2.'::ltree;

-SELECT repeat('x', 255)::ltree;
-SELECT repeat('x', 256)::ltree;
+SELECT repeat('x', 511)::ltree;
+SELECT repeat('x', 512)::ltree;

 SELECT ltree2text('1.2.3.34.sdf');
 SELECT text2ltree('1.2.3.34.sdf');
@@ -111,10 +112,10 @@ SELECT '1.!.3'::lquery;
 SELECT '1.2.!'::lquery;
 SELECT '1.2.3|@.4'::lquery;

-SELECT (repeat('x', 255) || '*@@*')::lquery;
-SELECT (repeat('x', 256) || '*@@*')::lquery;
-SELECT ('!' || repeat('x', 255))::lquery;
-SELECT ('!' || repeat('x', 256))::lquery;
+SELECT (repeat('x', 511) || '*@@*')::lquery;
+SELECT (repeat('x', 512) || '*@@*')::lquery;
+SELECT ('!' || repeat('x', 511))::lquery;
+SELECT ('!' || repeat('x', 512))::lquery;

 SELECT nlevel('1.2.3.4');
 SELECT nlevel(('1' || repeat('.1', 65534))::ltree);
@@ -233,6 +234,8 @@ SELECT 'QWER_GY'::ltree ~ 'q_t%@*';
 --ltxtquery
 SELECT '!tree & aWdf@*'::ltxtquery;
 SELECT 'tree & aw_qw%*'::ltxtquery;
+SELECT 'tree & aw-qw%*'::ltxtquery;
+
 SELECT 'ltree.awdfg'::ltree @ '!tree & aWdf@*'::ltxtquery;
 SELECT 'tree.awdfg'::ltree @ '!tree & aWdf@*'::ltxtquery;
 SELECT 'tree.awdfg'::ltree @ '!tree | aWdf@*'::ltxtquery;
diff --git a/doc/src/sgml/ltree.sgml b/doc/src/sgml/ltree.sgml
index 508f404ae8..42f659537f 100644
--- a/doc/src/sgml/ltree.sgml
+++ b/doc/src/sgml/ltree.sgml
@@ -23,10 +23,11 @@
   <title>Definitions</title>

   <para>
-   A <firstterm>label</firstterm> is a sequence of alphanumeric characters
-   and underscores (for example, in C locale the characters
-   <literal>A-Za-z0-9_</literal> are allowed).
-   Labels must be less than 256 characters long.
+   A <firstterm>label</firstterm> is a sequence of alphanumeric characters,
+   underscores, and hyphens. Valid alphanumeric character ranges depend on
+   the database locale. For example, in C locale, the characters
+   <literal>A-Za-z0-9_-</literal> are allowed.
+   Labels must be less than 512 characters long.
   </para>

   <para>
--
2.39.0

