> > > At the same time AFAICT there isn't much more code paths
> > > to worry about in case of a LocationExpr as a node
> >
> > I can imagine there are others like value expressions,
> > row expressions, json array expressions, etc. that we may
> > want to also normalize.

> Exactly. When using a node, one can explicitly wrap whatever is needed
> into it, while otherwise one would need to find a new way to piggy back
> on A_Expr in a new context.

Looking at the VALUES expression case, we will need to carry the info
with SelectStmt and ultimately to RangeTblEntry, which is where the
values_list is. So whichever approach we take, RangeTblEntry will need
the LocationExpr pointer or the additional ParseLoc info I am suggesting.
A_Expr is not used in the values list case.


> I'll take a look at the proposed change, but a bit later.

Here is a v4 to compare with v3.

0001 - the infrastructure to track the boundaries
0002 - the changes to jumbling
0003 - the additional tests introduced in v3


--
Sami
From 6f7f7c2abb9e3cd2b5654869ee3626e6fe6549c5 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-38-230.ec2.internal>
Date: Wed, 21 May 2025 18:55:52 +0000
Subject: [PATCH v4 2/3] Support external parameters for query squashing

62d712ec introduced the concept of element squashing for
query normalization purposes. However, it did not account for
external parameters passed to a list of elements. This adds
support for these types of values and simplifies the squashing
logic further.

Discussion: https://www.postgresql.org/message-id/flat/202505021256.4yaa24s3sytm%40alvherre.pgsql#1195a340edca50cc3b7389a2ba8b0467
---
 .../pg_stat_statements/expected/squashing.out |  14 +-
 .../pg_stat_statements/pg_stat_statements.c   |  84 +++---------
 src/backend/nodes/gen_node_support.pl         |   2 +-
 src/backend/nodes/queryjumblefuncs.c          | 121 +++++++++++-------
 4 files changed, 100 insertions(+), 121 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out
index 7b138af098c..d92cfbd35fb 100644
--- a/contrib/pg_stat_statements/expected/squashing.out
+++ b/contrib/pg_stat_statements/expected/squashing.out
@@ -246,7 +246,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                        query                        | calls 
 ----------------------------------------------------+-------
  SELECT * FROM test_squash_bigint WHERE data IN    +|     1
-         ($1 /*, ... */::bigint)                    | 
+         ($1 /*, ... */)                            | 
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
@@ -353,7 +353,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                        query                        | calls 
 ----------------------------------------------------+-------
  SELECT * FROM test_squash_cast WHERE data IN      +|     1
-         ($1 /*, ... */::int4::casttesttype)        | 
+         ($1 /*, ... */)                            | 
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
@@ -376,7 +376,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                        query                        | calls 
 ----------------------------------------------------+-------
  SELECT * FROM test_squash_jsonb WHERE data IN     +|     1
-         (($1 /*, ... */)::jsonb)                   | 
+         ($1 /*, ... */)                            | 
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
@@ -393,10 +393,10 @@ SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6
 (0 rows)
 
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-                           query                            | calls 
-------------------------------------------------------------+-------
- SELECT * FROM test_squash WHERE id IN ($1 /*, ... */::oid) |     1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t         |     1
+                         query                         | calls 
+-------------------------------------------------------+-------
+ SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t    |     1
 (2 rows)
 
 -- Test constants evaluation in a CTE, which was causing issues in the past
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 9778407cba3..efcad87d684 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2825,10 +2825,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 				n_quer_loc = 0, /* Normalized query byte location */
 				last_off = 0,	/* Offset from start for previous tok */
 				last_tok_len = 0;	/* Length (in bytes) of that tok */
-	bool		in_squashed = false;	/* in a run of squashed consts? */
-	int			skipped_constants = 0;	/* Position adjustment of later
-										 * constants after squashed ones */
-
 
 	/*
 	 * Get constants' lengths (core system only gives us locations).  Note
@@ -2842,9 +2838,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 	 * certainly isn't more than 11 bytes, even if n reaches INT_MAX.  We
 	 * could refine that limit based on the max value of n for the current
 	 * query, but it hardly seems worth any extra effort to do so.
-	 *
-	 * Note this also gives enough room for the commented-out ", ..." list
-	 * syntax used by constant squashing.
 	 */
 	norm_query_buflen = query_len + jstate->clocations_count * 10;
 
@@ -2857,7 +2850,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 					tok_len;	/* Length (in bytes) of that tok */
 
 		off = jstate->clocations[i].location;
-
 		/* Adjust recorded location if we're dealing with partial string */
 		off -= query_loc;
 
@@ -2866,67 +2858,24 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 		if (tok_len < 0)
 			continue;			/* ignore any duplicates */
 
-		/*
-		 * What to do next depends on whether we're squashing constant lists,
-		 * and whether we're already in a run of such constants.
-		 */
-		if (!jstate->clocations[i].squashed)
-		{
-			/*
-			 * This location corresponds to a constant not to be squashed.
-			 * Print what comes before the constant ...
-			 */
-			len_to_wrt = off - last_off;
-			len_to_wrt -= last_tok_len;
-
-			Assert(len_to_wrt >= 0);
+		/* Copy next chunk (what precedes the next constant) */
+		len_to_wrt = off - last_off;
+		len_to_wrt -= last_tok_len;
 
-			memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
-			n_quer_loc += len_to_wrt;
+		Assert(len_to_wrt >= 0);
+		memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+		n_quer_loc += len_to_wrt;
 
-			/* ... and then a param symbol replacing the constant itself */
-			n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
-								  i + 1 + jstate->highest_extern_param_id - skipped_constants);
-
-			/* In case previous constants were merged away, stop doing that */
-			in_squashed = false;
-		}
-		else if (!in_squashed)
-		{
-			/*
-			 * This location is the start position of a run of constants to be
-			 * squashed, so we need to print the representation of starting a
-			 * group of stashed constants.
-			 *
-			 * Print what comes before the constant ...
-			 */
-			len_to_wrt = off - last_off;
-			len_to_wrt -= last_tok_len;
-			Assert(len_to_wrt >= 0);
-			Assert(i + 1 < jstate->clocations_count);
-			Assert(jstate->clocations[i + 1].squashed);
-			memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
-			n_quer_loc += len_to_wrt;
-
-			/* ... and then start a run of squashed constants */
-			n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, ... */",
-								  i + 1 + jstate->highest_extern_param_id - skipped_constants);
-
-			/* The next location will match the block below, to end the run */
-			in_squashed = true;
-
-			skipped_constants++;
-		}
-		else
-		{
-			/*
-			 * The second location of a run of squashable elements; this
-			 * indicates its end.
-			 */
-			in_squashed = false;
-		}
+		/*
+		 * And insert a param symbol in place of the constant token.
+		 *
+		 * However, if we have a squashable list, insert a comment in place of
+		 * the second and remaining values of the list.
+		 */
+		n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d%s",
+							  i + 1 + jstate->highest_extern_param_id,
+							  (jstate->clocations[i].squashed) ? " /*, ... */" : "");
 
-		/* Otherwise the constant is squashed away -- move forward */
 		quer_loc = off + tok_len;
 		last_off = off;
 		last_tok_len = tok_len;
@@ -3017,6 +2966,9 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query,
 
 		Assert(loc >= 0);
 
+		if (locs[i].squashed)
+			continue;			/* squashable list, ignore */
+
 		if (loc <= last_loc)
 			continue;			/* Duplicate constant, ignore */
 
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 77659b0f760..17ba3696226 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1324,7 +1324,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
 			# Node type.  Squash constants if requested.
 			if ($query_jumble_squash)
 			{
-				print $jff "\tJUMBLE_ELEMENTS($f);\n"
+				print $jff "\tJUMBLE_ELEMENTS($f, node);\n"
 				  unless $query_jumble_ignore;
 			}
 			else
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index d1e82a63f09..32bc42bffca 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -60,10 +60,10 @@ static uint64 DoJumble(JumbleState *jstate, Node *node);
 static void AppendJumble(JumbleState *jstate,
 						 const unsigned char *value, Size size);
 static void FlushPendingNulls(JumbleState *jstate);
-static void RecordConstLocation(JumbleState *jstate,
-								int location, bool squashed);
+static void RecordExpressionLocation(JumbleState *jstate,
+									 int location, int len);
 static void _jumbleNode(JumbleState *jstate, Node *node);
-static void _jumbleElements(JumbleState *jstate, List *elements);
+static void _jumbleElements(JumbleState *jstate, List *elements, Node *node);
 static void _jumbleA_Const(JumbleState *jstate, Node *node);
 static void _jumbleList(JumbleState *jstate, Node *node);
 static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
@@ -381,7 +381,7 @@ FlushPendingNulls(JumbleState *jstate)
  * element contributes nothing to the jumble hash.
  */
 static void
-RecordConstLocation(JumbleState *jstate, int location, bool squashed)
+RecordExpressionLocation(JumbleState *jstate, int location, int len)
 {
 	/* -1 indicates unknown or undefined location */
 	if (location >= 0)
@@ -396,9 +396,15 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed)
 						 sizeof(LocationLen));
 		}
 		jstate->clocations[jstate->clocations_count].location = location;
-		/* initialize lengths to -1 to simplify third-party module usage */
-		jstate->clocations[jstate->clocations_count].squashed = squashed;
-		jstate->clocations[jstate->clocations_count].length = -1;
+
+		/*
+		 * initialize lengths to -1 to simplify third-party module usage
+		 *
+		 * If we have a length that is greater than -1, this indicates a
+		 * squashable list.
+		 */
+		jstate->clocations[jstate->clocations_count].length = (len > -1) ? len : -1;
+		jstate->clocations[jstate->clocations_count].squashed = (len > -1) ? true : false;
 		jstate->clocations_count++;
 	}
 }
@@ -413,7 +419,7 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed)
  * - Otherwise test if the expression is a simple Const.
  */
 static bool
-IsSquashableConst(Node *element)
+IsSquashableExpression(Node *element)
 {
 	if (IsA(element, RelabelType))
 		element = (Node *) ((RelabelType *) element)->arg;
@@ -437,22 +443,45 @@ IsSquashableConst(Node *element)
 		{
 			Node	   *arg = lfirst(temp);
 
-			if (!IsA(arg, Const))	/* XXX we could recurse here instead */
-				return false;
+			/* every argument must be a Const or an external Param */
+			switch (nodeTag(arg))
+			{
+				case T_Const:
+					break;
+				case T_Param:
+					/* only external parameters are squashable */
+					if (((Param *) arg)->paramkind != PARAM_EXTERN)
+						return false;
+					break;
+				default:
+					return false;
+			}
 		}
 
 		return true;
 	}
 
-	if (!IsA(element, Const))
-		return false;
+	switch (nodeTag(element))
+	{
+		case T_Const:
+			return true;
+		case T_Param:
+			{
+				Param	   *param = (Param *) element;
 
-	return true;
+				return param->paramkind == PARAM_EXTERN;
+			}
+		default:
+			break;
+	}
+
+	return false;
 }
 
 /*
  * Subroutine for _jumbleElements: Verify whether the provided list
- * can be squashed, meaning it contains only constant expressions.
+ * can be squashed, meaning it contains only constant and external
+ * parameter expressions.
  *
  * Return value indicates if squashing is possible.
  *
@@ -461,7 +490,7 @@ IsSquashableConst(Node *element)
  * expressions.
  */
 static bool
-IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr)
+IsSquashableExpressionList(List *elements)
 {
 	ListCell   *temp;
 
@@ -474,22 +503,19 @@ IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr)
 
 	foreach(temp, elements)
 	{
-		if (!IsSquashableConst(lfirst(temp)))
+		if (!IsSquashableExpression(lfirst(temp)))
 			return false;
 	}
 
-	*firstExpr = linitial(elements);
-	*lastExpr = llast(elements);
-
 	return true;
 }
 
 #define JUMBLE_NODE(item) \
 	_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_ELEMENTS(list) \
-	_jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_ELEMENTS(list, node) \
+	_jumbleElements(jstate, (List *) expr->list, node)
 #define JUMBLE_LOCATION(location) \
-	RecordConstLocation(jstate, expr->location, false)
+	RecordExpressionLocation(jstate, expr->location, -1)
 #define JUMBLE_FIELD(item) \
 do { \
 	if (sizeof(expr->item) == 8) \
@@ -517,36 +543,37 @@ do { \
 #include "queryjumblefuncs.funcs.c"
 
 /*
- * We jumble lists of constant elements as one individual item regardless
- * of how many elements are in the list.  This means different queries
- * jumble to the same query_id, if the only difference is the number of
- * elements in the list.
+ * We try to jumble lists of expressions as one individual item regardless
+ * of how many elements are in the list. This is known as squashing, which
+ * results in different queries jumbling to the same query_id, if the only
+ * difference is the number of elements in the list.
+ *
+ * We allow for Constants and Params of type external to be squashed. To
+ * be able to normalize such queries by stripping away the squashed
+ * values, we must track the start and end of the expression list.
  */
 static void
-_jumbleElements(JumbleState *jstate, List *elements)
+_jumbleElements(JumbleState *jstate, List *elements, Node *node)
 {
-	Node	   *first,
-			   *last;
+	bool		normalize_list = false;
 
-	if (IsSquashableConstList(elements, &first, &last))
+	if (IsSquashableExpressionList(elements))
 	{
-		/*
-		 * If this list of elements is squashable, keep track of the location
-		 * of its first and last elements.  When reading back the locations
-		 * array, we'll see two consecutive locations with ->squashed set to
-		 * true, indicating the location of initial and final elements of this
-		 * list.
-		 *
-		 * For the limited set of cases we support now (implicit coerce via
-		 * FuncExpr, Const) it's fine to use exprLocation of the 'last'
-		 * expression, but if more complex composite expressions are to be
-		 * supported (e.g., OpExpr or FuncExpr as an explicit call), more
-		 * sophisticated tracking will be needed.
-		 */
-		RecordConstLocation(jstate, exprLocation(first), true);
-		RecordConstLocation(jstate, exprLocation(last), true);
+		if (IsA(node, ArrayExpr))
+		{
+			ArrayExpr  *aexpr = (ArrayExpr *) node;
+
+			if (aexpr->list_start > 0 && aexpr->list_end > 0)
+			{
+				RecordExpressionLocation(jstate,
+										 aexpr->list_start + 1,
+										 (aexpr->list_end - aexpr->list_start) - 1);
+				normalize_list = true;
+			}
+		}
 	}
-	else
+
+	if (!normalize_list)
 	{
 		_jumbleNode(jstate, (Node *) elements);
 	}
-- 
2.39.5 (Apple Git-154)

From 8c14c0ebb20e79925fdd8b6bbd4fcce91ba92dcf Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Tue, 20 May 2025 16:12:05 +0200
Subject: [PATCH v4 3/3] Extend ARRAY squashing tests

Testing coverage for ARRAY expressions is not enough. Add more test
cases, similar to already existing ones.
---
 .../pg_stat_statements/expected/squashing.out | 178 ++++++++++++++++++
 contrib/pg_stat_statements/sql/squashing.sql  |  60 ++++++
 2 files changed, 238 insertions(+)

diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out
index d92cfbd35fb..d628a451a1e 100644
--- a/contrib/pg_stat_statements/expected/squashing.out
+++ b/contrib/pg_stat_statements/expected/squashing.out
@@ -429,3 +429,181 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
+-- Nested arrays are squashed only at constants level
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT ARRAY[
+    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+    ];
+                                             array                                             
+-----------------------------------------------------------------------------------------------
+ {{1,2,3,4,5,6,7,8,9,10},{1,2,3,4,5,6,7,8,9,10},{1,2,3,4,5,6,7,8,9,10},{1,2,3,4,5,6,7,8,9,10}}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT ARRAY[                                     +|     1
+     ARRAY[$1 /*, ... */],                         +| 
+     ARRAY[$2 /*, ... */],                         +| 
+     ARRAY[$3 /*, ... */],                         +| 
+     ARRAY[$4 /*, ... */]                          +| 
+     ]                                              | 
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(2 rows)
+
+-- Relabel type
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT ARRAY[1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid];
+        array        
+---------------------
+ {1,2,3,4,5,6,7,8,9}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT ARRAY[$1 /*, ... */]                        |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(2 rows)
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT ARRAY[
+    ('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+	( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+	( '"9"')::jsonb, ( '"10"')::jsonb
+];
+                                       array                                        
+------------------------------------------------------------------------------------
+ {"\"1\"","\"2\"","\"3\"","\"4\"","\"5\"","\"6\"","\"7\"","\"8\"","\"9\"","\"10\""}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT ARRAY[$1 /*, ... */]                        |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(2 rows)
+
+-- CoerceViaIO
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT ARRAY[
+	1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+	4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+	7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+	10::int4::casttesttype, 11::int4::casttesttype
+];
+           array           
+---------------------------
+ {1,2,3,4,5,6,7,8,9,10,11}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT ARRAY[$1 /*, ... */]                        |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(2 rows)
+
+-- CoerceViaIO, SubLink instead of a Const is not squashed
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT ARRAY[
+	(SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+	(SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+	(SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+	(SELECT '"10"')::jsonb
+];
+                                       array                                        
+------------------------------------------------------------------------------------
+ {"\"1\"","\"2\"","\"3\"","\"4\"","\"5\"","\"6\"","\"7\"","\"8\"","\"9\"","\"10\""}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                query                                | calls 
+---------------------------------------------------------------------+-------
+ SELECT ARRAY[                                                      +|     1
+         (SELECT $1)::jsonb, (SELECT $2)::jsonb, (SELECT $3)::jsonb,+| 
+         (SELECT $4)::jsonb, (SELECT $5)::jsonb, (SELECT $6)::jsonb,+| 
+         (SELECT $7)::jsonb, (SELECT $8)::jsonb, (SELECT $9)::jsonb,+| 
+         (SELECT $10)::jsonb                                        +| 
+ ]                                                                   | 
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                  |     1
+(2 rows)
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT ARRAY[
+	abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+	abs(800), abs(900), abs(1000), ((abs(1100)))
+];
+                      array                      
+-------------------------------------------------
+ {100,200,300,400,500,600,700,800,900,1000,1100}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT ARRAY[                                                         +|     1
+         abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7),+| 
+         abs($8), abs($9), abs($10), ((abs($11)))                      +| 
+ ]                                                                      | 
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                     |     1
+(2 rows)
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT ARRAY[
+	1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+	7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint
+];
+           array           
+---------------------------
+ {1,2,3,4,5,6,7,8,9,10,11}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT ARRAY[$1 /*, ... */]                        |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(2 rows)
+
diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql
index 03efd4b40c8..5ac624ae1f7 100644
--- a/contrib/pg_stat_statements/sql/squashing.sql
+++ b/contrib/pg_stat_statements/sql/squashing.sql
@@ -167,3 +167,63 @@ FROM cte;
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Nested arrays are squashed only at constants level
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[
+    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+    ];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Relabel type
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[
+    ('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+	( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+	( '"9"')::jsonb, ( '"10"')::jsonb
+];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[
+	1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+	4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+	7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+	10::int4::casttesttype, 11::int4::casttesttype
+];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO, SubLink instead of a Const is not squashed
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[
+	(SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+	(SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+	(SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+	(SELECT '"10"')::jsonb
+];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[
+	abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+	abs(800), abs(900), abs(1000), ((abs(1100)))
+];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[
+	1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+	7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint
+];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- 
2.39.5 (Apple Git-154)

From 0af27d235ca6fd1db12d81c657d8b349f3b29316 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-38-230.ec2.internal>
Date: Wed, 21 May 2025 17:25:02 +0000
Subject: [PATCH v4 1/3] Add tracking for expression boundaries

This adds the ability to track the locations of the start and
end of a list of elements such as those in an 'IN' list or an
Array expression to support squashing of values for query
normalization purposes. This corrects various normalization
issues that are a result of 62d712ec.

Discussion: https://www.postgresql.org/message-id/flat/202505021256.4yaa24s3sytm%40alvherre.pgsql#1195a340edca50cc3b7389a2ba8b0467
---
 src/backend/parser/gram.y       | 94 +++++++++++++++++++++++----------
 src/backend/parser/parse_expr.c |  4 ++
 src/include/nodes/parsenodes.h  |  4 ++
 src/include/nodes/primnodes.h   |  4 ++
 4 files changed, 79 insertions(+), 27 deletions(-)

diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 0b5652071d1..0cd5f794db3 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -136,6 +136,17 @@ typedef struct KeyActions
 	KeyAction *deleteAction;
 } KeyActions;
 
+/*
+ * Track the start and end of a list in an expression, such as an 'IN' list
+ * or Array Expression
+ */
+typedef struct ListWithBoundary
+{
+	Node	   *expr;
+	ParseLoc	start;
+	ParseLoc	end;
+} ListWithBoundary;
+
 /* ConstraintAttributeSpec yields an integer bitmask of these flags: */
 #define CAS_NOT_DEFERRABLE			0x01
 #define CAS_DEFERRABLE				0x02
@@ -184,7 +195,7 @@ static void doNegateFloat(Float *v);
 static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location);
 static Node *makeOrExpr(Node *lexpr, Node *rexpr, int location);
 static Node *makeNotExpr(Node *expr, int location);
-static Node *makeAArrayExpr(List *elements, int location);
+static Node *makeAArrayExpr(List *elements, int location, int end_location);
 static Node *makeSQLValueFunction(SQLValueFunctionOp op, int32 typmod,
 								  int location);
 static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args,
@@ -269,6 +280,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 	struct KeyAction *keyaction;
 	ReturningClause *retclause;
 	ReturningOptionKind retoptionkind;
+	struct ListWithBoundary *listwithboundary;
 }
 
 %type <node>	stmt toplevel_stmt schema_stmt routine_body_stmt
@@ -523,8 +535,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <defelt>	def_elem reloption_elem old_aggr_elem operator_def_elem
 %type <node>	def_arg columnElem where_clause where_or_current_clause
 				a_expr b_expr c_expr AexprConst indirection_el opt_slice_bound
-				columnref in_expr having_clause func_table xmltable array_expr
+				columnref having_clause func_table xmltable array_expr
 				OptWhereClause operator_def_arg
+%type <listwithboundary> in_expr
 %type <list>	opt_column_and_period_list
 %type <list>	rowsfrom_item rowsfrom_list opt_col_def_list
 %type <boolean> opt_ordinality opt_without_overlaps
@@ -15289,46 +15302,58 @@ a_expr:		c_expr									{ $$ = $1; }
 				}
 			| a_expr IN_P in_expr
 				{
+					ListWithBoundary *n = $3;
+
 					/* in_expr returns a SubLink or a list of a_exprs */
-					if (IsA($3, SubLink))
+					if (IsA(n->expr, SubLink))
 					{
 						/* generate foo = ANY (subquery) */
-						SubLink	   *n = (SubLink *) $3;
-
-						n->subLinkType = ANY_SUBLINK;
-						n->subLinkId = 0;
-						n->testexpr = $1;
-						n->operName = NIL;		/* show it's IN not = ANY */
-						n->location = @2;
-						$$ = (Node *) n;
+						SubLink	   *n2 = (SubLink *) n->expr;
+
+						n2->subLinkType = ANY_SUBLINK;
+						n2->subLinkId = 0;
+						n2->testexpr = $1;
+						n2->operName = NIL;		/* show it's IN not = ANY */
+						n2->location = @2;
+						$$ = (Node *) n2;
 					}
 					else
 					{
 						/* generate scalar IN expression */
-						$$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "=", $1, $3, @2);
+						A_Expr *n2 = makeSimpleA_Expr(AEXPR_IN, "=", $1, n->expr, @2);
+
+						n2->rexpr_list_start = $3->start;
+						n2->rexpr_list_end = $3->end;
+						$$ = (Node *) n2;
 					}
 				}
 			| a_expr NOT_LA IN_P in_expr						%prec NOT_LA
 				{
+					ListWithBoundary *n = $4;
+
 					/* in_expr returns a SubLink or a list of a_exprs */
-					if (IsA($4, SubLink))
+					if (IsA(n->expr, SubLink))
 					{
 						/* generate NOT (foo = ANY (subquery)) */
 						/* Make an = ANY node */
-						SubLink	   *n = (SubLink *) $4;
+						SubLink	   *n2 = (SubLink *) n->expr;
 
-						n->subLinkType = ANY_SUBLINK;
-						n->subLinkId = 0;
-						n->testexpr = $1;
-						n->operName = NIL;		/* show it's IN not = ANY */
-						n->location = @2;
+						n2->subLinkType = ANY_SUBLINK;
+						n2->subLinkId = 0;
+						n2->testexpr = $1;
+						n2->operName = NIL;		/* show it's IN not = ANY */
+						n2->location = @2;
 						/* Stick a NOT on top; must have same parse location */
-						$$ = makeNotExpr((Node *) n, @2);
+						$$ = makeNotExpr((Node *) n2, @2);
 					}
 					else
 					{
 						/* generate scalar NOT IN expression */
-						$$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "<>", $1, $4, @2);
+						A_Expr *n2 = makeSimpleA_Expr(AEXPR_IN, "<>", $1, n->expr, @2);
+
+						n2->rexpr_list_start = $4->start;
+						n2->rexpr_list_end = $4->end;
+						$$ = (Node *) n2;
 					}
 				}
 			| a_expr subquery_Op sub_type select_with_parens	%prec Op
@@ -16764,15 +16789,15 @@ type_list:	Typename								{ $$ = list_make1($1); }
 
 array_expr: '[' expr_list ']'
 				{
-					$$ = makeAArrayExpr($2, @1);
+					$$ = makeAArrayExpr($2, @1, @3);
 				}
 			| '[' array_expr_list ']'
 				{
-					$$ = makeAArrayExpr($2, @1);
+					$$ = makeAArrayExpr($2, @1, @3);
 				}
 			| '[' ']'
 				{
-					$$ = makeAArrayExpr(NIL, @1);
+					$$ = makeAArrayExpr(NIL, @1, @2);
 				}
 		;
 
@@ -16897,12 +16922,25 @@ trim_list:	a_expr FROM expr_list					{ $$ = lappend($3, $1); }
 in_expr:	select_with_parens
 				{
 					SubLink	   *n = makeNode(SubLink);
+					ListWithBoundary *n2 = palloc(sizeof(ListWithBoundary));
 
 					n->subselect = $1;
 					/* other fields will be filled later */
-					$$ = (Node *) n;
+
+					n2->expr = (Node *) n;
+					n2->start = -1;
+					n2->end = -1;
+					$$ = n2;
+				}
+			| '(' expr_list ')'
+				{
+					ListWithBoundary *n = palloc(sizeof(ListWithBoundary));
+
+					n->expr = (Node *) $2;
+					n->start = @1;
+					n->end = @3;
+					$$ = n;
 				}
-			| '(' expr_list ')'						{ $$ = (Node *) $2; }
 		;
 
 /*
@@ -19300,12 +19338,14 @@ makeNotExpr(Node *expr, int location)
 }
 
 static Node *
-makeAArrayExpr(List *elements, int location)
+makeAArrayExpr(List *elements, int location, int location_end)
 {
 	A_ArrayExpr *n = makeNode(A_ArrayExpr);
 
 	n->elements = elements;
 	n->location = location;
+	n->list_start = location;
+	n->list_end = location_end;
 	return (Node *) n;
 }
 
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 1f8e2d54673..7347c989e11 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -1224,6 +1224,8 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
 			newa->elements = aexprs;
 			newa->multidims = false;
 			newa->location = -1;
+			newa->list_start = a->rexpr_list_start;
+			newa->list_end = a->rexpr_list_end;
 
 			result = (Node *) make_scalar_array_op(pstate,
 												   a->name,
@@ -2166,6 +2168,8 @@ transformArrayExpr(ParseState *pstate, A_ArrayExpr *a,
 	newa->element_typeid = element_type;
 	newa->elements = newcoercedelems;
 	newa->location = a->location;
+	newa->list_start = a->list_start;
+	newa->list_end = a->list_end;
 
 	return (Node *) newa;
 }
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 4610fc61293..2f078887d06 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -347,6 +347,8 @@ typedef struct A_Expr
 	Node	   *lexpr;			/* left argument, or NULL if none */
 	Node	   *rexpr;			/* right argument, or NULL if none */
 	ParseLoc	location;		/* token location, or -1 if unknown */
+	ParseLoc	rexpr_list_start;	/* location of the start of the rexpr list */
+	ParseLoc	rexpr_list_end; /* location of the end of the rexpr list */
 } A_Expr;
 
 /*
@@ -502,6 +504,8 @@ typedef struct A_ArrayExpr
 	NodeTag		type;
 	List	   *elements;		/* array element expressions */
 	ParseLoc	location;		/* token location, or -1 if unknown */
+	ParseLoc	list_start;		/* location of the start of the elements list */
+	ParseLoc	list_end;		/* location of the end of the elements list */
 } A_ArrayExpr;
 
 /*
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 7d3b4198f26..773cdd880aa 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1399,6 +1399,10 @@ typedef struct ArrayExpr
 	bool		multidims pg_node_attr(query_jumble_ignore);
 	/* token location, or -1 if unknown */
 	ParseLoc	location;
+	/* location of the start of the elements list */
+	ParseLoc	list_start;
+	/* location of the end of the elements list */
+	ParseLoc	list_end;
 } ArrayExpr;
 
 /*
-- 
2.39.5 (Apple Git-154)

Reply via email to