Hello,

I have pushed that now, and here's a rebase of patch 0003 to add support
for PARAM_EXTERN.  I'm not really sure about this one yet ...

-- 
Álvaro Herrera         PostgreSQL Developer  —  https://www.EnterpriseDB.com/
"La victoria es para quien se atreve a estar solo"
>From 0a836189a2e3af3beeb7e3c55d7d0e4ce99b4e8e Mon Sep 17 00:00:00 2001
From: Sami Imseih <sims...@amazon.com>
Date: Fri, 30 May 2025 11:42:56 -0500
Subject: [PATCH v10] Support Squashing of External Parameters

62d712ec introduced the concept of element squashing for
quwry normalization purposes. However, it did not account for
external parameters passed to a list of elements. This adds
support to these types of values and simplifies the squashing
logic further.
---
 .../pg_stat_statements/expected/extended.out  | 36 ++++++---
 .../pg_stat_statements/expected/squashing.out | 26 +++---
 .../pg_stat_statements/pg_stat_statements.c   |  4 +
 contrib/pg_stat_statements/sql/extended.sql   |  5 +-
 contrib/pg_stat_statements/sql/squashing.sql  |  4 +-
 src/backend/nodes/queryjumblefuncs.c          | 80 ++++++++++++-------
 src/include/nodes/primnodes.h                 |  6 +-
 src/include/nodes/queryjumble.h               | 16 +++-
 8 files changed, 118 insertions(+), 59 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/extended.out b/contrib/pg_stat_statements/expected/extended.out
index 7da308ba84f..6f2c231bf2a 100644
--- a/contrib/pg_stat_statements/expected/extended.out
+++ b/contrib/pg_stat_statements/expected/extended.out
@@ -69,13 +69,13 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
 (4 rows)
 
 -- Various parameter numbering patterns
+-- Unique query IDs with parameter numbers switched.
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
  t 
 ---
  t
 (1 row)
 
--- Unique query IDs with parameter numbers switched.
 SELECT WHERE ($1::int, 7) IN ((8, $2::int), ($3::int, 9)) \bind '1' '2' '3' \g
 --
 (0 rows)
@@ -96,7 +96,24 @@ SELECT WHERE $3::int IN ($1::int, $2::int) \bind '1' '2' '3' \g
 --
 (0 rows)
 
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                            query                             | calls 
+--------------------------------------------------------------+-------
+ SELECT WHERE $1::int IN ($2 /*, ... */)                      |     1
+ SELECT WHERE $1::int IN ($2 /*, ... */)                      |     1
+ SELECT WHERE $1::int IN ($2 /*, ... */)                      |     1
+ SELECT WHERE ($1::int, $4) IN (($5, $2::int), ($3::int, $6)) |     1
+ SELECT WHERE ($2::int, $4) IN (($5, $3::int), ($1::int, $6)) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t           |     1
+(6 rows)
+
 -- Two groups of two queries with the same query ID.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
 SELECT WHERE '1'::int IN ($1::int, '2'::int) \bind '1' \g
 --
 (1 row)
@@ -114,15 +131,10 @@ SELECT WHERE $2::int IN ($1::int, '2'::int) \bind '3' '4' \g
 (0 rows)
 
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-                            query                             | calls 
---------------------------------------------------------------+-------
- SELECT WHERE $1::int IN ($2::int, $3::int)                   |     1
- SELECT WHERE $2::int IN ($1::int, $3::int)                   |     2
- SELECT WHERE $2::int IN ($1::int, $3::int)                   |     2
- SELECT WHERE $2::int IN ($3::int, $1::int)                   |     1
- SELECT WHERE $3::int IN ($1::int, $2::int)                   |     1
- SELECT WHERE ($1::int, $4) IN (($5, $2::int), ($3::int, $6)) |     1
- SELECT WHERE ($2::int, $4) IN (($5, $3::int), ($1::int, $6)) |     1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t           |     1
-(8 rows)
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT WHERE $1::int IN ($2 /*, ... */)            |     2
+ SELECT WHERE $1::int IN ($2 /*, ... */)            |     2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(3 rows)
 
diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out
index 7b935d464ec..e5dd9337165 100644
--- a/contrib/pg_stat_statements/expected/squashing.out
+++ b/contrib/pg_stat_statements/expected/squashing.out
@@ -103,7 +103,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
--- external parameters will not be squashed
+-- external parameters will be squashed
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
  t 
 ---
@@ -123,14 +123,14 @@ SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1, $2, $3, $4, $5]) \bind
 (0 rows)
 
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-                                   query                                   | calls 
----------------------------------------------------------------------------+-------
- SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5)                |     1
- SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1, $2, $3, $4, $5]) |     1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t                        |     1
+                                query                                 | calls 
+----------------------------------------------------------------------+-------
+ SELECT * FROM test_squash WHERE id IN ($1 /*, ... */)                |     1
+ SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1 /*, ... */]) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                   |     1
 (3 rows)
 
--- neither are prepared statements
+-- prepared statements will also be squashed
 -- the IN and ARRAY forms of this statement will have the same queryId
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
  t 
@@ -155,12 +155,12 @@ EXECUTE p1(1, 2, 3, 4, 5);
 
 DEALLOCATE p1;
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-                           query                            | calls 
-------------------------------------------------------------+-------
- DEALLOCATE $1                                              |     2
- PREPARE p1(int, int, int, int, int) AS                    +|     2
- SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5) | 
- SELECT pg_stat_statements_reset() IS NOT NULL AS t         |     1
+                         query                         | calls 
+-------------------------------------------------------+-------
+ DEALLOCATE $1                                         |     2
+ PREPARE p1(int, int, int, int, int) AS               +|     2
+ SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | 
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t    |     1
 (3 rows)
 
 -- More conditions in the query
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index ecc7f2fb266..97b62b635bc 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2841,6 +2841,10 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 		int			off,		/* Offset from start for cur tok */
 					tok_len;	/* Length (in bytes) of that tok */
 
+		if (jstate->clocations[i].extern_param &&
+			!jstate->has_squashed_lists)
+			continue;
+
 		off = jstate->clocations[i].location;
 
 		/* Adjust recorded location if we're dealing with partial string */
diff --git a/contrib/pg_stat_statements/sql/extended.sql b/contrib/pg_stat_statements/sql/extended.sql
index a366658a53a..ffb5b162819 100644
--- a/contrib/pg_stat_statements/sql/extended.sql
+++ b/contrib/pg_stat_statements/sql/extended.sql
@@ -21,17 +21,18 @@ SELECT $1 \bind 'unnamed_val1' \g
 SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
 
 -- Various parameter numbering patterns
-SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 -- Unique query IDs with parameter numbers switched.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 SELECT WHERE ($1::int, 7) IN ((8, $2::int), ($3::int, 9)) \bind '1' '2' '3' \g
 SELECT WHERE ($2::int, 10) IN ((11, $3::int), ($1::int, 12)) \bind '1' '2' '3' \g
 SELECT WHERE $1::int IN ($2::int, $3::int) \bind '1' '2' '3' \g
 SELECT WHERE $2::int IN ($3::int, $1::int) \bind '1' '2' '3' \g
 SELECT WHERE $3::int IN ($1::int, $2::int) \bind '1' '2' '3' \g
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 -- Two groups of two queries with the same query ID.
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 SELECT WHERE '1'::int IN ($1::int, '2'::int) \bind '1' \g
 SELECT WHERE '4'::int IN ($1::int, '5'::int) \bind '2' \g
 SELECT WHERE $2::int IN ($1::int, '1'::int) \bind '1' '2' \g
 SELECT WHERE $2::int IN ($1::int, '2'::int) \bind '3' '4' \g
-
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql
index bd3243ec9cd..1e36708778a 100644
--- a/contrib/pg_stat_statements/sql/squashing.sql
+++ b/contrib/pg_stat_statements/sql/squashing.sql
@@ -32,7 +32,7 @@ SELECT WHERE 1 IN (1, int4(1), int4(2), 2);
 SELECT WHERE 1 = ANY (ARRAY[1, int4(1), int4(2), 2]);
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
--- external parameters will not be squashed
+-- external parameters will be squashed
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5)  \bind 1 2 3 4 5
 ;
@@ -40,7 +40,7 @@ SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1, $2, $3, $4, $5]) \bind
 ;
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
--- neither are prepared statements
+-- prepared statements will also be squashed
 -- the IN and ARRAY forms of this statement will have the same queryId
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 PREPARE p1(int, int, int, int, int) AS
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index fb33e6931ad..0f81a08704d 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -61,6 +61,7 @@ static void AppendJumble(JumbleState *jstate,
 						 const unsigned char *value, Size size);
 static void FlushPendingNulls(JumbleState *jstate);
 static void RecordConstLocation(JumbleState *jstate,
+								bool extern_param,
 								int location, int len);
 static void _jumbleNode(JumbleState *jstate, Node *node);
 static void _jumbleElements(JumbleState *jstate, List *elements, Node *node);
@@ -70,6 +71,7 @@ static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
 static void _jumbleRangeTblEntry_eref(JumbleState *jstate,
 									  RangeTblEntry *rte,
 									  Alias *expr);
+static void _jumbleParam(JumbleState *jstate, Node *node);
 
 /*
  * Given a possibly multi-statement source string, confine our attention to the
@@ -185,6 +187,7 @@ InitJumble(void)
 	jstate->clocations_count = 0;
 	jstate->highest_extern_param_id = 0;
 	jstate->pending_nulls = 0;
+	jstate->has_squashed_lists = false;
 #ifdef USE_ASSERT_CHECKING
 	jstate->total_jumble_len = 0;
 #endif
@@ -207,6 +210,10 @@ DoJumble(JumbleState *jstate, Node *node)
 	if (jstate->pending_nulls > 0)
 		FlushPendingNulls(jstate);
 
+	/* Squashed list found, reset highest_extern_param_id */
+	if (jstate->has_squashed_lists)
+		jstate->highest_extern_param_id = 0;
+
 	/* Process the jumble buffer and produce the hash value */
 	return DatumGetInt64(hash_any_extended(jstate->jumble,
 										   jstate->jumble_len,
@@ -376,14 +383,14 @@ FlushPendingNulls(JumbleState *jstate)
  * Record the location of some kind of constant within a query string.
  * These are not only bare constants but also expressions that ultimately
  * constitute a constant, such as those inside casts and simple function
- * calls.
+ * calls; if extern_param, then it corresponds to a PARAM_EXTERN Param.
  *
  * If length is -1, it indicates a single such constant element.  If
  * it's a positive integer, it indicates the length of a squashable
  * list of them.
  */
 static void
-RecordConstLocation(JumbleState *jstate, int location, int len)
+RecordConstLocation(JumbleState *jstate, bool extern_param, int location, int len)
 {
 	/* -1 indicates unknown or undefined location */
 	if (location >= 0)
@@ -406,6 +413,7 @@ RecordConstLocation(JumbleState *jstate, int location, int len)
 		Assert(len > -1 || len == -1);
 		jstate->clocations[jstate->clocations_count].length = len;
 		jstate->clocations[jstate->clocations_count].squashed = (len > -1);
+		jstate->clocations[jstate->clocations_count].extern_param = extern_param;
 		jstate->clocations_count++;
 	}
 }
@@ -422,6 +430,7 @@ RecordConstLocation(JumbleState *jstate, int location, int len)
 static bool
 IsSquashableConstant(Node *element)
 {
+	/* Unwrap RelabelType and CoerceViaIO layers */
 	if (IsA(element, RelabelType))
 		element = (Node *) ((RelabelType *) element)->arg;
 
@@ -430,6 +439,12 @@ IsSquashableConstant(Node *element)
 
 	switch (nodeTag(element))
 	{
+		case T_Param:
+			return castNode(Param, element)->paramkind == PARAM_EXTERN;
+
+		case T_Const:
+			return true;
+
 		case T_FuncExpr:
 			{
 				FuncExpr   *func = (FuncExpr *) element;
@@ -468,11 +483,8 @@ IsSquashableConstant(Node *element)
 			}
 
 		default:
-			if (!IsA(element, Const))
-				return false;
+			return false;
 	}
-
-	return true;
 }
 
 /*
@@ -508,7 +520,7 @@ IsSquashableConstantList(List *elements)
 #define JUMBLE_ELEMENTS(list, node) \
 	_jumbleElements(jstate, (List *) expr->list, node)
 #define JUMBLE_LOCATION(location) \
-	RecordConstLocation(jstate, expr->location, -1)
+	RecordConstLocation(jstate, false, expr->location, -1)
 #define JUMBLE_FIELD(item) \
 do { \
 	if (sizeof(expr->item) == 8) \
@@ -558,9 +570,11 @@ _jumbleElements(JumbleState *jstate, List *elements, Node *node)
 			if (aexpr->list_start > 0 && aexpr->list_end > 0)
 			{
 				RecordConstLocation(jstate,
+									false,
 									aexpr->list_start + 1,
 									(aexpr->list_end - aexpr->list_start) - 1);
 				normalize_list = true;
+				jstate->has_squashed_lists = true;
 			}
 		}
 	}
@@ -612,26 +626,6 @@ _jumbleNode(JumbleState *jstate, Node *node)
 			break;
 	}
 
-	/* Special cases to handle outside the automated code */
-	switch (nodeTag(expr))
-	{
-		case T_Param:
-			{
-				Param	   *p = (Param *) node;
-
-				/*
-				 * Update the highest Param id seen, in order to start
-				 * normalization correctly.
-				 */
-				if (p->paramkind == PARAM_EXTERN &&
-					p->paramid > jstate->highest_extern_param_id)
-					jstate->highest_extern_param_id = p->paramid;
-			}
-			break;
-		default:
-			break;
-	}
-
 	/* Ensure we added something to the jumble buffer */
 	Assert(jstate->total_jumble_len > prev_jumble_len);
 }
@@ -734,3 +728,35 @@ _jumbleRangeTblEntry_eref(JumbleState *jstate,
 	 */
 	JUMBLE_STRING(aliasname);
 }
+
+/*
+ * Custom query jumble function for _jumbleParam.
+ */
+static void
+_jumbleParam(JumbleState *jstate, Node *node)
+{
+	Param	   *expr = (Param *) node;
+
+	JUMBLE_FIELD(paramkind);
+	JUMBLE_FIELD(paramid);
+	JUMBLE_FIELD(paramtype);
+
+	if (expr->paramkind == PARAM_EXTERN)
+	{
+		/*
+		 * At this point, only external parameter locations outside of
+		 * squashable lists will be recorded.
+		 */
+		RecordConstLocation(jstate, true, expr->location, -1);
+
+		/*
+		 * Update the highest Param id seen, in order to start normalization
+		 * correctly.
+		 *
+		 * Note: This value is reset at the end of jumbling if there exists a
+		 * squashable list. See the comment in the definition of JumbleState.
+		 */
+		if (expr->paramid > jstate->highest_extern_param_id)
+			jstate->highest_extern_param_id = expr->paramid;
+	}
+}
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 01510b01b64..6dfca3cb35b 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -389,14 +389,16 @@ typedef enum ParamKind
 
 typedef struct Param
 {
+	pg_node_attr(custom_query_jumble)
+
 	Expr		xpr;
 	ParamKind	paramkind;		/* kind of parameter. See above */
 	int			paramid;		/* numeric ID for parameter */
 	Oid			paramtype;		/* pg_type OID of parameter's datatype */
 	/* typmod value, if known */
-	int32		paramtypmod pg_node_attr(query_jumble_ignore);
+	int32		paramtypmod;
 	/* OID of collation, or InvalidOid if none */
-	Oid			paramcollid pg_node_attr(query_jumble_ignore);
+	Oid			paramcollid;
 	/* token location, or -1 if unknown */
 	ParseLoc	location;
 } Param;
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index da7c7abed2e..bab971162dc 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -29,6 +29,13 @@ typedef struct LocationLen
 	 * of squashed constants.
 	 */
 	bool		squashed;
+
+	/*
+	 * Indicates whether a location is that of an external parameter, so it
+	 * can be decided during normalization whether the parameter number should
+	 * be replaced or kept as is.
+	 */
+	bool		extern_param;
 } LocationLen;
 
 /*
@@ -52,8 +59,15 @@ typedef struct JumbleState
 	/* Current number of valid entries in clocations array */
 	int			clocations_count;
 
-	/* highest Param id we've seen, in order to start normalization correctly */
+	/*
+	 * Highest Param id we've seen, in order to start normalization correctly.
+	 * However, if the jumble contains at least one squashed list, we
+	 * disregard the highest_extern_param_id value because parameters can
+	 * exist within the squashed list and are no longer considered for
+	 * normalization.
+	 */
 	int			highest_extern_param_id;
+	bool		has_squashed_lists;
 
 	/*
 	 * Count of the number of NULL nodes seen since last appending a value.
-- 
2.39.5

Reply via email to