> On Fri, Oct 13, 2023 at 03:35:19PM +0200, Dmitry Dolgov wrote:
> > On Fri, Oct 13, 2023 at 05:07:00PM +0900, Michael Paquier wrote:
> > Now, it doesn't mean that this approach with the "powers" will never
> > happen, but based on the set of opinions I am gathering on this thread
> > I would suggest to rework the patch as follows:
> > - First implement an on/off switch that reduces the lists in IN and/or
> > ANY to one parameter.  Simply.
> > - Second, refactor the powers routine.
> > - Third, extend the on/off switch, or just implement a threshold with
> > a second switch.
>
> Well, if it will help move this patch forward, why not. To clarify, I'm
> going to split the current implementation into three patches, one for
> each point you've mentioned.

Here is what I had mind. The first patch implements the basic notion of
merging, and I guess everyone agrees on its usefulness. The second and
third implement merging into groups power of 10, which I still find
useful as well. The last one adds a lower threshold for merging on top
of that. My intentions are to get the first one in, ideally I would love
to see the second and third applied as well.

> > When it comes to my opinion, I am not seeing any objections to the
> > feature as a whole, and I'm OK with the first step.  I'm also OK to
> > keep the door open for more improvements in controlling how these
> > IN/ANY lists show up, but there could be more than just the number of
> > items as parameter (say the query size, different behaviors depending
> > on the number of clauses in queries, subquery context or CTEs/WITH,
> > etc. just to name a few things coming in mind).
>
> Interesting point, but now it's my turn to have troubles imagining the
> case, where list representation could be controlled depending on
> something else than the number of elements in it. Do you have any
> specific example in mind?

In the current patch version I didn't add anything yet to address the
question of having more parameters to tune constants merging. The main
obstacle as I see it is that the information for that has to be
collected when jumbling various query nodes. Anything except information
about the ArrayExpr itself would have to be acquired when jumbling some
other parts of the query, not directly related to the ArrayExpr. It
seems to me this interdependency between otherwise unrelated nodes
outweigh the value it brings, and would require some more elaborate (and
more invasive for the purpose of this patch) mechanism to implement.
>From 4367571d3d886a14690ba31ad0163d0d309a52a3 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthali...@gmail.com>
Date: Sat, 14 Oct 2023 15:00:48 +0200
Subject: [PATCH v15 1/4] Prevent jumbling of every element in ArrayExpr

pg_stat_statements produces multiple entries for queries like

    SELECT something FROM table WHERE col IN (1, 2, 3, ...)

depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.

Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow to enable this behavior via the new
GUC query_id_const_merge with the default value off.

Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier
Tested-by: Chengxi Sun
---
 contrib/pg_stat_statements/Makefile           |   2 +-
 .../pg_stat_statements/expected/merging.out   | 167 ++++++++++++++++++
 contrib/pg_stat_statements/meson.build        |   1 +
 .../pg_stat_statements/pg_stat_statements.c   |  34 +++-
 contrib/pg_stat_statements/sql/merging.sql    |  58 ++++++
 doc/src/sgml/config.sgml                      |  28 +++
 doc/src/sgml/pgstatstatements.sgml            |  25 ++-
 src/backend/nodes/gen_node_support.pl         |   2 +-
 src/backend/nodes/queryjumblefuncs.c          |  86 ++++++++-
 src/backend/utils/misc/guc_tables.c           |  10 ++
 src/backend/utils/misc/postgresql.conf.sample |   2 +-
 src/include/nodes/primnodes.h                 |   2 +
 src/include/nodes/queryjumble.h               |   9 +-
 13 files changed, 406 insertions(+), 20 deletions(-)
 create mode 100644 contrib/pg_stat_statements/expected/merging.out
 create mode 100644 contrib/pg_stat_statements/sql/merging.sql

diff --git a/contrib/pg_stat_statements/Makefile b/contrib/pg_stat_statements/Makefile
index eba4a95d91a..af731fc9a58 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -19,7 +19,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
 
 REGRESS_OPTS = --temp-config $(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
 REGRESS = select dml cursors utility level_tracking planning \
-	user_activity wal cleanup oldextversions
+	user_activity wal cleanup oldextversions merging
 # Disabled because these tests require "shared_preload_libraries=pg_stat_statements",
 # which typical installcheck users do not have (e.g. buildfarm clients).
 NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 00000000000..7711572e0b7
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,167 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                        query                                        | calls 
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9)           |     1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)      |     1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) |     1
+ SELECT pg_stat_statements_reset()                                                   |     1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_const_merge = on;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                   query                    | calls 
+--------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1)  |     1
+ SELECT * FROM test_merge WHERE id IN (...) |     1
+ SELECT pg_stat_statements_reset()          |     1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1)                              |     1
+ SELECT * FROM test_merge WHERE id IN (...)                             |     4
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                          query                           | calls 
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 |     3
+ SELECT pg_stat_statements_reset()                        |     1
+(2 rows)
+
+-- No constants simplification
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                                              query                                                              | calls 
+---------------------------------------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 + $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) |     1
+ SELECT pg_stat_statements_reset()                                                                                               |     1
+(2 rows)
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) |     1
+ SELECT pg_stat_statements_reset()                  |     1
+(2 rows)
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+    SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result 
+--------
+(0 rows)
+
+RESET query_id_const_merge;
diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 15b7c7f2b02..6371c81e138 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -51,6 +51,7 @@ tests += {
       'wal',
       'cleanup',
       'oldextversions',
+      'merging',
     ],
     'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
     # Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index a46f2db352b..0b03009a053 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2695,6 +2695,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 				n_quer_loc = 0, /* Normalized query byte location */
 				last_off = 0,	/* Offset from start for previous tok */
 				last_tok_len = 0;	/* Length (in bytes) of that tok */
+	bool		skip = false; 	/* Signals that certain constants are
+								   merged together and have to be skipped */
+
 
 	/*
 	 * Get constants' lengths (core system only gives us locations).  Note
@@ -2718,7 +2721,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 	{
 		int			off,		/* Offset from start for cur tok */
 					tok_len;	/* Length (in bytes) of that tok */
-
 		off = jstate->clocations[i].location;
 		/* Adjust recorded location if we're dealing with partial string */
 		off -= query_loc;
@@ -2733,12 +2735,32 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 		len_to_wrt -= last_tok_len;
 
 		Assert(len_to_wrt >= 0);
-		memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
-		n_quer_loc += len_to_wrt;
 
-		/* And insert a param symbol in place of the constant token */
-		n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
-							  i + 1 + jstate->highest_extern_param_id);
+		/* Normal path, non merged constant */
+		if (!jstate->clocations[i].merged)
+		{
+			memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+			n_quer_loc += len_to_wrt;
+
+			/* And insert a param symbol in place of the constant token */
+			n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+								  i + 1 + jstate->highest_extern_param_id);
+
+			/* In case previous constants were merged away, stop doing that */
+			if (skip)
+				skip = false;
+		}
+		/* The firsts merged constant */
+		else if (!skip)
+		{
+			memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+			n_quer_loc += len_to_wrt;
+
+			/* Skip the following until a non merged constant appear */
+			skip = true;
+			n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+		}
+		/* Otherwise the constant is merged away */
 
 		quer_loc = off + tok_len;
 		last_off = off;
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 00000000000..02266a92ce8
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,58 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET query_id_const_merge = on;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 6, 7 + 7, 8 + 8, 9 + 9);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+    SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET query_id_const_merge;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 3839c72c868..55b0795bce1 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8190,6 +8190,34 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-query-id-const-merge" xreflabel="query_id_const_merge">
+      <term><varname>query_id_const_merge</varname> (<type>bool</type>)
+      <indexterm>
+       <primary><varname>query_id_const_merge</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies how an array of constants (e.g. for an "IN" clause)
+        contributes to the query identifier computation. Normally every element
+        of an array contributes to the query identifier, which means the same
+        query will get multiple different identifiers, one for each occurrence
+        with an array of different lenght.
+
+        If this parameter is on, an array of constants will contribute only the
+        first and the last elements to the query identifier. It means two
+        occurences of the same query, where the only difference is number of
+        constants in the array, are going to get the same query identifier.
+
+        The parameter could be used to reduce amount of repeating data stored
+        via <xref linkend="pgstatstatements"/> extension.  The default value is off.
+
+        The <xref linkend="pgstatstatements"/> extension will represent such
+        queries in form <literal>'(...)'</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      </variablelist>
 
     </sect2>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 7e7c5c9ff82..8df4064cbf4 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -548,10 +548,27 @@
   <para>
    In some cases, queries with visibly different texts might get merged into a
    single <structname>pg_stat_statements</structname> entry.  Normally this will happen
-   only for semantically equivalent queries, but there is a small chance of
-   hash collisions causing unrelated queries to be merged into one entry.
-   (This cannot happen for queries belonging to different users or databases,
-   however.)
+   only for semantically equivalent queries, or if
+   <xref linkend="guc-query-id-const-merge"/> is enabled and the only
+   difference between queries is the length of an array with constants they contain:
+
+<screen>
+=# SET query_id_const_merge = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+   But there is a small chance of hash collisions causing unrelated queries to
+   be merged into one entry. (This cannot happen for queries belonging to
+   different users or databases, however.)
   </para>
 
   <para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 72c79635781..5b4a3f37a2b 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1308,7 +1308,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
 			# Track the node's location only if directly requested.
 			if ($query_jumble_location)
 			{
-				print $jff "\tJUMBLE_LOCATION($f);\n"
+				print $jff "\tJUMBLE_LOCATION($f, false);\n"
 				  unless $query_jumble_ignore;
 			}
 		}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 281907a4d83..52d45c2b093 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -42,12 +42,16 @@
 /* GUC parameters */
 int			compute_query_id = COMPUTE_QUERY_ID_AUTO;
 
+/* Whether to merge constants in a list when computing query_id */
+bool		query_id_const_merge = false;
+
 /* True when compute_query_id is ON, or AUTO and a module requests them */
 bool		query_id_enabled = false;
 
 static void AppendJumble(JumbleState *jstate,
 						 const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+								int location, bool merged);
 static void _jumbleNode(JumbleState *jstate, Node *node);
 static void _jumbleA_Const(JumbleState *jstate, Node *node);
 static void _jumbleList(JumbleState *jstate, Node *node);
@@ -186,11 +190,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
 }
 
 /*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
  */
 static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
 {
 	/* -1 indicates unknown or undefined location */
 	if (location >= 0)
@@ -206,15 +214,65 @@ RecordConstLocation(JumbleState *jstate, int location)
 		}
 		jstate->clocations[jstate->clocations_count].location = location;
 		/* initialize lengths to -1 to simplify third-party module usage */
+		jstate->clocations[jstate->clocations_count].merged = merged;
 		jstate->clocations[jstate->clocations_count].length = -1;
 		jstate->clocations_count++;
 	}
 }
 
+/*
+ * Verify if the provided list contains could be merged down, which means it
+ * contains only constant expressions.
+ *
+ * Return value indicates if merging is possible.
+ *
+ * Note that this function searches only for explicit Const nodes and does not
+ * try to simplify expressions.
+ */
+static bool
+IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
+{
+	ListCell   *temp;
+	Node	   *firstExpr = NULL;
+
+	if (elements == NULL)
+		return false;
+
+	if (!query_id_const_merge)
+	{
+		/* Merging is disabled, process everything one by one */
+		return false;
+	}
+
+	firstExpr = linitial(elements);
+
+	/*
+	 * If the first expression is a constant, verify if the following elements
+	 * are constants as well. If yes, the list is eligible for merging, and the
+	 * order of magnitude need to be calculated.
+	 */
+	if (IsA(firstExpr, Const))
+	{
+		foreach(temp, elements)
+			if (!IsA(lfirst(temp), Const))
+				return false;
+
+		*firstConst = (Const *) firstExpr;
+		*lastConst = llast_node(Const, elements);
+		return true;
+	}
+
+	/*
+	 * If we end up here, it means no constants merging is possible, process
+	 * the list as usual.
+	 */
+	return false;
+}
+
 #define JUMBLE_NODE(item) \
 	_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
-	RecordConstLocation(jstate, expr->location)
+#define JUMBLE_LOCATION(location, merged) \
+	RecordConstLocation(jstate, expr->location, merged)
 #define JUMBLE_FIELD(item) \
 	AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
 #define JUMBLE_FIELD_SINGLE(item) \
@@ -227,6 +285,22 @@ do { \
 
 #include "queryjumblefuncs.funcs.c"
 
+static void
+_jumbleArrayExpr(JumbleState *jstate, Node *node)
+{
+	ArrayExpr *expr = (ArrayExpr *) node;
+	Const *first, *last;
+	if(IsMergeableConstList(expr->elements, &first, &last))
+	{
+		RecordConstLocation(jstate, first->location, true);
+		RecordConstLocation(jstate, last->location, true);
+	}
+	else
+	{
+		JUMBLE_NODE(elements);
+	}
+}
+
 static void
 _jumbleNode(JumbleState *jstate, Node *node)
 {
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 4c585741661..fbb50adb9f9 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -2011,6 +2011,16 @@ struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"query_id_const_merge", PGC_SUSET, STATS_MONITORING,
+			gettext_noop("Sets whether an array of constants will contribute to"
+						 " query identified computation."),
+		},
+		&query_id_const_merge,
+		false,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d08d55c3fe4..3397780dc72 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -628,7 +628,7 @@
 #log_parser_stats = off
 #log_planner_stats = off
 #log_executor_stats = off
-
+#query_id_const_merge = off
 
 #------------------------------------------------------------------------------
 # AUTOVACUUM
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 60d72a876b4..fde0320b263 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1296,6 +1296,8 @@ typedef struct CaseTestExpr
  */
 typedef struct ArrayExpr
 {
+	pg_node_attr(custom_query_jumble)
+
 	Expr		xpr;
 	/* type of expression result */
 	Oid			array_typeid pg_node_attr(query_jumble_ignore);
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 7649e095aa5..225957d7b3c 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -15,6 +15,7 @@
 #define QUERYJUMBLE_H
 
 #include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
 
 /*
  * Struct for tracking locations/lengths of constants during normalization
@@ -23,6 +24,12 @@ typedef struct LocationLen
 {
 	int			location;		/* start offset in query text */
 	int			length;			/* length in bytes, or -1 to ignore */
+
+	/*
+	 * Indicates the constant represents the beginning or the end of a merged
+	 * constants interval.
+	 */
+	bool		merged;
 } LocationLen;
 
 /*
@@ -61,7 +68,7 @@ enum ComputeQueryIdType
 
 /* GUC parameters */
 extern PGDLLIMPORT int compute_query_id;
-
+extern PGDLLIMPORT bool query_id_const_merge;
 
 extern const char *CleanQuerytext(const char *query, int *location, int *len);
 extern JumbleState *JumbleQuery(Query *query);

base-commit: 22655aa23132a0645fdcdce4b233a1fff0c0cf8f
-- 
2.41.0

>From 0eadf14630fa0e888cdd27e41a091c6564a4637c Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthali...@gmail.com>
Date: Fri, 17 Feb 2023 10:17:55 +0100
Subject: [PATCH v15 2/4] Reusable decimalLength functions

Move out decimalLength functions to reuse in the following patch.
---
 src/backend/utils/adt/numutils.c | 50 +-----------------------
 src/include/utils/numutils.h     | 67 ++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 49 deletions(-)
 create mode 100644 src/include/utils/numutils.h

diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index a597e5ed796..02fe132f285 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -18,9 +18,8 @@
 #include <limits.h>
 #include <ctype.h>
 
-#include "common/int.h"
 #include "utils/builtins.h"
-#include "port/pg_bitutils.h"
+#include "utils/numutils.h"
 
 /*
  * A table of all two-digit numbers. This is used to speed up decimal digit
@@ -38,53 +37,6 @@ static const char DIGIT_TABLE[200] =
 "80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
 "90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
 
-/*
- * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- */
-static inline int
-decimalLength32(const uint32 v)
-{
-	int			t;
-	static const uint32 PowersOfTen[] = {
-		1, 10, 100,
-		1000, 10000, 100000,
-		1000000, 10000000, 100000000,
-		1000000000
-	};
-
-	/*
-	 * Compute base-10 logarithm by dividing the base-2 logarithm by a
-	 * good-enough approximation of the base-2 logarithm of 10
-	 */
-	t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
-	return t + (v >= PowersOfTen[t]);
-}
-
-static inline int
-decimalLength64(const uint64 v)
-{
-	int			t;
-	static const uint64 PowersOfTen[] = {
-		UINT64CONST(1), UINT64CONST(10),
-		UINT64CONST(100), UINT64CONST(1000),
-		UINT64CONST(10000), UINT64CONST(100000),
-		UINT64CONST(1000000), UINT64CONST(10000000),
-		UINT64CONST(100000000), UINT64CONST(1000000000),
-		UINT64CONST(10000000000), UINT64CONST(100000000000),
-		UINT64CONST(1000000000000), UINT64CONST(10000000000000),
-		UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
-		UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
-		UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
-	};
-
-	/*
-	 * Compute base-10 logarithm by dividing the base-2 logarithm by a
-	 * good-enough approximation of the base-2 logarithm of 10
-	 */
-	t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
-	return t + (v >= PowersOfTen[t]);
-}
-
 static const int8 hexlookup[128] = {
 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
diff --git a/src/include/utils/numutils.h b/src/include/utils/numutils.h
new file mode 100644
index 00000000000..876e64f2df9
--- /dev/null
+++ b/src/include/utils/numutils.h
@@ -0,0 +1,67 @@
+/*-------------------------------------------------------------------------
+ *
+ * numutils.h
+ *	  Decimal length functions for numutils.c
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/numutils.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NUMUTILS_H
+#define NUMUTILS_H
+
+#include "common/int.h"
+#include "port/pg_bitutils.h"
+
+/*
+ * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+ */
+static inline int
+decimalLength32(const uint32 v)
+{
+	int			t;
+	static const uint32 PowersOfTen[] = {
+		1, 10, 100,
+		1000, 10000, 100000,
+		1000000, 10000000, 100000000,
+		1000000000
+	};
+
+	/*
+	 * Compute base-10 logarithm by dividing the base-2 logarithm by a
+	 * good-enough approximation of the base-2 logarithm of 10
+	 */
+	t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
+	return t + (v >= PowersOfTen[t]);
+}
+
+static inline int
+decimalLength64(const uint64 v)
+{
+	int			t;
+	static const uint64 PowersOfTen[] = {
+		UINT64CONST(1), UINT64CONST(10),
+		UINT64CONST(100), UINT64CONST(1000),
+		UINT64CONST(10000), UINT64CONST(100000),
+		UINT64CONST(1000000), UINT64CONST(10000000),
+		UINT64CONST(100000000), UINT64CONST(1000000000),
+		UINT64CONST(10000000000), UINT64CONST(100000000000),
+		UINT64CONST(1000000000000), UINT64CONST(10000000000000),
+		UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
+		UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
+		UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
+	};
+
+	/*
+	 * Compute base-10 logarithm by dividing the base-2 logarithm by a
+	 * good-enough approximation of the base-2 logarithm of 10
+	 */
+	t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
+	return t + (v >= PowersOfTen[t]);
+}
+
+#endif							/* NUMUTILS_H */
-- 
2.41.0

>From 4ad001ab9866467c90d5b1ddc664bfe1b6b87ee4 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthali...@gmail.com>
Date: Sun, 15 Oct 2023 10:06:09 +0200
Subject: [PATCH v15 3/4] Merge constants in ArrayExpr into groups

Using query_id_const_merge only first/last element in an ArrayExpr will
be used to compute query id. Extend this to take into account number of
elements, and merge constants into groups based on it. Resulting groups
are powers of 10, i.e. 1 to 9, 10 to 99, etc.
---
 .../pg_stat_statements/expected/merging.out   | 84 +++++++++++++++----
 .../pg_stat_statements/pg_stat_statements.c   | 17 +++-
 contrib/pg_stat_statements/sql/merging.sql    | 13 +++
 doc/src/sgml/config.sgml                      |  9 +-
 doc/src/sgml/pgstatstatements.sgml            |  2 +-
 src/backend/nodes/queryjumblefuncs.c          | 52 ++++++++----
 src/include/nodes/queryjumble.h               |  7 +-
 7 files changed, 142 insertions(+), 42 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 7711572e0b7..1bb75a93045 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -54,11 +54,11 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3);
 (0 rows)
 
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-                   query                    | calls 
---------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN ($1)  |     1
- SELECT * FROM test_merge WHERE id IN (...) |     1
- SELECT pg_stat_statements_reset()          |     1
+                          query                           | calls 
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1)                |     1
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) |     1
+ SELECT pg_stat_statements_reset()                        |     1
 (3 rows)
 
 SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
@@ -80,7 +80,60 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
                                  query                                  | calls 
 ------------------------------------------------------------------------+-------
  SELECT * FROM test_merge WHERE id IN ($1)                              |     1
- SELECT * FROM test_merge WHERE id IN (...)                             |     4
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries])               |     2
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries])             |     2
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(5 rows)
+
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                            query                             | calls 
+--------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [100-999 entries]) |     1
+ SELECT pg_stat_statements_reset()                            |     1
+(2 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                          query                           | calls 
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) |     1
+ SELECT pg_stat_statements_reset()                        |     1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries])               |     1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries])             |     1
  SELECT pg_stat_statements_reset()                                      |     1
  SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
 (4 rows)
@@ -108,11 +161,12 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and dat
 (0 rows)
 
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-                          query                           | calls 
-----------------------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN (...) and data = $3 |     3
- SELECT pg_stat_statements_reset()                        |     1
-(2 rows)
+                                  query                                   | calls 
+--------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) and data = $3   |     1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) and data = $3 |     2
+ SELECT pg_stat_statements_reset()                                        |     1
+(3 rows)
 
 -- No constants simplification
 SELECT pg_stat_statements_reset();
@@ -147,10 +201,10 @@ SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)
 (0 rows)
 
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-                       query                        | calls 
-----------------------------------------------------+-------
- SELECT * FROM test_merge_numeric WHERE id IN (...) |     1
- SELECT pg_stat_statements_reset()                  |     1
+                               query                                | calls 
+--------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (... [10-99 entries]) |     1
+ SELECT pg_stat_statements_reset()                                  |     1
 (2 rows)
 
 -- Test constants evaluation, verifies a tricky part to make sure there are no
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 0b03009a053..24d7401257d 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2697,6 +2697,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 				last_tok_len = 0;	/* Length (in bytes) of that tok */
 	bool		skip = false; 	/* Signals that certain constants are
 								   merged together and have to be skipped */
+	int 		magnitude; 		/* Order of magnitute for number of merged
+								   constants */
 
 
 	/*
@@ -2737,7 +2739,8 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 		Assert(len_to_wrt >= 0);
 
 		/* Normal path, non merged constant */
-		if (!jstate->clocations[i].merged)
+		magnitude = jstate->clocations[i].magnitude;
+		if (magnitude == 0)
 		{
 			memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
 			n_quer_loc += len_to_wrt;
@@ -2753,12 +2756,22 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 		/* The firsts merged constant */
 		else if (!skip)
 		{
+			static const uint32 powers_of_ten[] = {
+				1, 10, 100,
+				1000, 10000, 100000,
+				1000000, 10000000, 100000000,
+				1000000000
+			};
+			int lower_merged = powers_of_ten[magnitude - 1];
+			int upper_merged = powers_of_ten[magnitude];
+
 			memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
 			n_quer_loc += len_to_wrt;
 
 			/* Skip the following until a non merged constant appear */
 			skip = true;
-			n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+			n_quer_loc += sprintf(norm_query + n_quer_loc, "... [%d-%d entries]",
+								  lower_merged, upper_merged - 1);
 		}
 		/* Otherwise the constant is merged away */
 
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 02266a92ce8..75960159a1d 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -27,6 +27,19 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
 SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
 -- More conditions in the query
 SELECT pg_stat_statements_reset();
 
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 55b0795bce1..4fd997284c9 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8205,15 +8205,16 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
         with an array of different lenght.
 
         If this parameter is on, an array of constants will contribute only the
-        first and the last elements to the query identifier. It means two
-        occurences of the same query, where the only difference is number of
-        constants in the array, are going to get the same query identifier.
+        first element, the last element and the number of elements to the query
+        identifier. It means two occurences of the same query, where the only
+        difference is number of constants in the array, are going to get the
+        same query identifier if the arrays are of similar length.
 
         The parameter could be used to reduce amount of repeating data stored
         via <xref linkend="pgstatstatements"/> extension.  The default value is off.
 
         The <xref linkend="pgstatstatements"/> extension will represent such
-        queries in form <literal>'(...)'</literal>.
+        queries in form <literal>'(... [10-99 entries])'</literal>.
        </para>
       </listitem>
      </varlistentry>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 8df4064cbf4..1f6e6a0e76b 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -559,7 +559,7 @@
 =# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
 =# SELECT query, calls FROM pg_stat_statements;
 -[ RECORD 1 ]------------------------------
-query | SELECT * FROM test WHERE a IN (...)
+query | SELECT * FROM test WHERE a IN (... [10-99 entries])
 calls | 2
 -[ RECORD 2 ]------------------------------
 query | SELECT pg_stat_statements_reset()
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 52d45c2b093..b198fe2579e 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -37,6 +37,8 @@
 #include "nodes/queryjumble.h"
 #include "parser/scansup.h"
 
+#include "utils/numutils.h"
+
 #define JUMBLE_SIZE				1024	/* query serialization buffer size */
 
 /* GUC parameters */
@@ -51,7 +53,7 @@ bool		query_id_enabled = false;
 static void AppendJumble(JumbleState *jstate,
 						 const unsigned char *item, Size size);
 static void RecordConstLocation(JumbleState *jstate,
-								int location, bool merged);
+								int location, int magnitude);
 static void _jumbleNode(JumbleState *jstate, Node *node);
 static void _jumbleA_Const(JumbleState *jstate, Node *node);
 static void _jumbleList(JumbleState *jstate, Node *node);
@@ -193,12 +195,15 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
  * Record location of constant within query string of query tree that is
  * currently being walked.
  *
- * Merged argument signals that the constant represents the first or the last
- * element in a series of merged constants, and everything but the first/last
- * element contributes nothing to the jumble hash.
+ * Magnitude argument larger than zero signals that the constant represents the
+ * first or the last element in a series of merged constants, and everything
+ * but such first/last element will contribute nothing to the jumble hash. The
+ * magnitute value specifies order of magnitute (i.e. how many digits it has)
+ * for the number of elements in the series, to represent the fact of merging
+ * later on.
  */
 static void
-RecordConstLocation(JumbleState *jstate, int location, bool merged)
+RecordConstLocation(JumbleState *jstate, int location, int magnitude)
 {
 	/* -1 indicates unknown or undefined location */
 	if (location >= 0)
@@ -214,7 +219,7 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
 		}
 		jstate->clocations[jstate->clocations_count].location = location;
 		/* initialize lengths to -1 to simplify third-party module usage */
-		jstate->clocations[jstate->clocations_count].merged = merged;
+		jstate->clocations[jstate->clocations_count].magnitude = magnitude;
 		jstate->clocations[jstate->clocations_count].length = -1;
 		jstate->clocations_count++;
 	}
@@ -224,24 +229,26 @@ RecordConstLocation(JumbleState *jstate, int location, bool merged)
  * Verify if the provided list contains could be merged down, which means it
  * contains only constant expressions.
  *
- * Return value indicates if merging is possible.
+ * Return value is the order of magnitude (i.e. how many digits it has) for
+ * length of the list (to use for representation purposes later on) if merging
+ * is possible, otherwise zero.
  *
  * Note that this function searches only for explicit Const nodes and does not
  * try to simplify expressions.
  */
-static bool
+static int
 IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
 {
 	ListCell   *temp;
 	Node	   *firstExpr = NULL;
 
 	if (elements == NULL)
-		return false;
+		return 0;
 
 	if (!query_id_const_merge)
 	{
 		/* Merging is disabled, process everything one by one */
-		return false;
+		return 0;
 	}
 
 	firstExpr = linitial(elements);
@@ -255,24 +262,24 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
 	{
 		foreach(temp, elements)
 			if (!IsA(lfirst(temp), Const))
-				return false;
+				return 0;
 
 		*firstConst = (Const *) firstExpr;
 		*lastConst = llast_node(Const, elements);
-		return true;
+		return decimalLength32(elements->length);
 	}
 
 	/*
 	 * If we end up here, it means no constants merging is possible, process
 	 * the list as usual.
 	 */
-	return false;
+	return 0;
 }
 
 #define JUMBLE_NODE(item) \
 	_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location, merged) \
-	RecordConstLocation(jstate, expr->location, merged)
+#define JUMBLE_LOCATION(location, magnitude) \
+	RecordConstLocation(jstate, expr->location, magnitude)
 #define JUMBLE_FIELD(item) \
 	AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
 #define JUMBLE_FIELD_SINGLE(item) \
@@ -290,10 +297,19 @@ _jumbleArrayExpr(JumbleState *jstate, Node *node)
 {
 	ArrayExpr *expr = (ArrayExpr *) node;
 	Const *first, *last;
-	if(IsMergeableConstList(expr->elements, &first, &last))
+	int magnitude = IsMergeableConstList(expr->elements, &first, &last);
+
+	if (magnitude)
 	{
-		RecordConstLocation(jstate, first->location, true);
-		RecordConstLocation(jstate, last->location, true);
+		RecordConstLocation(jstate, first->location, magnitude);
+		RecordConstLocation(jstate, last->location, magnitude);
+
+		/*
+		 * After merging constants down we end up with only two constants, the
+		 * first and the last one. To distinguish the order of magnitute behind
+		 * merged constants, add its value into the jumble.
+		 */
+		JUMBLE_FIELD_SINGLE(magnitude);
 	}
 	else
 	{
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 225957d7b3c..cfa99efc14e 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -27,9 +27,12 @@ typedef struct LocationLen
 
 	/*
 	 * Indicates the constant represents the beginning or the end of a merged
-	 * constants interval.
+	 * constants interval. The value shows how many constants were merged away
+	 * (up to a power of 10), or in other words the order of manitude for
+	 * number of merged constants (i.e. how many digits it has). Otherwise the
+	 * value is 0, indicating that no merging was performed.
 	 */
-	bool		merged;
+	int			magnitude;
 } LocationLen;
 
 /*
-- 
2.41.0

>From e119d74ddd5cd5550246213fc66cd75cae606571 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthali...@gmail.com>
Date: Mon, 16 Oct 2023 16:52:27 +0200
Subject: [PATCH v15 4/4] Add query_id_const_merge_threshold

Extend query_id_const_merge to allow merging only if the number of
elements is larger than specified value, which could be configured using
new GUC query_id_const_merge_threshold.
---
 .../pg_stat_statements/expected/merging.out   | 64 +++++++++++++++++++
 contrib/pg_stat_statements/sql/merging.sql    | 17 +++++
 doc/src/sgml/config.sgml                      | 15 +++++
 doc/src/sgml/pgstatstatements.sgml            |  6 ++
 src/backend/nodes/queryjumblefuncs.c          | 12 +++-
 src/backend/utils/misc/guc_tables.c           | 11 ++++
 src/backend/utils/misc/postgresql.conf.sample |  1 +
 src/include/nodes/queryjumble.h               |  1 +
 8 files changed, 126 insertions(+), 1 deletion(-)

diff --git a/contrib/pg_stat_statements/expected/merging.out b/contrib/pg_stat_statements/expected/merging.out
index 1bb75a93045..81811b2db0d 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -218,4 +218,68 @@ FROM cte;
 --------
 (0 rows)
 
+-- With the threshold
+SET query_id_const_merge_threshold = 10;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                   query                                   | calls 
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) |     1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries])                |     2
+ SELECT pg_stat_statements_reset()                                         |     1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                         query                         | calls 
+-------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) |     1
+ SELECT pg_stat_statements_reset()                     |     1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4)                  |     1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries])             |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(4 rows)
+
 RESET query_id_const_merge;
diff --git a/contrib/pg_stat_statements/sql/merging.sql b/contrib/pg_stat_statements/sql/merging.sql
index 75960159a1d..1dc0fef9984 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -68,4 +68,21 @@ WITH cte AS (
 SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
 FROM cte;
 
+-- With the threshold
+SET query_id_const_merge_threshold = 10;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
 RESET query_id_const_merge;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 4fd997284c9..c31d4806c1c 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8219,6 +8219,21 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-query-id-const-merge-threshold" xreflabel="query_id_const_merge_threshold">
+      <term><varname>query_id_const_merge_threshold</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>query_id_const_merge_threshold</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        If <xref linkend="guc-query-id-const-merge"/> parameter is enabled,
+        specifies the minimal number of element an array must have to apply
+        constants merge.
+       </para>
+      </listitem>
+     </varlistentry>
+
      </variablelist>
 
     </sect2>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 1f6e6a0e76b..eb00c1ecd2a 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -566,6 +566,12 @@ query | SELECT pg_stat_statements_reset()
 calls | 1
 </screen>
 
+   Such constants merging could be configured apply only starting from certain
+   number of constants in the array. The threshold could be specified using
+   <xref linkend="guc-query-id-const-merge"/>.
+  </para>
+
+  <para>
    But there is a small chance of hash collisions causing unrelated queries to
    be merged into one entry. (This cannot happen for queries belonging to
    different users or databases, however.)
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index b198fe2579e..5d3acf1115c 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -47,6 +47,9 @@ int			compute_query_id = COMPUTE_QUERY_ID_AUTO;
 /* Whether to merge constants in a list when computing query_id */
 bool		query_id_const_merge = false;
 
+/* Lower threshold for the list length to merge constants when computing query_id */
+bool		query_id_const_merge_threshold = 1;
+
 /* True when compute_query_id is ON, or AUTO and a module requests them */
 bool		query_id_enabled = false;
 
@@ -227,7 +230,8 @@ RecordConstLocation(JumbleState *jstate, int location, int magnitude)
 
 /*
  * Verify if the provided list contains could be merged down, which means it
- * contains only constant expressions.
+ * contains only constant expressions and the list contains more than
+ * query_id_const_merge_threshold elements.
  *
  * Return value is the order of magnitude (i.e. how many digits it has) for
  * length of the list (to use for representation purposes later on) if merging
@@ -251,6 +255,12 @@ IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
 		return 0;
 	}
 
+	if (elements->length < query_id_const_merge_threshold)
+	{
+		/* The list is not large enough */
+		return 0;
+	}
+
 	firstExpr = linitial(elements);
 
 	/*
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index fbb50adb9f9..ee239ae7302 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3518,6 +3518,17 @@ struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"query_id_const_merge_threshold", PGC_SUSET, STATS_MONITORING,
+			gettext_noop("Sets lower threshold for an array length to apply"
+						 " constants merging when computing query identifier."),
+			gettext_noop("Not used if query_id_const_merge is disabled"),
+		},
+		&query_id_const_merge_threshold,
+		1, 1, INT_MAX,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 3397780dc72..6cc30e116c4 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -629,6 +629,7 @@
 #log_planner_stats = off
 #log_executor_stats = off
 #query_id_const_merge = off
+#query_id_const_merge_threshold = 1
 
 #------------------------------------------------------------------------------
 # AUTOVACUUM
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index cfa99efc14e..8f823c3c491 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -72,6 +72,7 @@ enum ComputeQueryIdType
 /* GUC parameters */
 extern PGDLLIMPORT int compute_query_id;
 extern PGDLLIMPORT bool query_id_const_merge;
+extern PGDLLIMPORT bool query_id_const_merge_threshold;
 
 extern const char *CleanQuerytext(const char *query, int *location, int *len);
 extern JumbleState *JumbleQuery(Query *query);
-- 
2.41.0

Reply via email to