> On Thu, Nov 28, 2024 at 08:36:47PM GMT, Kirill Reshke wrote:
>
> Hi! Can you please send a rebased version of this?

Sure, here it is.
>From 2de1af6489d46449b2884a9194515cd1090d5e8c Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthali...@gmail.com>
Date: Tue, 3 Dec 2024 14:55:45 +0100
Subject: [PATCH v22 1/4] Prevent jumbling of every element in ArrayExpr

pg_stat_statements produces multiple entries for queries like

    SELECT something FROM table WHERE col IN (1, 2, 3, ...)

depending on the number of parameters, because every element of
ArrayExpr is jumbled. In certain situations it's undesirable, especially
if the list becomes too large.

Make an array of Const expressions contribute only the first/last
elements to the jumble hash. Allow enabling this behavior via the new
pg_stat_statements parameter query_id_const_merge, which is off by default.

Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane,
Michael Paquier, Sergei Kornilov, Alvaro Herrera, David Geier, Sutou Kouhei
Tested-by: Chengxi Sun, Yasuo Honda
---
 contrib/pg_stat_statements/Makefile           |   2 +-
 .../pg_stat_statements/expected/merging.out   | 167 ++++++++++++++++++
 contrib/pg_stat_statements/meson.build        |   1 +
 .../pg_stat_statements/pg_stat_statements.c   |  74 +++++++-
 contrib/pg_stat_statements/sql/merging.sql    |  58 ++++++
 doc/src/sgml/pgstatstatements.sgml            |  57 +++++-
 src/backend/nodes/gen_node_support.pl         |  21 ++-
 src/backend/nodes/queryjumblefuncs.c          | 107 ++++++++++-
 src/backend/postmaster/launch_backend.c       |   3 +
 src/backend/utils/misc/postgresql.conf.sample |   1 -
 src/include/nodes/nodes.h                     |   3 +
 src/include/nodes/primnodes.h                 |   2 +-
 src/include/nodes/queryjumble.h               |   9 +-
 13 files changed, 479 insertions(+), 26 deletions(-)
 create mode 100644 contrib/pg_stat_statements/expected/merging.out
 create mode 100644 contrib/pg_stat_statements/sql/merging.sql

diff --git a/contrib/pg_stat_statements/Makefile 
b/contrib/pg_stat_statements/Makefile
index 241c02587b..eef8d69cc4 100644
--- a/contrib/pg_stat_statements/Makefile
+++ b/contrib/pg_stat_statements/Makefile
@@ -20,7 +20,7 @@ LDFLAGS_SL += $(filter -lm, $(LIBS))
 REGRESS_OPTS = --temp-config 
$(top_srcdir)/contrib/pg_stat_statements/pg_stat_statements.conf
 REGRESS = select dml cursors utility level_tracking planning \
        user_activity wal entry_timestamp privileges extended \
-       parallel cleanup oldextversions
+       parallel cleanup oldextversions merging
 # Disabled because these tests require 
"shared_preload_libraries=pg_stat_statements",
 # which typical installcheck users do not have (e.g. buildfarm clients).
 NO_INSTALLCHECK = 1
diff --git a/contrib/pg_stat_statements/expected/merging.out 
b/contrib/pg_stat_statements/expected/merging.out
new file mode 100644
index 0000000000..1e58283afe
--- /dev/null
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -0,0 +1,167 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                        query                                  
      | calls 
+-------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9)     
      |     1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, 
$10)      |     1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, 
$10, $11) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                            
      |     1
+(4 rows)
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1)          |     1
+ SELECT * FROM test_merge WHERE id IN (...)         |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1)                              |     1
+ SELECT * FROM test_merge WHERE id IN (...)                             |     4
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                     |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(4 rows)
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data 
= 2;
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and 
data = 2;
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                          query                           | calls 
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3 |     3
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t       |     1
+(2 rows)
+
+-- No constants simplification
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 
6, 7 + 7, 8 + 8, 9 + 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                                              query            
                                                  | calls 
+---------------------------------------------------------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1 + $2, $3 + $4, $5 + $6, $7 + $8, $9 
+ $10, $11 + $12, $13 + $14, $15 + $16, $17 + $18) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                            
                                                  |     1
+(2 rows)
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(2 rows)
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+    SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result 
+--------
+(0 rows)
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/contrib/pg_stat_statements/meson.build 
b/contrib/pg_stat_statements/meson.build
index e659b5e2b7..12a0e757be 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -56,6 +56,7 @@ tests += {
       'parallel',
       'cleanup',
       'oldextversions',
+      'merging',
     ],
     'regress_args': ['--temp-config', files('pg_stat_statements.conf')],
     # Disabled because these tests require
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c 
b/contrib/pg_stat_statements/pg_stat_statements.c
index 49c657b3e0..bcbf1164c1 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -267,6 +267,9 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
 static ProcessUtility_hook_type prev_ProcessUtility = NULL;
 
+/* An assign hook to keep query_id_const_merge in sync */
+static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+
 /* Links to shared memory state */
 static pgssSharedState *pgss = NULL;
 static HTAB *pgss_hash = NULL;
@@ -294,7 +297,8 @@ static bool pgss_track_utility = true;      /* whether to 
track utility commands */
 static bool pgss_track_planning = false;       /* whether to track planning
                                                                                
         * duration */
 static bool pgss_save = true;  /* whether to save stats across shutdown */
-
+static bool pgss_query_id_const_merge = false; /* request constants merging
+                                                                               
                 * when computing query_id */
 
 #define pgss_enabled(level) \
        (!IsParallelWorker() && \
@@ -459,8 +463,21 @@ _PG_init(void)
                                                         NULL,
                                                         NULL);
 
+       DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
+                                                        "Whether to merge 
constants in a list when computing query_id.",
+                                                        NULL,
+                                                        
&pgss_query_id_const_merge,
+                                                        false,
+                                                        PGC_SUSET,
+                                                        0,
+                                                        NULL,
+                                                        
pgss_query_id_const_merge_assign_hook,
+                                                        NULL);
+
        MarkGUCPrefixReserved("pg_stat_statements");
 
+       SetQueryIdConstMerge(pgss_query_id_const_merge);
+
        /*
         * Install hooks.
         */
@@ -2810,6 +2827,10 @@ generate_normalized_query(JumbleState *jstate, const 
char *query,
                                n_quer_loc = 0, /* Normalized query byte 
location */
                                last_off = 0,   /* Offset from start for 
previous tok */
                                last_tok_len = 0;       /* Length (in bytes) of 
that tok */
+       bool            merged_interval = false;        /* Currently processed 
constants
+                                                                               
           belong to a merged constants
+                                                                               
           interval. */
+
 
        /*
         * Get constants' lengths (core system only gives us locations).  Note
@@ -2833,7 +2854,6 @@ generate_normalized_query(JumbleState *jstate, const char 
*query,
        {
                int                     off,            /* Offset from start 
for cur tok */
                                        tok_len;        /* Length (in bytes) of 
that tok */
-
                off = jstate->clocations[i].location;
                /* Adjust recorded location if we're dealing with partial 
string */
                off -= query_loc;
@@ -2848,13 +2868,44 @@ generate_normalized_query(JumbleState *jstate, const 
char *query,
                len_to_wrt -= last_tok_len;
 
                Assert(len_to_wrt >= 0);
-               memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
-               n_quer_loc += len_to_wrt;
 
-               /* And insert a param symbol in place of the constant token */
-               n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
-                                                         i + 1 + 
jstate->highest_extern_param_id);
+               /* Normal path, non merged constant */
+               if (!jstate->clocations[i].merged)
+               {
+                       memcpy(norm_query + n_quer_loc, query + quer_loc, 
len_to_wrt);
+                       n_quer_loc += len_to_wrt;
+
+                       /* And insert a param symbol in place of the constant 
token */
+                       n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+                                                                 i + 1 + 
jstate->highest_extern_param_id);
 
+                       /* In case previous constants were merged away, stop 
doing that */
+                       merged_interval = false;
+               }
+               else if (!merged_interval)
+               {
+                       /*
+                        * We are not inside a merged interval yet, which means 
it is the
+                        * first merged constant.
+                        *
+                        * A merged constants interval must be represented via 
two
+                        * constants with the merged flag. Currently we are at 
the first,
+                        * verify there is another one.
+                        */
+                       Assert(i + 1 < jstate->clocations_count);
+                       Assert(jstate->clocations[i + 1].merged);
+
+                       memcpy(norm_query + n_quer_loc, query + quer_loc, 
len_to_wrt);
+                       n_quer_loc += len_to_wrt;
+
+                       /* Remember to skip until a non merged constant appears 
*/
+                       merged_interval = true;
+
+                       /* Mark the interval in the normalized query */
+                       n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+               }
+
+               /* Otherwise the constant is merged away, move forward */
                quer_loc = off + tok_len;
                last_off = off;
                last_tok_len = tok_len;
@@ -3012,3 +3063,12 @@ comp_location(const void *a, const void *b)
 
        return pg_cmp_s32(l, r);
 }
+
+/*
+ * Assign hook: propagate the new query_id_const_merge value to query jumbling
+ */
+static void
+pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+{
+       SetQueryIdConstMerge(newvalue);
+}
diff --git a/contrib/pg_stat_statements/sql/merging.sql 
b/contrib/pg_stat_statements/sql/merging.sql
new file mode 100644
index 0000000000..71985bb1cd
--- /dev/null
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -0,0 +1,58 @@
+--
+-- Const merging functionality
+--
+CREATE EXTENSION pg_stat_statements;
+
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging is performed, as a baseline result
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal scenario, too many simple constants for an IN query
+SET pg_stat_statements.query_id_const_merge = on;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- More conditions in the query
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9) and data = 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data 
= 2;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) and 
data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- No constants simplification
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+SELECT * FROM test_merge WHERE id IN (1 + 1, 2 + 2, 3 + 3, 4 + 4, 5 + 5, 6 + 
6, 7 + 7, 8 + 8, 9 + 9);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Numeric type
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test constants evaluation, verifies a tricky part to make sure there are no
+-- issues in the merging implementation
+WITH cte AS (
+    SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET pg_stat_statements.query_id_const_merge;
diff --git a/doc/src/sgml/pgstatstatements.sgml 
b/doc/src/sgml/pgstatstatements.sgml
index 501b468e9a..2276783786 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -621,11 +621,29 @@
 
   <para>
    In some cases, queries with visibly different texts might get merged into a
-   single <structname>pg_stat_statements</structname> entry.  Normally this 
will happen
-   only for semantically equivalent queries, but there is a small chance of
-   hash collisions causing unrelated queries to be merged into one entry.
-   (This cannot happen for queries belonging to different users or databases,
-   however.)
+   single <structname>pg_stat_statements</structname> entry.  Normally this
+   will happen only for semantically equivalent queries, or if
+   <varname>pg_stat_statements.query_id_const_merge</varname> is enabled and
+   the only difference between queries is the length of an array with constants
+   they contain:
+
+<screen>
+=# SET pg_stat_statements.query_id_const_merge = on;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+   But there is a small chance of hash collisions causing unrelated queries to
+   be merged into one entry. (This cannot happen for queries belonging to
+   different users or databases, however.)
   </para>
 
   <para>
@@ -956,6 +974,35 @@
      </para>
     </listitem>
    </varlistentry>
+
+   <varlistentry>
+    <term>
+    <varname>pg_stat_statements.query_id_const_merge</varname> 
(<type>bool</type>)
+    <indexterm>
+     <primary><varname>pg_stat_statements.query_id_const_merge</varname> 
configuration parameter</primary>
+    </indexterm>
+    </term>
+
+    <listitem>
+     <para>
+      Specifies how an array of constants (e.g. for an "IN" clause)
+      contributes to the query identifier computation. Normally every element
+      of an array contributes to the query identifier, which means the same
+      query will get multiple different identifiers, one for each occurrence
+      with an array of different length.
+
+      If this parameter is on, an array of constants will contribute only the
+      first and the last elements to the query identifier. It means two
+      occurrences of the same query, where the only difference is the number of
+      constants in the array, are going to get the same query identifier.
+      Such queries are represented in the form <literal>'(...)'</literal>.
+
+      This parameter can be used to reduce the amount of repetitive data stored
+      via <structname>pg_stat_statements</structname>.  The default value is 
off.
+     </para>
+    </listitem>
+   </varlistentry>
+
   </variablelist>
 
   <para>
diff --git a/src/backend/nodes/gen_node_support.pl 
b/src/backend/nodes/gen_node_support.pl
index 81df3bdf95..d2a276c303 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -475,6 +475,7 @@ foreach my $infile (@ARGV)
                                                                
equal_ignore_if_zero
                                                                
query_jumble_ignore
                                                                
query_jumble_location
+                                                               
query_jumble_merge
                                                                
read_write_ignore
                                                                
write_only_relids
                                                                
write_only_nondefault_pathtarget
@@ -1282,6 +1283,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
                my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
                my $query_jumble_ignore = $struct_no_query_jumble;
                my $query_jumble_location = 0;
+               my $query_jumble_merge = 0;
 
                # extract per-field attributes
                foreach my $a (@a)
@@ -1294,21 +1296,34 @@ _jumble${n}(JumbleState *jstate, Node *node)
                        {
                                $query_jumble_location = 1;
                        }
+                       elsif ($a eq 'query_jumble_merge')
+                       {
+                               $query_jumble_merge = 1;
+                       }
                }
 
                # node type
                if (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
                        and elem $1, @node_types)
                {
-                       print $jff "\tJUMBLE_NODE($f);\n"
-                         unless $query_jumble_ignore;
+                       # Merge constants if requested.
+                       if ($query_jumble_merge)
+                       {
+                               print $jff "\tJUMBLE_ELEMENTS($f);\n"
+                                 unless $query_jumble_ignore;
+                       }
+                       else
+                       {
+                               print $jff "\tJUMBLE_NODE($f);\n"
+                                 unless $query_jumble_ignore;
+                       }
                }
                elsif ($t eq 'ParseLoc')
                {
                        # Track the node's location only if directly requested.
                        if ($query_jumble_location)
                        {
-                               print $jff "\tJUMBLE_LOCATION($f);\n"
+                               print $jff "\tJUMBLE_LOCATION($f, false);\n"
                                  unless $query_jumble_ignore;
                        }
                }
diff --git a/src/backend/nodes/queryjumblefuncs.c 
b/src/backend/nodes/queryjumblefuncs.c
index e8bf95690b..2953073872 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -42,6 +42,9 @@
 /* GUC parameters */
 int                    compute_query_id = COMPUTE_QUERY_ID_AUTO;
 
+/* Whether to merge constants in a list when computing query_id */
+bool           query_id_const_merge = false;
+
 /*
  * True when compute_query_id is ON or AUTO, and a module requests them.
  *
@@ -53,8 +56,10 @@ bool         query_id_enabled = false;
 
 static void AppendJumble(JumbleState *jstate,
                                                 const unsigned char *item, 
Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate,
+                                                               int location, 
bool merged);
 static void _jumbleNode(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements);
 static void _jumbleA_Const(JumbleState *jstate, Node *node);
 static void _jumbleList(JumbleState *jstate, Node *node);
 static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
@@ -160,6 +165,18 @@ EnableQueryId(void)
                query_id_enabled = true;
 }
 
+/*
+ * Enables or disables constants merging for query identifier computation.
+ *
+ * Third-party plugins can use this function to switch merging of constants
+ * in a list on or off; it simply stores the value in query_id_const_merge.
+ */
+void
+SetQueryIdConstMerge(bool value)
+{
+       query_id_const_merge = value;
+}
+
 /*
  * AppendJumble: Append a value that is substantive in a given query to
  * the current jumble.
@@ -198,11 +215,15 @@ AppendJumble(JumbleState *jstate, const unsigned char 
*item, Size size)
 }
 
 /*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked.
+ *
+ * Merged argument signals that the constant represents the first or the last
+ * element in a series of merged constants, and everything but the first/last
+ * element contributes nothing to the jumble hash.
  */
 static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
 {
        /* -1 indicates unknown or undefined location */
        if (location >= 0)
@@ -218,15 +239,67 @@ RecordConstLocation(JumbleState *jstate, int location)
                }
                jstate->clocations[jstate->clocations_count].location = 
location;
                /* initialize lengths to -1 to simplify third-party module 
usage */
+               jstate->clocations[jstate->clocations_count].merged = merged;
                jstate->clocations[jstate->clocations_count].length = -1;
                jstate->clocations_count++;
        }
 }
 
+/*
+ * Check whether the provided list can be merged down, which means merging is
+ * enabled and the list contains only explicit Const nodes.
+ *
+ * Returns true and sets *firstConst / *lastConst if merging is possible.
+ *
+ * Note that this function searches only for explicit Const nodes and does not
+ * try to simplify expressions.
+ */
+static bool
+IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
+{
+       ListCell   *temp;
+       Node       *firstExpr = NULL;
+
+       if (elements == NIL)
+               return false;
+
+       if (!query_id_const_merge)
+       {
+               /* Merging is disabled, process everything one by one */
+               return false;
+       }
+
+       firstExpr = linitial(elements);
+
+       /*
+        * If the first expression is a constant, verify that all the elements
+        * are constants as well. If yes, the list is eligible for merging, so
+        * report its first and last constants to the caller.
+        */
+       if (IsA(firstExpr, Const))
+       {
+               foreach(temp, elements)
+                       if (!IsA(lfirst(temp), Const))
+                               return false;
+
+               *firstConst = (Const *) firstExpr;
+               *lastConst = llast_node(Const, elements);
+               return true;
+       }
+
+       /*
+        * If we end up here, it means no constants merging is possible, process
+        * the list as usual.
+        */
+       return false;
+}
+
 #define JUMBLE_NODE(item) \
        _jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
-       RecordConstLocation(jstate, expr->location)
+#define JUMBLE_ELEMENTS(list) \
+       _jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_LOCATION(location, merged) \
+       RecordConstLocation(jstate, expr->location, merged)
 #define JUMBLE_FIELD(item) \
        AppendJumble(jstate, (const unsigned char *) &(expr->item), 
sizeof(expr->item))
 #define JUMBLE_FIELD_SINGLE(item) \
@@ -239,6 +312,26 @@ do { \
 
 #include "queryjumblefuncs.funcs.c"
 
+static void
+_jumbleElements(JumbleState *jstate, List *elements)
+{
+       Const *first, *last;
+       if (IsMergeableConstList(elements, &first, &last))
+       {
+               /*
+                * Both first and last constants have to be recorded as merged.
+                * The first one indicates the merged interval, the last one
+                * tells us the length of the interval within the query text.
+                */
+               RecordConstLocation(jstate, first->location, true);
+               RecordConstLocation(jstate, last->location, true);
+       }
+       else
+       {
+               _jumbleNode(jstate, (Node *) elements);
+       }
+}
+
 static void
 _jumbleNode(JumbleState *jstate, Node *node)
 {
@@ -375,5 +468,5 @@ _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
        if (expr->jumble_args)
                JUMBLE_NODE(args);
        JUMBLE_FIELD(is_local);
-       JUMBLE_LOCATION(location);
+       JUMBLE_LOCATION(location, false);
 }
diff --git a/src/backend/postmaster/launch_backend.c 
b/src/backend/postmaster/launch_backend.c
index 1f2d829ec5..0e2761edd5 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -115,6 +115,7 @@ typedef struct
        bool            redirection_done;
        bool            IsBinaryUpgrade;
        bool            query_id_enabled;
+       bool            query_id_const_merge;
        int                     max_safe_fds;
        int                     MaxBackends;
        int                     num_pmchild_slots;
@@ -744,6 +745,7 @@ save_backend_variables(BackendParameters *param,
        param->redirection_done = redirection_done;
        param->IsBinaryUpgrade = IsBinaryUpgrade;
        param->query_id_enabled = query_id_enabled;
+       param->query_id_const_merge = query_id_const_merge;
        param->max_safe_fds = max_safe_fds;
 
        param->MaxBackends = MaxBackends;
@@ -1004,6 +1006,7 @@ restore_backend_variables(BackendParameters *param)
        redirection_done = param->redirection_done;
        IsBinaryUpgrade = param->IsBinaryUpgrade;
        query_id_enabled = param->query_id_enabled;
+       query_id_const_merge = param->query_id_const_merge;
        max_safe_fds = param->max_safe_fds;
 
        MaxBackends = param->MaxBackends;
diff --git a/src/backend/utils/misc/postgresql.conf.sample 
b/src/backend/utils/misc/postgresql.conf.sample
index a2ac7575ca..d46fc74daf 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -654,7 +654,6 @@
 #log_planner_stats = off
 #log_executor_stats = off
 
-
 #------------------------------------------------------------------------------
 # AUTOVACUUM
 #------------------------------------------------------------------------------
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index c1ab3d1358..4df2226cc0 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -108,6 +108,9 @@ typedef enum NodeTag
  * - query_jumble_location: Mark the field as a location to track.  This is
  *   only allowed for integer fields that include "location" in their name.
  *
+ * - query_jumble_merge: Allow to merge the field values for the query
+ *   jumbling.
+ *
  * - read_as(VALUE): In nodeRead(), replace the field's value with VALUE.
  *
  * - read_write_ignore: Ignore the field for read/write.  This is only allowed
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index b0ef1952e8..4c9a0d1e88 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1377,7 +1377,7 @@ typedef struct ArrayExpr
        /* common type of array elements */
        Oid                     element_typeid 
pg_node_attr(query_jumble_ignore);
        /* the array elements or sub-arrays */
-       List       *elements;
+       List       *elements pg_node_attr(query_jumble_merge);
        /* true if elements are sub-arrays */
        bool            multidims pg_node_attr(query_jumble_ignore);
        /* token location, or -1 if unknown */
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index f1c55c8067..8daf0725d7 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -23,6 +23,12 @@ typedef struct LocationLen
 {
        int                     location;               /* start offset in 
query text */
        int                     length;                 /* length in bytes, or 
-1 to ignore */
+
+       /*
+        * Indicates the constant represents the beginning or the end of a 
merged
+        * constants interval.
+        */
+       bool            merged;
 } LocationLen;
 
 /*
@@ -62,12 +68,13 @@ enum ComputeQueryIdType
 /* GUC parameters */
 extern PGDLLIMPORT int compute_query_id;
 
-
 extern const char *CleanQuerytext(const char *query, int *location, int *len);
 extern JumbleState *JumbleQuery(Query *query);
 extern void EnableQueryId(void);
+extern void SetQueryIdConstMerge(bool value);
 
 extern PGDLLIMPORT bool query_id_enabled;
+extern PGDLLIMPORT bool query_id_const_merge;
 
 /*
  * Returns whether query identifier computation has been enabled, either

base-commit: 1acf10549e64c6a52ced570d712fcba1a2f5d1ec
-- 
2.45.1

>From c692cdae79c7d319f08578d0e00f2e6f4663f3d2 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthali...@gmail.com>
Date: Wed, 3 Apr 2024 20:03:08 +0200
Subject: [PATCH v22 2/4] Reusable decimalLength functions

Move the decimalLength functions into a header so that they can be reused
by the following patch.

Reviewed-by: Sutou Kouhei
Tested-by: Yasuo Honda
---
 src/backend/utils/adt/numutils.c | 48 +----------------------
 src/include/utils/numutils.h     | 67 ++++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 47 deletions(-)
 create mode 100644 src/include/utils/numutils.h

diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 63c2beb6a2..b536778dad 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -21,6 +21,7 @@
 #include "common/int.h"
 #include "port/pg_bitutils.h"
 #include "utils/builtins.h"
+#include "utils/numutils.h"
 
 /*
  * A table of all two-digit numbers. This is used to speed up decimal digit
@@ -38,53 +39,6 @@ static const char DIGIT_TABLE[200] =
 "80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
 "90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
 
-/*
- * Adapted from 
http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- */
-static inline int
-decimalLength32(const uint32 v)
-{
-       int                     t;
-       static const uint32 PowersOfTen[] = {
-               1, 10, 100,
-               1000, 10000, 100000,
-               1000000, 10000000, 100000000,
-               1000000000
-       };
-
-       /*
-        * Compute base-10 logarithm by dividing the base-2 logarithm by a
-        * good-enough approximation of the base-2 logarithm of 10
-        */
-       t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
-       return t + (v >= PowersOfTen[t]);
-}
-
-static inline int
-decimalLength64(const uint64 v)
-{
-       int                     t;
-       static const uint64 PowersOfTen[] = {
-               UINT64CONST(1), UINT64CONST(10),
-               UINT64CONST(100), UINT64CONST(1000),
-               UINT64CONST(10000), UINT64CONST(100000),
-               UINT64CONST(1000000), UINT64CONST(10000000),
-               UINT64CONST(100000000), UINT64CONST(1000000000),
-               UINT64CONST(10000000000), UINT64CONST(100000000000),
-               UINT64CONST(1000000000000), UINT64CONST(10000000000000),
-               UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
-               UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
-               UINT64CONST(1000000000000000000), 
UINT64CONST(10000000000000000000)
-       };
-
-       /*
-        * Compute base-10 logarithm by dividing the base-2 logarithm by a
-        * good-enough approximation of the base-2 logarithm of 10
-        */
-       t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
-       return t + (v >= PowersOfTen[t]);
-}
-
 static const int8 hexlookup[128] = {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
diff --git a/src/include/utils/numutils.h b/src/include/utils/numutils.h
new file mode 100644
index 0000000000..876e64f2df
--- /dev/null
+++ b/src/include/utils/numutils.h
@@ -0,0 +1,67 @@
+/*-------------------------------------------------------------------------
+ *
+ * numutils.h
+ *       Decimal length functions for numutils.c
+ *
+ *
+ * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/numutils.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NUMUTILS_H
+#define NUMUTILS_H
+
+#include "common/int.h"
+#include "port/pg_bitutils.h"
+
+/*
+ * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+ */
+static inline int
+decimalLength32(const uint32 v)
+{
+       int                     t;
+       static const uint32 PowersOfTen[] = {
+               1, 10, 100,
+               1000, 10000, 100000,
+               1000000, 10000000, 100000000,
+               1000000000
+       };
+
+       /*
+        * Compute base-10 logarithm by dividing the base-2 logarithm by a
+        * good-enough approximation of the base-2 logarithm of 10
+        */
+       t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
+       return t + (v >= PowersOfTen[t]);
+}
+
+static inline int
+decimalLength64(const uint64 v)
+{
+       int                     t;
+       static const uint64 PowersOfTen[] = {
+               UINT64CONST(1), UINT64CONST(10),
+               UINT64CONST(100), UINT64CONST(1000),
+               UINT64CONST(10000), UINT64CONST(100000),
+               UINT64CONST(1000000), UINT64CONST(10000000),
+               UINT64CONST(100000000), UINT64CONST(1000000000),
+               UINT64CONST(10000000000), UINT64CONST(100000000000),
+               UINT64CONST(1000000000000), UINT64CONST(10000000000000),
+               UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
+               UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
+               UINT64CONST(1000000000000000000), 
UINT64CONST(10000000000000000000)
+       };
+
+       /*
+        * Compute base-10 logarithm by dividing the base-2 logarithm by a
+        * good-enough approximation of the base-2 logarithm of 10
+        */
+       t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
+       return t + (v >= PowersOfTen[t]);
+}
+
+#endif                                                 /* NUMUTILS_H */
-- 
2.45.1

>From 800bfebec09d9876d28d3b24a3eaa256ff99787b Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthali...@gmail.com>
Date: Sun, 12 May 2024 11:51:10 +0200
Subject: [PATCH v22 3/4] Merge constants in ArrayExpr into groups

With query_id_const_merge enabled, only the first and last elements of an
ArrayExpr are used to compute the query id. Extend this to take the number
of elements into account, and merge constants into groups based on it. The
resulting groups are delimited by powers of 10, i.e. 1 to 9, 10 to 99, etc.

Reviewed-by: Sutou Kouhei
Tested-by: Yasuo Honda
---
 .../pg_stat_statements/expected/merging.out   | 84 +++++++++++++++----
 .../pg_stat_statements/pg_stat_statements.c   | 29 +++++--
 contrib/pg_stat_statements/sql/merging.sql    | 13 +++
 doc/src/sgml/pgstatstatements.sgml            | 11 +--
 src/backend/nodes/gen_node_support.pl         |  2 +-
 src/backend/nodes/queryjumblefuncs.c          | 55 ++++++++----
 src/include/nodes/queryjumble.h               | 10 ++-
 7 files changed, 158 insertions(+), 46 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/merging.out 
b/contrib/pg_stat_statements/expected/merging.out
index 1e58283afe..0cb4f67b8b 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -54,11 +54,11 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3);
 (0 rows)
 
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-                       query                        | calls 
-----------------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN ($1)          |     1
- SELECT * FROM test_merge WHERE id IN (...)         |     1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+                          query                           | calls 
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1)                |     1
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t       |     1
 (3 rows)
 
 SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
@@ -80,7 +80,60 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query 
COLLATE "C";
                                  query                                  | 
calls 
 
------------------------------------------------------------------------+-------
  SELECT * FROM test_merge WHERE id IN ($1)                              |     1
- SELECT * FROM test_merge WHERE id IN (...)                             |     4
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries])               |     2
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries])             |     2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                     |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(5 rows)
+
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 
53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 
73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 
93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 
110);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                            query                             | calls 
+--------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [100-999 entries]) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t           |     1
+(2 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                          query                           | calls 
+----------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t       |     1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries])               |     1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries])             |     1
  SELECT pg_stat_statements_reset() IS NOT NULL AS t                     |     1
  SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
 (4 rows)
@@ -108,11 +161,12 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 
7, 8, 9, 10, 11) and dat
 (0 rows)
 
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-                          query                           | calls 
-----------------------------------------------------------+-------
- SELECT * FROM test_merge WHERE id IN (...) and data = $3 |     3
- SELECT pg_stat_statements_reset() IS NOT NULL AS t       |     1
-(2 rows)
+                                  query                                   | 
calls 
+--------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (... [1-9 entries]) and data = $3   |    
 1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries]) and data = $3 |    
 2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                       |    
 1
+(3 rows)
 
 -- No constants simplification
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
@@ -147,10 +201,10 @@ SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 
5, 6, 7, 8, 9, 10, 11)
 (0 rows)
 
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-                       query                        | calls 
-----------------------------------------------------+-------
- SELECT * FROM test_merge_numeric WHERE id IN (...) |     1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+                               query                                | calls 
+--------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (... [10-99 entries]) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                 |     1
 (2 rows)
 
 -- Test constants evaluation, verifies a tricky part to make sure there are no
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c 
b/contrib/pg_stat_statements/pg_stat_statements.c
index bcbf1164c1..24ab2a45ff 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2830,6 +2830,8 @@ generate_normalized_query(JumbleState *jstate, const char 
*query,
        bool            merged_interval = false;        /* Currently processed 
constants
                                                                                
           belong to a merged constants
                                                                                
           interval. */
+       int             magnitude;              /* Order of magnitude for number of merged
+                                                                  constants */
 
 
        /*
@@ -2844,8 +2846,13 @@ generate_normalized_query(JumbleState *jstate, const 
char *query,
         * certainly isn't more than 11 bytes, even if n reaches INT_MAX.  We
         * could refine that limit based on the max value of n for the current
         * query, but it hardly seems worth any extra effort to do so.
+        *
+        * On top of that, each pair of $n symbols representing a merged constants
+        * interval will be decorated with the explanatory text, adding 14
+        * bytes.
         */
-       norm_query_buflen = query_len + jstate->clocations_count * 10;
+       norm_query_buflen = query_len + jstate->clocations_count * 10 +
+               jstate->clocations_merged_count * 14;
 
        /* Allocate result buffer */
        norm_query = palloc(norm_query_buflen + 1);
@@ -2870,7 +2877,8 @@ generate_normalized_query(JumbleState *jstate, const char 
*query,
                Assert(len_to_wrt >= 0);
 
                /* Normal path, non merged constant */
-               if (!jstate->clocations[i].merged)
+               magnitude = jstate->clocations[i].magnitude;
+               if (magnitude == 0)
                {
                        memcpy(norm_query + n_quer_loc, query + quer_loc, 
len_to_wrt);
                        n_quer_loc += len_to_wrt;
@@ -2887,13 +2895,23 @@ generate_normalized_query(JumbleState *jstate, const 
char *query,
                        /*
                         * We are not inside a merged interval yet, which means 
it is the
                         * the first merged constant.
-                        *
+                        */
+                       static const uint32 powers_of_ten[] = {
+                               1, 10, 100,
+                               1000, 10000, 100000,
+                               1000000, 10000000, 100000000,
+                               1000000000
+                       };
+                       int lower_merged = powers_of_ten[magnitude - 1];
+                       int upper_merged = powers_of_ten[magnitude];
+
+                       /*
                         * A merged constants interval must be represented via 
two
                         * constants with the merged flag. Currently we are at 
the first,
                         * verify there is another one.
                         */
                        Assert(i + 1 < jstate->clocations_count);
-                       Assert(jstate->clocations[i + 1].merged);
+                       Assert(jstate->clocations[i + 1].magnitude > 0);
 
                        memcpy(norm_query + n_quer_loc, query + quer_loc, 
len_to_wrt);
                        n_quer_loc += len_to_wrt;
@@ -2902,7 +2920,8 @@ generate_normalized_query(JumbleState *jstate, const char 
*query,
                        merged_interval = true;
 
                        /* Mark the interval in the normalized query */
-                       n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+                       n_quer_loc += sprintf(norm_query + n_quer_loc, "... 
[%d-%d entries]",
+                                                                 lower_merged, 
upper_merged - 1);
                }
 
                /* Otherwise the constant is merged away, move forward */
diff --git a/contrib/pg_stat_statements/sql/merging.sql 
b/contrib/pg_stat_statements/sql/merging.sql
index 71985bb1cd..657044fade 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -27,6 +27,19 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 
8, 9, 10);
 SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
+-- Second order of magnitude, brace yourself
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 
53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 
73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 
93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 
110);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
 -- More conditions in the query
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 
diff --git a/doc/src/sgml/pgstatstatements.sgml 
b/doc/src/sgml/pgstatstatements.sgml
index 2276783786..55dfa5b50f 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -634,7 +634,7 @@
 =# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
 =# SELECT query, calls FROM pg_stat_statements;
 -[ RECORD 1 ]------------------------------
-query | SELECT * FROM test WHERE a IN (...)
+query | SELECT * FROM test WHERE a IN (... [10-99 entries])
 calls | 2
 -[ RECORD 2 ]------------------------------
 query | SELECT pg_stat_statements_reset()
@@ -992,10 +992,11 @@ calls | 1
       with an array of different lenght.
 
       If this parameter is on, an array of constants will contribute only the
-      first and the last elements to the query identifier. It means two
-      occurences of the same query, where the only difference is number of
-      constants in the array, are going to get the same query identifier.
-      Such queries are represented in form <literal>'(...)'</literal>.
+      first element, the last element and the number of elements to the query
+      identifier. It means two occurrences of the same query, where the only
+      difference is number of constants in the array, are going to get the
+      same query identifier if the arrays are of similar length.
+      Such queries are represented in form <literal>'(... [10-99 entries])'</literal>.
 
       The parameter could be used to reduce amount of repeating data stored
       via <structname>pg_stat_statements</structname>.  The default value is 
off.
diff --git a/src/backend/nodes/gen_node_support.pl 
b/src/backend/nodes/gen_node_support.pl
index d2a276c303..c98d121dba 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1323,7 +1323,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
                        # Track the node's location only if directly requested.
                        if ($query_jumble_location)
                        {
-                               print $jff "\tJUMBLE_LOCATION($f, false);\n"
+                               print $jff "\tJUMBLE_LOCATION($f, 0);\n"
                                  unless $query_jumble_ignore;
                        }
                }
diff --git a/src/backend/nodes/queryjumblefuncs.c 
b/src/backend/nodes/queryjumblefuncs.c
index 2953073872..8ab3261d66 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -37,6 +37,8 @@
 #include "nodes/queryjumble.h"
 #include "parser/scansup.h"
 
+#include "utils/numutils.h"
+
 #define JUMBLE_SIZE                            1024    /* query serialization 
buffer size */
 
 /* GUC parameters */
@@ -57,7 +59,7 @@ bool          query_id_enabled = false;
 static void AppendJumble(JumbleState *jstate,
                                                 const unsigned char *item, 
Size size);
 static void RecordConstLocation(JumbleState *jstate,
-                                                               int location, 
bool merged);
+                                                               int location, 
int magnitude);
 static void _jumbleNode(JumbleState *jstate, Node *node);
 static void _jumbleElements(JumbleState *jstate, List *elements);
 static void _jumbleA_Const(JumbleState *jstate, Node *node);
@@ -129,6 +131,7 @@ JumbleQuery(Query *query)
        jstate->clocations = (LocationLen *)
                palloc(jstate->clocations_buf_size * sizeof(LocationLen));
        jstate->clocations_count = 0;
+       jstate->clocations_merged_count = 0;
        jstate->highest_extern_param_id = 0;
 
        /* Compute query ID and mark the Query node with it */
@@ -218,12 +221,15 @@ AppendJumble(JumbleState *jstate, const unsigned char 
*item, Size size)
  * Record location of constant within query string of query tree that is
  * currently being walked.
  *
- * Merged argument signals that the constant represents the first or the last
- * element in a series of merged constants, and everything but the first/last
- * element contributes nothing to the jumble hash.
+ * Magnitude argument larger than zero signals that the constant represents the
+ * first or the last element in a series of merged constants, and everything
+ * but such first/last element will contribute nothing to the jumble hash. The
+ * magnitude value specifies order of magnitude (i.e. how many digits it has)
+ * for the number of elements in the series, to represent the fact of merging
+ * later on.
  */
 static void
-RecordConstLocation(JumbleState *jstate, int location, bool merged)
+RecordConstLocation(JumbleState *jstate, int location, int magnitude)
 {
        /* -1 indicates unknown or undefined location */
        if (location >= 0)
@@ -238,10 +244,12 @@ RecordConstLocation(JumbleState *jstate, int location, 
bool merged)
                                                 sizeof(LocationLen));
                }
                jstate->clocations[jstate->clocations_count].location = 
location;
+               jstate->clocations[jstate->clocations_count].magnitude = 
magnitude;
                /* initialize lengths to -1 to simplify third-party module 
usage */
-               jstate->clocations[jstate->clocations_count].merged = merged;
                jstate->clocations[jstate->clocations_count].length = -1;
                jstate->clocations_count++;
+               if (magnitude > 0)
+                       jstate->clocations_merged_count++;
        }
 }
 
@@ -249,24 +257,26 @@ RecordConstLocation(JumbleState *jstate, int location, 
bool merged)
  * Verify if the provided list contains could be merged down, which means it
  * contains only constant expressions.
  *
- * Return value indicates if merging is possible.
+ * Return value is the order of magnitude (i.e. how many digits it has) for
+ * length of the list (to use for representation purposes later on) if merging
+ * is possible, otherwise zero.
  *
  * Note that this function searches only for explicit Const nodes and does not
  * try to simplify expressions.
  */
-static bool
+static int
 IsMergeableConstList(List *elements, Const **firstConst, Const **lastConst)
 {
        ListCell   *temp;
        Node       *firstExpr = NULL;
 
        if (elements == NIL)
-               return false;
+               return 0;
 
        if (!query_id_const_merge)
        {
                /* Merging is disabled, process everything one by one */
-               return false;
+               return 0;
        }
 
        firstExpr = linitial(elements);
@@ -280,26 +290,26 @@ IsMergeableConstList(List *elements, Const **firstConst, 
Const **lastConst)
        {
                foreach(temp, elements)
                        if (!IsA(lfirst(temp), Const))
-                               return false;
+                               return 0;
 
                *firstConst = (Const *) firstExpr;
                *lastConst = llast_node(Const, elements);
-               return true;
+               return decimalLength32(elements->length);
        }
 
        /*
         * If we end up here, it means no constants merging is possible, process
         * the list as usual.
         */
-       return false;
+       return 0;
 }
 
 #define JUMBLE_NODE(item) \
        _jumbleNode(jstate, (Node *) expr->item)
 #define JUMBLE_ELEMENTS(list) \
        _jumbleElements(jstate, (List *) expr->list)
-#define JUMBLE_LOCATION(location, merged) \
-       RecordConstLocation(jstate, expr->location, merged)
+#define JUMBLE_LOCATION(location, magnitude) \
+       RecordConstLocation(jstate, expr->location, magnitude)
 #define JUMBLE_FIELD(item) \
        AppendJumble(jstate, (const unsigned char *) &(expr->item), 
sizeof(expr->item))
 #define JUMBLE_FIELD_SINGLE(item) \
@@ -316,15 +326,24 @@ static void
 _jumbleElements(JumbleState *jstate, List *elements)
 {
        Const *first, *last;
-       if (IsMergeableConstList(elements, &first, &last))
+       int magnitude = IsMergeableConstList(elements, &first, &last);
+
+       if (magnitude)
        {
                /*
                 * Both first and last constants have to be recorded. The first 
one
                 * will indicate the merged interval, the last one will tell us 
the
                 * length of the interval within the query text.
                 */
-               RecordConstLocation(jstate, first->location, true);
-               RecordConstLocation(jstate, last->location, true);
+               RecordConstLocation(jstate, first->location, magnitude);
+               RecordConstLocation(jstate, last->location, magnitude);
+
+               /*
+                * After merging constants down we end up with only two constants, the
+                * first and the last one. To distinguish the order of magnitude behind
+                * merged constants, add its value into the jumble.
+                */
+               JUMBLE_FIELD_SINGLE(magnitude);
        }
        else
        {
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 8daf0725d7..0e69e420b7 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -26,9 +26,12 @@ typedef struct LocationLen
 
        /*
         * Indicates the constant represents the beginning or the end of a 
merged
-        * constants interval.
+        * constants interval. The value shows how many constants were merged away
+        * (up to a power of 10), or in other words the order of magnitude for
+        * number of merged constants (i.e. how many digits it has). Otherwise the
+        * value is 0, indicating that no merging was performed.
         */
-       bool            merged;
+       int                     magnitude;
 } LocationLen;
 
 /*
@@ -52,6 +55,9 @@ typedef struct JumbleState
        /* Current number of valid entries in clocations array */
        int                     clocations_count;
 
+       /* Current number of entries with merged constants interval */
+       int                     clocations_merged_count;
+
        /* highest Param id we've seen, in order to start normalization 
correctly */
        int                     highest_extern_param_id;
 } JumbleState;
-- 
2.45.1

>From 14d755bdcdd73916a7f4879a6b856efa9654fba5 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthali...@gmail.com>
Date: Wed, 3 Apr 2024 20:03:45 +0200
Subject: [PATCH v22 4/4] Introduce query_id_const_merge_threshold

Replace query_id_const_merge with a threshold to allow merging only if
the number of elements is larger than the specified value, which can be
configured using the pg_stat_statements parameter
query_id_const_merge_threshold.

Reviewed-by: Sutou Kouhei
Tested-by: Yasuo Honda
---
 .../pg_stat_statements/expected/merging.out   | 68 ++++++++++++++++++-
 .../pg_stat_statements/pg_stat_statements.c   | 36 +++++-----
 contrib/pg_stat_statements/sql/merging.sql    | 21 +++++-
 doc/src/sgml/pgstatstatements.sgml            | 23 ++++---
 src/backend/nodes/queryjumblefuncs.c          | 23 +++++--
 src/backend/postmaster/launch_backend.c       |  6 +-
 src/include/nodes/queryjumble.h               |  4 +-
 7 files changed, 137 insertions(+), 44 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/merging.out 
b/contrib/pg_stat_statements/expected/merging.out
index 0cb4f67b8b..552e248ff1 100644
--- a/contrib/pg_stat_statements/expected/merging.out
+++ b/contrib/pg_stat_statements/expected/merging.out
@@ -36,7 +36,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query 
COLLATE "C";
 (4 rows)
 
 -- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
  t 
 ---
@@ -218,4 +218,68 @@ FROM cte;
 --------
 (0 rows)
 
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                   query                                   | 
calls 
+---------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) |   
  1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries])                |   
  2
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                        |   
  1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                         query                         | calls 
+-------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t    |     1
+(2 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4)                  |     1
+ SELECT * FROM test_merge WHERE id IN (... [10-99 entries])             |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                     |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(4 rows)
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c 
b/contrib/pg_stat_statements/pg_stat_statements.c
index 24ab2a45ff..61b1c4ea30 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -267,8 +267,8 @@ static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
 static ProcessUtility_hook_type prev_ProcessUtility = NULL;
 
-/* An assign hook to keep query_id_const_merge in sync */
-static void pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra);
+/* An assign hook to keep query_id_const_merge_threshold in sync */
+static void pgss_query_id_const_merge_assign_hook(int newvalue, void *extra);
 
 /* Links to shared memory state */
 static pgssSharedState *pgss = NULL;
@@ -297,8 +297,8 @@ static bool pgss_track_utility = true;      /* whether to 
track utility commands */
 static bool pgss_track_planning = false;       /* whether to track planning
                                                                                
         * duration */
 static bool pgss_save = true;  /* whether to save stats across shutdown */
-static bool pgss_query_id_const_merge = false; /* request constants merging
-                                                                               
                 * when computing query_id */
+static int  pgss_query_id_const_merge_threshold = 0;   /* request constants 
merging
+                                                                               
                                 * when computing query_id */
 
 #define pgss_enabled(level) \
        (!IsParallelWorker() && \
@@ -463,20 +463,22 @@ _PG_init(void)
                                                         NULL,
                                                         NULL);
 
-       DefineCustomBoolVariable("pg_stat_statements.query_id_const_merge",
-                                                        "Whether to merge 
constants in a list when computing query_id.",
-                                                        NULL,
-                                                        
&pgss_query_id_const_merge,
-                                                        false,
-                                                        PGC_SUSET,
-                                                        0,
-                                                        NULL,
-                                                        
pgss_query_id_const_merge_assign_hook,
-                                                        NULL);
+       
DefineCustomIntVariable("pg_stat_statements.query_id_const_merge_threshold",
+                                                       "Whether to merge 
constants in a list when computing query_id.",
+                                                       NULL,
+                                                       
&pgss_query_id_const_merge_threshold,
+                                                       0,
+                                                       0,
+                                                       INT_MAX,
+                                                       PGC_SUSET,
+                                                       0,
+                                                       NULL,
+                                                       
pgss_query_id_const_merge_assign_hook,
+                                                       NULL);
 
        MarkGUCPrefixReserved("pg_stat_statements");
 
-       SetQueryIdConstMerge(pgss_query_id_const_merge);
+       SetQueryIdConstMerge(pgss_query_id_const_merge_threshold);
 
        /*
         * Install hooks.
@@ -3084,10 +3086,10 @@ comp_location(const void *a, const void *b)
 }
 
 /*
- * Notify query jumbling about query_id_const_merge status
+ * Notify query jumbling about query_id_const_merge_threshold status
  */
 static void
-pgss_query_id_const_merge_assign_hook(bool newvalue, void *extra)
+pgss_query_id_const_merge_assign_hook(int newvalue, void *extra)
 {
        SetQueryIdConstMerge(newvalue);
 }
diff --git a/contrib/pg_stat_statements/sql/merging.sql 
b/contrib/pg_stat_statements/sql/merging.sql
index 657044fade..fedeb35b8f 100644
--- a/contrib/pg_stat_statements/sql/merging.sql
+++ b/contrib/pg_stat_statements/sql/merging.sql
@@ -15,7 +15,7 @@ SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 
9, 10, 11);
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
 -- Normal scenario, too many simple constants for an IN query
-SET pg_stat_statements.query_id_const_merge = on;
+SET pg_stat_statements.query_id_const_merge_threshold = 1;
 
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 SELECT * FROM test_merge WHERE id IN (1);
@@ -68,4 +68,21 @@ WITH cte AS (
 SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
 FROM cte;
 
-RESET pg_stat_statements.query_id_const_merge;
+-- With the threshold
+SET pg_stat_statements.query_id_const_merge_threshold = 10;
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+RESET pg_stat_statements.query_id_const_merge_threshold;
diff --git a/doc/src/sgml/pgstatstatements.sgml 
b/doc/src/sgml/pgstatstatements.sgml
index 55dfa5b50f..8d107e7706 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -623,12 +623,12 @@
    In some cases, queries with visibly different texts might get merged into a
    single <structname>pg_stat_statements</structname> entry.  Normally this
    will happen only for semantically equivalent queries, or if
-   <varname>pg_stat_statements.query_id_const_merge</varname> is enabled and
-   the only difference between queries is the length of an array with constants
-   they contain:
+   <varname>pg_stat_statements.query_id_const_merge_threshold</varname> is
+   greater than zero and the only difference between queries is the length of
+   an array with constants they contain:
 
 <screen>
-=# SET query_id_const_merge = on;
+=# SET pg_stat_statements.query_id_const_merge_threshold = 1;
 =# SELECT pg_stat_statements_reset();
 =# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11);
 =# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12);
@@ -977,9 +977,9 @@ calls | 1
 
    <varlistentry>
     <term>
-    <varname>pg_stat_statements.query_id_const_merge</varname> 
(<type>bool</type>)
+    <varname>pg_stat_statements.query_id_const_merge_threshold</varname> 
(<type>integer</type>)
     <indexterm>
-     <primary><varname>pg_stat_statements.query_id_const_merge</varname> 
configuration parameter</primary>
+     
<primary><varname>pg_stat_statements.query_id_const_merge_threshold</varname> 
configuration parameter</primary>
     </indexterm>
     </term>
 
@@ -991,11 +991,12 @@ calls | 1
       query will get multiple different identifiers, one for each occurrence
       with an array of different lenght.
 
-      If this parameter is on, an array of constants will contribute only the
-      first element, the last element and the number of elements to the query
-      identifier. It means two occurences of the same query, where the only
-      difference is number of constants in the array, are going to get the
-      same query identifier if the arrays are of similar length.
+      If this parameter is greater than 0, an array with at least
+      <varname>pg_stat_statements.query_id_const_merge_threshold</varname>
+      constants will contribute only the first element, the last element
+      and the number of elements to the query identifier. It means two
+      occurrences of the same query, where the only difference is the number
+      of constants in the array, are going to get the same query identifier.
       Such queries are represented in form <literal>'(... [10-99 
entries])'</literal>.
 
       The parameter could be used to reduce amount of repeating data stored
diff --git a/src/backend/nodes/queryjumblefuncs.c 
b/src/backend/nodes/queryjumblefuncs.c
index 8ab3261d66..88a94be933 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -44,8 +44,8 @@
 /* GUC parameters */
 int                    compute_query_id = COMPUTE_QUERY_ID_AUTO;
 
-/* Whether to merge constants in a list when computing query_id */
-bool           query_id_const_merge = false;
+/* Lower threshold for the list length to merge constants when computing 
query_id */
+int                    query_id_const_merge_threshold = 1;
 
 /*
  * True when compute_query_id is ON or AUTO, and a module requests them.
@@ -172,12 +172,14 @@ EnableQueryId(void)
  * Controls constants merging for query identifier computation.
  *
  * Third-party plugins can use this function to enable/disable merging
- * of constants in a list when query identifier is computed.
+ * of constants in a list when query identifier is computed. The argument
+ * specifies the minimum array length at which merging is applied; a value
+ * below 1 disables merging.
  */
 void
-SetQueryIdConstMerge(bool value)
+SetQueryIdConstMerge(int threshold)
 {
-       query_id_const_merge = value;
+       query_id_const_merge_threshold = threshold;
 }
 
 /*
@@ -255,7 +257,8 @@ RecordConstLocation(JumbleState *jstate, int location, int 
magnitude)
 
 /*
  * Verify if the provided list contains could be merged down, which means it
- * contains only constant expressions.
+ * contains only constant expressions and the list contains at least
+ * query_id_const_merge_threshold elements.
  *
  * Return value is the order of magnitude (i.e. how many digits it has) for
  * length of the list (to use for representation purposes later on) if merging
@@ -273,12 +276,18 @@ IsMergeableConstList(List *elements, Const **firstConst, 
Const **lastConst)
        if (elements == NIL)
                return 0;
 
-       if (!query_id_const_merge)
+       if (query_id_const_merge_threshold < 1)
        {
                /* Merging is disabled, process everything one by one */
                return 0;
        }
 
+       if (elements->length < query_id_const_merge_threshold)
+       {
+               /* The list is not large enough */
+               return 0;
+       }
+
        firstExpr = linitial(elements);
 
        /*
diff --git a/src/backend/postmaster/launch_backend.c 
b/src/backend/postmaster/launch_backend.c
index 0e2761edd5..8e438084e5 100644
--- a/src/backend/postmaster/launch_backend.c
+++ b/src/backend/postmaster/launch_backend.c
@@ -115,7 +115,7 @@ typedef struct
        bool            redirection_done;
        bool            IsBinaryUpgrade;
        bool            query_id_enabled;
-       bool            query_id_const_merge;
+       int                     query_id_const_merge_threshold;
        int                     max_safe_fds;
        int                     MaxBackends;
        int                     num_pmchild_slots;
@@ -745,7 +745,7 @@ save_backend_variables(BackendParameters *param,
        param->redirection_done = redirection_done;
        param->IsBinaryUpgrade = IsBinaryUpgrade;
        param->query_id_enabled = query_id_enabled;
-       param->query_id_const_merge = query_id_const_merge;
+       param->query_id_const_merge_threshold = query_id_const_merge_threshold;
        param->max_safe_fds = max_safe_fds;
 
        param->MaxBackends = MaxBackends;
@@ -1006,7 +1006,7 @@ restore_backend_variables(BackendParameters *param)
        redirection_done = param->redirection_done;
        IsBinaryUpgrade = param->IsBinaryUpgrade;
        query_id_enabled = param->query_id_enabled;
-       query_id_const_merge = param->query_id_const_merge;
+       query_id_const_merge_threshold = param->query_id_const_merge_threshold;
        max_safe_fds = param->max_safe_fds;
 
        MaxBackends = param->MaxBackends;
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 0e69e420b7..90218c6053 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -77,10 +77,10 @@ extern PGDLLIMPORT int compute_query_id;
 extern const char *CleanQuerytext(const char *query, int *location, int *len);
 extern JumbleState *JumbleQuery(Query *query);
 extern void EnableQueryId(void);
-extern void SetQueryIdConstMerge(bool value);
+extern void SetQueryIdConstMerge(int threshold);
 
 extern PGDLLIMPORT bool query_id_enabled;
-extern PGDLLIMPORT bool query_id_const_merge;
+extern PGDLLIMPORT int         query_id_const_merge_threshold;
 
 /*
  * Returns whether query identifier computation has been enabled, either
-- 
2.45.1

Reply via email to