> On Sun, Jan 29, 2023 at 09:56:02AM -0300, Marcos Pegoraro wrote: > Em dom., 29 de jan. de 2023 às 09:24, Dmitry Dolgov <9erthali...@gmail.com> > escreveu: > > > > On Fri, Jan 27, 2023 at 08:15:29PM +0530, vignesh C wrote: > > > The patch does not apply on top of HEAD as in [1], please post a rebased > > patch: > > > > Thanks. I think this one should do the trick. > > > > There is a typo on DOC part > + and it's length is larger than <varname> const_merge_threshold > </varname>, > + then array elements will contribure nothing to the query > identifier. > + Thus the query will get the same identifier no matter how many > constants > > That "contribure" should be "contribute"
Indeed, thanks for noticing.
>From 1d980ef5f556c1684ea5c991965b2375bbdd139b Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthali...@gmail.com> Date: Sun, 24 Jul 2022 11:43:25 +0200 Subject: [PATCH v11] Prevent jumbling of every element in ArrayExpr pg_stat_statements produces multiple entries for queries like SELECT something FROM table WHERE col IN (1, 2, 3, ...) depending on number of parameters, because every element of ArrayExpr is jumbled. In certain situations it's undesirable, especially if the list becomes too large. Make Const expressions contribute nothing to the jumble hash if they're a part of an ArrayExpr, which length is larger than specified threshold. Allow to configure the threshold via the new GUC const_merge_threshold with the default value zero, which disables this feature. Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane Tested-by: Chengxi Sun --- .../expected/pg_stat_statements.out | 412 ++++++++++++++++++ .../pg_stat_statements/pg_stat_statements.c | 33 +- .../sql/pg_stat_statements.sql | 107 +++++ doc/src/sgml/config.sgml | 26 ++ doc/src/sgml/pgstatstatements.sgml | 28 +- src/backend/nodes/queryjumblefuncs.c | 105 ++++- src/backend/utils/misc/guc_tables.c | 13 + src/backend/utils/misc/postgresql.conf.sample | 2 +- src/include/nodes/queryjumble.h | 5 +- 9 files changed, 712 insertions(+), 19 deletions(-) diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out index 9ac5c87c3a..f18f34ae5b 100644 --- a/contrib/pg_stat_statements/expected/pg_stat_statements.out +++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out @@ -1141,4 +1141,416 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%'; 2 (1 row) +-- +-- Consts merging +-- +CREATE TABLE test_merge (id int, data int); +-- IN queries +-- No merging +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +--------------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(7 rows) + +-- Normal +SET const_merge_threshold = 5; +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1 + SELECT * FROM test_merge WHERE id IN (...) | 5 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(4 rows) + +-- On the merge threshold +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT * FROM test_merge WHERE id IN (...) | 6 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(4 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (...) | 6 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(3 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (...) | 5 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(3 rows) + +-- With gaps on the threshold +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT * FROM test_merge WHERE id IN (...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(4 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (...) | 2 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(3 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (...) | 2 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(3 rows) + +-- test constants after merge +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2; + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (...) and data = $3 | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +-- On table, numeric type causes every constant being wrapped into functions. +CREATE TABLE test_merge_numeric (id int, data numeric(5, 2)); +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge_numeric WHERE id IN (...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +-- Test find_const_walker +WITH cte AS ( + SELECT 'const' as const FROM test_merge +) +SELECT ARRAY['a', 'b', 'c', const::varchar] AS result +FROM cte; + result +-------- +(0 rows) + +RESET const_merge_threshold; DROP EXTENSION pg_stat_statements; diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index ad1fe44496..b26ae1f234 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -2666,6 +2666,9 @@ generate_normalized_query(JumbleState *jstate, const char *query, n_quer_loc = 0, /* Normalized query byte location */ last_off = 0, /* Offset from start for previous tok */ last_tok_len = 0; /* Length (in bytes) of that tok */ + bool skip = false; /* Signals that certain constants are + merged together and have to be skipped */ + /* * Get constants' lengths (core system only gives us locations). Note @@ -2689,7 +2692,6 @@ generate_normalized_query(JumbleState *jstate, const char *query, { int off, /* Offset from start for cur tok */ tok_len; /* Length (in bytes) of that tok */ - off = jstate->clocations[i].location; /* Adjust recorded location if we're dealing with partial string */ off -= query_loc; @@ -2704,12 +2706,31 @@ generate_normalized_query(JumbleState *jstate, const char *query, len_to_wrt -= last_tok_len; Assert(len_to_wrt >= 0); - memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); - n_quer_loc += len_to_wrt; - /* And insert a param symbol in place of the constant token */ - n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", - i + 1 + jstate->highest_extern_param_id); + /* Normal path, non merged constant */ + if (!jstate->clocations[i].merged) + { + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + /* And insert a param symbol in place of the constant token */ + n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", + i + 1 + jstate->highest_extern_param_id); + + /* In case previous constants were merged away, stop doing that */ + if (skip) + skip = false; + } + /* The firsts merged constant */ + else if (!skip) + { + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + /* Skip the following until a non merged constant appear */ + skip = true; + n_quer_loc += sprintf(norm_query + n_quer_loc, "..."); + } quer_loc = off + tok_len; last_off = off; diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql index 8f5c866225..8f9d284ed3 100644 --- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql +++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql @@ -464,4 +464,111 @@ SELECT ( SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%'; +-- +-- Consts merging +-- +CREATE TABLE test_merge (id int, data int); + +-- IN queries + +-- No merging +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Normal +SET const_merge_threshold = 5; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- On the merge threshold +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- With gaps on the threshold +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- test constants after merge +SELECT pg_stat_statements_reset(); + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- On table, numeric type causes every constant being wrapped into functions. +CREATE TABLE test_merge_numeric (id int, data numeric(5, 2)); +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Test find_const_walker +WITH cte AS ( + SELECT 'const' as const FROM test_merge +) +SELECT ARRAY['a', 'b', 'c', const::varchar] AS result +FROM cte; + +RESET const_merge_threshold; + DROP EXTENSION pg_stat_statements; diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index f985afc009..e4306cdb89 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8278,6 +8278,32 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; </listitem> </varlistentry> + <varlistentry id="guc-const-merge-threshold" xreflabel="const_merge_treshold"> + <term><varname>const_merge_threshold</varname> (<type>integer</type>) + <indexterm> + <primary><varname>const_merge_threshold</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Specifies the minimal length of an array to be eligible for constants + collapsing. Normally every element of an array contributes to a query + identifier, which means the same query containing an array of constants + could get multiple different identifiers, depending of size of the + array. If this parameter is nonzero, the array contains only constants + and it's length is larger than <varname> const_merge_threshold </varname>, + then array elements will contribute nothing to the query identifier. + Thus the query will get the same identifier no matter how many constants + it contains. + + Zero turns off collapsing, and it is the default value. + + The <xref linkend="pgstatstatements"/> extension will represent such + collapsed constants via <literal>'(...)'</literal>. + </para> + </listitem> + </varlistentry> + </variablelist> </sect2> diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml index efc36da602..f7e2e9fe85 100644 --- a/doc/src/sgml/pgstatstatements.sgml +++ b/doc/src/sgml/pgstatstatements.sgml @@ -519,10 +519,30 @@ <para> In some cases, queries with visibly different texts might get merged into a single <structname>pg_stat_statements</structname> entry. Normally this will happen - only for semantically equivalent queries, but there is a small chance of - hash collisions causing unrelated queries to be merged into one entry. - (This cannot happen for queries belonging to different users or databases, - however.) + only for semantically equivalent queries, for example when queries are + different only in values of constants they use. Another valid possibility for + merging queries into a single <structname>pg_stat_statements</structname> + entry is when <xref linkend="guc-const-merge-threshold"/> is nonzero and the + queries contain an array with more than <varname>const_merge_threshold</varname> + constants in it: + +<screen> +=# SET const_merge_threshold = 5; +=# SELECT pg_stat_statements_reset(); +=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7); +=# SELECT query, calls FROM pg_stat_statements; +-[ RECORD 1 ]------------------------------ +query | SELECT * FROM test WHERE a IN (...) +calls | 2 +-[ RECORD 2 ]------------------------------ +query | SELECT pg_stat_statements_reset() +calls | 1 +</screen> + + But there is a small chance of hash collisions causing unrelated queries to + be merged into one entry. (This cannot happen for queries belonging to + different users or databases, however.) </para> <para> diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index 16084842a3..1ea1cc66f8 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -42,6 +42,9 @@ /* GUC parameters */ int compute_query_id = COMPUTE_QUERY_ID_AUTO; +/* Minimal numer of constants in an array after which they will be merged */ +int const_merge_threshold = 0; + /* True when compute_query_id is ON, or AUTO and a module requests them */ bool query_id_enabled = false; @@ -53,7 +56,8 @@ static void JumbleQueryInternal(JumbleState *jstate, Query *query); static void JumbleRangeTable(JumbleState *jstate, List *rtable); static void JumbleRowMarks(JumbleState *jstate, List *rowMarks); static void JumbleExpr(JumbleState *jstate, Node *node); -static void RecordConstLocation(JumbleState *jstate, int location); +static void JumbleExprList(JumbleState *jstate, List *node); +static void RecordConstLocation(JumbleState *jstate, int location, bool merged); /* * Given a possibly multi-statement source string, confine our attention to the @@ -120,7 +124,7 @@ JumbleQuery(Query *query, const char *querytext) jstate->jumble_len = 0; jstate->clocations_buf_size = 32; jstate->clocations = (LocationLen *) - palloc(jstate->clocations_buf_size * sizeof(LocationLen)); + palloc0(jstate->clocations_buf_size * sizeof(LocationLen)); jstate->clocations_count = 0; jstate->highest_extern_param_id = 0; @@ -343,6 +347,90 @@ JumbleRowMarks(JumbleState *jstate, List *rowMarks) } } +/* + * Jubmle a list of expressions + * + * This function enforces const_merge_threshold limitation, i.e. if the + * provided list contains only constant expressions and its length is greater + * than or equal to const_merge_threshold, such list will not contribute to + * jumble. Otherwise it falls back to JumbleExpr. + */ +static void +JumbleExprList(JumbleState *jstate, List *elements) +{ + ListCell *temp; + Node *firstExpr = NULL; + bool allConst = true; + + if (elements == NULL) + return; + + if (const_merge_threshold == 0) + { + /* Merging is disabled, process everything one by one. */ + JumbleExpr(jstate, (Node *) elements); + return; + } + + if (elements->length < const_merge_threshold) + { + /* The list is not large enough to collapse it. */ + JumbleExpr(jstate, (Node *) elements); + return; + } + + /* Guard against stack overflow due to overly complex expressions */ + check_stack_depth(); + + firstExpr = linitial(elements); + + /* + * We always emit the node's NodeTag, then any additional fields that are + * considered significant, and then we recurse to any child nodes. + */ + APP_JUMB(elements->type); + + /* + * If the first expression is a constant, verify if the following elements + * are constants as well. If yes, the list is eligible for collapsing -- + * mark it as merged and return from the function. + */ + if (IsA(firstExpr, Const)) + { + foreach(temp, elements) + { + Node *expr = (Node *) lfirst(temp); + + if (!IsA(expr, Const)) + { + allConst = false; + break; + } + } + + if (allConst) + { + Const *firstConst = (Const *) firstExpr; + Const *lastConst = llast_node(Const, elements); + + /* + * First and last constants are needed to identify which part of + * the query to skip in generate_normalized_query. + */ + RecordConstLocation(jstate, firstConst->location, true); + RecordConstLocation(jstate, lastConst->location, true); + return; + } + } + + /* + * If we end up here, it means no constants merging is possible, process + * the list as usual. + */ + JumbleExpr(jstate, (Node *) elements); + return; +} + /* * Jumble an expression tree * @@ -392,7 +480,7 @@ JumbleExpr(JumbleState *jstate, Node *node) /* We jumble only the constant's type, not its value */ APP_JUMB(c->consttype); /* Also, record its parse location for query normalization */ - RecordConstLocation(jstate, c->location); + RecordConstLocation(jstate, c->location, false); } break; case T_Param: @@ -581,7 +669,7 @@ JumbleExpr(JumbleState *jstate, Node *node) } break; case T_ArrayExpr: - JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements); + JumbleExprList(jstate, (List *) ((ArrayExpr *) node)->elements); break; case T_RowExpr: JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args); @@ -835,11 +923,13 @@ JumbleExpr(JumbleState *jstate, Node *node) } /* - * Record location of constant within query string of query tree - * that is currently being walked. + * Record location of constant within query string of query tree that is + * currently being walked. Merged argument signals that the constant do not + * contribute to the jumble hash, and any reader of constants array may want to + * use this information to represent such constants differently. */ static void -RecordConstLocation(JumbleState *jstate, int location) +RecordConstLocation(JumbleState *jstate, int location, bool merged) { /* -1 indicates unknown or undefined location */ if (location >= 0) @@ -854,6 +944,7 @@ RecordConstLocation(JumbleState *jstate, int location) sizeof(LocationLen)); } jstate->clocations[jstate->clocations_count].location = location; + jstate->clocations[jstate->clocations_count].merged = merged; /* initialize lengths to -1 to simplify third-party module usage */ jstate->clocations[jstate->clocations_count].length = -1; jstate->clocations_count++; diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 4ac808ed22..663aded290 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -3467,6 +3467,19 @@ struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"const_merge_threshold", PGC_SUSET, STATS_MONITORING, + gettext_noop("Sets the minimal numer of constants in an array" + " after which they will be merged"), + gettext_noop("Computing query id for an array of constants" + " will produce the same id for all arrays with length" + " larger than this value. Zero turns off merging."), + }, + &const_merge_threshold, + 0, 0, INT_MAX, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index d06074b86f..0594eb17b2 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -627,7 +627,7 @@ #log_parser_stats = off #log_planner_stats = off #log_executor_stats = off - +#const_merge_threshold = 0 #------------------------------------------------------------------------------ # AUTOVACUUM diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h index 204b8f74fd..4410e2cf61 100644 --- a/src/include/nodes/queryjumble.h +++ b/src/include/nodes/queryjumble.h @@ -15,6 +15,7 @@ #define QUERYJUBLE_H #include "nodes/parsenodes.h" +#include "nodes/nodeFuncs.h" /* * Struct for tracking locations/lengths of constants during normalization @@ -23,6 +24,8 @@ typedef struct LocationLen { int location; /* start offset in query text */ int length; /* length in bytes, or -1 to ignore */ + bool merged; /* whether or not the location was marked as + not contributing to jumble */ } LocationLen; /* @@ -61,7 +64,7 @@ enum ComputeQueryIdType /* GUC parameters */ extern PGDLLIMPORT int compute_query_id; - +extern PGDLLIMPORT int const_merge_threshold; extern const char *CleanQuerytext(const char *query, int *location, int *len); extern JumbleState *JumbleQuery(Query *query, const char *querytext); -- 2.32.0