> On Tue, Jun 15, 2021 at 05:18:50PM +0200, Dmitry Dolgov wrote: > > On Thu, Mar 18, 2021 at 04:50:02PM +0100, Dmitry Dolgov wrote: > > > On Thu, Mar 18, 2021 at 09:38:09AM -0400, David Steele wrote: > > > On 1/5/21 10:51 AM, Zhihong Yu wrote: > > > > > > > > + int lastExprLenght = 0; > > > > > > > > Did you mean to name the variable lastExprLenghth ? > > > > > > > > w.r.t. extracting to helper method, the second and third > > > > if (currentExprIdx == pgss_merge_threshold - 1) blocks are similar. > > > > It is up to you whether to create the helper method. > > > > I am fine with the current formation. > > > > > > Dmitry, thoughts on this review? > > > > Oh, right. lastExprLenghth is obviously a typo, and as we agreed that > > the helper is not strictly necessary I wanted to wait a bit hoping for > > more feedback and eventually to post an accumulated patch. Doesn't make > > sense to post another version only to fix one typo :) > > Hi, > > I've prepared a new rebased version to deal with the new way of > computing query id, but as always there is one tricky part. From what I > understand, now an external module can provide custom implementation for > query id computation algorithm. It seems natural to think this machinery > could be used instead of patch in the thread, i.e. one could create a > custom logic that will enable constants collapsing as needed, so that > same queries with different number of constants in an array will be > hashed into the same record. > > But there is a limitation in how such queries will be normalized > afterwards — to reduce level of surprise it's necessary to display the > fact that a certain query in fact had more constants that are showed in > pgss record. Ideally LocationLen needs to carry some bits of information > on what exactly could be skipped, and generate_normalized_query needs to > understand that, both are not reachable for an external module with > custom query id logic (without replicating significant part of the > existing code). Hence, a new version of the patch.
Forgot to mention a couple of people who already reviewed the patch.
>From adb36f6db4e9c923835750134a3f57543a5a911c Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthali...@gmail.com> Date: Thu, 10 Jun 2021 13:15:35 +0200 Subject: [PATCH v4] Prevent jumbling of every element in ArrayExpr pg_stat_statements produces multiple entries for queries like SELECT something FROM table WHERE col IN (1, 2, 3, ...) depending on number of parameters, because every element of ArrayExpr is jumbled. Make Consts (or any expression that could be reduced to a Const) contribute nothing to the jumble hash if they're part of a series and at position further that specified threshold. Do the same for similar queries with VALUES as well. Reviewed-by: Zhihong Yu, Sergey Dudoladov Tested-by: Chengxi Sun --- .../expected/pg_stat_statements.out | 835 +++++++++++++++++- .../pg_stat_statements/pg_stat_statements.c | 42 +- .../sql/pg_stat_statements.sql | 163 ++++ src/backend/utils/misc/guc.c | 13 + src/backend/utils/misc/queryjumble.c | 274 +++++- src/include/utils/queryjumble.h | 11 +- 6 files changed, 1323 insertions(+), 15 deletions(-) diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out index 40b5109b55..3fc1978066 100644 --- a/contrib/pg_stat_statements/expected/pg_stat_statements.out +++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out @@ -205,7 +205,7 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; INSERT INTO test VALUES(generate_series($1, $2), $3) | 1 | 10 SELECT * FROM test ORDER BY a | 1 | 12 SELECT * FROM test WHERE a > $1 ORDER BY a | 2 | 4 - SELECT * FROM test WHERE a IN ($1, $2, $3, $4, $5) | 1 | 8 + SELECT * FROM test WHERE a IN ($1, $2, $3, $4, ...) | 1 | 8 SELECT pg_stat_statements_reset() | 1 | 1 SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 | 0 UPDATE test SET b = $1 WHERE a = $2 | 6 | 6 @@ -1067,4 +1067,837 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%'; 2 (1 row) +-- +-- Consts merging +-- +SET pg_stat_statements.merge_threshold = 5; +CREATE TABLE test_merge (id int, data int); +-- IN queries +-- Normal +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(4 rows) + +-- On the merge threshold +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(4 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(3 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(3 rows) + +-- With gaps on the threshold +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(4 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(3 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(3 rows) + +-- test constants after merge +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2; + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +---------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) and data = $11 | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +-- test prepared statement +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +PREPARE query AS +SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10); +EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + PREPARE query AS +| 1 + SELECT * FROM test_merge WHERE id IN ($6, $7, $8, ...) | + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +DEALLOCATE query; +-- VALUES queries +-- Normal +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM (VALUES (1), (2), (3)) q; + column1 +--------- + 1 + 2 + 3 +(3 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4)) q; + column1 +--------- + 1 + 2 + 3 + 4 +(4 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 +(6 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 +(7 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 +(8 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(9 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM (VALUES ($1), ($2), ($3)) q | 1 + SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1 + SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 5 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(5 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3)) q; + column1 | column2 +---------+--------- + 1 | 1 + 2 | 2 + 3 | 3 +(3 rows) + +SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4)) q; + column1 | column2 +---------+--------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 +(4 rows) + +SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)) q; + column1 | column2 +---------+--------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(5 rows) + +SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)) q; + column1 | column2 +---------+--------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 +(6 rows) + +SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7)) q; + column1 | column2 +---------+--------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 +(7 rows) + +SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8)) q; + column1 | column2 +---------+--------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 +(8 rows) + +SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)) q; + column1 | column2 +---------+--------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 +(9 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM (VALUES ($1, $2), ($3, $4), ($5, $6)) q | 1 + SELECT * FROM (VALUES ($1, $2), ($3, $4), ($5, $6), ($7, $8)) q | 1 + SELECT * FROM (VALUES ($1, $2), ($3, $4), ($5, $6), ($7, $8), (...)) q | 5 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(5 rows) + +-- On the merge threshold +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM (VALUES (1), (2), (3), (4)) q; + column1 +--------- + 1 + 2 + 3 + 4 +(4 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 +(6 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 +(7 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 +(8 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(9 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1 + SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 5 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(4 rows) + +-- With gaps on the treshold +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM (VALUES (1), (2), (3), (4)) q; + column1 +--------- + 1 + 2 + 3 + 4 +(4 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(9 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1 + SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(4 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(9 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 2 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 +(6 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(9 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 2 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +-- Const evaluation +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN ((1+1), (2+2), (3+3), (4+4), (5+5), (6+6), (7+7), (8+8), (9+9), (10+10)); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (($1+$2), ($3+$4), ($5+$6), ($7+$8), (...)) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (abs(1), abs(2), abs(3), abs(4), abs(5), abs(6), abs(7), abs(8), abs(9), abs(10)); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +-------------------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN (abs($1), abs($2), abs($3), abs($4), abs(...)) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +-- Param evaluation, doesn't work yet +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +PREPARE query AS +SELECT * FROM test_merge WHERE id IN (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7), abs($8), abs($9), abs($10)); +EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------------------------------------------------------------------------------------+------- + PREPARE query AS +| 1 + SELECT * FROM test_merge WHERE id IN (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7), abs($8), abs($9), abs($10)) | + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +DEALLOCATE query; +-- On table, numeric type causes every constant being wrapped into functions. +CREATE TABLE test_merge_numeric (id int, data numeric(5, 2)); +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge_numeric WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + DROP EXTENSION pg_stat_statements; diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 09433c8c96..f546ee67e4 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -54,7 +54,9 @@ #include "funcapi.h" #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "nodes/nodeFuncs.h" #include "optimizer/planner.h" +#include "optimizer/optimizer.h" #include "parser/analyze.h" #include "parser/parsetree.h" #include "parser/scanner.h" @@ -273,6 +275,7 @@ static const struct config_enum_entry track_options[] = static int pgss_max; /* max # statements to track */ static int pgss_track; /* tracking level */ +static int pgss_merge_threshold; /* minumum number of consts for merge */ static bool pgss_track_utility; /* whether to track utility commands */ static bool pgss_track_planning; /* whether to track planning duration */ static bool pgss_save; /* whether to save stats across shutdown */ @@ -436,6 +439,19 @@ _PG_init(void) NULL, NULL); + DefineCustomIntVariable("pg_stat_statements.merge_threshold", + "After this number of duplicate constants start to merge them.", + NULL, + &pgss_merge_threshold, + 5, + 1, + INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL); + EmitWarningsOnPlaceholders("pg_stat_statements"); /* @@ -2599,6 +2615,7 @@ generate_normalized_query(JumbleState *jstate, const char *query, n_quer_loc = 0, /* Normalized query byte location */ last_off = 0, /* Offset from start for previous tok */ last_tok_len = 0; /* Length (in bytes) of that tok */ + bool merge = false; /* * Get constants' lengths (core system only gives us locations). Note @@ -2637,12 +2654,27 @@ generate_normalized_query(JumbleState *jstate, const char *query, len_to_wrt -= last_tok_len; Assert(len_to_wrt >= 0); - memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); - n_quer_loc += len_to_wrt; - /* And insert a param symbol in place of the constant token */ - n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", - i + 1 + jstate->highest_extern_param_id); + if (!jstate->clocations[i].merged) + { + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + /* And insert a param symbol in place of the constant token */ + n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", + i + 1 + jstate->highest_extern_param_id); + if (merge) + merge = false; + } + else if (!merge) + { + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + /* Merge until a non merged constant appear */ + merge = true; + n_quer_loc += sprintf(norm_query + n_quer_loc, "..."); + } quer_loc = off + tok_len; last_off = off; diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql index bc3b6493e6..d863b2075f 100644 --- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql +++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql @@ -437,4 +437,167 @@ SELECT ( SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%'; +-- +-- Consts merging +-- +SET pg_stat_statements.merge_threshold = 5; +CREATE TABLE test_merge (id int, data int); + +-- IN queries + +-- Normal +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- On the merge threshold +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- With gaps on the threshold +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- test constants after merge +SELECT pg_stat_statements_reset(); + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- test prepared statement +SELECT pg_stat_statements_reset(); + +PREPARE query AS +SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10); +EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; +DEALLOCATE query; + +-- VALUES queries + +-- Normal +SELECT pg_stat_statements_reset(); +SELECT * FROM (VALUES (1), (2), (3)) q; +SELECT * FROM (VALUES (1), (2), (3), (4)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3)) q; +SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4)) q; +SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)) q; +SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)) q; +SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7)) q; +SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8)) q; +SELECT * FROM (VALUES (1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9)) q; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- On the merge threshold +SELECT pg_stat_statements_reset(); +SELECT * FROM (VALUES (1), (2), (3), (4)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- With gaps on the treshold +SELECT pg_stat_statements_reset(); +SELECT * FROM (VALUES (1), (2), (3), (4)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Const evaluation +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN ((1+1), (2+2), (3+3), (4+4), (5+5), (6+6), (7+7), (8+8), (9+9), (10+10)); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (abs(1), abs(2), abs(3), abs(4), abs(5), abs(6), abs(7), abs(8), abs(9), abs(10)); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Param evaluation, doesn't work yet +SELECT pg_stat_statements_reset(); +PREPARE query AS +SELECT * FROM test_merge WHERE id IN (abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7), abs($8), abs($9), abs($10)); +EXECUTE query (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; +DEALLOCATE query; + +-- On table, numeric type causes every constant being wrapped into functions. +CREATE TABLE test_merge_numeric (id int, data numeric(5, 2)); +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + DROP EXTENSION pg_stat_statements; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index ee731044b6..2dd7c40749 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -3539,6 +3539,19 @@ static struct config_int ConfigureNamesInt[] = check_client_connection_check_interval, NULL, NULL }, + { + {"const_merge_threshold", PGC_SUSET, STATS_MONITORING, + gettext_noop("Sets the minimal numer of constants in an array" + " after which they will be merged"), + gettext_noop("Computing query id for an array of constants" + " will produce the same id for all arrays with length" + " larger than this value."), + }, + &const_merge_threshold, + 5, 0, INT_MAX / 2, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c index 9f2cd1f127..a9d09d08a0 100644 --- a/src/backend/utils/misc/queryjumble.c +++ b/src/backend/utils/misc/queryjumble.c @@ -42,6 +42,9 @@ /* GUC parameters */ int compute_query_id = COMPUTE_QUERY_ID_AUTO; +/* */ +int const_merge_threshold = 5; + /* True when compute_query_id is ON, or AUTO and a module requests them */ bool query_id_enabled = false; @@ -52,7 +55,8 @@ static void JumbleQueryInternal(JumbleState *jstate, Query *query); static void JumbleRangeTable(JumbleState *jstate, List *rtable); static void JumbleRowMarks(JumbleState *jstate, List *rowMarks); static void JumbleExpr(JumbleState *jstate, Node *node); -static void RecordConstLocation(JumbleState *jstate, int location); +static bool JumbleExprList(JumbleState *jstate, Node *node); +static void RecordConstLocation(JumbleState *jstate, int location, bool merged); /* * Given a possibly multi-statement source string, confine our attention to the @@ -119,7 +123,7 @@ JumbleQuery(Query *query, const char *querytext) jstate->jumble_len = 0; jstate->clocations_buf_size = 32; jstate->clocations = (LocationLen *) - palloc(jstate->clocations_buf_size * sizeof(LocationLen)); + palloc0(jstate->clocations_buf_size * sizeof(LocationLen)); jstate->clocations_count = 0; jstate->highest_extern_param_id = 0; @@ -297,7 +301,7 @@ JumbleRangeTable(JumbleState *jstate, List *rtable) JumbleExpr(jstate, (Node *) rte->tablefunc); break; case RTE_VALUES: - JumbleExpr(jstate, (Node *) rte->values_lists); + JumbleExprList(jstate, (Node *) rte->values_lists); break; case RTE_CTE: @@ -341,6 +345,261 @@ JumbleRowMarks(JumbleState *jstate, List *rowMarks) } } +/* + * find_const_walker + * Locate all the Const nodes in an expression tree. + * + * Caller must provide an empty list where constants will be collected. + */ +static bool +find_const_walker(Node *node, List **constants) +{ + if (node == NULL) + return false; + + if (IsA(node, Const)) + { + *constants = lappend(*constants, (Const *) node); + return false; + } + + return expression_tree_walker(node, find_const_walker, (void *) constants); +} + +static bool +JumbleExprList(JumbleState *jstate, Node *node) +{ + ListCell *temp; + Node *firstExpr = NULL; + bool merged = false; + bool allConst = true; + int currentExprIdx; + int lastExprLength = 0; + + if (node == NULL) + return merged; + + /* Guard against stack overflow due to overly complex expressions */ + check_stack_depth(); + + Assert(IsA(node, List)); + firstExpr = eval_const_expressions(NULL, (Node *) lfirst(list_head((List *) node))); + + /* + * We always emit the node's NodeTag, then any additional fields that are + * considered significant, and then we recurse to any child nodes. + */ + APP_JUMB(node->type); + + /* + * If the first expression is a constant or a list of constants, try to + * merge the following if they're constants as well. Otherwise do + * JumbleExpr as usual. + */ + switch (nodeTag(firstExpr)) + { + case T_List: + currentExprIdx = 0; + + foreach(temp, (List *) node) + { + List *expr = (List *) lfirst(temp); + ListCell *lc; + + foreach(lc, expr) + { + Node * subExpr = eval_const_expressions(NULL, (Node *) lfirst(lc)); + + if (!IsA(subExpr, Const)) + { + allConst = false; + break; + } + } + + if (allConst && currentExprIdx >= const_merge_threshold - 1) + { + merged = true; + + /* + * This hash is going to accumulate the following merged + * statements + */ + if (currentExprIdx == const_merge_threshold - 1) + { + JumbleExpr(jstate, (Node *) expr); + + /* + * An expr consisting of constants is already found, + * JumbleExpr must record it. Mark all the constants as + * merged, they will be the first merged but still + * present in the statement query. + */ + Assert(jstate->clocations_count > lastExprLength - 1); + for (int i = 1; i < lastExprLength + 1; i++) + { + LocationLen *loc; + loc = &jstate->clocations[jstate->clocations_count - i]; + loc->merged = true; + } + currentExprIdx++; + } + else + foreach(lc, expr) + { + /* + * eval_const_expressions does not provide real + * Const with valid const location, which we need + * for generate_normalized_query. Extract such real + * constants manually. We need only the last one, + * to find out where the current expression + * actually ends. */ + Const *lastConst; + List *constants = NIL; + find_const_walker((Node *) lfirst(lc), &constants); + lastConst = (Const *) llast(constants); + RecordConstLocation(jstate, lastConst->location, true); + } + + continue; + } + + JumbleExpr(jstate, (Node *) expr); + currentExprIdx++; + lastExprLength = expr->length; + } + break; + + case T_Const: + currentExprIdx = 0; + + foreach(temp, (List *) node) + { + Node *expr = (Node *) lfirst(temp); + Node *evalExpr = eval_const_expressions(NULL, expr); + + if (IsA(evalExpr, Const) && currentExprIdx >= const_merge_threshold - 1) + { + merged = true; + + /* + * This hash is going to accumulate the following merged + * statements + */ + if (currentExprIdx == const_merge_threshold - 1) + { + JumbleExpr(jstate, expr); + + /* + * A const expr is already found, so JumbleExpr must + * record it. Mark it as merged, it will be the first + * merged but still present in the statement query. + */ + Assert(jstate->clocations_count > lastExprLength - 1); + for (int i = 1; i < lastExprLength + 1; i++) + { + LocationLen *loc; + loc = &jstate->clocations[jstate->clocations_count - i]; + loc->merged = true; + } + currentExprIdx++; + } + else + { + /* + * eval_const_expressions does not provide real + * Const with valid const location, which we need + * for generate_normalized_query. Extract such real + * constants manually. Take into account that even with + * a single expression it could potentially contains + * many constants, we need only the last one, to find + * out where the current expression actually ends. + */ + Const *lastConst; + List *constants = NIL; + find_const_walker(expr, &constants); + lastConst = (Const *) llast(constants); + + RecordConstLocation(jstate, lastConst->location, true); + } + + continue; + } + + JumbleExpr(jstate, expr); + currentExprIdx++; + + if (currentExprIdx == const_merge_threshold -1) + { + // The next expression will be eligible for merging check. + // For it to happen correctly remember the number of + // constants in the previous expression. + List *constants = NIL; + find_const_walker(expr, &constants); + lastExprLength = constants->length; + } + } + break; + + case T_Param: + currentExprIdx = 0; + + foreach(temp, (List *) node) + { + Node *expr = (Node *) lfirst(temp); + Param *p = (Param *) expr; + + if (!equal(expr, firstExpr) && IsA(expr, Param) && + currentExprIdx >= const_merge_threshold - 1) + { + + merged = true; + + /* + * This hash is going to accumulate the following merged + * statements + */ + if (currentExprIdx == const_merge_threshold - 1) + { + JumbleExpr(jstate, expr); + + /* + * A const expr is already found, so JumbleExpr must + * record it. Mark it as merged, it will be the first + * merged but still present in the statement query. + */ + Assert(jstate->clocations_count > 0); + jstate->clocations[jstate->clocations_count - 1].merged = true; + currentExprIdx++; + } + else + RecordConstLocation(jstate, p->location, true); + + continue; + } + + JumbleExpr(jstate, expr); + + /* + * To allow merging of parameters as well in + * generate_normalized_query, remember it as a constant. + */ + RecordConstLocation(jstate, p->location, false); + currentExprIdx++; + } + break; + + default: + foreach(temp, (List *) node) + { + JumbleExpr(jstate, (Node *) lfirst(temp)); + } + break; + } + + return merged; +} + /* * Jumble an expression tree * @@ -390,7 +649,7 @@ JumbleExpr(JumbleState *jstate, Node *node) /* We jumble only the constant's type, not its value */ APP_JUMB(c->consttype); /* Also, record its parse location for query normalization */ - RecordConstLocation(jstate, c->location); + RecordConstLocation(jstate, c->location, false); } break; case T_Param: @@ -579,7 +838,7 @@ JumbleExpr(JumbleState *jstate, Node *node) } break; case T_ArrayExpr: - JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements); + JumbleExprList(jstate, (Node *) ((ArrayExpr *) node)->elements); break; case T_RowExpr: JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args); @@ -832,11 +1091,11 @@ JumbleExpr(JumbleState *jstate, Node *node) } /* - * Record location of constant within query string of query tree + * Record location of constant or a parameter within query string of query tree * that is currently being walked. */ static void -RecordConstLocation(JumbleState *jstate, int location) +RecordConstLocation(JumbleState *jstate, int location, bool merged) { /* -1 indicates unknown or undefined location */ if (location >= 0) @@ -851,6 +1110,7 @@ RecordConstLocation(JumbleState *jstate, int location) sizeof(LocationLen)); } jstate->clocations[jstate->clocations_count].location = location; + jstate->clocations[jstate->clocations_count].merged = merged; /* initialize lengths to -1 to simplify third-party module usage */ jstate->clocations[jstate->clocations_count].length = -1; jstate->clocations_count++; diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h index 1f4d062bab..ea9f6d8ca1 100644 --- a/src/include/utils/queryjumble.h +++ b/src/include/utils/queryjumble.h @@ -15,6 +15,8 @@ #define QUERYJUBLE_H #include "nodes/parsenodes.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/optimizer.h" #define JUMBLE_SIZE 1024 /* query serialization buffer size */ @@ -25,6 +27,8 @@ typedef struct LocationLen { int location; /* start offset in query text */ int length; /* length in bytes, or -1 to ignore */ + bool merged; /* whether or not the location was marked as + duplicate */ } LocationLen; /* @@ -39,7 +43,10 @@ typedef struct JumbleState /* Number of bytes used in jumble[] */ Size jumble_len; - /* Array of locations of constants that should be removed */ + /* + * Array of locations of constants that should be removed, or parameters + * that are already replaced, but could be also processed to be merged + */ LocationLen *clocations; /* Allocated length of clocations array */ @@ -62,7 +69,7 @@ typedef enum /* GUC parameters */ extern int compute_query_id; - +extern int const_merge_threshold; extern const char *CleanQuerytext(const char *query, int *location, int *len); extern JumbleState *JumbleQuery(Query *query, const char *querytext); -- 2.26.3