> On Wed, Aug 12, 2020 at 06:19:02PM +0200, Dmitry Dolgov wrote: > > I would like to start another thread to follow up on [1], mostly to bump up > the > topic. Just to remind, it's about how pg_stat_statements jumbling ArrayExpr in > queries like: > > SELECT something FROM table WHERE col IN (1, 2, 3, ...) > > The current implementation produces different jumble hash for every different > number of arguments for essentially the same query. Unfortunately a lot of > ORMs > like to generate these types of queries, which in turn leads to > pg_stat_statements pollution. Ideally we want to prevent this and have only > one > record for such a query. > > As the result of [1] I've identified two highlighted approaches to improve > this > situation: > > * Reduce the generated ArrayExpr to an array Const immediately, in cases where > all the inputs are Consts. > > * Make repeating Const to contribute nothing to the resulting hash. > > I've tried to prototype both approaches to find out pros/cons and be more > specific. Attached patches could not be considered a completed piece of work, > but they seem to work, mostly pass the tests and demonstrate the point. I > would > like to get some high level input about them and ideally make it clear what is > the preferred solution to continue with.
I've implemented the second approach mentioned above, this version was tested on our test clusters for some time without visible issues. Will create a CF item and would appreciate any feedback.
>From ece03928184d88add6629f5eba3ccc9e4fa5e7b8 Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthali...@gmail.com> Date: Tue, 17 Nov 2020 16:18:08 +0100 Subject: [PATCH v1] Prevent jumbling of every element in ArrayExpr pg_stat_statements produces multiple entries for queries like SELECT something FROM table WHERE col IN (1, 2, 3, ...) depending on number of parameters, because every element of ArrayExpr is jumbled. Make Consts contribute nothing to the jumble hash if they're part of a series and at position further that specified threshold. Do the same for similar queries with VALUES as well. --- .../expected/pg_stat_statements.out | 614 +++++++++++++++++- .../pg_stat_statements/pg_stat_statements.c | 208 +++++- .../sql/pg_stat_statements.sql | 115 ++++ 3 files changed, 925 insertions(+), 12 deletions(-) diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out index 2a303a7f07..9d0fe074ae 100644 --- a/contrib/pg_stat_statements/expected/pg_stat_statements.out +++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out @@ -205,7 +205,7 @@ SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; INSERT INTO test VALUES(generate_series($1, $2), $3) | 1 | 10 SELECT * FROM test ORDER BY a | 1 | 12 SELECT * FROM test WHERE a > $1 ORDER BY a | 2 | 4 - SELECT * FROM test WHERE a IN ($1, $2, $3, $4, $5) | 1 | 8 + SELECT * FROM test WHERE a IN ($1, $2, $3, $4, ...) | 1 | 8 SELECT pg_stat_statements_reset() | 1 | 1 SELECT query, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 | 0 UPDATE test SET b = $1 WHERE a = $2 | 6 | 6 @@ -861,4 +861,616 @@ SELECT query, plans, calls, rows FROM pg_stat_statements ORDER BY query COLLATE SELECT query, plans, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 | 0 | 0 (6 rows) +-- +-- Consts merging +-- +SET pg_stat_statements.merge_threshold = 5; +CREATE TABLE test_merge (id int); +-- IN queries +-- Normal +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3); + id +---- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id +---- +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id +---- +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id +---- +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id +---- +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id +---- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(4 rows) + +-- On the merge threshold +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); + id +---- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); + id +---- +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id +---- +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id +---- +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id +---- +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id +---- +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id +---- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(4 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); + id +---- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id +---- +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id +---- +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id +---- +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id +---- +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id +---- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(3 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id +---- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id +---- +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id +---- +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id +---- +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id +---- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(3 rows) + +-- With gaps on the threshold +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); + id +---- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id +---- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(4 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); + id +---- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id +---- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(3 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id +---- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id +---- +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(3 rows) + +-- VALUES queries +-- Normal +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM (VALUES (1), (2), (3)) q; + column1 +--------- + 1 + 2 + 3 +(3 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4)) q; + column1 +--------- + 1 + 2 + 3 + 4 +(4 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 +(6 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 +(7 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 +(8 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(9 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM (VALUES ($1), ($2), ($3)) q | 1 + SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1 + SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 5 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(5 rows) + +-- On the merge threshold +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM (VALUES (1), (2), (3), (4)) q; + column1 +--------- + 1 + 2 + 3 + 4 +(4 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 +(6 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 +(7 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 +(8 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(9 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1 + SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 5 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(4 rows) + +-- With gaps on the treshold +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM (VALUES (1), (2), (3), (4)) q; + column1 +--------- + 1 + 2 + 3 + 4 +(4 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(9 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM (VALUES ($1), ($2), ($3), ($4)) q | 1 + SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(4 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 +(5 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(9 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 2 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 +(6 rows) + +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; + column1 +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 +(9 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM (VALUES ($1), ($2), ($3), ($4), (...)) q | 2 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + DROP EXTENSION pg_stat_statements; diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index dd963c4644..389a2e8e18 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -231,6 +231,8 @@ typedef struct pgssLocationLen { int location; /* start offset in query text */ int length; /* length in bytes, or -1 to ignore */ + bool merged; /* whether or not the location was marked as + duplicate */ } pgssLocationLen; /* @@ -299,6 +301,7 @@ static const struct config_enum_entry track_options[] = static int pgss_max; /* max # statements to track */ static int pgss_track; /* tracking level */ +static int pgss_merge_threshold; /* minumum number of consts for merge */ static bool pgss_track_utility; /* whether to track utility commands */ static bool pgss_track_planning; /* whether to track planning duration */ static bool pgss_save; /* whether to save stats across shutdown */ @@ -374,7 +377,9 @@ static void JumbleQuery(pgssJumbleState *jstate, Query *query); static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable); static void JumbleRowMarks(pgssJumbleState *jstate, List *rowMarks); static void JumbleExpr(pgssJumbleState *jstate, Node *node); -static void RecordConstLocation(pgssJumbleState *jstate, int location); +static bool JumbleExprList(pgssJumbleState *jstate, Node *node); +static void RecordConstLocation(pgssJumbleState *jstate, int location, + bool merged); static char *generate_normalized_query(pgssJumbleState *jstate, const char *query, int query_loc, int *query_len_p); static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query, @@ -460,6 +465,19 @@ _PG_init(void) NULL, NULL); + DefineCustomIntVariable("pg_stat_statements.merge_threshold", + "After this number of duplicating constants start to merge them.", + NULL, + &pgss_merge_threshold, + 5, + 1, + INT_MAX, + PGC_SUSET, + 0, + NULL, + NULL, + NULL); + EmitWarningsOnPlaceholders("pg_stat_statements"); /* @@ -863,7 +881,7 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query) jstate.jumble_len = 0; jstate.clocations_buf_size = 32; jstate.clocations = (pgssLocationLen *) - palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen)); + palloc0(jstate.clocations_buf_size * sizeof(pgssLocationLen)); jstate.clocations_count = 0; jstate.highest_extern_param_id = 0; @@ -2655,7 +2673,7 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable) JumbleExpr(jstate, (Node *) rte->tablefunc); break; case RTE_VALUES: - JumbleExpr(jstate, (Node *) rte->values_lists); + JumbleExprList(jstate, (Node *) rte->values_lists); break; case RTE_CTE: @@ -2699,6 +2717,155 @@ JumbleRowMarks(pgssJumbleState *jstate, List *rowMarks) } } +static bool +JumbleExprList(pgssJumbleState *jstate, Node *node) +{ + ListCell *temp; + Node *firstExpr = NULL; + bool merged = false; + bool allConst = true; + int currentExprIdx; + + if (node == NULL) + return merged; + + Assert(IsA(node, List)); + firstExpr = (Node *) lfirst(list_head((List *) node)); + + /* Guard against stack overflow due to overly complex expressions */ + check_stack_depth(); + + /* + * We always emit the node's NodeTag, then any additional fields that are + * considered significant, and then we recurse to any child nodes. + */ + APP_JUMB(node->type); + + /* + * If the first expression is a constant or a list of constants, try to + * merge the following if they're constants as well. Otherwise do + * JumbleExpr as usual. + */ + switch (nodeTag(firstExpr)) + { + case T_List: + currentExprIdx = 0; + + foreach(temp, (List *) firstExpr) + { + Node * subExpr = (Node *) lfirst(temp); + + if (!IsA(subExpr, Const)) + { + allConst = false; + break; + } + } + + foreach(temp, (List *) node) + { + Node *expr = (Node *) lfirst(temp); + ListCell *lc; + + foreach(lc, (List *) expr) + { + Node * subExpr = (Node *) lfirst(lc); + + if (!IsA(subExpr, Const)) + { + allConst = false; + break; + } + } + + if (!equal(expr, firstExpr) && allConst && + currentExprIdx >= pgss_merge_threshold - 1) + { + Const *c = (Const *) linitial((List *) expr); + + merged = true; + + /* + * This hash is going to accumulate the following merged + * statements + */ + if (currentExprIdx == pgss_merge_threshold - 1) + { + JumbleExpr(jstate, expr); + + /* + * A const expr is already found, so JumbleExpr must + * record it. Mark it as merged, it will be the first + * merged but still present in the statement query. + */ + Assert(jstate->clocations_count > 0); + jstate->clocations[jstate->clocations_count - 1].merged = true; + currentExprIdx++; + } + else + RecordConstLocation(jstate, c->location, true); + + continue; + } + + JumbleExpr(jstate, expr); + currentExprIdx++; + } + break; + + case T_Const: + currentExprIdx = 0; + + foreach(temp, (List *) node) + { + Node *expr = (Node *) lfirst(temp); + + if (!equal(expr, firstExpr) && IsA(expr, Const) && + currentExprIdx >= pgss_merge_threshold - 1) + { + Const *c = (Const *) expr; + + merged = true; + + /* + * This hash is going to accumulate the following merged + * statements + */ + if (currentExprIdx == pgss_merge_threshold - 1) + { + JumbleExpr(jstate, expr); + + /* + * A const expr is already found, so JumbleExpr must + * record it. Mark it as merged, it will be the first + * merged but still present in the statement query. + */ + Assert(jstate->clocations_count > 0); + jstate->clocations[jstate->clocations_count - 1].merged = true; + currentExprIdx++; + } + else + RecordConstLocation(jstate, c->location, true); + + continue; + } + + JumbleExpr(jstate, expr); + currentExprIdx++; + } + break; + + default: + foreach(temp, (List *) node) + { + JumbleExpr(jstate, (Node *) lfirst(temp)); + } + break; + } + + return merged; +} + /* * Jumble an expression tree * @@ -2748,7 +2915,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) /* We jumble only the constant's type, not its value */ APP_JUMB(c->consttype); /* Also, record its parse location for query normalization */ - RecordConstLocation(jstate, c->location); + RecordConstLocation(jstate, c->location, false); } break; case T_Param: @@ -2936,7 +3103,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) } break; case T_ArrayExpr: - JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements); + JumbleExprList(jstate, (Node *) ((ArrayExpr *) node)->elements); break; case T_RowExpr: JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args); @@ -3193,7 +3360,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node) * that is currently being walked. */ static void -RecordConstLocation(pgssJumbleState *jstate, int location) +RecordConstLocation(pgssJumbleState *jstate, int location, bool merged) { /* -1 indicates unknown or undefined location */ if (location >= 0) @@ -3210,6 +3377,8 @@ RecordConstLocation(pgssJumbleState *jstate, int location) jstate->clocations[jstate->clocations_count].location = location; /* initialize lengths to -1 to simplify fill_in_constant_lengths */ jstate->clocations[jstate->clocations_count].length = -1; + jstate->clocations[jstate->clocations_count].merged = merged; + jstate->clocations_count++; } } @@ -3246,6 +3415,7 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query, n_quer_loc = 0, /* Normalized query byte location */ last_off = 0, /* Offset from start for previous tok */ last_tok_len = 0; /* Length (in bytes) of that tok */ + bool merge = false; /* * Get constants' lengths (core system only gives us locations). Note @@ -3284,12 +3454,28 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query, len_to_wrt -= last_tok_len; Assert(len_to_wrt >= 0); - memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); - n_quer_loc += len_to_wrt; - /* And insert a param symbol in place of the constant token */ - n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", - i + 1 + jstate->highest_extern_param_id); + /* If merging, do not copy anything in between constants */ + if (!merge) + { + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + } + + if (!jstate->clocations[i].merged) + { + /* And insert a param symbol in place of the constant token */ + n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", + i + 1 + jstate->highest_extern_param_id); + if (merge) + merge = false; + } + else if (!merge) + { + /* Merge until a non merged constant appear */ + merge = true; + n_quer_loc += sprintf(norm_query + n_quer_loc, "..."); + } quer_loc = off + tok_len; last_off = off; diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql index e9f5bb84e3..c7d589d867 100644 --- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql +++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql @@ -358,4 +358,119 @@ SELECT 42; SELECT 42; SELECT query, plans, calls, rows FROM pg_stat_statements ORDER BY query COLLATE "C"; +-- +-- Consts merging +-- +SET pg_stat_statements.merge_threshold = 5; +CREATE TABLE test_merge (id int); + +-- IN queries + +-- Normal +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- On the merge threshold +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- With gaps on the threshold +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- VALUES queries + +-- Normal +SELECT pg_stat_statements_reset(); +SELECT * FROM (VALUES (1), (2), (3)) q; +SELECT * FROM (VALUES (1), (2), (3), (4)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- On the merge threshold +SELECT pg_stat_statements_reset(); +SELECT * FROM (VALUES (1), (2), (3), (4)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- With gaps on the treshold +SELECT pg_stat_statements_reset(); +SELECT * FROM (VALUES (1), (2), (3), (4)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM (VALUES (1), (2), (3), (4), (5)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6)) q; +SELECT * FROM (VALUES (1), (2), (3), (4), (5), (6), (7), (8), (9)) q; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + DROP EXTENSION pg_stat_statements; -- 2.21.0