> > > At the same time AFAICT there aren't many more code paths > > > to worry about in case of a LocationExpr as a node > > > > I can imagine there are others like value expressions, > > row expressions, json array expressions, etc. that we may > > want to also normalize.
> Exactly. When using a node, one can explicitly wrap whatever is needed > into it, while otherwise one would need to find a new way to piggyback > on A_Expr in a new context. Looking at the VALUES expression case, we will need to carry the info with SelectStmt and ultimately to RangeTblEntry, which is where the values_list is, so whichever approach we take, RangeTblEntry will need either the LocationExpr pointer or the additional ParseLoc info I am suggesting. A_Expr is not used in the values list case. > I'll take a look at the proposed change, but a bit later. Here is a v4 to compare with v3. 0001 - the infrastructure to track the boundaries 0002 - the changes to jumbling 0003 - the additional tests introduced in v3 -- Sami
From 6f7f7c2abb9e3cd2b5654869ee3626e6fe6549c5 Mon Sep 17 00:00:00 2001 From: Ubuntu <ubuntu@ip-172-31-38-230.ec2.internal> Date: Wed, 21 May 2025 18:55:52 +0000 Subject: [PATCH v4 2/3] Support external parameters for query squashing 62d712ec introduced the concept of element squashing for quwry normalization purposes. However, it did not account for external parameters passed to a list of elements. This adds support to these types of values and simplifies the squashing logic further. Discussion: https://www.postgresql.org/message-id/flat/202505021256.4yaa24s3sytm%40alvherre.pgsql#1195a340edca50cc3b7389a2ba8b0467 --- .../pg_stat_statements/expected/squashing.out | 14 +- .../pg_stat_statements/pg_stat_statements.c | 84 +++--------- src/backend/nodes/gen_node_support.pl | 2 +- src/backend/nodes/queryjumblefuncs.c | 121 +++++++++++------- 4 files changed, 100 insertions(+), 121 deletions(-) diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out index 7b138af098c..d92cfbd35fb 100644 --- a/contrib/pg_stat_statements/expected/squashing.out +++ b/contrib/pg_stat_statements/expected/squashing.out @@ -246,7 +246,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; query | calls ----------------------------------------------------+------- SELECT * FROM test_squash_bigint WHERE data IN +| 1 - ($1 /*, ... */::bigint) | + ($1 /*, ... */) | SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) @@ -353,7 +353,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; query | calls ----------------------------------------------------+------- SELECT * FROM test_squash_cast WHERE data IN +| 1 - ($1 /*, ... */::int4::casttesttype) | + ($1 /*, ... 
*/) | SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) @@ -376,7 +376,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; query | calls ----------------------------------------------------+------- SELECT * FROM test_squash_jsonb WHERE data IN +| 1 - (($1 /*, ... */)::jsonb) | + ($1 /*, ... */) | SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) @@ -393,10 +393,10 @@ SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6 (0 rows) SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls -------------------------------------------------------------+------- - SELECT * FROM test_squash WHERE id IN ($1 /*, ... */::oid) | 1 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + query | calls +-------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) -- Test constants evaluation in a CTE, which was causing issues in the past diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 9778407cba3..efcad87d684 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -2825,10 +2825,6 @@ generate_normalized_query(JumbleState *jstate, const char *query, n_quer_loc = 0, /* Normalized query byte location */ last_off = 0, /* Offset from start for previous tok */ last_tok_len = 0; /* Length (in bytes) of that tok */ - bool in_squashed = false; /* in a run of squashed consts? */ - int skipped_constants = 0; /* Position adjustment of later - * constants after squashed ones */ - /* * Get constants' lengths (core system only gives us locations). Note @@ -2842,9 +2838,6 @@ generate_normalized_query(JumbleState *jstate, const char *query, * certainly isn't more than 11 bytes, even if n reaches INT_MAX. 
We * could refine that limit based on the max value of n for the current * query, but it hardly seems worth any extra effort to do so. - * - * Note this also gives enough room for the commented-out ", ..." list - * syntax used by constant squashing. */ norm_query_buflen = query_len + jstate->clocations_count * 10; @@ -2857,7 +2850,6 @@ generate_normalized_query(JumbleState *jstate, const char *query, tok_len; /* Length (in bytes) of that tok */ off = jstate->clocations[i].location; - /* Adjust recorded location if we're dealing with partial string */ off -= query_loc; @@ -2866,67 +2858,24 @@ generate_normalized_query(JumbleState *jstate, const char *query, if (tok_len < 0) continue; /* ignore any duplicates */ - /* - * What to do next depends on whether we're squashing constant lists, - * and whether we're already in a run of such constants. - */ - if (!jstate->clocations[i].squashed) - { - /* - * This location corresponds to a constant not to be squashed. - * Print what comes before the constant ... - */ - len_to_wrt = off - last_off; - len_to_wrt -= last_tok_len; - - Assert(len_to_wrt >= 0); + /* Copy next chunk (what precedes the next constant) */ + len_to_wrt = off - last_off; + len_to_wrt -= last_tok_len; - memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); - n_quer_loc += len_to_wrt; + Assert(len_to_wrt >= 0); + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; - /* ... and then a param symbol replacing the constant itself */ - n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", - i + 1 + jstate->highest_extern_param_id - skipped_constants); - - /* In case previous constants were merged away, stop doing that */ - in_squashed = false; - } - else if (!in_squashed) - { - /* - * This location is the start position of a run of constants to be - * squashed, so we need to print the representation of starting a - * group of stashed constants. - * - * Print what comes before the constant ... 
- */ - len_to_wrt = off - last_off; - len_to_wrt -= last_tok_len; - Assert(len_to_wrt >= 0); - Assert(i + 1 < jstate->clocations_count); - Assert(jstate->clocations[i + 1].squashed); - memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); - n_quer_loc += len_to_wrt; - - /* ... and then start a run of squashed constants */ - n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, ... */", - i + 1 + jstate->highest_extern_param_id - skipped_constants); - - /* The next location will match the block below, to end the run */ - in_squashed = true; - - skipped_constants++; - } - else - { - /* - * The second location of a run of squashable elements; this - * indicates its end. - */ - in_squashed = false; - } + /* + * And insert a param symbol in place of the constant token. + * + * However, If we have a squashable list, insert a comment in place of + * the second and remaining values of the list. + */ + n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d%s", + i + 1 + jstate->highest_extern_param_id, + (jstate->clocations[i].squashed) ? " /*, ... */" : ""); - /* Otherwise the constant is squashed away -- move forward */ quer_loc = off + tok_len; last_off = off; last_tok_len = tok_len; @@ -3017,6 +2966,9 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query, Assert(loc >= 0); + if (locs[i].squashed) + continue; /* squashable list, ignore */ + if (loc <= last_loc) continue; /* Duplicate constant, ignore */ diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl index 77659b0f760..17ba3696226 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -1324,7 +1324,7 @@ _jumble${n}(JumbleState *jstate, Node *node) # Node type. Squash constants if requested. 
if ($query_jumble_squash) { - print $jff "\tJUMBLE_ELEMENTS($f);\n" + print $jff "\tJUMBLE_ELEMENTS($f, node);\n" unless $query_jumble_ignore; } else diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index d1e82a63f09..32bc42bffca 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -60,10 +60,10 @@ static uint64 DoJumble(JumbleState *jstate, Node *node); static void AppendJumble(JumbleState *jstate, const unsigned char *value, Size size); static void FlushPendingNulls(JumbleState *jstate); -static void RecordConstLocation(JumbleState *jstate, - int location, bool squashed); +static void RecordExpressionLocation(JumbleState *jstate, + int location, int len); static void _jumbleNode(JumbleState *jstate, Node *node); -static void _jumbleElements(JumbleState *jstate, List *elements); +static void _jumbleElements(JumbleState *jstate, List *elements, Node *node); static void _jumbleA_Const(JumbleState *jstate, Node *node); static void _jumbleList(JumbleState *jstate, Node *node); static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node); @@ -381,7 +381,7 @@ FlushPendingNulls(JumbleState *jstate) * element contributes nothing to the jumble hash. 
*/ static void -RecordConstLocation(JumbleState *jstate, int location, bool squashed) +RecordExpressionLocation(JumbleState *jstate, int location, int len) { /* -1 indicates unknown or undefined location */ if (location >= 0) @@ -396,9 +396,15 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed) sizeof(LocationLen)); } jstate->clocations[jstate->clocations_count].location = location; - /* initialize lengths to -1 to simplify third-party module usage */ - jstate->clocations[jstate->clocations_count].squashed = squashed; - jstate->clocations[jstate->clocations_count].length = -1; + + /* + * initialize lengths to -1 to simplify third-party module usage + * + * If we have a length that is greater than -1, this indicates a + * squashable list. + */ + jstate->clocations[jstate->clocations_count].length = (len > -1) ? len : -1; + jstate->clocations[jstate->clocations_count].squashed = (len > -1) ? true : false; jstate->clocations_count++; } } @@ -413,7 +419,7 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed) * - Otherwise test if the expression is a simple Const. 
*/ static bool -IsSquashableConst(Node *element) +IsSquashableExpression(Node *element) { if (IsA(element, RelabelType)) element = (Node *) ((RelabelType *) element)->arg; @@ -437,22 +443,45 @@ IsSquashableConst(Node *element) { Node *arg = lfirst(temp); - if (!IsA(arg, Const)) /* XXX we could recurse here instead */ - return false; + switch (nodeTag(arg)) + { + case T_Const: + return true; + case T_Param: + { + Param *param = (Param *) element; + + return param->paramkind == PARAM_EXTERN; + } + default: + break; + } } - return true; + return false; } - if (!IsA(element, Const)) - return false; + switch (nodeTag(element)) + { + case T_Const: + return true; + case T_Param: + { + Param *param = (Param *) element; - return true; + return param->paramkind == PARAM_EXTERN; + } + default: + break; + } + + return false; } /* * Subroutine for _jumbleElements: Verify whether the provided list - * can be squashed, meaning it contains only constant expressions. + * can be squashed, meaning it contains only constant and external + * parameter expressions. * * Return value indicates if squashing is possible. * @@ -461,7 +490,7 @@ IsSquashableConst(Node *element) * expressions. 
*/ static bool -IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr) +IsSquashableExpressionList(List *elements) { ListCell *temp; @@ -474,22 +503,19 @@ IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr) foreach(temp, elements) { - if (!IsSquashableConst(lfirst(temp))) + if (!IsSquashableExpression(lfirst(temp))) return false; } - *firstExpr = linitial(elements); - *lastExpr = llast(elements); - return true; } #define JUMBLE_NODE(item) \ _jumbleNode(jstate, (Node *) expr->item) -#define JUMBLE_ELEMENTS(list) \ - _jumbleElements(jstate, (List *) expr->list) +#define JUMBLE_ELEMENTS(list, node) \ + _jumbleElements(jstate, (List *) expr->list, node) #define JUMBLE_LOCATION(location) \ - RecordConstLocation(jstate, expr->location, false) + RecordExpressionLocation(jstate, expr->location, -1) #define JUMBLE_FIELD(item) \ do { \ if (sizeof(expr->item) == 8) \ @@ -517,36 +543,37 @@ do { \ #include "queryjumblefuncs.funcs.c" /* - * We jumble lists of constant elements as one individual item regardless - * of how many elements are in the list. This means different queries - * jumble to the same query_id, if the only difference is the number of - * elements in the list. + * We try to jumble lists of expressions as one individual item regardless + * of how many elements are in the list. This is know as squashing, which + * results in different queries jumbling to the same query_id, if the only + * difference is the number of elements in the list. + * + * We allow for Constants and Params of type external to be squashed. To + * be able to normalize such queries by stripping away the squashed away + * values, we must track the start and end of the expression list. 
*/ static void -_jumbleElements(JumbleState *jstate, List *elements) +_jumbleElements(JumbleState *jstate, List *elements, Node *node) { - Node *first, - *last; + bool normalize_list = false; - if (IsSquashableConstList(elements, &first, &last)) + if (IsSquashableExpressionList(elements)) { - /* - * If this list of elements is squashable, keep track of the location - * of its first and last elements. When reading back the locations - * array, we'll see two consecutive locations with ->squashed set to - * true, indicating the location of initial and final elements of this - * list. - * - * For the limited set of cases we support now (implicit coerce via - * FuncExpr, Const) it's fine to use exprLocation of the 'last' - * expression, but if more complex composite expressions are to be - * supported (e.g., OpExpr or FuncExpr as an explicit call), more - * sophisticated tracking will be needed. - */ - RecordConstLocation(jstate, exprLocation(first), true); - RecordConstLocation(jstate, exprLocation(last), true); + if (IsA(node, ArrayExpr)) + { + ArrayExpr *aexpr = (ArrayExpr *) node; + + if (aexpr->list_start > 0 && aexpr->list_end > 0) + { + RecordExpressionLocation(jstate, + aexpr->list_start + 1, + (aexpr->list_end - aexpr->list_start) - 1); + normalize_list = true; + } + } } - else + + if (!normalize_list) { _jumbleNode(jstate, (Node *) elements); } -- 2.39.5 (Apple Git-154)
From 8c14c0ebb20e79925fdd8b6bbd4fcce91ba92dcf Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Tue, 20 May 2025 16:12:05 +0200 Subject: [PATCH v4 3/3] Extend ARRAY squashing tests Testing coverage for ARRAY expressions is not enough. Add more test cases, similar to already existing ones. --- .../pg_stat_statements/expected/squashing.out | 178 ++++++++++++++++++ contrib/pg_stat_statements/sql/squashing.sql | 60 ++++++ 2 files changed, 238 insertions(+) diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out index d92cfbd35fb..d628a451a1e 100644 --- a/contrib/pg_stat_statements/expected/squashing.out +++ b/contrib/pg_stat_statements/expected/squashing.out @@ -429,3 +429,181 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) +-- Nested arrays are squashed only at constants level +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT ARRAY[ + ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + ]; + array +----------------------------------------------------------------------------------------------- + {{1,2,3,4,5,6,7,8,9,10},{1,2,3,4,5,6,7,8,9,10},{1,2,3,4,5,6,7,8,9,10},{1,2,3,4,5,6,7,8,9,10}} +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT ARRAY[ +| 1 + ARRAY[$1 /*, ... */], +| + ARRAY[$2 /*, ... */], +| + ARRAY[$3 /*, ... */], +| + ARRAY[$4 /*, ... 
*/] +| + ] | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Relabel type +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT ARRAY[1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid]; + array +--------------------- + {1,2,3,4,5,6,7,8,9} +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT ARRAY[$1 /*, ... */] | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Some casting expression are simplified to Const +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT ARRAY[ + ('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb, + ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb, + ( '"9"')::jsonb, ( '"10"')::jsonb +]; + array +------------------------------------------------------------------------------------ + {"\"1\"","\"2\"","\"3\"","\"4\"","\"5\"","\"6\"","\"7\"","\"8\"","\"9\"","\"10\""} +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT ARRAY[$1 /*, ... */] | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- CoerceViaIO +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT ARRAY[ + 1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype, + 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype, + 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype, + 10::int4::casttesttype, 11::int4::casttesttype +]; + array +--------------------------- + {1,2,3,4,5,6,7,8,9,10,11} +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT ARRAY[$1 /*, ... 
*/] | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- CoerceViaIO, SubLink instead of a Const is not squashed +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT ARRAY[ + (SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb, + (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb, + (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb, + (SELECT '"10"')::jsonb +]; + array +------------------------------------------------------------------------------------ + {"\"1\"","\"2\"","\"3\"","\"4\"","\"5\"","\"6\"","\"7\"","\"8\"","\"9\"","\"10\""} +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +---------------------------------------------------------------------+------- + SELECT ARRAY[ +| 1 + (SELECT $1)::jsonb, (SELECT $2)::jsonb, (SELECT $3)::jsonb,+| + (SELECT $4)::jsonb, (SELECT $5)::jsonb, (SELECT $6)::jsonb,+| + (SELECT $7)::jsonb, (SELECT $8)::jsonb, (SELECT $9)::jsonb,+| + (SELECT $10)::jsonb +| + ] | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Bigint, long tokens with parenthesis +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT ARRAY[ + abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700), + abs(800), abs(900), abs(1000), ((abs(1100))) +]; + array +------------------------------------------------- + {100,200,300,400,500,600,700,800,900,1000,1100} +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT ARRAY[ +| 1 + abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7),+| + abs($8), abs($9), abs($10), ((abs($11))) +| + ] | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + +-- Bigint, long tokens with parenthesis +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t 
+--- + t +(1 row) + +SELECT ARRAY[ + 1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint, + 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint +]; + array +--------------------------- + {1,2,3,4,5,6,7,8,9,10,11} +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT ARRAY[$1 /*, ... */] | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql index 03efd4b40c8..5ac624ae1f7 100644 --- a/contrib/pg_stat_statements/sql/squashing.sql +++ b/contrib/pg_stat_statements/sql/squashing.sql @@ -167,3 +167,63 @@ FROM cte; SELECT pg_stat_statements_reset() IS NOT NULL AS t; SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Nested arrays are squashed only at constants level +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT ARRAY[ + ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + ]; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Relabel type +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT ARRAY[1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 9::oid]; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Some casting expression are simplified to Const +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT ARRAY[ + ('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb, + ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb, + ( '"9"')::jsonb, ( '"10"')::jsonb +]; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- CoerceViaIO +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT ARRAY[ + 
1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype, + 4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype, + 7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype, + 10::int4::casttesttype, 11::int4::casttesttype +]; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- CoerceViaIO, SubLink instead of a Const is not squashed +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT ARRAY[ + (SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb, + (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb, + (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb, + (SELECT '"10"')::jsonb +]; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Bigint, long tokens with parenthesis +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT ARRAY[ + abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700), + abs(800), abs(900), abs(1000), ((abs(1100))) +]; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Bigint, long tokens with parenthesis +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT ARRAY[ + 1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint, + 7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint +]; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; -- 2.39.5 (Apple Git-154)
From 0af27d235ca6fd1db12d81c657d8b349f3b29316 Mon Sep 17 00:00:00 2001 From: Ubuntu <ubuntu@ip-172-31-38-230.ec2.internal> Date: Wed, 21 May 2025 17:25:02 +0000 Subject: [PATCH v4 1/3] Add tracking for expression boundaries This adds the ability to track the locations of the start and end of a list of elements such as those in an 'IN' list of an Array expression to support squashing of values for query normalization purposes. This corrects various normalization issues that are a result of 62d712ec. Discussion: https://www.postgresql.org/message-id/flat/202505021256.4yaa24s3sytm%40alvherre.pgsql#1195a340edca50cc3b7389a2ba8b0467 --- src/backend/parser/gram.y | 94 +++++++++++++++++++++++---------- src/backend/parser/parse_expr.c | 4 ++ src/include/nodes/parsenodes.h | 4 ++ src/include/nodes/primnodes.h | 4 ++ 4 files changed, 79 insertions(+), 27 deletions(-) diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 0b5652071d1..0cd5f794db3 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -136,6 +136,17 @@ typedef struct KeyActions KeyAction *deleteAction; } KeyActions; +/* + * Track the start and end of a list in an expression, such as an 'IN' list + * or Array Expression + */ +typedef struct ListWithBoundary +{ + Node *expr; + ParseLoc start; + ParseLoc end; +} ListWithBoundary; + /* ConstraintAttributeSpec yields an integer bitmask of these flags: */ #define CAS_NOT_DEFERRABLE 0x01 #define CAS_DEFERRABLE 0x02 @@ -184,7 +195,7 @@ static void doNegateFloat(Float *v); static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location); static Node *makeOrExpr(Node *lexpr, Node *rexpr, int location); static Node *makeNotExpr(Node *expr, int location); -static Node *makeAArrayExpr(List *elements, int location); +static Node *makeAArrayExpr(List *elements, int location, int end_location); static Node *makeSQLValueFunction(SQLValueFunctionOp op, int32 typmod, int location); static Node *makeXmlExpr(XmlExprOp op, char *name, List 
*named_args, @@ -269,6 +280,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); struct KeyAction *keyaction; ReturningClause *retclause; ReturningOptionKind retoptionkind; + struct ListWithBoundary *listwithboundary; } %type <node> stmt toplevel_stmt schema_stmt routine_body_stmt @@ -523,8 +535,9 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <defelt> def_elem reloption_elem old_aggr_elem operator_def_elem %type <node> def_arg columnElem where_clause where_or_current_clause a_expr b_expr c_expr AexprConst indirection_el opt_slice_bound - columnref in_expr having_clause func_table xmltable array_expr + columnref having_clause func_table xmltable array_expr OptWhereClause operator_def_arg +%type <listwithboundary> in_expr %type <list> opt_column_and_period_list %type <list> rowsfrom_item rowsfrom_list opt_col_def_list %type <boolean> opt_ordinality opt_without_overlaps @@ -15289,46 +15302,58 @@ a_expr: c_expr { $$ = $1; } } | a_expr IN_P in_expr { + ListWithBoundary *n = $3; + /* in_expr returns a SubLink or a list of a_exprs */ - if (IsA($3, SubLink)) + if (IsA(n->expr, SubLink)) { /* generate foo = ANY (subquery) */ - SubLink *n = (SubLink *) $3; - - n->subLinkType = ANY_SUBLINK; - n->subLinkId = 0; - n->testexpr = $1; - n->operName = NIL; /* show it's IN not = ANY */ - n->location = @2; - $$ = (Node *) n; + SubLink *n2 = (SubLink *) n->expr; + + n2->subLinkType = ANY_SUBLINK; + n2->subLinkId = 0; + n2->testexpr = $1; + n2->operName = NIL; /* show it's IN not = ANY */ + n2->location = @2; + $$ = (Node *) n2; } else { /* generate scalar IN expression */ - $$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "=", $1, $3, @2); + A_Expr *n2 = makeSimpleA_Expr(AEXPR_IN, "=", $1, n->expr, @2); + + n2->rexpr_list_start = $3->start; + n2->rexpr_list_end = $3->end; + $$ = (Node *) n2; } } | a_expr NOT_LA IN_P in_expr %prec NOT_LA { + ListWithBoundary *n = $4; + /* in_expr returns a SubLink or a list of a_exprs 
*/ - if (IsA($4, SubLink)) + if (IsA(n->expr, SubLink)) { /* generate NOT (foo = ANY (subquery)) */ /* Make an = ANY node */ - SubLink *n = (SubLink *) $4; + SubLink *n2 = (SubLink *) n->expr; - n->subLinkType = ANY_SUBLINK; - n->subLinkId = 0; - n->testexpr = $1; - n->operName = NIL; /* show it's IN not = ANY */ - n->location = @2; + n2->subLinkType = ANY_SUBLINK; + n2->subLinkId = 0; + n2->testexpr = $1; + n2->operName = NIL; /* show it's IN not = ANY */ + n2->location = @2; /* Stick a NOT on top; must have same parse location */ - $$ = makeNotExpr((Node *) n, @2); + $$ = makeNotExpr((Node *) n2, @2); } else { /* generate scalar NOT IN expression */ - $$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "<>", $1, $4, @2); + A_Expr *n2 = makeSimpleA_Expr(AEXPR_IN, "<>", $1, n->expr, @2); + + n2->rexpr_list_start = $4->start; + n2->rexpr_list_end = $4->end; + $$ = (Node *) n2; } } | a_expr subquery_Op sub_type select_with_parens %prec Op @@ -16764,15 +16789,15 @@ type_list: Typename { $$ = list_make1($1); } array_expr: '[' expr_list ']' { - $$ = makeAArrayExpr($2, @1); + $$ = makeAArrayExpr($2, @1, @3); } | '[' array_expr_list ']' { - $$ = makeAArrayExpr($2, @1); + $$ = makeAArrayExpr($2, @1, @3); } | '[' ']' { - $$ = makeAArrayExpr(NIL, @1); + $$ = makeAArrayExpr(NIL, @1, @2); } ; @@ -16897,12 +16922,25 @@ trim_list: a_expr FROM expr_list { $$ = lappend($3, $1); } in_expr: select_with_parens { SubLink *n = makeNode(SubLink); + ListWithBoundary *n2 = palloc(sizeof(ListWithBoundary)); n->subselect = $1; /* other fields will be filled later */ - $$ = (Node *) n; + + n2->expr = (Node *) n; + n2->start = -1; + n2->end = -1; + $$ = n2; + } + | '(' expr_list ')' + { + ListWithBoundary *n = palloc(sizeof(ListWithBoundary)); + + n->expr = (Node *) $2; + n->start = @1; + n->end = @3; + $$ = n; } - | '(' expr_list ')' { $$ = (Node *) $2; } ; /* @@ -19300,12 +19338,14 @@ makeNotExpr(Node *expr, int location) } static Node * -makeAArrayExpr(List *elements, int location) 
+makeAArrayExpr(List *elements, int location, int location_end) { A_ArrayExpr *n = makeNode(A_ArrayExpr); n->elements = elements; n->location = location; + n->list_start = location; + n->list_end = location_end; return (Node *) n; } diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 1f8e2d54673..7347c989e11 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -1224,6 +1224,8 @@ transformAExprIn(ParseState *pstate, A_Expr *a) newa->elements = aexprs; newa->multidims = false; newa->location = -1; + newa->list_start = a->rexpr_list_start; + newa->list_end = a->rexpr_list_end; result = (Node *) make_scalar_array_op(pstate, a->name, @@ -2166,6 +2168,8 @@ transformArrayExpr(ParseState *pstate, A_ArrayExpr *a, newa->element_typeid = element_type; newa->elements = newcoercedelems; newa->location = a->location; + newa->list_start = a->list_start; + newa->list_end = a->list_end; return (Node *) newa; } diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 4610fc61293..2f078887d06 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -347,6 +347,8 @@ typedef struct A_Expr Node *lexpr; /* left argument, or NULL if none */ Node *rexpr; /* right argument, or NULL if none */ ParseLoc location; /* token location, or -1 if unknown */ + ParseLoc rexpr_list_start; /* location of the start of a rexpr list */ + ParseLoc rexpr_list_end; /* location of the end of a rexpr list */ } A_Expr; /* @@ -502,6 +504,8 @@ typedef struct A_ArrayExpr NodeTag type; List *elements; /* array element expressions */ ParseLoc location; /* token location, or -1 if unknown */ + ParseLoc list_start; /* location of the start of the elements list */ + ParseLoc list_end; /* location of the end of the elements list */ } A_ArrayExpr; /* diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 7d3b4198f26..773cdd880aa 100644 --- a/src/include/nodes/primnodes.h +++ 
b/src/include/nodes/primnodes.h @@ -1399,6 +1399,10 @@ typedef struct ArrayExpr bool multidims pg_node_attr(query_jumble_ignore); /* token location, or -1 if unknown */ ParseLoc location; + /* location of the start of the elements list */ + ParseLoc list_start; + /* location of the end of the elements list */ + ParseLoc list_end; } ArrayExpr; /* -- 2.39.5 (Apple Git-154)