> On Fri, May 09, 2025 at 10:12:24AM GMT, Dmitry Dolgov wrote:
> Agree, I'll try to extend number of test cases here as a separate patch.

Here is the extended version, where start/end is replaced by
location/length, array_expr is handled as well, and more ARRAY cases are
added.
>From 81fe0b08473eafc88cdc56b275e6f0e08ab8858c Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthali...@gmail.com>
Date: Thu, 8 May 2025 16:41:01 +0200
Subject: [PATCH v2 1/3] Introduce LocationExpr

Add a LocationExpr wrapper node to capture the location and length of an
expression in a query. Use it to wrap expr_list in in_expr and
array_expr, conveying location information to ArrayExpr.
---
 src/backend/nodes/nodeFuncs.c    | 23 +++++++++++++++
 src/backend/parser/gram.y        | 31 +++++++++++++++++----
 src/backend/parser/parse_expr.c  | 48 ++++++++++++++++++++++++++++++--
 src/include/nodes/parsenodes.h   | 17 ++++++++++-
 src/include/nodes/primnodes.h    |  7 +++++
 src/tools/pgindent/typedefs.list |  1 +
 6 files changed, 118 insertions(+), 9 deletions(-)

diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c
index 7bc823507f1..f0b05630fd1 100644
--- a/src/backend/nodes/nodeFuncs.c
+++ b/src/backend/nodes/nodeFuncs.c
@@ -284,6 +284,12 @@ exprType(const Node *expr)
                case T_PlaceHolderVar:
                        type = exprType((Node *) ((const PlaceHolderVar *) 
expr)->phexpr);
                        break;
+               case T_LocationExpr:
+                       {
+                               const LocationExpr *n = (const LocationExpr *) 
expr;
+                               type = exprType((Node *) n->expr);
+                       }
+                       break;
                default:
                        elog(ERROR, "unrecognized node type: %d", (int) 
nodeTag(expr));
                        type = InvalidOid;      /* keep compiler quiet */
@@ -536,6 +542,11 @@ exprTypmod(const Node *expr)
                        return exprTypmod((Node *) ((const ReturningExpr *) 
expr)->retexpr);
                case T_PlaceHolderVar:
                        return exprTypmod((Node *) ((const PlaceHolderVar *) 
expr)->phexpr);
+               case T_LocationExpr:
+                       {
+                               const LocationExpr *n = (const LocationExpr *) 
expr;
+                               return exprTypmod((Node *) n->expr);
+                       }
                default:
                        break;
        }
@@ -1058,6 +1069,9 @@ exprCollation(const Node *expr)
                case T_PlaceHolderVar:
                        coll = exprCollation((Node *) ((const PlaceHolderVar *) 
expr)->phexpr);
                        break;
+               case T_LocationExpr:
+                       coll = exprCollation((Node *) ((const LocationExpr *) 
expr)->expr);
+                       break;
                default:
                        elog(ERROR, "unrecognized node type: %d", (int) 
nodeTag(expr));
                        coll = InvalidOid;      /* keep compiler quiet */
@@ -1306,6 +1320,10 @@ exprSetCollation(Node *expr, Oid collation)
                        /* NextValueExpr's result is an integer type ... */
                        Assert(!OidIsValid(collation)); /* ... so never set a 
collation */
                        break;
+               case T_LocationExpr:
+                       exprSetCollation((Node *) ((LocationExpr *) expr)->expr,
+                                                        collation);
+                       break;
                default:
                        elog(ERROR, "unrecognized node type: %d", (int) 
nodeTag(expr));
                        break;
@@ -1803,6 +1821,9 @@ exprLocation(const Node *expr)
                case T_PartitionRangeDatum:
                        loc = ((const PartitionRangeDatum *) expr)->location;
                        break;
+               case T_LocationExpr:
+                       loc = ((const LocationExpr *) expr)->location;
+                       break;
                default:
                        /* for any other node type it's just unknown... */
                        loc = -1;
@@ -4705,6 +4726,8 @@ raw_expression_tree_walker_impl(Node *node,
                                        return true;
                        }
                        break;
+               case T_LocationExpr:
+                       return WALK(((LocationExpr *) node)->expr);
                default:
                        elog(ERROR, "unrecognized node type: %d",
                                 (int) nodeTag(node));
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 3c4268b271a..8c8271f620d 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -184,7 +184,8 @@ static void doNegateFloat(Float *v);
 static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location);
 static Node *makeOrExpr(Node *lexpr, Node *rexpr, int location);
 static Node *makeNotExpr(Node *expr, int location);
-static Node *makeAArrayExpr(List *elements, int location);
+static Node *makeAArrayExpr(Node *elements, int location);
+static Node *makeLocationExpr(Node *expr, int location, int length);
 static Node *makeSQLValueFunction(SQLValueFunctionOp op, int32 typmod,
                                                                  int location);
 static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args,
@@ -16757,15 +16758,18 @@ type_list:    Typename                                
                                { $$ = list_make1($1); }
 
 array_expr: '[' expr_list ']'
                                {
-                                       $$ = makeAArrayExpr($2, @1);
+                                       Node *locExpr = makeLocationExpr((Node 
*) $2, @1, @3);
+                                       $$ = makeAArrayExpr(locExpr, @1);
                                }
                        | '[' array_expr_list ']'
                                {
-                                       $$ = makeAArrayExpr($2, @1);
+                                       Node *locExpr = makeLocationExpr((Node 
*) $2, @1, @3);
+                                       $$ = makeAArrayExpr(locExpr, @1);
                                }
                        | '[' ']'
                                {
-                                       $$ = makeAArrayExpr(NIL, @1);
+                                       Node *locExpr =  makeLocationExpr((Node 
*) NIL, @1, @2);
+                                       $$ = makeAArrayExpr(locExpr, @1);
                                }
                ;
 
@@ -16895,7 +16899,10 @@ in_expr:       select_with_parens
                                        /* other fields will be filled later */
                                        $$ = (Node *) n;
                                }
-                       | '(' expr_list ')'                                     
        { $$ = (Node *) $2; }
+                       | '(' expr_list ')'
+                               {
+                                       $$ = (Node *) makeLocationExpr((Node *) 
$2, @1, @3);
+                               }
                ;
 
 /*
@@ -19293,7 +19300,7 @@ makeNotExpr(Node *expr, int location)
 }
 
 static Node *
-makeAArrayExpr(List *elements, int location)
+makeAArrayExpr(Node *elements, int location)
 {
        A_ArrayExpr *n = makeNode(A_ArrayExpr);
 
@@ -19302,6 +19309,18 @@ makeAArrayExpr(List *elements, int location)
        return (Node *) n;
 }
 
+static Node *
+makeLocationExpr(Node *expr, int start_location, int end_location)
+{
+       LocationExpr    *n = makeNode(LocationExpr);
+
+       n->expr = expr;
+       n->location = start_location + 1;
+       n->length = end_location - start_location - 1;
+
+       return (Node *) n;
+}
+
 static Node *
 makeSQLValueFunction(SQLValueFunctionOp op, int32 typmod, int location)
 {
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 1f8e2d54673..b48beff157e 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -370,6 +370,25 @@ transformExprRecurse(ParseState *pstate, Node *expr)
                        result = transformJsonFuncExpr(pstate, (JsonFuncExpr *) 
expr);
                        break;
 
+               case T_LocationExpr:
+                       {
+                               LocationExpr *loc = (LocationExpr *) expr;
+                               if (!IsA(loc->expr, List))
+                                       result = transformExprRecurse(pstate, 
loc->expr);
+                               else
+                                       result = loc->expr;
+
+                               if (IsA(result, ArrayExpr))
+                               {
+                                       ArrayExpr *arr = (ArrayExpr *) result;
+                                       arr->loc_range = 
list_make2_int(loc->location,
+                                                                               
                    loc->length);
+
+                                       result = (Node *) arr;
+                               }
+                       }
+                       break;
+
                default:
                        /* should not reach here */
                        elog(ERROR, "unrecognized node type: %d", (int) 
nodeTag(expr));
@@ -1125,6 +1144,7 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
 {
        Node       *result = NULL;
        Node       *lexpr;
+       LocationExpr *location = NULL;
        List       *rexprs;
        List       *rvars;
        List       *rnonvars;
@@ -1139,6 +1159,9 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
        else
                useOr = true;
 
+       if (IsA(a->rexpr, LocationExpr))
+               location = (LocationExpr *) a->rexpr;
+
        /*
         * We try to generate a ScalarArrayOpExpr from IN/NOT IN, but this is 
only
         * possible if there is a suitable array type available.  If not, we 
fall
@@ -1152,7 +1175,7 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
         */
        lexpr = transformExprRecurse(pstate, a->lexpr);
        rexprs = rvars = rnonvars = NIL;
-       foreach(l, (List *) a->rexpr)
+       foreach(l, (List *) transformExprRecurse(pstate, a->rexpr))
        {
                Node       *rexpr = transformExprRecurse(pstate, lfirst(l));
 
@@ -1224,6 +1247,12 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
                        newa->elements = aexprs;
                        newa->multidims = false;
                        newa->location = -1;
+                       if (location)
+                               newa->loc_range = 
list_make2_int(location->location,
+                                                                               
             location->length);
+                       else
+                               newa->loc_range = NIL;
+
 
                        result = (Node *) make_scalar_array_op(pstate,
                                                                                
                   a->name,
@@ -2014,12 +2043,22 @@ transformArrayExpr(ParseState *pstate, A_ArrayExpr *a,
                                   Oid array_type, Oid element_type, int32 
typmod)
 {
        ArrayExpr  *newa = makeNode(ArrayExpr);
+       List       *elements = NIL;
        List       *newelems = NIL;
        List       *newcoercedelems = NIL;
        ListCell   *element;
+       LocationExpr *locExpr = NULL;
        Oid                     coerce_type;
        bool            coerce_hard;
 
+       if (IsA(a->elements, LocationExpr))
+       {
+               locExpr = (LocationExpr *) a->elements;
+               elements = (List *) locExpr->expr;
+       }
+       else
+               elements = (List *) a->elements;
+
        /*
         * Transform the element expressions
         *
@@ -2027,7 +2066,7 @@ transformArrayExpr(ParseState *pstate, A_ArrayExpr *a,
         * element expression.
         */
        newa->multidims = false;
-       foreach(element, a->elements)
+       foreach(element, elements)
        {
                Node       *e = (Node *) lfirst(element);
                Node       *newe;
@@ -2166,6 +2205,11 @@ transformArrayExpr(ParseState *pstate, A_ArrayExpr *a,
        newa->element_typeid = element_type;
        newa->elements = newcoercedelems;
        newa->location = a->location;
+       if (locExpr)
+               newa->loc_range = list_make2_int(locExpr->location,
+                                                                               
 locExpr->length);
+       else
+               newa->loc_range = NIL;
 
        return (Node *) newa;
 }
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 4610fc61293..f3e4ba47af1 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -500,10 +500,25 @@ typedef struct A_Indirection
 typedef struct A_ArrayExpr
 {
        NodeTag         type;
-       List       *elements;           /* array element expressions */
+       Node       *elements;           /* array element expressions */
        ParseLoc        location;               /* token location, or -1 if 
unknown */
 } A_ArrayExpr;
 
+/*
+ * A wrapper expression to record the location and length of an expression
+ */
+typedef struct LocationExpr
+{
+       NodeTag         type;
+
+       /* the node to be wrapped */
+       Node       *expr;
+       /* token location, or -1 if unknown */
+       ParseLoc        location;
+       /* token length, or -1 if unknown */
+       ParseLoc        length;
+} LocationExpr;
+
 /*
  * ResTarget -
  *       result target (used in target list of pre-transformed parse trees)
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 7d3b4198f26..60dec576908 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1399,6 +1399,13 @@ typedef struct ArrayExpr
        bool            multidims pg_node_attr(query_jumble_ignore);
        /* token location, or -1 if unknown */
        ParseLoc        location;
+
+       /*
+        * Pair (location, length) for list of elements. Note, that location 
field
+        * cannot always be used here instead, since it could be unknown, e.g. 
if
+        * the node was created in transformAExprIn.
+        */
+       List            *loc_range pg_node_attr(query_jumble_ignore);
 } ArrayExpr;
 
 /*
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index e5879e00dff..e6fcba24396 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1501,6 +1501,7 @@ LOCALLOCK
 LOCALLOCKOWNER
 LOCALLOCKTAG
 LOCALPREDICATELOCK
+LocationExpr
 LOCK
 LOCKMASK
 LOCKMETHODID

base-commit: b0635bfda0535a7fc36cd11d10eecec4e2a96330
-- 
2.45.1

>From 3daccfa2ff77e78d0e04e092d9801fe6f7a22bc7 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthali...@gmail.com>
Date: Thu, 8 May 2025 16:41:20 +0200
Subject: [PATCH v2 2/3] Use LocationExpr in squashing

For the purpose of constants squashing we have only the start location of
an expression, which is not enough if the constant is wrapped e.g. in a
cast function. Apply information conveyed via LocationExpr to improve
squashing of constants.

Based on an idea from Sami Imseih.
---
 .../pg_stat_statements/expected/squashing.out | 35 +++++++--
 .../pg_stat_statements/pg_stat_statements.c   | 35 +++------
 contrib/pg_stat_statements/sql/squashing.sql  |  9 ++-
 src/backend/nodes/queryjumblefuncs.c          | 76 ++++++++++++-------
 4 files changed, 95 insertions(+), 60 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/squashing.out 
b/contrib/pg_stat_statements/expected/squashing.out
index 7b138af098c..a924a8c6e4c 100644
--- a/contrib/pg_stat_statements/expected/squashing.out
+++ b/contrib/pg_stat_statements/expected/squashing.out
@@ -147,6 +147,24 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query 
COLLATE "C";
  SELECT pg_stat_statements_reset() IS NOT NULL AS t                            
                     |     1
 (3 rows)
 
+-- Parsing
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT WHERE 1 IN (1, 2, int4(1));
+--
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT WHERE $1 IN ($2 /*, ... */)                 |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(2 rows)
+
 -- FuncExpr
 -- Verify multiple type representation end up with the same query_id
 CREATE TABLE test_float (data float);
@@ -246,7 +264,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query 
COLLATE "C";
                        query                        | calls 
 ----------------------------------------------------+-------
  SELECT * FROM test_squash_bigint WHERE data IN    +|     1
-         ($1 /*, ... */::bigint)                    | 
+         ($1 /*, ... */)                            | 
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
@@ -353,7 +371,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query 
COLLATE "C";
                        query                        | calls 
 ----------------------------------------------------+-------
  SELECT * FROM test_squash_cast WHERE data IN      +|     1
-         ($1 /*, ... */::int4::casttesttype)        | 
+         ($1 /*, ... */)                            | 
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
@@ -376,7 +394,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query 
COLLATE "C";
                        query                        | calls 
 ----------------------------------------------------+-------
  SELECT * FROM test_squash_jsonb WHERE data IN     +|     1
-         (($1 /*, ... */)::jsonb)                   | 
+         ($1 /*, ... */)                            | 
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
@@ -393,10 +411,10 @@ SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid, 
3::oid, 4::oid, 5::oid, 6
 (0 rows)
 
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-                           query                            | calls 
-------------------------------------------------------------+-------
- SELECT * FROM test_squash WHERE id IN ($1 /*, ... */::oid) |     1
- SELECT pg_stat_statements_reset() IS NOT NULL AS t         |     1
+                         query                         | calls 
+-------------------------------------------------------+-------
+ SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t    |     1
 (2 rows)
 
 -- Test constants evaluation in a CTE, which was causing issues in the past
@@ -409,7 +427,8 @@ FROM cte;
 --------
 (0 rows)
 
--- Simple array would be squashed as well
+-- Arrays
+-- Simple array is squashed as well
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
  t 
 ---
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c 
b/contrib/pg_stat_statements/pg_stat_statements.c
index 9778407cba3..314e065b364 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2825,10 +2825,6 @@ generate_normalized_query(JumbleState *jstate, const 
char *query,
                                n_quer_loc = 0, /* Normalized query byte 
location */
                                last_off = 0,   /* Offset from start for 
previous tok */
                                last_tok_len = 0;       /* Length (in bytes) of 
that tok */
-       bool            in_squashed = false;    /* in a run of squashed consts? 
*/
-       int                     skipped_constants = 0;  /* Position adjustment 
of later
-                                                                               
 * constants after squashed ones */
-
 
        /*
         * Get constants' lengths (core system only gives us locations).  Note
@@ -2886,12 +2882,9 @@ generate_normalized_query(JumbleState *jstate, const 
char *query,
 
                        /* ... and then a param symbol replacing the constant 
itself */
                        n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
-                                                                 i + 1 + 
jstate->highest_extern_param_id - skipped_constants);
-
-                       /* In case previous constants were merged away, stop 
doing that */
-                       in_squashed = false;
+                                                                 i + 1 + 
jstate->highest_extern_param_id);
                }
-               else if (!in_squashed)
+               else
                {
                        /*
                         * This location is the start position of a run of 
constants to be
@@ -2903,27 +2896,12 @@ generate_normalized_query(JumbleState *jstate, const 
char *query,
                        len_to_wrt = off - last_off;
                        len_to_wrt -= last_tok_len;
                        Assert(len_to_wrt >= 0);
-                       Assert(i + 1 < jstate->clocations_count);
-                       Assert(jstate->clocations[i + 1].squashed);
                        memcpy(norm_query + n_quer_loc, query + quer_loc, 
len_to_wrt);
                        n_quer_loc += len_to_wrt;
 
                        /* ... and then start a run of squashed constants */
                        n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, 
... */",
-                                                                 i + 1 + 
jstate->highest_extern_param_id - skipped_constants);
-
-                       /* The next location will match the block below, to end 
the run */
-                       in_squashed = true;
-
-                       skipped_constants++;
-               }
-               else
-               {
-                       /*
-                        * The second location of a run of squashable elements; 
this
-                        * indicates its end.
-                        */
-                       in_squashed = false;
+                                                                 i + 1 + 
jstate->highest_extern_param_id);
                }
 
                /* Otherwise the constant is squashed away -- move forward */
@@ -3012,6 +2990,13 @@ fill_in_constant_lengths(JumbleState *jstate, const char 
*query,
                int                     loc = locs[i].location;
                int                     tok;
 
+               /* Squashed constants are recorded with a length set already */
+               if (locs[i].squashed)
+               {
+                       Assert(locs[i].length != -1);
+                       continue;
+               }
+
                /* Adjust recorded location if we're dealing with partial 
string */
                loc -= query_loc;
 
diff --git a/contrib/pg_stat_statements/sql/squashing.sql 
b/contrib/pg_stat_statements/sql/squashing.sql
index 908be81ff2b..f1a381e96cb 100644
--- a/contrib/pg_stat_statements/sql/squashing.sql
+++ b/contrib/pg_stat_statements/sql/squashing.sql
@@ -49,6 +49,12 @@ SELECT * FROM test_squash WHERE id IN
        (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ 
'-9');
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
+-- Parsing
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT WHERE 1 IN (1, 2, int4(1));
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
 -- FuncExpr
 
 -- Verify multiple type representation end up with the same query_id
@@ -163,7 +169,8 @@ WITH cte AS (
 SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
 FROM cte;
 
--- Simple array would be squashed as well
+-- Arrays
+-- Simple array is squashed as well
 SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
diff --git a/src/backend/nodes/queryjumblefuncs.c 
b/src/backend/nodes/queryjumblefuncs.c
index d1e82a63f09..6202e4065e8 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -62,8 +62,10 @@ static void AppendJumble(JumbleState *jstate,
 static void FlushPendingNulls(JumbleState *jstate);
 static void RecordConstLocation(JumbleState *jstate,
                                                                int location, 
bool squashed);
+static void RecordConstLocationRange(JumbleState *jstate,
+                                                                        int 
location, int length, bool squashed);
 static void _jumbleNode(JumbleState *jstate, Node *node);
-static void _jumbleElements(JumbleState *jstate, List *elements);
+static void _jumbleElements(JumbleState *jstate, List *elements, Node *expr);
 static void _jumbleA_Const(JumbleState *jstate, Node *node);
 static void _jumbleList(JumbleState *jstate, Node *node);
 static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
@@ -403,6 +405,32 @@ RecordConstLocation(JumbleState *jstate, int location, 
bool squashed)
        }
 }
 
+/*
+ * Similar to RecordConstLocation, RecordConstLocationRange stores a constant
+ * together with its start location and length.
+ */
+static void
+RecordConstLocationRange(JumbleState *jstate, int location, int length, bool 
squashed)
+{
+       /* -1 indicates unknown or undefined location */
+       if (location >= 0 && length >= 0)
+       {
+               /* enlarge array if needed */
+               if (jstate->clocations_count >= jstate->clocations_buf_size)
+               {
+                       jstate->clocations_buf_size *= 2;
+                       jstate->clocations = (LocationLen *)
+                               repalloc(jstate->clocations,
+                                                jstate->clocations_buf_size *
+                                                sizeof(LocationLen));
+               }
+               jstate->clocations[jstate->clocations_count].location = 
location;
+               jstate->clocations[jstate->clocations_count].squashed = 
squashed;
+               jstate->clocations[jstate->clocations_count].length = length;
+               jstate->clocations_count++;
+       }
+}
+
 /*
  * Subroutine for _jumbleElements: Verify a few simple cases where we can
  * deduce that the expression is a constant:
@@ -461,7 +489,7 @@ IsSquashableConst(Node *element)
  * expressions.
  */
 static bool
-IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr)
+IsSquashableConstList(List *elements)
 {
        ListCell   *temp;
 
@@ -473,13 +501,8 @@ IsSquashableConstList(List *elements, Node **firstExpr, 
Node **lastExpr)
                return false;
 
        foreach(temp, elements)
-       {
                if (!IsSquashableConst(lfirst(temp)))
                        return false;
-       }
-
-       *firstExpr = linitial(elements);
-       *lastExpr = llast(elements);
 
        return true;
 }
@@ -487,7 +510,7 @@ IsSquashableConstList(List *elements, Node **firstExpr, 
Node **lastExpr)
 #define JUMBLE_NODE(item) \
        _jumbleNode(jstate, (Node *) expr->item)
 #define JUMBLE_ELEMENTS(list) \
-       _jumbleElements(jstate, (List *) expr->list)
+       _jumbleElements(jstate, (List *) expr->list, (Node *) expr)
 #define JUMBLE_LOCATION(location) \
        RecordConstLocation(jstate, expr->location, false)
 #define JUMBLE_FIELD(item) \
@@ -523,28 +546,29 @@ do { \
  * elements in the list.
  */
 static void
-_jumbleElements(JumbleState *jstate, List *elements)
+_jumbleElements(JumbleState *jstate, List *elements, Node *expr)
 {
-       Node       *first,
-                          *last;
-
-       if (IsSquashableConstList(elements, &first, &last))
+       if (IsSquashableConstList(elements))
        {
+               ArrayExpr *array;
+
                /*
-                * If this list of elements is squashable, keep track of the 
location
-                * of its first and last elements.  When reading back the 
locations
-                * array, we'll see two consecutive locations with ->squashed 
set to
-                * true, indicating the location of initial and final elements 
of this
-                * list.
-                *
-                * For the limited set of cases we support now (implicit coerce 
via
-                * FuncExpr, Const) it's fine to use exprLocation of the 'last'
-                * expression, but if more complex composite expressions are to 
be
-                * supported (e.g., OpExpr or FuncExpr as an explicit call), 
more
-                * sophisticated tracking will be needed.
+                * Currently only ArrayExpr provides location information, 
needed for
+                * squashing.
                 */
-               RecordConstLocation(jstate, exprLocation(first), true);
-               RecordConstLocation(jstate, exprLocation(last), true);
+               Assert(IsA(expr, ArrayExpr));
+               array = (ArrayExpr *) expr;
+
+               /*
+                * If the parent ArrayExpr has location information, i.e. start 
and the
+                * end of the expression, use it as boundaries for squashing.
+                */
+               if (array->loc_range != NIL)
+                       RecordConstLocationRange(jstate,
+                                                                        
linitial_int(array->loc_range),
+                                                                        
lsecond_int(array->loc_range), true);
+               else
+                       _jumbleNode(jstate, (Node *) elements);
        }
        else
        {
-- 
2.45.1

>From ac43cc478cfc70ecc968d30e58da380e466c1cfb Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthali...@gmail.com>
Date: Mon, 12 May 2025 10:04:39 +0200
Subject: [PATCH v2 3/3] Extend ARRAY squashing tests

Testing coverage for ARRAY expressions is not enough. Add more test
cases, similar to already existing ones.
---
 .../pg_stat_statements/expected/squashing.out | 178 ++++++++++++++++++
 contrib/pg_stat_statements/sql/squashing.sql  |  59 ++++++
 2 files changed, 237 insertions(+)

diff --git a/contrib/pg_stat_statements/expected/squashing.out 
b/contrib/pg_stat_statements/expected/squashing.out
index a924a8c6e4c..1aeed911aad 100644
--- a/contrib/pg_stat_statements/expected/squashing.out
+++ b/contrib/pg_stat_statements/expected/squashing.out
@@ -448,3 +448,181 @@ SELECT query, calls FROM pg_stat_statements ORDER BY 
query COLLATE "C";
  SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
 (2 rows)
 
+-- Nested arrays are squashed only at constants level
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT ARRAY[
+    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+    ];
+                                             array                             
                
+-----------------------------------------------------------------------------------------------
+ 
{{1,2,3,4,5,6,7,8,9,10},{1,2,3,4,5,6,7,8,9,10},{1,2,3,4,5,6,7,8,9,10},{1,2,3,4,5,6,7,8,9,10}}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT ARRAY[                                     +|     1
+     ARRAY[$1 /*, ... */],                         +| 
+     ARRAY[$2 /*, ... */],                         +| 
+     ARRAY[$3 /*, ... */],                         +| 
+     ARRAY[$4 /*, ... */]                          +| 
+     ]                                              | 
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(2 rows)
+
+-- Relabel type
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT ARRAY[1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 
9::oid];
+        array        
+---------------------
+ {1,2,3,4,5,6,7,8,9}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT ARRAY[$1 /*, ... */]                        |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(2 rows)
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT ARRAY[
+    ('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+       ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+       ( '"9"')::jsonb, ( '"10"')::jsonb
+];
+                                       array                                   
     
+------------------------------------------------------------------------------------
+ 
{"\"1\"","\"2\"","\"3\"","\"4\"","\"5\"","\"6\"","\"7\"","\"8\"","\"9\"","\"10\""}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT ARRAY[$1 /*, ... */]                        |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(2 rows)
+
+-- CoerceViaIO
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT ARRAY[
+       1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+       4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+       7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+       10::int4::casttesttype, 11::int4::casttesttype
+];
+           array           
+---------------------------
+ {1,2,3,4,5,6,7,8,9,10,11}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT ARRAY[$1 /*, ... */]                        |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(2 rows)
+
+-- CoerceViaIO, SubLink instead of a Const is not squashed
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT ARRAY[
+       (SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+       (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+       (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+       (SELECT '"10"')::jsonb
+];
+                                       array                                   
     
+------------------------------------------------------------------------------------
+ 
{"\"1\"","\"2\"","\"3\"","\"4\"","\"5\"","\"6\"","\"7\"","\"8\"","\"9\"","\"10\""}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                query                                | calls 
+---------------------------------------------------------------------+-------
+ SELECT ARRAY[                                                      +|     1
+         (SELECT $1)::jsonb, (SELECT $2)::jsonb, (SELECT $3)::jsonb,+| 
+         (SELECT $4)::jsonb, (SELECT $5)::jsonb, (SELECT $6)::jsonb,+| 
+         (SELECT $7)::jsonb, (SELECT $8)::jsonb, (SELECT $9)::jsonb,+| 
+         (SELECT $10)::jsonb                                        +| 
+ ]                                                                   | 
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                  |     1
+(2 rows)
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT ARRAY[
+       abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+       abs(800), abs(900), abs(1000), ((abs(1100)))
+];
+                      array                      
+-------------------------------------------------
+ {100,200,300,400,500,600,700,800,900,1000,1100}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | 
calls 
+------------------------------------------------------------------------+-------
+ SELECT ARRAY[                                                         +|     1
+         abs($1), abs($2), abs($3), abs($4), abs($5), abs($6), abs($7),+| 
+         abs($8), abs($9), abs($10), ((abs($11)))                      +| 
+ ]                                                                      | 
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t                     |     1
+(2 rows)
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+SELECT ARRAY[
+       1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+       7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint
+];
+           array           
+---------------------------
+ {1,2,3,4,5,6,7,8,9,10,11}
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                       query                        | calls 
+----------------------------------------------------+-------
+ SELECT ARRAY[$1 /*, ... */]                        |     1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t |     1
+(2 rows)
+
diff --git a/contrib/pg_stat_statements/sql/squashing.sql 
b/contrib/pg_stat_statements/sql/squashing.sql
index f1a381e96cb..6884df1a90d 100644
--- a/contrib/pg_stat_statements/sql/squashing.sql
+++ b/contrib/pg_stat_statements/sql/squashing.sql
@@ -175,3 +175,62 @@ SELECT pg_stat_statements_reset() IS NOT NULL AS t;
 SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
 SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
 
+-- Nested arrays are squashed only at constants level
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[
+    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
+    ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+    ];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Relabel type
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6::oid, 7::oid, 8::oid, 
9::oid];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Some casting expression are simplified to Const
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[
+    ('"1"')::jsonb, ('"2"')::jsonb, ('"3"')::jsonb, ('"4"')::jsonb,
+       ( '"5"')::jsonb, ( '"6"')::jsonb, ( '"7"')::jsonb, ( '"8"')::jsonb,
+       ( '"9"')::jsonb, ( '"10"')::jsonb
+];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[
+       1::int4::casttesttype, 2::int4::casttesttype, 3::int4::casttesttype,
+       4::int4::casttesttype, 5::int4::casttesttype, 6::int4::casttesttype,
+       7::int4::casttesttype, 8::int4::casttesttype, 9::int4::casttesttype,
+       10::int4::casttesttype, 11::int4::casttesttype
+];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- CoerceViaIO, SubLink instead of a Const is not squashed
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[
+       (SELECT '"1"')::jsonb, (SELECT '"2"')::jsonb, (SELECT '"3"')::jsonb,
+       (SELECT '"4"')::jsonb, (SELECT '"5"')::jsonb, (SELECT '"6"')::jsonb,
+       (SELECT '"7"')::jsonb, (SELECT '"8"')::jsonb, (SELECT '"9"')::jsonb,
+       (SELECT '"10"')::jsonb
+];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[
+       abs(100), abs(200), abs(300), abs(400), abs(500), abs(600), abs(700),
+       abs(800), abs(900), abs(1000), ((abs(1100)))
+];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Bigint, long tokens with parenthesis
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT ARRAY[
+       1::bigint, 2::bigint, 3::bigint, 4::bigint, 5::bigint, 6::bigint,
+       7::bigint, 8::bigint, 9::bigint, 10::bigint, 11::bigint
+];
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
-- 
2.45.1

Reply via email to