Re: Michael Paquier > This is OK on its own, still feels a bit incomplete, as the relid also > includes an assumption about the namespace. I would suggested to add > a hardcoded "pg_temp" here, to keep track of this assumption, at > least.
I had thought about it, but figured that integers and strings are already separate namespaces, so hashing them shouldn't have any conflicts. But it's more clear to do that, so added in the new version: AppendJumble(jstate, (const unsigned char *)"pg_temp", sizeof("pg_temp")); AppendJumble(jstate, (const unsigned char *)rel_name, strlen(rel_name)); > typedef struct RangeTblEntry > { > - pg_node_attr(custom_read_write) > + pg_node_attr(custom_read_write, custom_query_jumble) > > This structure still includes some query_jumble_ignore, which are not > required once custom_query_jumble is added. I would tend to keep them for documentation purposes. (The other custom_query_jumble functions have a much more explicit structure so there it is clear which fields are supposed to be jumbled.) > We had better document at the top of RangeTblEntry why we are using a > custom function. I added a short comment just above custom_query_jumble. Christoph
>From 70a80f9abd781f8114563ae8fc69e64123f12eeb Mon Sep 17 00:00:00 2001 From: Christoph Berg <m...@debian.org> Date: Mon, 17 Mar 2025 17:17:17 +0100 Subject: [PATCH v2] Jumble temp tables by name Query jumbling considers everything by OID, which is fine for regular objects. But for temp tables, which have to be recreated in each session (or even transaction), this means that the same temp table query run again in the next session will never be aggregated in pg_stat_statements. Instead, the statistics are polluted with a large number of 1-call entries. Fix by using the temp table name instead. This has the risk of aggregating structurally different temp tables together if they share the same name, but practically, the queries will likely already differ in other details anyway. And even if not, aggregating the entries in pg_stat_statements instead of polluting the stats seems the better choice. (The user has still the option to simply change the name of the temp table to have the queries separated. In the old scheme, the user does not have any chance to change behavior.) --- .../pg_stat_statements/expected/select.out | 31 ++++++++++++++++ contrib/pg_stat_statements/sql/select.sql | 12 +++++++ src/backend/nodes/queryjumblefuncs.c | 36 +++++++++++++++++++ src/include/nodes/parsenodes.h | 3 +- 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/contrib/pg_stat_statements/expected/select.out b/contrib/pg_stat_statements/expected/select.out index 37a30af034a..8a7e237298c 100644 --- a/contrib/pg_stat_statements/expected/select.out +++ b/contrib/pg_stat_statements/expected/select.out @@ -241,6 +241,37 @@ SELECT calls, query FROM pg_stat_statements ORDER BY query COLLATE "C"; (12 rows) DROP TABLE pgss_a, pgss_b CASCADE; +-- temp tables +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +BEGIN; + CREATE TEMP TABLE temp_t (id int) ON COMMIT DROP; + SELECT * FROM temp_t; + id +---- +(0 rows) + +COMMIT; +BEGIN; + CREATE TEMP TABLE temp_t (id int) ON COMMIT DROP; + SELECT * FROM temp_t; + id +---- +(0 rows) + +COMMIT; +SELECT calls, query FROM pg_stat_statements ORDER BY query COLLATE "C"; -- one query with two calls + calls | query +-------+------------------------------------------------------------------------ + 2 | SELECT * FROM temp_t + 0 | SELECT calls, query FROM pg_stat_statements ORDER BY query COLLATE "C" + 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t +(3 rows) + -- -- access to pg_stat_statements_info view -- diff --git a/contrib/pg_stat_statements/sql/select.sql b/contrib/pg_stat_statements/sql/select.sql index e0be58d5e24..81b9d50ecec 100644 --- a/contrib/pg_stat_statements/sql/select.sql +++ b/contrib/pg_stat_statements/sql/select.sql @@ -90,6 +90,18 @@ SELECT calls, query FROM pg_stat_statements ORDER BY query COLLATE "C"; DROP TABLE pgss_a, pgss_b CASCADE; +-- temp tables +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +BEGIN; + CREATE TEMP TABLE temp_t (id int) ON COMMIT DROP; + SELECT * FROM temp_t; +COMMIT; +BEGIN; + CREATE TEMP TABLE temp_t (id int) ON COMMIT DROP; + SELECT * FROM temp_t; +COMMIT; +SELECT calls, query FROM pg_stat_statements ORDER BY query COLLATE "C"; -- one query with two calls + -- -- access to pg_stat_statements_info view -- diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index b103a281936..1e18f1ddf5f 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -32,10 +32,12 @@ */ #include "postgres.h" +#include "catalog/namespace.h" #include "common/hashfn.h" #include "miscadmin.h" #include "nodes/queryjumble.h" #include "parser/scansup.h" +#include "utils/lsyscache.h" #define JUMBLE_SIZE 1024 /* query serialization buffer size */ @@ -58,6 +60,7 @@ static void _jumbleNode(JumbleState *jstate, Node *node); static void _jumbleA_Const(JumbleState *jstate, Node *node); static void _jumbleList(JumbleState *jstate, Node *node); static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node); +static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node); /* * Given a possibly multi-statement source string, confine our attention to the @@ -377,3 +380,36 @@ _jumbleVariableSetStmt(JumbleState *jstate, Node *node) JUMBLE_FIELD(is_local); JUMBLE_LOCATION(location); } + +static void +_jumbleRangeTblEntry(JumbleState *jstate, Node *node) +{ + RangeTblEntry *expr = (RangeTblEntry *) node; + char *rel_name; + + JUMBLE_FIELD(rtekind); + + /* + * If this is a temp table, jumble the name instead of the table OID. + */ + if (expr->rtekind == RTE_RELATION && isAnyTempNamespace(get_rel_namespace(expr->relid))) + { + rel_name = get_rel_name(expr->relid); + AppendJumble(jstate, (const unsigned char *)"pg_temp", sizeof("pg_temp")); + AppendJumble(jstate, (const unsigned char *)rel_name, strlen(rel_name)); + } + else + JUMBLE_FIELD(relid); + + JUMBLE_FIELD(inh); + JUMBLE_NODE(tablesample); + JUMBLE_NODE(subquery); + JUMBLE_FIELD(jointype); + JUMBLE_NODE(functions); + JUMBLE_FIELD(funcordinality); + JUMBLE_NODE(tablefunc); + JUMBLE_NODE(values_lists); + JUMBLE_STRING(ctename); + JUMBLE_FIELD(ctelevelsup); + JUMBLE_STRING(enrname); +} diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 23c9e3c5abf..f0f3bd2b95f 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1039,7 +1039,8 @@ typedef enum RTEKind typedef struct RangeTblEntry { - pg_node_attr(custom_read_write) + /* custom_query_jumble for handling temp tables specially */ + pg_node_attr(custom_read_write, custom_query_jumble) NodeTag type; -- 2.47.2