Re: Michael Paquier
> This is OK on its own, still feels a bit incomplete, as the relid also
> includes an assumption about the namespace.  I would suggested to add
> a hardcoded "pg_temp" here, to keep track of this assumption, at
> least.

I had thought about it, but figured that integers and strings are
already separate namespaces, so hashing them shouldn't have any
conflicts. But it's more clear to do that, so added in the new
version:

       AppendJumble(jstate, (const unsigned char *)"pg_temp", 
sizeof("pg_temp"));
       AppendJumble(jstate, (const unsigned char *)rel_name, strlen(rel_name));

>  typedef struct RangeTblEntry
>  {
> -    pg_node_attr(custom_read_write)
> +    pg_node_attr(custom_read_write, custom_query_jumble)
> 
> This structure still includes some query_jumble_ignore, which are not
> required once custom_query_jumble is added.

I would tend to keep them for documentation purposes. (The other
custom_query_jumble functions have a much more explicit structure so
there it is clear which fields are supposed to be jumbled.)

> We had better document at the top of RangeTblEntry why we are using a
> custom function.

I added a short comment just above custom_query_jumble.

Christoph
>From 70a80f9abd781f8114563ae8fc69e64123f12eeb Mon Sep 17 00:00:00 2001
From: Christoph Berg <m...@debian.org>
Date: Mon, 17 Mar 2025 17:17:17 +0100
Subject: [PATCH v2] Jumble temp tables by name

Query jumbling considers everything by OID, which is fine for regular
objects. But for temp tables, which have to be recreated in each session
(or even transaction), this means that the same temp table query run
again in the next session will never be aggregated in
pg_stat_statements. Instead, the statistics are polluted with a large
number of 1-call entries.

Fix by using the temp table name instead. This has the risk of
aggregating structurally different temp tables together if they share
the same name, but practically, the queries will likely already differ
in other details anyway. And even if not, aggregating the entries in
pg_stat_statements instead of polluting the stats seems the better
choice. (The user has still the option to simply change the name of the
temp table to have the queries separated. In the old scheme, the user
does not have any chance to change behavior.)
---
 .../pg_stat_statements/expected/select.out    | 31 ++++++++++++++++
 contrib/pg_stat_statements/sql/select.sql     | 12 +++++++
 src/backend/nodes/queryjumblefuncs.c          | 36 +++++++++++++++++++
 src/include/nodes/parsenodes.h                |  3 +-
 4 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/contrib/pg_stat_statements/expected/select.out b/contrib/pg_stat_statements/expected/select.out
index 37a30af034a..8a7e237298c 100644
--- a/contrib/pg_stat_statements/expected/select.out
+++ b/contrib/pg_stat_statements/expected/select.out
@@ -241,6 +241,37 @@ SELECT calls, query FROM pg_stat_statements ORDER BY query COLLATE "C";
 (12 rows)
 
 DROP TABLE pgss_a, pgss_b CASCADE;
+-- temp tables
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+BEGIN;
+  CREATE TEMP TABLE temp_t (id int) ON COMMIT DROP;
+  SELECT * FROM temp_t;
+ id 
+----
+(0 rows)
+
+COMMIT;
+BEGIN;
+  CREATE TEMP TABLE temp_t (id int) ON COMMIT DROP;
+  SELECT * FROM temp_t;
+ id 
+----
+(0 rows)
+
+COMMIT;
+SELECT calls, query FROM pg_stat_statements ORDER BY query COLLATE "C"; -- one query with two calls
+ calls |                                 query                                  
+-------+------------------------------------------------------------------------
+     2 | SELECT * FROM temp_t
+     0 | SELECT calls, query FROM pg_stat_statements ORDER BY query COLLATE "C"
+     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(3 rows)
+
 --
 -- access to pg_stat_statements_info view
 --
diff --git a/contrib/pg_stat_statements/sql/select.sql b/contrib/pg_stat_statements/sql/select.sql
index e0be58d5e24..81b9d50ecec 100644
--- a/contrib/pg_stat_statements/sql/select.sql
+++ b/contrib/pg_stat_statements/sql/select.sql
@@ -90,6 +90,18 @@ SELECT calls, query FROM pg_stat_statements ORDER BY query COLLATE "C";
 
 DROP TABLE pgss_a, pgss_b CASCADE;
 
+-- temp tables
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+BEGIN;
+  CREATE TEMP TABLE temp_t (id int) ON COMMIT DROP;
+  SELECT * FROM temp_t;
+COMMIT;
+BEGIN;
+  CREATE TEMP TABLE temp_t (id int) ON COMMIT DROP;
+  SELECT * FROM temp_t;
+COMMIT;
+SELECT calls, query FROM pg_stat_statements ORDER BY query COLLATE "C"; -- one query with two calls
+
 --
 -- access to pg_stat_statements_info view
 --
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index b103a281936..1e18f1ddf5f 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -32,10 +32,12 @@
  */
 #include "postgres.h"
 
+#include "catalog/namespace.h"
 #include "common/hashfn.h"
 #include "miscadmin.h"
 #include "nodes/queryjumble.h"
 #include "parser/scansup.h"
+#include "utils/lsyscache.h"
 
 #define JUMBLE_SIZE				1024	/* query serialization buffer size */
 
@@ -58,6 +60,7 @@ static void _jumbleNode(JumbleState *jstate, Node *node);
 static void _jumbleA_Const(JumbleState *jstate, Node *node);
 static void _jumbleList(JumbleState *jstate, Node *node);
 static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
+static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
 
 /*
  * Given a possibly multi-statement source string, confine our attention to the
@@ -377,3 +380,36 @@ _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
 	JUMBLE_FIELD(is_local);
 	JUMBLE_LOCATION(location);
 }
+
+static void
+_jumbleRangeTblEntry(JumbleState *jstate, Node *node)
+{
+	RangeTblEntry *expr = (RangeTblEntry *) node;
+	char	   *rel_name;
+
+	JUMBLE_FIELD(rtekind);
+
+	/*
+	 * If this is a temp table, jumble the name instead of the table OID.
+	 */
+	if (expr->rtekind == RTE_RELATION && isAnyTempNamespace(get_rel_namespace(expr->relid)))
+	{
+		rel_name = get_rel_name(expr->relid);
+		AppendJumble(jstate, (const unsigned char *)"pg_temp", sizeof("pg_temp"));
+		AppendJumble(jstate, (const unsigned char *)rel_name, strlen(rel_name));
+	}
+	else
+		JUMBLE_FIELD(relid);
+
+	JUMBLE_FIELD(inh);
+	JUMBLE_NODE(tablesample);
+	JUMBLE_NODE(subquery);
+	JUMBLE_FIELD(jointype);
+	JUMBLE_NODE(functions);
+	JUMBLE_FIELD(funcordinality);
+	JUMBLE_NODE(tablefunc);
+	JUMBLE_NODE(values_lists);
+	JUMBLE_STRING(ctename);
+	JUMBLE_FIELD(ctelevelsup);
+	JUMBLE_STRING(enrname);
+}
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 23c9e3c5abf..f0f3bd2b95f 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -1039,7 +1039,8 @@ typedef enum RTEKind
 
 typedef struct RangeTblEntry
 {
-	pg_node_attr(custom_read_write)
+	/* custom_query_jumble for handling temp tables specially */
+	pg_node_attr(custom_read_write, custom_query_jumble)
 
 	NodeTag		type;
 
-- 
2.47.2

Reply via email to