Re: Michael Paquier
> Fine by me as well to keep a dependency based on the fact that the
> structure is rather complicated, but I'd rather document that as well
> in parsenodes.h with a slightly fatter comment.  What do you think?

You are of course right, that one-line comment was just snakeoil :D.
Now there are proper ones, thanks.

Christoph
>From 725b6c65160a44678b2c43b4af0a3dc9c281830d Mon Sep 17 00:00:00 2001
From: Christoph Berg <m...@debian.org>
Date: Mon, 17 Mar 2025 17:17:17 +0100
Subject: [PATCH v3] Jumble temp tables by name

Query jumbling considers everything by OID, which is fine for regular
objects. But for temp tables, which have to be recreated in each session
(or even transaction), this means that the same temp table query run
again in the next session will never be aggregated in
pg_stat_statements. Instead, the statistics are polluted with a large
number of 1-call entries.

Fix by using the temp table name instead. This has the risk of
aggregating structurally different temp tables together if they share
the same name, but practically, the queries will likely already differ
in other details anyway. And even if not, aggregating the entries in
pg_stat_statements instead of polluting the stats seems the better
choice. (The user has still the option to simply change the name of the
temp table to have the queries separated. In the old scheme, the user
does not have any chance to change behavior.)
---
 .../pg_stat_statements/expected/select.out    | 31 +++++++++++++++
 contrib/pg_stat_statements/sql/select.sql     | 12 ++++++
 src/backend/nodes/queryjumblefuncs.c          | 39 +++++++++++++++++++
 src/include/nodes/parsenodes.h                |  8 +++-
 4 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/contrib/pg_stat_statements/expected/select.out b/contrib/pg_stat_statements/expected/select.out
index 37a30af034a..8a7e237298c 100644
--- a/contrib/pg_stat_statements/expected/select.out
+++ b/contrib/pg_stat_statements/expected/select.out
@@ -241,6 +241,37 @@ SELECT calls, query FROM pg_stat_statements ORDER BY query COLLATE "C";
 (12 rows)
 
 DROP TABLE pgss_a, pgss_b CASCADE;
+-- temp tables
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t 
+---
+ t
+(1 row)
+
+BEGIN;
+  CREATE TEMP TABLE temp_t (id int) ON COMMIT DROP;
+  SELECT * FROM temp_t;
+ id 
+----
+(0 rows)
+
+COMMIT;
+BEGIN;
+  CREATE TEMP TABLE temp_t (id int) ON COMMIT DROP;
+  SELECT * FROM temp_t;
+ id 
+----
+(0 rows)
+
+COMMIT;
+SELECT calls, query FROM pg_stat_statements ORDER BY query COLLATE "C"; -- one query with two calls
+ calls |                                 query                                  
+-------+------------------------------------------------------------------------
+     2 | SELECT * FROM temp_t
+     0 | SELECT calls, query FROM pg_stat_statements ORDER BY query COLLATE "C"
+     1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
+(3 rows)
+
 --
 -- access to pg_stat_statements_info view
 --
diff --git a/contrib/pg_stat_statements/sql/select.sql b/contrib/pg_stat_statements/sql/select.sql
index e0be58d5e24..81b9d50ecec 100644
--- a/contrib/pg_stat_statements/sql/select.sql
+++ b/contrib/pg_stat_statements/sql/select.sql
@@ -90,6 +90,18 @@ SELECT calls, query FROM pg_stat_statements ORDER BY query COLLATE "C";
 
 DROP TABLE pgss_a, pgss_b CASCADE;
 
+-- temp tables
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+BEGIN;
+  CREATE TEMP TABLE temp_t (id int) ON COMMIT DROP;
+  SELECT * FROM temp_t;
+COMMIT;
+BEGIN;
+  CREATE TEMP TABLE temp_t (id int) ON COMMIT DROP;
+  SELECT * FROM temp_t;
+COMMIT;
+SELECT calls, query FROM pg_stat_statements ORDER BY query COLLATE "C"; -- one query with two calls
+
 --
 -- access to pg_stat_statements_info view
 --
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 189bfda610a..d5b3134cc2f 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -33,6 +33,7 @@
 #include "postgres.h"
 
 #include "access/transam.h"
+#include "catalog/namespace.h"
 #include "catalog/pg_proc.h"
 #include "common/hashfn.h"
 #include "miscadmin.h"
@@ -40,6 +41,7 @@
 #include "nodes/queryjumble.h"
 #include "utils/lsyscache.h"
 #include "parser/scansup.h"
+#include "utils/lsyscache.h"
 
 #define JUMBLE_SIZE				1024	/* query serialization buffer size */
 
@@ -67,6 +69,7 @@ static void _jumbleElements(JumbleState *jstate, List *elements);
 static void _jumbleA_Const(JumbleState *jstate, Node *node);
 static void _jumbleList(JumbleState *jstate, Node *node);
 static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
+static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
 
 /*
  * Given a possibly multi-statement source string, confine our attention to the
@@ -513,3 +516,39 @@ _jumbleVariableSetStmt(JumbleState *jstate, Node *node)
 	JUMBLE_FIELD(is_local);
 	JUMBLE_LOCATION(location);
 }
+
+/*
+ * RangeTblEntry is jumbled exactly like the automatic jumbling would do, but
+ * with using the name of temp tables instead of OID. Since temp tables have to
+ * be recreated for each session (or even transaction), their OID is useless
+ * for fingerprinting.
+ */
+static void
+_jumbleRangeTblEntry(JumbleState *jstate, Node *node)
+{
+	RangeTblEntry *expr = (RangeTblEntry *) node;
+	char	   *rel_name;
+
+	JUMBLE_FIELD(rtekind);
+
+	if (expr->rtekind == RTE_RELATION && isAnyTempNamespace(get_rel_namespace(expr->relid)))
+	{
+		rel_name = get_rel_name(expr->relid);
+		AppendJumble(jstate, (const unsigned char *)"pg_temp", sizeof("pg_temp"));
+		AppendJumble(jstate, (const unsigned char *)rel_name, strlen(rel_name));
+	}
+	else
+		JUMBLE_FIELD(relid);
+
+	JUMBLE_FIELD(inh);
+	JUMBLE_NODE(tablesample);
+	JUMBLE_NODE(subquery);
+	JUMBLE_FIELD(jointype);
+	JUMBLE_NODE(functions);
+	JUMBLE_FIELD(funcordinality);
+	JUMBLE_NODE(tablefunc);
+	JUMBLE_NODE(values_lists);
+	JUMBLE_STRING(ctename);
+	JUMBLE_FIELD(ctelevelsup);
+	JUMBLE_STRING(enrname);
+}
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 23c9e3c5abf..4fbe1520548 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -1019,6 +1019,12 @@ typedef struct PartitionCmd
  *	  rewriter to implement security-barrier views and/or row-level security.
  *	  Note that the planner turns each boolean expression into an implicitly
  *	  AND'ed sublist, as is its usual habit with qualification expressions.
+ *
+ *	  RangeTblEntry uses a custom query jumble function to hash temp tables by
+ *	  name instead of by OID. The query_jumble_ignore markers on struct members
+ *	  are still kept for documentation; if the custom_query_jumble attribute is
+ *	  dropped, the automatically generated _jumbleRangeTblEntry function should
+ *	  be identical except for the relid.
  *--------------------
  */
 typedef enum RTEKind
@@ -1039,7 +1045,7 @@ typedef enum RTEKind
 
 typedef struct RangeTblEntry
 {
-	pg_node_attr(custom_read_write)
+	pg_node_attr(custom_read_write, custom_query_jumble)
 
 	NodeTag		type;
 
-- 
2.47.2

Reply via email to