From a46508514d5b7fbf7e526f41e89373e0244f85f4 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Fri, 3 May 2024 20:17:39 +1200
Subject: [PATCH v1 1/2] Add memory/disk usage for Material in EXPLAIN ANALYZE

Up until now, there was no easy way to determine whether a Material
node caused its underlying tuplestore to spill to disk, or to see how
much memory the tuplestore used when it did not.

Here we add some new functions to tuplestore.c to query this information
and add some additional output in EXPLAIN ANALYZE to display this
information.
---
 src/backend/commands/explain.c                | 37 ++++++++++++
 src/backend/storage/file/buffile.c            |  2 +-
 src/backend/utils/sort/tuplestore.c           | 53 ++++++++++++++++
 src/include/utils/tuplestore.h                |  4 ++
 src/test/regress/expected/partition_prune.out | 60 +++++++++----------
 src/test/regress/sql/partition_prune.sql      |  5 ++
 6 files changed, 130 insertions(+), 31 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index c0c73aa3c9..4f85b0af11 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -119,6 +119,7 @@ static void show_sort_info(SortState *sortstate, ExplainState *es);
 static void show_incremental_sort_info(IncrementalSortState *incrsortstate,
 									   ExplainState *es);
 static void show_hash_info(HashState *hashstate, ExplainState *es);
+static void show_material_info(MaterialState *mstate, ExplainState *es);
 static void show_memoize_info(MemoizeState *mstate, List *ancestors,
 							  ExplainState *es);
 static void show_hashagg_info(AggState *aggstate, ExplainState *es);
@@ -2242,6 +2243,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		case T_Hash:
 			show_hash_info(castNode(HashState, planstate), es);
 			break;
+		case T_Material:
+			show_material_info(castNode(MaterialState, planstate), es);
+			break;
 		case T_Memoize:
 			show_memoize_info(castNode(MemoizeState, planstate), ancestors,
 							  es);
@@ -3313,6 +3317,39 @@ show_hash_info(HashState *hashstate, ExplainState *es)
 	}
 }
 
+/*
+ * Show the storage type and maximum space used (rounded up to whole kB) by a
+ * Material node's tuplestore.  Only shown for EXPLAIN ANALYZE.
+ */
+static void
+show_material_info(MaterialState *mstate, ExplainState *es)
+{
+	Tuplestorestate *tupstore = mstate->tuplestorestate;
+	const char *storageType;
+	int64		spaceUsedKB;
+
+	/* nothing to show without ANALYZE, or when there is no tuplestore to query */
+	if (!es->analyze || tupstore == NULL)
+		return;
+
+	storageType = tuplestore_storage_type_name(tupstore);
+	spaceUsedKB = (tuplestore_space_used(tupstore) + 1023) / 1024;
+
+	if (es->format != EXPLAIN_FORMAT_TEXT)
+	{
+		ExplainPropertyText("Storage", storageType, es);
+		ExplainPropertyInteger("Maximum Storage", "kB", spaceUsedKB, es);
+	}
+	else
+	{
+		ExplainIndentText(es);
+		appendStringInfo(es->str,
+						 "Storage: %s  Maximum Storage: " INT64_FORMAT "kB\n",
+						 storageType,
+						 spaceUsedKB);
+	}
+}
+
 /*
  * Show information on memoize hits/misses/evictions and memory usage.
  */
diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c
index a263875fd5..9255f7d464 100644
--- a/src/backend/storage/file/buffile.c
+++ b/src/backend/storage/file/buffile.c
@@ -867,7 +867,7 @@ BufFileSize(BufFile *file)
 {
 	int64		lastFileSize;
 
-	Assert(file->fileset != NULL);
+	Assert(file->files != NULL);
 
 	/* Get the size of the last physical file. */
 	lastFileSize = FileSize(file->files[file->numFiles - 1]);
diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c
index 947a868e56..24bb49ca87 100644
--- a/src/backend/utils/sort/tuplestore.c
+++ b/src/backend/utils/sort/tuplestore.c
@@ -109,6 +109,7 @@ struct Tuplestorestate
 	bool		truncated;		/* tuplestore_trim has removed tuples? */
 	int64		availMem;		/* remaining memory available, in bytes */
 	int64		allowedMem;		/* total memory allowed, in bytes */
+	int64		maxSpace;		/* maximum space used in memory */
 	int64		tuples;			/* number of tuples added */
 	BufFile    *myfile;			/* underlying file, or NULL if none */
 	MemoryContext context;		/* memory context for holding tuples */
@@ -238,6 +239,7 @@ static Tuplestorestate *tuplestore_begin_common(int eflags,
 												int maxKBytes);
 static void tuplestore_puttuple_common(Tuplestorestate *state, void *tuple);
 static void dumptuples(Tuplestorestate *state);
+static void tuplestore_updatemax(Tuplestorestate *state);
 static unsigned int getlen(Tuplestorestate *state, bool eofOK);
 static void *copytup_heap(Tuplestorestate *state, void *tup);
 static void writetup_heap(Tuplestorestate *state, void *tup);
@@ -262,6 +264,7 @@ tuplestore_begin_common(int eflags, bool interXact, int maxKBytes)
 	state->truncated = false;
 	state->allowedMem = maxKBytes * 1024L;
 	state->availMem = state->allowedMem;
+	state->maxSpace = 0;
 	state->myfile = NULL;
 	state->context = CurrentMemoryContext;
 	state->resowner = CurrentResourceOwner;
@@ -420,6 +423,9 @@ tuplestore_clear(Tuplestorestate *state)
 	int			i;
 	TSReadPointer *readptr;
 
+	/* update the maxSpace before doing any USEMEM/FREEMEM adjustments */
+	tuplestore_updatemax(state);
+
 	if (state->myfile)
 		BufFileClose(state->myfile);
 	state->myfile = NULL;
@@ -1402,6 +1408,9 @@ tuplestore_trim(Tuplestorestate *state)
 	Assert(nremove >= state->memtupdeleted);
 	Assert(nremove <= state->memtupcount);
 
+	/* before freeing any memory, update maxSpace */
+	tuplestore_updatemax(state);
+
 	/* Release no-longer-needed tuples */
 	for (i = state->memtupdeleted; i < nremove; i++)
 	{
@@ -1444,6 +1453,49 @@ tuplestore_trim(Tuplestorestate *state)
 	}
 }
 
+/*
+ * tuplestore_updatemax
+ *		Raise maxSpace to the memory currently in use, while in TSS_INMEM
+ */
+static void
+tuplestore_updatemax(Tuplestorestate *state)
+{
+	if (state->status == TSS_INMEM &&
+		state->maxSpace < state->allowedMem - state->availMem)
+		state->maxSpace = state->allowedMem - state->availMem;
+}
+
+/*
+ * tuplestore_storage_type_name
+ *		Name the storage method currently holding the tuples: "Memory"
+ *		while the status is TSS_INMEM, "Disk" for any other status.
+ */
+const char *
+tuplestore_storage_type_name(Tuplestorestate *state)
+{
+	if (state->status != TSS_INMEM)
+		return "Disk";
+
+	return "Memory";
+}
+
+/*
+ * tuplestore_space_used
+ *		Report the peak memory used (maxSpace) while the tuplestore is still
+ *		in memory; otherwise report the size of the underlying file.
+ */
+int64
+tuplestore_space_used(Tuplestorestate *state)
+{
+	/* fold the current in-memory usage into maxSpace before reporting */
+	tuplestore_updatemax(state);
+
+	/* when not in TSS_INMEM, report the size of the underlying file */
+	if (state->status != TSS_INMEM)
+		return BufFileSize(state->myfile);
+	return state->maxSpace;
+}
+
 /*
  * tuplestore_in_memory
  *
@@ -1513,6 +1565,7 @@ writetup_heap(Tuplestorestate *state, void *tup)
 	if (state->backward)		/* need trailing length word? */
 		BufFileWrite(state->myfile, &tuplen, sizeof(tuplen));
 
+	/* no need to call tuplestore_updatemax() when not in TSS_INMEM */
 	FREEMEM(state, GetMemoryChunkSpace(tuple));
 	heap_free_minimal_tuple(tuple);
 }
diff --git a/src/include/utils/tuplestore.h b/src/include/utils/tuplestore.h
index 419613c17b..3d8a90caaf 100644
--- a/src/include/utils/tuplestore.h
+++ b/src/include/utils/tuplestore.h
@@ -65,6 +65,10 @@ extern void tuplestore_copy_read_pointer(Tuplestorestate *state,
 
 extern void tuplestore_trim(Tuplestorestate *state);
 
+extern const char *tuplestore_storage_type_name(Tuplestorestate *state);
+
+extern int64 tuplestore_space_used(Tuplestorestate *state);
+
 extern bool tuplestore_in_memory(Tuplestorestate *state);
 
 extern bool tuplestore_gettupleslot(Tuplestorestate *state, bool forward,
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index 46b78ba3c4..e6bc3f5c05 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -2886,12 +2886,13 @@ where c.relname like 'ab\_%' order by c.relname;
  ab_a3_b3_a_idx |        1 |         0 | t          |                  |                  
 (21 rows)
 
+set enable_material = 0;
 -- UPDATE on a partition subtree has been seen to have problems.
 insert into ab values (1,2);
 explain (analyze, costs off, summary off, timing off)
 update ab_a1 set b = 3 from ab where ab.a = 1 and ab.a = ab_a1.a;
-                                        QUERY PLAN                                         
--------------------------------------------------------------------------------------------
+                                     QUERY PLAN                                      
+-------------------------------------------------------------------------------------
  Update on ab_a1 (actual rows=0 loops=1)
    Update on ab_a1_b1 ab_a1_1
    Update on ab_a1_b2 ab_a1_2
@@ -2912,23 +2913,22 @@ update ab_a1 set b = 3 from ab where ab.a = 1 and ab.a = ab_a1.a;
                      Heap Blocks: exact=1
                      ->  Bitmap Index Scan on ab_a1_b3_a_idx (actual rows=1 loops=1)
                            Index Cond: (a = 1)
-         ->  Materialize (actual rows=1 loops=1)
-               ->  Append (actual rows=1 loops=1)
-                     ->  Bitmap Heap Scan on ab_a1_b1 ab_1 (actual rows=0 loops=1)
-                           Recheck Cond: (a = 1)
-                           ->  Bitmap Index Scan on ab_a1_b1_a_idx (actual rows=0 loops=1)
-                                 Index Cond: (a = 1)
-                     ->  Bitmap Heap Scan on ab_a1_b2 ab_2 (actual rows=1 loops=1)
-                           Recheck Cond: (a = 1)
-                           Heap Blocks: exact=1
-                           ->  Bitmap Index Scan on ab_a1_b2_a_idx (actual rows=1 loops=1)
-                                 Index Cond: (a = 1)
-                     ->  Bitmap Heap Scan on ab_a1_b3 ab_3 (actual rows=0 loops=1)
-                           Recheck Cond: (a = 1)
-                           Heap Blocks: exact=1
-                           ->  Bitmap Index Scan on ab_a1_b3_a_idx (actual rows=1 loops=1)
-                                 Index Cond: (a = 1)
-(36 rows)
+         ->  Append (actual rows=1 loops=1)
+               ->  Bitmap Heap Scan on ab_a1_b1 ab_1 (actual rows=0 loops=1)
+                     Recheck Cond: (a = 1)
+                     ->  Bitmap Index Scan on ab_a1_b1_a_idx (actual rows=0 loops=1)
+                           Index Cond: (a = 1)
+               ->  Bitmap Heap Scan on ab_a1_b2 ab_2 (actual rows=1 loops=1)
+                     Recheck Cond: (a = 1)
+                     Heap Blocks: exact=1
+                     ->  Bitmap Index Scan on ab_a1_b2_a_idx (actual rows=1 loops=1)
+                           Index Cond: (a = 1)
+               ->  Bitmap Heap Scan on ab_a1_b3 ab_3 (actual rows=0 loops=1)
+                     Recheck Cond: (a = 1)
+                     Heap Blocks: exact=1
+                     ->  Bitmap Index Scan on ab_a1_b3_a_idx (actual rows=1 loops=1)
+                           Index Cond: (a = 1)
+(35 rows)
 
 table ab;
  a | b 
@@ -2941,8 +2941,8 @@ truncate ab;
 insert into ab values (1, 1), (1, 2), (1, 3), (2, 1);
 explain (analyze, costs off, summary off, timing off)
 update ab_a1 set b = 3 from ab_a2 where ab_a2.b = (select 1);
-                                  QUERY PLAN                                  
-------------------------------------------------------------------------------
+                               QUERY PLAN                               
+------------------------------------------------------------------------
  Update on ab_a1 (actual rows=0 loops=1)
    Update on ab_a1_b1 ab_a1_1
    Update on ab_a1_b2 ab_a1_2
@@ -2950,20 +2950,20 @@ update ab_a1 set b = 3 from ab_a2 where ab_a2.b = (select 1);
    InitPlan 1
      ->  Result (actual rows=1 loops=1)
    ->  Nested Loop (actual rows=3 loops=1)
+         ->  Append (actual rows=1 loops=1)
+               ->  Seq Scan on ab_a2_b1 ab_a2_1 (actual rows=1 loops=1)
+                     Filter: (b = (InitPlan 1).col1)
+               ->  Seq Scan on ab_a2_b2 ab_a2_2 (never executed)
+                     Filter: (b = (InitPlan 1).col1)
+               ->  Seq Scan on ab_a2_b3 ab_a2_3 (never executed)
+                     Filter: (b = (InitPlan 1).col1)
          ->  Append (actual rows=3 loops=1)
                ->  Seq Scan on ab_a1_b1 ab_a1_1 (actual rows=1 loops=1)
                ->  Seq Scan on ab_a1_b2 ab_a1_2 (actual rows=1 loops=1)
                ->  Seq Scan on ab_a1_b3 ab_a1_3 (actual rows=1 loops=1)
-         ->  Materialize (actual rows=1 loops=3)
-               ->  Append (actual rows=1 loops=1)
-                     ->  Seq Scan on ab_a2_b1 ab_a2_1 (actual rows=1 loops=1)
-                           Filter: (b = (InitPlan 1).col1)
-                     ->  Seq Scan on ab_a2_b2 ab_a2_2 (never executed)
-                           Filter: (b = (InitPlan 1).col1)
-                     ->  Seq Scan on ab_a2_b3 ab_a2_3 (never executed)
-                           Filter: (b = (InitPlan 1).col1)
-(19 rows)
+(18 rows)
 
+reset enable_material;
 select tableoid::regclass, * from ab;
  tableoid | a | b 
 ----------+---+---
diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql
index dc71693861..2707a475b5 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -688,6 +688,8 @@ left join pg_stat_all_tables s on c.oid = s.relid
 left join pg_index i on c.oid = i.indexrelid
 where c.relname like 'ab\_%' order by c.relname;
 
+set enable_material = 0;
+
 -- UPDATE on a partition subtree has been seen to have problems.
 insert into ab values (1,2);
 explain (analyze, costs off, summary off, timing off)
@@ -699,6 +701,9 @@ truncate ab;
 insert into ab values (1, 1), (1, 2), (1, 3), (2, 1);
 explain (analyze, costs off, summary off, timing off)
 update ab_a1 set b = 3 from ab_a2 where ab_a2.b = (select 1);
+
+reset enable_material;
+
 select tableoid::regclass, * from ab;
 
 drop table ab, lprt_a;
-- 
2.40.1

