On Fri, Mar 13, 2020 at 10:57:43AM -0700, Andres Freund wrote:
> On 2020-03-13 10:53:17 -0700, Jeff Davis wrote:
> > On Fri, 2020-03-13 at 10:27 -0700, Andres Freund wrote:
> > > On 2020-03-13 10:15:46 -0700, Jeff Davis wrote:
> > > > Also, is there a reason you report two different memory values
> > > > (hashtable and tuples)? I don't object, but it seems like a little too
> > > > much detail.
> > > 
> > > Seems useful to me - the hashtable is pre-allocated based on estimates,
> > > whereas the tuples are allocated "on demand". So seeing the difference
> > > will allow to investigate the more crucial issue...

> > Then do we also want to report separately on the by-ref transition
> > values? That could be useful if you are using ARRAY_AGG and the states
> > grow larger than you might expect.
> 
> I can see that being valuable - I've had to debug cases with too much
> memory being used due to aggregate transitions before. Right now it'd be
> mixed in with tuples, I believe - and we'd need a separate context for
> tracking the transition values? Due to that I'm inclined to not report
> separately for now.

I think that's already in a separate context indexed by grouping set:
src/include/nodes/execnodes.h:  ExprContext **aggcontexts;      /* econtexts for long-lived data (per GS) */

But the hashtable and tuples are combined across all grouping sets, so I gave
each grouping set its own contexts and rebased on top of commit
1f39bce021540fde00990af55b4432c55ef4b3c7.

I haven't done anything with the aggcontexts yet.
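
With per-set contexts in place, the two reported numbers are just sums over
the hash sets; this is roughly what patch 1/8 does in
hash_agg_update_metrics():

	for (int i = 0; i < aggstate->num_hashes; ++i)
	{
		/* "hashtable": memory for the hash table itself */
		meta_mem += MemoryContextMemAllocated(
			aggstate->perhash[i].hash_metacxt, true);
		/* "tuples": memory for group keys and transition states */
		hash_mem += MemoryContextMemAllocated(
			aggstate->perhash[i].hashcontext->ecxt_per_tuple_memory, true);
	}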

Now I can get output like:

|template1=# explain analyze SELECT i,COUNT(1) FROM t GROUP BY 1;
| HashAggregate  (cost=4769.99..6769.98 rows=199999 width=12) (actual time=266.465..27020.333 rows=199999 loops=1)
|   Group Key: i
|   Buckets: 524288 (originally 262144)
|   Peak Memory Usage: hashtable: 12297kB, tuples: 24576kB
|   Disk Usage: 192 kB
|   HashAgg Batches: 3874
|   ->  Seq Scan on t  (cost=0.00..3769.99 rows=199999 width=4) (actual time=13.043..64.017 rows=199999 loops=1)
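
In non-text formats the same data comes out as separate properties; going by
the property names added in patch 2/8, EXPLAIN (ANALYZE, FORMAT JSON) should
include something like this (values copied from the text output above):

	"Hash Buckets": 524288,
	"Original Hash Buckets": 262144,
	"Peak Memory Usage (hashtable)": 12297,
	"Peak Memory Usage (tuples)": 24576,
	"Disk Usage": 192,
	"HashAgg Batches": 3874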

It looks somewhat funny next to hash join, which puts everything on one line:

|template1=# explain  analyze SELECT i,COUNT(1) FROM t a JOIN t b USING(i) GROUP BY 1;
| HashAggregate  (cost=13789.95..15789.94 rows=199999 width=12) (actual time=657.733..27129.873 rows=199999 loops=1)
|   Group Key: a.i
|   Buckets: 524288 (originally 262144)
|   Peak Memory Usage: hashtable: 12297kB, tuples: 24576kB
|   Disk Usage: 192 kB
|   HashAgg Batches: 3874
|   ->  Hash Join  (cost=6269.98..12789.95 rows=199999 width=4) (actual time=135.932..426.071 rows=199999 loops=1)
|         Hash Cond: (a.i = b.i)
|         ->  Seq Scan on t a  (cost=0.00..3769.99 rows=199999 width=4) (actual time=3.265..47.598 rows=199999 loops=1)
|         ->  Hash  (cost=3769.99..3769.99 rows=199999 width=4) (actual time=131.881..131.882 rows=199999 loops=1)
|               Buckets: 262144  Batches: 1  Memory Usage: 9080kB
|               ->  Seq Scan on t b  (cost=0.00..3769.99 rows=199999 width=4) (actual time=3.273..40.163 rows=199999 loops=1)
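
With grouping sets, each hashed set gets its own bucket line; for example,
from the postgres_fdw expected output updated by patch 2/8:

|   ->  HashAggregate
|         Output: c2, c6, sum(c1)
|         Hash Key: ft1.c2
|         Buckets: 16
|         Hash Key: ft1.c6
|         Buckets: 16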

-- 
Justin
From e593c119c97ea31edac4c9f08a39eee451964a16 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Thu, 19 Mar 2020 23:03:25 -0500
Subject: [PATCH v8 1/8] nodeAgg: separate context for each hashtable

---
 src/backend/executor/execExpr.c |  2 +-
 src/backend/executor/nodeAgg.c  | 82 +++++++++++++++++++--------------
 src/include/executor/nodeAgg.h  |  2 +
 src/include/nodes/execnodes.h   |  2 -
 4 files changed, 51 insertions(+), 37 deletions(-)

diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 1370ffec50..039c5a8b5f 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -3241,7 +3241,7 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
 	int adjust_jumpnull = -1;
 
 	if (ishash)
-		aggcontext = aggstate->hashcontext;
+		aggcontext = aggstate->perhash[setno].hashcontext;
 	else
 		aggcontext = aggstate->aggcontexts[setno];
 
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 44c159ab2a..1d319f49d0 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -191,8 +191,7 @@
  *	  So we create an array, aggcontexts, with an ExprContext for each grouping
  *	  set in the largest rollup that we're going to process, and use the
  *	  per-tuple memory context of those ExprContexts to store the aggregate
- *	  transition values.  hashcontext is the single context created to support
- *	  all hash tables.
+ *	  transition values.
  *
  *	  Spilling To Disk
  *
@@ -457,7 +456,7 @@ select_current_set(AggState *aggstate, int setno, bool is_hash)
 	 * ExecAggPlainTransByRef().
 	 */
 	if (is_hash)
-		aggstate->curaggcontext = aggstate->hashcontext;
+		aggstate->curaggcontext = aggstate->perhash[setno].hashcontext;
 	else
 		aggstate->curaggcontext = aggstate->aggcontexts[setno];
 
@@ -1424,8 +1423,7 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
  * grouping set for which we're doing hashing.
  *
  * The contents of the hash tables always live in the hashcontext's per-tuple
- * memory context (there is only one of these for all tables together, since
- * they are all reset at the same time).
+ * memory context.
  */
 static void
 build_hash_tables(AggState *aggstate)
@@ -1465,8 +1463,8 @@ static void
 build_hash_table(AggState *aggstate, int setno, long nbuckets)
 {
 	AggStatePerHash perhash = &aggstate->perhash[setno];
-	MemoryContext	metacxt = aggstate->hash_metacxt;
-	MemoryContext	hashcxt = aggstate->hashcontext->ecxt_per_tuple_memory;
+	MemoryContext	metacxt = perhash->hash_metacxt;
+	MemoryContext	hashcxt = perhash->hashcontext->ecxt_per_tuple_memory;
 	MemoryContext	tmpcxt	= aggstate->tmpcontext->ecxt_per_tuple_memory;
 	Size            additionalsize;
 
@@ -1776,10 +1774,15 @@ static void
 hash_agg_check_limits(AggState *aggstate)
 {
 	uint64 ngroups = aggstate->hash_ngroups_current;
-	Size meta_mem = MemoryContextMemAllocated(
-		aggstate->hash_metacxt, true);
-	Size hash_mem = MemoryContextMemAllocated(
-		aggstate->hashcontext->ecxt_per_tuple_memory, true);
+	Size meta_mem = 0;
+	Size hash_mem = 0;
+
+	for (int i = 0; i < aggstate->num_hashes; ++i)
+		meta_mem += MemoryContextMemAllocated(
+			aggstate->perhash[i].hash_metacxt, true);
+	for (int i = 0; i < aggstate->num_hashes; ++i)
+		hash_mem += MemoryContextMemAllocated(
+			aggstate->perhash[i].hashcontext->ecxt_per_tuple_memory, true);
 
 	/*
 	 * Don't spill unless there's at least one group in the hash table so we
@@ -1837,8 +1840,8 @@ hash_agg_enter_spill_mode(AggState *aggstate)
 static void
 hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
 {
-	Size	meta_mem;
-	Size	hash_mem;
+	Size	meta_mem = 0;
+	Size	hash_mem = 0;
 	Size	buffer_mem;
 	Size	total_mem;
 
@@ -1846,12 +1849,16 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
 		aggstate->aggstrategy != AGG_HASHED)
 		return;
 
-	/* memory for the hash table itself */
-	meta_mem = MemoryContextMemAllocated(aggstate->hash_metacxt, true);
 
-	/* memory for the group keys and transition states */
-	hash_mem = MemoryContextMemAllocated(
-		aggstate->hashcontext->ecxt_per_tuple_memory, true);
+	for (int i = 0; i < aggstate->num_hashes; ++i)
+	{
+		/* memory for the hash table itself */
+		meta_mem += MemoryContextMemAllocated(
+			aggstate->perhash[i].hash_metacxt, true);
+		/* memory for the group keys and transition states */
+		hash_mem += MemoryContextMemAllocated(
+			aggstate->perhash[i].hashcontext->ecxt_per_tuple_memory, true);
+	}
 
 	/* memory for read/write tape buffers, if spilled */
 	buffer_mem = npartitions * HASHAGG_WRITE_BUFFER_SIZE;
@@ -2557,9 +2564,11 @@ agg_refill_hash_table(AggState *aggstate)
 		aggstate->all_pergroups[setoff] = NULL;
 
 	/* free memory and reset hash tables */
-	ReScanExprContext(aggstate->hashcontext);
 	for (int setno = 0; setno < aggstate->num_hashes; setno++)
+	{
+		ReScanExprContext(aggstate->perhash[setno].hashcontext);
 		ResetTupleHashTable(aggstate->perhash[setno].hashtable);
+	}
 
 	aggstate->hash_ngroups_current = 0;
 
@@ -3217,6 +3226,10 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	aggstate->aggcontexts = (ExprContext **)
 		palloc0(sizeof(ExprContext *) * numGroupingSets);
 
+	aggstate->num_hashes = numHashes;
+	if (numHashes)
+		aggstate->perhash = palloc0(sizeof(AggStatePerHashData) * numHashes);
+
 	/*
 	 * Create expression contexts.  We need three or more, one for
 	 * per-input-tuple processing, one for per-output-tuple processing, one
@@ -3240,10 +3253,10 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		aggstate->aggcontexts[i] = aggstate->ss.ps.ps_ExprContext;
 	}
 
-	if (use_hashing)
+	for (i = 0 ; i < numHashes; ++i)
 	{
 		ExecAssignExprContext(estate, &aggstate->ss.ps);
-		aggstate->hashcontext = aggstate->ss.ps.ps_ExprContext;
+		aggstate->perhash[i].hashcontext = aggstate->ss.ps.ps_ExprContext;
 	}
 
 	ExecAssignExprContext(estate, &aggstate->ss.ps);
@@ -3332,11 +3345,8 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	 * compare functions.  Accumulate all_grouped_cols in passing.
 	 */
 	aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData));
-
-	aggstate->num_hashes = numHashes;
 	if (numHashes)
 	{
-		aggstate->perhash = palloc0(sizeof(AggStatePerHashData) * numHashes);
 		aggstate->phases[0].numsets = 0;
 		aggstate->phases[0].gset_lengths = palloc(numHashes * sizeof(int));
 		aggstate->phases[0].grouped_cols = palloc(numHashes * sizeof(Bitmapset *));
@@ -3528,10 +3538,12 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		uint64	totalGroups = 0;
 		int 	i;
 
-		aggstate->hash_metacxt = AllocSetContextCreate(
-			aggstate->ss.ps.state->es_query_cxt,
-			"HashAgg meta context",
-			ALLOCSET_DEFAULT_SIZES);
+		for (i = 0; i < aggstate->num_hashes; i++)
+			aggstate->perhash[i].hash_metacxt = AllocSetContextCreate(
+				aggstate->ss.ps.state->es_query_cxt,
+				"HashAgg meta context",
+				ALLOCSET_DEFAULT_SIZES);
+
 		aggstate->hash_spill_slot = ExecInitExtraTupleSlot(
 			estate, scanDesc, &TTSOpsMinimalTuple);
 
@@ -4461,10 +4473,11 @@ ExecEndAgg(AggState *node)
 
 	hashagg_reset_spill_state(node);
 
-	if (node->hash_metacxt != NULL)
+	for (setno = 0; setno < node->num_hashes; setno++)
 	{
-		MemoryContextDelete(node->hash_metacxt);
-		node->hash_metacxt = NULL;
+		MemoryContext *metacxt = &node->perhash[setno].hash_metacxt;
+		MemoryContextDelete(*metacxt);
+		*metacxt = NULL;
 	}
 
 	for (transno = 0; transno < node->numtrans; transno++)
@@ -4481,8 +4494,8 @@ ExecEndAgg(AggState *node)
 	/* And ensure any agg shutdown callbacks have been called */
 	for (setno = 0; setno < numGroupingSets; setno++)
 		ReScanExprContext(node->aggcontexts[setno]);
-	if (node->hashcontext)
-		ReScanExprContext(node->hashcontext);
+	for (setno = 0; setno < node->num_hashes; setno++)
+		ReScanExprContext(node->perhash[setno].hashcontext);
 
 	/*
 	 * We don't actually free any ExprContexts here (see comment in
@@ -4591,7 +4604,8 @@ ExecReScanAgg(AggState *node)
 		node->hash_spill_mode = false;
 		node->hash_ngroups_current = 0;
 
-		ReScanExprContext(node->hashcontext);
+		for (setno = 0; setno < node->num_hashes; setno++)
+			ReScanExprContext(node->perhash[setno].hashcontext);
 		/* Rebuild an empty hash table */
 		build_hash_tables(node);
 		node->table_filled = false;
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h
index a5b8a004d1..247bb65bd6 100644
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -307,6 +307,8 @@ typedef struct AggStatePerHashData
 	AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */
 	AttrNumber *hashGrpColIdxHash;	/* indices in hash table tuples */
 	Agg		   *aggnode;		/* original Agg node, for numGroups etc. */
+	MemoryContext	hash_metacxt;	/* memory for hash table itself */
+	ExprContext	*hashcontext;	/* context for hash table data */
 }			AggStatePerHashData;
 
 
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 3d27d50f09..ddf0b43916 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -2051,7 +2051,6 @@ typedef struct AggState
 	int			current_phase;	/* current phase number */
 	AggStatePerAgg peragg;		/* per-Aggref information */
 	AggStatePerTrans pertrans;	/* per-Trans state information */
-	ExprContext *hashcontext;	/* econtexts for long-lived data (hashtable) */
 	ExprContext **aggcontexts;	/* econtexts for long-lived data (per GS) */
 	ExprContext *tmpcontext;	/* econtext for input expressions */
 #define FIELDNO_AGGSTATE_CURAGGCONTEXT 14
@@ -2079,7 +2078,6 @@ typedef struct AggState
 	/* these fields are used in AGG_HASHED and AGG_MIXED modes: */
 	bool		table_filled;	/* hash table filled yet? */
 	int			num_hashes;
-	MemoryContext	hash_metacxt;	/* memory for hash table itself */
 	struct HashTapeInfo *hash_tapeinfo; /* metadata for spill tapes */
 	struct HashAggSpill *hash_spills; /* HashAggSpill for each grouping set,
 										 exists only during first pass */
-- 
2.17.0

From 8fb9202fdfa04cf0c13ba3637b2c0a5365380eac Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Tue, 31 Dec 2019 18:49:41 -0600
Subject: [PATCH v8 2/8] explain to show tuplehash bucket and memory stats..

Note that hashed SubPlan and RecursiveUnion aren't affected in the explain
output, probably because their hash tables aren't allocated at that point.

Discussion: https://www.postgresql.org/message-id/flat/20200103161925.gm12...@telsasoft.com
---
 .../postgres_fdw/expected/postgres_fdw.out    |  56 ++++--
 src/backend/commands/explain.c                | 168 ++++++++++++++----
 src/backend/executor/execGrouping.c           |  33 ++++
 src/backend/executor/nodeAgg.c                |  15 +-
 src/backend/executor/nodeRecursiveunion.c     |   3 +
 src/backend/executor/nodeSetOp.c              |   1 +
 src/backend/executor/nodeSubplan.c            |   3 +
 src/include/executor/executor.h               |   1 +
 src/include/nodes/execnodes.h                 |  10 +-
 9 files changed, 227 insertions(+), 63 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 62c2697920..2ddae83178 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2086,9 +2086,11 @@ SELECT t1c1, avg(t1c1 + t2c1) FROM (SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2
          ->  HashAggregate
                Output: t1.c1, avg((t1.c1 + t2.c1))
                Group Key: t1.c1
+               Buckets: 256
                ->  HashAggregate
                      Output: t1.c1, t2.c1
                      Group Key: t1.c1, t2.c1
+                     Buckets: 4096
                      ->  Append
                            ->  Foreign Scan
                                  Output: t1.c1, t2.c1
@@ -2098,7 +2100,7 @@ SELECT t1c1, avg(t1c1 + t2c1) FROM (SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2
                                  Output: t1_1.c1, t2_1.c1
                                  Relations: (public.ft1 t1_1) INNER JOIN (public.ft2 t2_1)
                                  Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1"))))
-(20 rows)
+(22 rows)
 
 SELECT t1c1, avg(t1c1 + t2c1) FROM (SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) UNION SELECT t1.c1, t2.c1 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1)) AS t (t1c1, t2c1) GROUP BY t1c1 ORDER BY t1c1 OFFSET 100 LIMIT 10;
  t1c1 |         avg          
@@ -2129,11 +2131,12 @@ SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM
          ->  HashAggregate
                Output: t2.c1, t3.c1
                Group Key: t2.c1, t3.c1
+               Buckets: 2
                ->  Foreign Scan
                      Output: t2.c1, t3.c1
                      Relations: (public.ft1 t2) INNER JOIN (public.ft2 t3)
                      Remote SQL: SELECT r1."C 1", r2."C 1" FROM ("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")) AND ((r1.c2 = $1::integer))))
-(13 rows)
+(14 rows)
 
 SELECT t1."C 1" FROM "S 1"."T 1" t1, LATERAL (SELECT DISTINCT t2.c1, t3.c1 FROM ft1 t2, ft2 t3 WHERE t2.c1 = t3.c1 AND t2.c2 = t1.c2) q ORDER BY t1."C 1" OFFSET 10 LIMIT 10;
  C 1 
@@ -2610,10 +2613,11 @@ select c2 * (random() <= 1)::int as c2 from ft2 group by c2 * (random() <= 1)::i
    ->  HashAggregate
          Output: ((c2 * ((random() <= '1'::double precision))::integer))
          Group Key: (ft2.c2 * ((random() <= '1'::double precision))::integer)
+         Buckets: 2
          ->  Foreign Scan on public.ft2
                Output: (c2 * ((random() <= '1'::double precision))::integer)
                Remote SQL: SELECT c2 FROM "S 1"."T 1"
-(9 rows)
+(10 rows)
 
 -- GROUP BY clause in various forms, cardinal, alias and constant expression
 explain (verbose, costs off)
@@ -2713,11 +2717,12 @@ select sum(c1) from ft1 group by c2 having avg(c1 * (random() <= 1)::int) > 100
    ->  HashAggregate
          Output: sum(c1), c2
          Group Key: ft1.c2
+         Buckets: 16
          Filter: (avg((ft1.c1 * ((random() <= '1'::double precision))::integer)) > '100'::numeric)
          ->  Foreign Scan on public.ft1
                Output: c1, c2
                Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1"
-(10 rows)
+(11 rows)
 
 -- Remote aggregate in combination with a local Param (for the output
 -- of an initplan) can be trouble, per bug #15781
@@ -2963,10 +2968,11 @@ select sum(c1) filter (where (c1 / c1) * random() <= 1) from ft1 group by c2 ord
    ->  HashAggregate
          Output: sum(c1) FILTER (WHERE ((((c1 / c1))::double precision * random()) <= '1'::double precision)), c2
          Group Key: ft1.c2
+         Buckets: 16
          ->  Foreign Scan on public.ft1
                Output: c1, c2
                Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1"
-(9 rows)
+(10 rows)
 
 explain (verbose, costs off)
 select sum(c2) filter (where c2 in (select c2 from ft1 where c2 < 5)) from ft1;
@@ -3229,6 +3235,7 @@ select count(*), x.b from ft1, (select c2 a, sum(c1) b from ft1 group by c2) x w
    ->  HashAggregate
          Output: count(*), x.b
          Group Key: x.b
+         Buckets: 16
          ->  Hash Join
                Output: x.b
                Inner Unique: true
@@ -3244,7 +3251,7 @@ select count(*), x.b from ft1, (select c2 a, sum(c1) b from ft1 group by c2) x w
                                  Output: ft1_1.c2, (sum(ft1_1.c1))
                                  Relations: Aggregate on (public.ft1 ft1_1)
                                  Remote SQL: SELECT c2, sum("C 1") FROM "S 1"."T 1" GROUP BY 1
-(21 rows)
+(22 rows)
 
 select count(*), x.b from ft1, (select c2 a, sum(c1) b from ft1 group by c2) x where ft1.c2 = x.a group by x.b order by 1, 2;
  count |   b   
@@ -3449,11 +3456,12 @@ select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls la
    ->  MixedAggregate
          Output: c2, sum(c1)
          Hash Key: ft1.c2
+         Buckets: 16
          Group Key: ()
          ->  Foreign Scan on public.ft1
                Output: c2, c1
                Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
-(10 rows)
+(11 rows)
 
 select c2, sum(c1) from ft1 where c2 < 3 group by rollup(c2) order by 1 nulls last;
  c2 |  sum   
@@ -3474,11 +3482,12 @@ select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last
    ->  MixedAggregate
          Output: c2, sum(c1)
          Hash Key: ft1.c2
+         Buckets: 16
          Group Key: ()
          ->  Foreign Scan on public.ft1
                Output: c2, c1
                Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
-(10 rows)
+(11 rows)
 
 select c2, sum(c1) from ft1 where c2 < 3 group by cube(c2) order by 1 nulls last;
  c2 |  sum   
@@ -3499,11 +3508,13 @@ select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) orde
    ->  HashAggregate
          Output: c2, c6, sum(c1)
          Hash Key: ft1.c2
+         Buckets: 16
          Hash Key: ft1.c6
+         Buckets: 16
          ->  Foreign Scan on public.ft1
                Output: c2, c6, c1
                Remote SQL: SELECT "C 1", c2, c6 FROM "S 1"."T 1" WHERE ((c2 < 3))
-(10 rows)
+(12 rows)
 
 select c2, c6, sum(c1) from ft1 where c2 < 3 group by grouping sets(c2, c6) order by 1 nulls last, 2 nulls last;
  c2 | c6 |  sum  
@@ -3526,10 +3537,11 @@ select c2, sum(c1), grouping(c2) from ft1 where c2 < 3 group by c2 order by 1 nu
    ->  HashAggregate
          Output: c2, sum(c1), GROUPING(c2)
          Group Key: ft1.c2
+         Buckets: 16
          ->  Foreign Scan on public.ft1
                Output: c2, c1
                Remote SQL: SELECT "C 1", c2 FROM "S 1"."T 1" WHERE ((c2 < 3))
-(9 rows)
+(10 rows)
 
 select c2, sum(c1), grouping(c2) from ft1 where c2 < 3 group by c2 order by 1 nulls last;
  c2 |  sum  | grouping 
@@ -7147,13 +7159,14 @@ select * from bar where f1 in (select f1 from foo) for update;
                ->  HashAggregate
                      Output: foo.ctid, foo.f1, foo.*, foo.tableoid
                      Group Key: foo.f1
+                     Buckets: 256
                      ->  Append
                            ->  Seq Scan on public.foo foo_1
                                  Output: foo_1.ctid, foo_1.f1, foo_1.*, foo_1.tableoid
                            ->  Foreign Scan on public.foo2 foo_2
                                  Output: foo_2.ctid, foo_2.f1, foo_2.*, foo_2.tableoid
                                  Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct1
-(23 rows)
+(24 rows)
 
 select * from bar where f1 in (select f1 from foo) for update;
  f1 | f2 
@@ -7185,13 +7198,14 @@ select * from bar where f1 in (select f1 from foo) for share;
                ->  HashAggregate
                      Output: foo.ctid, foo.f1, foo.*, foo.tableoid
                      Group Key: foo.f1
+                     Buckets: 256
                      ->  Append
                            ->  Seq Scan on public.foo foo_1
                                  Output: foo_1.ctid, foo_1.f1, foo_1.*, foo_1.tableoid
                            ->  Foreign Scan on public.foo2 foo_2
                                  Output: foo_2.ctid, foo_2.f1, foo_2.*, foo_2.tableoid
                                  Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct1
-(23 rows)
+(24 rows)
 
 select * from bar where f1 in (select f1 from foo) for share;
  f1 | f2 
@@ -7222,6 +7236,7 @@ update bar set f2 = f2 + 100 where f1 in (select f1 from foo);
                ->  HashAggregate
                      Output: foo.ctid, foo.f1, foo.*, foo.tableoid
                      Group Key: foo.f1
+                     Buckets: 256
                      ->  Append
                            ->  Seq Scan on public.foo foo_1
                                  Output: foo_1.ctid, foo_1.f1, foo_1.*, foo_1.tableoid
@@ -7240,13 +7255,14 @@ update bar set f2 = f2 + 100 where f1 in (select f1 from foo);
                ->  HashAggregate
                      Output: foo.ctid, foo.f1, foo.*, foo.tableoid
                      Group Key: foo.f1
+                     Buckets: 256
                      ->  Append
                            ->  Seq Scan on public.foo foo_1
                                  Output: foo_1.ctid, foo_1.f1, foo_1.*, foo_1.tableoid
                            ->  Foreign Scan on public.foo2 foo_2
                                  Output: foo_2.ctid, foo_2.f1, foo_2.*, foo_2.tableoid
                                  Remote SQL: SELECT f1, f2, f3, ctid FROM public.loct1
-(39 rows)
+(41 rows)
 
 update bar set f2 = f2 + 100 where f1 in (select f1 from foo);
 select tableoid::regclass, * from bar order by 1,2;
@@ -8751,12 +8767,13 @@ SELECT a, sum(b), min(b), count(*) FROM pagg_tab GROUP BY a HAVING avg(b) < 22 O
    Sort Key: pagg_tab.a
    ->  HashAggregate
          Group Key: pagg_tab.a
+         Buckets: 64
          Filter: (avg(pagg_tab.b) < '22'::numeric)
          ->  Append
                ->  Foreign Scan on fpagg_tab_p1 pagg_tab_1
                ->  Foreign Scan on fpagg_tab_p2 pagg_tab_2
                ->  Foreign Scan on fpagg_tab_p3 pagg_tab_3
-(9 rows)
+(10 rows)
 
 -- Plan with partitionwise aggregates is enabled
 SET enable_partitionwise_aggregate TO true;
@@ -8799,6 +8816,7 @@ SELECT a, count(t1) FROM pagg_tab t1 GROUP BY a HAVING avg(b) < 22 ORDER BY 1;
          ->  HashAggregate
                Output: t1.a, count(((t1.*)::pagg_tab))
                Group Key: t1.a
+               Buckets: 16
                Filter: (avg(t1.b) < '22'::numeric)
                ->  Foreign Scan on public.fpagg_tab_p1 t1
                      Output: t1.a, t1.*, t1.b
@@ -8806,6 +8824,7 @@ SELECT a, count(t1) FROM pagg_tab t1 GROUP BY a HAVING avg(b) < 22 ORDER BY 1;
          ->  HashAggregate
                Output: t1_1.a, count(((t1_1.*)::pagg_tab))
                Group Key: t1_1.a
+               Buckets: 16
                Filter: (avg(t1_1.b) < '22'::numeric)
                ->  Foreign Scan on public.fpagg_tab_p2 t1_1
                      Output: t1_1.a, t1_1.*, t1_1.b
@@ -8813,11 +8832,12 @@ SELECT a, count(t1) FROM pagg_tab t1 GROUP BY a HAVING avg(b) < 22 ORDER BY 1;
          ->  HashAggregate
                Output: t1_2.a, count(((t1_2.*)::pagg_tab))
                Group Key: t1_2.a
+               Buckets: 16
                Filter: (avg(t1_2.b) < '22'::numeric)
                ->  Foreign Scan on public.fpagg_tab_p3 t1_2
                      Output: t1_2.a, t1_2.*, t1_2.b
                      Remote SQL: SELECT a, b, c FROM public.pagg_tab_p3
-(25 rows)
+(28 rows)
 
 SELECT a, count(t1) FROM pagg_tab t1 GROUP BY a HAVING avg(b) < 22 ORDER BY 1;
  a  | count 
@@ -8839,18 +8859,22 @@ SELECT b, avg(a), max(a), count(*) FROM pagg_tab GROUP BY b HAVING sum(a) < 700
    Sort Key: pagg_tab.b
    ->  Finalize HashAggregate
          Group Key: pagg_tab.b
+         Buckets: 64
          Filter: (sum(pagg_tab.a) < 700)
          ->  Append
                ->  Partial HashAggregate
                      Group Key: pagg_tab.b
+                     Buckets: 64
                      ->  Foreign Scan on fpagg_tab_p1 pagg_tab
                ->  Partial HashAggregate
                      Group Key: pagg_tab_1.b
+                     Buckets: 64
                      ->  Foreign Scan on fpagg_tab_p2 pagg_tab_1
                ->  Partial HashAggregate
                      Group Key: pagg_tab_2.b
+                     Buckets: 64
                      ->  Foreign Scan on fpagg_tab_p3 pagg_tab_2
-(15 rows)
+(19 rows)
 
 -- ===================================================================
 -- access rights and superuser
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 58141d8393..1d9623619b 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -18,6 +18,7 @@
 #include "commands/createas.h"
 #include "commands/defrem.h"
 #include "commands/prepare.h"
+#include "executor/nodeAgg.h"
 #include "executor/nodeHash.h"
 #include "foreign/fdwapi.h"
 #include "jit/jit.h"
@@ -86,12 +87,14 @@ static void show_merge_append_keys(MergeAppendState *mstate, List *ancestors,
 								   ExplainState *es);
 static void show_agg_keys(AggState *astate, List *ancestors,
 						  ExplainState *es);
-static void show_grouping_sets(PlanState *planstate, Agg *agg,
+static void show_grouping_sets(AggState *aggstate, Agg *agg,
 							   List *ancestors, ExplainState *es);
-static void show_grouping_set_keys(PlanState *planstate,
+static void show_grouping_set_info(AggState *aggstate,
 								   Agg *aggnode, Sort *sortnode,
 								   List *context, bool useprefix,
-								   List *ancestors, ExplainState *es);
+								   List *ancestors,
+								   HashTableInstrumentation *inst,
+								   ExplainState *es);
 static void show_group_keys(GroupState *gstate, List *ancestors,
 							ExplainState *es);
 static void show_sort_group_keys(PlanState *planstate, const char *qlabel,
@@ -104,7 +107,8 @@ static void show_tablesample(TableSampleClause *tsc, PlanState *planstate,
 							 List *ancestors, ExplainState *es);
 static void show_sort_info(SortState *sortstate, ExplainState *es);
 static void show_hash_info(HashState *hashstate, ExplainState *es);
-static void show_hashagg_info(AggState *hashstate, ExplainState *es);
+static void show_tuplehash_info(HashTableInstrumentation *inst, AggState *as,
+		ExplainState *es);
 static void show_tidbitmap_info(BitmapHeapScanState *planstate,
 								ExplainState *es);
 static void show_instrumentation_count(const char *qlabel, int which,
@@ -1490,6 +1494,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 					appendStringInfo(es->str, " %s", setopcmd);
 				else
 					ExplainPropertyText("Command", setopcmd, es);
+				/* XXX: show strategy in text mode? */
 			}
 			break;
 		default:
@@ -1883,11 +1888,24 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		case T_Agg:
 			show_agg_keys(castNode(AggState, planstate), ancestors, es);
 			show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
-			show_hashagg_info((AggState *) planstate, es);
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
 										   planstate, es);
 			break;
+		case T_SetOp:
+			{
+				SetOpState *sos = castNode(SetOpState, planstate);
+				if (sos->hashtable)
+					show_tuplehash_info(&sos->hashtable->instrument, NULL, es);
+			}
+			break;
+		case T_RecursiveUnion:
+			{
+				RecursiveUnionState *rus = (RecursiveUnionState *)planstate;
+				if (rus->hashtable)
+					show_tuplehash_info(&rus->hashtable->instrument, NULL, es);
+			}
+			break;
 		case T_Group:
 			show_group_keys(castNode(GroupState, planstate), ancestors, es);
 			show_upper_qual(plan->qual, "Filter", planstate, ancestors, es);
@@ -2264,24 +2282,31 @@ show_agg_keys(AggState *astate, List *ancestors,
 		ancestors = lcons(plan, ancestors);
 
 		if (plan->groupingSets)
-			show_grouping_sets(outerPlanState(astate), plan, ancestors, es);
+			show_grouping_sets(astate, plan, ancestors, es);
 		else
+		{
 			show_sort_group_keys(outerPlanState(astate), "Group Key",
 								 plan->numCols, plan->grpColIdx,
 								 NULL, NULL, NULL,
 								 ancestors, es);
+			Assert(astate->num_hashes <= 1);
+			if (astate->num_hashes)
+				show_tuplehash_info(&astate->perhash[0].hashtable->instrument, astate, es);
+		}
 
 		ancestors = list_delete_first(ancestors);
 	}
 }
 
 static void
-show_grouping_sets(PlanState *planstate, Agg *agg,
+show_grouping_sets(AggState *aggstate, Agg *agg,
 				   List *ancestors, ExplainState *es)
 {
+	PlanState	*planstate = outerPlanState(aggstate);
 	List	   *context;
 	bool		useprefix;
 	ListCell   *lc;
+	int			setno = 0;
 
 	/* Set up deparsing context */
 	context = set_deparse_context_plan(es->deparse_cxt,
@@ -2291,27 +2316,41 @@ show_grouping_sets(PlanState *planstate, Agg *agg,
 
 	ExplainOpenGroup("Grouping Sets", "Grouping Sets", false, es);
 
-	show_grouping_set_keys(planstate, agg, NULL,
-						   context, useprefix, ancestors, es);
+	show_grouping_set_info(aggstate, agg, NULL, context, useprefix, ancestors,
+			aggstate->num_hashes ?
+			&aggstate->perhash[setno++].hashtable->instrument : NULL,
+			es);
 
 	foreach(lc, agg->chain)
 	{
 		Agg		   *aggnode = lfirst(lc);
 		Sort	   *sortnode = (Sort *) aggnode->plan.lefttree;
+		HashTableInstrumentation *inst = NULL;
+
+		if (aggnode->aggstrategy == AGG_HASHED ||
+				aggnode->aggstrategy == AGG_MIXED)
+		{
+			Assert(setno < aggstate->num_hashes);
+			inst = &aggstate->perhash[setno++].hashtable->instrument;
+		}
 
-		show_grouping_set_keys(planstate, aggnode, sortnode,
-							   context, useprefix, ancestors, es);
+		show_grouping_set_info(aggstate, aggnode, sortnode,
+							   context, useprefix, ancestors,
+							   inst, es);
 	}
 
 	ExplainCloseGroup("Grouping Sets", "Grouping Sets", false, es);
 }
 
+/* Show keys and any hash instrumentation for a grouping set */
 static void
-show_grouping_set_keys(PlanState *planstate,
+show_grouping_set_info(AggState *aggstate,
 					   Agg *aggnode, Sort *sortnode,
 					   List *context, bool useprefix,
-					   List *ancestors, ExplainState *es)
+					   List *ancestors, HashTableInstrumentation *inst,
+					   ExplainState *es)
 {
+	PlanState	*planstate = outerPlanState(aggstate);
 	Plan	   *plan = planstate->plan;
 	char	   *exprstr;
 	ListCell   *lc;
@@ -2375,6 +2414,10 @@ show_grouping_set_keys(PlanState *planstate,
 
 	ExplainCloseGroup(keysetname, keysetname, false, es);
 
+	if (aggnode->aggstrategy == AGG_HASHED ||
+			aggnode->aggstrategy == AGG_MIXED)
+		show_tuplehash_info(inst, NULL, es);
+
 	if (sortnode && es->format == EXPLAIN_FORMAT_TEXT)
 		es->indent--;
 
@@ -2772,37 +2815,73 @@ show_hash_info(HashState *hashstate, ExplainState *es)
 }
 
 /*
- * Show information on hash aggregate memory usage and batches.
+ * Show hash bucket stats and (optionally) memory.
  */
 static void
-show_hashagg_info(AggState *aggstate, ExplainState *es)
+show_tuplehash_info(HashTableInstrumentation *inst, AggState *aggstate, ExplainState *es)
 {
-	Agg		*agg	   = (Agg *)aggstate->ss.ps.plan;
-	long	 memPeakKb = (aggstate->hash_mem_peak + 1023) / 1024;
+	size_t	spacePeakKb_tuples = (inst->space_peak_tuples + 1023) / 1024;
+	size_t	spacePeakKb_hash = (inst->space_peak_hash + 1023) / 1024;
 
-	Assert(IsA(aggstate, AggState));
-
-	if (agg->aggstrategy != AGG_HASHED &&
-		agg->aggstrategy != AGG_MIXED)
-		return;
-
-	if (es->costs && aggstate->hash_planned_partitions > 0)
-	{
+	if (es->costs && aggstate != NULL && aggstate->hash_planned_partitions > 0)
 		ExplainPropertyInteger("Planned Partitions", NULL,
 							   aggstate->hash_planned_partitions, es);
-	}
 
 	if (!es->analyze)
 		return;
 
-	/* EXPLAIN ANALYZE */
-	ExplainPropertyInteger("Peak Memory Usage", "kB", memPeakKb, es);
-	if (aggstate->hash_batches_used > 0)
+	if (es->format != EXPLAIN_FORMAT_TEXT)
 	{
-		ExplainPropertyInteger("Disk Usage", "kB",
-							   aggstate->hash_disk_used, es);
-		ExplainPropertyInteger("HashAgg Batches", NULL,
-							   aggstate->hash_batches_used, es);
+		ExplainPropertyInteger("Hash Buckets", NULL,
+							   inst->nbuckets, es);
+		ExplainPropertyInteger("Original Hash Buckets", NULL,
+							   inst->nbuckets_original, es);
+		ExplainPropertyInteger("Peak Memory Usage (hashtable)", "kB",
+							   spacePeakKb_hash, es);
+		ExplainPropertyInteger("Peak Memory Usage (tuples)", "kB",
+							   spacePeakKb_tuples, es);
+		if (aggstate != NULL)
+		{
+			ExplainPropertyInteger("Disk Usage", "kB",
+								   aggstate->hash_disk_used, es);
+			ExplainPropertyInteger("HashAgg Batches", NULL,
+								   aggstate->hash_batches_used, es);
+		}
+	}
+	else if (!inst->nbuckets)
+		; /* Do nothing */
+	else
+	{
+		if (inst->nbuckets_original != inst->nbuckets)
+		{
+			ExplainIndentText(es);
+			appendStringInfo(es->str,
+						"Buckets: %lld (originally %lld)",
+						(long long)inst->nbuckets,
+						(long long)inst->nbuckets_original);
+		}
+		else
+		{
+			ExplainIndentText(es);
+			appendStringInfo(es->str,
+						"Buckets: %lld",
+						(long long)inst->nbuckets);
+		}
+
+		appendStringInfoChar(es->str, '\n');
+		ExplainIndentText(es);
+		appendStringInfo(es->str,
+				"Peak Memory Usage: hashtable: %lldkB, tuples: %lldkB",
+				(long long)spacePeakKb_hash, (long long)spacePeakKb_tuples);
+		appendStringInfoChar(es->str, '\n');
+
+		if (aggstate != NULL && aggstate->hash_batches_used > 0)
+		{
+			ExplainPropertyInteger("Disk Usage", "kB",
+								   aggstate->hash_disk_used, es);
+			ExplainPropertyInteger("HashAgg Batches", NULL,
+								   aggstate->hash_batches_used, es);
+		}
 	}
 }
 
@@ -3473,6 +3552,29 @@ ExplainSubPlans(List *plans, List *ancestors,
 
 		ExplainNode(sps->planstate, ancestors,
 					relationship, sp->plan_name, es);
+		if (sps->hashtable)
+		{
+			ExplainOpenGroup("Hashtable", "Hashtable", true, es);
+			if (es->format == EXPLAIN_FORMAT_TEXT)
+			{
+				ExplainIndentText(es);
+				appendStringInfoString(es->str, "Hashtable: ");
+			}
+			show_tuplehash_info(&sps->hashtable->instrument, NULL, es);
+			ExplainCloseGroup("Hashtable", "Hashtable", true, es);
+		}
+
+		if (sps->hashnulls)
+		{
+			ExplainOpenGroup("Null Hashtable", "Null Hashtable", true, es);
+			if (es->format == EXPLAIN_FORMAT_TEXT)
+			{
+				ExplainIndentText(es);
+				appendStringInfoString(es->str, "Null Hashtable: ");
+			}
+			show_tuplehash_info(&sps->hashnulls->instrument, NULL, es);
+			ExplainCloseGroup("Null Hashtable", "Null Hashtable", true, es);
+		}
 
 		ancestors = list_delete_first(ancestors);
 	}
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 009d27b9a8..10276d3f58 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -188,6 +188,7 @@ BuildTupleHashTableExt(PlanState *parent,
 	hashtable->inputslot = NULL;
 	hashtable->in_hash_funcs = NULL;
 	hashtable->cur_eq_func = NULL;
+	memset(&hashtable->instrument, 0, sizeof(hashtable->instrument));
 
 	/*
 	 * If parallelism is in use, even if the master backend is performing the
@@ -203,6 +204,7 @@ BuildTupleHashTableExt(PlanState *parent,
 		hashtable->hash_iv = 0;
 
 	hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
+	UpdateTupleHashTableStats(hashtable, true);
 
 	/*
 	 * We copy the input tuple descriptor just for safety --- we assume all
@@ -281,9 +283,40 @@ BuildTupleHashTable(PlanState *parent,
 void
 ResetTupleHashTable(TupleHashTable hashtable)
 {
+	UpdateTupleHashTableStats(hashtable, false);
 	tuplehash_reset(hashtable->hashtab);
 }
 
+/* Update instrumentation stats */
+void
+UpdateTupleHashTableStats(TupleHashTable hashtable, bool initial)
+{
+	hashtable->instrument.nbuckets = hashtable->hashtab->size;
+	if (initial)
+	{
+		hashtable->instrument.nbuckets_original = hashtable->hashtab->size;
+	/* alternatively: hashtable->instrument.space_peak_hash =
+	 * hashtable->hashtab->size * sizeof(TupleHashEntryData); */
+		hashtable->instrument.space_peak_hash =
+			MemoryContextMemAllocated(hashtable->hashtab->ctx, true);
+		hashtable->instrument.space_peak_tuples = 0;
+	}
+	else
+	{
+		/* hashtable->entrysize includes additionalsize */
+		size_t hash_size = MemoryContextMemAllocated(hashtable->hashtab->ctx, true);
+		size_t tuple_size = MemoryContextMemAllocated(hashtable->tablecxt, true);
+
+		hashtable->instrument.space_peak_hash = Max(
+			hashtable->instrument.space_peak_hash,
+			hash_size);
+
+		hashtable->instrument.space_peak_tuples = Max(
+			hashtable->instrument.space_peak_tuples, tuple_size);
+				/* or: hashtable->hashtab->members * hashtable->entrysize */
+	}
+}
+
 /*
  * Find or create a hashtable entry for the tuple group containing the
  * given tuple.  The tuple must be the same type as the hashtable entries.
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 1d319f49d0..daa82cdee2 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1840,36 +1840,25 @@ hash_agg_enter_spill_mode(AggState *aggstate)
 static void
 hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
 {
-	Size	meta_mem = 0;
 	Size	hash_mem = 0;
 	Size	buffer_mem;
-	Size	total_mem;
 
 	if (aggstate->aggstrategy != AGG_MIXED &&
 		aggstate->aggstrategy != AGG_HASHED)
 		return;
 
-
 	for (int i = 0; i < aggstate->num_hashes; ++i)
 	{
-		/* memory for the hash table itself */
-		meta_mem += MemoryContextMemAllocated(
-			aggstate->perhash[i].hash_metacxt, true);
-		/* memory for the group keys and transition states */
 		hash_mem += MemoryContextMemAllocated(
 			aggstate->perhash[i].hashcontext->ecxt_per_tuple_memory, true);
+		UpdateTupleHashTableStats(aggstate->perhash[i].hashtable, false);
 	}
 
-	/* memory for read/write tape buffers, if spilled */
+	/* memory for read/write tape buffers, if spilled XXX */
 	buffer_mem = npartitions * HASHAGG_WRITE_BUFFER_SIZE;
 	if (from_tape)
 		buffer_mem += HASHAGG_READ_BUFFER_SIZE;
 
-	/* update peak mem */
-	total_mem = meta_mem + hash_mem + buffer_mem;
-	if (total_mem > aggstate->hash_mem_peak)
-		aggstate->hash_mem_peak = total_mem;
-
 	/* update disk usage */
 	if (aggstate->hash_tapeinfo != NULL)
 	{
diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c
index 620414a1ed..93272c28b1 100644
--- a/src/backend/executor/nodeRecursiveunion.c
+++ b/src/backend/executor/nodeRecursiveunion.c
@@ -156,6 +156,9 @@ ExecRecursiveUnion(PlanState *pstate)
 		return slot;
 	}
 
+	if (node->hashtable)
+		UpdateTupleHashTableStats(node->hashtable, false);
+
 	return NULL;
 }
 
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index bfd148a41a..9c0e0ab96e 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -415,6 +415,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 
 	setopstate->table_filled = true;
 	/* Initialize to walk the hash table */
+	UpdateTupleHashTableStats(setopstate->hashtable, false);
 	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
 }
 
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 298b7757f5..22c32612ba 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -621,6 +621,9 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 	ExecClearTuple(node->projRight->pi_state.resultslot);
 
 	MemoryContextSwitchTo(oldcontext);
+	UpdateTupleHashTableStats(node->hashtable, false);
+	if (node->hashnulls)
+		UpdateTupleHashTableStats(node->hashnulls, false);
 }
 
 /*
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 94890512dc..f4f2ede207 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -150,6 +150,7 @@ extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
 										 ExprState *eqcomp,
 										 FmgrInfo *hashfunctions);
 extern void ResetTupleHashTable(TupleHashTable hashtable);
+extern void UpdateTupleHashTableStats(TupleHashTable hashtable, bool initial);
 
 /*
  * prototypes from functions in execJunk.c
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index ddf0b43916..63bbf22955 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -691,6 +691,14 @@ typedef struct TupleHashEntryData
 #define SH_DECLARE
 #include "lib/simplehash.h"
 
+typedef struct HashTableInstrumentation
+{
+	size_t	nbuckets;				/* number of buckets at end of execution */
+	size_t	nbuckets_original;		/* planned number of buckets */
+	size_t	space_peak_hash;		/* peak memory usage in bytes */
+	size_t	space_peak_tuples;		/* peak memory usage in bytes */
+} HashTableInstrumentation;
+
 typedef struct TupleHashTableData
 {
 	tuplehash_hash *hashtab;	/* underlying hash table */
@@ -709,6 +717,7 @@ typedef struct TupleHashTableData
 	ExprState  *cur_eq_func;	/* comparator for input vs. table */
 	uint32		hash_iv;		/* hash-function IV */
 	ExprContext *exprcontext;	/* expression context */
+	HashTableInstrumentation instrument;
 }			TupleHashTableData;
 
 typedef tuplehash_iterator TupleHashIterator;
@@ -2091,7 +2100,6 @@ typedef struct AggState
 	int			hash_planned_partitions; /* number of partitions planned
 											for first pass */
 	double		hashentrysize;	/* estimate revised during execution */
-	Size		hash_mem_peak;	/* peak hash table memory usage */
 	uint64		hash_ngroups_current;	/* number of groups currently in
 										   memory in all hash tables */
 	uint64		hash_disk_used; /* kB of disk space used */
-- 
2.17.0

From af059e7435f0f8543cfb1dd6a297811e6bb201c4 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sun, 23 Feb 2020 23:13:07 -0600
Subject: [PATCH v8 3/8] refactor show_grouping_set_keys

---
 src/backend/commands/explain.c | 55 ++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 22 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 1d9623619b..9482f68c4b 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -95,6 +95,8 @@ static void show_grouping_set_info(AggState *aggstate,
 								   List *ancestors,
 								   HashTableInstrumentation *inst,
 								   ExplainState *es);
+static void show_grouping_set_keys(AggState *aggstate, Agg *aggnode,
+		List *context, bool useprefix, ExplainState *es);
 static void show_group_keys(GroupState *gstate, List *ancestors,
 							ExplainState *es);
 static void show_sort_group_keys(PlanState *planstate, const char *qlabel,
@@ -2349,6 +2351,37 @@ show_grouping_set_info(AggState *aggstate,
 					   List *context, bool useprefix,
 					   List *ancestors, HashTableInstrumentation *inst,
 					   ExplainState *es)
+{
+	PlanState	*planstate = outerPlanState(aggstate);
+
+	ExplainOpenGroup("Grouping Set", NULL, true, es);
+
+	if (sortnode)
+	{
+		show_sort_group_keys(planstate, "Sort Key",
+							 sortnode->numCols, sortnode->sortColIdx,
+							 sortnode->sortOperators, sortnode->collations,
+							 sortnode->nullsFirst,
+							 ancestors, es);
+		if (es->format == EXPLAIN_FORMAT_TEXT)
+			es->indent++;
+	}
+
+	show_grouping_set_keys(aggstate, aggnode, context, useprefix, es);
+
+	if (aggnode->aggstrategy == AGG_HASHED ||
+			aggnode->aggstrategy == AGG_MIXED)
+		show_tuplehash_info(inst, NULL, es);
+
+	if (sortnode && es->format == EXPLAIN_FORMAT_TEXT)
+		es->indent--;
+
+	ExplainCloseGroup("Grouping Set", NULL, true, es);
+}
+
+/* Show keys of a grouping set */
+static void
+show_grouping_set_keys(AggState *aggstate, Agg *aggnode, List *context, bool useprefix, ExplainState *es)
 {
 	PlanState	*planstate = outerPlanState(aggstate);
 	Plan	   *plan = planstate->plan;
@@ -2370,19 +2403,6 @@ show_grouping_set_info(AggState *aggstate,
 		keysetname = "Group Keys";
 	}
 
-	ExplainOpenGroup("Grouping Set", NULL, true, es);
-
-	if (sortnode)
-	{
-		show_sort_group_keys(planstate, "Sort Key",
-							 sortnode->numCols, sortnode->sortColIdx,
-							 sortnode->sortOperators, sortnode->collations,
-							 sortnode->nullsFirst,
-							 ancestors, es);
-		if (es->format == EXPLAIN_FORMAT_TEXT)
-			es->indent++;
-	}
-
 	ExplainOpenGroup(keysetname, keysetname, false, es);
 
 	foreach(lc, gsets)
@@ -2413,15 +2433,6 @@ show_grouping_set_info(AggState *aggstate,
 	}
 
 	ExplainCloseGroup(keysetname, keysetname, false, es);
-
-	if (aggnode->aggstrategy == AGG_HASHED ||
-			aggnode->aggstrategy == AGG_MIXED)
-		show_tuplehash_info(inst, NULL, es);
-
-	if (sortnode && es->format == EXPLAIN_FORMAT_TEXT)
-		es->indent--;
-
-	ExplainCloseGroup("Grouping Set", NULL, true, es);
 }
 
 /*
-- 
2.17.0

From c77d2d07f58b52721c889d99c859594c866fbaa4 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sat, 15 Feb 2020 14:13:06 -0600
Subject: [PATCH v8 4/8] Gross hack to put hash stats of subplans in the
 right(?) place

---
 src/backend/commands/explain.c | 70 ++++++++++++++++++----------------
 1 file changed, 38 insertions(+), 32 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 9482f68c4b..786d3ded16 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -66,7 +66,7 @@ static double elapsed_time(instr_time *starttime);
 static bool ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used);
 static void ExplainNode(PlanState *planstate, List *ancestors,
 						const char *relationship, const char *plan_name,
-						ExplainState *es);
+						SubPlanState *subplanstate, ExplainState *es);
 static void show_plan_tlist(PlanState *planstate, List *ancestors,
 							ExplainState *es);
 static void show_expression(Node *node, const char *qlabel,
@@ -722,7 +722,7 @@ ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc)
 		ps = outerPlanState(ps);
 		es->hide_workers = true;
 	}
-	ExplainNode(ps, NIL, NULL, NULL, es);
+	ExplainNode(ps, NIL, NULL, NULL, NULL, es);
 
 	/*
 	 * If requested, include information about GUC parameters with values that
@@ -1084,7 +1084,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
 static void
 ExplainNode(PlanState *planstate, List *ancestors,
 			const char *relationship, const char *plan_name,
-			ExplainState *es)
+			SubPlanState *subplanstate, ExplainState *es)
 {
 	Plan	   *plan = planstate->plan;
 	const char *pname;			/* node type name for text output */
@@ -1341,6 +1341,21 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			ExplainIndentText(es);
 			appendStringInfo(es->str, "%s\n", plan_name);
 			es->indent++;
+
+			Assert(subplanstate != NULL);
+			/* Show hash stats for hashed subplan */
+			if (subplanstate->hashtable)
+			{
+				ExplainIndentText(es);
+				appendStringInfoString(es->str, "Hashtable: ");
+				show_tuplehash_info(&subplanstate->hashtable->instrument, NULL, es);
+			}
+			if (subplanstate->hashnulls)
+			{
+				ExplainIndentText(es);
+				appendStringInfoString(es->str, "Null Hashtable: ");
+				show_tuplehash_info(&subplanstate->hashnulls->instrument, NULL, es);
+			}
 		}
 		if (es->indent)
 		{
@@ -1369,6 +1384,20 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		if (custom_name)
 			ExplainPropertyText("Custom Plan Provider", custom_name, es);
 		ExplainPropertyBool("Parallel Aware", plan->parallel_aware, es);
+
+		if (subplanstate && subplanstate->hashtable)
+		{
+			ExplainOpenGroup("Hashtable", "Hashtable", true, es);
+			show_tuplehash_info(&subplanstate->hashtable->instrument, NULL, es);
+			ExplainCloseGroup("Hashtable", "Hashtable", true, es);
+		}
+
+		if (subplanstate && subplanstate->hashnulls)
+		{
+			ExplainOpenGroup("Null Hashtable", "Null Hashtable", true, es);
+			show_tuplehash_info(&subplanstate->hashnulls->instrument, NULL, es);
+			ExplainCloseGroup("Null Hashtable", "Null Hashtable", true, es);
+		}
 	}
 
 	switch (nodeTag(plan))
@@ -2041,12 +2070,12 @@ ExplainNode(PlanState *planstate, List *ancestors,
 	/* lefttree */
 	if (outerPlanState(planstate))
 		ExplainNode(outerPlanState(planstate), ancestors,
-					"Outer", NULL, es);
+					"Outer", NULL, NULL, es);
 
 	/* righttree */
 	if (innerPlanState(planstate))
 		ExplainNode(innerPlanState(planstate), ancestors,
-					"Inner", NULL, es);
+					"Inner", NULL, NULL, es);
 
 	/* special child plans */
 	switch (nodeTag(plan))
@@ -2078,7 +2107,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			break;
 		case T_SubqueryScan:
 			ExplainNode(((SubqueryScanState *) planstate)->subplan, ancestors,
-						"Subquery", NULL, es);
+						"Subquery", NULL, NULL, es);
 			break;
 		case T_CustomScan:
 			ExplainCustomChildren((CustomScanState *) planstate,
@@ -3504,7 +3533,7 @@ ExplainMemberNodes(PlanState **planstates, int nplans,
 
 	for (j = 0; j < nplans; j++)
 		ExplainNode(planstates[j], ancestors,
-					"Member", NULL, es);
+					"Member", NULL, NULL, es);
 }
 
 /*
@@ -3562,30 +3591,7 @@ ExplainSubPlans(List *plans, List *ancestors,
 		ancestors = lcons(sp, ancestors);
 
 		ExplainNode(sps->planstate, ancestors,
-					relationship, sp->plan_name, es);
-		if (sps->hashtable)
-		{
-			ExplainOpenGroup("Hashtable", "Hashtable", true, es);
-			if (es->format == EXPLAIN_FORMAT_TEXT)
-			{
-				ExplainIndentText(es);
-				appendStringInfoString(es->str, "Hashtable: ");
-			}
-			show_tuplehash_info(&sps->hashtable->instrument, NULL, es);
-			ExplainCloseGroup("Hashtable", "Hashtable", true, es);
-		}
-
-		if (sps->hashnulls)
-		{
-			ExplainOpenGroup("Null Hashtable", "Null Hashtable", true, es);
-			if (es->format == EXPLAIN_FORMAT_TEXT)
-			{
-				ExplainIndentText(es);
-				appendStringInfoString(es->str, "Null Hashtable: ");
-			}
-			show_tuplehash_info(&sps->hashnulls->instrument, NULL, es);
-			ExplainCloseGroup("Null Hashtable", "Null Hashtable", true, es);
-		}
+					relationship, sp->plan_name, sps, es);
 
 		ancestors = list_delete_first(ancestors);
 	}
@@ -3602,7 +3608,7 @@ ExplainCustomChildren(CustomScanState *css, List *ancestors, ExplainState *es)
 	(list_length(css->custom_ps) != 1 ? "children" : "child");
 
 	foreach(cell, css->custom_ps)
-		ExplainNode((PlanState *) lfirst(cell), ancestors, label, NULL, es);
+		ExplainNode((PlanState *) lfirst(cell), ancestors, label, NULL, NULL, es);
 }
 
 /*
-- 
2.17.0

From 54bdb638365e810c0a7ab53253086e380824c177 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Wed, 12 Feb 2020 23:40:45 -0600
Subject: [PATCH v8 5/8] implement hash stats for bitmapHeapScan..

TIDBitmap is a private structure, so add an accessor function to return its
instrumentation, and duplicate the instrumentation struct in BitmapHeapState.

The instrumentation itself could be implemented in simplehash.h.  But I think
the higher layer BitmapHeapScan would have to include an instrumentation struct
anyway, since explain.c cannot look into tbm->pagetable to get .instrument (and
the pagetable structure itself doesn't match tuplehash).

Also, if instrumentation were implemented in simplehash.h, I think every
insertion or deletion would need to check ->members and ->size (which isn't
necessary for Agg, but is necessary in the general case, and specifically for
tidbitmap, since it actually DELETEs hashtable entries).  Or else simplehash
would need a new function like UpdateTupleHashStats, which the higher level nodes
would need to call after filling the hashtable or before deleting tuples, which
seems to defeat the purpose of implementing stats at a lower layer.

Note that this doesn't affect any regression tests, since the hashtable isn't
allocated during plain "explain".  "explain analyze" would show memory stats,
which we'd have to filter.
---
 src/backend/commands/explain.c            |  5 ++--
 src/backend/executor/nodeBitmapHeapscan.c |  3 +++
 src/backend/nodes/tidbitmap.c             | 29 +++++++++++++++++++++++
 src/include/nodes/execnodes.h             |  1 +
 src/include/nodes/tidbitmap.h             |  4 ++++
 5 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 786d3ded16..ba926bc216 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1735,8 +1735,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			if (plan->qual)
 				show_instrumentation_count("Rows Removed by Filter", 1,
 										   planstate, es);
-			if (es->analyze)
-				show_tidbitmap_info((BitmapHeapScanState *) planstate, es);
+			show_tidbitmap_info((BitmapHeapScanState *) planstate, es);
 			break;
 		case T_SampleScan:
 			show_tablesample(((SampleScan *) plan)->tablesample,
@@ -2951,6 +2950,8 @@ show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
 			appendStringInfoChar(es->str, '\n');
 		}
 	}
+
+	show_tuplehash_info(&planstate->instrument, NULL, es);
 }
 
 /*
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 726d3a2d9a..1785c78091 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -182,6 +182,8 @@ BitmapHeapNext(BitmapHeapScanState *node)
 #endif							/* USE_PREFETCH */
 		}
 		node->initialized = true;
+		if (node->tbm)
+			node->instrument = *tbm_instrumentation(node->tbm);
 	}
 
 	for (;;)
@@ -741,6 +743,7 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
 	scanstate->shared_tbmiterator = NULL;
 	scanstate->shared_prefetch_iterator = NULL;
 	scanstate->pstate = NULL;
+	memset(&scanstate->instrument, 0, sizeof(scanstate->instrument));
 
 	/*
 	 * We can potentially skip fetching heap pages if we do not need any
diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c
index ad4e071ca3..ab81cce3b2 100644
--- a/src/backend/nodes/tidbitmap.c
+++ b/src/backend/nodes/tidbitmap.c
@@ -44,6 +44,7 @@
 #include "common/hashfn.h"
 #include "nodes/bitmapset.h"
+#include "nodes/execnodes.h"
 #include "nodes/tidbitmap.h"
 #include "storage/lwlock.h"
 #include "utils/dsa.h"
 
@@ -166,6 +167,7 @@ struct TIDBitmap
 	dsa_pointer ptpages;		/* dsa_pointer to the page array */
 	dsa_pointer ptchunks;		/* dsa_pointer to the chunk array */
 	dsa_area   *dsa;			/* reference to per-query dsa area */
+	HashTableInstrumentation instrument;	/* Returned by accessor function */
 };
 
 /*
@@ -294,6 +296,7 @@ tbm_create_pagetable(TIDBitmap *tbm)
 	Assert(tbm->pagetable == NULL);
 
 	tbm->pagetable = pagetable_create(tbm->mcxt, 128, tbm);
+	tbm->instrument.nbuckets_original = tbm->pagetable->size;
 
 	/* If entry1 is valid, push it into the hashtable */
 	if (tbm->status == TBM_ONE_PAGE)
@@ -1147,6 +1150,32 @@ tbm_end_iterate(TBMIterator *iterator)
 	pfree(iterator);
 }
 
+/*
+ * tbm_instrumentation - update stored stats and return a pointer to the
+ * instrumentation structure
+ *
+ * Stats are recomputed at each call.  The returned data lives within the
+ * given tbm and is destroyed along with it.
+ */
+HashTableInstrumentation *
+tbm_instrumentation(TIDBitmap *tbm)
+{
+	if (tbm->pagetable)
+	{
+		tbm->instrument.nbuckets = tbm->pagetable->size;
+		tbm->instrument.space_peak_hash = sizeof(PagetableEntry) * tbm->pagetable->size;
+
+		/*
+		 * If there are lossy pages (chunks), then at some point we filled
+		 * maxentries; otherwise the peak is the current number of pages.
+		 */
+		tbm->instrument.space_peak_tuples = sizeof(BlockNumber) *
+			(tbm->nchunks > 0 ? tbm->maxentries : tbm->pagetable->members);
+	}
+
+	return &tbm->instrument;
+}
+
 /*
  * tbm_end_shared_iterate - finish a shared iteration over a TIDBitmap
  *
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 63bbf22955..972c7a00cf 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1609,6 +1609,7 @@ typedef struct BitmapHeapScanState
 	TBMSharedIterator *shared_tbmiterator;
 	TBMSharedIterator *shared_prefetch_iterator;
 	ParallelBitmapHeapState *pstate;
+	HashTableInstrumentation instrument;	/* copy of the tbm's instrumentation */
 } BitmapHeapScanState;
 
 /* ----------------
diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h
index d562fcae34..de0cdfb91f 100644
--- a/src/include/nodes/tidbitmap.h
+++ b/src/include/nodes/tidbitmap.h
@@ -26,6 +26,9 @@
 #include "utils/dsa.h"
 
 
+/* Forward declaration; the struct is defined in nodes/execnodes.h */
+typedef struct HashTableInstrumentation HashTableInstrumentation;
+
 /*
  * Actual bitmap representation is private to tidbitmap.c.  Callers can
  * do IsA(x, TIDBitmap) on it, but nothing else.
@@ -71,5 +74,6 @@ extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
 extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa,
 													dsa_pointer dp);
 extern long tbm_calculate_entries(double maxbytes);
+extern HashTableInstrumentation *tbm_instrumentation(TIDBitmap *tbm);
 
 #endif							/* TIDBITMAP_H */
-- 
2.17.0

>From 1b99dba4d2116b4a2230dfbf3718e3c9a66c78cd Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sun, 9 Feb 2020 15:08:14 -0600
Subject: [PATCH v8 6/8] Refactor for consistency/symmetry

This moves hash instrumentation out of execGrouping.c / TupleHashTable and into
the higher-level nodes, for consistency with bitmap heap scan.

This may be unimportant, and it is arguably clearer left in execGrouping.c.
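
As a standalone sketch of the snapshot-and-Max pattern that the new
Init/Update macros implement (toy struct and helper invented here; the real
macros read MemoryContextMemAllocated() rather than taking byte counts as
arguments):

#include <stdio.h>

#define Max(x, y) ((x) > (y) ? (x) : (y))	/* as in PostgreSQL's c.h */

typedef struct ToyInstrumentation
{
	size_t		space_peak_hash;	/* peak hashtable bytes seen so far */
	size_t		space_peak_tuples;	/* peak tuple bytes seen so far */
} ToyInstrumentation;

static void
update_stats(ToyInstrumentation *instr, size_t hash_now, size_t tuples_now)
{
	/* Peaks only ever grow, so this is safe to call at every batch/reset. */
	instr->space_peak_hash = Max(instr->space_peak_hash, hash_now);
	instr->space_peak_tuples = Max(instr->space_peak_tuples, tuples_now);
}

int
main(void)
{
	ToyInstrumentation instr = {0, 0};

	update_stats(&instr, 8192, 1024);
	update_stats(&instr, 4096, 65536);	/* hashtable was reset; tuples grew */
	printf("peak hash: %zu, peak tuples: %zu\n",
		   instr.space_peak_hash, instr.space_peak_tuples);
	return 0;
}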
---
 src/backend/commands/explain.c            | 20 +++++++-------
 src/backend/executor/execGrouping.c       | 33 -----------------------
 src/backend/executor/nodeAgg.c            |  9 +++++--
 src/backend/executor/nodeRecursiveunion.c |  4 ++-
 src/backend/executor/nodeSetOp.c          |  6 ++++-
 src/backend/executor/nodeSubplan.c        | 12 +++++++--
 src/include/executor/executor.h           |  1 -
 src/include/executor/nodeAgg.h            |  1 +
 src/include/nodes/execnodes.h             | 24 ++++++++++++++++-
 9 files changed, 59 insertions(+), 51 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index ba926bc216..309e7259fc 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1348,13 +1348,13 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			{
 				ExplainIndentText(es);
 				appendStringInfoString(es->str, "Hashtable: ");
-				show_tuplehash_info(&subplanstate->hashtable->instrument, NULL, es);
+				show_tuplehash_info(&subplanstate->instrument, NULL, es);
 			}
 			if (subplanstate->hashnulls)
 			{
 				ExplainIndentText(es);
 				appendStringInfoString(es->str, "Null Hashtable: ");
-				show_tuplehash_info(&subplanstate->hashnulls->instrument, NULL, es);
+				show_tuplehash_info(&subplanstate->instrument_nulls, NULL, es);
 			}
 		}
 		if (es->indent)
@@ -1388,14 +1388,14 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		if (subplanstate && subplanstate->hashtable)
 		{
 			ExplainOpenGroup("Hashtable", "Hashtable", true, es);
-			show_tuplehash_info(&subplanstate->hashtable->instrument, NULL, es);
+			show_tuplehash_info(&subplanstate->instrument, NULL, es);
 			ExplainCloseGroup("Hashtable", "Hashtable", true, es);
 		}
 
 		if (subplanstate && subplanstate->hashnulls)
 		{
 			ExplainOpenGroup("Null Hashtable", "Null Hashtable", true, es);
-			show_tuplehash_info(&subplanstate->hashnulls->instrument, NULL, es);
+			show_tuplehash_info(&subplanstate->instrument_nulls, NULL, es);
 			ExplainCloseGroup("Null Hashtable", "Null Hashtable", true, es);
 		}
 	}
@@ -1926,14 +1926,14 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			{
 				SetOpState *sos = castNode(SetOpState, planstate);
 				if (sos->hashtable)
-					show_tuplehash_info(&sos->hashtable->instrument, NULL, es);
+					show_tuplehash_info(&sos->instrument, NULL, es);
 			}
 			break;
 		case T_RecursiveUnion:
 			{
 				RecursiveUnionState *rus = (RecursiveUnionState *)planstate;
 				if (rus->hashtable)
-					show_tuplehash_info(&rus->hashtable->instrument, NULL, es);
+					show_tuplehash_info(&rus->instrument, NULL, es);
 			}
 			break;
 		case T_Group:
@@ -2321,7 +2321,7 @@ show_agg_keys(AggState *astate, List *ancestors,
 								 ancestors, es);
 			Assert(astate->num_hashes <= 1);
 			if (astate->num_hashes)
-				show_tuplehash_info(&astate->perhash[0].hashtable->instrument, astate, es);
+				show_tuplehash_info(&astate->perhash[0].instrument, astate, es);
 		}
 
 		ancestors = list_delete_first(ancestors);
@@ -2348,7 +2348,7 @@ show_grouping_sets(AggState *aggstate, Agg *agg,
 
 	show_grouping_set_info(aggstate, agg, NULL, context, useprefix, ancestors,
 			aggstate->num_hashes ?
-			&aggstate->perhash[setno++].hashtable->instrument : NULL,
+			&aggstate->perhash[setno++].instrument : NULL,
 			es);
 
 	foreach(lc, agg->chain)
@@ -2361,7 +2361,7 @@ show_grouping_sets(AggState *aggstate, Agg *agg,
 				aggnode->aggstrategy == AGG_MIXED)
 		{
 			Assert(setno < aggstate->num_hashes);
-			inst = &aggstate->perhash[setno++].hashtable->instrument;
+			inst = &aggstate->perhash[setno++].instrument;
 		}
 
 		show_grouping_set_info(aggstate, aggnode, sortnode,
@@ -2951,7 +2951,7 @@ show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
 		}
 	}
 
-	show_tuplehash_info(&planstate->instrument, es, NULL);
+	show_tuplehash_info(&planstate->instrument, NULL, es);
 }
 
 /*
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 10276d3f58..009d27b9a8 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -188,7 +188,6 @@ BuildTupleHashTableExt(PlanState *parent,
 	hashtable->inputslot = NULL;
 	hashtable->in_hash_funcs = NULL;
 	hashtable->cur_eq_func = NULL;
-	memset(&hashtable->instrument, 0, sizeof(hashtable->instrument));
 
 	/*
 	 * If parallelism is in use, even if the master backend is performing the
@@ -204,7 +203,6 @@ BuildTupleHashTableExt(PlanState *parent,
 		hashtable->hash_iv = 0;
 
 	hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
-	UpdateTupleHashTableStats(hashtable, true);
 
 	/*
 	 * We copy the input tuple descriptor just for safety --- we assume all
@@ -283,40 +281,9 @@ BuildTupleHashTable(PlanState *parent,
 void
 ResetTupleHashTable(TupleHashTable hashtable)
 {
-	UpdateTupleHashTableStats(hashtable, false);
 	tuplehash_reset(hashtable->hashtab);
 }
 
-/* Update instrumentation stats */
-void
-UpdateTupleHashTableStats(TupleHashTable hashtable, bool initial)
-{
-	hashtable->instrument.nbuckets = hashtable->hashtab->size;
-	if (initial)
-	{
-		hashtable->instrument.nbuckets_original = hashtable->hashtab->size;
-		// hashtable->instrument.space_peak_hash = hashtable->hashtab->size *
-			// sizeof(TupleHashEntryData);
-		hashtable->instrument.space_peak_hash =
-			MemoryContextMemAllocated(hashtable->hashtab->ctx, true);
-		hashtable->instrument.space_peak_tuples = 0;
-	}
-	else
-	{
-		/* hashtable->entrysize includes additionalsize */
-		size_t hash_size = MemoryContextMemAllocated(hashtable->hashtab->ctx, true);
-		size_t tuple_size = MemoryContextMemAllocated(hashtable->tablecxt, true);
-
-		hashtable->instrument.space_peak_hash = Max(
-			hashtable->instrument.space_peak_hash,
-			hash_size);
-
-		hashtable->instrument.space_peak_tuples = Max(
-			hashtable->instrument.space_peak_tuples, tuple_size);
-				// hashtable->hashtab->members * hashtable->entrysize);
-	}
-}
-
 /*
  * Find or create a hashtable entry for the tuple group containing the
  * given tuple.  The tuple must be the same type as the hashtable entries.
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index daa82cdee2..4cd09e2291 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1493,6 +1493,10 @@ build_hash_table(AggState *aggstate, int setno, long nbuckets)
 		hashcxt,
 		tmpcxt,
 		DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
+
+	InitTupleHashTableStats(perhash->instrument,
+			perhash->hashtable->hashtab,
+			hashcxt, additionalsize);
 }
 
 /*
@@ -1851,7 +1855,8 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
 	{
 		hash_mem += MemoryContextMemAllocated(
 			aggstate->perhash[i].hashcontext->ecxt_per_tuple_memory, true);
-		UpdateTupleHashTableStats(aggstate->perhash[i].hashtable, false);
+		UpdateTupleHashTableStats(aggstate->perhash[i].instrument,
+				aggstate->perhash[i].hashtable->hashtab);
 	}
 
 	/* memory for read/write tape buffers, if spilled XXX */
@@ -1879,7 +1884,7 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
 	if (aggstate->hash_ngroups_current > 0)
 	{
 		aggstate->hashentrysize =
-			hash_mem / (double)aggstate->hash_ngroups_current;
+			0 / (double)aggstate->hash_ngroups_current;	/* XXX */
 	}
 }
 
diff --git a/src/backend/executor/nodeRecursiveunion.c b/src/backend/executor/nodeRecursiveunion.c
index 93272c28b1..5e70e008e5 100644
--- a/src/backend/executor/nodeRecursiveunion.c
+++ b/src/backend/executor/nodeRecursiveunion.c
@@ -50,6 +50,8 @@ build_hash_table(RecursiveUnionState *rustate)
 												rustate->tableContext,
 												rustate->tempContext,
 												false);
+
+	InitTupleHashTableStats(rustate->instrument, rustate->hashtable->hashtab, rustate->tableContext, 0);
 }
 
 
@@ -157,7 +159,7 @@ ExecRecursiveUnion(PlanState *pstate)
 	}
 
 	if (node->hashtable)
-		UpdateTupleHashTableStats(node->hashtable, false);
+		UpdateTupleHashTableStats(node->instrument, node->hashtable->hashtab);
 
 	return NULL;
 }
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index 9c0e0ab96e..5eca128183 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -139,6 +139,9 @@ build_hash_table(SetOpState *setopstate)
 												   setopstate->tableContext,
 												   econtext->ecxt_per_tuple_memory,
 												   false);
+
+	InitTupleHashTableStats(setopstate->instrument,
+			setopstate->hashtable->hashtab, setopstate->tableContext, 0);
 }
 
 /*
@@ -415,7 +418,8 @@ setop_fill_hash_table(SetOpState *setopstate)
 
 	setopstate->table_filled = true;
 	/* Initialize to walk the hash table */
-	UpdateTupleHashTableStats(setopstate->hashtable, false);
+	UpdateTupleHashTableStats(setopstate->instrument,
+			setopstate->hashtable->hashtab);
 	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
 }
 
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 22c32612ba..0de6be40e4 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -505,6 +505,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 	if (node->hashtable)
 		ResetTupleHashTable(node->hashtable);
 	else
+	{
 		node->hashtable = BuildTupleHashTableExt(node->parent,
 												 node->descRight,
 												 ncols,
@@ -518,6 +519,9 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 												 node->hashtablecxt,
 												 node->hashtempcxt,
 												 false);
+		InitTupleHashTableStats(node->instrument, node->hashtable->hashtab,
+				node->hashtablecxt, 0);
+	}
 
 	if (!subplan->unknownEqFalse)
 	{
@@ -533,6 +537,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 		if (node->hashnulls)
 			ResetTupleHashTable(node->hashnulls);
 		else
+		{
 			node->hashnulls = BuildTupleHashTableExt(node->parent,
 													 node->descRight,
 													 ncols,
@@ -546,6 +551,9 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 													 node->hashtablecxt,
 													 node->hashtempcxt,
 													 false);
+			InitTupleHashTableStats(node->instrument_nulls,
+					node->hashnulls->hashtab, node->hashtablecxt, 0);
+		}
 	}
 	else
 		node->hashnulls = NULL;
@@ -621,9 +629,9 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
 	ExecClearTuple(node->projRight->pi_state.resultslot);
 
 	MemoryContextSwitchTo(oldcontext);
-	UpdateTupleHashTableStats(node->hashtable, false);
+	UpdateTupleHashTableStats(node->instrument, node->hashtable->hashtab);
 	if (node->hashnulls)
-		UpdateTupleHashTableStats(node->hashnulls, false);
+		UpdateTupleHashTableStats(node->instrument_nulls, node->hashnulls->hashtab);
 }
 
 /*
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index f4f2ede207..94890512dc 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -150,7 +150,6 @@ extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
 										 ExprState *eqcomp,
 										 FmgrInfo *hashfunctions);
 extern void ResetTupleHashTable(TupleHashTable hashtable);
-extern void UpdateTupleHashTableStats(TupleHashTable hashtable, bool initial);
 
 /*
  * prototypes from functions in execJunk.c
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h
index 247bb65bd6..71ba2011cd 100644
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -309,6 +309,7 @@ typedef struct AggStatePerHashData
 	Agg		   *aggnode;		/* original Agg node, for numGroups etc. */
 	MemoryContext	hash_metacxt;	/* memory for hash table itself */
 	ExprContext	*hashcontext;	/* context for hash table data */
+	HashTableInstrumentation    instrument;	/* per-hashtable stats for EXPLAIN */
 }			AggStatePerHashData;
 
 
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 972c7a00cf..e2cfce8bad 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -691,12 +691,31 @@ typedef struct TupleHashEntryData
 #define SH_DECLARE
 #include "lib/simplehash.h"
 
+#define InitTupleHashTableStats(instr, htable, tupctx, addsize) \
+	do { \
+		(instr).entrysize = sizeof(MinimalTuple) + (addsize); \
+		(instr).tuplectx = (tupctx); \
+		(instr).nbuckets = (htable)->size; \
+		(instr).nbuckets_original = (htable)->size; \
+		(instr).space_peak_hash = MemoryContextMemAllocated((htable)->ctx, false); \
+		(instr).space_peak_tuples = 0; \
+	} while (0)
+
+#define UpdateTupleHashTableStats(instr, htable) \
+	do { \
+		(instr).nbuckets = (htable)->size; \
+		(instr).space_peak_hash = Max((instr).space_peak_hash, MemoryContextMemAllocated((htable)->ctx, false)); \
+		(instr).space_peak_tuples = Max((instr).space_peak_tuples, MemoryContextMemAllocated((instr).tuplectx, false)); \
+	} while (0)
+
 typedef struct HashTableInstrumentation
 {
+	size_t	entrysize;				/* Includes additionalsize */
 	size_t	nbuckets;				/* number of buckets at end of execution */
 	size_t	nbuckets_original;		/* planned number of buckets */
 	size_t	space_peak_hash;		/* peak memory usage in bytes */
 	size_t	space_peak_tuples;		/* peak memory usage in bytes */
+	MemoryContext	tuplectx;		/* Context where tuples are stored */
 } HashTableInstrumentation;
 
 typedef struct TupleHashTableData
@@ -717,7 +736,6 @@ typedef struct TupleHashTableData
 	ExprState  *cur_eq_func;	/* comparator for input vs. table */
 	uint32		hash_iv;		/* hash-function IV */
 	ExprContext *exprcontext;	/* expression context */
-	HashTableInstrumentation instrument;
 }			TupleHashTableData;
 
 typedef tuplehash_iterator TupleHashIterator;
@@ -883,6 +901,8 @@ typedef struct SubPlanState
 	FmgrInfo   *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */
 	FmgrInfo   *cur_eq_funcs;	/* equality functions for LHS vs. table */
 	ExprState  *cur_eq_comp;	/* equality comparator for LHS vs. table */
+	HashTableInstrumentation instrument; /* instrumentation for main hashtable */
+	HashTableInstrumentation instrument_nulls; /* instrumentation for nulls hashtable */
 } SubPlanState;
 
 /* ----------------
@@ -1291,6 +1311,7 @@ typedef struct RecursiveUnionState
 	MemoryContext tempContext;	/* short-term context for comparisons */
 	TupleHashTable hashtable;	/* hash table for tuples already seen */
 	MemoryContext tableContext; /* memory context containing hash table */
+	HashTableInstrumentation instrument;	/* hashtable stats for EXPLAIN */
 } RecursiveUnionState;
 
 /* ----------------
@@ -2341,6 +2362,7 @@ typedef struct SetOpState
 	MemoryContext tableContext; /* memory context containing hash table */
 	bool		table_filled;	/* hash table filled yet? */
 	TupleHashIterator hashiter; /* for iterating through hash table */
+	HashTableInstrumentation instrument;	/* hashtable stats for EXPLAIN */
 } SetOpState;
 
 /* ----------------
-- 
2.17.0

>From 64d80fa1a10c1d8961295328c1461e33960b6eab Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sat, 15 Feb 2020 17:19:21 -0600
Subject: [PATCH v8 7/8] TupleHashTable.entrysize was unused except for
 instrumentation.

---
 src/backend/executor/execGrouping.c | 1 -
 src/include/nodes/execnodes.h       | 1 -
 2 files changed, 2 deletions(-)

diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 009d27b9a8..98fd4bf8bd 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -183,7 +183,6 @@ BuildTupleHashTableExt(PlanState *parent,
 	hashtable->tab_collations = collations;
 	hashtable->tablecxt = tablecxt;
 	hashtable->tempcxt = tempcxt;
-	hashtable->entrysize = entrysize;
 	hashtable->tableslot = NULL;	/* will be made on first lookup */
 	hashtable->inputslot = NULL;
 	hashtable->in_hash_funcs = NULL;
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index e2cfce8bad..82b2f510f4 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -728,7 +728,6 @@ typedef struct TupleHashTableData
 	Oid		   *tab_collations; /* collations for hash and comparison */
 	MemoryContext tablecxt;		/* memory context containing table */
 	MemoryContext tempcxt;		/* context for function evaluations */
-	Size		entrysize;		/* actual size to make each hash entry */
 	TupleTableSlot *tableslot;	/* slot for referencing table entries */
 	/* The following fields are set transiently for each table search: */
 	TupleTableSlot *inputslot;	/* current input tuple's slot */
-- 
2.17.0

>From aeb814f18051ed47f18c1c2502a5ba06187eb853 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sat, 15 Feb 2020 15:53:34 -0600
Subject: [PATCH v8 8/8] Update comment obsolete since 69c3936a

---
 src/backend/executor/nodeAgg.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 4cd09e2291..68fe4d3591 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -2025,8 +2025,7 @@ lookup_hash_entry(AggState *aggstate, uint32 hash)
 }
 
 /*
- * Look up hash entries for the current tuple in all hashed grouping sets,
- * returning an array of pergroup pointers suitable for advance_aggregates.
+ * Look up hash entries for the current tuple in all hashed grouping sets.
  *
  * Be aware that lookup_hash_entry can reset the tmpcontext.
  *
-- 
2.17.0
