On Thu, 2024-11-21 at 12:37 -0800, Jeff Davis wrote:
> 
> New patch series attached.

I committed the earlier cleanup patches and rebased the rest. Attached.

0001 is not quite as clean as I'd like it to be; suggestions welcome.
It does save a pointer per entry, though, which is substantial.

Regards,
        Jeff Davis

From 399f0e175c23f275b54efd4ec8eaa58b1a7d96c8 Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Thu, 14 Nov 2024 11:47:51 -0800
Subject: [PATCH v3 1/4] TupleHashTable: store additional data along with
 tuple.

Previously, the caller needed to allocate the memory and the
TupleHashTable would store a pointer to it. That wastes space for the
palloc overhead as well as the size of the pointer itself.

Now, the TupleHashTable relies on the caller to correctly specify the
additionalsize, and allocates that amount of space. The caller can
then request a pointer into that space.
---
 src/backend/executor/execGrouping.c | 59 ++++++++++++++++++++++++++++-
 src/backend/executor/nodeAgg.c      | 20 ++++------
 src/backend/executor/nodeSetOp.c    | 17 +++++----
 src/backend/executor/nodeSubplan.c  |  2 +-
 src/include/executor/executor.h     |  3 ++
 src/include/nodes/execnodes.h       | 10 +----
 6 files changed, 80 insertions(+), 31 deletions(-)

diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 33b124fbb0a..29bd3162ad5 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -20,6 +20,13 @@
 #include "miscadmin.h"
 #include "utils/lsyscache.h"
 
+typedef struct TupleHashEntryData
+{
+	MinimalTuple firstTuple;	/* copy of first tuple in this group */
+	uint32		status;			/* hash status */
+	uint32		hash;			/* hash value (cached) */
+} TupleHashEntryData;
+
 static int	TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
 static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
 												 const MinimalTuple tuple);
@@ -196,6 +203,7 @@ BuildTupleHashTable(PlanState *parent,
 	hashtable->tab_collations = collations;
 	hashtable->tablecxt = tablecxt;
 	hashtable->tempcxt = tempcxt;
+	hashtable->additionalsize = additionalsize;
 	hashtable->tableslot = NULL;	/* will be made on first lookup */
 	hashtable->inputslot = NULL;
 	hashtable->in_hash_expr = NULL;
@@ -273,6 +281,15 @@ ResetTupleHashTable(TupleHashTable hashtable)
 	tuplehash_reset(hashtable->hashtab);
 }
 
+/*
+ * Return the size of a TupleHashEntry. Useful for estimating memory usage.
+ */
+size_t
+TupleHashEntrySize(void)
+{
+	return sizeof(TupleHashEntryData);
+}
+
 /*
  * Find or create a hashtable entry for the tuple group containing the
  * given tuple.  The tuple must be the same type as the hashtable entries.
@@ -339,6 +356,26 @@ TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
 	return hash;
 }
 
+MinimalTuple
+TupleHashEntryGetTuple(TupleHashEntry entry)
+{
+	return entry->firstTuple;
+}
+
+/*
+ * Get a pointer into the additional space allocated for this entry. The
+ * amount of space available is the additionalsize specified to
+ * BuildTupleHashTable(). If additionalsize was specified as zero, no
+ * additional space is available and this function should not be called.
+ */
+void *
+TupleHashEntryGetAdditional(TupleHashEntry entry)
+{
+	Assert(GetMemoryChunkSpace(entry->firstTuple) > MAXALIGN(entry->firstTuple->t_len));
+
+	return (char *) entry->firstTuple + MAXALIGN(entry->firstTuple->t_len);
+}
+
 /*
  * A variant of LookupTupleHashEntry for callers that have already computed
  * the hash value.
@@ -477,13 +514,31 @@ LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
 		}
 		else
 		{
+			MinimalTuple firstTuple;
+			size_t totalsize; /* including alignment and additionalsize */
+
 			/* created new entry */
 			*isnew = true;
 			/* zero caller data */
-			entry->additional = NULL;
 			MemoryContextSwitchTo(hashtable->tablecxt);
+
 			/* Copy the first tuple into the table context */
-			entry->firstTuple = ExecCopySlotMinimalTuple(slot);
+			firstTuple = ExecCopySlotMinimalTuple(slot);
+
+			/*
+			 * Allocate additional space right after the MinimalTuple of size
+			 * additionalsize. The caller can get a pointer to this data with
+			 * TupleHashEntryGetAdditional(), and store arbitrary data there.
+			 *
+			 * This avoids the need to store an extra pointer or allocate an
+			 * additional chunk, which would waste memory.
+			 */
+			totalsize = MAXALIGN(firstTuple->t_len) + hashtable->additionalsize;
+			firstTuple = repalloc(firstTuple, totalsize);
+			memset((char *) firstTuple + firstTuple->t_len, 0,
+				   totalsize - firstTuple->t_len);
+
+			entry->firstTuple = firstTuple;
 		}
 	}
 	else
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 3005b5c0e3b..19640752967 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1713,7 +1713,7 @@ hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
 		transitionChunkSize = 0;
 
 	return
-		sizeof(TupleHashEntryData) +
+		TupleHashEntrySize() +
 		tupleChunkSize +
 		pergroupChunkSize +
 		transitionChunkSize;
@@ -1954,7 +1954,7 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
 	if (aggstate->hash_ngroups_current > 0)
 	{
 		aggstate->hashentrysize =
-			sizeof(TupleHashEntryData) +
+			TupleHashEntrySize() +
 			(hashkey_mem / (double) aggstate->hash_ngroups_current);
 	}
 }
@@ -2055,11 +2055,7 @@ initialize_hash_entry(AggState *aggstate, TupleHashTable hashtable,
 	if (aggstate->numtrans == 0)
 		return;
 
-	pergroup = (AggStatePerGroup)
-		MemoryContextAlloc(hashtable->tablecxt,
-						   sizeof(AggStatePerGroupData) * aggstate->numtrans);
-
-	entry->additional = pergroup;
+	pergroup = (AggStatePerGroup) TupleHashEntryGetAdditional(entry);
 
 	/*
 	 * Initialize aggregates for new tuple group, lookup_hash_entries()
@@ -2123,7 +2119,7 @@ lookup_hash_entries(AggState *aggstate)
 		{
 			if (isnew)
 				initialize_hash_entry(aggstate, hashtable, entry);
-			pergroup[setno] = entry->additional;
+			pergroup[setno] = TupleHashEntryGetAdditional(entry);
 		}
 		else
 		{
@@ -2681,7 +2677,7 @@ agg_refill_hash_table(AggState *aggstate)
 		{
 			if (isnew)
 				initialize_hash_entry(aggstate, perhash->hashtable, entry);
-			aggstate->hash_pergroup[batch->setno] = entry->additional;
+			aggstate->hash_pergroup[batch->setno] = TupleHashEntryGetAdditional(entry);
 			advance_aggregates(aggstate);
 		}
 		else
@@ -2773,7 +2769,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
 	ExprContext *econtext;
 	AggStatePerAgg peragg;
 	AggStatePerGroup pergroup;
-	TupleHashEntryData *entry;
+	TupleHashEntry entry;
 	TupleTableSlot *firstSlot;
 	TupleTableSlot *result;
 	AggStatePerHash perhash;
@@ -2845,7 +2841,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
 		 * Transform representative tuple back into one with the right
 		 * columns.
 		 */
-		ExecStoreMinimalTuple(entry->firstTuple, hashslot, false);
+		ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), hashslot, false);
 		slot_getallattrs(hashslot);
 
 		ExecClearTuple(firstSlot);
@@ -2861,7 +2857,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
 		}
 		ExecStoreVirtualTuple(firstSlot);
 
-		pergroup = (AggStatePerGroup) entry->additional;
+		pergroup = (AggStatePerGroup) TupleHashEntryGetAdditional(entry);
 
 		/*
 		 * Use the representative input tuple for any references to
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index 5b7ff9c3748..2a50d56fc87 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -425,6 +425,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 	{
 		TupleTableSlot *outerslot;
 		TupleHashEntryData *entry;
+		SetOpStatePerGroup pergroup;
 		bool		isnew;
 
 		outerslot = ExecProcNode(outerPlan);
@@ -437,16 +438,16 @@ setop_fill_hash_table(SetOpState *setopstate)
 									 outerslot,
 									 &isnew, NULL);
 
+		pergroup = TupleHashEntryGetAdditional(entry);
 		/* If new tuple group, initialize counts to zero */
 		if (isnew)
 		{
-			entry->additional = (SetOpStatePerGroup)
-				MemoryContextAllocZero(setopstate->hashtable->tablecxt,
-									   sizeof(SetOpStatePerGroupData));
+			pergroup->numLeft = 0;
+			pergroup->numRight = 0;
 		}
 
 		/* Advance the counts */
-		((SetOpStatePerGroup) entry->additional)->numLeft++;
+		pergroup->numLeft++;
 
 		/* Must reset expression context after each hashtable lookup */
 		ResetExprContext(econtext);
@@ -478,7 +479,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 
 			/* Advance the counts if entry is already present */
 			if (entry)
-				((SetOpStatePerGroup) entry->additional)->numRight++;
+				((SetOpStatePerGroup) TupleHashEntryGetAdditional(entry))->numRight++;
 
 			/* Must reset expression context after each hashtable lookup */
 			ResetExprContext(econtext);
@@ -496,7 +497,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 static TupleTableSlot *
 setop_retrieve_hash_table(SetOpState *setopstate)
 {
-	TupleHashEntryData *entry;
+	TupleHashEntry entry;
 	TupleTableSlot *resultTupleSlot;
 
 	/*
@@ -526,12 +527,12 @@ setop_retrieve_hash_table(SetOpState *setopstate)
 		 * See if we should emit any copies of this tuple, and if so return
 		 * the first copy.
 		 */
-		set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);
+		set_output_count(setopstate, (SetOpStatePerGroup) TupleHashEntryGetAdditional(entry));
 
 		if (setopstate->numOutput > 0)
 		{
 			setopstate->numOutput--;
-			return ExecStoreMinimalTuple(entry->firstTuple,
+			return ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry),
 										 resultTupleSlot,
 										 false);
 		}
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 49767ed6a52..f7f6fc2da0b 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -753,7 +753,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot,
 	{
 		CHECK_FOR_INTERRUPTS();
 
-		ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false);
+		ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), hashtable->tableslot, false);
 		if (!execTuplesUnequal(slot, hashtable->tableslot,
 							   numCols, keyColIdx,
 							   eqfunctions,
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index f8a8d03e533..a49c32072e3 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -148,9 +148,12 @@ extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
 										   bool *isnew, uint32 *hash);
 extern uint32 TupleHashTableHash(TupleHashTable hashtable,
 								 TupleTableSlot *slot);
+extern size_t TupleHashEntrySize(void);
 extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable,
 											   TupleTableSlot *slot,
 											   bool *isnew, uint32 hash);
+extern MinimalTuple TupleHashEntryGetTuple(TupleHashEntry entry);
+extern void *TupleHashEntryGetAdditional(TupleHashEntry entry);
 extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
 										 TupleTableSlot *slot,
 										 ExprState *eqcomp,
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index b3f7aa299f5..7d5871d6fac 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -806,17 +806,10 @@ typedef struct ExecAuxRowMark
  * point to tab_hash_expr and tab_eq_func respectively.
  * ----------------------------------------------------------------
  */
+typedef struct TupleHashEntryData TupleHashEntryData;
 typedef struct TupleHashEntryData *TupleHashEntry;
 typedef struct TupleHashTableData *TupleHashTable;
 
-typedef struct TupleHashEntryData
-{
-	MinimalTuple firstTuple;	/* copy of first tuple in this group */
-	void	   *additional;		/* user data */
-	uint32		status;			/* hash status */
-	uint32		hash;			/* hash value (cached) */
-} TupleHashEntryData;
-
 /* define parameters necessary to generate the tuple hash table interface */
 #define SH_PREFIX tuplehash
 #define SH_ELEMENT_TYPE TupleHashEntryData
@@ -835,6 +828,7 @@ typedef struct TupleHashTableData
 	Oid		   *tab_collations; /* collations for hash and comparison */
 	MemoryContext tablecxt;		/* memory context containing table */
 	MemoryContext tempcxt;		/* context for function evaluations */
+	Size		additionalsize;	/* size of additional data */
 	TupleTableSlot *tableslot;	/* slot for referencing table entries */
 	/* The following fields are set transiently for each table search: */
 	TupleTableSlot *inputslot;	/* current input tuple's slot */
-- 
2.34.1

From 1d3ee5d04adf729cff93544cf9a9b768d47fa805 Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Fri, 15 Nov 2024 12:21:04 -0800
Subject: [PATCH v3 2/4] simplehash: don't require a "status" field.

Callers may define SH_ENTRY_IS_EMPTY(), SH_ENTRY_SET_EMPTY(), and
SH_ENTRY_SET_IN_USE() as an alternative, which can allow for a more
compact entry size. That reduces the memory overhead, especially when
the hash table is sparse.
---
 src/include/lib/simplehash.h | 68 ++++++++++++++++++++++++------------
 1 file changed, 45 insertions(+), 23 deletions(-)

diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h
index 327274c2340..0b9b0cdfde0 100644
--- a/src/include/lib/simplehash.h
+++ b/src/include/lib/simplehash.h
@@ -50,9 +50,18 @@
  *	  - SH_HASH_KEY(table, key) - generate hash for the key
  *	  - SH_STORE_HASH - if defined the hash is stored in the elements
  *	  - SH_GET_HASH(tb, a) - return the field to store the hash in
+ *	  - SH_ENTRY_IS_EMPTY(tb, a) - return true if the entry is empty
+ *	  - SH_ENTRY_SET_EMPTY(tb, a) - set entry to empty
+ *	  - SH_ENTRY_SET_IN_USE(tb, a) - set entry to "in use"
  *
  *	  The element type is required to contain a "status" member that can store
- *	  the range of values defined in the SH_STATUS enum.
+ *	  the range of values defined in the SH_STATUS enum. Alternatively,
+ *	  callers may define all of SH_ENTRY_IS_EMPTY, SH_ENTRY_SET_EMPTY,
+ *	  SH_ENTRY_SET_IN_USE to allow for a more compact entry size. For example,
+ *	  if the SH_ELEMENT_TYPE contains a pointer, the caller may decide that
+ *	  the entry is empty if the pointer is NULL and "in use" if the pointer is
+ *	  non-NULL. NB: if the entire entry is zero, then SH_ENTRY_IS_EMPTY()
+ *	  *must* evaluate to true.
  *
  *	  While SH_STORE_HASH (and subsequently SH_GET_HASH) are optional, because
  *	  the hash table implementation needs to compare hashes to move elements
@@ -277,6 +286,18 @@ SH_SCOPE void SH_STAT(SH_TYPE * tb);
 #define SH_GROW_MIN_FILLFACTOR 0.1
 #endif
 
+#ifndef SH_ENTRY_IS_EMPTY
+#define SH_ENTRY_IS_EMPTY(tb, a) ((a)->status == SH_STATUS_EMPTY)
+#endif
+
+#ifndef SH_ENTRY_SET_EMPTY
+#define SH_ENTRY_SET_EMPTY(tb, a) do { (a)->status = SH_STATUS_EMPTY; } while (0)
+#endif
+
+#ifndef SH_ENTRY_SET_IN_USE
+#define SH_ENTRY_SET_IN_USE(tb, a) do { (a)->status = SH_STATUS_IN_USE; } while (0)
+#endif
+
 #ifdef SH_STORE_HASH
 #define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
 #else
@@ -540,7 +561,7 @@ SH_GROW(SH_TYPE * tb, uint64 newsize)
 		uint32		hash;
 		uint32		optimal;
 
-		if (oldentry->status != SH_STATUS_IN_USE)
+		if (SH_ENTRY_IS_EMPTY(tb, oldentry))
 		{
 			startelem = i;
 			break;
@@ -562,7 +583,7 @@ SH_GROW(SH_TYPE * tb, uint64 newsize)
 	{
 		SH_ELEMENT_TYPE *oldentry = &olddata[copyelem];
 
-		if (oldentry->status == SH_STATUS_IN_USE)
+		if (!SH_ENTRY_IS_EMPTY(tb, oldentry))
 		{
 			uint32		hash;
 			uint32		startelem2;
@@ -578,7 +599,7 @@ SH_GROW(SH_TYPE * tb, uint64 newsize)
 			{
 				newentry = &newdata[curelem];
 
-				if (newentry->status == SH_STATUS_EMPTY)
+				if (SH_ENTRY_IS_EMPTY(tb, newentry))
 				{
 					break;
 				}
@@ -649,14 +670,14 @@ restart:
 		SH_ELEMENT_TYPE *entry = &data[curelem];
 
 		/* any empty bucket can directly be used */
-		if (entry->status == SH_STATUS_EMPTY)
+		if (SH_ENTRY_IS_EMPTY(tb, entry))
 		{
 			tb->members++;
 			entry->SH_KEY = key;
 #ifdef SH_STORE_HASH
 			SH_GET_HASH(tb, entry) = hash;
 #endif
-			entry->status = SH_STATUS_IN_USE;
+			SH_ENTRY_SET_IN_USE(tb, entry);
 			*found = false;
 			return entry;
 		}
@@ -671,7 +692,7 @@ restart:
 
 		if (SH_COMPARE_KEYS(tb, hash, key, entry))
 		{
-			Assert(entry->status == SH_STATUS_IN_USE);
+			Assert(!SH_ENTRY_IS_EMPTY(tb, entry));
 			*found = true;
 			return entry;
 		}
@@ -695,7 +716,7 @@ restart:
 				emptyelem = SH_NEXT(tb, emptyelem, startelem);
 				emptyentry = &data[emptyelem];
 
-				if (emptyentry->status == SH_STATUS_EMPTY)
+				if (SH_ENTRY_IS_EMPTY(tb, emptyentry))
 				{
 					lastentry = emptyentry;
 					break;
@@ -744,7 +765,7 @@ restart:
 #ifdef SH_STORE_HASH
 			SH_GET_HASH(tb, entry) = hash;
 #endif
-			entry->status = SH_STATUS_IN_USE;
+			SH_ENTRY_SET_IN_USE(tb, entry);
 			*found = false;
 			return entry;
 		}
@@ -806,13 +827,11 @@ SH_LOOKUP_HASH_INTERNAL(SH_TYPE * tb, SH_KEY_TYPE key, uint32 hash)
 	{
 		SH_ELEMENT_TYPE *entry = &tb->data[curelem];
 
-		if (entry->status == SH_STATUS_EMPTY)
+		if (SH_ENTRY_IS_EMPTY(tb, entry))
 		{
 			return NULL;
 		}
 
-		Assert(entry->status == SH_STATUS_IN_USE);
-
 		if (SH_COMPARE_KEYS(tb, hash, key, entry))
 			return entry;
 
@@ -864,10 +883,10 @@ SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
 	{
 		SH_ELEMENT_TYPE *entry = &tb->data[curelem];
 
-		if (entry->status == SH_STATUS_EMPTY)
+		if (SH_ENTRY_IS_EMPTY(tb, entry))
 			return false;
 
-		if (entry->status == SH_STATUS_IN_USE &&
+		if (!SH_ENTRY_IS_EMPTY(tb, entry) &&
 			SH_COMPARE_KEYS(tb, hash, key, entry))
 		{
 			SH_ELEMENT_TYPE *lastentry = entry;
@@ -890,9 +909,9 @@ SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
 				curelem = SH_NEXT(tb, curelem, startelem);
 				curentry = &tb->data[curelem];
 
-				if (curentry->status != SH_STATUS_IN_USE)
+				if (SH_ENTRY_IS_EMPTY(tb, curentry))
 				{
-					lastentry->status = SH_STATUS_EMPTY;
+					SH_ENTRY_SET_EMPTY(tb, lastentry);
 					break;
 				}
 
@@ -902,7 +921,7 @@ SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
 				/* current is at optimal position, done */
 				if (curoptimal == curelem)
 				{
-					lastentry->status = SH_STATUS_EMPTY;
+					SH_ENTRY_SET_EMPTY(tb, lastentry);
 					break;
 				}
 
@@ -953,9 +972,9 @@ SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
 		curelem = SH_NEXT(tb, curelem, startelem);
 		curentry = &tb->data[curelem];
 
-		if (curentry->status != SH_STATUS_IN_USE)
+		if (SH_ENTRY_IS_EMPTY(tb, curentry))
 		{
-			lastentry->status = SH_STATUS_EMPTY;
+			SH_ENTRY_SET_EMPTY(tb, lastentry);
 			break;
 		}
 
@@ -965,7 +984,7 @@ SH_DELETE_ITEM(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
 		/* current is at optimal position, done */
 		if (curoptimal == curelem)
 		{
-			lastentry->status = SH_STATUS_EMPTY;
+			SH_ENTRY_SET_EMPTY(tb, lastentry);
 			break;
 		}
 
@@ -993,7 +1012,7 @@ SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
 	{
 		SH_ELEMENT_TYPE *entry = &tb->data[i];
 
-		if (entry->status != SH_STATUS_IN_USE)
+		if (SH_ENTRY_IS_EMPTY(tb, entry))
 		{
 			startelem = i;
 			break;
@@ -1055,7 +1074,7 @@ SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
 
 		if ((iter->cur & tb->sizemask) == (iter->end & tb->sizemask))
 			iter->done = true;
-		if (elem->status == SH_STATUS_IN_USE)
+		if (!SH_ENTRY_IS_EMPTY(tb, elem))
 		{
 			return elem;
 		}
@@ -1091,7 +1110,7 @@ SH_STAT(SH_TYPE * tb)
 
 		elem = &tb->data[i];
 
-		if (elem->status != SH_STATUS_IN_USE)
+		if (SH_ENTRY_IS_EMPTY(tb, elem))
 			continue;
 
 		hash = SH_ENTRY_HASH(tb, elem);
@@ -1149,6 +1168,9 @@ SH_STAT(SH_TYPE * tb)
 #undef SH_KEY
 #undef SH_ELEMENT_TYPE
 #undef SH_HASH_KEY
+#undef SH_ENTRY_IS_EMPTY
+#undef SH_ENTRY_SET_EMPTY
+#undef SH_ENTRY_SET_IN_USE
 #undef SH_SCOPE
 #undef SH_DECLARE
 #undef SH_DEFINE
-- 
2.34.1

From 393dfcb784bed5c162fe5c9d5b1bb287288312cf Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Fri, 15 Nov 2024 13:03:32 -0800
Subject: [PATCH v3 3/4] TupleHashTable: reduce overhead by removing "status"
 field.

Make use of the new simplehash API introduced in the previous commit.

Because NULL now means that the entry is empty, use a unique dummy
pointer to refer to the inputslot instead. It also makes the code more
readable.
---
 src/backend/executor/execGrouping.c | 33 +++++++++++++++++++----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 29bd3162ad5..43165375946 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -23,7 +23,6 @@
 typedef struct TupleHashEntryData
 {
 	MinimalTuple firstTuple;	/* copy of first tuple in this group */
-	uint32		status;			/* hash status */
 	uint32		hash;			/* hash value (cached) */
 } TupleHashEntryData;
 
@@ -34,6 +33,14 @@ static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashta
 														   TupleTableSlot *slot,
 														   bool *isnew, uint32 hash);
 
+/*
+ * Create a dummy pointer used to mean the input slot. The pointer will only
+ * be used for comparison and never dereferenced, and must not be equal to a
+ * valid pointer. Distinct from NULL, because NULL means that the hash entry
+ * itself is empty.
+ */
+const static MinimalTuple FIRSTTUPLE_INPUTSLOT = (MinimalTuple) 0x1;
+
 /*
  * Define parameters for tuple hash table code generation. The interface is
  * *also* declared in execnodes.h (to generate the types, which are externally
@@ -48,6 +55,9 @@ static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashta
 #define SH_SCOPE extern
 #define SH_STORE_HASH
 #define SH_GET_HASH(tb, a) a->hash
+#define SH_ENTRY_IS_EMPTY(tb, a) ((a)->firstTuple == NULL)
+#define SH_ENTRY_SET_EMPTY(tb, a) do { (a)->firstTuple = NULL; } while (0)
+#define SH_ENTRY_SET_IN_USE(tb, a) Assert((a)->firstTuple != NULL)
 #define SH_DEFINE
 #include "lib/simplehash.h"
 
@@ -321,7 +331,7 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 	hashtable->in_hash_expr = hashtable->tab_hash_expr;
 	hashtable->cur_eq_func = hashtable->tab_eq_func;
 
-	local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
+	local_hash = TupleHashTableHash_internal(hashtable->hashtab, FIRSTTUPLE_INPUTSLOT);
 	entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash);
 
 	if (hash != NULL)
@@ -349,7 +359,8 @@ TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
 	/* Need to run the hash functions in short-lived context */
 	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
 
-	hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
+	hash = TupleHashTableHash_internal(hashtable->hashtab,
+									   FIRSTTUPLE_INPUTSLOT);
 
 	MemoryContextSwitchTo(oldContext);
 
@@ -430,7 +441,7 @@ FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 	hashtable->cur_eq_func = eqcomp;
 
 	/* Search the hash table */
-	key = NULL;					/* flag to reference inputslot */
+	key = FIRSTTUPLE_INPUTSLOT;
 	entry = tuplehash_lookup(hashtable->hashtab, key);
 	MemoryContextSwitchTo(oldContext);
 
@@ -438,9 +449,9 @@ FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 }
 
 /*
- * If tuple is NULL, use the input slot instead. This convention avoids the
- * need to materialize virtual input tuples unless they actually need to get
- * copied into the table.
+ * If tuple is FIRSTTUPLE_INPUTSLOT, use the input slot instead. This
+ * convention avoids the need to materialize virtual input tuples unless they
+ * actually need to get copied into the table.
  *
  * Also, the caller must select an appropriate memory context for running
  * the hash functions.
@@ -454,7 +465,7 @@ TupleHashTableHash_internal(struct tuplehash_hash *tb,
 	TupleTableSlot *slot;
 	bool		isnull;
 
-	if (tuple == NULL)
+	if (tuple == FIRSTTUPLE_INPUTSLOT)
 	{
 		/* Process the current input tuple for the table */
 		hashtable->exprcontext->ecxt_innertuple = hashtable->inputslot;
@@ -501,7 +512,7 @@ LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
 	bool		found;
 	MinimalTuple key;
 
-	key = NULL;					/* flag to reference inputslot */
+	key = FIRSTTUPLE_INPUTSLOT;
 
 	if (isnew)
 	{
@@ -566,10 +577,10 @@ TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const
 	 * LookupTupleHashEntry's dummy TupleHashEntryData.  The other direction
 	 * could be supported too, but is not currently required.
 	 */
-	Assert(tuple1 != NULL);
+	Assert(tuple1 && tuple1 != FIRSTTUPLE_INPUTSLOT);
 	slot1 = hashtable->tableslot;
 	ExecStoreMinimalTuple(tuple1, slot1, false);
-	Assert(tuple2 == NULL);
+	Assert(tuple2 == FIRSTTUPLE_INPUTSLOT);
 	slot2 = hashtable->inputslot;
 
 	/* For crosstype comparisons, the inputslot must be first */
-- 
2.34.1

From 160b8bee7c56305fc881dbe8187c13eebce7bf92 Mon Sep 17 00:00:00 2001
From: Jeff Davis <j...@j-davis.com>
Date: Fri, 15 Nov 2024 18:52:06 -0800
Subject: [PATCH v3 4/4] Pack TupleHashEntryData and AggStatePerGroupData.

---
 src/backend/executor/execGrouping.c | 6 +++++-
 src/include/executor/nodeAgg.h      | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 43165375946..70f79abe86b 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -24,7 +24,11 @@ typedef struct TupleHashEntryData
 {
 	MinimalTuple firstTuple;	/* copy of first tuple in this group */
 	uint32		hash;			/* hash value (cached) */
-} TupleHashEntryData;
+}
+#ifdef pg_attribute_packed
+	pg_attribute_packed()
+#endif
+TupleHashEntryData;
 
 static int	TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
 static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
diff --git a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h
index 34b82d0f5d1..d519b2a57a4 100644
--- a/src/include/executor/nodeAgg.h
+++ b/src/include/executor/nodeAgg.h
@@ -264,7 +264,11 @@ typedef struct AggStatePerGroupData
 	 * NULL and not auto-replace it with a later input value. Only the first
 	 * non-NULL input will be auto-substituted.
 	 */
-} AggStatePerGroupData;
+}
+#ifdef pg_attribute_packed
+	pg_attribute_packed()
+#endif
+AggStatePerGroupData;
 
 /*
  * AggStatePerPhaseData - per-grouping-set-phase state
-- 
2.34.1

Reply via email to