From 0c99e0fa2969a14cd156cfc0adc45ba85c48093f Mon Sep 17 00:00:00 2001
From: Hubert Zhang <hzhang@pivotal.io>
Date: Tue, 28 May 2019 08:47:15 +0000
Subject: [PATCH] Allow batch splitting to continue when tuples become diverse

When building the hash table, we need to increase the number of batches
once spaceAllowed is reached. If a split fails (all the tuples fall into
one batch and the other is empty), the growEnabled flag is turned off,
and in the past it then stayed off forever. Since later tuples may be
more diverse, we add new logic to re-enable batch growth once a split
would be beneficial.

Co-authored-by: Ning Yu <nyu@pivotal.io>
---
 src/backend/executor/nodeHash.c | 35 +++++++++++++++++++++++++++++------
 src/include/executor/hashjoin.h |  2 ++
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 64eec91..fa2a2f8 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -43,7 +43,7 @@
 #include "utils/syscache.h"
 
 
-static void ExecHashIncreaseNumBatches(HashJoinTable hashtable);
+static void ExecHashIncreaseNumBatches(HashJoinTable hashtable, uint32 hashvalue, uint32 hashTupleSize);
 static void ExecHashIncreaseNumBuckets(HashJoinTable hashtable);
 static void ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable);
 static void ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable);
@@ -494,6 +494,8 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
 	hashtable->nbatch_original = nbatch;
 	hashtable->nbatch_outstart = nbatch;
 	hashtable->growEnabled = true;
+	hashtable->growRemainOldBatch = true;
+	hashtable->splittableSize = 0;
 	hashtable->totalTuples = 0;
 	hashtable->partialTuples = 0;
 	hashtable->skewTuples = 0;
@@ -882,7 +884,7 @@ ExecHashTableDestroy(HashJoinTable hashtable)
  *		current memory consumption
  */
 static void
-ExecHashIncreaseNumBatches(HashJoinTable hashtable)
+ExecHashIncreaseNumBatches(HashJoinTable hashtable, uint32 hashvalue, uint32 hashTupleSize)
 {
 	int			oldnbatch = hashtable->nbatch;
 	int			curbatch = hashtable->curbatch;
@@ -892,9 +894,29 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
 	long		nfreed;
 	HashMemoryChunk oldchunks;
 
-	/* do nothing if we've decided to shut off growth */
+	/* 
+	 * If growth has been shut off, check whether the new tuple would benefit
+	 * from a split. Once splittableSize reaches spaceAllowed, re-enable
+	 * growEnabled and go on to perform the actual split.
+	 */
 	if (!hashtable->growEnabled)
-		return;
+	{
+		int			bucketno;
+		int			batchno;
+		hashtable->nbatch *= 2;
+		ExecHashGetBucketAndBatch(hashtable, hashvalue, &bucketno, &batchno);
+		hashtable->nbatch = oldnbatch;
+		if ((hashtable->growRemainOldBatch) && (batchno != curbatch)
+				|| (!hashtable->growRemainOldBatch) && (batchno == curbatch))
+			hashtable->splittableSize += hashTupleSize;
+		if (hashtable->splittableSize >= hashtable->spaceAllowed )
+		{
+			hashtable->growEnabled = true;
+			hashtable->splittableSize = 0;
+		}
+		else
+			return ;
+	}
 
 	/* safety check to avoid overflow */
 	if (oldnbatch > Min(INT_MAX / 2, MaxAllocSize / (sizeof(void *) * 2)))
@@ -1040,6 +1062,7 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
 	if (nfreed == 0 || nfreed == ninmemory)
 	{
 		hashtable->growEnabled = false;
+		hashtable->growRemainOldBatch = (nfreed == 0) ? true : false;
 #ifdef HJDEBUG
 		printf("Hashjoin %p: disabling further increase of nbatch\n",
 			   hashtable);
@@ -1656,7 +1679,7 @@ ExecHashTableInsert(HashJoinTable hashtable,
 		if (hashtable->spaceUsed +
 			hashtable->nbuckets_optimal * sizeof(HashJoinTuple)
 			> hashtable->spaceAllowed)
-			ExecHashIncreaseNumBatches(hashtable);
+			ExecHashIncreaseNumBatches(hashtable, hashvalue, (uint32)hashTupleSize);
 	}
 	else
 	{
@@ -2435,7 +2458,7 @@ ExecHashSkewTableInsert(HashJoinTable hashtable,
 
 	/* Check we are not over the total spaceAllowed, either */
 	if (hashtable->spaceUsed > hashtable->spaceAllowed)
-		ExecHashIncreaseNumBatches(hashtable);
+		ExecHashIncreaseNumBatches(hashtable, hashvalue, (uint32)hashTupleSize);
 
 	if (shouldFree)
 		heap_free_minimal_tuple(tuple);
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index 2c94b92..e6ce30c 100644
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -314,6 +314,8 @@ typedef struct HashJoinTableData
 	int			nbatch_outstart;	/* nbatch when we started outer scan */
 
 	bool		growEnabled;	/* flag to shut off nbatch increases */
+	bool		growRemainOldBatch;	/* all the tuple remain in old batch when growing */
+	int			splittableSize;	/* size of splittable tuples in hashtable */
 
 	double		totalTuples;	/* # tuples obtained from inner plan */
 	double		partialTuples;	/* # tuples obtained from inner plan by me */
-- 
1.8.3.1

