>It's a shame you only see 3%, but that's still worth it.
Hi,

I ran this test here:

DROP TABLE hash_speed;
CREATE unlogged TABLE hash_speed (x integer);
INSERT INTO hash_speed SELECT random()*10000000 FROM
generate_series(1,10000000) x;
VACUUM
Timing is on.
CREATE INDEX ON hash_speed USING hash (x);

head:
Time: 20526,490 ms (00:20,526)

attached patch (v3):
Time: 18810,777 ms (00:18,811)

I can see 9%, with the patch (v3) attached.

This optimization would not apply in any way also to _hash_pgaddmultitup?

regards,
Ranier Vilela
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index c361509d68..a68eb40b2b 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -39,6 +39,7 @@ typedef struct
 	HSpool	   *spool;			/* NULL if not using spooling */
 	double		indtuples;		/* # tuples accepted into index */
 	Relation	heapRel;		/* heap relation descriptor */
+	HashInsertState istate;		/* insert state */
 } HashBuildState;
 
 static void hashbuildCallback(Relation index,
@@ -118,6 +119,7 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 	uint32		num_buckets;
 	long		sort_threshold;
 	HashBuildState buildstate;
+	HashInsertStateData insertstate;
 
 	/*
 	 * We expect to be called exactly once for any index relation. If that's
@@ -158,13 +160,20 @@ hashbuild(Relation heap, Relation index, IndexInfo *indexInfo)
 		sort_threshold = Min(sort_threshold, NLocBuffer);
 
 	if (num_buckets >= (uint32) sort_threshold)
-		buildstate.spool = _h_spoolinit(heap, index, num_buckets);
+	{
+		insertstate.sorted = true;
+		buildstate.spool = _h_spoolinit(heap, index, num_buckets, (HashInsertState) &insertstate);
+	}
 	else
+	{
+		insertstate.sorted = false;
 		buildstate.spool = NULL;
+	}
 
 	/* prepare to build the index */
-	buildstate.indtuples = 0;
 	buildstate.heapRel = heap;
+	buildstate.indtuples = 0;
+	buildstate.istate = (HashInsertState) &insertstate;
 
 	/* do the heap scan */
 	reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
@@ -231,7 +240,7 @@ hashbuildCallback(Relation index,
 		itup = index_form_tuple(RelationGetDescr(index),
 								index_values, index_isnull);
 		itup->t_tid = *tid;
-		_hash_doinsert(index, itup, buildstate->heapRel);
+		_hash_doinsert(index, itup, buildstate->heapRel, buildstate->istate);
 		pfree(itup);
 	}
 
@@ -254,6 +263,7 @@ hashinsert(Relation rel, Datum *values, bool *isnull,
 	Datum		index_values[1];
 	bool		index_isnull[1];
 	IndexTuple	itup;
+	HashInsertStateData istate;
 
 	/* convert data to a hash key; on failure, do not insert anything */
 	if (!_hash_convert_tuple(rel,
@@ -265,7 +275,9 @@ hashinsert(Relation rel, Datum *values, bool *isnull,
 	itup = index_form_tuple(RelationGetDescr(rel), index_values, index_isnull);
 	itup->t_tid = *ht_ctid;
 
-	_hash_doinsert(rel, itup, heapRel);
+	istate.sorted = false;
+
+	_hash_doinsert(rel, itup, heapRel, (HashInsertState) &istate);
 
 	pfree(itup);
 
diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c
index 4f2fecb908..4d17c841c0 100644
--- a/src/backend/access/hash/hashinsert.c
+++ b/src/backend/access/hash/hashinsert.c
@@ -34,7 +34,7 @@ static void _hash_vacuum_one_page(Relation rel, Relation hrel,
  *		and hashinsert.  By here, itup is completely filled in.
  */
 void
-_hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel)
+_hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel, HashInsertState istate)
 {
 	Buffer		buf = InvalidBuffer;
 	Buffer		bucket_buf;
@@ -198,7 +198,7 @@ restart_insert:
 	START_CRIT_SECTION();
 
 	/* found page with enough space, so add the item here */
-	itup_off = _hash_pgaddtup(rel, buf, itemsz, itup);
+	itup_off = _hash_pgaddtup(rel, buf, itemsz, itup, istate->sorted);
 	MarkBufferDirty(buf);
 
 	/* metapage operations */
@@ -266,18 +266,28 @@ restart_insert:
  * page are sorted by hashkey value.
  */
 OffsetNumber
-_hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup)
+_hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup, bool sorted)
 {
 	OffsetNumber itup_off;
 	Page		page;
-	uint32		hashkey;
 
 	_hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
 	page = BufferGetPage(buf);
 
-	/* Find where to insert the tuple (preserving page's hashkey ordering) */
-	hashkey = _hash_get_indextuple_hashkey(itup);
-	itup_off = _hash_binsearch(page, hashkey);
+	/*
+	 * Find where to insert the tuple (preserving page's hashkey ordering).
+	 * If the input is already sorted by hashkey, then we can avoid the
+	 * binary search and just add it to the end of the page.
+	 */
+	if (sorted)
+		itup_off = PageGetMaxOffsetNumber(page) + 1;
+	else
+	{
+		uint32		hashkey;
+
+		hashkey = _hash_get_indextuple_hashkey(itup);
+		itup_off = _hash_binsearch(page, hashkey);
+	}
 
 	if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
 		== InvalidOffsetNumber)
@@ -300,7 +310,6 @@ void
 _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups,
 					OffsetNumber *itup_offsets, uint16 nitups)
 {
-	OffsetNumber itup_off;
 	Page		page;
 	uint32		hashkey;
 	int			i;
@@ -317,11 +326,9 @@ _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups,
 
 		/* Find where to insert the tuple (preserving page's hashkey ordering) */
 		hashkey = _hash_get_indextuple_hashkey(itups[i]);
-		itup_off = _hash_binsearch(page, hashkey);
-
-		itup_offsets[i] = itup_off;
+		itup_offsets[i] = _hash_binsearch(page, hashkey);
 
-		if (PageAddItem(page, (Item) itups[i], itemsize, itup_off, false, false)
+		if (PageAddItem(page, (Item) itups[i], itemsize, itup_offsets[i], false, false)
 			== InvalidOffsetNumber)
 			elog(ERROR, "failed to add index item to \"%s\"",
 				 RelationGetRelationName(rel));
diff --git a/src/backend/access/hash/hashsort.c b/src/backend/access/hash/hashsort.c
index 19563148d0..6213855a4c 100644
--- a/src/backend/access/hash/hashsort.c
+++ b/src/backend/access/hash/hashsort.c
@@ -50,6 +50,8 @@ struct HSpool
 	uint32		high_mask;
 	uint32		low_mask;
 	uint32		max_buckets;
+
+	HashInsertState istate;
 };
 
 
@@ -57,7 +59,7 @@ struct HSpool
  * create and initialize a spool structure
  */
 HSpool *
-_h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
+_h_spoolinit(Relation heap, Relation index, uint32 num_buckets, HashInsertState istate)
 {
 	HSpool	   *hspool = (HSpool *) palloc0(sizeof(HSpool));
 
@@ -89,6 +91,8 @@ _h_spoolinit(Relation heap, Relation index, uint32 num_buckets)
 												   NULL,
 												   TUPLESORT_NONE);
 
+	hspool->istate = istate;
+
 	return hspool;
 }
 
@@ -145,7 +149,7 @@ _h_indexbuild(HSpool *hspool, Relation heapRel)
 		Assert(hashkey >= lasthashkey);
 #endif
 
-		_hash_doinsert(hspool->index, itup, heapRel);
+		_hash_doinsert(hspool->index, itup, heapRel, hspool->istate);
 
 		pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
 									 ++tups_done);
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index da372841c4..f0b016de96 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -357,6 +357,12 @@ typedef struct HashOptions
 #define HASHOPTIONS_PROC		3
 #define HASHNProcs				3
 
+typedef struct HashInsertStateData
+{
+	bool sorted;
+} HashInsertStateData;
+
+typedef HashInsertStateData *HashInsertState;
 
 /* public routines */
 
@@ -390,9 +396,9 @@ extern void hashadjustmembers(Oid opfamilyoid,
 /* private routines */
 
 /* hashinsert.c */
-extern void _hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel);
+extern void _hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel, HashInsertState istate);
 extern OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf,
-								   Size itemsize, IndexTuple itup);
+								   Size itemsize, IndexTuple itup, bool sorted);
 extern void _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups,
 								OffsetNumber *itup_offsets, uint16 nitups);
 
@@ -446,7 +452,8 @@ extern bool _hash_first(IndexScanDesc scan, ScanDirection dir);
 /* hashsort.c */
 typedef struct HSpool HSpool;	/* opaque struct in hashsort.c */
 
-extern HSpool *_h_spoolinit(Relation heap, Relation index, uint32 num_buckets);
+extern HSpool *_h_spoolinit(Relation heap, Relation index, uint32 num_buckets,
+							HashInsertState istate);
 extern void _h_spooldestroy(HSpool *hspool);
 extern void _h_spool(HSpool *hspool, ItemPointer self,
 					 Datum *values, bool *isnull);

Reply via email to