Re: [HACKERS] avoiding tuple copying in btree index builds

Robert Haas Mon, 16 Jun 2014 12:51:12 -0700

On Tue, Jun 3, 2014 at 4:38 PM, Robert Haas <robertmh...@gmail.com> wrote:
> On Sun, Jun 1, 2014 at 3:26 AM, Amit Kapila <amit.kapil...@gmail.com> wrote:
>> On Tue, May 6, 2014 at 12:04 AM, Robert Haas <robertmh...@gmail.com> wrote:
>>> On Mon, May 5, 2014 at 2:13 PM, Andres Freund <and...@2ndquadrant.com>
>>> wrote:
>>> > On 2014-05-05 13:52:39 -0400, Robert Haas wrote:
>>> >> Today, I discovered that when building a btree index, the btree code
>>> >> uses index_form_tuple() to create an index tuple from the heap tuple,
>>> >> calls tuplesort_putindextuple() to copy that tuple into the sort's
>>> >> memory context, and then frees the original one it built.  This seemed
>>> >> inefficient, so I wrote a patch to eliminate the tuple copying.  It
>>> >> works by adding a function tuplesort_putindextuplevalues(), which
>>> >> builds the tuple in the sort's memory context and thus avoids the need
>>> >> for a separate copy.  I'm not sure if that's the best approach, but
>>> >> the optimization seems wortwhile.
>>> >
>>> > Hm. It looks like we could quite easily just get rid of
>>> > tuplesort_putindextuple(). The hash usage doesn't look hard to convert.
>>>
>>> I glanced at that, but it wasn't obvious to me how to convert the hash
>>> usage.  If you have an idea, I'm all ears.
>>
>> I also think it's possible to have similar optimization for hash index
>> incase it has to spool the tuple for sorting.
>>
>> In function hashbuildCallback(), when buildstate->spool is true, we
>> can avoid to form index tuple. To check for nulls before calling
>>
>> _h_spool(), we can traverse the isnull array.
>
> Hmm, that might work.  Arguably it's less efficient, but on the other
> hand if it avoids forming the tuple sometimes it might be MORE
> efficient.  And anyway the difference might not be enough to matter.


On further review, this is definitely the way to go: it's a
straight-up win.  The isnull array is never more than one element in
length, so testing the single element is quite trivial.   The
attached, revised patch provides a modest but useful speedup for both
hash and btree index builds.

Anyone see any reason NOT to do this?  So far it looks like a
slam-dunk from here.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 7abb7a4..c30c6f9 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -142,26 +142,23 @@ hashbuildCallback(Relation index,
 	HashBuildState *buildstate = (HashBuildState *) state;
 	IndexTuple	itup;
 
-	/* form an index tuple and point it at the heap tuple */
-	itup = _hash_form_tuple(index, values, isnull);
-	itup->t_tid = htup->t_self;
-
 	/* Hash indexes don't index nulls, see notes in hashinsert */
-	if (IndexTupleHasNulls(itup))
-	{
-		pfree(itup);
+	if (isnull[0])
 		return;
-	}
 
 	/* Either spool the tuple for sorting, or just put it into the index */
 	if (buildstate->spool)
-		_h_spool(itup, buildstate->spool);
+		_h_spool(buildstate->spool, htup->t_self, values, isnull);
 	else
+	{
+		/* form an index tuple and point it at the heap tuple */
+		itup = _hash_form_tuple(index, values, isnull);
+		itup->t_tid = htup->t_self;
 		_hash_doinsert(index, itup);
+		pfree(itup);
+	}
 
 	buildstate->indtuples += 1;
-
-	pfree(itup);
 }
 
 /*
@@ -184,10 +181,6 @@ hashinsert(PG_FUNCTION_ARGS)
 #endif
 	IndexTuple	itup;
 
-	/* generate an index tuple */
-	itup = _hash_form_tuple(rel, values, isnull);
-	itup->t_tid = *ht_ctid;
-
 	/*
 	 * If the single index key is null, we don't insert it into the index.
 	 * Hash tables support scans on '='. Relational algebra says that A = B
@@ -197,11 +190,12 @@ hashinsert(PG_FUNCTION_ARGS)
 	 * NOTNULL scans, but that's an artifact of the strategy map architecture
 	 * chosen in 1986, not of the way nulls are handled here.
 	 */
-	if (IndexTupleHasNulls(itup))
-	{
-		pfree(itup);
+	if (isnull[0])
 		PG_RETURN_BOOL(false);
-	}
+
+	/* generate an index tuple */
+	itup = _hash_form_tuple(rel, values, isnull);
+	itup->t_tid = *ht_ctid;
 
 	_hash_doinsert(rel, itup);
 
diff --git a/src/backend/access/hash/hashsort.c b/src/backend/access/hash/hashsort.c
index c0d6fec..3a70bc1 100644
--- a/src/backend/access/hash/hashsort.c
+++ b/src/backend/access/hash/hashsort.c
@@ -90,9 +90,10 @@ _h_spooldestroy(HSpool *hspool)
  * spool an index entry into the sort file.
  */
 void
-_h_spool(IndexTuple itup, HSpool *hspool)
+_h_spool(HSpool *hspool, ItemPointerData self, Datum *values, bool *isnull)
 {
-	tuplesort_putindextuple(hspool->sortstate, itup);
+	tuplesort_putindextuplevalues(hspool->sortstate, hspool->index,
+								  self, values, isnull);
 }
 
 /*
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 36dc6c2..1ea4a2c 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -171,28 +171,21 @@ btbuildCallback(Relation index,
 				void *state)
 {
 	BTBuildState *buildstate = (BTBuildState *) state;
-	IndexTuple	itup;
-
-	/* form an index tuple and point it at the heap tuple */
-	itup = index_form_tuple(RelationGetDescr(index), values, isnull);
-	itup->t_tid = htup->t_self;
 
 	/*
 	 * insert the index tuple into the appropriate spool file for subsequent
 	 * processing
 	 */
 	if (tupleIsAlive || buildstate->spool2 == NULL)
-		_bt_spool(itup, buildstate->spool);
+		_bt_spool(buildstate->spool, htup->t_self, values, isnull);
 	else
 	{
 		/* dead tuples are put into spool2 */
 		buildstate->haveDead = true;
-		_bt_spool(itup, buildstate->spool2);
+		_bt_spool(buildstate->spool2, htup->t_self, values, isnull);
 	}
 
 	buildstate->indtuples += 1;
-
-	pfree(itup);
 }
 
 /*
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 1281a12..81a77b4 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -185,9 +185,10 @@ _bt_spooldestroy(BTSpool *btspool)
  * spool an index entry into the sort file.
  */
 void
-_bt_spool(IndexTuple itup, BTSpool *btspool)
+_bt_spool(BTSpool *btspool, ItemPointerData self, Datum *values, bool *isnull)
 {
-	tuplesort_putindextuple(btspool->sortstate, itup);
+	tuplesort_putindextuplevalues(btspool->sortstate, btspool->index,
+								  self, values, isnull);
 }
 
 /*
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index aa0f6d8..4f152d3 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -1134,22 +1134,25 @@ tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
 }
 
 /*
- * Accept one index tuple while collecting input data for sort.
- *
- * Note that the input tuple is always copied; the caller need not save it.
+ * Collect one index tuple while collecting input data for sort, building
+ * it from caller-supplied values.
  */
 void
-tuplesort_putindextuple(Tuplesortstate *state, IndexTuple tuple)
+tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
+							  ItemPointerData self, Datum *values,
+                              bool *isnull)
 {
 	MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
 	SortTuple	stup;
 
-	/*
-	 * Copy the given tuple into memory we control, and decrease availMem.
-	 * Then call the common code.
-	 */
-	COPYTUP(state, &stup, (void *) tuple);
-
+	stup.tuple = index_form_tuple(RelationGetDescr(rel), values, isnull);
+	((IndexTuple) stup.tuple)->t_tid = self;
+	USEMEM(state, GetMemoryChunkSpace(stup.tuple));
+	/* set up first-column key value */
+	stup.datum1 = index_getattr((IndexTuple) stup.tuple,
+								1,
+								RelationGetDescr(state->indexRel),
+								&stup.isnull1);
 	puttuple_common(state, &stup);
 
 	MemoryContextSwitchTo(oldcontext);
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index 2062801..0f45d3d 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -336,7 +336,8 @@ typedef struct HSpool HSpool;	/* opaque struct in hashsort.c */
 
 extern HSpool *_h_spoolinit(Relation heap, Relation index, uint32 num_buckets);
 extern void _h_spooldestroy(HSpool *hspool);
-extern void _h_spool(IndexTuple itup, HSpool *hspool);
+extern void _h_spool(HSpool *hspool, ItemPointerData self,
+                Datum *values, bool *isnull);
 extern void _h_indexbuild(HSpool *hspool);
 
 /* hashutil.c */
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index ed6f697..4f37e25 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -717,7 +717,8 @@ typedef struct BTSpool BTSpool; /* opaque type known only within nbtsort.c */
 extern BTSpool *_bt_spoolinit(Relation heap, Relation index,
 			  bool isunique, bool isdead);
 extern void _bt_spooldestroy(BTSpool *btspool);
-extern void _bt_spool(IndexTuple itup, BTSpool *btspool);
+extern void _bt_spool(BTSpool *btspool, ItemPointerData self,
+		  Datum *values, bool *isnull);
 extern void _bt_leafbuild(BTSpool *btspool, BTSpool *spool2);
 
 /*
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index 7d828e0..f02a29e 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -84,7 +84,9 @@ extern void tuplesort_set_bound(Tuplesortstate *state, int64 bound);
 extern void tuplesort_puttupleslot(Tuplesortstate *state,
 					   TupleTableSlot *slot);
 extern void tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup);
-extern void tuplesort_putindextuple(Tuplesortstate *state, IndexTuple tuple);
+extern void tuplesort_putindextuplevalues(Tuplesortstate *state,
+							  Relation rel, ItemPointerData self,
+							  Datum *values, bool *isnull);
 extern void tuplesort_putdatum(Tuplesortstate *state, Datum val,
 				   bool isNull);

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Re: [HACKERS] avoiding tuple copying in btree index builds

Reply via email to