I did some performance testing of building an SP-GiST index, with the new range type SP-GiST opclass. There's some low-hanging fruit there, I was able to reduce the index build time on a simple test case by about 20% with a few small changes.

I created a test table with:

create table range_test AS SELECT int4range(i-10, i + 10) as r from generate_series(1, 100000) i;

And measured the time it takes to build an index on that, on my laptop, by repeating this a few times and taking the lowest value:

\timing
create index i_r on range_test using spgist (r);

On unpatched checkout from master, the shortest time was 19.2 seconds.

Profile taken with 'perf' tool looks like this:

     21,43%  postmaster  postgres           [.] spgdoinsert
     17,25%  postmaster  postgres           [.] range_deserialize
     10,28%  postmaster  postgres           [.] FunctionCall2Coll
      9,68%  postmaster  postgres           [.] spgExtractNodeLabels
      7,99%  postmaster  postgres           [.] spg_range_quad_choose
      7,21%  postmaster  postgres           [.] index_getprocinfo
      5,24%  postmaster  postgres           [.] range_cmp_bounds
      4,74%  postmaster  postgres           [.] AllocSetAlloc
      2,49%  postmaster  postgres           [.] btint4cmp
      2,49%  postmaster  postgres           [.] AllocSetFree
      1,98%  postmaster  postgres           [.] SpGistGetTypeSize
      1,63%  postmaster  postgres           [.] range_get_typcache
      1,62%  postmaster  postgres           [.] MemoryContextAlloc
      1,16%  postmaster  postgres           [.] pg_detoast_datum
      0,87%  postmaster  postgres           [.] PageIndexTupleDelete
      0,65%  postmaster  postgres           [.] pfree
      0,49%  postmaster  postgres           [.] XLogInsert

Drilling into the profile, I came up with three little optimizations:

1. Within spgdoinsert, a significant portion of the CPU time is spent on line 2033 in spgdoinsert.c:

memset(&out, 0, sizeof(out));

That zeroes out a small struct allocated in the stack. Replacing that with MemSet() makes it faster, reducing the time spent on zeroing that struct from 10% to 1.5% of the time spent in spgdoinsert(). That's not very much in the big scheme of things, but it's a trivial change so seems worth it.

2. When spgdoinsert descends the tree, it calls index_getprocinfo() every time it calls the user-defined "choose" function. By calling it only once at the beginning of the function, the time spent in that function drops from 7.21% to 0.02%.

3. Most of the AllocSetAlloc/AllocSetFree calls in the profile are coming from spgExtractNodeLabels(). It first palloc's an array to hold node labels, then it iterates through all the nodes in the inner tuple, and if it turns out that there are no node labels, it pfrees the array and returns NULL. With this opclass, there never are any node labels, so spgExtractNodeLabels() always performs a pointless palloc+pfree. By changing the function to first check if there are node labels, and only performing the palloc when necessary, we can eliminate the time spent in AllocSetAlloc and AllocSetFree, about 7% of the CPU time in total.

With those three changes, the profile now looks like this:

     22,57%  postmaster  postgres           [.] range_deserialize
     21,54%  postmaster  postgres           [.] spgdoinsert
     13,37%  postmaster  postgres           [.] FunctionCall2Coll
     11,13%  postmaster  postgres           [.] spg_range_quad_choose
      7,11%  postmaster  postgres           [.] range_cmp_bounds
      6,96%  postmaster  postgres           [.] spgExtractNodeLabels
      3,68%  postmaster  postgres           [.] btint4cmp
      3,05%  postmaster  postgres           [.] pg_detoast_datum
      2,53%  postmaster  postgres           [.] SpGistGetTypeSize
      2,47%  postmaster  postgres           [.] range_get_typcache
      1,22%  postmaster  postgres           [.] PageIndexTupleDelete
      0,66%  postmaster  postgres           [.] XLogInsert

Attached is a patch with those changes. Barring objections, will commit.

--
  Heikki Linnakangas
  EnterpriseDB   http://www.enterprisedb.com
*** a/src/backend/access/spgist/spgdoinsert.c
--- b/src/backend/access/spgist/spgdoinsert.c
***************
*** 826,832 **** doPickSplit(Relation index, SpGistState *state,
  	heapPtrs[in.nTuples] = newLeafTuple->heapPtr;
  	in.nTuples++;
  
! 	memset(&out, 0, sizeof(out));
  
  	if (!isNulls)
  	{
--- 826,832 ----
  	heapPtrs[in.nTuples] = newLeafTuple->heapPtr;
  	in.nTuples++;
  
! 	MemSet(&out, 0, sizeof(out));
  
  	if (!isNulls)
  	{
***************
*** 1861,1866 **** spgdoinsert(Relation index, SpGistState *state,
--- 1861,1874 ----
  	int			leafSize;
  	SPPageDesc	current,
  				parent;
+ 	FmgrInfo   *procinfo = NULL;
+ 
+ 	/*
+ 	 * Look up FmgrInfo of the user-defined choose function once, to save
+ 	 * cycles in the loop below.
+ 	 */
+ 	if (!isnull)
+ 		procinfo = index_getprocinfo(index, 1, SPGIST_CHOOSE_PROC);
  
  	/*
  	 * Since we don't use index_form_tuple in this AM, we have to make sure
***************
*** 2007,2013 **** spgdoinsert(Relation index, SpGistState *state,
  			SpGistInnerTuple innerTuple;
  			spgChooseIn in;
  			spgChooseOut out;
- 			FmgrInfo   *procinfo;
  
  			/*
  			 * spgAddNode and spgSplitTuple cases will loop back to here to
--- 2015,2020 ----
***************
*** 2030,2041 **** spgdoinsert(Relation index, SpGistState *state,
  			in.nNodes = innerTuple->nNodes;
  			in.nodeLabels = spgExtractNodeLabels(state, innerTuple);
  
! 			memset(&out, 0, sizeof(out));
  
  			if (!isnull)
  			{
  				/* use user-defined choose method */
- 				procinfo = index_getprocinfo(index, 1, SPGIST_CHOOSE_PROC);
  				FunctionCall2Coll(procinfo,
  								  index->rd_indcollation[0],
  								  PointerGetDatum(&in),
--- 2037,2047 ----
  			in.nNodes = innerTuple->nNodes;
  			in.nodeLabels = spgExtractNodeLabels(state, innerTuple);
  
! 			MemSet(&out, 0, sizeof(out));
  
  			if (!isnull)
  			{
  				/* use user-defined choose method */
  				FunctionCall2Coll(procinfo,
  								  index->rd_indcollation[0],
  								  PointerGetDatum(&in),
*** a/src/backend/access/spgist/spgutils.c
--- b/src/backend/access/spgist/spgutils.c
***************
*** 743,769 **** Datum *
  spgExtractNodeLabels(SpGistState *state, SpGistInnerTuple innerTuple)
  {
  	Datum	   *nodeLabels;
- 	int			nullcount = 0;
  	int			i;
  	SpGistNodeTuple node;
  
! 	nodeLabels = (Datum *) palloc(sizeof(Datum) * innerTuple->nNodes);
! 	SGITITERATE(innerTuple, i, node)
! 	{
! 		if (IndexTupleHasNulls(node))
! 			nullcount++;
! 		else
! 			nodeLabels[i] = SGNTDATUM(node, state);
! 	}
! 	if (nullcount == innerTuple->nNodes)
  	{
  		/* They're all null, so just return NULL */
- 		pfree(nodeLabels);
  		return NULL;
  	}
! 	if (nullcount != 0)
! 		elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
! 	return nodeLabels;
  }
  
  /*
--- 743,774 ----
  spgExtractNodeLabels(SpGistState *state, SpGistInnerTuple innerTuple)
  {
  	Datum	   *nodeLabels;
  	int			i;
  	SpGistNodeTuple node;
  
! 	/* Either all the labels must be NULL, or none. */
! 	node = SGITNODEPTR(innerTuple);
! 	if (IndexTupleHasNulls(node))
  	{
+ 		SGITITERATE(innerTuple, i, node)
+ 		{
+ 			if (!IndexTupleHasNulls(node))
+ 				elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
+ 		}
  		/* They're all null, so just return NULL */
  		return NULL;
  	}
! 	else
! 	{
! 		nodeLabels = (Datum *) palloc(sizeof(Datum) * innerTuple->nNodes);
! 		SGITITERATE(innerTuple, i, node)
! 		{
! 			if (IndexTupleHasNulls(node))
! 				elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
! 			nodeLabels[i] = SGNTDATUM(node, state);
! 		}
! 		return nodeLabels;
! 	}
  }
  
  /*
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to