While working on the progress reporting, I've been looking into the performance results, particularly why the parallelism doesn't help much for some indexes - e.g. the index on the headers JSONB column.
CREATE INDEX headers_jsonb_idx ON messages USING gin (msg_headers); In this case the parallelism helps only a little bit - serial build takes ~47 seconds, parallel builds with 1 worker (so 2 with leader) takes ~40 seconds. Not great. There are two reasons for this. First, the "keys" (JSONB values) are mostly unique, with only 1 or 2 TIDs per key, which means the workers can't really do much merging. But shifting the merges to workers is the main benefit of parallel builds - if the merge happens in the leader anyway, this explains the lack of speedup. The other reason is that with JSON keys the comparisons are rather expensive, and we're comparing a lot of keys. It occurred to me we can work around this by comparing hashes first, and comparing the full keys only when the hashes match. And indeed, this helps a lot (there's a very rough PoC patch attached) - I'm seeing ~20% speedup from this, so the parallel build runs in ~30 seconds now. Still not quite serial speedup, but better than before. But I think this optimization is mostly orthogonal to parallel builds, i.e. we could do the same thing for serial builds (while accumulating data in memory, we could do these comparisons). But it needs to be careful about still writing the data out in the "natural" order, not ordered by hash. The hash randomizes the pattern, making it much less efficient for bulk inserts (it trashes the buffers, etc.). The PoC patch for parallel builds addresses this by ignoring the hash during the final tuplesort, the serial builds would need to do something similar. My conclusion is this can be left as a future improvement, independent of the parallel builds. regards -- Tomas Vondra
diff --git a/src/backend/access/gin/ginbulk.c b/src/backend/access/gin/ginbulk.c index 302cb2092a9..74cc62839cb 100644 --- a/src/backend/access/gin/ginbulk.c +++ b/src/backend/access/gin/ginbulk.c @@ -76,8 +76,8 @@ cmpEntryAccumulator(const RBTNode *a, const RBTNode *b, void *arg) BuildAccumulator *accum = (BuildAccumulator *) arg; return ginCompareAttEntries(accum->ginstate, - ea->attnum, ea->key, ea->category, - eb->attnum, eb->key, eb->category); + ea->attnum, ea->key, ea->hash, ea->category, + eb->attnum, eb->key, eb->hash, eb->category); } /* Allocator function for rbtree.c */ @@ -147,7 +147,7 @@ getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value) static void ginInsertBAEntry(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, - Datum key, GinNullCategory category) + Datum key, uint32 hash, GinNullCategory category) { GinEntryAccumulator eatmp; GinEntryAccumulator *ea; @@ -159,6 +159,7 @@ ginInsertBAEntry(BuildAccumulator *accum, */ eatmp.attnum = attnum; eatmp.key = key; + eatmp.hash = hash; eatmp.category = category; /* temporarily set up single-entry itempointer list */ eatmp.list = heapptr; @@ -209,7 +210,7 @@ ginInsertBAEntry(BuildAccumulator *accum, void ginInsertBAEntries(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, - Datum *entries, GinNullCategory *categories, + Datum *entries, uint32 *hashes, GinNullCategory *categories, int32 nentries) { uint32 step = nentries; @@ -236,7 +237,7 @@ ginInsertBAEntries(BuildAccumulator *accum, for (i = step - 1; i < nentries && i >= 0; i += step << 1 /* *2 */ ) ginInsertBAEntry(accum, heapptr, attnum, - entries[i], categories[i]); + entries[i], hashes[i], categories[i]); step >>= 1; /* /2 */ } diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c index ba6bbd562df..8b2125bb00c 100644 --- a/src/backend/access/gin/ginentrypage.c +++ b/src/backend/access/gin/ginentrypage.c @@ -255,8 +255,8 @@ entryIsMoveRight(GinBtree btree, Page page) key = gintuple_get_key(btree->ginstate, itup, &category); if (ginCompareAttEntries(btree->ginstate, - btree->entryAttnum, btree->entryKey, btree->entryCategory, - attnum, key, category) > 0) + btree->entryAttnum, btree->entryKey, 0, btree->entryCategory, + attnum, key, 0, category) > 0) return true; return false; @@ -313,8 +313,9 @@ entryLocateEntry(GinBtree btree, GinBtreeStack *stack) result = ginCompareAttEntries(btree->ginstate, btree->entryAttnum, btree->entryKey, + 0, btree->entryCategory, - attnum, key, category); + attnum, key, 0, category); } if (result == 0) @@ -384,8 +385,9 @@ entryLocateLeafEntry(GinBtree btree, GinBtreeStack *stack) result = ginCompareAttEntries(btree->ginstate, btree->entryAttnum, btree->entryKey, + 0, btree->entryCategory, - attnum, key, category); + attnum, key, 0, category); if (result == 0) { stack->off = mid; diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index a6d88572cc2..6010e8091bb 100644 --- a/src/backend/access/gin/ginfast.c +++ b/src/backend/access/gin/ginfast.c @@ -746,7 +746,7 @@ processPendingPage(BuildAccumulator *accum, KeyArray *ka, * and reset ka. */ ginInsertBAEntries(accum, &heapptr, attrnum, - ka->keys, ka->categories, ka->nvalues); + ka->keys, NULL, ka->categories, ka->nvalues); ka->nvalues = 0; heapptr = itup->t_tid; attrnum = curattnum; @@ -759,7 +759,7 @@ processPendingPage(BuildAccumulator *accum, KeyArray *ka, /* Dump out all remaining keys */ ginInsertBAEntries(accum, &heapptr, attrnum, - ka->keys, ka->categories, ka->nvalues); + ka->keys, NULL, ka->categories, ka->nvalues); } /* diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index f3b19d280c3..1d38b6aa462 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -283,8 +283,8 @@ collectMatchBitmap(GinBtreeData *btree, GinBtreeStack *stack, &newCategory); if (ginCompareEntries(btree->ginstate, attnum, - newDatum, newCategory, - idatum, icategory) == 0) + newDatum, 0, newCategory, + idatum, 0, icategory) == 0) break; /* Found! */ } @@ -1724,8 +1724,10 @@ collectMatchesForHeapRow(IndexScanDesc scan, pendingPosition *pos) res = ginCompareEntries(&so->ginstate, entry->attnum, entry->queryKey, + 0, entry->queryCategory, datum[StopMiddle - 1], + 0, category[StopMiddle - 1]); } diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 7286432698e..f54e9e9b26c 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -193,7 +193,7 @@ static void _gin_parallel_scan_and_build(GinBuildState *buildstate, static Datum _gin_parse_tuple(GinTuple *a, ItemPointerData **items); static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category, - Datum key, int16 typlen, bool typbyval, + Datum key, int16 typlen, bool typbyval, uint32 hash, ItemPointerData *items, uint32 nitems, Size *len); @@ -412,8 +412,8 @@ ginEntryInsert(GinState *ginstate, * This function is used only during initial index creation. */ static void -ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum, - Datum value, bool isNull, +ginHeapTupleBulkInsert(GinBuildState *buildstate, Relation index, + OffsetNumber attnum, Datum value, bool isNull, ItemPointer heapptr) { Datum *entries; @@ -421,14 +421,41 @@ ginHeapTupleBulkInsert(GinBuildState *buildstate, OffsetNumber attnum, int32 nentries; MemoryContext oldCtx; + uint32 *hashes; + TypeCacheEntry *typentry; + TupleDesc tdesc = RelationGetDescr(index); + Form_pg_attribute attr = TupleDescAttr(tdesc, (attnum - 1)); + oldCtx = MemoryContextSwitchTo(buildstate->funcCtx); entries = ginExtractEntries(buildstate->accum.ginstate, attnum, value, isNull, &nentries, &categories); + + hashes = palloc0(sizeof(uint32) * nentries); + MemoryContextSwitchTo(oldCtx); + typentry = lookup_type_cache(attr->atttypid, TYPECACHE_HASH_PROC); + if (OidIsValid(typentry->hash_proc)) + { + /* FIXME is it correct to use attr->attcollation, not rd_indcollation? */ + Oid collation = attr->attcollation; + if (!OidIsValid(collation)) + collation = DEFAULT_COLLATION_OID; + + for (int i = 0; i < nentries; i++) + { + if (categories[i] != GIN_CAT_NORM_KEY) + continue; + + hashes[i] = DatumGetUInt32(OidFunctionCall1Coll(typentry->hash_proc, + collation, + entries[i])); + } + } + ginInsertBAEntries(&buildstate->accum, heapptr, attnum, - entries, categories, nentries); + entries, hashes, categories, nentries); buildstate->indtuples += nentries; @@ -446,7 +473,7 @@ ginBuildCallback(Relation index, ItemPointer tid, Datum *values, oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++) - ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1), + ginHeapTupleBulkInsert(buildstate, index, (OffsetNumber) (i + 1), values[i], isnull[i], tid); /* If we've maxed out our available memory, dump everything to the index */ @@ -495,6 +522,8 @@ ginFlushBuildState(GinBuildState *buildstate, Relation index) { /* information about the key */ Form_pg_attribute attr = TupleDescAttr(tdesc, (attnum - 1)); + TypeCacheEntry *typentry; + uint32 hash = 0; /* GIN tuple and tuple length */ GinTuple *tup; @@ -503,8 +532,22 @@ ginFlushBuildState(GinBuildState *buildstate, Relation index) /* there could be many entries, so be willing to abort here */ CHECK_FOR_INTERRUPTS(); + /* calculate hash of the key */ + typentry = lookup_type_cache(attr->atttypid, TYPECACHE_HASH_PROC); + if (OidIsValid(typentry->hash_proc) && (category == GIN_CAT_NORM_KEY)) + { + /* FIXME is it correct to use attr->attcollation, not rd_indcollation? */ + Oid collation = attr->attcollation; + if (!OidIsValid(collation)) + collation = DEFAULT_COLLATION_OID; + + hash = DatumGetUInt32(OidFunctionCall1Coll(typentry->hash_proc, + collation, + key)); + } + tup = _gin_build_tuple(attnum, category, - key, attr->attlen, attr->attbyval, + key, attr->attlen, attr->attbyval, hash, list, nlist, &tuplen); tuplesort_putgintuple(buildstate->bs_worker_sort, tup, tuplen); @@ -562,7 +605,7 @@ ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values, buildstate->tid = *tid; for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++) - ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1), + ginHeapTupleBulkInsert(buildstate, index, (OffsetNumber) (i + 1), values[i], isnull[i], tid); /* @@ -1149,6 +1192,7 @@ typedef struct GinBuffer GinNullCategory category; Datum key; /* 0 if no key (and keylen == 0) */ Size keylen; /* number of bytes (not typlen) */ + uint32 hash; /* type info */ int16 typlen; @@ -1315,6 +1359,9 @@ GinBufferKeyEquals(GinBuffer *buffer, GinTuple *tup) if (tup->category != buffer->category) return false; + if (tup->hash != buffer->hash) + return false; + /* * For NULL/empty keys, this means equality, for normal keys we need to * compare the actual key value. @@ -1377,6 +1424,7 @@ GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup) buffer->typlen = tup->typlen; buffer->typbyval = tup->typbyval; + buffer->hash = tup->hash; if (tup->category == GIN_CAT_NORM_KEY) buffer->key = datumCopy(key, buffer->typbyval, buffer->typlen); @@ -1695,6 +1743,7 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort, ntup = _gin_build_tuple(buffer->attnum, buffer->category, buffer->key, buffer->typlen, buffer->typbyval, + 0, buffer->items, buffer->nitems, &ntuplen); tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen); @@ -1723,6 +1772,7 @@ _gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort, ntup = _gin_build_tuple(buffer->attnum, buffer->category, buffer->key, buffer->typlen, buffer->typbyval, + 0, buffer->items, buffer->nitems, &ntuplen); tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen); @@ -1971,7 +2021,7 @@ _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc) */ static GinTuple * _gin_build_tuple(OffsetNumber attrnum, unsigned char category, - Datum key, int16 typlen, bool typbyval, + Datum key, int16 typlen, bool typbyval, uint32 hash, ItemPointerData *items, uint32 nitems, Size *len) { @@ -2030,6 +2080,7 @@ _gin_build_tuple(OffsetNumber attrnum, unsigned char category, tuple->category = category; tuple->keylen = keylen; tuple->nitems = nitems; + tuple->hash = hash; /* key type info */ tuple->typlen = typlen; @@ -2138,6 +2189,12 @@ _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup) if (a->category > b->category) return 1; + if (a->hash < b->hash) + return -1; + + if (a->hash > b->hash) + return 1; + if (a->category == GIN_CAT_NORM_KEY) { keya = _gin_parse_tuple(a, NULL); diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index 63ded6301e2..4bb15d6ccc3 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -82,8 +82,10 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum, prevEntry->attnum == attnum && ginCompareEntries(ginstate, attnum, prevEntry->queryKey, + 0, prevEntry->queryCategory, queryKey, + 0, queryCategory) == 0) { /* Successful match */ diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index a61532538c0..f47417ee638 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -388,8 +388,8 @@ GinInitMetabuffer(Buffer b) */ int ginCompareEntries(GinState *ginstate, OffsetNumber attnum, - Datum a, GinNullCategory categorya, - Datum b, GinNullCategory categoryb) + Datum a, uint32 hasha, GinNullCategory categorya, + Datum b, uint32 hashb, GinNullCategory categoryb) { /* if not of same null category, sort by that first */ if (categorya != categoryb) @@ -399,6 +399,9 @@ ginCompareEntries(GinState *ginstate, OffsetNumber attnum, if (categorya != GIN_CAT_NORM_KEY) return 0; + if (hasha != hashb) + return (hasha < hashb) ? -1 : 1; + /* both not null, so safe to call the compareFn */ return DatumGetInt32(FunctionCall2Coll(&ginstate->compareFn[attnum - 1], ginstate->supportCollation[attnum - 1], @@ -410,14 +413,14 @@ ginCompareEntries(GinState *ginstate, OffsetNumber attnum, */ int ginCompareAttEntries(GinState *ginstate, - OffsetNumber attnuma, Datum a, GinNullCategory categorya, - OffsetNumber attnumb, Datum b, GinNullCategory categoryb) + OffsetNumber attnuma, Datum a, uint32 hasha, GinNullCategory categorya, + OffsetNumber attnumb, Datum b, uint32 hashb, GinNullCategory categoryb) { /* attribute number is the first sort key */ if (attnuma != attnumb) return (attnuma < attnumb) ? -1 : 1; - return ginCompareEntries(ginstate, attnuma, a, categorya, b, categoryb); + return ginCompareEntries(ginstate, attnuma, a, hasha, categorya, b, hashb, categoryb); } diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index 95d8805b66f..d213f22d6f3 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -97,11 +97,11 @@ extern void GinInitBuffer(Buffer b, uint32 f); extern void GinInitPage(Page page, uint32 f, Size pageSize); extern void GinInitMetabuffer(Buffer b); extern int ginCompareEntries(GinState *ginstate, OffsetNumber attnum, - Datum a, GinNullCategory categorya, - Datum b, GinNullCategory categoryb); + Datum a, uint32 hasha, GinNullCategory categorya, + Datum b, uint32 hashb, GinNullCategory categoryb); extern int ginCompareAttEntries(GinState *ginstate, - OffsetNumber attnuma, Datum a, GinNullCategory categorya, - OffsetNumber attnumb, Datum b, GinNullCategory categoryb); + OffsetNumber attnuma, Datum a, uint32 hasha, GinNullCategory categorya, + OffsetNumber attnumb, Datum b, uint32 hashb, GinNullCategory categoryb); extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum, Datum value, bool isNull, int32 *nentries, GinNullCategory **categories); @@ -423,6 +423,7 @@ typedef struct GinEntryAccumulator { RBTNode rbtnode; Datum key; + uint32 hash; GinNullCategory category; OffsetNumber attnum; bool shouldSort; @@ -444,7 +445,8 @@ typedef struct extern void ginInitBA(BuildAccumulator *accum); extern void ginInsertBAEntries(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, - Datum *entries, GinNullCategory *categories, + Datum *entries, uint32 *hashes, + GinNullCategory *categories, int32 nentries); extern void ginBeginBAScan(BuildAccumulator *accum); extern ItemPointerData *ginGetBAEntry(BuildAccumulator *accum, diff --git a/src/include/access/gin_tuple.h b/src/include/access/gin_tuple.h index ce555031335..1075aad917d 100644 --- a/src/include/access/gin_tuple.h +++ b/src/include/access/gin_tuple.h @@ -26,6 +26,7 @@ typedef struct GinTuple bool typbyval; /* typbyval for key */ signed char category; /* category: normal or NULL? */ int nitems; /* number of TIDs in the data */ + uint32 hash; /* hash of the value */ char data[FLEXIBLE_ARRAY_MEMBER]; } GinTuple;