On Fri, Dec 11, 2015 at 9:13 AM, Robert Haas <robertmh...@gmail.com> wrote: > Also, I'd be in favor of you updating the patch to reflect the > comments from Tom and Simon on November 17th.
Attached revision: * Has more worked out comments on encoding, per Simon's request. * Uses Tom's preferred formulation for encoding TIDs (which involves unsigned integer casts). * Sets indexcursor = NULL when the tuplesort is empty, just to be tidy, per Tom's request. -- Peter Geoghegan
From dd15ae3d0a2bc29fbcefebe2ae08834d2e247070 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan <peter.geoghega...@gmail.com> Date: Sat, 14 Nov 2015 16:57:20 -0800 Subject: [PATCH] Speed up CREATE INDEX CONCURRENTLY's TID sort Add a simple TID encoding scheme. This is used for encoding and decoding TIDs to and from int8/int64 values, for the benefit of an expensive, extra tuplesort operation performed by CREATE INDEX CONCURRENTLY. The sort now uses the default int8 opclass to sort values in the ad-hoc encoding, and so benefits from the availability of int8 SortSupport. Despite the fact that item pointers take up only 6 bytes on disk, the TID type is always pass-by-reference due to considerations about how Datums are represented and accessed. On the other hand, int8 is usually, in practice, a pass-by-value type. Testing shows this approach makes CREATE INDEX CONCURRENTLY significantly faster on platforms where int8 is pass-by-value, especially when the big saving in memory within tuplesort.c prevents the sort from being performed externally. The approach will help to some degree on all platforms, though. --- src/backend/catalog/index.c | 71 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 67 insertions(+), 4 deletions(-) diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index e59b163..c10be3d 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -109,6 +109,8 @@ static void index_update_stats(Relation rel, static void IndexCheckExclusion(Relation heapRelation, Relation indexRelation, IndexInfo *indexInfo); +static inline int64 itemptr_encode(ItemPointer itemptr); +static inline void itemptr_decode(ItemPointer itemptr, int64 encoded); static bool validate_index_callback(ItemPointer itemptr, void *opaque); static void validate_index_heapscan(Relation heapRelation, Relation indexRelation, @@ -2832,7 +2834,13 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot) ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples; ivinfo.strategy = NULL; - state.tuplesort = tuplesort_begin_datum(TIDOID, TIDLessOperator, + /* + * Encode TIDs as int8 values for the sort, rather than directly sorting + * item pointers. This can be significantly faster, primarily because TID + * is a pass-by-reference type on all platforms, whereas int8 is + * pass-by-value on most platforms. + */ + state.tuplesort = tuplesort_begin_datum(INT8OID, Int8LessOperator, InvalidOid, false, maintenance_work_mem, false); @@ -2872,14 +2880,55 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot) } /* + * itemptr_encode - Encode ItemPointer as int64/int8 + * + * This representation must produce values encoded as int64 that sort in the + * same order as their corresponding original TID values would (using the + * default int8 opclass to produce a result equivalent to the default TID + * opclass). + * + * As noted in validate_index(), this can be significantly faster. + */ +static inline int64 +itemptr_encode(ItemPointer itemptr) +{ + BlockNumber block = ItemPointerGetBlockNumber(itemptr); + OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr); + int64 encoded; + + /* + * Use the 16 least significant bits for the offset. 32 adjacent bits are + * used for the block number. Since remaining bits are unused, there + * cannot be negative encoded values (We assume a two's complement + * representation). + */ + encoded = ((uint64) block << 16) | (uint16) offset; + + return encoded; +} + +/* + * itemptr_decode - Decode int64/int8 representation back to ItemPointer + */ +static inline void +itemptr_decode(ItemPointer itemptr, int64 encoded) +{ + BlockNumber block = (BlockNumber) (encoded >> 16); + OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF); + + ItemPointerSet(itemptr, block, offset); +} + +/* * validate_index_callback - bulkdelete callback to collect the index TIDs */ static bool validate_index_callback(ItemPointer itemptr, void *opaque) { v_i_state *state = (v_i_state *) opaque; + int64 encoded = itemptr_encode(itemptr); - tuplesort_putdatum(state->tuplesort, PointerGetDatum(itemptr), false); + tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false); state->itups += 1; return false; /* never actually delete anything */ } @@ -2911,6 +2960,7 @@ validate_index_heapscan(Relation heapRelation, /* state variables for the merge */ ItemPointer indexcursor = NULL; + ItemPointerData decoded; bool tuplesort_empty = false; /* @@ -3020,13 +3070,26 @@ validate_index_heapscan(Relation heapRelation, */ if (ItemPointerGetBlockNumber(indexcursor) == root_blkno) in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true; - pfree(indexcursor); } tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true, &ts_val, &ts_isnull); Assert(tuplesort_empty || !ts_isnull); - indexcursor = (ItemPointer) DatumGetPointer(ts_val); + if (!tuplesort_empty) + { + itemptr_decode(&decoded, DatumGetInt64(ts_val)); + indexcursor = &decoded; + + /* If int8 is pass-by-ref, free (encoded) TID Datum memory */ +#ifndef USE_FLOAT8_BYVAL + pfree(DatumGetPointer(ts_val)); +#endif + } + else + { + /* Be tidy */ + indexcursor = NULL; + } } /* -- 1.9.1
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers