On Tue, Jan 28, 2020 at 1:34 PM Floris Van Nee <florisvan...@optiver.com> wrote: > With Andres' instructions I ran a couple of tests. With your patches I can > reproduce a speedup of ~3% on single core tests reliably on a dual-socket > 36-core machine for the pgbench select-only test case.
Thanks for testing! Attached is v2 of patch series, which makes the changes to 0001-* requested by Andres. I restructured the loop in a way that allows the compiler to assume that there will always be at least one loop iteration -- so I'm not quite as aggressive as I was with v1. We don't actually delay the call to BTreeTupleGetNAtts() as such in v2. Can you test this version, Floris? The second two patches are probably not helping here, so it would be useful if you could just test 0001-*, and then test all three together. I can toss the latter two patches if there is no additional speedup. If we're lucky, then Andres will have been right to suspect that there might be a smaller stall caused by the new branch in the loop that existed in v1. Maybe this will show up at higher client counts. I should point out that the deduplication patch changes the definition of BTreeTupleGetNAtts(), making it slightly more complicated. With the deduplication patch, we have to check that the tuple isn't a posting list tuple, which uses the INDEX_ALT_TID_MASK/INDEX_AM_RESERVED_BIT bit to indicate a non-standard tuple header format, just like the current pivot tuple format (we need to check a BT_RESERVED_OFFSET_MASK bit to further differentiate posting list tuples from pivot tuples). This increase in the complexity of BTreeTupleGetNAtts() will probably further tip things in favor of this patch. There are no changes in either 0002-* or 0003-* patches for v2. I'm including the same patches here a second time for completeness. -- Peter Geoghegan
From 8f55bcedaa9c109543627e9c785dab0ffb0e5c75 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan <pg@bowt.ie> Date: Wed, 22 Jan 2020 20:59:57 -0800 Subject: [PATCH v2 3/3] Remove "negative infinity" check from _bt_compare(). --- src/backend/access/nbtree/nbtsearch.c | 32 ++++++++++++++++++--------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index dd56fdaaea..b2a2605c47 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -348,6 +348,8 @@ _bt_binsrch(Relation rel, high; int32 result, cmpval; + bool isleaf; + OffsetNumber noff; page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -359,6 +361,7 @@ _bt_binsrch(Relation rel, low = P_FIRSTDATAKEY(opaque); high = PageGetMaxOffsetNumber(page); + isleaf = P_ISLEAF(opaque); /* * If there are no keys on the page, return the first available slot. Note @@ -386,13 +389,20 @@ _bt_binsrch(Relation rel, cmpval = key->nextkey ? 0 : 1; /* select comparison value */ + if (isleaf) + noff = InvalidOffsetNumber; + else + noff = P_FIRSTDATAKEY(opaque); while (high > low) { OffsetNumber mid = low + ((high - low) / 2); /* We have low <= mid < high, so mid points at a real slot */ - result = _bt_compare_inl(rel, key, page, mid); + if (mid == noff) + result = 1; + else + result = _bt_compare_inl(rel, key, page, mid); if (result >= cmpval) low = mid + 1; @@ -407,7 +417,7 @@ _bt_binsrch(Relation rel, * On a leaf page, we always return the first key >= scan key (resp. > * scan key), which could be the last slot + 1. */ - if (P_ISLEAF(opaque)) + if (isleaf) return low; /* @@ -536,6 +546,16 @@ _bt_compare(Relation rel, Page page, OffsetNumber offnum) { + BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); + + /* + * Force result ">" if target item is first data item on an internal page + * --- see NOTE above. 
+ */ + if (!P_ISLEAF(opaque) && offnum == P_FIRSTDATAKEY(opaque)) + return 1; + + return _bt_compare_inl(rel, key, page, offnum); } @@ -568,7 +588,6 @@ _bt_compare_inl(Relation rel, OffsetNumber offnum) { TupleDesc itupdesc = RelationGetDescr(rel); - BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); IndexTuple itup; ItemPointer heapTid; int ski; @@ -580,13 +599,6 @@ _bt_compare_inl(Relation rel, Assert(key->keysz <= IndexRelationGetNumberOfKeyAttributes(rel)); Assert(key->heapkeyspace || key->scantid == NULL); - /* - * Force result ">" if target item is first data item on an internal page - * --- see NOTE above. - */ - if (!P_ISLEAF(opaque) && offnum == P_FIRSTDATAKEY(opaque)) - return 1; - itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); ntupatts = BTreeTupleGetNAtts(itup, rel); -- 2.17.1
From aa6d3365da2bcd6fdeaaffb7a4ac0f783538b522 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan <pg@bowt.ie> Date: Wed, 22 Jan 2020 20:35:04 -0800 Subject: [PATCH v2 2/3] Inline _bt_compare(). --- src/backend/access/nbtree/nbtsearch.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 7e4cd9c5ad..dd56fdaaea 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -26,6 +26,8 @@ static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp); static OffsetNumber _bt_binsrch(Relation rel, BTScanInsert key, Buffer buf); +static inline int32 _bt_compare_inl(Relation rel, BTScanInsert key, Page page, + OffsetNumber offnum); static bool _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum); static void _bt_saveitem(BTScanOpaque so, int itemIndex, @@ -298,7 +300,7 @@ _bt_moveright(Relation rel, continue; } - if (P_IGNORE(opaque) || _bt_compare(rel, key, page, P_HIKEY) >= cmpval) + if (P_IGNORE(opaque) || _bt_compare_inl(rel, key, page, P_HIKEY) >= cmpval) { /* step right one page */ buf = _bt_relandgetbuf(rel, buf, opaque->btpo_next, access); @@ -390,7 +392,7 @@ _bt_binsrch(Relation rel, /* We have low <= mid < high, so mid points at a real slot */ - result = _bt_compare(rel, key, page, mid); + result = _bt_compare_inl(rel, key, page, mid); if (result >= cmpval) low = mid + 1; @@ -499,7 +501,7 @@ _bt_binsrch_insert(Relation rel, BTInsertState insertstate) /* We have low <= mid < high, so mid points at a real slot */ - result = _bt_compare(rel, key, page, mid); + result = _bt_compare_inl(rel, key, page, mid); if (result >= cmpval) low = mid + 1; @@ -528,6 +530,15 @@ _bt_binsrch_insert(Relation rel, BTInsertState insertstate) return low; } +int32 +_bt_compare(Relation rel, + BTScanInsert key, + Page page, + OffsetNumber offnum) +{ + return _bt_compare_inl(rel, key, page, offnum); +} 
+ /*---------- * _bt_compare() -- Compare insertion-type scankey to tuple on a page. * @@ -550,11 +561,11 @@ _bt_binsrch_insert(Relation rel, BTInsertState insertstate) * key. See backend/access/nbtree/README for details. *---------- */ -int32 -_bt_compare(Relation rel, - BTScanInsert key, - Page page, - OffsetNumber offnum) +static inline int32 +_bt_compare_inl(Relation rel, + BTScanInsert key, + Page page, + OffsetNumber offnum) { TupleDesc itupdesc = RelationGetDescr(rel); BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); -- 2.17.1
From 73058e520cce39a598a1011ec05f84b4f7ee8d97 Mon Sep 17 00:00:00 2001 From: Peter Geoghegan <pg@bowt.ie> Date: Wed, 22 Jan 2020 18:00:38 -0800 Subject: [PATCH v2 1/3] Avoid pipeline stall in _bt_compare(). Author: Peter Geoghegan Reviewed-By: Andres Freund, Floris Van Nee Discussion: https://postgr.es/m/CAH2-Wzngz5MrkiTaZNny0GzfTxNQE+QWPPaO-C390BgruhgjEw@mail.gmail.com --- src/backend/access/nbtree/nbtsearch.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index c573814f01..7e4cd9c5ad 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -560,6 +560,7 @@ _bt_compare(Relation rel, BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); IndexTuple itup; ItemPointer heapTid; + int ski; ScanKey scankey; int ncmpkey; int ntupatts; @@ -591,9 +592,11 @@ _bt_compare(Relation rel, */ ncmpkey = Min(ntupatts, key->keysz); + Assert(ntupatts >= 1); Assert(key->heapkeyspace || ncmpkey == key->keysz); + ski = 1; scankey = key->scankeys; - for (int i = 1; i <= ncmpkey; i++) + for (;;) { Datum datum; bool isNull; @@ -641,7 +644,18 @@ _bt_compare(Relation rel, if (result != 0) return result; + /* + * The loop is deliberately structured in a way that enables the + * compiler to assume that the first iteration always runs. Testing + * has shown that this avoids a pipeline stall with certain + * memory-bound workloads. We delay this test, since it depends on + * whether or not caller's tuple is a pivot tuple. Typically, most + * calls here never reach this far. + */ + ski++; scankey++; + if (ski > ncmpkey) + break; } /* -- 2.17.1