On 30.07.2020 16:40, Anastasia Lubennikova wrote:
While testing this fix, Alexander Lakhin spotted another problem.
After a few runs, it will fail with "ERROR: corrupted BRIN index:
inconsistent range map"
The problem is caused by a race in page locking in
brinGetTupleForHeapBlock [1]:
(1) bitmapscan locks revmap->rm_currBuf and finds the address of the
tuple on a regular page "page", then unlocks revmap->rm_currBuf
(2) in another transaction desummarize locks both revmap->rm_currBuf
and "page", cleans up the tuple and unlocks both buffers
(3) bitmapscan locks the buffer containing "page", attempts to access the
tuple and fails to find it
At first, I tried to fix it by holding the lock on revmap->rm_currBuf
until we locked the regular page, but it causes a deadlock with
brinsummarize(). It can be easily reproduced with the same test as above.
Is there any rule about the order of locking revmap and regular pages
in brin? I haven't found anything in README.
As an alternative, we can leave locks as is and add a recheck, before
throwing an error.
Here are the updated patches for both problems.
1) brin_summarize_fix_REL_12_v2 fixes
"failed to find parent tuple for heap-only tuple at (50661,130) in table
"tbl""
This patch checks that we only access initialized entries of
root_offsets[] array. If necessary, collect the array again. One recheck
is enough here, since concurrent pruning is not possible.
2) brin_pagelock_fix_REL_12_v1.patch fixes
"ERROR: corrupted BRIN index: inconsistent range map"
This patch adds a recheck as suggested in previous message.
I am not sure if one recheck is enough to eliminate the race completely,
but the problem cannot be reproduced anymore.
--
Anastasia Lubennikova
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company
commit 0b1fbfb34236882591c5ac6665a407d9780f4017
Author: anastasia <a.lubennik...@postgrespro.ru>
Date: Wed Aug 5 15:56:10 2020 +0300
fix race in BRIN page locking
diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c
index 7dcb1cd73f..89724c3bd4 100644
--- a/src/backend/access/brin/brin_revmap.c
+++ b/src/backend/access/brin/brin_revmap.c
@@ -207,6 +207,7 @@ brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk,
ItemId lp;
BrinTuple *tup;
ItemPointerData previptr;
+ bool in_retry = false;
/* normalize the heap block number to be the first page in the range */
heapBlk = (heapBlk / revmap->rm_pagesPerRange) * revmap->rm_pagesPerRange;
@@ -283,9 +284,23 @@ brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk,
if (BRIN_IS_REGULAR_PAGE(page))
{
if (*off > PageGetMaxOffsetNumber(page))
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg_internal("corrupted BRIN index: inconsistent range map")));
+ {
+ if (in_retry)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg_internal("corrupted BRIN index: inconsistent range map")));
+ else
+ {
+ /*
+ * Assume that the revmap was updated concurrently.
+ * Retry only once to avoid eternal looping in case
+ * the index is really corrupted.
+ */
+ LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
+ in_retry = true;
+ continue;
+ }
+ }
lp = PageGetItemId(page, *off);
if (ItemIdIsUsed(lp))
{
commit 16f0387dbeba3570836b506241bf0a1820de3390
Author: anastasia <a.lubennik...@postgrespro.ru>
Date: Mon Aug 10 20:07:22 2020 +0300
brin_summarize_fix_REL_12_v2.patch
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 267a6ee25a..c651d8b04e 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -1159,6 +1159,7 @@ heapam_index_build_range_scan(Relation heapRelation,
BlockNumber previous_blkno = InvalidBlockNumber;
BlockNumber root_blkno = InvalidBlockNumber;
OffsetNumber root_offsets[MaxHeapTuplesPerPage];
+ OffsetNumber root_offsets_size = 0;
/*
* sanity checks
@@ -1324,6 +1325,11 @@ heapam_index_build_range_scan(Relation heapRelation,
* buffer continuously while visiting the page, so no pruning
* operation can occur either.
*
+ * It is essential, though, to check the root_offsets_size bound
+ * before accessing the array, because concurrently inserted HOT tuples
+ * don't have a valid cached root offset and we need to build the map
+ * once again for them.
+ *
* Also, although our opinions about tuple liveness could change while
* we scan the page (due to concurrent transaction commits/aborts),
* the chain root locations won't, so this info doesn't need to be
@@ -1338,7 +1344,7 @@ heapam_index_build_range_scan(Relation heapRelation,
Page page = BufferGetPage(hscan->rs_cbuf);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
- heap_get_root_tuples(page, root_offsets);
+ root_offsets_size = heap_get_root_tuples(page, root_offsets);
LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
root_blkno = hscan->rs_cblock;
@@ -1625,6 +1631,25 @@ heapam_index_build_range_scan(Relation heapRelation,
offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
+ /*
+ * As we do not hold buffer lock, concurrent insertion can happen.
+ * If so, collect the map once again to find the root offset for
+ * the new tuple.
+ * (MaxOffsetNumber+1) is a special value that we use to
+ * differentiate uninitialized entries.
+ */
+ if (root_offsets_size < offnum ||
+ root_offsets[offnum - 1] == (MaxOffsetNumber+1))
+ {
+ Page page = BufferGetPage(hscan->rs_cbuf);
+
+ LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
+ root_offsets_size = heap_get_root_tuples(page, root_offsets);
+ LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+ }
+
+ Assert(root_offsets_size >= offnum);
+
if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 256df4de10..3810481df7 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -732,7 +732,9 @@ heap_page_prune_execute(Buffer buffer,
* root_offsets[k - 1] = j.
*
* The passed-in root_offsets array must have MaxHeapTuplesPerPage entries.
- * We zero out all unused entries.
+ * The return value is the size of the root_offsets array. The caller must
+ * never access values that are out of this bound.
+ * We also set special value of (MaxOffsetNumber+1) to unused entries.
*
* The function must be called with at least share lock on the buffer, to
* prevent concurrent prune operations.
@@ -740,12 +742,13 @@ heap_page_prune_execute(Buffer buffer,
* Note: The information collected here is valid only as long as the caller
* holds a pin on the buffer. Once pin is released, a tuple might be pruned
* and reused by a completely unrelated tuple.
+ *
*/
-void
+OffsetNumber
heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
{
- OffsetNumber offnum,
- maxoff;
+ OffsetNumber offnum, maxoff;
+ OffsetNumber last_valid_offnum = 0;
MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber));
@@ -759,7 +762,14 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
/* skip unused and dead items */
if (!ItemIdIsUsed(lp) || ItemIdIsDead(lp))
+ {
+ /*
+ * (MaxOffsetNumber+1) is a special value that we use to
+ * differentiate values that we've skipped.
+ */
+ root_offsets[offnum - 1] = (MaxOffsetNumber+1);
continue;
+ }
if (ItemIdIsNormal(lp))
{
@@ -778,6 +788,8 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
* Remember it in the mapping.
*/
root_offsets[offnum - 1] = offnum;
+ if (offnum > last_valid_offnum)
+ last_valid_offnum = offnum;
/* If it's not the start of a HOT-chain, we're done with it */
if (!HeapTupleHeaderIsHotUpdated(htup))
@@ -820,6 +832,8 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
/* Remember the root line pointer for this item */
root_offsets[nextoffnum - 1] = offnum;
+ if (nextoffnum > last_valid_offnum)
+ last_valid_offnum = nextoffnum;
/* Advance to next chain member, if any */
if (!HeapTupleHeaderIsHotUpdated(htup))
@@ -832,4 +846,6 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
priorXmax = HeapTupleHeaderGetUpdateXid(htup);
}
}
+
+ return last_valid_offnum;
}
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index b31de38910..b7f570887e 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -180,7 +180,7 @@ extern void heap_page_prune_execute(Buffer buffer,
OffsetNumber *redirected, int nredirected,
OffsetNumber *nowdead, int ndead,
OffsetNumber *nowunused, int nunused);
-extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
+extern OffsetNumber heap_get_root_tuples(Page page, OffsetNumber *root_offsets);
/* in heap/vacuumlazy.c */
struct VacuumParams;