Steps to reproduce (REL_12_STABLE):
1) Create table with primary key, create brin index, fill table with some initial data:
create table tbl (id int primary key, a int) with (fillfactor=50); create index idx on tbl using brin (a) with (autosummarize=on); insert into tbl select i, i from generate_series(0,100000) as i; 2) Run script test_brin.sql using pgbench: pgbench postgres -f ../review/brin_test.sql -n -T 120The script is a bit messy because I was trying to reproduce a problematic workload. Though I didn't manage to simplify it. The idea is that it inserts new values into the table to produce unindexed pages and also updates some values to trigger HOT-updates on these pages.
3) Open psql session and run brin_summarize_new_values select brin_summarize_new_values('idx'::regclass::oid); \watch 2 Wait a bit. And in psql you will see the ERROR.This error is caused by the problem with root_offsets array bounds. It occurs if a new HOT tuple was inserted after we've collected root_offsets, and thus we don't have root_offset for tuple's offnum. Concurrent insertions are possible, because brin_summarize_new_values() only holds ShareUpdateLock on table and no lock (only pin) on the page.
The draft fix is in the attachments. It saves root_offsets_size and checks that we only access valid fields.
Patch also adds some debug messages, just to ensure that problem was caught. TODO: - check if heapam_index_validate_scan() has the same problem - code cleanup - test other PostgreSQL versions[1] https://www.postgresql.org/message-id/flat/CA%2BTgmoYgwjmmjK24Qxb_vWAu8_Hh7gfVFcr3%2BR7ocdLvYOWJXg%40mail.gmail.com
-- Anastasia Lubennikova Postgres Professional: http://www.postgrespro.com The Russian Postgres Company
commit 71bdcd268386c4240613b080b9ebd5ae935c1405 Author: anastasia <a.lubennik...@postgrespro.ru> Date: Thu Jul 23 17:55:16 2020 +0300 WIP Fix root_offsets out of bound error in brin_summarize_new_values() diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index fc19f40a2e3..34f74a61ea3 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -1176,6 +1176,7 @@ heapam_index_build_range_scan(Relation heapRelation, BlockNumber previous_blkno = InvalidBlockNumber; BlockNumber root_blkno = InvalidBlockNumber; OffsetNumber root_offsets[MaxHeapTuplesPerPage]; + OffsetNumber root_offsets_size = 0; /* * sanity checks @@ -1355,7 +1356,7 @@ heapam_index_build_range_scan(Relation heapRelation, Page page = BufferGetPage(hscan->rs_cbuf); LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); - heap_get_root_tuples(page, root_offsets); + root_offsets_size = heap_get_root_tuples(page, root_offsets); LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); root_blkno = hscan->rs_cblock; @@ -1643,13 +1644,31 @@ heapam_index_build_range_scan(Relation heapRelation, rootTuple = *heapTuple; offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self); + elog(LOG, "HeapTupleIsHeapOnly. Check Offset last_valid %d offnum %d", root_offsets_size, offnum); + + if (root_offsets_size <= offnum) + { + Page page = BufferGetPage(hscan->rs_cbuf); + + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("DEBUG failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"", + ItemPointerGetBlockNumber(&heapTuple->t_self), + offnum, + RelationGetRelationName(heapRelation)))); + + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); + root_offsets_size = heap_get_root_tuples(page, root_offsets); + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); + } + if (!OffsetNumberIsValid(root_offsets[offnum - 1])) ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"", - ItemPointerGetBlockNumber(&heapTuple->t_self), - offnum, - RelationGetRelationName(heapRelation)))); + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"", + ItemPointerGetBlockNumber(&heapTuple->t_self), + offnum, + RelationGetRelationName(heapRelation)))); ItemPointerSetOffsetNumber(&rootTuple.t_self, root_offsets[offnum - 1]); @@ -1820,6 +1839,7 @@ heapam_index_validate_scan(Relation heapRelation, if (HeapTupleIsHeapOnly(heapTuple)) { root_offnum = root_offsets[root_offnum - 1]; + // TODO add check of root_offsets_size like in heapam_index_build_range_scan() if (!OffsetNumberIsValid(root_offnum)) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 0efe3ce9995..9b2bff1b8fd 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -741,11 +741,11 @@ heap_page_prune_execute(Buffer buffer, * holds a pin on the buffer. Once pin is released, a tuple might be pruned * and reused by a completely unrelated tuple. */ -void +OffsetNumber heap_get_root_tuples(Page page, OffsetNumber *root_offsets) { - OffsetNumber offnum, - maxoff; + OffsetNumber offnum = 0; + OffsetNumber maxoff; MemSet(root_offsets, 0, MaxHeapTuplesPerPage * sizeof(OffsetNumber)); @@ -832,4 +832,5 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) priorXmax = HeapTupleHeaderGetUpdateXid(htup); } } + return offnum; } diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index dffb57bf11a..45f8622c6cb 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -181,7 +181,7 @@ extern void heap_page_prune_execute(Buffer buffer, OffsetNumber *redirected, int nredirected, OffsetNumber *nowdead, int ndead, OffsetNumber *nowunused, int nunused); -extern void heap_get_root_tuples(Page page, OffsetNumber *root_offsets); +extern OffsetNumber heap_get_root_tuples(Page page, OffsetNumber *root_offsets); /* in heap/syncscan.c */ extern void ss_report_location(Relation rel, BlockNumber location);
brin_test.sql
Description: application/sql