On Fri, Jul 31, 2020 at 04:55:14PM -0500, Justin Pryzby wrote: > Bcc: > Subject: Re: display offset along with block number in vacuum errors > Reply-To: > In-Reply-To: > <cakytnapljjaarw0uebby6g1o0lrzks7ra5n46bfh+nfwsoy...@mail.gmail.com>
whoops > On Wed, Jul 29, 2020 at 12:35:17AM +0530, Mahendra Singh Thalor wrote: > > > Here: > > > > > > @@ -1924,14 +1932,22 @@ lazy_vacuum_page(Relation onerel, BlockNumber > > > blkno, Buffer buffer, > > > BlockNumber tblk; > > > OffsetNumber toff; > > > ItemId itemid; > > > + LVSavedErrInfo loc_saved_err_info; > > > > > > tblk = > > > ItemPointerGetBlockNumber(&dead_tuples->itemptrs[tupindex]); > > > if (tblk != blkno) > > > break; /* past end of > > > tuples for this block */ > > > toff = > > > ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]); > > > + > > > + /* Update error traceback information */ > > > + update_vacuum_error_info(vacrelstats, > > > &loc_saved_err_info, VACUUM_ERRCB_PHASE_VACUUM_HEAP, > > > + blkno, > > > toff); > > > itemid = PageGetItemId(page, toff); > > > ItemIdSetUnused(itemid); > > > unused[uncnt++] = toff; > > > + > > > + /* Revert to the previous phase information for error > > > traceback */ > > > + restore_vacuum_error_info(vacrelstats, > > > &loc_saved_err_info); > > > } > > > > > > I'm not sure why you use restore_vacuum_error_info() at all. It's already > > > called at the end of lazy_vacuum_page() (and others) to allow functions to > > > clean up after their own state changes, rather than requiring callers to > > > do it. > > > I don't think you should use it in a loop, nor introduce another > > > LVSavedErrInfo. > > > > > > Since phase and blkno are already set, I think you should just set > > > vacrelstats->offnum = toff, rather than calling > > > update_vacuum_error_info(). > > > Similar to whats done in lazy_vacuum_heap(): > > > > > > tblk = > > > ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]); > > > vacrelstats->blkno = tblk; > > > > Fixed. > > I rearead this thread and I think the earlier suggestion from Masahiko was > right. The loop around dead_tuples only does ItemIdSetUnused() which updates > the page, which has already been read from disk. On my suggestion, your v2 > patch sets offnum directly, but now I think it's not useful to set at all, > since the whole page is manipulated by PageRepairFragmentation() and > log_heap_clean(). An error there would misleadingly say "..at offset number > MM", but would always show the page's last offset, and not the offset where an > error occured. This makes me question whether offset numbers are ever useful during VACUUM_HEAP, since the real work is done a page at a time (not tuple) or by internal functions that don't update vacrelstats->offno. Note that my initial problem report that led to the errcontext implementation was an ERROR in heap *scan* (not vacuum). So an offset number at that point would've been sufficient. https://www.postgresql.org/message-id/20190808012436.gg11...@telsasoft.com I mentioned that lazy_check_needs_freeze() should save and restore the errinfo, so an error in heap_page_prune() (for example) doesn't get the wrong offset associated with it. So see the attached changes on top of your v2 patch. postgres=# DROP TABLE tt; CREATE TABLE tt(a int) WITH (fillfactor=90); INSERT INTO tt SELECT generate_series(1,99999); VACUUM tt; UPDATE tt SET a=1 WHERE ctid='(345,10)'; UPDATE pg_class SET relfrozenxid=(relfrozenxid::text::int + (1<<30))::int::text::xid WHERE oid='tt'::regclass; VACUUM tt; ERROR: found xmin 1961 from before relfrozenxid 1073743785 CONTEXT: while scanning block 345 of relation "public.tt", item offset 205 Hmm.. is it confusing that the block number is 0-indexed but the offset is 1-indexed ? -- Justin
>From 3202eb7f4b846bc206f24d22610b4ec23b24e7d4 Mon Sep 17 00:00:00 2001 From: Mahendra Singh Thalor <mahi6...@gmail.com> Date: Tue, 28 Jul 2020 11:53:17 -0700 Subject: [PATCH 1/2] Added offset with block number in vacuum errcontext --- src/backend/access/heap/vacuumlazy.c | 62 +++++++++++++++++++++------- 1 file changed, 46 insertions(+), 16 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 1bbc4598f7..78d1db9ae2 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -316,6 +316,7 @@ typedef struct LVRelStats /* Used for error callback */ char *indname; BlockNumber blkno; /* used only for heap operations */ + OffsetNumber offnum; VacErrPhase phase; } LVRelStats; @@ -323,6 +324,7 @@ typedef struct LVRelStats typedef struct LVSavedErrInfo { BlockNumber blkno; + OffsetNumber offnum; VacErrPhase phase; } LVSavedErrInfo; @@ -341,7 +343,8 @@ static void lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, Relation *Irel, int nindexes, bool aggressive); static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats); -static bool lazy_check_needs_freeze(Buffer buf, bool *hastup); +static bool lazy_check_needs_freeze(Buffer buf, bool *hastup, + LVRelStats *vacrelstats); static void lazy_vacuum_all_indexes(Relation onerel, Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, @@ -364,6 +367,7 @@ static void lazy_record_dead_tuple(LVDeadTuples *dead_tuples, static bool lazy_tid_reaped(ItemPointer itemptr, void *state); static int vac_cmp_itemptr(const void *left, const void *right); static bool heap_page_is_all_visible(Relation rel, Buffer buf, + LVRelStats *vacrelstats, TransactionId *visibility_cutoff_xid, bool *all_frozen); static void lazy_parallel_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult **stats, LVRelStats *vacrelstats, LVParallelState *lps, @@ -396,7 +400,8 @@ static LVSharedIndStats *get_indstats(LVShared *lvshared, int n); static bool skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared); static void vacuum_error_callback(void *arg); static void update_vacuum_error_info(LVRelStats *errinfo, LVSavedErrInfo *saved_err_info, - int phase, BlockNumber blkno); + int phase, BlockNumber blkno, + OffsetNumber offnum); static void restore_vacuum_error_info(LVRelStats *errinfo, const LVSavedErrInfo *saved_err_info); @@ -547,7 +552,8 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params, * revert to the previous phase. */ update_vacuum_error_info(vacrelstats, NULL, VACUUM_ERRCB_PHASE_TRUNCATE, - vacrelstats->nonempty_pages); + vacrelstats->nonempty_pages, + InvalidOffsetNumber); lazy_truncate_heap(onerel, vacrelstats); } @@ -957,7 +963,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); update_vacuum_error_info(vacrelstats, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP, - blkno); + blkno, InvalidOffsetNumber); if (blkno == next_unskippable_block) { @@ -1126,7 +1132,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, * to use lazy_check_needs_freeze() for both situations, though. */ LockBuffer(buf, BUFFER_LOCK_SHARE); - if (!lazy_check_needs_freeze(buf, &hastup)) + if (!lazy_check_needs_freeze(buf, &hastup, vacrelstats)) { UnlockReleaseBuffer(buf); vacrelstats->scanned_pages++; @@ -1263,6 +1269,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, { ItemId itemid; + vacrelstats->offnum = offnum; itemid = PageGetItemId(page, offnum); /* Unused items require no processing, but we count 'em */ @@ -1836,7 +1843,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) /* Update error traceback information */ update_vacuum_error_info(vacrelstats, &saved_err_info, VACUUM_ERRCB_PHASE_VACUUM_HEAP, - InvalidBlockNumber); + InvalidBlockNumber, InvalidOffsetNumber); pg_rusage_init(&ru0); npages = 0; @@ -1853,6 +1860,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]); vacrelstats->blkno = tblk; + vacrelstats->offnum = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]); buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL, vac_strategy); if (!ConditionalLockBufferForCleanup(buf)) @@ -1915,7 +1923,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, /* Update error traceback information */ update_vacuum_error_info(vacrelstats, &saved_err_info, VACUUM_ERRCB_PHASE_VACUUM_HEAP, - blkno); + blkno, InvalidOffsetNumber); START_CRIT_SECTION(); @@ -1929,6 +1937,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, if (tblk != blkno) break; /* past end of tuples for this block */ toff = ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]); + vacrelstats->offnum = toff; itemid = PageGetItemId(page, toff); ItemIdSetUnused(itemid); unused[uncnt++] = toff; @@ -1967,7 +1976,8 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, * dirty, exclusively locked, and, if needed, a full page image has been * emitted in the log_heap_clean() above. */ - if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid, + if (heap_page_is_all_visible(onerel, buffer, vacrelstats, + &visibility_cutoff_xid, &all_frozen)) PageSetAllVisible(page); @@ -2006,7 +2016,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, * Also returns a flag indicating whether page contains any tuples at all. */ static bool -lazy_check_needs_freeze(Buffer buf, bool *hastup) +lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelStats *vacrelstats) { Page page = BufferGetPage(buf); OffsetNumber offnum, @@ -2031,6 +2041,7 @@ lazy_check_needs_freeze(Buffer buf, bool *hastup) { ItemId itemid; + vacrelstats->offnum = offnum; itemid = PageGetItemId(page, offnum); /* this should match hastup test in count_nondeletable_pages() */ @@ -2426,7 +2437,7 @@ lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats, vacrelstats->indname = pstrdup(RelationGetRelationName(indrel)); update_vacuum_error_info(vacrelstats, &saved_err_info, VACUUM_ERRCB_PHASE_VACUUM_INDEX, - InvalidBlockNumber); + InvalidBlockNumber, InvalidOffsetNumber); /* Do bulk deletion */ *stats = index_bulk_delete(&ivinfo, *stats, @@ -2486,7 +2497,7 @@ lazy_cleanup_index(Relation indrel, vacrelstats->indname = pstrdup(RelationGetRelationName(indrel)); update_vacuum_error_info(vacrelstats, &saved_err_info, VACUUM_ERRCB_PHASE_INDEX_CLEANUP, - InvalidBlockNumber); + InvalidBlockNumber, InvalidOffsetNumber); *stats = index_vacuum_cleanup(&ivinfo, *stats); @@ -2640,6 +2651,7 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats) */ new_rel_pages = count_nondeletable_pages(onerel, vacrelstats); vacrelstats->blkno = new_rel_pages; + vacrelstats->offnum = InvalidOffsetNumber; if (new_rel_pages >= old_rel_pages) { @@ -2952,6 +2964,7 @@ vac_cmp_itemptr(const void *left, const void *right) */ static bool heap_page_is_all_visible(Relation rel, Buffer buf, + LVRelStats *vacrelstats, TransactionId *visibility_cutoff_xid, bool *all_frozen) { @@ -2976,6 +2989,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, ItemId itemid; HeapTupleData tuple; + vacrelstats->offnum = offnum; itemid = PageGetItemId(page, offnum); /* Unused or redirect line pointers are of no interest */ @@ -3574,14 +3588,26 @@ vacuum_error_callback(void *arg) { case VACUUM_ERRCB_PHASE_SCAN_HEAP: if (BlockNumberIsValid(errinfo->blkno)) - errcontext("while scanning block %u of relation \"%s.%s\"", - errinfo->blkno, errinfo->relnamespace, errinfo->relname); + { + if (OffsetNumberIsValid(errinfo->offnum)) + errcontext("while scanning block %u and offset %u of relation \"%s.%s\"", + errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname); + else + errcontext("while scanning block %u of relation \"%s.%s\"", + errinfo->blkno, errinfo->relnamespace, errinfo->relname); + } break; case VACUUM_ERRCB_PHASE_VACUUM_HEAP: if (BlockNumberIsValid(errinfo->blkno)) - errcontext("while vacuuming block %u of relation \"%s.%s\"", - errinfo->blkno, errinfo->relnamespace, errinfo->relname); + { + if (OffsetNumberIsValid(errinfo->offnum)) + errcontext("while vacuuming block %u and offset %u of relation \"%s.%s\"", + errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname); + else + errcontext("while vacuuming block %u of relation \"%s.%s\"", + errinfo->blkno, errinfo->relnamespace, errinfo->relname); + } break; case VACUUM_ERRCB_PHASE_VACUUM_INDEX: @@ -3589,6 +3615,7 @@ vacuum_error_callback(void *arg) errinfo->indname, errinfo->relnamespace, errinfo->relname); break; + case VACUUM_ERRCB_PHASE_INDEX_CLEANUP: errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"", errinfo->indname, errinfo->relnamespace, errinfo->relname); @@ -3613,15 +3640,17 @@ vacuum_error_callback(void *arg) */ static void update_vacuum_error_info(LVRelStats *errinfo, LVSavedErrInfo *saved_err_info, int phase, - BlockNumber blkno) + BlockNumber blkno, OffsetNumber offnum) { if (saved_err_info) { + saved_err_info->offnum = errinfo->offnum; saved_err_info->blkno = errinfo->blkno; saved_err_info->phase = errinfo->phase; } errinfo->blkno = blkno; + errinfo->offnum = offnum; errinfo->phase = phase; } @@ -3632,5 +3661,6 @@ static void restore_vacuum_error_info(LVRelStats *errinfo, const LVSavedErrInfo *saved_err_info) { errinfo->blkno = saved_err_info->blkno; + errinfo->offnum = saved_err_info->offnum; errinfo->phase = saved_err_info->phase; } -- 2.17.0
>From f9cdde59dfa05413ad4213c5438c06fc552d8f40 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Fri, 31 Jul 2020 21:24:49 -0500 Subject: [PATCH 2/2] fix --- src/backend/access/heap/vacuumlazy.c | 29 +++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 78d1db9ae2..b6015c9297 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -1860,7 +1860,6 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]); vacrelstats->blkno = tblk; - vacrelstats->offnum = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]); buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL, vac_strategy); if (!ConditionalLockBufferForCleanup(buf)) @@ -1937,7 +1936,6 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, if (tblk != blkno) break; /* past end of tuples for this block */ toff = ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]); - vacrelstats->offnum = toff; itemid = PageGetItemId(page, toff); ItemIdSetUnused(itemid); unused[uncnt++] = toff; @@ -2022,6 +2020,7 @@ lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelStats *vacrelstats) OffsetNumber offnum, maxoff; HeapTupleHeader tupleheader; + LVSavedErrInfo saved_err_info; *hastup = false; @@ -2034,6 +2033,11 @@ lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelStats *vacrelstats) if (PageIsNew(page) || PageIsEmpty(page)) return false; + /* Update error traceback information */ + update_vacuum_error_info(vacrelstats, &saved_err_info, + VACUUM_ERRCB_PHASE_SCAN_HEAP, vacrelstats->blkno, + InvalidOffsetNumber); + maxoff = PageGetMaxOffsetNumber(page); for (offnum = FirstOffsetNumber; offnum <= maxoff; @@ -2056,10 +2060,13 @@ lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelStats *vacrelstats) if (heap_tuple_needs_freeze(tupleheader, FreezeLimit, MultiXactCutoff, buf)) - return true; + break; } /* scan along page */ - return false; + /* Revert to the previous phase information for error traceback */ + restore_vacuum_error_info(vacrelstats, &saved_err_info); + + return offnum <= maxoff ? true : false; } /* @@ -2501,7 +2508,7 @@ lazy_cleanup_index(Relation indrel, *stats = index_vacuum_cleanup(&ivinfo, *stats); - /* Revert back to the old phase information for error traceback */ + /* Revert to the old phase information for error traceback */ restore_vacuum_error_info(vacrelstats, &saved_err_info); pfree(vacrelstats->indname); vacrelstats->indname = NULL; @@ -3590,8 +3597,8 @@ vacuum_error_callback(void *arg) if (BlockNumberIsValid(errinfo->blkno)) { if (OffsetNumberIsValid(errinfo->offnum)) - errcontext("while scanning block %u and offset %u of relation \"%s.%s\"", - errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname); + errcontext("while scanning block %u of relation \"%s.%s\", item offset %u", + errinfo->blkno, errinfo->relnamespace, errinfo->relname, errinfo->offnum); else errcontext("while scanning block %u of relation \"%s.%s\"", errinfo->blkno, errinfo->relnamespace, errinfo->relname); @@ -3601,12 +3608,8 @@ vacuum_error_callback(void *arg) case VACUUM_ERRCB_PHASE_VACUUM_HEAP: if (BlockNumberIsValid(errinfo->blkno)) { - if (OffsetNumberIsValid(errinfo->offnum)) - errcontext("while vacuuming block %u and offset %u of relation \"%s.%s\"", - errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname); - else - errcontext("while vacuuming block %u of relation \"%s.%s\"", - errinfo->blkno, errinfo->relnamespace, errinfo->relname); + errcontext("while vacuuming block %u of relation \"%s.%s\"", + errinfo->blkno, errinfo->relnamespace, errinfo->relname); } break; -- 2.17.0