On Fri, Jul 31, 2020 at 04:55:14PM -0500, Justin Pryzby wrote:
> Bcc: 
> Subject: Re: display offset along with block number in vacuum errors
> Reply-To: 
> In-Reply-To: 
> <cakytnapljjaarw0uebby6g1o0lrzks7ra5n46bfh+nfwsoy...@mail.gmail.com>

whoops

> On Wed, Jul 29, 2020 at 12:35:17AM +0530, Mahendra Singh Thalor wrote:
> > > Here:
> > >
> > > @@ -1924,14 +1932,22 @@ lazy_vacuum_page(Relation onerel, BlockNumber 
> > > blkno, Buffer buffer,
> > >                 BlockNumber tblk;
> > >                 OffsetNumber toff;
> > >                 ItemId          itemid;
> > > +               LVSavedErrInfo loc_saved_err_info;
> > >
> > >                 tblk = 
> > > ItemPointerGetBlockNumber(&dead_tuples->itemptrs[tupindex]);
> > >                 if (tblk != blkno)
> > >                         break;                          /* past end of 
> > > tuples for this block */
> > >                 toff = 
> > > ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]);
> > > +
> > > +               /* Update error traceback information */
> > > +               update_vacuum_error_info(vacrelstats, 
> > > &loc_saved_err_info, VACUUM_ERRCB_PHASE_VACUUM_HEAP,
> > > +                                                                blkno, 
> > > toff);
> > >                 itemid = PageGetItemId(page, toff);
> > >                 ItemIdSetUnused(itemid);
> > >                 unused[uncnt++] = toff;
> > > +
> > > +               /* Revert to the previous phase information for error 
> > > traceback */
> > > +               restore_vacuum_error_info(vacrelstats, 
> > > &loc_saved_err_info);
> > >         }
> > >
> > > I'm not sure why you use restore_vacuum_error_info() at all.  It's already
> > > called at the end of lazy_vacuum_page() (and others) to allow functions to
> > > clean up after their own state changes, rather than requiring callers to 
> > > do it.
> > > I don't think you should use it in a loop, nor introduce another
> > > LVSavedErrInfo.
> > >
> > > Since phase and blkno are already set, I think you should just set
> > > vacrelstats->offnum = toff, rather than calling 
> > > update_vacuum_error_info().
> > > Similar to whats done in lazy_vacuum_heap():
> > >
> > >                 tblk = 
> > > ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]);
> > >                 vacrelstats->blkno = tblk;
> > 
> > Fixed.
> 
> I rearead this thread and I think the earlier suggestion from Masahiko was
> right.  The loop around dead_tuples only does ItemIdSetUnused() which updates
> the page, which has already been read from disk.  On my suggestion, your v2
> patch sets offnum directly, but now I think it's not useful to set at all,
> since the whole page is manipulated by PageRepairFragmentation() and
> log_heap_clean().  An error there would misleadingly say "..at offset number
> MM", but would always show the page's last offset, and not the offset where an
> error occured.

This makes me question whether offset numbers are ever useful during
VACUUM_HEAP, since the real work is done a page at a time (not tuple) or by
internal functions that don't update vacrelstats->offno.  Note that my initial
problem report that led to the errcontext implementation was an ERROR in heap
*scan* (not vacuum).  So an offset number at that point would've been
sufficient.
https://www.postgresql.org/message-id/20190808012436.gg11...@telsasoft.com

I mentioned that lazy_check_needs_freeze() should save and restore the errinfo,
so an error in heap_page_prune() (for example) doesn't get the wrong offset
associated with it.

So see the attached changes on top of your v2 patch.

postgres=# DROP TABLE tt; CREATE TABLE tt(a int) WITH (fillfactor=90); INSERT 
INTO tt SELECT generate_series(1,99999); VACUUM tt; UPDATE tt SET a=1 WHERE 
ctid='(345,10)'; UPDATE pg_class SET relfrozenxid=(relfrozenxid::text::int + 
(1<<30))::int::text::xid WHERE oid='tt'::regclass; VACUUM tt;
ERROR:  found xmin 1961 from before relfrozenxid 1073743785
CONTEXT:  while scanning block 345 of relation "public.tt", item offset 205

Hmm.. is it confusing that the block number is 0-indexed but the offset is
1-indexed ?

-- 
Justin
>From 3202eb7f4b846bc206f24d22610b4ec23b24e7d4 Mon Sep 17 00:00:00 2001
From: Mahendra Singh Thalor <mahi6...@gmail.com>
Date: Tue, 28 Jul 2020 11:53:17 -0700
Subject: [PATCH 1/2] Added offset with block number in vacuum errcontext

---
 src/backend/access/heap/vacuumlazy.c | 62 +++++++++++++++++++++-------
 1 file changed, 46 insertions(+), 16 deletions(-)

diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 1bbc4598f7..78d1db9ae2 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -316,6 +316,7 @@ typedef struct LVRelStats
 	/* Used for error callback */
 	char	   *indname;
 	BlockNumber blkno;			/* used only for heap operations */
+	OffsetNumber	offnum;
 	VacErrPhase phase;
 } LVRelStats;
 
@@ -323,6 +324,7 @@ typedef struct LVRelStats
 typedef struct LVSavedErrInfo
 {
 	BlockNumber blkno;
+	OffsetNumber	offnum;
 	VacErrPhase phase;
 } LVSavedErrInfo;
 
@@ -341,7 +343,8 @@ static void lazy_scan_heap(Relation onerel, VacuumParams *params,
 						   LVRelStats *vacrelstats, Relation *Irel, int nindexes,
 						   bool aggressive);
 static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
-static bool lazy_check_needs_freeze(Buffer buf, bool *hastup);
+static bool lazy_check_needs_freeze(Buffer buf, bool *hastup,
+									LVRelStats *vacrelstats);
 static void lazy_vacuum_all_indexes(Relation onerel, Relation *Irel,
 									IndexBulkDeleteResult **stats,
 									LVRelStats *vacrelstats, LVParallelState *lps,
@@ -364,6 +367,7 @@ static void lazy_record_dead_tuple(LVDeadTuples *dead_tuples,
 static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
 static int	vac_cmp_itemptr(const void *left, const void *right);
 static bool heap_page_is_all_visible(Relation rel, Buffer buf,
+									 LVRelStats *vacrelstats,
 									 TransactionId *visibility_cutoff_xid, bool *all_frozen);
 static void lazy_parallel_vacuum_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
 										 LVRelStats *vacrelstats, LVParallelState *lps,
@@ -396,7 +400,8 @@ static LVSharedIndStats *get_indstats(LVShared *lvshared, int n);
 static bool skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared);
 static void vacuum_error_callback(void *arg);
 static void update_vacuum_error_info(LVRelStats *errinfo, LVSavedErrInfo *saved_err_info,
-									 int phase, BlockNumber blkno);
+									 int phase, BlockNumber blkno,
+									 OffsetNumber offnum);
 static void restore_vacuum_error_info(LVRelStats *errinfo, const LVSavedErrInfo *saved_err_info);
 
 
@@ -547,7 +552,8 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params,
 		 * revert to the previous phase.
 		 */
 		update_vacuum_error_info(vacrelstats, NULL, VACUUM_ERRCB_PHASE_TRUNCATE,
-								 vacrelstats->nonempty_pages);
+								 vacrelstats->nonempty_pages,
+								 InvalidOffsetNumber);
 		lazy_truncate_heap(onerel, vacrelstats);
 	}
 
@@ -957,7 +963,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 		pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
 
 		update_vacuum_error_info(vacrelstats, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
-								 blkno);
+								 blkno, InvalidOffsetNumber);
 
 		if (blkno == next_unskippable_block)
 		{
@@ -1126,7 +1132,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			 * to use lazy_check_needs_freeze() for both situations, though.
 			 */
 			LockBuffer(buf, BUFFER_LOCK_SHARE);
-			if (!lazy_check_needs_freeze(buf, &hastup))
+			if (!lazy_check_needs_freeze(buf, &hastup, vacrelstats))
 			{
 				UnlockReleaseBuffer(buf);
 				vacrelstats->scanned_pages++;
@@ -1263,6 +1269,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 		{
 			ItemId		itemid;
 
+			vacrelstats->offnum = offnum;
 			itemid = PageGetItemId(page, offnum);
 
 			/* Unused items require no processing, but we count 'em */
@@ -1836,7 +1843,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
 
 	/* Update error traceback information */
 	update_vacuum_error_info(vacrelstats, &saved_err_info, VACUUM_ERRCB_PHASE_VACUUM_HEAP,
-							 InvalidBlockNumber);
+							 InvalidBlockNumber, InvalidOffsetNumber);
 
 	pg_rusage_init(&ru0);
 	npages = 0;
@@ -1853,6 +1860,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
 
 		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]);
 		vacrelstats->blkno = tblk;
+		vacrelstats->offnum = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]);
 		buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
 								 vac_strategy);
 		if (!ConditionalLockBufferForCleanup(buf))
@@ -1915,7 +1923,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
 
 	/* Update error traceback information */
 	update_vacuum_error_info(vacrelstats, &saved_err_info, VACUUM_ERRCB_PHASE_VACUUM_HEAP,
-							 blkno);
+							 blkno, InvalidOffsetNumber);
 
 	START_CRIT_SECTION();
 
@@ -1929,6 +1937,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
 		if (tblk != blkno)
 			break;				/* past end of tuples for this block */
 		toff = ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]);
+		vacrelstats->offnum = toff;
 		itemid = PageGetItemId(page, toff);
 		ItemIdSetUnused(itemid);
 		unused[uncnt++] = toff;
@@ -1967,7 +1976,8 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
 	 * dirty, exclusively locked, and, if needed, a full page image has been
 	 * emitted in the log_heap_clean() above.
 	 */
-	if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid,
+	if (heap_page_is_all_visible(onerel, buffer, vacrelstats,
+								 &visibility_cutoff_xid,
 								 &all_frozen))
 		PageSetAllVisible(page);
 
@@ -2006,7 +2016,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
  * Also returns a flag indicating whether page contains any tuples at all.
  */
 static bool
-lazy_check_needs_freeze(Buffer buf, bool *hastup)
+lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelStats *vacrelstats)
 {
 	Page		page = BufferGetPage(buf);
 	OffsetNumber offnum,
@@ -2031,6 +2041,7 @@ lazy_check_needs_freeze(Buffer buf, bool *hastup)
 	{
 		ItemId		itemid;
 
+		vacrelstats->offnum = offnum;
 		itemid = PageGetItemId(page, offnum);
 
 		/* this should match hastup test in count_nondeletable_pages() */
@@ -2426,7 +2437,7 @@ lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats,
 	vacrelstats->indname = pstrdup(RelationGetRelationName(indrel));
 	update_vacuum_error_info(vacrelstats, &saved_err_info,
 							 VACUUM_ERRCB_PHASE_VACUUM_INDEX,
-							 InvalidBlockNumber);
+							 InvalidBlockNumber, InvalidOffsetNumber);
 
 	/* Do bulk deletion */
 	*stats = index_bulk_delete(&ivinfo, *stats,
@@ -2486,7 +2497,7 @@ lazy_cleanup_index(Relation indrel,
 	vacrelstats->indname = pstrdup(RelationGetRelationName(indrel));
 	update_vacuum_error_info(vacrelstats, &saved_err_info,
 							 VACUUM_ERRCB_PHASE_INDEX_CLEANUP,
-							 InvalidBlockNumber);
+							 InvalidBlockNumber, InvalidOffsetNumber);
 
 	*stats = index_vacuum_cleanup(&ivinfo, *stats);
 
@@ -2640,6 +2651,7 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats)
 		 */
 		new_rel_pages = count_nondeletable_pages(onerel, vacrelstats);
 		vacrelstats->blkno = new_rel_pages;
+		vacrelstats->offnum = InvalidOffsetNumber;
 
 		if (new_rel_pages >= old_rel_pages)
 		{
@@ -2952,6 +2964,7 @@ vac_cmp_itemptr(const void *left, const void *right)
  */
 static bool
 heap_page_is_all_visible(Relation rel, Buffer buf,
+						 LVRelStats *vacrelstats,
 						 TransactionId *visibility_cutoff_xid,
 						 bool *all_frozen)
 {
@@ -2976,6 +2989,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf,
 		ItemId		itemid;
 		HeapTupleData tuple;
 
+		vacrelstats->offnum = offnum;
 		itemid = PageGetItemId(page, offnum);
 
 		/* Unused or redirect line pointers are of no interest */
@@ -3574,14 +3588,26 @@ vacuum_error_callback(void *arg)
 	{
 		case VACUUM_ERRCB_PHASE_SCAN_HEAP:
 			if (BlockNumberIsValid(errinfo->blkno))
-				errcontext("while scanning block %u of relation \"%s.%s\"",
-						   errinfo->blkno, errinfo->relnamespace, errinfo->relname);
+			{
+				if (OffsetNumberIsValid(errinfo->offnum))
+					errcontext("while scanning block %u and offset %u of relation \"%s.%s\"",
+							   errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
+				else
+					errcontext("while scanning block %u of relation \"%s.%s\"",
+							   errinfo->blkno, errinfo->relnamespace, errinfo->relname);
+			}
 			break;
 
 		case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
 			if (BlockNumberIsValid(errinfo->blkno))
-				errcontext("while vacuuming block %u of relation \"%s.%s\"",
-						   errinfo->blkno, errinfo->relnamespace, errinfo->relname);
+			{
+				if (OffsetNumberIsValid(errinfo->offnum))
+					errcontext("while vacuuming block %u and offset %u of relation \"%s.%s\"",
+							   errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
+				else
+					errcontext("while vacuuming block %u of relation \"%s.%s\"",
+							   errinfo->blkno, errinfo->relnamespace, errinfo->relname);
+			}
 			break;
 
 		case VACUUM_ERRCB_PHASE_VACUUM_INDEX:
@@ -3589,6 +3615,7 @@ vacuum_error_callback(void *arg)
 					   errinfo->indname, errinfo->relnamespace, errinfo->relname);
 			break;
 
+
 		case VACUUM_ERRCB_PHASE_INDEX_CLEANUP:
 			errcontext("while cleaning up index \"%s\" of relation \"%s.%s\"",
 					   errinfo->indname, errinfo->relnamespace, errinfo->relname);
@@ -3613,15 +3640,17 @@ vacuum_error_callback(void *arg)
  */
 static void
 update_vacuum_error_info(LVRelStats *errinfo, LVSavedErrInfo *saved_err_info, int phase,
-						 BlockNumber blkno)
+						 BlockNumber blkno, OffsetNumber offnum)
 {
 	if (saved_err_info)
 	{
+		saved_err_info->offnum = errinfo->offnum;
 		saved_err_info->blkno = errinfo->blkno;
 		saved_err_info->phase = errinfo->phase;
 	}
 
 	errinfo->blkno = blkno;
+	errinfo->offnum = offnum;
 	errinfo->phase = phase;
 }
 
@@ -3632,5 +3661,6 @@ static void
 restore_vacuum_error_info(LVRelStats *errinfo, const LVSavedErrInfo *saved_err_info)
 {
 	errinfo->blkno = saved_err_info->blkno;
+	errinfo->offnum = saved_err_info->offnum;
 	errinfo->phase = saved_err_info->phase;
 }
-- 
2.17.0

>From f9cdde59dfa05413ad4213c5438c06fc552d8f40 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Fri, 31 Jul 2020 21:24:49 -0500
Subject: [PATCH 2/2] fix

---
 src/backend/access/heap/vacuumlazy.c | 29 +++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 78d1db9ae2..b6015c9297 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -1860,7 +1860,6 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
 
 		tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]);
 		vacrelstats->blkno = tblk;
-		vacrelstats->offnum = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]);
 		buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
 								 vac_strategy);
 		if (!ConditionalLockBufferForCleanup(buf))
@@ -1937,7 +1936,6 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
 		if (tblk != blkno)
 			break;				/* past end of tuples for this block */
 		toff = ItemPointerGetOffsetNumber(&dead_tuples->itemptrs[tupindex]);
-		vacrelstats->offnum = toff;
 		itemid = PageGetItemId(page, toff);
 		ItemIdSetUnused(itemid);
 		unused[uncnt++] = toff;
@@ -2022,6 +2020,7 @@ lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelStats *vacrelstats)
 	OffsetNumber offnum,
 				maxoff;
 	HeapTupleHeader tupleheader;
+	LVSavedErrInfo saved_err_info;
 
 	*hastup = false;
 
@@ -2034,6 +2033,11 @@ lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelStats *vacrelstats)
 	if (PageIsNew(page) || PageIsEmpty(page))
 		return false;
 
+	/* Update error traceback information */
+	update_vacuum_error_info(vacrelstats, &saved_err_info,
+			VACUUM_ERRCB_PHASE_SCAN_HEAP, vacrelstats->blkno,
+			InvalidOffsetNumber);
+
 	maxoff = PageGetMaxOffsetNumber(page);
 	for (offnum = FirstOffsetNumber;
 		 offnum <= maxoff;
@@ -2056,10 +2060,13 @@ lazy_check_needs_freeze(Buffer buf, bool *hastup, LVRelStats *vacrelstats)
 
 		if (heap_tuple_needs_freeze(tupleheader, FreezeLimit,
 									MultiXactCutoff, buf))
-			return true;
+			break;
 	}							/* scan along page */
 
-	return false;
+	/* Revert to the previous phase information for error traceback */
+	restore_vacuum_error_info(vacrelstats, &saved_err_info);
+
+	return offnum <= maxoff ? true : false;
 }
 
 /*
@@ -2501,7 +2508,7 @@ lazy_cleanup_index(Relation indrel,
 
 	*stats = index_vacuum_cleanup(&ivinfo, *stats);
 
-	/* Revert back to the old phase information for error traceback */
+	/* Revert to the old phase information for error traceback */
 	restore_vacuum_error_info(vacrelstats, &saved_err_info);
 	pfree(vacrelstats->indname);
 	vacrelstats->indname = NULL;
@@ -3590,8 +3597,8 @@ vacuum_error_callback(void *arg)
 			if (BlockNumberIsValid(errinfo->blkno))
 			{
 				if (OffsetNumberIsValid(errinfo->offnum))
-					errcontext("while scanning block %u and offset %u of relation \"%s.%s\"",
-							   errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
+					errcontext("while scanning block %u of relation \"%s.%s\", item offset %u",
+							   errinfo->blkno, errinfo->relnamespace, errinfo->relname, errinfo->offnum);
 				else
 					errcontext("while scanning block %u of relation \"%s.%s\"",
 							   errinfo->blkno, errinfo->relnamespace, errinfo->relname);
@@ -3601,12 +3608,8 @@ vacuum_error_callback(void *arg)
 		case VACUUM_ERRCB_PHASE_VACUUM_HEAP:
 			if (BlockNumberIsValid(errinfo->blkno))
 			{
-				if (OffsetNumberIsValid(errinfo->offnum))
-					errcontext("while vacuuming block %u and offset %u of relation \"%s.%s\"",
-							   errinfo->blkno, errinfo->offnum, errinfo->relnamespace, errinfo->relname);
-				else
-					errcontext("while vacuuming block %u of relation \"%s.%s\"",
-							   errinfo->blkno, errinfo->relnamespace, errinfo->relname);
+				errcontext("while vacuuming block %u of relation \"%s.%s\"",
+						   errinfo->blkno, errinfo->relnamespace, errinfo->relname);
 			}
 			break;
 
-- 
2.17.0

Reply via email to