On Mon, Feb 17, 2020 at 10:47:47AM +0900, Masahiko Sawada wrote: > Thank you for updating the patch! > > 1. > The above lines need a new line.
Done, thanks. > 2. > In lazy_vacuum_heap, we set the error context and then call > pg_rusage_init whereas lazy_vacuum_index and lazy_cleanup_index does > the opposite. And lazy_scan_heap also call pg_rusage first. I think > lazy_vacuum_heap should follow them for consistency. That is, we can > set error context after pages = 0. Right. Maybe I did it the other way because the two uses of PROGRESS_VACUUM_PHASE_VACUUM_HEAP were right next to each other. > 3. > We have 2 other phases: PROGRESS_VACUUM_PHASE_TRUNCATE and > PROGRESS_VACUUM_PHASE_FINAL_CLEANUP. I think it's better to set the > error context in lazy_truncate_heap as well. What do you think? > > I'm not sure it's worth to set the error context for FINAL_CLENAUP but > we should add the case of FINAL_CLENAUP to vacuum_error_callback as > no-op or explain it as a comment even if we don't. I don't have strong feelings either way. I looked a bit at the truncation phase. It also truncates the FSM and VM forks, which could be misleading if the error was in one of those files and not the main filenode. I'd have to find a way to test it... ...and was pleasantly surprised to see that earlier phases don't choke if I (re)create a fake 4GB table like: postgres=# CREATE TABLE trunc(i int); CREATE TABLE postgres=# \x\t Expanded display is on. Tuples only is on. postgres=# SELECT relfilenode FROM pg_class WHERE oid='trunc'::regclass; relfilenode | 59068 postgres=# \! bash -xc 'truncate -s 1G ./pgsql13.dat111/base/12689/59068{,.{1..3}}' + truncate -s 1G ./pgsql13.dat111/base/12689/59074 ./pgsql13.dat111/base/12689/59074.1 ./pgsql13.dat111/base/12689/59074.2 ./pgsql13.dat111/base/12689/59074.3 postgres=# \timing Timing is on. postgres=# SET client_min_messages=debug; SET statement_timeout='13s'; VACUUM VERBOSE trunc; INFO: vacuuming "public.trunc" INFO: "trunc": found 0 removable, 0 nonremovable row versions in 524288 out of 524288 pages DETAIL: 0 dead row versions cannot be removed yet, oldest xmin: 2098 There were 0 unused item identifiers. Skipped 0 pages due to buffer pins, 0 frozen pages. 524288 pages are entirely empty. CPU: user: 5.00 s, system: 1.50 s, elapsed: 6.52 s. ERROR: canceling statement due to statement timeout CONTEXT: while truncating relation "public.trunc" to 0 blocks The callback surrounding RelationTruncate() seems hard to hit unless you add CHECK_FOR_INTERRUPTS(); the same was true for index cleanup. The truncation uses a prefetch, which is more likely to hit any lowlevel error, so I added callback there, too. BTW, for the index cases, I didn't like repeating the namespace here, but WDYT ? |CONTEXT: while vacuuming index "public.t_i_idx3" of relation "t" Thanks for rerere-reviewing. -- Justin
>From 8295224470e0ce236025dfbff50de052978fae1d Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Thu, 12 Dec 2019 20:54:37 -0600 Subject: [PATCH v19 1/2] vacuum errcontext to show block being processed Discussion: https://www.postgresql.org/message-id/20191120210600.gc30...@telsasoft.com --- src/backend/access/heap/vacuumlazy.c | 130 +++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index a23cdef..5e734ee 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -292,6 +292,14 @@ typedef struct LVRelStats bool lock_waiter_detected; } LVRelStats; +typedef struct +{ + char *relnamespace; + char *relname; + char *indname; + BlockNumber blkno; /* used only for heap operations */ + int phase; /* Reusing same enums as for progress reporting */ +} vacuum_error_callback_arg; /* A few variables that don't seem worth passing around as parameters */ static int elevel = -1; @@ -361,6 +369,9 @@ static void end_parallel_vacuum(Relation *Irel, IndexBulkDeleteResult **stats, LVParallelState *lps, int nindexes); static LVSharedIndStats *get_indstats(LVShared *lvshared, int n); static bool skip_parallel_vacuum_index(Relation indrel, LVShared *lvshared); +static void vacuum_error_callback(void *arg); +static void init_vacuum_error_callback(ErrorContextCallback *errcallback, + vacuum_error_callback_arg *errcbarg, Relation onerel, int phase); /* @@ -724,6 +735,8 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, PROGRESS_VACUUM_MAX_DEAD_TUPLES }; int64 initprog_val[3]; + ErrorContextCallback errcallback; + vacuum_error_callback_arg errcbarg; pg_rusage_init(&ru0); @@ -870,6 +883,11 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, else skipping_blocks = false; + /* Setup error traceback support for ereport() */ + init_vacuum_error_callback(&errcallback, &errcbarg, onerel, + PROGRESS_VACUUM_PHASE_SCAN_HEAP); + error_context_stack = &errcallback; + for (blkno = 0; blkno < nblocks; blkno++) { Buffer buf; @@ -891,6 +909,8 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, #define FORCE_CHECK_PAGE() \ (blkno == nblocks - 1 && should_attempt_truncation(params, vacrelstats)) + errcbarg.blkno = blkno; + pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); if (blkno == next_unskippable_block) @@ -987,6 +1007,9 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, vmbuffer = InvalidBuffer; } + /* Pop the error context stack while calling vacuum */ + error_context_stack = errcallback.previous; + /* Work on all the indexes, then the heap */ lazy_vacuum_all_indexes(onerel, Irel, indstats, vacrelstats, lps, nindexes); @@ -1011,6 +1034,9 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, /* Report that we are once again scanning the heap */ pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_SCAN_HEAP); + + /* Set the error context while continuing heap scan */ + error_context_stack = &errcallback; } /* @@ -1597,6 +1623,9 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, RecordPageWithFreeSpace(onerel, blkno, freespace); } + /* Pop the error context stack */ + error_context_stack = errcallback.previous; + /* report that everything is scanned and vacuumed */ pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); @@ -1772,6 +1801,8 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) int npages; PGRUsage ru0; Buffer vmbuffer = InvalidBuffer; + ErrorContextCallback errcallback; + vacuum_error_callback_arg errcbarg; /* Report that we are now vacuuming the heap */ pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, @@ -1780,6 +1811,13 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) pg_rusage_init(&ru0); npages = 0; + /* + * Setup error traceback support for ereport() + */ + init_vacuum_error_callback(&errcallback, &errcbarg, onerel, + PROGRESS_VACUUM_PHASE_VACUUM_HEAP); + error_context_stack = &errcallback; + tupindex = 0; while (tupindex < vacrelstats->dead_tuples->num_tuples) { @@ -1791,6 +1829,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) vacuum_delay_point(); tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples->itemptrs[tupindex]); + errcbarg.blkno = tblk; buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL, vac_strategy); if (!ConditionalLockBufferForCleanup(buf)) @@ -1811,6 +1850,9 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) npages++; } + /* Pop the error context stack */ + error_context_stack = errcallback.previous; + if (BufferIsValid(vmbuffer)) { ReleaseBuffer(vmbuffer); @@ -2318,6 +2360,8 @@ lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats, IndexVacuumInfo ivinfo; const char *msg; PGRUsage ru0; + ErrorContextCallback errcallback; + vacuum_error_callback_arg errcbarg; pg_rusage_init(&ru0); @@ -2329,10 +2373,18 @@ lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats, ivinfo.num_heap_tuples = reltuples; ivinfo.strategy = vac_strategy; + /* Setup error traceback support for ereport() */ + init_vacuum_error_callback(&errcallback, &errcbarg, indrel, + PROGRESS_VACUUM_PHASE_VACUUM_INDEX); + error_context_stack = &errcallback; + /* Do bulk deletion */ *stats = index_bulk_delete(&ivinfo, *stats, lazy_tid_reaped, (void *) dead_tuples); + /* Pop the error context stack */ + error_context_stack = errcallback.previous; + if (IsParallelWorker()) msg = gettext_noop("scanned index \"%s\" to remove %d row versions by parallel vacuum worker"); else @@ -2359,6 +2411,8 @@ lazy_cleanup_index(Relation indrel, IndexVacuumInfo ivinfo; const char *msg; PGRUsage ru0; + vacuum_error_callback_arg errcbarg; + ErrorContextCallback errcallback; pg_rusage_init(&ru0); @@ -2371,8 +2425,16 @@ lazy_cleanup_index(Relation indrel, ivinfo.num_heap_tuples = reltuples; ivinfo.strategy = vac_strategy; + /* Setup error traceback support for ereport() */ + init_vacuum_error_callback(&errcallback, &errcbarg, indrel, + PROGRESS_VACUUM_PHASE_INDEX_CLEANUP); + error_context_stack = &errcallback; + *stats = index_vacuum_cleanup(&ivinfo, *stats); + /* Pop the error context stack */ + error_context_stack = errcallback.previous; + if (!(*stats)) return; @@ -3375,3 +3437,71 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) table_close(onerel, ShareUpdateExclusiveLock); pfree(stats); } + +/* + * Error context callback for errors occurring during vacuum. + */ +static void +vacuum_error_callback(void *arg) +{ + vacuum_error_callback_arg *cbarg = arg; + + switch (cbarg->phase) { + case PROGRESS_VACUUM_PHASE_SCAN_HEAP: + if (BlockNumberIsValid(cbarg->blkno)) + errcontext(_("while scanning block %u of relation \"%s.%s\""), + cbarg->blkno, cbarg->relnamespace, cbarg->relname); + break; + + case PROGRESS_VACUUM_PHASE_VACUUM_HEAP: + if (BlockNumberIsValid(cbarg->blkno)) + errcontext(_("while vacuuming block %u of relation \"%s.%s\""), + cbarg->blkno, cbarg->relnamespace, cbarg->relname); + break; + + case PROGRESS_VACUUM_PHASE_VACUUM_INDEX: + errcontext(_("while vacuuming index \"%s.%s\" of relation \"%s\""), + cbarg->relnamespace, cbarg->indname, cbarg->relname); + break; + + case PROGRESS_VACUUM_PHASE_INDEX_CLEANUP: + errcontext(_("while cleaning up index \"%s.%s\" of relation \"%s\""), + cbarg->relnamespace, cbarg->indname, cbarg->relname); + break; + + case PROGRESS_VACUUM_PHASE_TRUNCATE: + case PROGRESS_VACUUM_PHASE_FINAL_CLEANUP: + default: + return; /* Shouldn't happen: do nothing */ + } +} + +/* Initialize vacuum error callback */ +static void +init_vacuum_error_callback(ErrorContextCallback *errcallback, + vacuum_error_callback_arg *errcbarg, Relation rel, int phase) +{ + switch (phase) + { + case PROGRESS_VACUUM_PHASE_SCAN_HEAP: + case PROGRESS_VACUUM_PHASE_VACUUM_HEAP: + errcbarg->relname = RelationGetRelationName(rel); + errcbarg->indname = NULL; /* Not used for heap */ + break; + + case PROGRESS_VACUUM_PHASE_VACUUM_INDEX: + case PROGRESS_VACUUM_PHASE_INDEX_CLEANUP: + /* rel is an index relation in index vacuum case */ + errcbarg->relname = get_rel_name(rel->rd_index->indrelid); + errcbarg->indname = RelationGetRelationName(rel); + break; + } + + errcbarg->relnamespace = get_namespace_name(RelationGetNamespace(rel)); + errcbarg->blkno = InvalidBlockNumber; /* Not known yet */ + errcbarg->phase = phase; + + errcallback->callback = vacuum_error_callback; + errcallback->arg = errcbarg; + errcallback->previous = error_context_stack; +} -- 2.7.4
>From 479f81758119f66ea57b3047c07fa4bd27341f2f Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Sun, 16 Feb 2020 20:25:13 -0600 Subject: [PATCH v19 2/2] add callback for truncation --- src/backend/access/heap/vacuumlazy.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 5e734ee..de3b92f 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -2503,9 +2503,15 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats) BlockNumber new_rel_pages; PGRUsage ru0; int lock_retry; + ErrorContextCallback errcallback; + vacuum_error_callback_arg errcbarg; pg_rusage_init(&ru0); + /* Setup error traceback support for ereport() */ + init_vacuum_error_callback(&errcallback, &errcbarg, onerel, + PROGRESS_VACUUM_PHASE_TRUNCATE); + /* Report that we are now truncating */ pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_TRUNCATE); @@ -2586,11 +2592,18 @@ lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats) return; } + /* Setup error traceback support for ereport() */ + errcbarg.blkno = new_rel_pages; + error_context_stack = &errcallback; + /* * Okay to truncate. */ RelationTruncate(onerel, new_rel_pages); + /* Pop the error context stack */ + error_context_stack = errcallback.previous; + /* * We can release the exclusive lock as soon as we have truncated. * Other backends can't safely access the relation until they have @@ -2630,6 +2643,12 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats) BlockNumber blkno; BlockNumber prefetchedUntil; instr_time starttime; + ErrorContextCallback errcallback; + vacuum_error_callback_arg errcbarg; + + /* Setup error traceback support for ereport() */ + init_vacuum_error_callback(&errcallback, &errcbarg, onerel, + PROGRESS_VACUUM_PHASE_TRUNCATE); /* Initialize the starttime if we check for conflicting lock requests */ INSTR_TIME_SET_CURRENT(starttime); @@ -2693,6 +2712,10 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats) blkno--; + /* Setup error traceback support for ereport() */ + errcbarg.blkno = blkno; + error_context_stack = &errcallback; + /* If we haven't prefetched this lot yet, do so now. */ if (prefetchedUntil > blkno) { @@ -2711,6 +2734,9 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats) buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno, RBM_NORMAL, vac_strategy); + /* Pop the error context stack */ + error_context_stack = errcallback.previous; + /* In this phase we only need shared access to the buffer */ LockBuffer(buf, BUFFER_LOCK_SHARE); @@ -3470,6 +3496,10 @@ vacuum_error_callback(void *arg) break; case PROGRESS_VACUUM_PHASE_TRUNCATE: + errcontext(_("while truncating relation \"%s.%s\" to %u blocks"), + cbarg->relnamespace, cbarg->relname, cbarg->blkno); + break; + case PROGRESS_VACUUM_PHASE_FINAL_CLEANUP: default: return; /* Shouldn't happen: do nothing */ @@ -3485,6 +3515,7 @@ init_vacuum_error_callback(ErrorContextCallback *errcallback, { case PROGRESS_VACUUM_PHASE_SCAN_HEAP: case PROGRESS_VACUUM_PHASE_VACUUM_HEAP: + case PROGRESS_VACUUM_PHASE_TRUNCATE: errcbarg->relname = RelationGetRelationName(rel); errcbarg->indname = NULL; /* Not used for heap */ break; -- 2.7.4