Going from one to two vacuum workers doesn't scale as well as we'd like, and this thread is about ways of mitigating that.
The easiest thing that shows some benefit is separating the two paths by preventing inlining of one or the other: #define RT_PREFIX shared_ts #define RT_SHMEM -#define RT_SCOPE static +#define RT_SCOPE static pg_noinline This small change is enough to show a difference in an unrealistic test designed to reduce I/O and overemphasize TID storage and retrieval: 0001: a patch from Masahiko to time the vacuum stages separately 0002: force non-parallel vacuum to use shared memory for easier testing 0003: the above 1-line change drop table if exists test; create unlogged table test (a int) with (AUTOVACUUM_ENABLED=false); insert into test select * from generate_series(1,20_000_000,1); create index on test (a); select pg_prewarm('test'); select pg_prewarm('test_a_idx'); delete from test where a < 20_000_000; vacuum verbose test; Two runs each: 0002: local: INFO: heap_scan 1731 index_vac 586 heap_vac 119 INFO: heap_scan 1772 index_vac 592 heap_vac 119 shared: INFO: heap_scan 2059 index_vac 894 heap_vac 121 INFO: heap_scan 2070 index_vac 893 heap_vac 120 0003: local: INFO: heap_scan 1669 index_vac 543 heap_vac 119 INFO: heap_scan 1670 index_vac 543 heap_vac 119 shared: INFO: heap_scan 1978 index_vac 839 heap_vac 120 INFO: heap_scan 1989 index_vac 838 heap_vac 119 This makes both paths faster, and is pretty good for a 1-line change, so I intend to commit 0003. In the real world, with disk and WAL, this would probably only be noticeable on tables with many indexes, but I've encountered plenty of those. Note that phase III is unaffected, and I think it's because the iteration state relies on saved local pointers. We could expand that concept, but it'd be invasive and unreliable. There are other things we can try, and I'll update the thread as I find them. -- John Naylor Amazon Web Services
From f0bed5ceb72c34a9ad541976247d0ae2b88d17d8 Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Tue, 4 Mar 2025 12:32:46 +0700 Subject: [PATCH v1 3/3] Prevent inlining of shared memory TID store functions --- src/backend/access/common/tidstore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/access/common/tidstore.c b/src/backend/access/common/tidstore.c index 5bd75fb499c..c17ab0a40fa 100644 --- a/src/backend/access/common/tidstore.c +++ b/src/backend/access/common/tidstore.c @@ -100,7 +100,7 @@ typedef struct BlocktableEntry #define RT_PREFIX shared_ts #define RT_SHMEM -#define RT_SCOPE static +#define RT_SCOPE static pg_noinline #define RT_DECLARE #define RT_DEFINE #define RT_VALUE_TYPE BlocktableEntry -- 2.48.1
From de93e4d5a7e86691393df129dd14df3a52b7fee6 Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Mon, 3 Mar 2025 16:40:15 +0700 Subject: [PATCH v1 1/3] time vacuum stages patch from Masahiko Sawada XXX not for commit --- src/backend/access/heap/vacuumlazy.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 1af18a78a2b..a5913668193 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -419,6 +419,7 @@ typedef struct LVSavedErrInfo VacErrPhase phase; } LVSavedErrInfo; +static int _heap_scan = 0, _index_vac = 0, _heap_vac = 0; /* non-export function prototypes */ static void lazy_scan_heap(LVRelState *vacrel); @@ -830,12 +831,16 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, lazy_check_wraparound_failsafe(vacrel); dead_items_alloc(vacrel, params->nworkers); + TimestampTz start = GetCurrentTimestamp(); + /* * Call lazy_scan_heap to perform all required heap pruning, index * vacuuming, and heap vacuuming (plus related processing) */ lazy_scan_heap(vacrel); + _heap_scan = (int) (GetCurrentTimestamp() - start) / 1000; + /* * Free resources managed by dead_items_alloc. This ends parallel mode in * passing when necessary. @@ -932,6 +937,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, starttime); pgstat_progress_end_command(); + elog(INFO, "heap_scan %d index_vac %d heap_vac %d", + _heap_scan, _index_vac, _heap_vac); + if (instrument) { TimestampTz endtime = GetCurrentTimestamp(); @@ -2519,7 +2527,9 @@ lazy_vacuum(LVRelState *vacrel) * We successfully completed a round of index vacuuming. Do related * heap vacuuming now. 
*/ + TimestampTz start = GetCurrentTimestamp(); lazy_vacuum_heap_rel(vacrel); + _heap_vac = (int) (GetCurrentTimestamp() - start) / 1000; } else { @@ -2588,6 +2598,8 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) progress_start_val[1] = vacrel->nindexes; pgstat_progress_update_multi_param(2, progress_start_index, progress_start_val); + TimestampTz start = GetCurrentTimestamp(); + if (!ParallelVacuumIsActive(vacrel)) { for (int idx = 0; idx < vacrel->nindexes; idx++) @@ -2625,6 +2637,8 @@ lazy_vacuum_all_indexes(LVRelState *vacrel) allindexes = false; } + _index_vac = (int) (GetCurrentTimestamp() - start) / 1000; + /* * We delete all LP_DEAD items from the first heap pass in all indexes on * each call here (except calls where we choose to do the failsafe). This -- 2.48.1
From 8d23b2a7a921caf9e717a437c9de90c4d9eb2204 Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Mon, 3 Mar 2025 17:40:38 +0700 Subject: [PATCH v1 2/3] Force shared TID store with a single vacuum worker XXX not for commit --- src/backend/access/heap/vacuumlazy.c | 4 +++- src/backend/commands/vacuumparallel.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index a5913668193..144be988bbe 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -3478,7 +3478,7 @@ dead_items_alloc(LVRelState *vacrel, int nworkers) * be used for an index, so we invoke parallelism only if there are at * least two indexes on a table. */ - if (nworkers >= 0 && vacrel->nindexes > 1 && vacrel->do_index_vacuuming) + if (nworkers >= 0 && vacrel->nindexes > 0 && vacrel->do_index_vacuuming) { /* * Since parallel workers cannot access data in temporary tables, we @@ -3514,6 +3514,8 @@ dead_items_alloc(LVRelState *vacrel, int nworkers) } } + elog(ERROR, "shouldn't reach here"); + /* * Serial VACUUM case. Allocate both dead_items and dead_items_info * locally. diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c index 2b9d548cdeb..1c2f9e34dd1 100644 --- a/src/backend/commands/vacuumparallel.c +++ b/src/backend/commands/vacuumparallel.c @@ -272,7 +272,7 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes, parallel_workers = parallel_vacuum_compute_workers(indrels, nindexes, nrequested_workers, will_parallel_vacuum); - if (parallel_workers <= 0) + if (false) //(parallel_workers <= 0) { /* Can't perform vacuum in parallel -- return NULL */ pfree(will_parallel_vacuum); -- 2.48.1