Hi, On Mon, Feb 03, 2025 at 02:05:51PM -0600, Nathan Bossart wrote: > Barring objections, I am planning to commit this one soon. I might move > the addition of analyze_delay_point() to its own patch, but otherwise I > think it looks good to go.
Yeah, I think that having analyze_delay_point() in its own patch makes sense. It's done that way in the attached and allows 0002 to be focus on the main feature. Regards, -- Bertrand Drouvot PostgreSQL Contributors Team RDS Open Source Databases Amazon Web Services: https://aws.amazon.com
>From 0ba4f51ae63e9b8b4d6987d52465f2a284820d4d Mon Sep 17 00:00:00 2001 From: Bertrand Drouvot <bertranddrouvot...@gmail.com> Date: Tue, 4 Feb 2025 09:05:33 +0000 Subject: [PATCH v16 1/2] Introduce analyze_delay_point() Currently vacuum_delay_point() is being used in analyze code paths. This commit introduces analyze_delay_point() to make the analyze/vacuum split clear. The "is_analyze" bool passed as a parameter to the new vacuum_delay_point_internal is not being used (but will be used in a following commit tracking the timing of cost-based vacuum/analyze delay). --- contrib/file_fdw/file_fdw.c | 2 +- src/backend/commands/analyze.c | 10 +++---- src/backend/commands/vacuum.c | 29 +++++++++++++++---- src/backend/tsearch/ts_typanalyze.c | 2 +- src/backend/utils/adt/array_typanalyze.c | 2 +- src/backend/utils/adt/rangetypes_typanalyze.c | 2 +- src/include/commands/vacuum.h | 1 + 7 files changed, 34 insertions(+), 14 deletions(-) 3.9% contrib/file_fdw/ 81.1% src/backend/commands/ 3.9% src/backend/tsearch/ 7.8% src/backend/utils/adt/ 3.1% src/include/commands/ diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index 678e754b2b9..44dfb5c5a54 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -1237,7 +1237,7 @@ file_acquire_sample_rows(Relation onerel, int elevel, for (;;) { /* Check for user-requested abort or sleep */ - vacuum_delay_point(); + analyze_delay_point(); /* Fetch next row */ MemoryContextReset(tupcontext); diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index f8da32e9aef..e177c6c3da5 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -915,7 +915,7 @@ compute_index_stats(Relation onerel, double totalrows, { HeapTuple heapTuple = rows[rowno]; - vacuum_delay_point(); + analyze_delay_point(); /* * Reset the per-tuple context each time, to reclaim any cruft @@ -1234,7 +1234,7 @@ acquire_sample_rows(Relation onerel, int elevel, /* Outer loop over blocks to sample */ while (table_scan_analyze_next_block(scan, stream)) { - vacuum_delay_point(); + analyze_delay_point(); while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot)) { @@ -1966,7 +1966,7 @@ compute_trivial_stats(VacAttrStatsP stats, Datum value; bool isnull; - vacuum_delay_point(); + analyze_delay_point(); value = fetchfunc(stats, i, &isnull); @@ -2082,7 +2082,7 @@ compute_distinct_stats(VacAttrStatsP stats, int firstcount1, j; - vacuum_delay_point(); + analyze_delay_point(); value = fetchfunc(stats, i, &isnull); @@ -2429,7 +2429,7 @@ compute_scalar_stats(VacAttrStatsP stats, Datum value; bool isnull; - vacuum_delay_point(); + analyze_delay_point(); value = fetchfunc(stats, i, &isnull); diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index e6745e6145c..4563e617505 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -2352,13 +2352,14 @@ vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode) } /* - * vacuum_delay_point --- check for interrupts and cost-based delay. + * vacuum_delay_point_internal --- check for interrupts and cost-based delay. * - * This should be called in each major loop of VACUUM processing, - * typically once per page processed. + * This should be called (through the vacuum_delay_point() or the analyze_delay_point() + * helpers) in each major loop of VACUUM/ANALYZE processing, typically once per + * page processed. */ -void -vacuum_delay_point(void) +static void +vacuum_delay_point_internal(bool is_analyze) { double msec = 0; @@ -2434,6 +2435,24 @@ vacuum_delay_point(void) } } +/* + * Helper function to implement delay points in non-analyze operations. + */ +void +vacuum_delay_point(void) +{ + vacuum_delay_point_internal(false); +} + +/* + * Helper function to implement delay points in analyze operations. + */ +void +analyze_delay_point(void) +{ + vacuum_delay_point_internal(true); +} + /* * Computes the vacuum delay for parallel workers. * diff --git a/src/backend/tsearch/ts_typanalyze.c b/src/backend/tsearch/ts_typanalyze.c index 1494da1c9d3..133ec743495 100644 --- a/src/backend/tsearch/ts_typanalyze.c +++ b/src/backend/tsearch/ts_typanalyze.c @@ -204,7 +204,7 @@ compute_tsvector_stats(VacAttrStats *stats, char *lexemesptr; int j; - vacuum_delay_point(); + analyze_delay_point(); value = fetchfunc(stats, vector_no, &isnull); diff --git a/src/backend/utils/adt/array_typanalyze.c b/src/backend/utils/adt/array_typanalyze.c index 44a6eb5dad0..0d1e0c7a582 100644 --- a/src/backend/utils/adt/array_typanalyze.c +++ b/src/backend/utils/adt/array_typanalyze.c @@ -314,7 +314,7 @@ compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, int distinct_count; bool count_item_found; - vacuum_delay_point(); + analyze_delay_point(); value = fetchfunc(stats, array_no, &isnull); if (isnull) diff --git a/src/backend/utils/adt/rangetypes_typanalyze.c b/src/backend/utils/adt/rangetypes_typanalyze.c index 9dc73af1992..81e72a29d28 100644 --- a/src/backend/utils/adt/rangetypes_typanalyze.c +++ b/src/backend/utils/adt/rangetypes_typanalyze.c @@ -167,7 +167,7 @@ compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc, upper; float8 length; - vacuum_delay_point(); + analyze_delay_point(); value = fetchfunc(stats, range_no, &isnull); if (isnull) diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 12d0b61950d..0d60bed0be4 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -340,6 +340,7 @@ extern bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, extern bool vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs); extern void vac_update_datfrozenxid(void); extern void vacuum_delay_point(void); +extern void analyze_delay_point(void); extern bool vacuum_is_permitted_for_relation(Oid relid, Form_pg_class reltuple, bits32 options); extern Relation vacuum_open_relation(Oid relid, RangeVar *relation, -- 2.34.1
>From 61592deef3218328a0accdb9c80f659d80e6060d Mon Sep 17 00:00:00 2001 From: Bertrand Drouvot <bertranddrouvot...@gmail.com> Date: Tue, 4 Feb 2025 09:52:10 +0000 Subject: [PATCH v16 2/2] Add cost-based delay time to progress views. Author: Bertrand Drouvot Reviewed-by: Sami Imseih, Robert Haas, Masahiko Sawada, Masahiro Ikeda, Dilip Kumar, Sergei Kornilov Discussion: https://postgr.es/m/ZmaXmWDL829fzAVX%40ip-10-97-1-34.eu-west-3.compute.internal --- doc/src/sgml/config.sgml | 24 ++++++ doc/src/sgml/monitoring.sgml | 27 +++++++ src/backend/catalog/system_views.sql | 6 +- src/backend/commands/vacuum.c | 76 +++++++++++++++++++ src/backend/commands/vacuumparallel.c | 5 ++ src/backend/utils/misc/guc_tables.c | 9 +++ src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/commands/progress.h | 2 + src/include/commands/vacuum.h | 3 + src/test/regress/expected/rules.out | 6 +- 10 files changed, 155 insertions(+), 4 deletions(-) 38.4% doc/src/sgml/ 5.1% src/backend/catalog/ 44.0% src/backend/commands/ 3.9% src/backend/utils/misc/ 3.0% src/include/commands/ 5.3% src/test/regress/expected/ diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index a782f109982..eb96f2852e7 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8246,6 +8246,30 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; </listitem> </varlistentry> + <varlistentry id="guc-track-cost-delay-timing" xreflabel="track_cost_delay_timing"> + <term><varname>track_cost_delay_timing</varname> (<type>boolean</type>) + <indexterm> + <primary><varname>track_cost_delay_timing</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Enables timing of cost-based vacuum delay (see + <xref linkend="runtime-config-resource-vacuum-cost"/>). This parameter + is off by default, as it will repeatedly query the operating system for + the current time, which may cause significant overhead on some + platforms. You can use the <xref linkend="pgtesttiming"/> tool to + measure the overhead of timing on your system. Cost-based vacuum delay + timing information is displayed in + <link linkend="vacuum-progress-reporting"><structname>pg_stat_progress_vacuum</structname></link> + and + <link linkend="analyze-progress-reporting"><structname>pg_stat_progress_analyze</structname></link>. + Only superusers and users with the appropriate <literal>SET</literal> + privilege can change this setting. + </para> + </listitem> + </varlistentry> + <varlistentry id="guc-track-io-timing" xreflabel="track_io_timing"> <term><varname>track_io_timing</varname> (<type>boolean</type>) <indexterm> diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index edc2470bcf9..aa7e0bb677b 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -5606,6 +5606,18 @@ FROM pg_stat_get_backend_idset() AS backendid; <literal>acquiring inherited sample rows</literal>. </para></entry> </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>delay_time</structfield> <type>double precision</type> + </para> + <para> + Total time spent sleeping due to cost-based delay (see + <xref linkend="runtime-config-resource-vacuum-cost"/>), in milliseconds + (if <xref linkend="guc-track-cost-delay-timing"/> is enabled, otherwise + zero). + </para></entry> + </row> </tbody> </tgroup> </table> @@ -6531,6 +6543,21 @@ FROM pg_stat_get_backend_idset() AS backendid; <literal>cleaning up indexes</literal>. </para></entry> </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>delay_time</structfield> <type>double precision</type> + </para> + <para> + Total time spent sleeping due to cost-based delay (see + <xref linkend="runtime-config-resource-vacuum-cost"/>), in milliseconds + (if <xref linkend="guc-track-cost-delay-timing"/> is enabled, otherwise + zero). This includes the time that any associated parallel workers have + slept. However, parallel workers report their sleep time no more + frequently than once per second, so the reported value may be slightly + stale. + </para></entry> + </row> </tbody> </tgroup> </table> diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index cddc3ea9b53..eff0990957e 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1213,7 +1213,8 @@ CREATE VIEW pg_stat_progress_analyze AS S.param5 AS ext_stats_computed, S.param6 AS child_tables_total, S.param7 AS child_tables_done, - CAST(S.param8 AS oid) AS current_child_table_relid + CAST(S.param8 AS oid) AS current_child_table_relid, + S.param9 / 1000000::double precision AS delay_time FROM pg_stat_get_progress_info('ANALYZE') AS S LEFT JOIN pg_database D ON S.datid = D.oid; @@ -1233,7 +1234,8 @@ CREATE VIEW pg_stat_progress_vacuum AS S.param4 AS heap_blks_vacuumed, S.param5 AS index_vacuum_count, S.param6 AS max_dead_tuple_bytes, S.param7 AS dead_tuple_bytes, S.param8 AS num_dead_item_ids, S.param9 AS indexes_total, - S.param10 AS indexes_processed + S.param10 AS indexes_processed, + S.param11 / 1000000::double precision AS delay_time FROM pg_stat_get_progress_info('VACUUM') AS S LEFT JOIN pg_database D ON S.datid = D.oid; diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 4563e617505..6bf9fc7161f 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -39,6 +39,7 @@ #include "catalog/pg_inherits.h" #include "commands/cluster.h" #include "commands/defrem.h" +#include "commands/progress.h" #include "commands/vacuum.h" #include "miscadmin.h" #include "nodes/makefuncs.h" @@ -59,6 +60,12 @@ #include "utils/snapmgr.h" #include "utils/syscache.h" +/* + * Minimum interval for cost-based vacuum delay reports from a parallel worker. + * This aims to avoid sending too many messages and waking up the leader too + * frequently. + */ +#define PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS (NS_PER_S) /* * GUC parameters @@ -69,6 +76,7 @@ int vacuum_multixact_freeze_min_age; int vacuum_multixact_freeze_table_age; int vacuum_failsafe_age; int vacuum_multixact_failsafe_age; +bool track_cost_delay_timing; /* * Variables for cost-based vacuum delay. The defaults differ between @@ -79,6 +87,11 @@ int vacuum_multixact_failsafe_age; double vacuum_cost_delay = 0; int vacuum_cost_limit = 200; +/* + * Variable for reporting cost-based vacuum delay from parallel workers. + */ +int64 parallel_vacuum_worker_delay_ns = 0; + /* * VacuumFailsafeActive is a defined as a global so that we can determine * whether or not to re-enable cost-based vacuum delay when vacuuming a table. @@ -2357,6 +2370,15 @@ vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode) * This should be called (through the vacuum_delay_point() or the analyze_delay_point() * helpers) in each major loop of VACUUM/ANALYZE processing, typically once per * page processed. + * + * If the track_cost_delay_timing GUC is set to on, the function tracks + * cumulative delay times and reports them as progress. In parallel vacuum workers, + * these times are accumulated and reported periodically to avoid overloading + * the leader with messages and interrupts. For non-parallel workers, each delay + * is reported immediately. + * + * The "is_analyze" parameter determines whether delays are attributed to vacuum + * or analyze operations in the progress reporting system. */ static void vacuum_delay_point_internal(bool is_analyze) @@ -2402,13 +2424,67 @@ vacuum_delay_point_internal(bool is_analyze) /* Nap if appropriate */ if (msec > 0) { + instr_time delay_start; + if (msec > vacuum_cost_delay * 4) msec = vacuum_cost_delay * 4; + if (track_cost_delay_timing) + INSTR_TIME_SET_CURRENT(delay_start); + pgstat_report_wait_start(WAIT_EVENT_VACUUM_DELAY); pg_usleep(msec * 1000); pgstat_report_wait_end(); + if (track_cost_delay_timing) + { + instr_time delay_end; + instr_time delay; + + INSTR_TIME_SET_CURRENT(delay_end); + INSTR_TIME_SET_ZERO(delay); + INSTR_TIME_ACCUM_DIFF(delay, delay_end, delay_start); + + /* + * For parallel workers, we only report the delay time every once + * in a while to avoid overloading the leader with messages and + * interrupts. + */ + if (IsParallelWorker()) + { + static instr_time last_report_time; + instr_time time_since_last_report; + + Assert(!is_analyze); + + /* Accumulate the delay time */ + parallel_vacuum_worker_delay_ns += INSTR_TIME_GET_NANOSEC(delay); + + /* Calculate interval since last report */ + INSTR_TIME_SET_ZERO(time_since_last_report); + INSTR_TIME_ACCUM_DIFF(time_since_last_report, delay_end, last_report_time); + + /* If we haven't reported in a while, do so now */ + if (INSTR_TIME_GET_NANOSEC(time_since_last_report) >= + PARALLEL_VACUUM_DELAY_REPORT_INTERVAL_NS) + { + /* PROGRESS_ANALYZE_DELAY_TIME can't be of interest */ + pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME, + parallel_vacuum_worker_delay_ns); + + /* Reset variables */ + last_report_time = delay_end; + parallel_vacuum_worker_delay_ns = 0; + } + } + else if (is_analyze) + pgstat_progress_incr_param(PROGRESS_ANALYZE_DELAY_TIME, + INSTR_TIME_GET_NANOSEC(delay)); + else + pgstat_progress_incr_param(PROGRESS_VACUUM_DELAY_TIME, + INSTR_TIME_GET_NANOSEC(delay)); + } + /* * We don't want to ignore postmaster death during very long vacuums * with vacuum_cost_delay configured. We can't use the usual diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c index dc3322c256b..2b9d548cdeb 100644 --- a/src/backend/commands/vacuumparallel.c +++ b/src/backend/commands/vacuumparallel.c @@ -1094,6 +1094,11 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc) InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber], &wal_usage[ParallelWorkerNumber]); + /* Report any remaining cost-based vacuum delay time */ + if (track_cost_delay_timing) + pgstat_progress_parallel_incr_param(PROGRESS_VACUUM_DELAY_TIME, + parallel_vacuum_worker_delay_ns); + TidStoreDetach(dead_items); /* Pop the error context stack */ diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 71448bb4fdd..4179c260e4d 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -1470,6 +1470,15 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"track_cost_delay_timing", PGC_SUSET, STATS_CUMULATIVE, + gettext_noop("Collects timing statistics for cost-based vacuum delay."), + NULL + }, + &track_cost_delay_timing, + false, + NULL, NULL, NULL + }, { {"track_io_timing", PGC_SUSET, STATS_CUMULATIVE, gettext_noop("Collects timing statistics for database I/O activity."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 079efa1baa7..18e2d5d7608 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -632,6 +632,7 @@ #track_activities = on #track_activity_query_size = 1024 # (change requires restart) #track_counts = on +#track_cost_delay_timing = off #track_io_timing = off #track_wal_io_timing = off #track_functions = none # none, pl, all diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h index 18e3179ef63..7c736e7b03b 100644 --- a/src/include/commands/progress.h +++ b/src/include/commands/progress.h @@ -28,6 +28,7 @@ #define PROGRESS_VACUUM_NUM_DEAD_ITEM_IDS 7 #define PROGRESS_VACUUM_INDEXES_TOTAL 8 #define PROGRESS_VACUUM_INDEXES_PROCESSED 9 +#define PROGRESS_VACUUM_DELAY_TIME 10 /* Phases of vacuum (as advertised via PROGRESS_VACUUM_PHASE) */ #define PROGRESS_VACUUM_PHASE_SCAN_HEAP 1 @@ -46,6 +47,7 @@ #define PROGRESS_ANALYZE_CHILD_TABLES_TOTAL 5 #define PROGRESS_ANALYZE_CHILD_TABLES_DONE 6 #define PROGRESS_ANALYZE_CURRENT_CHILD_TABLE_RELID 7 +#define PROGRESS_ANALYZE_DELAY_TIME 8 /* Phases of analyze (as advertised via PROGRESS_ANALYZE_PHASE) */ #define PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS 1 diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 0d60bed0be4..ae0edbbeaf1 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -296,6 +296,7 @@ extern PGDLLIMPORT int vacuum_multixact_freeze_min_age; extern PGDLLIMPORT int vacuum_multixact_freeze_table_age; extern PGDLLIMPORT int vacuum_failsafe_age; extern PGDLLIMPORT int vacuum_multixact_failsafe_age; +extern PGDLLIMPORT bool track_cost_delay_timing; /* * Maximum value for default_statistics_target and per-column statistics @@ -313,6 +314,8 @@ extern PGDLLIMPORT bool VacuumFailsafeActive; extern PGDLLIMPORT double vacuum_cost_delay; extern PGDLLIMPORT int vacuum_cost_limit; +extern PGDLLIMPORT int64 parallel_vacuum_worker_delay_ns; + /* in commands/vacuum.c */ extern void ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel); extern void vacuum(List *relations, VacuumParams *params, diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 3361f6a69c9..5baba8d39ff 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1932,7 +1932,8 @@ pg_stat_progress_analyze| SELECT s.pid, s.param5 AS ext_stats_computed, s.param6 AS child_tables_total, s.param7 AS child_tables_done, - (s.param8)::oid AS current_child_table_relid + (s.param8)::oid AS current_child_table_relid, + ((s.param9)::double precision / (1000000)::double precision) AS delay_time FROM (pg_stat_get_progress_info('ANALYZE'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) LEFT JOIN pg_database d ON ((s.datid = d.oid))); pg_stat_progress_basebackup| SELECT pid, @@ -2062,7 +2063,8 @@ pg_stat_progress_vacuum| SELECT s.pid, s.param7 AS dead_tuple_bytes, s.param8 AS num_dead_item_ids, s.param9 AS indexes_total, - s.param10 AS indexes_processed + s.param10 AS indexes_processed, + ((s.param11)::double precision / (1000000)::double precision) AS delay_time FROM (pg_stat_get_progress_info('VACUUM'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20) LEFT JOIN pg_database d ON ((s.datid = d.oid))); pg_stat_recovery_prefetch| SELECT stats_reset, -- 2.34.1