Here is what I have staged for commit. I'll aim to commit these patches sometime next week to give time for additional feedback.
-- nathan
>From 6d1e608edb17d9bbaaf7d57ace35fee68ff869c0 Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nat...@postgresql.org> Date: Wed, 23 Apr 2025 10:11:46 -0500 Subject: [PATCH v2 1/2] Further adjust guidance for running vacuumdb after pg_upgrade. Since pg_upgrade does not transfer the cumulative statistics used for triggering autovacuum and autoanalyze, the server may take much longer than expected to process them post-upgrade. Currently, the pg_upgrade documentation recommends analyzing only relations for which optimizer statistics were not carried over during upgrade. This commit appends another recommendation to also analyze all relations to update the relevant cumulative statistics, similar to the recommendation for pg_stat_reset(). Reported-by: Christoph Berg <m...@debian.org> Reviewed-by: Christoph Berg <m...@debian.org> Discussion: https://postgr.es/m/aAfxfKC82B9NvJDj%40msg.df7cb.de --- doc/src/sgml/ref/pgupgrade.sgml | 12 +++++++----- src/bin/pg_upgrade/check.c | 9 ++++++--- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/doc/src/sgml/ref/pgupgrade.sgml b/doc/src/sgml/ref/pgupgrade.sgml index df13365b287..648c6e2967c 100644 --- a/doc/src/sgml/ref/pgupgrade.sgml +++ b/doc/src/sgml/ref/pgupgrade.sgml @@ -833,17 +833,19 @@ psql --username=postgres --file=script.sql postgres <para> Because not all statistics are not transferred by - <command>pg_upgrade</command>, you will be instructed to run a command to + <command>pg_upgrade</command>, you will be instructed to run commands to regenerate that information at the end of the upgrade. You might need to set connection parameters to match your new cluster. </para> <para> - Using <command>vacuumdb --all --analyze-only --missing-stats-only</command> - can efficiently generate such statistics. Alternatively, + First, use <command>vacuumdb --all --analyze-in-stages --missing-stats-only</command> - can be used to generate minimal statistics quickly. For either command, - the use of <option>--jobs</option> can speed it up. + to quickly generate minimal optimizer statistics for relations without + any. Then, use <command>vacuumdb --all --analyze-only</command> to ensure + all relations have updated cumulative statistics for triggering vacuum and + analyze. For both commands, the use of <option>--jobs</option> can speed + it up. If <varname>vacuum_cost_delay</varname> is set to a non-zero value, this can be overridden to speed up statistics generation using <envar>PGOPTIONS</envar>, e.g., <literal>PGOPTIONS='-c diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 18c2d652bb6..940fc77fc2e 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -814,9 +814,12 @@ output_completion_banner(char *deletion_script_file_name) } pg_log(PG_REPORT, - "Some optimizer statistics may not have been transferred by pg_upgrade.\n" - "Once you start the new server, consider running:\n" - " %s/vacuumdb %s--all --analyze-in-stages --missing-stats-only", new_cluster.bindir, user_specification.data); + "Some statistics are not transferred by pg_upgrade.\n" + "Once you start the new server, consider running these two commands:\n" + " %s/vacuumdb %s--all --analyze-in-stages --missing-stats-only\n" + " %s/vacuumdb %s--all --analyze-only", + new_cluster.bindir, user_specification.data, + new_cluster.bindir, user_specification.data); if (deletion_script_file_name) pg_log(PG_REPORT, -- 2.39.5 (Apple Git-154)
>From 299c8bea787fb1637b58390d351a07aaa2521ac9 Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nat...@postgresql.org> Date: Wed, 23 Apr 2025 10:37:42 -0500 Subject: [PATCH v2 2/2] vacuumdb: Don't skip empty relations in --missing-stats-only. Presently, --missing-stats-only skips relations with reltuples set to 0 because empty relations don't get optimizer statistics. However, before v14, a reltuples value of 0 was ambiguous: it could either mean the relation is empty, or it could mean that it hadn't yet been vacuumed or analyzed. (Commit 3d351d916b taught Postgres 14 and newer to use -1 for the latter case.) This ambiguity can cause --missing-stats-only to inadvertently skip relations that need optimizer statistics. To fix, simply remove the check for reltuples != 0. This will cause --missing-stats-only to analyze some empty tables, but that doesn't seem too terrible a trade-off. Reported-by: Christoph Berg <m...@debian.org> Reviewed-by: Christoph Berg <m...@debian.org> Discussion: https://postgr.es/m/aAjyvW5_fRGNr7yF%40msg.df7cb.de --- src/bin/scripts/vacuumdb.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/bin/scripts/vacuumdb.c b/src/bin/scripts/vacuumdb.c index 22067faaf7d..79b1096eb08 100644 --- a/src/bin/scripts/vacuumdb.c +++ b/src/bin/scripts/vacuumdb.c @@ -954,7 +954,6 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts, appendPQExpBufferStr(&catalog_query, " EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n" " WHERE a.attrelid OPERATOR(pg_catalog.=) c.oid\n" - " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" " AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n" " AND NOT a.attisdropped\n" " AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n" @@ -967,7 +966,6 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts, appendPQExpBufferStr(&catalog_query, " OR EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext e\n" " WHERE e.stxrelid OPERATOR(pg_catalog.=) c.oid\n" - " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" " AND e.stxstattarget IS DISTINCT FROM 0::pg_catalog.int2\n" " AND NOT EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext_data d\n" " WHERE d.stxoid OPERATOR(pg_catalog.=) e.oid\n" @@ -979,7 +977,6 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts, " JOIN pg_catalog.pg_index i" " ON i.indexrelid OPERATOR(pg_catalog.=) a.attrelid\n" " WHERE i.indrelid OPERATOR(pg_catalog.=) c.oid\n" - " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" " AND i.indkey[a.attnum OPERATOR(pg_catalog.-) 1::pg_catalog.int2]" " OPERATOR(pg_catalog.=) 0::pg_catalog.int2\n" " AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n" @@ -994,7 +991,6 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts, appendPQExpBufferStr(&catalog_query, " OR EXISTS (SELECT NULL FROM pg_catalog.pg_attribute a\n" " WHERE a.attrelid OPERATOR(pg_catalog.=) c.oid\n" - " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" " AND a.attnum OPERATOR(pg_catalog.>) 0::pg_catalog.int2\n" " AND NOT a.attisdropped\n" " AND a.attstattarget IS DISTINCT FROM 0::pg_catalog.int2\n" @@ -1011,7 +1007,6 @@ retrieve_objects(PGconn *conn, vacuumingOptions *vacopts, appendPQExpBufferStr(&catalog_query, " OR EXISTS (SELECT NULL FROM pg_catalog.pg_statistic_ext e\n" " WHERE e.stxrelid OPERATOR(pg_catalog.=) c.oid\n" - " AND c.reltuples OPERATOR(pg_catalog.!=) 0::pg_catalog.float4\n" " AND e.stxstattarget IS DISTINCT FROM 0::pg_catalog.int2\n" " AND c.relhassubclass\n" " AND NOT p.inherited\n" -- 2.39.5 (Apple Git-154)