Hi Sami, Your patch should correct the problem. However, given that this function is part of the tableam API, I am wondering if the fix shouldn't be outside heap's copy_for_cluster implementation? I guess it depends on the semantics of num_tuples, but the cluster code seems to allude to interpreting num_tuples as the number of non-removable tuples. If you subtract recently dead from that number within the heap implementation, then it will no longer reflect non-removable tuples and the log message in the cluster function "found %.0f removable, %.0f nonremovable row versions" will no longer be correct.
Surprisingly, tableam.h does not document the num_tuples parameter in the table_relation_copy_for_cluster() function although all other output parameters are documented. So, it is not clear what the intended semantics are. Maybe other hackers on the mailing list have opinions on how to interpret num_tuples? In any case, assuming num_tuples is supposed to return non-removable tuples, then the fix should be to subtract recently dead tuples when updating pg_class.reltuples. Other TAM's need to treat num_tuples as non-removable tuples as well, and update recently dead if applicable. I am attaching a patch with these changes, while also including the isolation test in that patch. Regards, Erik On Thu, May 22, 2025 at 10:42 PM Sami Imseih <samims...@gmail.com> wrote: > > In both cases, he recently dead tuples must be copied to the table or > index, but > > they should not be counted towards reltuples. So, I think we need to fix > this in > > heapam_relation_copy_for_cluster by probably subtracting > > tups_recently_dead from num_tuples ( which is the value set in > > pg_class.reltuples ) > > after we process all the tuples, which looks like the best fix to me. > > something like the attached. > > -- > Sami Imseih > Amazon Web Services (AWS) >
From 1a5814f794c71d6a6c0b100e71289dece7136a89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Erik=20Nordstr=C3=B6m?= <erik.nordst...@gmail.com> Date: Fri, 23 May 2025 14:54:56 +0200 Subject: [PATCH v1] Fix reltuples stats after VACUUM FULL and CLUSTER The tableam API's function for clustering a table, used by both CLUSTER and VACUUM FULL, returns the number of non-removable tuples in a num_tuples variable. This number includes dead tuples that cannot be removed because they are still visible to concurrent transactions. Previously, num_tuples was used to update pg_class.reltuples, but this could lead to widely misleading numbers if many tuples had been deleted or updated concurrently with ongoing transactions. The fix is to not include the recently dead tuples. It should be noted that this issue only occurred on tables that didn't have any indexes, since the reindexing that happens as part of the cluster code also updates reltuples, but using the more accurate number based on currently visible tuples. --- src/backend/commands/cluster.c | 2 +- src/include/access/tableam.h | 1 + .../isolation/expected/vacuum-full-stats.out | 72 +++++++++++++++++++ src/test/isolation/isolation_schedule | 1 + .../isolation/specs/vacuum-full-stats.spec | 42 +++++++++++ 5 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 src/test/isolation/expected/vacuum-full-stats.out create mode 100644 src/test/isolation/specs/vacuum-full-stats.spec diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 54a08e4102e..74e445d9a67 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -1017,7 +1017,7 @@ copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex, bool verb relform = (Form_pg_class) GETSTRUCT(reltup); relform->relpages = num_pages; - relform->reltuples = num_tuples; + relform->reltuples = num_tuples - tups_recently_dead; /* Don't update the stats for pg_class. See swap_relation_files. */ if (RelationGetRelid(OldHeap) != RelationRelationId) diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 8713e12cbfb..d259a7b46fa 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -1631,6 +1631,7 @@ table_relation_copy_data(Relation rel, const RelFileLocator *newrlocator) * Output parameters: * - *xid_cutoff - rel's new relfrozenxid value, may be invalid * - *multi_cutoff - rel's new relminmxid value, may be invalid + * - *num_tuples - stats, non-removable tuples for logging * - *tups_vacuumed - stats, for logging, if appropriate for AM * - *tups_recently_dead - stats, for logging, if appropriate for AM */ diff --git a/src/test/isolation/expected/vacuum-full-stats.out b/src/test/isolation/expected/vacuum-full-stats.out new file mode 100644 index 00000000000..7b027711f85 --- /dev/null +++ b/src/test/isolation/expected/vacuum-full-stats.out @@ -0,0 +1,72 @@ +Parsed test spec with 2 sessions + +starting permutation: s2_vac_full s2_reltuples s1_query s2_update s2_reltuples s2_vac_full s1_commit s2_reltuples +step s2_vac_full: VACUUM FULL stats; +step s2_reltuples: SELECT reltuples FROM pg_class WHERE relname = 'stats'; +reltuples +--------- + 10 +(1 row) + +step s1_query: + START TRANSACTION ISOLATION LEVEL REPEATABLE READ; + SELECT 1; + +?column? +-------- + 1 +(1 row) + +step s2_update: UPDATE stats SET a = 1 WHERE a > 6; +step s2_reltuples: SELECT reltuples FROM pg_class WHERE relname = 'stats'; +reltuples +--------- + 10 +(1 row) + +step s2_vac_full: VACUUM FULL stats; +step s1_commit: + COMMIT; + +step s2_reltuples: SELECT reltuples FROM pg_class WHERE relname = 'stats'; +reltuples +--------- + 10 +(1 row) + + +starting permutation: s2_create_index s2_vac_full s2_reltuples s1_query s2_update s2_reltuples s2_vac_full s1_commit s2_reltuples +step s2_create_index: CREATE INDEX ON stats (a); +step s2_vac_full: VACUUM FULL stats; +step s2_reltuples: SELECT reltuples FROM pg_class WHERE relname = 'stats'; +reltuples +--------- + 10 +(1 row) + +step s1_query: + START TRANSACTION ISOLATION LEVEL REPEATABLE READ; + SELECT 1; + +?column? +-------- + 1 +(1 row) + +step s2_update: UPDATE stats SET a = 1 WHERE a > 6; +step s2_reltuples: SELECT reltuples FROM pg_class WHERE relname = 'stats'; +reltuples +--------- + 10 +(1 row) + +step s2_vac_full: VACUUM FULL stats; +step s1_commit: + COMMIT; + +step s2_reltuples: SELECT reltuples FROM pg_class WHERE relname = 'stats'; +reltuples +--------- + 10 +(1 row) + diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index e3c669a29c7..71358fa42c5 100644 --- a/src/test/isolation/isolation_schedule +++ b/src/test/isolation/isolation_schedule @@ -95,6 +95,7 @@ test: vacuum-no-cleanup-lock test: timeouts test: vacuum-concurrent-drop test: vacuum-conflict +test: vacuum-full-stats test: vacuum-skip-locked test: stats test: horizons diff --git a/src/test/isolation/specs/vacuum-full-stats.spec b/src/test/isolation/specs/vacuum-full-stats.spec new file mode 100644 index 00000000000..1280b0dd975 --- /dev/null +++ b/src/test/isolation/specs/vacuum-full-stats.spec @@ -0,0 +1,42 @@ +# Test that pg_class.reltuples is updated with the correct value after +# a VACUUM FULL when there are concurrent transactions preventing +# removal of some garbage tuples. The reltuples count should not +# include these garbage tuples. + +setup +{ + CREATE TABLE stats (a INT); + INSERT INTO stats SELECT generate_series(1, 10); +} + +teardown +{ + DROP TABLE IF EXISTS stats; +} + +# Session 1 (s1) keeps an open transaction in repeatable read to +# prevent VACUUM FULL in session 2 (s2) from removing some dead tuples +# that session 1 should still see in its snapshot. Note that s1 needs +# to run a query to initiate a snapshot. +session s1 +step s1_query +{ + START TRANSACTION ISOLATION LEVEL REPEATABLE READ; + SELECT 1; +} +step s1_commit +{ + COMMIT; +} + +session s2 +step s2_update { UPDATE stats SET a = 1 WHERE a > 6; } +step s2_vac_full { VACUUM FULL stats; } +step s2_reltuples { SELECT reltuples FROM pg_class WHERE relname = 'stats'; } +step s2_create_index { CREATE INDEX ON stats (a); } + +permutation s2_vac_full s2_reltuples s1_query s2_update s2_reltuples s2_vac_full s1_commit s2_reltuples + +# If the table has at least one index, then the index rebuild will set +# reltuples after VACUUM FULL, so run a test with that too. +permutation s2_create_index s2_vac_full s2_reltuples s1_query s2_update s2_reltuples s2_vac_full s1_commit s2_reltuples -- 2.48.1