Hi,

Jeff pointed out that one of the pg_stat_io tests has failed a few times
over the past months (here on morepork [1] and more recently here on
francolin [2]).

Failing test diff for those who prefer not to scroll:

+++ 
/home/bf/bf-build/francolin/HEAD/pgsql.build/testrun/recovery/027_stream_regress/data/results/stats.out
   2023-07-07 18:48:25.976313231 +0000
@@ -1415,7 +1415,7 @@
        :io_sum_vac_strategy_after_reuses > :io_sum_vac_strategy_before_reuses;
  ?column? | ?column?
 ----------+----------
- t        | t
+ t        | f

My theory about the test failure is that, when there is enough demand
for shared buffers, the flapping test fails because it expects buffer
access strategy *reuses* and concurrent queries already flushed those
buffers before they could be reused. Attached is a patch which I think
will fix the test while keeping some code coverage. If we count
evictions and reuses together, those should have increased.

- Melanie

[1] 
https://buildfarm.postgresql.org/cgi-bin/show_log.pl?nm=morepork&dt=2023-06-16%2018%3A30%3A32
[2] 
https://buildfarm.postgresql.org/cgi-bin/show_stage_log.pl?nm=francolin&dt=2023-07-07%2018%3A43%3A57&stg=recovery-check
From cf41551431751a2b03e0fc08b7296301fba81992 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplage...@gmail.com>
Date: Mon, 10 Jul 2023 13:57:36 -0400
Subject: [PATCH v1] Fix pg_stat_io buffer reuse test instability

The stats regression test attempts to ensure that Buffer Access Strategy
"reuses" are being counted in pg_stat_io by vacuuming a table which is
larger than the size of the strategy ring. However, when shared buffers
are in sufficiently high demand, another backend could evict one of the
blocks in the strategy ring before the first backend has a chance to
reuse the buffer. The backend using the strategy would then evict a
block from another shared buffer and add that buffer to the strategy
ring. This counts as an eviction and not a reuse in pg_stat_io. Count
both evictions and reuses in the test to ensure it does not fail
incorrectly.

Reported-by: Jeff Davis
---
 src/test/regress/expected/stats.out | 30 +++++++++++++++++++----------
 src/test/regress/sql/stats.sql      | 19 +++++++++++-------
 2 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out
index 8e63340782..2ea6bbca7a 100644
--- a/src/test/regress/expected/stats.out
+++ b/src/test/regress/expected/stats.out
@@ -1385,7 +1385,11 @@ SELECT :io_sum_local_new_tblspc_writes > :io_sum_local_after_writes;
 
 RESET temp_buffers;
 -- Test that reuse of strategy buffers and reads of blocks into these reused
--- buffers while VACUUMing are tracked in pg_stat_io.
+-- buffers while VACUUMing are tracked in pg_stat_io. If there is sufficient
+-- demand for shared buffers from concurrent queries, some blocks may be
+-- evicted from the strategy ring before they can be reused. In such cases
+-- this, the backend will evict a block from a shared buffer outside of the
+-- ring and add it to the ring. This is considered an eviction and not a reuse.
 -- Set wal_skip_threshold smaller than the expected size of
 -- test_io_vac_strategy so that, even if wal_level is minimal, VACUUM FULL will
 -- fsync the newly rewritten test_io_vac_strategy instead of writing it to WAL.
@@ -1393,15 +1397,15 @@ RESET temp_buffers;
 -- shared buffers -- preventing us from testing BAS_VACUUM BufferAccessStrategy
 -- reads.
 SET wal_skip_threshold = '1 kB';
-SELECT sum(reuses) AS reuses, sum(reads) AS reads
+SELECT sum(reuses) AS reuses, sum(reads) AS reads, sum(evictions) AS evictions
   FROM pg_stat_io WHERE context = 'vacuum' \gset io_sum_vac_strategy_before_
 CREATE TABLE test_io_vac_strategy(a int, b int) WITH (autovacuum_enabled = 'false');
 INSERT INTO test_io_vac_strategy SELECT i, i from generate_series(1, 4500)i;
 -- Ensure that the next VACUUM will need to perform IO by rewriting the table
 -- first with VACUUM (FULL).
 VACUUM (FULL) test_io_vac_strategy;
--- Use the minimum BUFFER_USAGE_LIMIT to cause reuses with the smallest table
--- possible.
+-- Use the minimum BUFFER_USAGE_LIMIT to cause reuses or evictions with the
+-- smallest table possible.
 VACUUM (PARALLEL 0, BUFFER_USAGE_LIMIT 128) test_io_vac_strategy;
 SELECT pg_stat_force_next_flush();
  pg_stat_force_next_flush 
@@ -1409,13 +1413,19 @@ SELECT pg_stat_force_next_flush();
  
 (1 row)
 
-SELECT sum(reuses) AS reuses, sum(reads) AS reads
+SELECT sum(reuses) AS reuses, sum(reads) AS reads, sum(evictions) AS evictions
   FROM pg_stat_io WHERE context = 'vacuum' \gset io_sum_vac_strategy_after_
-SELECT :io_sum_vac_strategy_after_reads > :io_sum_vac_strategy_before_reads,
-       :io_sum_vac_strategy_after_reuses > :io_sum_vac_strategy_before_reuses;
- ?column? | ?column? 
-----------+----------
- t        | t
+SELECT :io_sum_vac_strategy_after_reads > :io_sum_vac_strategy_before_reads;
+ ?column? 
+----------
+ t
+(1 row)
+
+SELECT (:io_sum_vac_strategy_after_reuses + :io_sum_vac_strategy_after_evictions) >
+  (:io_sum_vac_strategy_before_reuses + :io_sum_vac_strategy_before_evictions);
+ ?column? 
+----------
+ t
 (1 row)
 
 RESET wal_skip_threshold;
diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql
index fddf5a8277..f398d8c5f5 100644
--- a/src/test/regress/sql/stats.sql
+++ b/src/test/regress/sql/stats.sql
@@ -678,7 +678,11 @@ SELECT :io_sum_local_new_tblspc_writes > :io_sum_local_after_writes;
 RESET temp_buffers;
 
 -- Test that reuse of strategy buffers and reads of blocks into these reused
--- buffers while VACUUMing are tracked in pg_stat_io.
+-- buffers while VACUUMing are tracked in pg_stat_io. If there is sufficient
+-- demand for shared buffers from concurrent queries, some blocks may be
+-- evicted from the strategy ring before they can be reused. In such cases
+-- this, the backend will evict a block from a shared buffer outside of the
+-- ring and add it to the ring. This is considered an eviction and not a reuse.
 
 -- Set wal_skip_threshold smaller than the expected size of
 -- test_io_vac_strategy so that, even if wal_level is minimal, VACUUM FULL will
@@ -687,21 +691,22 @@ RESET temp_buffers;
 -- shared buffers -- preventing us from testing BAS_VACUUM BufferAccessStrategy
 -- reads.
 SET wal_skip_threshold = '1 kB';
-SELECT sum(reuses) AS reuses, sum(reads) AS reads
+SELECT sum(reuses) AS reuses, sum(reads) AS reads, sum(evictions) AS evictions
   FROM pg_stat_io WHERE context = 'vacuum' \gset io_sum_vac_strategy_before_
 CREATE TABLE test_io_vac_strategy(a int, b int) WITH (autovacuum_enabled = 'false');
 INSERT INTO test_io_vac_strategy SELECT i, i from generate_series(1, 4500)i;
 -- Ensure that the next VACUUM will need to perform IO by rewriting the table
 -- first with VACUUM (FULL).
 VACUUM (FULL) test_io_vac_strategy;
--- Use the minimum BUFFER_USAGE_LIMIT to cause reuses with the smallest table
--- possible.
+-- Use the minimum BUFFER_USAGE_LIMIT to cause reuses or evictions with the
+-- smallest table possible.
 VACUUM (PARALLEL 0, BUFFER_USAGE_LIMIT 128) test_io_vac_strategy;
 SELECT pg_stat_force_next_flush();
-SELECT sum(reuses) AS reuses, sum(reads) AS reads
+SELECT sum(reuses) AS reuses, sum(reads) AS reads, sum(evictions) AS evictions
   FROM pg_stat_io WHERE context = 'vacuum' \gset io_sum_vac_strategy_after_
-SELECT :io_sum_vac_strategy_after_reads > :io_sum_vac_strategy_before_reads,
-       :io_sum_vac_strategy_after_reuses > :io_sum_vac_strategy_before_reuses;
+SELECT :io_sum_vac_strategy_after_reads > :io_sum_vac_strategy_before_reads;
+SELECT (:io_sum_vac_strategy_after_reuses + :io_sum_vac_strategy_after_evictions) >
+  (:io_sum_vac_strategy_before_reuses + :io_sum_vac_strategy_before_evictions);
 RESET wal_skip_threshold;
 
 -- Test that extends done by a CTAS, which uses a BAS_BULKWRITE
-- 
2.37.2

Reply via email to