On Sat, Apr 18, 2020 at 9:27 PM Dilip Kumar <dilipbal...@gmail.com> wrote:
> On Sat, Apr 18, 2020 at 11:47 AM Thomas Munro <thomas.mu...@gmail.com> wrote:
> > I think I found another bug in MaintainOldSnapshotTimeMapping(): if
> > you make time jump by more than old_snapshot_threshold in one go, then
> > the map gets cleared and then no early pruning or snapshot-too-old
> > errors happen.  That's why in 002_too_old.pl it currently advances
> > time by 10 minutes twice, instead of 20 minutes once.  To be
> > continued.
>
> IMHO that doesn't seems to be a problem.  Because even if we jump more
> than old_snapshot_threshold in one go we don't clean complete map
> right.  The latest snapshot timestamp will become the headtimestamp.
> So in TransactionIdLimitedForOldSnapshots if (current_ts -
> old_snapshot_threshold) is still >= head_timestap then we can still do
> early pruning.

Right, thanks.  I got confused about that, and misdiagnosed something
I was seeing.

Here's a new version:

0004: Instead of writing a new kind of TAP test to demonstrate
snapshot-too-old errors, I adjusted the existing isolation tests to
use the same absolute time control technique.  Previously I had
invented a way to do isolation tester-like stuff in TAP tests, which
might be interesting but strange new perl is not necessary for this.

0005: Truncates the time map when the CLOG is truncated.  Its test is
now under src/test/module/snapshot_too_old/t/001_truncate.sql.

These apply on top of Robert's patches, but the only dependency is on
his patch 0001 "Expose oldSnapshotControl.", because now I have stuff
in src/test/module/snapshot_too_old/test_sto.c that wants to mess with
that object too.

Is this an improvement?  I realise that there is still nothing to
actually verify that early pruning has actually happened.  I haven't
thought of a good way to do that yet (stats, page inspection, ...).
From 9640c654e97e8704b041e207f2fd02070d2dd057 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Mon, 20 Apr 2020 16:23:02 +1200
Subject: [PATCH v3 4/5] Rewrite the "snapshot_too_old" tests with absolute
 timestamps.

Previously the snapshot too old feature used a special test value for
old_snapshot_threshold.  Instead, use a new approach based on clobbering
the "current" timestamp used in snapshot-too-old book keeping, so that
we can control the timeline precisely without having to resort to
sleeping or special test branches in the code that are different than
what is used in production.
---
 src/backend/utils/time/snapmgr.c              | 24 ------
 src/test/modules/snapshot_too_old/Makefile    | 21 ++---
 .../expected/sto_using_cursor.out             | 75 +++++++----------
 .../expected/sto_using_hash_index.out         | 26 ++++--
 .../expected/sto_using_select.out             | 80 ++++++++++++-------
 .../specs/sto_using_cursor.spec               | 30 ++++---
 .../specs/sto_using_hash_index.spec           | 19 ++++-
 .../specs/sto_using_select.spec               | 32 +++++---
 src/test/modules/snapshot_too_old/sto.conf    |  2 +-
 .../snapshot_too_old/test_sto--1.0.sql        | 14 ++++
 src/test/modules/snapshot_too_old/test_sto.c  | 74 +++++++++++++++++
 .../modules/snapshot_too_old/test_sto.control |  5 ++
 12 files changed, 264 insertions(+), 138 deletions(-)
 create mode 100644 src/test/modules/snapshot_too_old/test_sto--1.0.sql
 create mode 100644 src/test/modules/snapshot_too_old/test_sto.c
 create mode 100644 src/test/modules/snapshot_too_old/test_sto.control

diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 72b2c61a07..19e6c52b80 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1739,26 +1739,6 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
 		update_ts = oldSnapshotControl->next_map_update;
 		SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
 
-		/*
-		 * Zero threshold always overrides to latest xmin, if valid.  Without
-		 * some heuristic it will find its own snapshot too old on, for
-		 * example, a simple UPDATE -- which would make it useless for most
-		 * testing, but there is no principled way to ensure that it doesn't
-		 * fail in this way.  Use a five-second delay to try to get useful
-		 * testing behavior, but this may need adjustment.
-		 */
-		if (old_snapshot_threshold == 0)
-		{
-			if (TransactionIdPrecedes(latest_xmin, MyPgXact->xmin)
-				&& TransactionIdFollows(latest_xmin, xlimit))
-				xlimit = latest_xmin;
-
-			ts -= 5 * USECS_PER_SEC;
-			SetOldSnapshotThresholdTimestamp(ts, xlimit);
-
-			return xlimit;
-		}
-
 		ts = AlignTimestampToMinuteBoundary(ts)
 			- (old_snapshot_threshold * USECS_PER_MINUTE);
 
@@ -1860,10 +1840,6 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 	if (!map_update_required)
 		return;
 
-	/* No further tracking needed for 0 (used for testing). */
-	if (old_snapshot_threshold == 0)
-		return;
-
 	/*
 	 * We don't want to do something stupid with unusual values, but we don't
 	 * want to litter the log with warnings or break otherwise normal
diff --git a/src/test/modules/snapshot_too_old/Makefile b/src/test/modules/snapshot_too_old/Makefile
index dfb4537f63..be5ad77b7e 100644
--- a/src/test/modules/snapshot_too_old/Makefile
+++ b/src/test/modules/snapshot_too_old/Makefile
@@ -1,14 +1,20 @@
 # src/test/modules/snapshot_too_old/Makefile
 
-# Note: because we don't tell the Makefile there are any regression tests,
-# we have to clean those result files explicitly
-EXTRA_CLEAN = $(pg_regress_clean_files)
+MODULE_big = test_sto
+OBJS = \
+	$(WIN32RES) \
+	test_sto.o
+
+EXTENSION = test_sto
+DATA = test_sto--1.0.sql
+PGDESCFILE = "test_sto -- internal testing for snapshot too old errors"
 
 ISOLATION = sto_using_cursor sto_using_select sto_using_hash_index
 ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/sto.conf
 
-# Disabled because these tests require "old_snapshot_threshold" >= 0, which
-# typical installcheck users do not have (e.g. buildfarm clients).
+# Disabled because these tests require "old_snapshot_threshold" = 10, which
+# typical installcheck users do not have (e.g. buildfarm clients) and also
+# because it'd be dangerous on a production system.
 NO_INSTALLCHECK = 1
 
 ifdef USE_PGXS
@@ -21,8 +27,3 @@ top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 include $(top_srcdir)/contrib/contrib-global.mk
 endif
-
-# But it can nonetheless be very helpful to run tests on preexisting
-# installation, allow to do so, but only if requested explicitly.
-installcheck-force:
-	$(pg_isolation_regress_installcheck) $(ISOLATION)
diff --git a/src/test/modules/snapshot_too_old/expected/sto_using_cursor.out b/src/test/modules/snapshot_too_old/expected/sto_using_cursor.out
index 8cc29ec82f..b007e2dc17 100644
--- a/src/test/modules/snapshot_too_old/expected/sto_using_cursor.out
+++ b/src/test/modules/snapshot_too_old/expected/sto_using_cursor.out
@@ -1,73 +1,60 @@
-Parsed test spec with 2 sessions
+Parsed test spec with 3 sessions
 
-starting permutation: s1decl s1f1 s1sleep s1f2 s2u
+starting permutation: t00 s1decl s1f1 t10 s2u s1f2 t20 s1f3
+step t00: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
 step s1decl: DECLARE cursor1 CURSOR FOR SELECT c FROM sto1;
 step s1f1: FETCH FIRST FROM cursor1;
 c              
 
 1              
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+step t10: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:10:00Z');
+test_sto_clobber_snapshot_timestamp
 
-0                             
+               
+step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
 step s1f2: FETCH FIRST FROM cursor1;
 c              
 
 1              
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
+step t20: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:20:00Z');
+test_sto_clobber_snapshot_timestamp
 
-starting permutation: s1decl s1f1 s1sleep s2u s1f2
-step s1decl: DECLARE cursor1 CURSOR FOR SELECT c FROM sto1;
-step s1f1: FETCH FIRST FROM cursor1;
-c              
+               
+step s1f3: FETCH FIRST FROM cursor1;
+ERROR:  snapshot too old
+test_sto_reset_all_state
 
-1              
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+               
 
-0                             
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1f2: FETCH FIRST FROM cursor1;
-ERROR:  snapshot too old
+starting permutation: t00 s1decl s1f1 t10 s1f2 t20 s1f3
+step t00: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z');
+test_sto_clobber_snapshot_timestamp
 
-starting permutation: s1decl s1f1 s2u s1sleep s1f2
+               
 step s1decl: DECLARE cursor1 CURSOR FOR SELECT c FROM sto1;
 step s1f1: FETCH FIRST FROM cursor1;
 c              
 
 1              
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+step t10: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:10:00Z');
+test_sto_clobber_snapshot_timestamp
 
-0                             
+               
 step s1f2: FETCH FIRST FROM cursor1;
-ERROR:  snapshot too old
-
-starting permutation: s1decl s2u s1f1 s1sleep s1f2
-step s1decl: DECLARE cursor1 CURSOR FOR SELECT c FROM sto1;
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1f1: FETCH FIRST FROM cursor1;
 c              
 
 1              
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
-
-0                             
-step s1f2: FETCH FIRST FROM cursor1;
-ERROR:  snapshot too old
+step t20: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:20:00Z');
+test_sto_clobber_snapshot_timestamp
 
-starting permutation: s2u s1decl s1f1 s1sleep s1f2
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1decl: DECLARE cursor1 CURSOR FOR SELECT c FROM sto1;
-step s1f1: FETCH FIRST FROM cursor1;
+               
+step s1f3: FETCH FIRST FROM cursor1;
 c              
 
-2              
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+1              
+test_sto_reset_all_state
 
-0                             
-step s1f2: FETCH FIRST FROM cursor1;
-ERROR:  snapshot too old
+               
diff --git a/src/test/modules/snapshot_too_old/expected/sto_using_hash_index.out b/src/test/modules/snapshot_too_old/expected/sto_using_hash_index.out
index bf94054790..091c212adc 100644
--- a/src/test/modules/snapshot_too_old/expected/sto_using_hash_index.out
+++ b/src/test/modules/snapshot_too_old/expected/sto_using_hash_index.out
@@ -1,15 +1,31 @@
-Parsed test spec with 2 sessions
+Parsed test spec with 3 sessions
 
-starting permutation: noseq s1f1 s2sleep s2u s1f2
+starting permutation: t00 noseq s1f1 t10 s2u s2v1 s1f2 t22 s2v2 s1f3
+step t00: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
 step noseq: SET enable_seqscan = false;
 step s1f1: SELECT c FROM sto1 where c = 1000;
 c              
 
 1000           
-step s2sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+step t10: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:10:00Z');
+test_sto_clobber_snapshot_timestamp
 
-0                             
+               
 step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1000;
+step s2v1: VACUUM sto1;
 step s1f2: SELECT c FROM sto1 where c = 1001;
+c              
+
+step t22: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:22:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
+step s2v2: VACUUM sto1;
+step s1f3: SELECT c FROM sto1 where c = 1001;
 ERROR:  snapshot too old
+test_sto_reset_all_state
+
+               
diff --git a/src/test/modules/snapshot_too_old/expected/sto_using_select.out b/src/test/modules/snapshot_too_old/expected/sto_using_select.out
index eb15bc23bf..d1402ae8ce 100644
--- a/src/test/modules/snapshot_too_old/expected/sto_using_select.out
+++ b/src/test/modules/snapshot_too_old/expected/sto_using_select.out
@@ -1,55 +1,77 @@
-Parsed test spec with 2 sessions
+Parsed test spec with 3 sessions
 
-starting permutation: s1f1 s1sleep s1f2 s2u
+starting permutation: t00 s1f1 t01 s2u s1f2 t10 s1f3 t11 s1f4
+step t00: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
 step s1f1: SELECT c FROM sto1 ORDER BY c LIMIT 1;
 c              
 
 1              
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+step t01: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:01:00Z');
+test_sto_clobber_snapshot_timestamp
 
-0                             
+               
+step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
 step s1f2: SELECT c FROM sto1 ORDER BY c LIMIT 1;
 c              
 
 1              
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
+step t10: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:10:00Z');
+test_sto_clobber_snapshot_timestamp
 
-starting permutation: s1f1 s1sleep s2u s1f2
-step s1f1: SELECT c FROM sto1 ORDER BY c LIMIT 1;
+               
+step s1f3: SELECT c FROM sto1 ORDER BY c LIMIT 1;
 c              
 
 1              
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+step t11: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:11:00Z');
+test_sto_clobber_snapshot_timestamp
 
-0                             
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1f2: SELECT c FROM sto1 ORDER BY c LIMIT 1;
-ERROR:  snapshot too old
+               
+step s1f4: SELECT c FROM sto1 ORDER BY c LIMIT 1;
+c              
+
+1              
+test_sto_reset_all_state
 
-starting permutation: s1f1 s2u s1sleep s1f2
+               
+
+starting permutation: t00 s1f1 t01 s1f2 t10 s1f3 t11 s1f4
+step t00: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
 step s1f1: SELECT c FROM sto1 ORDER BY c LIMIT 1;
 c              
 
 1              
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+step t01: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:01:00Z');
+test_sto_clobber_snapshot_timestamp
 
-0                             
+               
 step s1f2: SELECT c FROM sto1 ORDER BY c LIMIT 1;
-ERROR:  snapshot too old
+c              
 
-starting permutation: s2u s1f1 s1sleep s1f2
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1f1: SELECT c FROM sto1 ORDER BY c LIMIT 1;
+1              
+step t10: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:10:00Z');
+test_sto_clobber_snapshot_timestamp
+
+               
+step s1f3: SELECT c FROM sto1 ORDER BY c LIMIT 1;
 c              
 
-2              
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting        pg_sleep       
+1              
+step t11: SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:11:00Z');
+test_sto_clobber_snapshot_timestamp
 
-0                             
-step s1f2: SELECT c FROM sto1 ORDER BY c LIMIT 1;
-ERROR:  snapshot too old
+               
+step s1f4: SELECT c FROM sto1 ORDER BY c LIMIT 1;
+c              
+
+1              
+test_sto_reset_all_state
+
+               
+unused step name: t02
diff --git a/src/test/modules/snapshot_too_old/specs/sto_using_cursor.spec b/src/test/modules/snapshot_too_old/specs/sto_using_cursor.spec
index eac18ca5b9..3be084b8fe 100644
--- a/src/test/modules/snapshot_too_old/specs/sto_using_cursor.spec
+++ b/src/test/modules/snapshot_too_old/specs/sto_using_cursor.spec
@@ -1,19 +1,14 @@
 # This test provokes a "snapshot too old" error using a cursor.
 #
-# The sleep is needed because with a threshold of zero a statement could error
-# on changes it made.  With more normal settings no external delay is needed,
-# but we don't want these tests to run long enough to see that, since
-# granularity is in minutes.
-#
-# Since results depend on the value of old_snapshot_threshold, sneak that into
-# the line generated by the sleep, so that a surprising values isn't so hard
-# to identify.
+# Expects old_snapshot_threshold = 10.  Not suitable for installcheck since it
+# messes with internal snapmgr.c state.
 
 setup
 {
+    CREATE EXTENSION IF NOT EXISTS test_sto;
+    SELECT test_sto_reset_all_state();
     CREATE TABLE sto1 (c int NOT NULL);
     INSERT INTO sto1 SELECT generate_series(1, 1000);
-    CREATE TABLE sto2 (c int NOT NULL);
 }
 setup
 {
@@ -22,16 +17,29 @@ setup
 
 teardown
 {
-    DROP TABLE sto1, sto2;
+    DROP TABLE sto1;
+    SELECT test_sto_reset_all_state();
 }
 
 session "s1"
 setup			{ BEGIN ISOLATION LEVEL REPEATABLE READ; }
 step "s1decl"	{ DECLARE cursor1 CURSOR FOR SELECT c FROM sto1; }
 step "s1f1"		{ FETCH FIRST FROM cursor1; }
-step "s1sleep"	{ SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold'; }
 step "s1f2"		{ FETCH FIRST FROM cursor1; }
+step "s1f3"		{ FETCH FIRST FROM cursor1; }
 teardown		{ COMMIT; }
 
 session "s2"
 step "s2u"		{ UPDATE sto1 SET c = 1001 WHERE c = 1; }
+
+session "time"
+step "t00"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z'); }
+step "t10"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:10:00Z'); }
+step "t20"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:20:00Z'); }
+
+# if there's an update, we get a snapshot too old error at time 00:20 (not before,
+# because we need page pruning to see the xmin level change from 10 minutes earlier)
+permutation "t00" "s1decl" "s1f1" "t10" "s2u" "s1f2" "t20" "s1f3"
+
+# if there's no update, no snapshot too old error at time 00:20
+permutation "t00" "s1decl" "s1f1" "t10"       "s1f2" "t20" "s1f3"
diff --git a/src/test/modules/snapshot_too_old/specs/sto_using_hash_index.spec b/src/test/modules/snapshot_too_old/specs/sto_using_hash_index.spec
index 33d91ff852..f90bca3b7a 100644
--- a/src/test/modules/snapshot_too_old/specs/sto_using_hash_index.spec
+++ b/src/test/modules/snapshot_too_old/specs/sto_using_hash_index.spec
@@ -1,8 +1,12 @@
 # This test is like sto_using_select, except that we test access via a
-# hash index.
+# hash index.  Explicit vacuuming is required in this version because
+# there is are no incidental calls to heap_page_prune_opt() that can
+# call SetOldSnapshotThresholdTimestamp().
 
 setup
 {
+	CREATE EXTENSION IF NOT EXISTS test_sto;
+	SELECT test_sto_reset_all_state();
     CREATE TABLE sto1 (c int NOT NULL);
     INSERT INTO sto1 SELECT generate_series(1, 1000);
     CREATE INDEX idx_sto1 ON sto1 USING HASH (c);
@@ -15,6 +19,7 @@ setup
 teardown
 {
     DROP TABLE sto1;
+	SELECT test_sto_reset_all_state();
 }
 
 session "s1"
@@ -22,10 +27,18 @@ setup			{ BEGIN ISOLATION LEVEL REPEATABLE READ; }
 step "noseq"	{ SET enable_seqscan = false; }
 step "s1f1"		{ SELECT c FROM sto1 where c = 1000; }
 step "s1f2"		{ SELECT c FROM sto1 where c = 1001; }
+step "s1f3"		{ SELECT c FROM sto1 where c = 1001; }
 teardown		{ ROLLBACK; }
 
 session "s2"
-step "s2sleep"	{ SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold'; }
 step "s2u"		{ UPDATE sto1 SET c = 1001 WHERE c = 1000; }
+step "s2v1"		{ VACUUM sto1; }
+step "s2v2"		{ VACUUM sto1; }
 
-permutation "noseq" "s1f1" "s2sleep" "s2u" "s1f2"
+session "time"
+step "t00"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z'); }
+step "t10"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:10:00Z'); }
+step "t22"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:22:00Z'); }
+
+# snapshot too old at t22
+permutation "t00" "noseq" "s1f1" "t10" "s2u" "s2v1" "s1f2" "t22" "s2v2" "s1f3"
diff --git a/src/test/modules/snapshot_too_old/specs/sto_using_select.spec b/src/test/modules/snapshot_too_old/specs/sto_using_select.spec
index d7c34f3d89..21f25ca73a 100644
--- a/src/test/modules/snapshot_too_old/specs/sto_using_select.spec
+++ b/src/test/modules/snapshot_too_old/specs/sto_using_select.spec
@@ -1,19 +1,14 @@
 # This test provokes a "snapshot too old" error using SELECT statements.
 #
-# The sleep is needed because with a threshold of zero a statement could error
-# on changes it made.  With more normal settings no external delay is needed,
-# but we don't want these tests to run long enough to see that, since
-# granularity is in minutes.
-#
-# Since results depend on the value of old_snapshot_threshold, sneak that into
-# the line generated by the sleep, so that a surprising values isn't so hard
-# to identify.
+# Expects old_snapshot_threshold = 10.  Not suitable for installcheck since it
+# messes with internal snapmgr.c state.
 
 setup
 {
+	CREATE EXTENSION IF NOT EXISTS test_sto;
+	SELECT test_sto_reset_all_state();
     CREATE TABLE sto1 (c int NOT NULL);
     INSERT INTO sto1 SELECT generate_series(1, 1000);
-    CREATE TABLE sto2 (c int NOT NULL);
 }
 setup
 {
@@ -22,15 +17,30 @@ setup
 
 teardown
 {
-    DROP TABLE sto1, sto2;
+	DROP TABLE sto1;
+    SELECT test_sto_reset_all_state();
 }
 
 session "s1"
 setup			{ BEGIN ISOLATION LEVEL REPEATABLE READ; }
 step "s1f1"		{ SELECT c FROM sto1 ORDER BY c LIMIT 1; }
-step "s1sleep"	{ SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold'; }
 step "s1f2"		{ SELECT c FROM sto1 ORDER BY c LIMIT 1; }
+step "s1f3"		{ SELECT c FROM sto1 ORDER BY c LIMIT 1; }
+step "s1f4"		{ SELECT c FROM sto1 ORDER BY c LIMIT 1; }
 teardown		{ COMMIT; }
 
 session "s2"
 step "s2u"		{ UPDATE sto1 SET c = 1001 WHERE c = 1; }
+
+session "time"
+step "t00"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z'); }
+step "t01"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:01:00Z'); }
+step "t02"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:02:00Z'); }
+step "t10"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:10:00Z'); }
+step "t11"		{ SELECT test_sto_clobber_snapshot_timestamp('3000-01-01 00:11:00Z'); }
+
+# if there's an update, we get a snapshot too old error at time 00:11
+permutation "t00" "s1f1" "t01" "s2u" "s1f2" "t10" "s1f3" "t11" "s1f4"
+
+# without the update, we get no snapshot too old error at time 00:11
+permutation "t00" "s1f1" "t01"       "s1f2" "t10" "s1f3" "t11" "s1f4"
diff --git a/src/test/modules/snapshot_too_old/sto.conf b/src/test/modules/snapshot_too_old/sto.conf
index 7eeaeeb0dc..5ed46b3560 100644
--- a/src/test/modules/snapshot_too_old/sto.conf
+++ b/src/test/modules/snapshot_too_old/sto.conf
@@ -1,2 +1,2 @@
 autovacuum = off
-old_snapshot_threshold = 0
+old_snapshot_threshold = 10
diff --git a/src/test/modules/snapshot_too_old/test_sto--1.0.sql b/src/test/modules/snapshot_too_old/test_sto--1.0.sql
new file mode 100644
index 0000000000..c10afcf23a
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/test_sto--1.0.sql
@@ -0,0 +1,14 @@
+/* src/test/modules/snapshot_too_old/test_sto--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION test_sto" to load this file. \quit
+
+CREATE FUNCTION test_sto_clobber_snapshot_timestamp(now timestamptz)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'test_sto_clobber_snapshot_timestamp'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION test_sto_reset_all_state()
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'test_sto_reset_all_state'
+LANGUAGE C STRICT;
diff --git a/src/test/modules/snapshot_too_old/test_sto.c b/src/test/modules/snapshot_too_old/test_sto.c
new file mode 100644
index 0000000000..f6c9a1a000
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/test_sto.c
@@ -0,0 +1,74 @@
+/*-------------------------------------------------------------------------
+ *
+ * test_sto.c
+ *	  Functions to support isolation tests for snapshot too old.
+ *
+ * These functions are not intended for use in a production database and
+ * could cause corruption.
+ *
+ * Copyright (c) 2020, PostgreSQL Global Development Group
+ *
+ *	  src/test/modules/snapshot_too_old/test_sto.c
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "storage/lwlock.h"
+#include "utils/old_snapshot.h"
+#include "utils/snapmgr.h"
+#include "utils/timestamp.h"
+
+PG_MODULE_MAGIC;
+PG_FUNCTION_INFO_V1(test_sto_reset_all_state);
+PG_FUNCTION_INFO_V1(test_sto_clobber_snapshot_timestamp);
+
+/*
+ * Revert to initial state.  This is not safe except in carefully
+ * controlled tests.
+ */
+Datum
+test_sto_reset_all_state(PG_FUNCTION_ARGS)
+{
+
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
+	oldSnapshotControl->count_used = 0;
+	oldSnapshotControl->current_timestamp = 0;
+	oldSnapshotControl->head_offset = 0;
+	oldSnapshotControl->head_timestamp = 0;
+	LWLockRelease(OldSnapshotTimeMapLock);
+
+	SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
+	oldSnapshotControl->latest_xmin = InvalidTransactionId;
+	oldSnapshotControl->next_map_update = 0;
+	SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
+
+	SpinLockAcquire(&oldSnapshotControl->mutex_current);
+	oldSnapshotControl->current_timestamp = 0;
+	SpinLockRelease(&oldSnapshotControl->mutex_current);
+
+	SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
+	oldSnapshotControl->threshold_timestamp = 0;
+	oldSnapshotControl->threshold_xid = InvalidTransactionId;
+	SpinLockRelease(&oldSnapshotControl->mutex_threshold);
+
+	PG_RETURN_NULL();
+}
+
+/*
+ * Update the minimum time used in snapshot-too-old code.  If set ahead of the
+ * current wall clock time (for example, the year 3000), this allows testing
+ * with arbitrary times.  This is not safe except in carefully controlled
+ * tests.
+ */
+Datum
+test_sto_clobber_snapshot_timestamp(PG_FUNCTION_ARGS)
+{
+	TimestampTz new_current_timestamp = PG_GETARG_TIMESTAMPTZ(0);
+
+	SpinLockAcquire(&oldSnapshotControl->mutex_current);
+	oldSnapshotControl->current_timestamp = new_current_timestamp;
+	SpinLockRelease(&oldSnapshotControl->mutex_current);
+
+	PG_RETURN_NULL();
+}
diff --git a/src/test/modules/snapshot_too_old/test_sto.control b/src/test/modules/snapshot_too_old/test_sto.control
new file mode 100644
index 0000000000..e497e5450e
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/test_sto.control
@@ -0,0 +1,5 @@
+# test_sto test module
+comment = 'functions for internal testing of snapshot too old errors'
+default_version = '1.0'
+module_pathname = '$libdir/test_sto'
+relocatable = true
-- 
2.20.1

From c56dc1f15a71a988f84e573f4a54322643236143 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Mon, 20 Apr 2020 17:05:42 +1200
Subject: [PATCH v3 5/5] Truncate snapshot-too-old time map when CLOG is
 truncated.

It's not safe to leave xids in the map that have wrapped around.

Reported-by: Andres Freund
---
 src/backend/commands/vacuum.c                 |  3 +
 src/backend/utils/time/snapmgr.c              | 21 +++++
 src/include/utils/snapmgr.h                   |  1 +
 src/test/modules/snapshot_too_old/Makefile    |  2 +
 .../snapshot_too_old/t/001_truncate.pl        | 80 +++++++++++++++++++
 .../snapshot_too_old/test_sto--1.0.sql        |  5 ++
 src/test/modules/snapshot_too_old/test_sto.c  | 16 ++++
 7 files changed, 128 insertions(+)
 create mode 100644 src/test/modules/snapshot_too_old/t/001_truncate.pl

diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 5a110edb07..37ead45fa5 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1627,6 +1627,9 @@ vac_truncate_clog(TransactionId frozenXID,
 	 */
 	AdvanceOldestCommitTsXid(frozenXID);
 
+	/* Make sure snapshot_too_old drops old XIDs. */
+	TruncateOldSnapshotTimeMapping(frozenXID);
+
 	/*
 	 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
 	 */
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 19e6c52b80..edb47c9664 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1974,6 +1974,27 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 }
 
 
+/*
+ * Remove old xids from the timing map, so the CLOG can be truncated.
+ */
+void
+TruncateOldSnapshotTimeMapping(TransactionId frozenXID)
+{
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
+	while (oldSnapshotControl->count_used > 0 &&
+		   TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[oldSnapshotControl->head_offset],
+								 frozenXID))
+	{
+		oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
+		oldSnapshotControl->head_offset =
+			(oldSnapshotControl->head_offset + 1) %
+			OLD_SNAPSHOT_TIME_MAP_ENTRIES;
+		oldSnapshotControl->count_used--;
+	}
+	LWLockRelease(OldSnapshotTimeMapLock);
+}
+
+
 /*
  * Setup a snapshot that replaces normal catalog snapshots that allows catalog
  * access to behave just like it did at a certain point in the past.
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index b28d13ce84..4f53aad956 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -135,6 +135,7 @@ extern TransactionId TransactionIdLimitedForOldSnapshots(TransactionId recentXmi
 														 Relation relation);
 extern void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken,
 										   TransactionId xmin);
+extern void TruncateOldSnapshotTimeMapping(TransactionId frozenXID);
 
 extern char *ExportSnapshot(Snapshot snapshot);
 
diff --git a/src/test/modules/snapshot_too_old/Makefile b/src/test/modules/snapshot_too_old/Makefile
index be5ad77b7e..0a69f3a232 100644
--- a/src/test/modules/snapshot_too_old/Makefile
+++ b/src/test/modules/snapshot_too_old/Makefile
@@ -17,6 +17,8 @@ ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/s
 # because it'd be dangerous on a production system.
 NO_INSTALLCHECK = 1
 
+TAP_TESTS = 1
+
 ifdef USE_PGXS
 PG_CONFIG = pg_config
 PGXS := $(shell $(PG_CONFIG) --pgxs)
diff --git a/src/test/modules/snapshot_too_old/t/001_truncate.pl b/src/test/modules/snapshot_too_old/t/001_truncate.pl
new file mode 100644
index 0000000000..849c70c5ec
--- /dev/null
+++ b/src/test/modules/snapshot_too_old/t/001_truncate.pl
@@ -0,0 +1,80 @@
+# Test truncation of the old snapshot time mapping, to check
+# that we can't get into trouble when xids wrap around.
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 6;
+
+my $node = get_new_node('master');
+$node->init;
+$node->append_conf("postgresql.conf", "timezone = UTC");
+$node->append_conf("postgresql.conf", "old_snapshot_threshold=10");
+$node->append_conf("postgresql.conf", "max_prepared_transactions=10");
+$node->start;
+$node->psql('postgres', 'update pg_database set datallowconn = true');
+$node->psql('postgres', 'create extension test_sto');
+
+note "check time map is truncated when CLOG is";
+
+# build up a time map with 4 entries
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:00:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:01:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:02:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:03:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+my $count;
+$node->psql('postgres', "select test_sto_time_map_size()", stdout => \$count);
+is($count, 4, "expected to have 4 entries in the old snapshot time map");
+
+# now cause frozen XID to advance
+$node->psql('postgres', 'vacuum freeze');
+$node->psql('template0', 'vacuum freeze');
+$node->psql('template1', 'vacuum freeze');
+
+# we expect all XIDs to have been truncated
+$node->psql('postgres', "select test_sto_time_map_size()", stdout => \$count);
+is($count, 0, "expected to have 0 entries in the old snapshot time map");
+
+# put two more in the map
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:04:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:05:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select test_sto_time_map_size()", stdout => \$count);
+is($count, 2, "expected to have 2 entries in the old snapshot time map");
+
+# prepare a transaction, to stop xmin from getting further ahead
+$node->psql('postgres', "begin; select pg_current_xact_id(); prepare transaction 'tx1'");
+
+# add 16 more minutes (this tests wrapping around the mapping array, which is of size 10 + 10)...
+$node->psql('postgres', "select test_sto_clobber_snapshot_timestamp('3000-01-01 00:21:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select test_sto_time_map_size()", stdout => \$count);
+is($count, 18, "expected to have 18 entries in the old snapshot time map");
+
+# now cause frozen XID to advance
+$node->psql('postgres', 'vacuum freeze');
+$node->psql('template0', 'vacuum freeze');
+$node->psql('template1', 'vacuum freeze');
+
+# this should leave just 16
+$node->psql('postgres', "select test_sto_time_map_size()", stdout => \$count);
+is($count, 16, "expected to have 16 entries in the old snapshot time map");
+
+# commit tx1, and then freeze again to get rid of all of them
+$node->psql('postgres', "commit prepared 'tx1'");
+
+# now cause frozen XID to advance
+$node->psql('postgres', 'vacuum freeze');
+$node->psql('template0', 'vacuum freeze');
+$node->psql('template1', 'vacuum freeze');
+
+# we should now be back to empty
+$node->psql('postgres', "select test_sto_time_map_size()", stdout => \$count);
+is($count, 0, "expected to have 0 entries in the old snapshot time map");
+
+$node->stop;
diff --git a/src/test/modules/snapshot_too_old/test_sto--1.0.sql b/src/test/modules/snapshot_too_old/test_sto--1.0.sql
index c10afcf23a..b4cc970ba4 100644
--- a/src/test/modules/snapshot_too_old/test_sto--1.0.sql
+++ b/src/test/modules/snapshot_too_old/test_sto--1.0.sql
@@ -12,3 +12,8 @@ CREATE FUNCTION test_sto_reset_all_state()
 RETURNS VOID
 AS 'MODULE_PATHNAME', 'test_sto_reset_all_state'
 LANGUAGE C STRICT;
+
+CREATE FUNCTION test_sto_time_map_size()
+RETURNS int4
+AS 'MODULE_PATHNAME', 'test_sto_time_map_size'
+LANGUAGE C STRICT;
diff --git a/src/test/modules/snapshot_too_old/test_sto.c b/src/test/modules/snapshot_too_old/test_sto.c
index f6c9a1a000..5b874a9641 100644
--- a/src/test/modules/snapshot_too_old/test_sto.c
+++ b/src/test/modules/snapshot_too_old/test_sto.c
@@ -22,6 +22,7 @@
 PG_MODULE_MAGIC;
 PG_FUNCTION_INFO_V1(test_sto_reset_all_state);
 PG_FUNCTION_INFO_V1(test_sto_clobber_snapshot_timestamp);
+PG_FUNCTION_INFO_V1(test_sto_time_map_size);
 
 /*
  * Revert to initial state.  This is not safe except in carefully
@@ -72,3 +73,18 @@ test_sto_clobber_snapshot_timestamp(PG_FUNCTION_ARGS)
 
 	PG_RETURN_NULL();
 }
+
+/*
+ * How many xids are currently in the xid/time map?  Used only for testing.
+ */
+Datum
+test_sto_time_map_size(PG_FUNCTION_ARGS)
+{
+	int		result;
+
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
+	result = oldSnapshotControl->count_used;
+	LWLockRelease(OldSnapshotTimeMapLock);
+
+	PG_RETURN_INT32(result);
+}
-- 
2.20.1

Reply via email to