On Fri, Apr 17, 2020 at 3:37 PM Thomas Munro <thomas.mu...@gmail.com> wrote:
> On Mon, Apr 13, 2020 at 5:14 PM Andres Freund <and...@anarazel.de> wrote:
> > FWIW, I think the part that is currently harder to fix is the time->xmin
> > mapping and some related pieces. Second comes the test
> > infrastructure. Compared to those, adding additional checks for old
> > snapshots wouldn't be too hard - although I'd argue that the approach of
> > sprinkling these tests everywhere isn't that scalable...
>
> Just trying out some ideas here...
> ... so I guess maybe I'll
> need to go and figure out how to write some perl.

Here's a very rough sketch of what I mean.  Patches 0001-0003 are
stolen directly from Robert.  I think 0005's t/001_truncate.pl
demonstrates that the map is purged of old xids as appropriate.  I
suppose this style of testing based on manually advancing the hands of
time should also allow for testing early pruning, but it may be Monday
before I can try that so I'm sharing what I have so far in case it's
useful...  I think this really wants to be in src/test/modules, not
contrib, but I just bolted it on top of what Robert posted.
From b78644a0f9580934b136ca8413366de91198203f Mon Sep 17 00:00:00 2001
From: Robert Haas <rh...@postgresql.org>
Date: Thu, 16 Apr 2020 09:37:31 -0400
Subject: [PATCH v1 1/5] Expose oldSnapshotControl.

---
 src/backend/utils/time/snapmgr.c | 55 +----------------------
 src/include/utils/old_snapshot.h | 75 ++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 53 deletions(-)
 create mode 100644 src/include/utils/old_snapshot.h

diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 1c063c592c..abaaea569a 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -63,6 +63,7 @@
 #include "storage/spin.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"
+#include "utils/old_snapshot.h"
 #include "utils/rel.h"
 #include "utils/resowner_private.h"
 #include "utils/snapmgr.h"
@@ -74,59 +75,7 @@
  */
 int			old_snapshot_threshold; /* number of minutes, -1 disables */
 
-/*
- * Structure for dealing with old_snapshot_threshold implementation.
- */
-typedef struct OldSnapshotControlData
-{
-	/*
-	 * Variables for old snapshot handling are shared among processes and are
-	 * only allowed to move forward.
-	 */
-	slock_t		mutex_current;	/* protect current_timestamp */
-	TimestampTz current_timestamp;	/* latest snapshot timestamp */
-	slock_t		mutex_latest_xmin;	/* protect latest_xmin and next_map_update */
-	TransactionId latest_xmin;	/* latest snapshot xmin */
-	TimestampTz next_map_update;	/* latest snapshot valid up to */
-	slock_t		mutex_threshold;	/* protect threshold fields */
-	TimestampTz threshold_timestamp;	/* earlier snapshot is old */
-	TransactionId threshold_xid;	/* earlier xid may be gone */
-
-	/*
-	 * Keep one xid per minute for old snapshot error handling.
-	 *
-	 * Use a circular buffer with a head offset, a count of entries currently
-	 * used, and a timestamp corresponding to the xid at the head offset.  A
-	 * count_used value of zero means that there are no times stored; a
-	 * count_used value of OLD_SNAPSHOT_TIME_MAP_ENTRIES means that the buffer
-	 * is full and the head must be advanced to add new entries.  Use
-	 * timestamps aligned to minute boundaries, since that seems less
-	 * surprising than aligning based on the first usage timestamp.  The
-	 * latest bucket is effectively stored within latest_xmin.  The circular
-	 * buffer is updated when we get a new xmin value that doesn't fall into
-	 * the same interval.
-	 *
-	 * It is OK if the xid for a given time slot is from earlier than
-	 * calculated by adding the number of minutes corresponding to the
-	 * (possibly wrapped) distance from the head offset to the time of the
-	 * head entry, since that just results in the vacuuming of old tuples
-	 * being slightly less aggressive.  It would not be OK for it to be off in
-	 * the other direction, since it might result in vacuuming tuples that are
-	 * still expected to be there.
-	 *
-	 * Use of an SLRU was considered but not chosen because it is more
-	 * heavyweight than is needed for this, and would probably not be any less
-	 * code to implement.
-	 *
-	 * Persistence is not needed.
-	 */
-	int			head_offset;	/* subscript of oldest tracked time */
-	TimestampTz head_timestamp; /* time corresponding to head xid */
-	int			count_used;		/* how many slots are in use */
-	TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
-} OldSnapshotControlData;
-
-static volatile OldSnapshotControlData *oldSnapshotControl;
+volatile OldSnapshotControlData *oldSnapshotControl;
 
 
 /*
diff --git a/src/include/utils/old_snapshot.h b/src/include/utils/old_snapshot.h
new file mode 100644
index 0000000000..284af7d508
--- /dev/null
+++ b/src/include/utils/old_snapshot.h
@@ -0,0 +1,75 @@
+/*-------------------------------------------------------------------------
+ *
+ * old_snapshot.h
+ *		Data structures for 'snapshot too old'
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/include/utils/old_snapshot.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef OLD_SNAPSHOT_H
+#define OLD_SNAPSHOT_H
+
+#include "datatype/timestamp.h"
+#include "storage/s_lock.h"
+
+/*
+ * Structure for dealing with old_snapshot_threshold implementation.
+ */
+typedef struct OldSnapshotControlData
+{
+	/*
+	 * Variables for old snapshot handling are shared among processes and are
+	 * only allowed to move forward.
+	 */
+	slock_t		mutex_current;	/* protect current_timestamp */
+	TimestampTz current_timestamp;	/* latest snapshot timestamp */
+	slock_t		mutex_latest_xmin;	/* protect latest_xmin and next_map_update */
+	TransactionId latest_xmin;	/* latest snapshot xmin */
+	TimestampTz next_map_update;	/* latest snapshot valid up to */
+	slock_t		mutex_threshold;	/* protect threshold fields */
+	TimestampTz threshold_timestamp;	/* earlier snapshot is old */
+	TransactionId threshold_xid;	/* earlier xid may be gone */
+
+	/*
+	 * Keep one xid per minute for old snapshot error handling.
+	 *
+	 * Use a circular buffer with a head offset, a count of entries currently
+	 * used, and a timestamp corresponding to the xid at the head offset.  A
+	 * count_used value of zero means that there are no times stored; a
+	 * count_used value of OLD_SNAPSHOT_TIME_MAP_ENTRIES means that the buffer
+	 * is full and the head must be advanced to add new entries.  Use
+	 * timestamps aligned to minute boundaries, since that seems less
+	 * surprising than aligning based on the first usage timestamp.  The
+	 * latest bucket is effectively stored within latest_xmin.  The circular
+	 * buffer is updated when we get a new xmin value that doesn't fall into
+	 * the same interval.
+	 *
+	 * It is OK if the xid for a given time slot is from earlier than
+	 * calculated by adding the number of minutes corresponding to the
+	 * (possibly wrapped) distance from the head offset to the time of the
+	 * head entry, since that just results in the vacuuming of old tuples
+	 * being slightly less aggressive.  It would not be OK for it to be off in
+	 * the other direction, since it might result in vacuuming tuples that are
+	 * still expected to be there.
+	 *
+	 * Use of an SLRU was considered but not chosen because it is more
+	 * heavyweight than is needed for this, and would probably not be any less
+	 * code to implement.
+	 *
+	 * Persistence is not needed.
+	 */
+	int			head_offset;	/* subscript of oldest tracked time */
+	TimestampTz head_timestamp; /* time corresponding to head xid */
+	int			count_used;		/* how many slots are in use */
+	TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
+} OldSnapshotControlData;
+
+extern volatile OldSnapshotControlData *oldSnapshotControl;
+
+#endif
-- 
2.20.1

From e23d46f8482561fc2369525d5625058dad0ded5c Mon Sep 17 00:00:00 2001
From: Robert Haas <rh...@postgresql.org>
Date: Thu, 16 Apr 2020 12:14:32 -0400
Subject: [PATCH v1 2/5] contrib/old_snapshot: time->xid mapping.

---
 contrib/Makefile                           |   1 +
 contrib/old_snapshot/Makefile              |  24 ++++
 contrib/old_snapshot/old_snapshot--1.0.sql |  14 ++
 contrib/old_snapshot/old_snapshot.control  |   5 +
 contrib/old_snapshot/time_mapping.c        | 159 +++++++++++++++++++++
 5 files changed, 203 insertions(+)
 create mode 100644 contrib/old_snapshot/Makefile
 create mode 100644 contrib/old_snapshot/old_snapshot--1.0.sql
 create mode 100644 contrib/old_snapshot/old_snapshot.control
 create mode 100644 contrib/old_snapshot/time_mapping.c

diff --git a/contrib/Makefile b/contrib/Makefile
index 1846d415b6..452ade0782 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -27,6 +27,7 @@ SUBDIRS = \
 		lo		\
 		ltree		\
 		oid2name	\
+		old_snapshot	\
 		pageinspect	\
 		passwordcheck	\
 		pg_buffercache	\
diff --git a/contrib/old_snapshot/Makefile b/contrib/old_snapshot/Makefile
new file mode 100644
index 0000000000..091231f25f
--- /dev/null
+++ b/contrib/old_snapshot/Makefile
@@ -0,0 +1,24 @@
+# contrib/old_snapshot/Makefile
+
+MODULE_big = old_snapshot
+OBJS = \
+	$(WIN32RES) \
+	time_mapping.o
+PG_CPPFLAGS = -I$(libpq_srcdir)
+
+EXTENSION = old_snapshot
+DATA = old_snapshot--1.0.sql
+PGFILEDESC = "old_snapshot - utilities in support of old_snapshot_threshold"
+
+REGRESS = old_snapshot
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/old_snapshot
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/old_snapshot/old_snapshot--1.0.sql b/contrib/old_snapshot/old_snapshot--1.0.sql
new file mode 100644
index 0000000000..9ebb8829e3
--- /dev/null
+++ b/contrib/old_snapshot/old_snapshot--1.0.sql
@@ -0,0 +1,14 @@
+/* contrib/old_snapshot/old_snapshot--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION old_snapshot" to load this file. \quit
+
+-- Show visibility map and page-level visibility information for each block.
+CREATE FUNCTION pg_old_snapshot_time_mapping(array_offset OUT int4,
+											 end_timestamp OUT timestamptz,
+											 newest_xmin OUT xid)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_old_snapshot_time_mapping'
+LANGUAGE C STRICT;
+
+-- XXX. Do we want REVOKE commands here?
diff --git a/contrib/old_snapshot/old_snapshot.control b/contrib/old_snapshot/old_snapshot.control
new file mode 100644
index 0000000000..491eec536c
--- /dev/null
+++ b/contrib/old_snapshot/old_snapshot.control
@@ -0,0 +1,5 @@
+# old_snapshot extension
+comment = 'utilities in support of old_snapshot_threshold'
+default_version = '1.0'
+module_pathname = '$libdir/old_snapshot'
+relocatable = true
diff --git a/contrib/old_snapshot/time_mapping.c b/contrib/old_snapshot/time_mapping.c
new file mode 100644
index 0000000000..37e0055a00
--- /dev/null
+++ b/contrib/old_snapshot/time_mapping.c
@@ -0,0 +1,159 @@
+/*-------------------------------------------------------------------------
+ *
+ * time_mapping.c
+ *	  time to XID mapping information
+ *
+ * Copyright (c) 2020, PostgreSQL Global Development Group
+ *
+ *	  contrib/old_snapshot/time_mapping.c
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "storage/lwlock.h"
+#include "utils/old_snapshot.h"
+#include "utils/snapmgr.h"
+#include "utils/timestamp.h"
+
+/*
+ * Backend-private copy of the information from oldSnapshotControl which relates
+ * to the time to XID mapping, plus an index so that we can iterate.
+ *
+ * Note that the length of the xid_by_minute array is given by
+ * OLD_SNAPSHOT_TIME_MAP_ENTRIES (which is not a compile-time constant).
+ */
+typedef struct
+{
+	int				current_index;
+	int				head_offset;
+	TimestampTz		head_timestamp;
+	int				count_used;
+	TransactionId	xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
+} OldSnapshotTimeMapping;
+
+#define NUM_TIME_MAPPING_COLUMNS 3
+
+PG_MODULE_MAGIC;
+PG_FUNCTION_INFO_V1(pg_old_snapshot_time_mapping);
+
+static OldSnapshotTimeMapping *GetOldSnapshotTimeMapping(void);
+static TupleDesc MakeOldSnapshotTimeMappingTupleDesc(void);
+static HeapTuple MakeOldSnapshotTimeMappingTuple(TupleDesc tupdesc,
+												 OldSnapshotTimeMapping *mapping);
+
+/*
+ * SQL-callable set-returning function.
+ */
+Datum
+pg_old_snapshot_time_mapping(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	OldSnapshotTimeMapping *mapping;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		MemoryContext	oldcontext;
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+		mapping = GetOldSnapshotTimeMapping();
+		funcctx->user_fctx = mapping;
+		funcctx->tuple_desc = MakeOldSnapshotTimeMappingTupleDesc();
+		MemoryContextSwitchTo(oldcontext);
+	}
+
+	funcctx = SRF_PERCALL_SETUP();
+	mapping = (OldSnapshotTimeMapping *) funcctx->user_fctx;
+
+	while (mapping->current_index < mapping->count_used)
+	{
+		HeapTuple	tuple;
+
+		tuple = MakeOldSnapshotTimeMappingTuple(funcctx->tuple_desc, mapping);
+		++mapping->current_index;
+		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+	}
+
+	SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Get the old snapshot time mapping data from shared memory.
+ */
+static OldSnapshotTimeMapping *
+GetOldSnapshotTimeMapping(void)
+{
+	OldSnapshotTimeMapping *mapping;
+
+	mapping = palloc(offsetof(OldSnapshotTimeMapping, xid_by_minute)
+					 + sizeof(TransactionId) * OLD_SNAPSHOT_TIME_MAP_ENTRIES);
+	mapping->current_index = 0;
+
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
+	mapping->head_offset = oldSnapshotControl->head_offset;
+	mapping->head_timestamp = oldSnapshotControl->head_timestamp;
+	mapping->count_used = oldSnapshotControl->count_used;
+	for (int i = 0; i < OLD_SNAPSHOT_TIME_MAP_ENTRIES; ++i)
+		mapping->xid_by_minute[i] = oldSnapshotControl->xid_by_minute[i];
+	LWLockRelease(OldSnapshotTimeMapLock);
+
+	return mapping;
+}
+
+/*
+ * Build a tuple descriptor for the pg_old_snapshot_time_mapping() SRF.
+ */
+static TupleDesc
+MakeOldSnapshotTimeMappingTupleDesc(void)
+{
+	TupleDesc	tupdesc;
+
+	tupdesc = CreateTemplateTupleDesc(NUM_TIME_MAPPING_COLUMNS);
+
+	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "array_offset",
+					   INT4OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "end_timestamp",
+					   TIMESTAMPTZOID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "newest_xmin",
+					   XIDOID, -1, 0);
+
+	return BlessTupleDesc(tupdesc);
+}
+
+/*
+ * Convert one entry from the old snapshot time mapping to a HeapTuple.
+ */
+static HeapTuple
+MakeOldSnapshotTimeMappingTuple(TupleDesc tupdesc, OldSnapshotTimeMapping *mapping)
+{
+	Datum	values[NUM_TIME_MAPPING_COLUMNS];
+	bool	nulls[NUM_TIME_MAPPING_COLUMNS];
+	int		array_position;
+	TimestampTz	timestamp;
+
+	/*
+	 * Figure out the array position corresponding to the current index.
+	 *
+	 * Index 0 means the oldest entry in the mapping, which is stored at
+	 * mapping->head_offset. Index 1 means the next-oldest entry, which is a the
+	 * following index, and so on. We wrap around when we reach the end of the array.
+	 */
+	array_position = (mapping->head_offset + mapping->current_index)
+		% OLD_SNAPSHOT_TIME_MAP_ENTRIES;
+
+	/*
+	 * No explicit timestamp is stored for any entry other than the oldest one,
+	 * but each entry corresponds to 1-minute period, so we can just add.
+	 */
+	timestamp = TimestampTzPlusMilliseconds(mapping->head_timestamp,
+											mapping->current_index * 60000);
+
+	/* Initialize nulls and values arrays. */
+	memset(nulls, 0, sizeof(nulls));
+	values[0] = Int32GetDatum(array_position);
+	values[1] = TimestampTzGetDatum(timestamp);
+	values[2] = TransactionIdGetDatum(mapping->xid_by_minute[array_position]);
+
+	return heap_form_tuple(tupdesc, values, nulls);
+}
-- 
2.20.1

From 76ed484a4c7975d3bc507fab4d387b54c3ac152d Mon Sep 17 00:00:00 2001
From: Robert Haas <rh...@postgresql.org>
Date: Thu, 16 Apr 2020 12:15:57 -0400
Subject: [PATCH v1 3/5] Fix bugs in MaintainOldSnapshotTimeMapping.

---
 src/backend/utils/time/snapmgr.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index abaaea569a..72b2c61a07 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1926,10 +1926,32 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 	else
 	{
 		/* We need a new bucket, but it might not be the very next one. */
-		int			advance = ((ts - oldSnapshotControl->head_timestamp)
-							   / USECS_PER_MINUTE);
+		int			distance_to_new_tail;
+		int			distance_to_current_tail;
+		int			advance;
 
-		oldSnapshotControl->head_timestamp = ts;
+		/*
+		 * Our goal is for the new "tail" of the mapping, that is, the entry
+		 * which is newest and thus furthest from the "head" entry, to
+		 * correspond to "ts". Since there's one entry per minute, the
+		 * distance between the current head and the new tail is just the
+		 * number of minutes of difference between ts and the current
+		 * head_timestamp.
+		 *
+		 * The distance from the current head to the current tail is one
+		 * less than the number of entries in the mapping, because the
+		 * entry at the head_offset is for 0 minutes after head_timestamp.
+		 *
+		 * The difference between these two values is the number of minutes
+		 * by which we need to advance the mapping, either adding new entries
+		 * or rotating old ones out.
+		 */
+		distance_to_new_tail =
+			(ts - oldSnapshotControl->head_timestamp) / USECS_PER_MINUTE;
+		distance_to_current_tail =
+			oldSnapshotControl->count_used - 1;
+		advance = distance_to_new_tail - distance_to_current_tail;
+		Assert(advance > 0);
 
 		if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES)
 		{
@@ -1937,6 +1959,7 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 			oldSnapshotControl->head_offset = 0;
 			oldSnapshotControl->count_used = 1;
 			oldSnapshotControl->xid_by_minute[0] = xmin;
+			oldSnapshotControl->head_timestamp = ts;
 		}
 		else
 		{
@@ -1955,6 +1978,7 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 					else
 						oldSnapshotControl->head_offset = old_head + 1;
 					oldSnapshotControl->xid_by_minute[old_head] = xmin;
+					oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
 				}
 				else
 				{
-- 
2.20.1

From ed965c21b4e5841a4afc249f6365b774440ac8cf Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Fri, 17 Apr 2020 14:10:35 +1200
Subject: [PATCH v1 4/5] Add pg_clobber_current_snapshot_timestamp().

---
 contrib/old_snapshot/old_snapshot--1.0.sql |  5 +++++
 contrib/old_snapshot/time_mapping.c        | 13 +++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/contrib/old_snapshot/old_snapshot--1.0.sql b/contrib/old_snapshot/old_snapshot--1.0.sql
index 9ebb8829e3..aacf1704b5 100644
--- a/contrib/old_snapshot/old_snapshot--1.0.sql
+++ b/contrib/old_snapshot/old_snapshot--1.0.sql
@@ -11,4 +11,9 @@ RETURNS SETOF record
 AS 'MODULE_PATHNAME', 'pg_old_snapshot_time_mapping'
 LANGUAGE C STRICT;
 
+CREATE FUNCTION pg_clobber_current_snapshot_timestamp(now timestamptz)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'pg_clobber_current_snapshot_timestamp'
+LANGUAGE C STRICT;
+
 -- XXX. Do we want REVOKE commands here?
diff --git a/contrib/old_snapshot/time_mapping.c b/contrib/old_snapshot/time_mapping.c
index 37e0055a00..8728c4ddb5 100644
--- a/contrib/old_snapshot/time_mapping.c
+++ b/contrib/old_snapshot/time_mapping.c
@@ -36,6 +36,7 @@ typedef struct
 
 PG_MODULE_MAGIC;
 PG_FUNCTION_INFO_V1(pg_old_snapshot_time_mapping);
+PG_FUNCTION_INFO_V1(pg_clobber_current_snapshot_timestamp);
 
 static OldSnapshotTimeMapping *GetOldSnapshotTimeMapping(void);
 static TupleDesc MakeOldSnapshotTimeMappingTupleDesc(void);
@@ -157,3 +158,15 @@ MakeOldSnapshotTimeMappingTuple(TupleDesc tupdesc, OldSnapshotTimeMapping *mappi
 
 	return heap_form_tuple(tupdesc, values, nulls);
 }
+
+Datum
+pg_clobber_current_snapshot_timestamp(PG_FUNCTION_ARGS)
+{
+	TimestampTz new_current_timestamp = PG_GETARG_TIMESTAMPTZ(0);
+
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
+	oldSnapshotControl->current_timestamp = new_current_timestamp;
+	LWLockRelease(OldSnapshotTimeMapLock);
+
+	PG_RETURN_NULL();
+}
-- 
2.20.1

From 7babd9e2bf24063485334a59ab115f6bfdc33db2 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Fri, 17 Apr 2020 15:18:49 +1200
Subject: [PATCH v1 5/5] Truncate old snapshot XIDs before truncating CLOG.

---
 contrib/old_snapshot/Makefile          |  2 +-
 contrib/old_snapshot/t/001_truncate.pl | 80 ++++++++++++++++++++++++++
 src/backend/commands/vacuum.c          |  3 +
 src/backend/utils/time/snapmgr.c       | 21 +++++++
 src/include/utils/snapmgr.h            |  1 +
 5 files changed, 106 insertions(+), 1 deletion(-)
 create mode 100644 contrib/old_snapshot/t/001_truncate.pl

diff --git a/contrib/old_snapshot/Makefile b/contrib/old_snapshot/Makefile
index 091231f25f..c839d30346 100644
--- a/contrib/old_snapshot/Makefile
+++ b/contrib/old_snapshot/Makefile
@@ -10,7 +10,7 @@ EXTENSION = old_snapshot
 DATA = old_snapshot--1.0.sql
 PGFILEDESC = "old_snapshot - utilities in support of old_snapshot_threshold"
 
-REGRESS = old_snapshot
+TAP_TESTS = 1
 
 ifdef USE_PGXS
 PG_CONFIG = pg_config
diff --git a/contrib/old_snapshot/t/001_truncate.pl b/contrib/old_snapshot/t/001_truncate.pl
new file mode 100644
index 0000000000..d6c0def00f
--- /dev/null
+++ b/contrib/old_snapshot/t/001_truncate.pl
@@ -0,0 +1,80 @@
+# Test truncation of the old snapshot time mapping, to check
+# that we can't get into trouble when xids wrap around.
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 6;
+
+my $node = get_new_node('master');
+$node->init;
+$node->append_conf("postgresql.conf", "timezone = UTC");
+$node->append_conf("postgresql.conf", "old_snapshot_threshold=10");
+$node->append_conf("postgresql.conf", "max_prepared_transactions=10");
+$node->start;
+$node->psql('postgres', 'update pg_database set datallowconn = true');
+$node->psql('postgres', 'create extension old_snapshot');
+
+note "check time map is truncated when CLOG is";
+
+# build up a time map with 4 entries
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:00:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:01:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:02:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:03:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+my $count;
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 4, "expected to have 4 entries in the old snapshot time map");
+
+# now cause frozen XID to advance
+$node->psql('postgres', 'vacuum freeze');
+$node->psql('template0', 'vacuum freeze');
+$node->psql('template1', 'vacuum freeze');
+
+# we expect all XIDs to have been truncated
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 0, "expected to have 0 entries in the old snapshot time map");
+
+# put two more in the map
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:04:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:05:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 2, "expected to have 2 entries in the old snapshot time map");
+
+# prepare a transaction, to stop xmin from getting further ahead
+$node->psql('postgres', "begin; select pg_current_xact_id(); prepare transaction 'tx1'");
+
+# add 16 more minutes (this tests wrapping around the mapping array, which is of size 10 + 10)...
+$node->psql('postgres', "select pg_clobber_current_snapshot_timestamp('3000-01-01 00:21:00Z')");
+$node->psql('postgres', "select pg_current_xact_id()");
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 18, "expected to have 18 entries in the old snapshot time map");
+
+# now cause frozen XID to advance
+$node->psql('postgres', 'vacuum freeze');
+$node->psql('template0', 'vacuum freeze');
+$node->psql('template1', 'vacuum freeze');
+
+# this should leave just 16
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 16, "expected to have 16 entries in the old snapshot time map");
+
+# commit tx1, and then freeze again to get rid of all of them
+$node->psql('postgres', "commit prepared 'tx1'");
+
+# now cause frozen XID to advance
+$node->psql('postgres', 'vacuum freeze');
+$node->psql('template0', 'vacuum freeze');
+$node->psql('template1', 'vacuum freeze');
+
+# we should now be back to empty
+$node->psql('postgres', "select count(*) from pg_old_snapshot_time_mapping()", stdout => \$count);
+is($count, 0, "expected to have 0 entries in the old snapshot time map");
+
+$node->stop;
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 5a110edb07..37ead45fa5 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1627,6 +1627,9 @@ vac_truncate_clog(TransactionId frozenXID,
 	 */
 	AdvanceOldestCommitTsXid(frozenXID);
 
+	/* Make sure snapshot_too_old drops old XIDs. */
+	TruncateOldSnapshotTimeMapping(frozenXID);
+
 	/*
 	 * Truncate CLOG, multixact and CommitTs to the oldest computed value.
 	 */
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c
index 72b2c61a07..d604e69270 100644
--- a/src/backend/utils/time/snapmgr.c
+++ b/src/backend/utils/time/snapmgr.c
@@ -1998,6 +1998,27 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 }
 
 
+/*
+ * Remove old xids from the timing map, so the CLOG can be truncated.
+ */
+void
+TruncateOldSnapshotTimeMapping(TransactionId frozenXID)
+{
+	LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
+	while (oldSnapshotControl->count_used > 0 &&
+		   TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[oldSnapshotControl->head_offset],
+								 frozenXID))
+	{
+		oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
+		oldSnapshotControl->head_offset =
+			(oldSnapshotControl->head_offset + 1) %
+			OLD_SNAPSHOT_TIME_MAP_ENTRIES;
+		oldSnapshotControl->count_used--;
+	}
+	LWLockRelease(OldSnapshotTimeMapLock);
+}
+
+
 /*
  * Setup a snapshot that replaces normal catalog snapshots that allows catalog
  * access to behave just like it did at a certain point in the past.
diff --git a/src/include/utils/snapmgr.h b/src/include/utils/snapmgr.h
index b28d13ce84..4f53aad956 100644
--- a/src/include/utils/snapmgr.h
+++ b/src/include/utils/snapmgr.h
@@ -135,6 +135,7 @@ extern TransactionId TransactionIdLimitedForOldSnapshots(TransactionId recentXmi
 														 Relation relation);
 extern void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken,
 										   TransactionId xmin);
+extern void TruncateOldSnapshotTimeMapping(TransactionId frozenXID);
 
 extern char *ExportSnapshot(Snapshot snapshot);
 
-- 
2.20.1

Reply via email to