From 20ecc8bc74acc631c8714d2cec08008d86ace076 Mon Sep 17 00:00:00 2001
From: Pavan Deolasee <pavan.deolasee@gmail.com>
Date: Wed, 11 Apr 2018 23:49:48 +0530
Subject: [PATCH 2/2] Do more extensive search while looking for duplicate OID
 in a toast table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We now use SnapshotAny while searching the toast index/table for a potential
duplicate OID. This is necessary because SnapshotDirty, as the code was
previously using, fails to see RECENTLY_DEAD tuples and thus had a risk of
inserting duplicate OIDs that are not visible to MVCC snapshot but are visible
to SnapshotToast, which is used to scan toast tables.

Per multiple reports over the last many years, the most recent being from Adam
Sjøgren and one of 2ndQuadrant's customers.

This should be back patched to all supported releases.
---
 src/backend/access/heap/tuptoaster.c | 28 +++++++++++++++++++++-------
 src/backend/catalog/catalog.c        | 17 +++++++++--------
 src/include/catalog/catalog.h        |  3 ++-
 3 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index 546f80f05c..3d00f6873a 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -1569,11 +1569,25 @@ toast_save_datum(Relation rel, Datum value,
 	 */
 	if (!OidIsValid(rel->rd_toastoid))
 	{
-		/* normal case: just choose an unused OID */
+		/*
+		 * Normal case: just choose an unused OID. But we must scan the TOAST
+		 * table using SnapshotAny to ensure that the value does not exists. We
+		 * must take into account RECENTLY_DEAD tuples since SnapshotToast can
+		 * see those tuples and we must take into account uncommitted tuples
+		 * since they may become LIVE in due time. So SnapshotAny is the right
+		 * thing to use.
+		 *
+		 * NB: There was a long standing bug where we used to scan the toast
+		 * table with SnapshotDirty and thus failing to note conflicting OIDs
+		 * in RECENTLY_DEAD tuples. toast_fetch_datum() and friends then would
+		 * return duplicate tuples, leading to various errors while reading
+		 * from the toast tuple.
+		 */
 		toast_pointer.va_valueid =
 			GetNewOidWithIndex(toastrel,
 							   RelationGetRelid(toastidxs[validIndex]),
-							   (AttrNumber) 1);
+							   (AttrNumber) 1,
+							   SnapshotAny);
 	}
 	else
 	{
@@ -1627,7 +1641,8 @@ toast_save_datum(Relation rel, Datum value,
 				toast_pointer.va_valueid =
 					GetNewOidWithIndex(toastrel,
 									   RelationGetRelid(toastidxs[validIndex]),
-									   (AttrNumber) 1);
+									   (AttrNumber) 1,
+									   SnapshotAny);
 			} while (toastid_valueid_exists(rel->rd_toastoid,
 											toast_pointer.va_valueid));
 		}
@@ -1806,7 +1821,6 @@ toastrel_valueid_exists(Relation toastrel, Oid valueid)
 	int			num_indexes;
 	int			validIndex;
 	Relation   *toastidxs;
-	SnapshotData SnapshotToast;
 
 	/* Fetch a valid index relation */
 	validIndex = toast_open_indexes(toastrel,
@@ -1823,12 +1837,12 @@ toastrel_valueid_exists(Relation toastrel, Oid valueid)
 				ObjectIdGetDatum(valueid));
 
 	/*
-	 * Is there any such chunk?
+	 * Is there any such chunk? Scan using SnapshotAny to ensure we do not miss
+	 * any tuple, LIVE or RECENTLY_DEAD.
 	 */
-	init_toast_snapshot(&SnapshotToast);
 	toastscan = systable_beginscan(toastrel,
 								   RelationGetRelid(toastidxs[validIndex]),
-								   true, &SnapshotToast, 1, &toastkey);
+								   true, SnapshotAny, 1, &toastkey);
 
 	if (systable_getnext(toastscan) != NULL)
 		result = true;
diff --git a/src/backend/catalog/catalog.c b/src/backend/catalog/catalog.c
index 809749add9..afff78e2f7 100644
--- a/src/backend/catalog/catalog.c
+++ b/src/backend/catalog/catalog.c
@@ -288,7 +288,8 @@ IsSharedRelation(Oid relationId)
 Oid
 GetNewOid(Relation relation)
 {
-	Oid			oidIndex;
+	Oid				oidIndex;
+	SnapshotData	SnapshotDirty;
 
 	/* If relation doesn't have OIDs at all, caller is confused */
 	Assert(relation->rd_rel->relhasoids);
@@ -315,8 +316,11 @@ GetNewOid(Relation relation)
 		return GetNewObjectId();
 	}
 
+	InitDirtySnapshot(SnapshotDirty);
+
 	/* Otherwise, use the index to find a nonconflicting OID */
-	return GetNewOidWithIndex(relation, oidIndex, ObjectIdAttributeNumber);
+	return GetNewOidWithIndex(relation, oidIndex, ObjectIdAttributeNumber,
+							  &SnapshotDirty);
 }
 
 /*
@@ -333,10 +337,10 @@ GetNewOid(Relation relation)
  * Caller must have a suitable lock on the relation.
  */
 Oid
-GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn)
+GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn,
+				   Snapshot snapshot)
 {
 	Oid			newOid;
-	SnapshotData SnapshotDirty;
 	SysScanDesc scan;
 	ScanKeyData key;
 	bool		collides;
@@ -349,8 +353,6 @@ GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn)
 	 */
 	Assert(!IsBinaryUpgrade || RelationGetRelid(relation) != TypeRelationId);
 
-	InitDirtySnapshot(SnapshotDirty);
-
 	/* Generate new OIDs until we find one not in the table */
 	do
 	{
@@ -364,8 +366,7 @@ GetNewOidWithIndex(Relation relation, Oid indexId, AttrNumber oidcolumn)
 					ObjectIdGetDatum(newOid));
 
 		/* see notes above about using SnapshotDirty */
-		scan = systable_beginscan(relation, indexId, true,
-								  &SnapshotDirty, 1, &key);
+		scan = systable_beginscan(relation, indexId, true, snapshot, 1, &key);
 
 		collides = HeapTupleIsValid(systable_getnext(scan));
 
diff --git a/src/include/catalog/catalog.h b/src/include/catalog/catalog.h
index 197e77f7f4..b5338646f2 100644
--- a/src/include/catalog/catalog.h
+++ b/src/include/catalog/catalog.h
@@ -16,6 +16,7 @@
 
 #include "catalog/pg_class.h"
 #include "utils/relcache.h"
+#include "utils/snapshot.h"
 
 
 extern bool IsSystemRelation(Relation relation);
@@ -35,7 +36,7 @@ extern bool IsSharedRelation(Oid relationId);
 
 extern Oid	GetNewOid(Relation relation);
 extern Oid GetNewOidWithIndex(Relation relation, Oid indexId,
-				   AttrNumber oidcolumn);
+				   AttrNumber oidcolumn, Snapshot snapshot);
 extern Oid GetNewRelFileNode(Oid reltablespace, Relation pg_class,
 				  char relpersistence);
 
-- 
2.14.3 (Apple Git-98)

