On 2018-08-28 20:29:08 -0700, Andres Freund wrote:
> On 2018-08-28 20:27:14 -0700, Andres Freund wrote:
> > Locally that triggers the problem within usually a few seconds.
> 
> FWIW, it does so including versions as old as 9.2.
> 
> Now I need to look for power for my laptop and some for me ;)

A bit of food, a coke and a talk later, here's a first draft *prototype*
of how this could be solved.

It's not OK to rebuild relcache entries in the middle of
ReceiveSharedInvalidMessages() - a later entry in the invalidation queue
might be relmapper invalidation, and thus immediately processing a
relcache invalidation might attempt to scan a relation that does not
exist anymore.

Instead, receiving a relcache invalidation now just queues an entry onto
a new "pending rebuilds" list, that is then processed in a second stage,
after emptying the entire sinval queue.  At that stage we're guaranteed
that the relmapper is up2date.

This might actually have performance benefits in some realistic
scenarios - while RelationFlushRelation() marks the relcache entry
invalid for each of the received entries,
ProcessPendingRelcacheRebuilds() can skip rebuilding if the entry has
since been rebuild.


Obviously this is far from clean enough, but what do you think about the
basic approach?  It does, in my limited testing, indeed solve the "could
not read block" issue.


Greetings,

Andres Freund
diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 5f4ae1291c6..d7037d02a75 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -2096,6 +2096,7 @@ ReorderBufferExecuteInvalidations(ReorderBuffer *rb, ReorderBufferTXN *txn)
 
 	for (i = 0; i < txn->ninvalidations; i++)
 		LocalExecuteInvalidationMessage(&txn->invalidations[i]);
+	ProcessPendingRelcacheRebuilds();
 }
 
 /*
diff --git a/src/backend/storage/ipc/sinval.c b/src/backend/storage/ipc/sinval.c
index 563e2906e36..4528a17a9f7 100644
--- a/src/backend/storage/ipc/sinval.c
+++ b/src/backend/storage/ipc/sinval.c
@@ -127,6 +127,8 @@ ReceiveSharedInvalidMessages(
 		 */
 	} while (nummsgs == MAXINVALMSGS);
 
+	ProcessPendingRelcacheRebuilds();
+
 	/*
 	 * We are now caught up.  If we received a catchup signal, reset that
 	 * flag, and call SICleanupQueue().  This is not so much because we need
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 16d86a29390..e4f70428999 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -456,6 +456,8 @@ ProcessInvalidationMessages(InvalidationListHeader *hdr,
 {
 	ProcessMessageList(hdr->cclist, func(msg));
 	ProcessMessageList(hdr->rclist, func(msg));
+
+	ProcessPendingRelcacheRebuilds();
 }
 
 /*
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 6125421d39a..927b8cef4d9 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -144,6 +144,14 @@ bool		criticalSharedRelcachesBuilt = false;
  */
 static long relcacheInvalsReceived = 0L;
 
+/*
+ * List of oids for entries that need to be rebuilt. Entries get queued onto
+ * it while receiving invalidations, and are processed at the end. This both
+ * avoids errors due to relcache rebuilds relying on an up2date relmapper, and
+ * avoids redundant rebuilds.
+ */
+static List *pending_rebuilds = NIL;
+
 /*
  * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
  * cleanup work.  This list intentionally has limited size; if it overflows,
@@ -251,7 +259,7 @@ static void RelationClearRelation(Relation relation, bool rebuild);
 
 static void RelationReloadIndexInfo(Relation relation);
 static void RelationReloadNailed(Relation relation);
-static void RelationFlushRelation(Relation relation);
+static void RelationFlushRelation(Relation relation, bool immed);
 static void RememberToFreeTupleDescAtEOX(TupleDesc td);
 static void AtEOXact_cleanup(Relation relation, bool isCommit);
 static void AtEOSubXact_cleanup(Relation relation, bool isCommit,
@@ -2556,7 +2564,7 @@ RelationClearRelation(Relation relation, bool rebuild)
  *	 This is used when we receive a cache invalidation event for the rel.
  */
 static void
-RelationFlushRelation(Relation relation)
+RelationFlushRelation(Relation relation, bool immed)
 {
 	if (relation->rd_createSubid != InvalidSubTransactionId ||
 		relation->rd_newRelfilenodeSubid != InvalidSubTransactionId)
@@ -2581,10 +2589,69 @@ RelationFlushRelation(Relation relation)
 		 */
 		bool		rebuild = !RelationHasReferenceCountZero(relation);
 
-		RelationClearRelation(relation, rebuild);
+		/*
+		 * Can't immediately rebuild entry - in the middle of receiving
+		 * invalidations relmapper might be out of date and point to an older
+		 * version of required catalogs. Defer rebuilds until the queue has
+		 * been emptied.
+		 */
+		if (rebuild && !immed)
+		{
+			MemoryContext oldcontext;
+
+			oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
+			pending_rebuilds = lappend_oid(pending_rebuilds, RelationGetRelid(relation));
+			MemoryContextSwitchTo(oldcontext);
+
+			relation->rd_isvalid = false;
+		}
+		else
+			RelationClearRelation(relation, rebuild);
 	}
 }
 
+/*
+ * Rebuild relcache entries that have been marked as invalid and that need to
+ * to be rebuilt. This is separate from RelationFlushRelation() because
+ * relcache entries can only be rebuilt with an up2date relmapper, and because
+ * doing so allows us to avoid repeated rebuilds of the same entry.
+ */
+void
+ProcessPendingRelcacheRebuilds(void)
+{
+	List *pending = pending_rebuilds;
+	ListCell *lc;
+
+	/*
+	 * Clear out list of pending rebuilds before starting, new entries might
+	 * be queued while processing the current entries.
+	 *
+	 * XXX: What to do if we error out somewhere in the middle?
+	 */
+	pending_rebuilds = NIL;
+
+	if (IsTransactionState())
+	{
+		foreach(lc, pending)
+		{
+			Oid			relationId = lfirst_oid(lc);
+			Relation	relation;
+
+			RelationIdCacheLookup(relationId, relation);
+
+			/* nothing to do if if the entry since has been dropped */
+			if (!PointerIsValid(relation))
+				continue;
+
+			/* only rebuild if not already done so */
+			if (!relation->rd_isvalid)
+				RelationFlushRelation(relation, true);
+		}
+	}
+	list_free(pending);
+
+}
+
 /*
  * RelationForgetRelation - unconditionally remove a relcache entry
  *
@@ -2633,7 +2700,7 @@ RelationCacheInvalidateEntry(Oid relationId)
 	if (PointerIsValid(relation))
 	{
 		relcacheInvalsReceived++;
-		RelationFlushRelation(relation);
+		RelationFlushRelation(relation, false);
 	}
 }
 
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index dbbf41b0c16..4d867bda7d9 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -119,6 +119,8 @@ extern void RelationForgetRelation(Oid rid);
 
 extern void RelationCacheInvalidateEntry(Oid relationId);
 
+extern void ProcessPendingRelcacheRebuilds(void);
+
 extern void RelationCacheInvalidate(void);
 
 extern void RelationCloseSmgrByOid(Oid relationId);

Reply via email to