I think this formulation (attached v3) has fewer moving parts.

However, now that I did that, I wonder if this is really the best
approach to solve this problem.  Maybe instead of doing this at the BRIN
level, it should be handled at the autovac level, by having the worker
copy the work-item to local memory and remove it from the shared list as
soon as it is in progress.  That way, if *any* error occurs while trying
to execute it, it will go away instead of being retried for all
eternity.

Preliminary patch for that attached as autovacuum-workitem.patch.

I would propose to clean that up to apply instead of your proposed fix.
>From e0b457866e503a1b70b94c540147026b652fb019 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvhe...@alvh.no-ip.org>
Date: Fri, 13 Nov 2020 13:39:31 -0300
Subject: [PATCH v3] Avoid errors in brin summarization..

..which can happen if an index is reindexed concurrently
---
 src/backend/access/brin/brin.c | 37 ++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 1f72562c60..4d012ebd76 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -33,6 +33,7 @@
 #include "postmaster/autovacuum.h"
 #include "storage/bufmgr.h"
 #include "storage/freespace.h"
+#include "storage/lmgr.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
 #include "utils/index_selfuncs.h"
@@ -886,21 +887,35 @@ brin_summarize_range(PG_FUNCTION_ARGS)
 
 	/*
 	 * We must lock table before index to avoid deadlocks.  However, if the
-	 * passed indexoid isn't an index then IndexGetRelation() will fail.
-	 * Rather than emitting a not-very-helpful error message, postpone
-	 * complaining, expecting that the is-it-an-index test below will fail.
+	 * passed indexoid isn't an index, then IndexGetRelation(false) would
+	 * emit a not-very-helpful error message.  Instead, postpone complaining
+	 * until the is-it-an-index test, below.
 	 */
 	heapoid = IndexGetRelation(indexoid, true);
-	if (OidIsValid(heapoid))
-		heapRel = table_open(heapoid, ShareUpdateExclusiveLock);
-	else
-		heapRel = NULL;
+	if (heapoid != InvalidOid)
+		LockRelationOid(heapoid, ShareUpdateExclusiveLock);
 
-	indexRel = index_open(indexoid, ShareUpdateExclusiveLock);
+	/* Now we can open the index; silently skip if it's gone. */
+	indexRel = try_relation_open(indexoid, ShareUpdateExclusiveLock);
+	if (!indexRel)
+	{
+		if (heapoid != InvalidOid)
+			UnlockRelationOid(heapoid, ShareUpdateExclusiveLock);
+		PG_RETURN_INT32(0);
+	}
+
+	/* If passed a non-index, fail noisily */
+	if (indexRel->rd_rel->relkind != RELKIND_INDEX)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not an index",
+						RelationGetRelationName(indexRel))));
+
+	/* Now we can open the table */
+	heapRel = table_open(heapoid, NoLock);
 
 	/* Must be a BRIN index */
-	if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
-		indexRel->rd_rel->relam != BRIN_AM_OID)
+	if (indexRel->rd_rel->relam != BRIN_AM_OID)
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("\"%s\" is not a BRIN index",
@@ -916,7 +931,7 @@ brin_summarize_range(PG_FUNCTION_ARGS)
 	 * barely possible that a race against an index drop/recreation could have
 	 * netted us the wrong table.  Recheck.
 	 */
-	if (heapRel == NULL || heapoid != IndexGetRelation(indexoid, false))
+	if (heapoid != IndexGetRelation(indexoid, false))
 		ereport(ERROR,
 				(errcode(ERRCODE_UNDEFINED_TABLE),
 				 errmsg("could not open parent table of index %s",
-- 
2.20.1

commit 2326df869ee8516b8b1f0e01930946e9a63800b5
Author:     Alvaro Herrera <alvhe...@alvh.no-ip.org>
AuthorDate: Mon Nov 23 15:56:23 2020 -0300
CommitDate: Mon Nov 23 15:57:28 2020 -0300

    autovacuum-centered workitem fix

diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index aa5b97fbac..77c1401bc9 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -2561,6 +2561,7 @@ deleted:
 	for (i = 0; i < NUM_WORKITEMS; i++)
 	{
 		AutoVacuumWorkItem *workitem = &AutoVacuumShmem->av_workItems[i];
+		AutoVacuumWorkItem my_workitem;
 
 		if (!workitem->avw_used)
 			continue;
@@ -2569,11 +2570,14 @@ deleted:
 		if (workitem->avw_database != MyDatabaseId)
 			continue;
 
+		my_workitem = *workitem;
+		workitem->avw_used = false;
+
 		/* claim this one, and release lock while performing it */
 		workitem->avw_active = true;
 		LWLockRelease(AutovacuumLock);
 
-		perform_work_item(workitem);
+		perform_work_item(&my_workitem);
 
 		/*
 		 * Check for config changes before acquiring lock for further jobs.

Reply via email to