On 3/12/19 11:54 PM, Tomas Vondra wrote:
> 
> 
> On 3/10/19 9:09 PM, Alvaro Herrera wrote:
>> On 2019-Feb-07, Tomas Vondra wrote:
>>
>>> Attached is a WIP patch removing the optimization from DropRelationFiles
>>> and adding it to smgrDoPendingDeletes. This resolves the issue, at least
>>> in the cases I've been able to reproduce. But maybe we should deal with
>>> this issue earlier by ensuring the two lists are ordered the same way
>>> from the very beginning, somehow.
>>
>> I noticed that this patch isn't in the commitfest.  Are you planning to
>> push it soon?
>>
> 
> I wasn't planning to push anything particularly soon, for two reasons:
> Firstly, the issue is not particularly pressing except with non-trivial
> number of relations (and I only noticed that during benchmarking).
> Secondly, I still have a feeling I'm missing something about b4166911
> because for me that commit does not actually fix the issue - i.e. I can
> create a lot of relations in a transaction, abort it, and observe that
> the replica actually accesses the relations in exactly the wrong order.
> So that commit does not seem to actually fix anything.
> 
> Attached is a patch adopting the dlist approach - it seems to be working
> quite fine, and is a bit cleaner than just slapping another pointer into
> the SMgrRelationData struct. So I'd say this is the way to go.
> 
> I see b4166911 was actually backpatched to all supported versions, on
> the basis that it fixes oversight in 279628a0a7. So I suppose this fix
> should also be backpatched.
> 

OK, so here is a bit more polished version of the dlist-based patch.

It's almost identical to what I posted before, except that it:

1) undoes the non-working optimization in DropRelationFiles()

2) removes add_to_unowned_list/remove_from_unowned_list entirely and
just replaces that with dlist_push_tail/dlist_delete

I've originally planned to keep those functions, mostly because the
remove_from_unowned_list comment says this:

  - * If the reln is not present in the list, nothing happens.
  - * Typically this would be caller error, but there seems no
  - * reason to throw an error.

I don't think dlist_delete allows that. But after further inspection of
all places calling those functions, don't think that can happen.

I plan to commit this soon-ish and backpatch it as mentioned before,
because I consider it pretty much a fix for b4166911.

I'm however still mildly puzzled that b4166911 apparently improved the
behavior in some cases, at least judging by [1]. OTOH there's not much
detail about how it was tested, so I can't quite run it again.


[1]
https://www.postgresql.org/message-id/flat/CAHGQGwHVQkdfDqtvGVkty%2B19cQakAydXn1etGND3X0PHbZ3%2B6w%40mail.gmail.com

regards

-- 
Tomas Vondra                  http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 2aba2dfe91..6ed68185ed 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -1699,13 +1699,7 @@ DropRelationFiles(RelFileNode *delrels, int ndelrels, bool isRedo)
 
 	smgrdounlinkall(srels, ndelrels, isRedo);
 
-	/*
-	 * Call smgrclose() in reverse order as when smgropen() is called.
-	 * This trick enables remove_from_unowned_list() in smgrclose()
-	 * to search the SMgrRelation from the unowned list,
-	 * with O(1) performance.
-	 */
-	for (i = ndelrels - 1; i >= 0; i--)
+	for (i = 0; i < ndelrels; i++)
 		smgrclose(srels[i]);
 	pfree(srels);
 }
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 0c0bba4ab3..f6de9df9e6 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -18,6 +18,7 @@
 #include "postgres.h"
 
 #include "commands/tablespace.h"
+#include "lib/ilist.h"
 #include "storage/bufmgr.h"
 #include "storage/ipc.h"
 #include "storage/smgr.h"
@@ -97,12 +98,10 @@ static const int NSmgr = lengthof(smgrsw);
  */
 static HTAB *SMgrRelationHash = NULL;
 
-static SMgrRelation first_unowned_reln = NULL;
+static dlist_head	unowned_relns;
 
 /* local function prototypes */
 static void smgrshutdown(int code, Datum arg);
-static void add_to_unowned_list(SMgrRelation reln);
-static void remove_from_unowned_list(SMgrRelation reln);
 
 
 /*
@@ -165,7 +164,7 @@ smgropen(RelFileNode rnode, BackendId backend)
 		ctl.entrysize = sizeof(SMgrRelationData);
 		SMgrRelationHash = hash_create("smgr relation table", 400,
 									   &ctl, HASH_ELEM | HASH_BLOBS);
-		first_unowned_reln = NULL;
+		dlist_init(&unowned_relns);
 	}
 
 	/* Look up or create an entry */
@@ -192,7 +191,7 @@ smgropen(RelFileNode rnode, BackendId backend)
 			reln->md_num_open_segs[forknum] = 0;
 
 		/* it has no owner yet */
-		add_to_unowned_list(reln);
+		dlist_push_tail(&unowned_relns, &reln->node);
 	}
 
 	return reln;
@@ -222,7 +221,7 @@ smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
 	if (reln->smgr_owner)
 		*(reln->smgr_owner) = NULL;
 	else
-		remove_from_unowned_list(reln);
+		dlist_delete(&reln->node);
 
 	/* Now establish the ownership relationship. */
 	reln->smgr_owner = owner;
@@ -246,53 +245,8 @@ smgrclearowner(SMgrRelation *owner, SMgrRelation reln)
 	/* unset our reference to the owner */
 	reln->smgr_owner = NULL;
 
-	add_to_unowned_list(reln);
-}
-
-/*
- * add_to_unowned_list -- link an SMgrRelation onto the unowned list
- *
- * Check remove_from_unowned_list()'s comments for performance
- * considerations.
- */
-static void
-add_to_unowned_list(SMgrRelation reln)
-{
-	/* place it at head of the list (to make smgrsetowner cheap) */
-	reln->next_unowned_reln = first_unowned_reln;
-	first_unowned_reln = reln;
-}
-
-/*
- * remove_from_unowned_list -- unlink an SMgrRelation from the unowned list
- *
- * If the reln is not present in the list, nothing happens.  Typically this
- * would be caller error, but there seems no reason to throw an error.
- *
- * In the worst case this could be rather slow; but in all the cases that seem
- * likely to be performance-critical, the reln being sought will actually be
- * first in the list.  Furthermore, the number of unowned relns touched in any
- * one transaction shouldn't be all that high typically.  So it doesn't seem
- * worth expending the additional space and management logic needed for a
- * doubly-linked list.
- */
-static void
-remove_from_unowned_list(SMgrRelation reln)
-{
-	SMgrRelation *link;
-	SMgrRelation cur;
-
-	for (link = &first_unowned_reln, cur = *link;
-		 cur != NULL;
-		 link = &cur->next_unowned_reln, cur = *link)
-	{
-		if (cur == reln)
-		{
-			*link = cur->next_unowned_reln;
-			cur->next_unowned_reln = NULL;
-			break;
-		}
-	}
+	/* add to list of unowned relations */
+	dlist_push_tail(&unowned_relns, &reln->node);
 }
 
 /*
@@ -319,7 +273,7 @@ smgrclose(SMgrRelation reln)
 	owner = reln->smgr_owner;
 
 	if (!owner)
-		remove_from_unowned_list(reln);
+		dlist_delete(&reln->node);
 
 	if (hash_search(SMgrRelationHash,
 					(void *) &(reln->smgr_rnode),
@@ -812,13 +766,19 @@ smgrpostckpt(void)
 void
 AtEOXact_SMgr(void)
 {
+	dlist_mutable_iter	iter;
+
 	/*
 	 * Zap all unowned SMgrRelations.  We rely on smgrclose() to remove each
 	 * one from the list.
 	 */
-	while (first_unowned_reln != NULL)
+	dlist_foreach_modify(iter, &unowned_relns)
 	{
-		Assert(first_unowned_reln->smgr_owner == NULL);
-		smgrclose(first_unowned_reln);
+		SMgrRelation	rel = dlist_container(SMgrRelationData, node,
+											  iter.cur);
+
+		Assert(rel->smgr_owner == NULL);
+
+		smgrclose(rel);
 	}
 }
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 4d60b28dac..8e98273878 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -14,6 +14,7 @@
 #ifndef SMGR_H
 #define SMGR_H
 
+#include "lib/ilist.h"
 #include "storage/block.h"
 #include "storage/relfilenode.h"
 
@@ -71,7 +72,7 @@ typedef struct SMgrRelationData
 	struct _MdfdVec *md_seg_fds[MAX_FORKNUM + 1];
 
 	/* if unowned, list link in list of all unowned SMgrRelations */
-	struct SMgrRelationData *next_unowned_reln;
+	dlist_node	node;
 } SMgrRelationData;
 
 typedef SMgrRelationData *SMgrRelation;

Reply via email to