On Wed, Dec 10, 2014 at 9:49 AM, Robert Haas <robertmh...@gmail.com> wrote: > I guess this bears some further thought. I certainly don't like the > fact that this makes the whole system crap out at a lower number of > subtransactions than presently. The actual performance numbers don't > bother me very much; I'm comfortable with the possibility that closing > a cursor will be some modest percentage slower if you've got thousands > of active savepoints.
Here's a new version with two changes: 1. I changed the traversal of the resource owner tree to iterate instead of recurse. It now does a depth-first, pre-order traversal of the tree; when we reach the last child of a node, we follow its parent pointer to get back to where we were. That way, we don't need to keep anything on the stack. That fixed the crash at 100k cursors, but it was still 4x slower. 2. Instead of traversing the tree until we find an xmin equal to the one we're currently advertising, the code now traverses the entire tree each time it runs. However, it also keeps a record of how many times the oldest xmin occurred in the tree, which is decremented each time we unregister a snapshot with that xmin; the traversal doesn't run again until that count reaches 0. That fixed the performance regression on your test case. With a million subtransactions: master 34.464s 33.742s 34.317s advance-xmin 34.516s 34.069s 34.196s -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company
diff --git a/src/backend/utils/resowner/resowner.c b/src/backend/utils/resowner/resowner.c index 0955bcc..529209f 100644 --- a/src/backend/utils/resowner/resowner.c +++ b/src/backend/utils/resowner/resowner.c @@ -21,6 +21,7 @@ #include "postgres.h" #include "access/hash.h" +#include "miscadmin.h" #include "storage/predicate.h" #include "storage/proc.h" #include "utils/memutils.h" @@ -113,6 +114,7 @@ typedef struct ResourceOwnerData ResourceOwner CurrentResourceOwner = NULL; ResourceOwner CurTransactionResourceOwner = NULL; ResourceOwner TopTransactionResourceOwner = NULL; +ResourceOwner FirstRootResourceOwner = NULL; /* * List of add-on callbacks for resource releasing @@ -167,6 +169,11 @@ ResourceOwnerCreate(ResourceOwner parent, const char *name) owner->nextchild = parent->firstchild; parent->firstchild = owner; } + else + { + owner->nextchild = FirstRootResourceOwner; + FirstRootResourceOwner = owner; + } return owner; } @@ -442,6 +449,8 @@ ResourceOwnerDelete(ResourceOwner owner) * the owner tree. Better a leak than a crash. */ ResourceOwnerNewParent(owner, NULL); + Assert(FirstRootResourceOwner == owner); + FirstRootResourceOwner = owner->nextchild; /* And free the object. */ if (owner->buffers) @@ -502,6 +511,14 @@ ResourceOwnerNewParent(ResourceOwner owner, } } } + else + { + ResourceOwner *link = &FirstRootResourceOwner; + + while (*link != owner) + link = &((*link)->nextchild); + *link = owner->nextchild; + } if (newparent) { @@ -513,7 +530,8 @@ ResourceOwnerNewParent(ResourceOwner owner, else { owner->parent = NULL; - owner->nextchild = NULL; + owner->nextchild = FirstRootResourceOwner; + FirstRootResourceOwner = owner; } } @@ -1161,6 +1179,59 @@ ResourceOwnerForgetSnapshot(ResourceOwner owner, Snapshot snapshot) } /* + * Invoke a caller-supplied function on every snapshot registered with any + * resource owner in the system. The callback can abort the traversal by + * returning true. + */ +bool +ResourceOwnerWalkAllSnapshots(ResourceWalkSnapshotCallback callback, void *arg) +{ + ResourceOwner owner = FirstRootResourceOwner; + bool visited = false; + + /* + * We do this traversal non-recursively in order to avoid running out of + * stack space, which can otherwise happen with large numbers of nested + * subtransactions. The easiest way to do that is to search depth-first, + * so that we visit all of a given node's descendents before reaching its + * right sibling. When we've visited all of the node's descendents, we'll + * follow the last child's parent pointer back to that node, but visited + * will be set to true at that point, so we'll advance to the right + * sibling instead of traversing it again. + */ + while (owner != NULL) + { + if (!visited) + { + int i; + + for (i = 0; i < owner->nsnapshots; ++i) + if (callback(owner->snapshots[i], arg)) + return true; + + if (owner->firstchild != NULL) + { + owner = owner->firstchild; + continue; + } + } + + if (owner->nextchild != NULL) + { + owner = owner->nextchild; + visited = false; + } + else + { + owner = owner->parent; + visited = true; + } + } + + return false; +} + +/* * Debugging subroutine */ static void diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index d601efe..1fd73c8 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -122,14 +122,28 @@ typedef struct ActiveSnapshotElt /* Top of the stack of active snapshots */ static ActiveSnapshotElt *ActiveSnapshot = NULL; +/* Data for recomputing xmin */ +typedef struct SnapshotXminData +{ + TransactionId new_xmin; + int new_xmin_count; + int snapshots_remaining; +} SnapshotXminData; + +/* How many snapshots is resowner.c tracking for us? */ +static int RegisteredSnapshots = 0; + /* - * How many snapshots is resowner.c tracking for us? - * - * Note: for now, a simple counter is enough. However, if we ever want to be - * smarter about advancing our MyPgXact->xmin we will need to be more - * sophisticated about this, perhaps keeping our own list of snapshots. + * When a cursor is released, it can be advantageous to recompute our + * advertised xmin to avoid system-wide impacts. However, we don't want + * to do this recomputation needlessly. After the first xmin recomputation + * in a particular transaction, these variables will indicate the result of + * the previous recomputation and the number of snapshots which at that time + * had that xmin. The actual number of snapshots with that xmin can be higher + * than the value indicated here, but cannot be lower. */ -static int RegisteredSnapshots = 0; +static TransactionId RegisteredSnapshotOldestXmin = InvalidTransactionId; +static int RegisteredSnapshotOldestXminCount = 0; /* first GetTransactionSnapshot call in a transaction? */ bool FirstSnapshotSet = false; @@ -153,6 +167,7 @@ static List *exportedSnapshots = NIL; static Snapshot CopySnapshot(Snapshot snapshot); static void FreeSnapshot(Snapshot snapshot); +static bool SnapshotResetXminWorker(Snapshot snapshot, void *data); static void SnapshotResetXmin(void); @@ -675,6 +690,8 @@ UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner) ResourceOwnerForgetSnapshot(owner, snapshot); RegisteredSnapshots--; + if (TransactionIdEquals(snapshot->xmin, RegisteredSnapshotOldestXmin)) + RegisteredSnapshotOldestXminCount--; if (--snapshot->regd_count == 0 && snapshot->active_count == 0) { FreeSnapshot(snapshot); @@ -688,12 +705,77 @@ UnregisterSnapshotFromOwner(Snapshot snapshot, ResourceOwner owner) * If there are no more snapshots, we can reset our PGXACT->xmin to InvalidXid. * Note we can do this without locking because we assume that storing an Xid * is atomic. + * + * Even if there are some remaining snapshots, we may be able to advance our + * PGXACT->xmin to some degree. This typically happens when a portal is + * dropped. For efficiency, we only consider recomputing PGXACT->xmin when + * the active snapshot stack is empty. */ static void SnapshotResetXmin(void) { - if (RegisteredSnapshots == 0 && ActiveSnapshot == NULL) + SnapshotXminData sxd; + ListCell *lc; + + if (ActiveSnapshot != NULL) + return; + + if (RegisteredSnapshots == 0) + { MyPgXact->xmin = InvalidTransactionId; + RegisteredSnapshotOldestXmin = InvalidTransactionId; + RegisteredSnapshotOldestXminCount = 0; + return; + } + + if (RegisteredSnapshotOldestXminCount > 0) + return; + + sxd.new_xmin = InvalidTransactionId; + sxd.new_xmin_count = 0; + sxd.snapshots_remaining = RegisteredSnapshots; + + if (FirstXactSnapshot != NULL) + SnapshotResetXminWorker(FirstXactSnapshot, &sxd); + + foreach(lc, exportedSnapshots) + { + Snapshot snapshot = lfirst(lc); + + SnapshotResetXminWorker(snapshot, &sxd); + } + + ResourceOwnerWalkAllSnapshots(SnapshotResetXminWorker, &sxd); + + if (sxd.snapshots_remaining > 0) + elog(FATAL, "failed to traverse all snapshots"); + + MyPgXact->xmin = sxd.new_xmin; + RegisteredSnapshotOldestXmin = sxd.new_xmin; + RegisteredSnapshotOldestXminCount = sxd.new_xmin_count; +} + +static bool +SnapshotResetXminWorker(Snapshot snapshot, void *data) +{ + SnapshotXminData *sxd = data; + + if (TransactionIdEquals(snapshot->xmin, sxd->new_xmin)) + { + Assert(TransactionIdIsNormal(snapshot->xmin)); + sxd->new_xmin_count++; + } + else if (!TransactionIdIsValid(sxd->new_xmin) + || TransactionIdPrecedes(snapshot->xmin, sxd->new_xmin)) + { + sxd->new_xmin = snapshot->xmin; + sxd->new_xmin_count = 1; + } + + if (--sxd->snapshots_remaining < 0) + elog(FATAL, "traversed too many snapshots"); + + return false; } /* @@ -830,6 +912,8 @@ AtEOXact_Snapshot(bool isCommit) */ ActiveSnapshot = NULL; RegisteredSnapshots = 0; + RegisteredSnapshotOldestXmin = InvalidTransactionId; + RegisteredSnapshotOldestXminCount = 0; CurrentSnapshot = NULL; SecondarySnapshot = NULL; diff --git a/src/include/utils/resowner_private.h b/src/include/utils/resowner_private.h index b8fd1a9..9944144 100644 --- a/src/include/utils/resowner_private.h +++ b/src/include/utils/resowner_private.h @@ -73,6 +73,9 @@ extern void ResourceOwnerRememberSnapshot(ResourceOwner owner, Snapshot snapshot); extern void ResourceOwnerForgetSnapshot(ResourceOwner owner, Snapshot snapshot); +typedef bool (*ResourceWalkSnapshotCallback) (Snapshot snapshot, void *arg); +extern bool ResourceOwnerWalkAllSnapshots(ResourceWalkSnapshotCallback callback, + void *arg); /* support for temporary file management */ extern void ResourceOwnerEnlargeFiles(ResourceOwner owner);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers