An updated patch to address this issue is attached. It fixes a couple of
issues related to the use of the backend-local lock table hint:

- CheckSingleTargetForConflictsIn now correctly handles the case where a
  lock that is being held is not reflected in the local lock table. This
  fixes the assertion failure reported in this thread.

- PredicateLockPageCombine now retains locks for the page that is being
  removed, rather than removing them. This prevents a potentially dangerous
  false-positive inconsistency, where the local lock table believes that a
  lock is held but it actually is not.

- Add some more comments documenting the cases where the local lock table
  can be inconsistent with reality, as reflected in the shared memory table.

This patch also incorporates Kevin's changes to copy locks when creating a
new version of a tuple, rather than trying to maintain a linkage between the
different versions. So this is a patch that should apply against HEAD and
addresses all outstanding SSI bugs known to Kevin or myself.

Besides the usual regression and isolation tests, I have tested this by
running DBT-2 on a 16-core machine to verify that there are no assertion
failures that only show up under concurrent access.

Dan

--
Dan R. K. Ports              MIT CSAIL                http://drkp.net/
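P.S. To make the hint-table invariant above concrete, here is a toy,
stand-alone illustration. This is not part of the patch and all names are
invented; it only models why a false negative in the backend-local table is
harmless (we fall back to the authoritative shared table), while a stale
"held" entry would wrongly short-circuit that fallback:

    /*
     * Illustrative sketch only -- not from the patch; names are made up.
     * The local table is a hint over the authoritative shared table.
     */
    #include <stdbool.h>
    #include <stdio.h>

    #define NLOCKS 4

    static bool shared_lock_held[NLOCKS];   /* authoritative, shared state */
    static bool local_hint_held[NLOCKS];    /* per-backend hint, may lag */

    static bool
    lock_is_held(int lock)
    {
        /* Fast path: trusts a positive hint, so hints must never be stale. */
        if (local_hint_held[lock])
            return true;        /* unsafe if the hint can be a false positive */

        /* Slow path: a missing hint is fine; consult the shared table. */
        return shared_lock_held[lock];
    }

    int
    main(void)
    {
        /* False negative: lock 0 is held, but the hint table missed it. */
        shared_lock_held[0] = true;
        printf("lock 0 held: %d (correct despite missing hint)\n",
               (int) lock_is_held(0));

        /* False positive: a stale hint claims lock 1 is held. */
        local_hint_held[1] = true;
        printf("lock 1 held: %d (wrong: stale hint was trusted)\n",
               (int) lock_is_held(1));
        return 0;
    }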
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index ba01874..7a0e1a9c 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -824,7 +824,6 @@ restart:
 		if (_bt_page_recyclable(page))
 		{
 			/* Okay to recycle this page */
-			Assert(!PageIsPredicateLocked(rel, blkno));
 			RecordFreeIndexPage(rel, blkno);
 			vstate->totFreePages++;
 			stats->pages_deleted++;
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index d660ce5..580af2a 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -124,10 +124,6 @@
  *	SerializableXactHashLock
  *		- Protects both PredXact and SerializableXidHash.
  *
- *	PredicateLockNextRowLinkLock
- *		- Protects the priorVersionOfRow and nextVersionOfRow fields of
- *		  PREDICATELOCKTARGET when linkage is being created or destroyed.
- *
  *
  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -444,8 +440,6 @@ static void ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial,
 						   bool summarize);
 static bool XidIsConcurrent(TransactionId xid);
 static void CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag);
-static bool CheckSingleTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag,
-						PREDICATELOCKTARGETTAG *nexttargettag);
 static void FlagRWConflict(SERIALIZABLEXACT *reader, SERIALIZABLEXACT *writer);
 static void OnConflict_CheckForSerializationFailure(const SERIALIZABLEXACT *reader,
 										SERIALIZABLEXACT *writer);
@@ -1044,7 +1038,6 @@ InitPredicateLocks(void)
 	PredXact->LastSxactCommitSeqNo = FirstNormalSerCommitSeqNo - 1;
 	PredXact->CanPartialClearThrough = 0;
 	PredXact->HavePartialClearedThrough = 0;
-	PredXact->NeedTargetLinkCleanup = false;
 	requestSize = mul_size((Size) max_table_size,
 						   PredXactListElementDataSize);
 	PredXact->element = ShmemAlloc(requestSize);
@@ -1651,9 +1644,11 @@ PageIsPredicateLocked(const Relation relation, const BlockNumber blkno)
  * Important note: this function may return false even if the lock is
  * being held, because it uses the local lock table which is not
  * updated if another transaction modifies our lock list (e.g. to
- * split an index page). However, it will never return true if the
- * lock is not held. We only use this function in circumstances where
- * such false negatives are acceptable.
+ * split an index page). However, it will almost never return true if
+ * the lock is not held; it can only do so in rare circumstances when
+ * a coarser-granularity lock that covers this one is being held. We
+ * are careful to only use this function in circumstances where such
+ * errors are acceptable.
  */
 static bool
 PredicateLockExists(const PREDICATELOCKTARGETTAG *targettag)
@@ -1717,6 +1712,9 @@ GetParentPredicateLockTag(const PREDICATELOCKTARGETTAG *tag,
 /*
  * Check whether the lock we are considering is already covered by a
  * coarser lock for our transaction.
+ *
+ * Like PredicateLockExists, this function might return a false
+ * negative, but it will never return a false positive.
  */
 static bool
 CoarserLockCovers(const PREDICATELOCKTARGETTAG *newtargettag)
@@ -1747,7 +1745,6 @@ static void
 RemoveTargetIfNoLongerUsed(PREDICATELOCKTARGET *target, uint32 targettaghash)
 {
 	PREDICATELOCKTARGET *rmtarget;
-	PREDICATELOCKTARGET *next;
 
 	Assert(LWLockHeldByMe(SerializablePredicateLockListLock));
 
@@ -1755,33 +1752,6 @@ RemoveTargetIfNoLongerUsed(PREDICATELOCKTARGET *target, uint32 targettaghash)
 	if (!SHMQueueEmpty(&target->predicateLocks))
 		return;
 
-	/* Can't remove it if there are locks for a prior row version. */
-	LWLockAcquire(PredicateLockNextRowLinkLock, LW_EXCLUSIVE);
-	if (target->priorVersionOfRow != NULL)
-	{
-		LWLockRelease(PredicateLockNextRowLinkLock);
-		return;
-	}
-
-	/*
-	 * We are going to release this target, This requires that we let the
-	 * next version of the row (if any) know that it's previous version is
-	 * done.
-	 *
-	 * It might be that the link was all that was keeping the other target
-	 * from cleanup, but we can't clean that up here -- LW locking is all
-	 * wrong for that. We'll pass the HTAB in the general cleanup function to
-	 * get rid of such "dead" targets.
-	 */
-	next = target->nextVersionOfRow;
-	if (next != NULL)
-	{
-		next->priorVersionOfRow = NULL;
-		if (SHMQueueEmpty(&next->predicateLocks))
-			PredXact->NeedTargetLinkCleanup = true;
-	}
-	LWLockRelease(PredicateLockNextRowLinkLock);
-
 	/* Actually remove the target. */
 	rmtarget = hash_search_with_hash_value(PredicateLockTargetHash,
 										   &target->tag,
@@ -2065,11 +2035,7 @@ CreatePredicateLock(const PREDICATELOCKTARGETTAG *targettag,
 				 errmsg("out of shared memory"),
 				 errhint("You might need to increase max_pred_locks_per_transaction.")));
 	if (!found)
-	{
 		SHMQueueInit(&(target->predicateLocks));
-		target->priorVersionOfRow = NULL;
-		target->nextVersionOfRow = NULL;
-	}
 
 	/* We've got the sxact and target, make sure they're joined. */
 	locktag.myTarget = target;
@@ -2125,8 +2091,6 @@ PredicateLockAcquire(const PREDICATELOCKTARGETTAG *targettag)
 		hash_search_with_hash_value(LocalPredicateLockHash,
 									targettag, targettaghash,
 									HASH_ENTER, &found);
-	/* We should not hold the lock (but its entry might still exist) */
-	Assert(!found || !locallock->held);
 	locallock->held = true;
 	if (!found)
 		locallock->childLocks = 0;
@@ -2215,6 +2179,7 @@ PredicateLockTuple(const Relation relation, const HeapTuple tuple)
 {
 	PREDICATELOCKTARGETTAG tag;
 	ItemPointer tid;
+	TransactionId targetxmin;
 
 	if (SkipSerialization(relation))
 		return;
@@ -2224,15 +2189,16 @@ PredicateLockTuple(const Relation relation, const HeapTuple tuple)
 	 */
 	if (relation->rd_index == NULL)
 	{
-		TransactionId myxid = GetTopTransactionIdIfAny();
+		TransactionId myxid;
+
+		targetxmin = HeapTupleHeaderGetXmin(tuple->t_data);
+		myxid = GetTopTransactionIdIfAny();
 
 		if (TransactionIdIsValid(myxid))
 		{
-			TransactionId xid = HeapTupleHeaderGetXmin(tuple->t_data);
-
-			if (TransactionIdFollowsOrEquals(xid, TransactionXmin))
+			if (TransactionIdFollowsOrEquals(targetxmin, TransactionXmin))
 			{
-				xid = SubTransGetTopmostTransaction(xid);
+				TransactionId xid = SubTransGetTopmostTransaction(targetxmin);
 				if (TransactionIdEquals(xid, myxid))
 				{
 					/* We wrote it; we already have a write lock. */
@@ -2241,6 +2207,8 @@ PredicateLockTuple(const Relation relation, const HeapTuple tuple)
 			}
 		}
 	}
+	else
+		targetxmin = InvalidTransactionId;
 
 	/*
 	 * Do quick-but-not-definitive test for a relation lock first. This will
@@ -2259,116 +2227,48 @@ PredicateLockTuple(const Relation relation, const HeapTuple tuple)
 							relation->rd_node.dbNode,
 							relation->rd_id,
 							ItemPointerGetBlockNumber(tid),
-							ItemPointerGetOffsetNumber(tid));
+							ItemPointerGetOffsetNumber(tid),
+							targetxmin);
 	PredicateLockAcquire(&tag);
 }
 
 /*
- * If the old tuple has any predicate locks, create a lock target for the
- * new tuple and point them at each other. Conflict detection needs to
- * look for locks against prior versions of the row.
+ * If the old tuple has any predicate locks, copy them to the new target.
  */
 void
 PredicateLockTupleRowVersionLink(const Relation relation,
 								 const HeapTuple oldTuple,
 								 const HeapTuple newTuple)
 {
-	PREDICATELOCKTARGETTAG oldtargettag;
+	PREDICATELOCKTARGETTAG oldtupletargettag;
+	PREDICATELOCKTARGETTAG oldpagetargettag;
 	PREDICATELOCKTARGETTAG newtargettag;
-	PREDICATELOCKTARGET *oldtarget;
-	PREDICATELOCKTARGET *newtarget;
-	PREDICATELOCKTARGET *next;
-	uint32		oldtargettaghash;
-	LWLockId	oldpartitionLock;
-	uint32		newtargettaghash;
-	LWLockId	newpartitionLock;
-	bool		found;
 
-	SET_PREDICATELOCKTARGETTAG_TUPLE(oldtargettag,
+	SET_PREDICATELOCKTARGETTAG_TUPLE(oldtupletargettag,
 									 relation->rd_node.dbNode,
 									 relation->rd_id,
 									 ItemPointerGetBlockNumber(&(oldTuple->t_self)),
-									 ItemPointerGetOffsetNumber(&(oldTuple->t_self)));
-	oldtargettaghash = PredicateLockTargetTagHashCode(&oldtargettag);
-	oldpartitionLock = PredicateLockHashPartitionLock(oldtargettaghash);
+									 ItemPointerGetOffsetNumber(&(oldTuple->t_self)),
+									 HeapTupleHeaderGetXmin(oldTuple->t_data));
+
+	SET_PREDICATELOCKTARGETTAG_PAGE(oldpagetargettag,
+									relation->rd_node.dbNode,
+									relation->rd_id,
+									ItemPointerGetBlockNumber(&(oldTuple->t_self)));
 
 	SET_PREDICATELOCKTARGETTAG_TUPLE(newtargettag,
 									 relation->rd_node.dbNode,
 									 relation->rd_id,
 									 ItemPointerGetBlockNumber(&(newTuple->t_self)),
-									 ItemPointerGetOffsetNumber(&(newTuple->t_self)));
-	newtargettaghash = PredicateLockTargetTagHashCode(&newtargettag);
-	newpartitionLock = PredicateLockHashPartitionLock(newtargettaghash);
+									 ItemPointerGetOffsetNumber(&(newTuple->t_self)),
+									 HeapTupleHeaderGetXmin(newTuple->t_data));
 
-	/* Lock lower numbered partition first. */
-	if (oldpartitionLock < newpartitionLock)
-	{
-		LWLockAcquire(oldpartitionLock, LW_SHARED);
-		LWLockAcquire(newpartitionLock, LW_EXCLUSIVE);
-	}
-	else if (newpartitionLock < oldpartitionLock)
-	{
-		LWLockAcquire(newpartitionLock, LW_EXCLUSIVE);
-		LWLockAcquire(oldpartitionLock, LW_SHARED);
-	}
-	else
-		LWLockAcquire(newpartitionLock, LW_EXCLUSIVE);
-
-	oldtarget = (PREDICATELOCKTARGET *)
-		hash_search_with_hash_value(PredicateLockTargetHash,
-									&oldtargettag, oldtargettaghash,
-									HASH_FIND, NULL);
-
-	/* Only need to link if there is an old target already. */
-	if (oldtarget)
-	{
-		LWLockAcquire(PredicateLockNextRowLinkLock, LW_EXCLUSIVE);
-
-		/* Guard against stale pointers from rollback. */
-		next = oldtarget->nextVersionOfRow;
-		if (next != NULL)
-		{
-			next->priorVersionOfRow = NULL;
-			oldtarget->nextVersionOfRow = NULL;
-		}
-
-		/* Find or create the new target, and link old and new. */
-		newtarget = (PREDICATELOCKTARGET *)
-			hash_search_with_hash_value(PredicateLockTargetHash,
-										&newtargettag, newtargettaghash,
-										HASH_ENTER, &found);
-		if (!newtarget)
-			ereport(ERROR,
-					(errcode(ERRCODE_OUT_OF_MEMORY),
-					 errmsg("out of shared memory"),
-					 errhint("You might need to increase max_pred_locks_per_transaction.")));
-		if (!found)
-		{
-			SHMQueueInit(&(newtarget->predicateLocks));
-			newtarget->nextVersionOfRow = NULL;
-		}
-		else
-			Assert(newtarget->priorVersionOfRow == NULL);
+	LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);
 
-		newtarget->priorVersionOfRow = oldtarget;
-		oldtarget->nextVersionOfRow = newtarget;
+	TransferPredicateLocksToNewTarget(oldtupletargettag, newtargettag, false);
+	TransferPredicateLocksToNewTarget(oldpagetargettag, newtargettag, false);
 
-		LWLockRelease(PredicateLockNextRowLinkLock);
-	}
-
-	/* Release lower number partition last. */
-	if (oldpartitionLock < newpartitionLock)
-	{
-		LWLockRelease(newpartitionLock);
-		LWLockRelease(oldpartitionLock);
-	}
-	else if (newpartitionLock < oldpartitionLock)
-	{
-		LWLockRelease(oldpartitionLock);
-		LWLockRelease(newpartitionLock);
-	}
-	else
-		LWLockRelease(newpartitionLock);
+	LWLockRelease(SerializablePredicateLockListLock);
 }
 
@@ -2437,6 +2337,17 @@ DeleteLockTarget(PREDICATELOCKTARGET *target, uint32 targettaghash)
  * removeOld is set (by using the reserved entry in
  * PredicateLockTargetHash for scratch space).
  *
+ * Warning: the "removeOld" option should be used only with care,
+ * because this function does not (indeed, can not) update other
+ * backends' LocalPredicateLockHash. If we are only adding new
+ * entries, this is not a problem: the local lock table is used only
+ * as a hint, so missing entries for locks that are held are
+ * OK. Having entries for locks that are no longer held, as can happen
+ * when using "removeOld", is not in general OK. We can only use it
+ * safely when replacing a lock with a coarser-granularity lock that
+ * covers it, or if we are absolutely certain that no one will need to
+ * refer to that lock in the future.
+ *
  * Caller must hold SerializablePredicateLockListLock.
  */
 static bool
@@ -2533,11 +2444,7 @@ TransferPredicateLocksToNewTarget(const PREDICATELOCKTARGETTAG oldtargettag,
 
 		/* If we created a new entry, initialize it */
 		if (!found)
-		{
 			SHMQueueInit(&(newtarget->predicateLocks));
-			newtarget->priorVersionOfRow = NULL;
-			newtarget->nextVersionOfRow = NULL;
-		}
 
 		newpredlocktag.myTarget = newtarget;
@@ -2704,7 +2611,14 @@ PredicateLockPageSplit(const Relation relation, const BlockNumber oldblkno,
 											 &newtargettag);
 		Assert(success);
 
-		/* Move the locks to the parent. This shouldn't fail. */
+		/*
+		 * Move the locks to the parent. This shouldn't fail.
+		 *
+		 * Note that here we are removing locks held by other
+		 * backends, leading to a possible inconsistency in their
+		 * local lock hash table. This is OK because we're replacing
+		 * it with a lock that covers the old one.
+		 */
 		success = TransferPredicateLocksToNewTarget(oldtargettag,
 													newtargettag,
 													true);
@@ -2727,36 +2641,17 @@ void
 PredicateLockPageCombine(const Relation relation, const BlockNumber oldblkno,
 						 const BlockNumber newblkno)
 {
-	PREDICATELOCKTARGETTAG oldtargettag;
-	PREDICATELOCKTARGETTAG newtargettag;
-	bool		success;
-
-
-	if (SkipSplitTracking(relation))
-		return;
-
-	Assert(oldblkno != newblkno);
-	Assert(BlockNumberIsValid(oldblkno));
-	Assert(BlockNumberIsValid(newblkno));
-
-	SET_PREDICATELOCKTARGETTAG_PAGE(oldtargettag,
-									relation->rd_node.dbNode,
-									relation->rd_id,
-									oldblkno);
-	SET_PREDICATELOCKTARGETTAG_PAGE(newtargettag,
-									relation->rd_node.dbNode,
-									relation->rd_id,
-									newblkno);
-
-	LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);
-
-	/* Move the locks. This shouldn't fail. */
-	success = TransferPredicateLocksToNewTarget(oldtargettag,
-												newtargettag,
-												true);
-	Assert(success);
-
-	LWLockRelease(SerializablePredicateLockListLock);
+	/*
+	 * Page combines differ from page splits in that we ought to be
+	 * able to remove the locks on the old page after transferring
+	 * them to the new page, instead of duplicating them. However,
+	 * because we can't edit other backends' local lock tables,
+	 * removing the old lock would leave them with an entry in their
+	 * LocalPredicateLockHash for a lock they're not holding, which
+	 * isn't acceptable. So we wind up having to do the same work as a
+	 * page split.
+	 */
+	PredicateLockPageSplit(relation, oldblkno, newblkno);
 }
 
 /*
@@ -3132,9 +3027,6 @@ ClearOldPredicateLocks(void)
 {
 	SERIALIZABLEXACT *finishedSxact;
 	PREDICATELOCK *predlock;
-	int			i;
-	HASH_SEQ_STATUS seqstat;
-	PREDICATELOCKTARGET *locktarget;
 
 	LWLockAcquire(SerializableFinishedListLock, LW_EXCLUSIVE);
 	finishedSxact = (SERIALIZABLEXACT *)
@@ -3232,35 +3124,6 @@ ClearOldPredicateLocks(void)
 
 	LWLockRelease(SerializablePredicateLockListLock);
 	LWLockRelease(SerializableFinishedListLock);
-
-	if (!PredXact->NeedTargetLinkCleanup)
-		return;
-
-	/*
-	 * Clean up any targets which were disconnected from a prior version with
-	 * no predicate locks attached.
-	 */
-	for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
-		LWLockAcquire(FirstPredicateLockMgrLock + i, LW_EXCLUSIVE);
-	LWLockAcquire(PredicateLockNextRowLinkLock, LW_SHARED);
-
-	hash_seq_init(&seqstat, PredicateLockTargetHash);
-	while ((locktarget = (PREDICATELOCKTARGET *) hash_seq_search(&seqstat)))
-	{
-		if (SHMQueueEmpty(&locktarget->predicateLocks)
-			&& locktarget->priorVersionOfRow == NULL
-			&& locktarget->nextVersionOfRow == NULL)
-		{
-			hash_search(PredicateLockTargetHash, &locktarget->tag,
-						HASH_REMOVE, NULL);
-		}
-	}
-
-	PredXact->NeedTargetLinkCleanup = false;
-
-	LWLockRelease(PredicateLockNextRowLinkLock);
-	for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
-		LWLockRelease(FirstPredicateLockMgrLock + i);
 }
 
 /*
@@ -3682,32 +3545,10 @@ CheckForSerializableConflictOut(const bool visible, const Relation relation,
 static void
 CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag)
 {
-	PREDICATELOCKTARGETTAG nexttargettag = { 0 };
-	PREDICATELOCKTARGETTAG thistargettag;
-
-	for (;;)
-	{
-		if (!CheckSingleTargetForConflictsIn(targettag, &nexttargettag))
-			break;
-		thistargettag = nexttargettag;
-		targettag = &thistargettag;
-	}
-}
-
-/*
- * Check a particular target for rw-dependency conflict in. If the tuple
- * has prior versions, returns true and *nexttargettag is set to the tag
- * of the prior tuple version.
- */
-static bool
-CheckSingleTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag,
-								PREDICATELOCKTARGETTAG *nexttargettag)
-{
 	uint32		targettaghash;
 	LWLockId	partitionLock;
 	PREDICATELOCKTARGET *target;
 	PREDICATELOCK *predlock;
-	bool		hasnexttarget = false;
 
 	Assert(MySerializableXact != InvalidSerializableXact);
@@ -3717,7 +3558,6 @@ CheckSingleTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag,
 	targettaghash = PredicateLockTargetTagHashCode(targettag);
 	partitionLock = PredicateLockHashPartitionLock(targettaghash);
 	LWLockAcquire(partitionLock, LW_SHARED);
-	LWLockAcquire(PredicateLockNextRowLinkLock, LW_SHARED);
 	target = (PREDICATELOCKTARGET *)
 		hash_search_with_hash_value(PredicateLockTargetHash,
 									targettag, targettaghash,
@@ -3725,21 +3565,9 @@ CheckSingleTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag,
 	if (!target)
 	{
 		/* Nothing has this target locked; we're done here. */
-		LWLockRelease(PredicateLockNextRowLinkLock);
 		LWLockRelease(partitionLock);
-		return false;
-	}
-
-	/*
-	 * If the target is linked to a prior version of the row, save the tag so
-	 * that it can be used for iterative calls to this function.
-	 */
-	if (target->priorVersionOfRow != NULL)
-	{
-		*nexttargettag = target->priorVersionOfRow->tag;
-		hasnexttarget = true;
+		return;
 	}
-	LWLockRelease(PredicateLockNextRowLinkLock);
 
 	/*
 	 * Each lock for an overlapping transaction represents a conflict: a
@@ -3828,17 +3656,25 @@ CheckSingleTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag,
 				hash_search_with_hash_value(LocalPredicateLockHash,
 											targettag, targettaghash,
 											HASH_FIND, NULL);
-			Assert(locallock != NULL);
-			Assert(locallock->held);
-			locallock->held = false;
 
-			if (locallock->childLocks == 0)
+			/*
+			 * Remove entry in local lock table if it exists and has
+			 * no children. It's OK if it doesn't exist; that means
+			 * the lock was transferred to a new target by a
+			 * different backend.
+			 */
+			if (locallock != NULL)
 			{
-				rmlocallock = (LOCALPREDICATELOCK *)
-					hash_search_with_hash_value(LocalPredicateLockHash,
-												targettag, targettaghash,
-												HASH_REMOVE, NULL);
-				Assert(rmlocallock == locallock);
+				locallock->held = false;
+
+				if (locallock->childLocks == 0)
+				{
+					rmlocallock = (LOCALPREDICATELOCK *)
+						hash_search_with_hash_value(LocalPredicateLockHash,
+													targettag, targettaghash,
+													HASH_REMOVE, NULL);
+					Assert(rmlocallock == locallock);
+				}
 			}
 
 			DecrementParentLocks(targettag);
@@ -3848,7 +3684,7 @@ CheckSingleTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag,
 			 * the target, bail out before re-acquiring the locks.
 			 */
 			if (rmtarget)
-				return hasnexttarget;
+				return;
 
 			/*
 			 * The list has been altered. Start over at the front.
@@ -3895,8 +3731,6 @@ CheckSingleTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag,
 	}
 	LWLockRelease(SerializableXactHashLock);
 	LWLockRelease(partitionLock);
-
-	return hasnexttarget;
 }
 
 /*
@@ -3943,7 +3777,8 @@ CheckForSerializableConflictIn(const Relation relation, const HeapTuple tuple,
 										relation->rd_node.dbNode,
 										relation->rd_id,
 						 ItemPointerGetBlockNumber(&(tuple->t_data->t_ctid)),
-						ItemPointerGetOffsetNumber(&(tuple->t_data->t_ctid)));
+						ItemPointerGetOffsetNumber(&(tuple->t_data->t_ctid)),
+										HeapTupleHeaderGetXmin(tuple->t_data));
 		CheckTargetForConflictsIn(&targettag);
 	}
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index 99fe37e..ad0bcd7 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -78,7 +78,6 @@ typedef enum LWLockId
 	SerializableFinishedListLock,
 	SerializablePredicateLockListLock,
 	OldSerXidLock,
-	PredicateLockNextRowLinkLock,
 	/* Individual lock IDs end here */
 	FirstBufMappingLock,
 	FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
diff --git a/src/include/storage/predicate_internals.h b/src/include/storage/predicate_internals.h
index 32e9a1b..1c784f7 100644
--- a/src/include/storage/predicate_internals.h
+++ b/src/include/storage/predicate_internals.h
@@ -150,8 +150,6 @@ typedef struct PredXactListData
 	SerCommitSeqNo HavePartialClearedThrough;	/* have cleared through this
 												 * seq no */
 	SERIALIZABLEXACT *OldCommittedSxact;		/* shared copy of dummy sxact */
-	bool		NeedTargetLinkCleanup;	/* to save cleanup effort for rare
-										 * case */
 	PredXactListElement element;
 } PredXactListData;
@@ -231,9 +229,13 @@ typedef struct SERIALIZABLEXID
 
 /*
  * The PREDICATELOCKTARGETTAG struct identifies a database object which can
- * be the target of predicate locks. It is designed to fit into 16 bytes
- * with no padding. Note that this would need adjustment if we widen Oid or
- * BlockNumber to more than 32 bits.
+ * be the target of predicate locks.
+ *
+ * locktag_field6 was added to allow slack space to be filled so that stack-
+ * allocated tags wouldn't have random bytes in the middle. Moving the only
+ * uint16 to the end didn't work, because the hash function being used
+ * doesn't properly respect tag length -- it will go to a four byte boundary
+ * past the end of the tag.
  *
  * TODO SSI: If we always use the same fields for the same type of value, we
  * should rename these. Holding off until it's clear there are no exceptions.
@@ -247,8 +249,9 @@ typedef struct PREDICATELOCKTARGETTAG
 	uint32		locktag_field1; /* a 32-bit ID field */
 	uint32		locktag_field2; /* a 32-bit ID field */
 	uint32		locktag_field3; /* a 32-bit ID field */
+	uint32		locktag_field5; /* a 32-bit ID field */
 	uint16		locktag_field4; /* a 16-bit ID field */
-	uint16		locktag_field5; /* a 16-bit ID field */
+	uint16		locktag_field6; /* filler */
 } PREDICATELOCKTARGETTAG;
 
 /*
@@ -260,12 +263,11 @@ typedef struct PREDICATELOCKTARGETTAG
 * already have one. An entry is removed when the last lock is removed from
 * its list.
 *
- * Because a check for predicate locks on a tuple target should also find
- * locks on previous versions of the same row, if there are any created by
- * overlapping transactions, we keep a pointer to the target for the prior
- * version of the row. We also keep a pointer to the next version of the
- * row, so that when we no longer have any predicate locks and the back
- * pointer is clear, we can clean up the prior pointer for the next version.
+ * Because a particular target might become obsolete, due to update to a new
+ * version, before the reading transaction is obsolete, we need some way to
+ * prevent errors from reuse of a tuple ID. Rather than attempting to clean
+ * up the targets as the related tuples are pruned or vacuumed, we check the
+ * xmin on access. This should be far less costly.
 */
 typedef struct PREDICATELOCKTARGET PREDICATELOCKTARGET;
 
@@ -277,15 +279,6 @@ struct PREDICATELOCKTARGET
 	/* data */
 	SHM_QUEUE	predicateLocks; /* list of PREDICATELOCK objects assoc. with
 								 * predicate lock target */
-
-	/*
-	 * The following two pointers are only used for tuple locks, and are only
-	 * consulted for conflict detection and cleanup; not for granularity
-	 * promotion.
-	 */
-	PREDICATELOCKTARGET *priorVersionOfRow;		/* what other locks to check */
-	PREDICATELOCKTARGET *nextVersionOfRow;		/* who has pointer here for
-												 * more targets */
 };
 
@@ -387,21 +380,24 @@ typedef struct PredicateLockData
 	(locktag).locktag_field2 = (reloid), \
 	(locktag).locktag_field3 = InvalidBlockNumber, \
 	(locktag).locktag_field4 = InvalidOffsetNumber, \
-	(locktag).locktag_field5 = 0)
+	(locktag).locktag_field5 = InvalidTransactionId, \
+	(locktag).locktag_field6 = 0)
 
 #define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
 	((locktag).locktag_field1 = (dboid), \
 	(locktag).locktag_field2 = (reloid), \
 	(locktag).locktag_field3 = (blocknum), \
 	(locktag).locktag_field4 = InvalidOffsetNumber, \
-	(locktag).locktag_field5 = 0)
+	(locktag).locktag_field5 = InvalidTransactionId, \
+	(locktag).locktag_field6 = 0)
 
-#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
+#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum,xmin) \
 	((locktag).locktag_field1 = (dboid), \
 	(locktag).locktag_field2 = (reloid), \
 	(locktag).locktag_field3 = (blocknum), \
 	(locktag).locktag_field4 = (offnum), \
-	(locktag).locktag_field5 = 0)
+	(locktag).locktag_field5 = (xmin), \
+	(locktag).locktag_field6 = 0)
 
 #define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
 	((locktag).locktag_field1)
@@ -411,6 +407,8 @@ typedef struct PredicateLockData
 	((locktag).locktag_field3)
 #define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
 	((locktag).locktag_field4)
+#define GET_PREDICATELOCKTARGETTAG_XMIN(locktag) \
+	((locktag).locktag_field5)
 #define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
 	(((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
 	 (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE : \
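P.P.S. The reason for carrying xmin in the tuple lock target tag (see the
predicate_internals.h comment above) can likewise be shown with a toy
example. Again, this is illustrative code with invented names, not part of
the patch; it only models why two tuples at the same (block, offset) but
with different xmin values must identify different lock targets:

    /*
     * Illustrative sketch only -- names are made up. Shows that adding
     * xmin to the tag prevents a recycled tuple ID from matching locks
     * taken on the tuple that previously occupied that slot.
     */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct ToyTupleTag
    {
        uint32_t    blkno;      /* block number */
        uint16_t    offnum;     /* offset within block */
        uint32_t    xmin;       /* inserting transaction's xid */
    } ToyTupleTag;

    static bool
    toy_tag_equal(const ToyTupleTag *a, const ToyTupleTag *b)
    {
        return a->blkno == b->blkno &&
            a->offnum == b->offnum &&
            a->xmin == b->xmin;
    }

    int
    main(void)
    {
        /* A lock taken on the original tuple... */
        ToyTupleTag locked = {42, 7, 1000};

        /* ...and a later access to a new tuple reusing the same TID. */
        ToyTupleTag recycled = {42, 7, 2000};

        printf("TID alone matches the old lock: %d\n",
               locked.blkno == recycled.blkno &&
               locked.offnum == recycled.offnum);
        printf("with xmin in the tag, no false match: %d\n",
               (int) toy_tag_equal(&locked, &recycled));
        return 0;
    }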