On Tue, Mar 22, 2016 at 1:12 PM, Petr Jelinek <p...@2ndquadrant.com> wrote: > I also think the code simplicity makes this worth it.
Agreed. I went over this patch and did a cleanup pass today. I discovered that the LockWaiterCount() function was broken if you try to tried to use it on a lock that you didn't hold or a lock that you held in any mode other than exclusive, so I tried to fix that. I rewrote a lot of the comments and tightened some other things up. The result is attached. I'm baffled by the same code Amit asked about upthread, even though there's now a comment: + /* + * Here we are calling RecordAndGetPageWithFreeSpace + * instead of GetPageWithFreeSpace, because other backend + * who have got the lock might have added extra blocks in + * the FSM and its possible that FSM tree might not have + * been updated so far and we will not get the page by + * searching from top using GetPageWithFreeSpace, so we + * need to search the leaf page directly using our last + * valid target block. + * + * XXX. I don't understand what is happening here. -RMH + */ I've read this over several times and looked at RecordAndGetPageWithFreeSpace() and I'm still confused. First of all, if the lock was acquired by some other backend which did RelationAddExtraBlocks(), it *will* have updated the FSM - that's the whole point. Second, if the other backend extended the relation in some other manner and did not extend the FSM, how does calling RecordAndGetPageWithFreeSpace help? As far as I can see, GetPageWithFreeSpace and RecordAndGetPageWithFreeSpace are both just searching the FSM, so if one is stymied the other will be too. What am I missing? -- Robert Haas EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index 8140418..3ab911f 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -169,6 +169,55 @@ GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2, } /* + * Extend a relation by multiple blocks to avoid future contention on the + * relation extension lock. Our goal is to pre-extend the relation by an + * amount which ramps up as the degree of contention ramps up, but limiting + * the result to some sane overall value. + */ +static void +RelationAddExtraBlocks(Relation relation, BulkInsertState bistate) +{ + Page page; + Size freespace; + BlockNumber blockNum; + int extraBlocks = 0; + int lockWaiters = 0; + Buffer buffer; + + /* + * We use the length of the lock wait queue to judge how much to extend. + * It might seem like multiplying the number of lock waiters by as much + * as 20 is too aggressive, but benchmarking revealed that smaller numbers + * were insufficient. 512 is just an arbitrary cap to prevent pathological + * results (and excessive wasted disk space). + */ + lockWaiters = RelationExtensionLockWaiterCount(relation); + extraBlocks = Min(512, lockWaiters * 20); + + while (extraBlocks-- >= 0) + { + /* Ouch - an unnecessary lseek() each time through the loop! */ + buffer = ReadBufferBI(relation, P_NEW, bistate); + + /* Extend by one page. */ + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + page = BufferGetPage(buffer); + PageInit(page, BufferGetPageSize(buffer), 0); + freespace = PageGetHeapFreeSpace(page); + MarkBufferDirty(buffer); + blockNum = BufferGetBlockNumber(buffer); + UnlockReleaseBuffer(buffer); + + /* + * Put the page in the freespace map so other backends can find it. + * This is what will keep those other backends from also queueing up + * on the relation extension lock. + */ + RecordPageWithFreeSpace(relation, blockNum, freespace); + } +} + +/* * RelationGetBufferForTuple * * Returns pinned and exclusive-locked buffer of a page in given relation @@ -233,10 +282,11 @@ RelationGetBufferForTuple(Relation relation, Size len, bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM); Buffer buffer = InvalidBuffer; Page page; - Size pageFreeSpace, - saveFreeSpace; + Size pageFreeSpace = 0, + saveFreeSpace = 0; BlockNumber targetBlock, - otherBlock; + otherBlock, + lastValidBlock = InvalidBlockNumber; bool needLock; len = MAXALIGN(len); /* be conservative */ @@ -308,6 +358,7 @@ RelationGetBufferForTuple(Relation relation, Size len, } } +loop: while (targetBlock != InvalidBlockNumber) { /* @@ -388,6 +439,8 @@ RelationGetBufferForTuple(Relation relation, Size len, otherBlock, targetBlock, vmbuffer_other, vmbuffer); + lastValidBlock = targetBlock; + /* * Now we can check to see if there's enough free space here. If so, * we're done. @@ -440,10 +493,60 @@ RelationGetBufferForTuple(Relation relation, Size len, */ needLock = !RELATION_IS_LOCAL(relation); + /* + * If we need the lock but are not able to acquire it immediately, we'll + * consider extending the relation by multiple blocks at a time to manage + * contention on the relation extension lock. However, this only makes + * sense if we're using the FSM; otherwise, there's no point. + */ if (needLock) - LockRelationForExtension(relation, ExclusiveLock); + { + if (use_fsm) + LockRelationForExtension(relation, ExclusiveLock); + else if (!ConditionLockRelationForExtension(relation, ExclusiveLock)) + { + /* Couldn't get the lock immmediately; wait for it. */ + LockRelationForExtension(relation, ExclusiveLock); + + if (lastValidBlock != InvalidBlockNumber) + { + /* + * Here we are calling RecordAndGetPageWithFreeSpace + * instead of GetPageWithFreeSpace, because other backend + * who have got the lock might have added extra blocks in + * the FSM and its possible that FSM tree might not have + * been updated so far and we will not get the page by + * searching from top using GetPageWithFreeSpace, so we + * need to search the leaf page directly using our last + * valid target block. + * + * XXX. I don't understand what is happening here. -RMH + */ + targetBlock = RecordAndGetPageWithFreeSpace(relation, + lastValidBlock, + pageFreeSpace, + len + saveFreeSpace); + } + + /* + * If some other waiter has already extended the relation, we + * don't need to do so; just use the existing freespace. + */ + if (targetBlock != InvalidBlockNumber) + { + UnlockRelationForExtension(relation, ExclusiveLock); + goto loop; + } + + /* Time to bulk-extend. */ + RelationAddExtraBlocks(relation, bistate); + } + } /* + * In addition to whatever extension we performed above, we always add + * at least one block to satisfy our own request. + * * XXX This does an lseek - rather expensive - but at the moment it is the * only way to accurately determine how many blocks are in a relation. Is * it worth keeping an accurate file length in shared memory someplace, diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 0632fc0..7e04137 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -341,6 +341,41 @@ LockRelationForExtension(Relation relation, LOCKMODE lockmode) } /* + * ConditionLockRelationForExtension + * + * As above, but only lock if we can get the lock without blocking. + * Returns TRUE iff the lock was acquired. + */ +bool +ConditionLockRelationForExtension(Relation relation, LOCKMODE lockmode) +{ + LOCKTAG tag; + + SET_LOCKTAG_RELATION_EXTEND(tag, + relation->rd_lockInfo.lockRelId.dbId, + relation->rd_lockInfo.lockRelId.relId); + + return (LockAcquire(&tag, lockmode, false, true) != LOCKACQUIRE_NOT_AVAIL); +} + +/* + * RelationExtensionLockWaiterCount + * + * Count the lock requester for the RelationExtension lock. + */ +int +RelationExtensionLockWaiterCount(Relation relation) +{ + LOCKTAG tag; + + SET_LOCKTAG_RELATION_EXTEND(tag, + relation->rd_lockInfo.lockRelId.dbId, + relation->rd_lockInfo.lockRelId.relId); + + return LockWaiterCount(&tag); +} + +/* * UnlockRelationForExtension */ void diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index b30b7b1..353f705 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -4380,3 +4380,40 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait) LockRelease(&tag, ShareLock, false); return true; } + +/* + * LockWaiterCount + * + * Find the number of lock requester on this locktag + */ +int +LockWaiterCount(const LOCKTAG *locktag) +{ + LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid; + LOCK *lock; + bool found; + uint32 hashcode; + LWLock *partitionLock; + int waiters = 0; + + if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) + elog(ERROR, "unrecognized lock method: %d", lockmethodid); + + hashcode = LockTagHashCode(locktag); + partitionLock = LockHashPartitionLock(hashcode); + LWLockAcquire(partitionLock, LW_EXCLUSIVE); + + lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash, + (const void *) locktag, + hashcode, + HASH_FIND, + &found); + if (found) + { + Assert(lock != NULL); + waiters = lock->nRequested; + } + LWLockRelease(partitionLock); + + return waiters; +} diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h index 975b6f8..4460756 100644 --- a/src/include/storage/lmgr.h +++ b/src/include/storage/lmgr.h @@ -53,6 +53,9 @@ extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode); /* Lock a relation for extension */ extern void LockRelationForExtension(Relation relation, LOCKMODE lockmode); extern void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode); +extern bool ConditionLockRelationForExtension(Relation relation, + LOCKMODE lockmode); +extern int RelationExtensionLockWaiterCount(Relation relation); /* Lock a page (currently only used within indexes) */ extern void LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode); diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index b26427d..9c08679 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -574,6 +574,8 @@ extern void RememberSimpleDeadLock(PGPROC *proc1, PGPROC *proc2); extern void InitDeadLockChecking(void); +extern int LockWaiterCount(const LOCKTAG *locktag); + #ifdef LOCK_DEBUG extern void DumpLocks(PGPROC *proc); extern void DumpAllLocks(void);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers