On Thu, Mar 10, 2016 at 7:55 AM, Petr Jelinek <p...@2ndquadrant.com> wrote:
Thanks for the comments.. > Hmm, why did you remove the comment above the call to > UnlockRelationForExtension? While re factoring I lose this comment.. Fixed it > It still seems relevant, maybe with some minor modification? > > Also there is a bit of whitespace mess inside the conditional lock block. Fixed I got the result of 10 mins run so posting it.. Note: Base code results are copied from up thread... Results For 10 Mins run of COPY 10000 records of size 4 bytes script and configuration are same as used in up thread -------------------------------------------------------------------------------------------- Client Base Patch 1 105 111 2 217 246 4 210 428 8 166 653 16 145 808 32 124 988 64 --- 974 Results For 10 Mins run of INSERT 1000 records of size 1024 bytes(data don't fits in shared buffer) -------------------------------------------------------------------------------------------------- Client Base Patch 1 117 120 2 111 126 4 51 130 8 43 147 16 40 209 32 --- 254 64 --- 205 -- Regards, Dilip Kumar EnterpriseDB: http://www.enterprisedb.com
diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index 8140418..b73535c 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -169,6 +169,87 @@ GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2, } /* + * RelationAddOneBlock + * + * Extend relation by one block and lock the buffer + */ +static Buffer +RelationAddOneBlock(Relation relation, Buffer otherBuffer, BulkInsertState bistate) +{ + Buffer buffer; + /* + * XXX This does an lseek - rather expensive - but at the moment it is the + * only way to accurately determine how many blocks are in a relation. Is + * it worth keeping an accurate file length in shared memory someplace, + * rather than relying on the kernel to do it for us? + */ + buffer = ReadBufferBI(relation, P_NEW, bistate); + + /* + * We can be certain that locking the otherBuffer first is OK, since + * it must have a lower page number. + */ + if (otherBuffer != InvalidBuffer) + LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE); + + /* + * Now acquire lock on the new page. + */ + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + return buffer; +} + +/* + * RelationAddExtraBlocks + * + * Extend extra blocks for the relations to avoid the future contention + * on the relation extension lock. + */ + +static void +RelationAddExtraBlocks(Relation relation, BulkInsertState bistate) +{ + Page page; + Size freespace; + BlockNumber blockNum; + int extraBlocks = 0; + int lockWaiters = 0; + Buffer buffer; + + /* + * For calculating number of extra blocks to extend, find the level + * of contention on this lock, by getting the requester of this lock + * and add extra blocks in multiple of waiters. + */ + lockWaiters = RelationExtensionLockWaiter(relation); + + extraBlocks = lockWaiters * 20; + + while (extraBlocks--) + { + /* + * Here we are adding extra blocks to the relation after + * adding each block update the information in FSM so that + * other backend running parallel can find the block. + */ + buffer = ReadBufferBI(relation, P_NEW, bistate); + + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + page = BufferGetPage(buffer); + PageInit(page, BufferGetPageSize(buffer), 0); + freespace = PageGetHeapFreeSpace(page); + MarkBufferDirty(buffer); + blockNum = BufferGetBlockNumber(buffer); + + UnlockReleaseBuffer(buffer); + + RecordPageWithFreeSpace(relation, blockNum, freespace); + } +} + +/* * RelationGetBufferForTuple * * Returns pinned and exclusive-locked buffer of a page in given relation @@ -233,10 +314,11 @@ RelationGetBufferForTuple(Relation relation, Size len, bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM); Buffer buffer = InvalidBuffer; Page page; - Size pageFreeSpace, - saveFreeSpace; + Size pageFreeSpace = 0, + saveFreeSpace = 0; BlockNumber targetBlock, - otherBlock; + otherBlock, + lastValidBlock = InvalidBlockNumber; bool needLock; len = MAXALIGN(len); /* be conservative */ @@ -308,6 +390,7 @@ RelationGetBufferForTuple(Relation relation, Size len, } } +loop: while (targetBlock != InvalidBlockNumber) { /* @@ -388,6 +471,8 @@ RelationGetBufferForTuple(Relation relation, Size len, otherBlock, targetBlock, vmbuffer_other, vmbuffer); + lastValidBlock = targetBlock; + /* * Now we can check to see if there's enough free space here. If so, * we're done. @@ -441,27 +526,47 @@ RelationGetBufferForTuple(Relation relation, Size len, needLock = !RELATION_IS_LOCAL(relation); if (needLock) - LockRelationForExtension(relation, ExclusiveLock); - - /* - * XXX This does an lseek - rather expensive - but at the moment it is the - * only way to accurately determine how many blocks are in a relation. Is - * it worth keeping an accurate file length in shared memory someplace, - * rather than relying on the kernel to do it for us? - */ - buffer = ReadBufferBI(relation, P_NEW, bistate); - - /* - * We can be certain that locking the otherBuffer first is OK, since it - * must have a lower page number. - */ - if (otherBuffer != InvalidBuffer) - LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE); + { + /* + * First try to get the lock in no-wait mode, if succeed extend one + * block, else get the lock in normal mode and after we get the lock + * extend some extra blocks, extra blocks will be added to satisfy + * request of other waiters and avoid future contention. Here instead + * of directly taking lock we try no-wait mode, this is to handle the + * case, when there is no contention - it should not find the lock + * waiter and execute extra instructions. + */ + if (LOCKACQUIRE_OK + != RelationExtensionLockConditional(relation, ExclusiveLock)) + { + LockRelationForExtension(relation, ExclusiveLock); + + if (use_fsm) + { + if (lastValidBlock != InvalidBlockNumber) + { + targetBlock = RecordAndGetPageWithFreeSpace(relation, + lastValidBlock, + pageFreeSpace, + len + saveFreeSpace); + } + + /* Other waiter has extended the block for us*/ + if (targetBlock != InvalidBlockNumber) + { + UnlockRelationForExtension(relation, ExclusiveLock); + goto loop; + } + + RelationAddExtraBlocks(relation, bistate); + } + } + } - /* - * Now acquire lock on the new page. + /* For all case we need to add at least one block to satisfy our + * own request. */ - LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + buffer = RelationAddOneBlock(relation, otherBuffer, bistate); /* * Release the file-extension lock; it's now OK for someone else to extend @@ -472,6 +577,8 @@ RelationGetBufferForTuple(Relation relation, Size len, if (needLock) UnlockRelationForExtension(relation, ExclusiveLock); + + /* * We need to initialize the empty new page. Double-check that it really * is empty (this should never happen, but if it does we don't want to diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 9d16afb..5d9454d 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -341,6 +341,40 @@ LockRelationForExtension(Relation relation, LOCKMODE lockmode) } /* + * RelationExtensionLockConditional + * + * Same as LockRelationForExtension except it will not wait on the lock. + */ +LockAcquireResult +RelationExtensionLockConditional(Relation relation, LOCKMODE lockmode) +{ + LOCKTAG tag; + + SET_LOCKTAG_RELATION_EXTEND(tag, + relation->rd_lockInfo.lockRelId.dbId, + relation->rd_lockInfo.lockRelId.relId); + + return LockAcquire(&tag, lockmode, false, true); +} + +/* + * RelationExtensionLockWaiter + * + * Count the lock requester for the RelationExtension lock. + */ +int +RelationExtensionLockWaiter(Relation relation) +{ + LOCKTAG tag; + + SET_LOCKTAG_RELATION_EXTEND(tag, + relation->rd_lockInfo.lockRelId.dbId, + relation->rd_lockInfo.lockRelId.relId); + + return LockWaiterCount(&tag); +} + +/* * UnlockRelationForExtension */ void diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index a458c68..cf723ae 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -4380,3 +4380,47 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait) LockRelease(&tag, ShareLock, false); return true; } + +/* + * LockWaiterCount + * + * Find the number of lock requester on this locktag + */ +int +LockWaiterCount(const LOCKTAG *locktag) +{ + LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid; + LOCALLOCKTAG localtag; + LOCALLOCK *locallock; + bool found; + uint32 hashcode; + LWLock *partitionLock; + int waiters = 0; + + if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) + elog(ERROR, "unrecognized lock method: %d", lockmethodid); + + /* + * Find a LOCALLOCK entry for this lock and lockmode + */ + MemSet(&localtag, 0, sizeof(localtag)); /* must clear padding */ + localtag.lock = *locktag; + localtag.mode = ExclusiveLock; + + locallock = (LOCALLOCK *) hash_search(LockMethodLocalHash, + (void *) &localtag, + HASH_FIND, &found); + + + hashcode = locallock->hashcode; + + partitionLock = LockHashPartitionLock(hashcode); + LWLockAcquire(partitionLock, LW_EXCLUSIVE); + + if (found) + waiters = locallock->lock->nRequested; + + LWLockRelease(partitionLock); + + return waiters; +} diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h index e9d41bf..ea8e19e 100644 --- a/src/include/storage/lmgr.h +++ b/src/include/storage/lmgr.h @@ -101,4 +101,7 @@ extern void UnlockSharedObjectForSession(Oid classid, Oid objid, uint16 objsubid /* Describe a locktag for error messages */ extern void DescribeLockTag(StringInfo buf, const LOCKTAG *tag); +extern LockAcquireResult RelationExtensionLockConditional(Relation relation, LOCKMODE lockmode); +extern int RelationExtensionLockWaiter(Relation relation); + #endif /* LMGR_H */ diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 788d50a..3fd74fb 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -572,6 +572,8 @@ extern void RememberSimpleDeadLock(PGPROC *proc1, PGPROC *proc2); extern void InitDeadLockChecking(void); +extern int LockWaiterCount(const LOCKTAG *locktag); + #ifdef LOCK_DEBUG extern void DumpLocks(PGPROC *proc); extern void DumpAllLocks(void);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers