Hi, I've been annoyed at the amount of memory used by the backend local PrivateRefCount array for a couple of reasons:
a) The performance impact of AtEOXact_Buffers() on Assert() enabled builds is really, really annoying. b) On larger nodes, the L1/2/3 cache impact of randomly accessing several megabyte big array at a high frequency is noticeable. I've seen the access to that to be the primary (yes, really) source of pipeline stalls. c) On nodes with significant shared_memory the sum of the per-backend arrays is a significant amount of memory, that could very well be used more beneficially. So what I have done in the attached proof of concept is to have a small (8 currently) array of (buffer, pincount) that's searched linearly when the refcount of a buffer is needed. When more than 8 buffers are pinned a hashtable is used to lookup the values. That seems to work fairly well. On the few tests I could run on my laptop - I've done this during a flight - it's a small performance win in all cases I could test. While saving a fair amount of memory. Alternatively we could just get rid of the idea of tracking this per backend, relying on tracking via resource managers... Greetings, Andres Freund -- Andres Freund http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
>From 433f248c0f4c3e3d43d1cc955354e5dd5cddfcea Mon Sep 17 00:00:00 2001 From: Andres Freund <and...@anarazel.de> Date: Thu, 20 Mar 2014 21:46:34 +0100 Subject: [PATCH] Make backend local tracking of buffer pins more efficient. --- src/backend/storage/buffer/buf_init.c | 25 --- src/backend/storage/buffer/bufmgr.c | 346 ++++++++++++++++++++++++++++------ src/include/storage/bufmgr.h | 19 -- 3 files changed, 290 insertions(+), 100 deletions(-) diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c index e187242..3b2432d 100644 --- a/src/backend/storage/buffer/buf_init.c +++ b/src/backend/storage/buffer/buf_init.c @@ -130,31 +130,6 @@ InitBufferPool(void) } /* - * Initialize access to shared buffer pool - * - * This is called during backend startup (whether standalone or under the - * postmaster). It sets up for this backend's access to the already-existing - * buffer pool. - * - * NB: this is called before InitProcess(), so we do not have a PGPROC and - * cannot do LWLockAcquire; hence we can't actually access stuff in - * shared memory yet. We are only initializing local data here. - * (See also InitBufferPoolBackend, over in bufmgr.c.) - */ -void -InitBufferPoolAccess(void) -{ - /* - * Allocate and zero local arrays of per-buffer info. - */ - PrivateRefCount = (int32 *) calloc(NBuffers, sizeof(int32)); - if (!PrivateRefCount) - ereport(FATAL, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); -} - -/* * BufferShmemSize * * compute the size of shared memory for the buffer pool including diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 19eecab..113b7ed 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -86,6 +86,175 @@ static bool IsForInput; /* local state for LockBufferForCleanup */ static volatile BufferDesc *PinCountWaitBuf = NULL; +typedef struct PrivateRefCount +{ + Buffer buffer; + int32 refcount; +} PrivateRefCount; + +/* one full cacheline */ +#define REFCOUNT_ARRAY_ENTRIES 8 + +/* + * Backend-Private refcount management. + */ +static struct PrivateRefCount PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]; +static HTAB *PrivateRefCountHash = NULL; +static int32 PrivateRefCountOverflowed = 0; + +static PrivateRefCount* GetPrivateRefCountEntry(Buffer buffer, bool create); +static inline int32 GetPrivateRefCount(Buffer buffer); +static void ForgetPrivateRefCountEntry(PrivateRefCount *ref); + +/* + * Return the PrivateRefCount entry for the passed buffer. + * + * Returns NULL if create = false is passed and the buffer doesn't have a + * PrivateRefCount entry; allocates a new PrivateRefCount entry if currently + * none exists and create = true is passed. + * + * When a returned refcount entry isn't used anymore it has to be forgotten, + * using ForgetPrivateRefCountEntry(). + * + * Only works for shared buffers. + */ +static PrivateRefCount* +GetPrivateRefCountEntry(Buffer buffer, bool create) +{ + PrivateRefCount *res; + PrivateRefCount *free = NULL; + + int i; + bool found = false; + + Assert(BufferIsValid(buffer)); + Assert(!BufferIsLocal(buffer)); + + /* + * First search for references in the array, that'll be sufficient in the + * majority of cases. + */ + for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++) + { + res = &PrivateRefCountArray[i]; + + if (res->buffer == buffer) + return res; + + /* Remember where to put a new refcount, should it become necessary. */ + if (create && free == NULL && res->buffer == InvalidBuffer) + free = res; + } + + /* + * By here we know that the buffer, if already pinned, isn't residing in + * the array. + */ + + res = NULL; + found = false; + + /* + * Search in the hashtable if either we haven't found the target buffer + * yet and it already has entries, or if there wasn't a free entry in the + * local array. + */ + if (PrivateRefCountOverflowed > 0 || (create && free == NULL)) + { + res = hash_search(PrivateRefCountHash, + (void *) &buffer, + (create && free == NULL) ? HASH_ENTER : HASH_FIND, + &found); + } + + if (found) + { + /* TODO: could relocate to `free' here */ + } + else if (res) + { + /* a new refcount entry, initialize */ + PrivateRefCountOverflowed++; + res->refcount = 0; + } + else if (create) + { + /* buffer wasn't in the hash and we have a free array entry */ + Assert(free != NULL); + free->buffer = buffer; + free->refcount = 0; + res = free; + } + + return res; +} + +/* + * Returns how many times the passed buffer is pinned by this backend. + * + * Only works for shared memory buffers! + */ +static inline int32 +GetPrivateRefCount(Buffer buffer) +{ + PrivateRefCount *ref; + + Assert(BufferIsValid(buffer)); + Assert(!BufferIsLocal(buffer)); + + ref = GetPrivateRefCountEntry(buffer, false); + + if (ref == NULL) + return 0; + return ref->refcount; +} + +/* + * Stop tracking the refcount of the buffer ref is tracking the refcount + * for. Nono, there's no circularity here. + */ +static void +ForgetPrivateRefCountEntry(PrivateRefCount *ref) +{ + Assert(ref->refcount == 0); + + if (ref >= &PrivateRefCountArray[0] && + ref < &PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]) + { + ref->buffer = InvalidBuffer; + } + else + { + bool found; + Buffer buffer = ref->buffer; + hash_search(PrivateRefCountHash, + (void *) &buffer, + HASH_REMOVE, + &found); + Assert(found); + Assert(PrivateRefCountOverflowed > 0); + PrivateRefCountOverflowed--; + } +} + +/* + * BufferIsPinned + * True iff the buffer is pinned (also checks for valid buffer number). + * + * NOTE: what we check here is that *this* backend holds a pin on + * the buffer. We do not care whether some other backend does. + */ +#define BufferIsPinned(bufnum) \ +( \ + !BufferIsValid(bufnum) ? \ + false \ + : \ + BufferIsLocal(bufnum) ? \ + (LocalRefCount[-(bufnum) - 1] > 0) \ + : \ + (GetPrivateRefCount(bufnum) > 0) \ +) + static Buffer ReadBuffer_common(SMgrRelation reln, char relpersistence, ForkNumber forkNum, BlockNumber blockNum, @@ -940,7 +1109,7 @@ retry: UnlockBufHdr(buf); LWLockRelease(oldPartitionLock); /* safety check: should definitely not be our *own* pin */ - if (PrivateRefCount[buf->buf_id] != 0) + if (GetPrivateRefCount(buf->buf_id) > 0) elog(ERROR, "buffer is pinned in InvalidateBuffer"); WaitIO(buf); goto retry; @@ -999,7 +1168,7 @@ MarkBufferDirty(Buffer buffer) bufHdr = &BufferDescriptors[buffer - 1]; - Assert(PrivateRefCount[buffer - 1] > 0); + Assert(BufferIsPinned(buffer)); /* unfortunately we can't check if the lock is held exclusively */ Assert(LWLockHeldByMe(bufHdr->content_lock)); @@ -1046,9 +1215,9 @@ ReleaseAndReadBuffer(Buffer buffer, if (BufferIsValid(buffer)) { + Assert(BufferIsPinned(buffer)); if (BufferIsLocal(buffer)) { - Assert(LocalRefCount[-buffer - 1] > 0); bufHdr = &LocalBufferDescriptors[-buffer - 1]; if (bufHdr->tag.blockNum == blockNum && RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) && @@ -1059,7 +1228,6 @@ ReleaseAndReadBuffer(Buffer buffer, } else { - Assert(PrivateRefCount[buffer - 1] > 0); bufHdr = &BufferDescriptors[buffer - 1]; /* we have pin, so it's ok to examine tag without spinlock */ if (bufHdr->tag.blockNum == blockNum && @@ -1089,15 +1257,18 @@ ReleaseAndReadBuffer(Buffer buffer, * Note that ResourceOwnerEnlargeBuffers must have been done already. * * Returns TRUE if buffer is BM_VALID, else FALSE. This provision allows - * some callers to avoid an extra spinlock cycle. + * some callradix_tree_inserters to avoid an extra spinlock cycle. */ static bool PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy) { int b = buf->buf_id; bool result; + PrivateRefCount *ref; + + ref = GetPrivateRefCountEntry(b + 1, true); - if (PrivateRefCount[b] == 0) + if (ref->refcount == 0) { LockBufHdr(buf); buf->refcount++; @@ -1119,8 +1290,9 @@ PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy) /* If we previously pinned the buffer, it must surely be valid */ result = true; } - PrivateRefCount[b]++; - Assert(PrivateRefCount[b] > 0); + + ref->refcount++; + Assert(ref->refcount > 0); ResourceOwnerRememberBuffer(CurrentResourceOwner, BufferDescriptorGetBuffer(buf)); return result; @@ -1143,12 +1315,15 @@ static void PinBuffer_Locked(volatile BufferDesc *buf) { int b = buf->buf_id; + PrivateRefCount *ref; + + ref = GetPrivateRefCountEntry(b + 1, true); - if (PrivateRefCount[b] == 0) + if (ref->refcount == 0) buf->refcount++; UnlockBufHdr(buf); - PrivateRefCount[b]++; - Assert(PrivateRefCount[b] > 0); + ref->refcount++; + Assert(ref->refcount > 0); ResourceOwnerRememberBuffer(CurrentResourceOwner, BufferDescriptorGetBuffer(buf)); } @@ -1165,14 +1340,18 @@ static void UnpinBuffer(volatile BufferDesc *buf, bool fixOwner) { int b = buf->buf_id; + PrivateRefCount *ref; + + ref = GetPrivateRefCountEntry(b + 1, false); + Assert(ref != NULL); if (fixOwner) ResourceOwnerForgetBuffer(CurrentResourceOwner, BufferDescriptorGetBuffer(buf)); - Assert(PrivateRefCount[b] > 0); - PrivateRefCount[b]--; - if (PrivateRefCount[b] == 0) + Assert(ref->refcount > 0); + ref->refcount--; + if (ref->refcount == 0) { /* I'd better not still hold any locks on the buffer */ Assert(!LWLockHeldByMe(buf->content_lock)); @@ -1197,6 +1376,8 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner) } else UnlockBufHdr(buf); + + ForgetPrivateRefCountEntry(ref); } } @@ -1700,36 +1881,95 @@ SyncOneBuffer(int buf_id, bool skip_recently_used) return result | BUF_WRITTEN; } - /* - * AtEOXact_Buffers - clean up at end of transaction. + * Helper Routine for AtProcExit_Buffers() and AtEOXact_Buffers(). * - * As of PostgreSQL 8.0, buffer pins should get released by the - * ResourceOwner mechanism. This routine is just a debugging - * cross-check that no pins remain. + * Check whether any buffer pins are still held by this backend, but only if + * assertions are enabled. */ -void -AtEOXact_Buffers(bool isCommit) +static void +AssertNoPinnedBuffers(void) { #ifdef USE_ASSERT_CHECKING if (assert_enabled) { int RefCountErrors = 0; - Buffer b; + PrivateRefCount *res; + int i; - for (b = 1; b <= NBuffers; b++) + /* check the array */ + for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++) { - if (PrivateRefCount[b - 1] != 0) + res = &PrivateRefCountArray[i]; + + if (res->buffer != InvalidBuffer) + { + PrintBufferLeakWarning(res->buffer); + RefCountErrors++; + } + } + + /* if neccessary search the hash */ + if (PrivateRefCountOverflowed) + { + HASH_SEQ_STATUS hstat; + hash_seq_init(&hstat, PrivateRefCountHash); + while ((res = (PrivateRefCount *) hash_seq_search(&hstat)) != NULL) { - PrintBufferLeakWarning(b); + PrintBufferLeakWarning(res->buffer); RefCountErrors++; } + } + Assert(RefCountErrors == 0); } #endif +} + +/* + * AtEOXact_Buffers - clean up at end of transaction. + * + * As of PostgreSQL 8.0, buffer pins should get released by the + * ResourceOwner mechanism. This routine is just a debugging + * cross-check that no pins remain. + */ +void +AtEOXact_Buffers(bool isCommit) +{ + AssertNoPinnedBuffers(); AtEOXact_LocalBuffers(isCommit); + + Assert(PrivateRefCountOverflowed == 0); +} + +/* + * Initialize access to shared buffer pool + * + * This is called during backend startup (whether standalone or under the + * postmaster). It sets up for this backend's access to the already-existing + * buffer pool. + * + * NB: this is called before InitProcess(), so we do not have a PGPROC and + * cannot do LWLockAcquire; hence we can't actually access stuff in + * shared memory yet. We are only initializing local data here. + * (See also InitBufferPoolBackend) + */ +void +InitBufferPoolAccess(void) +{ + HASHCTL hash_ctl; + + memset(&PrivateRefCountArray, 0, sizeof(PrivateRefCountArray)); + + MemSet(&hash_ctl, 0, sizeof(hash_ctl)); + hash_ctl.keysize = sizeof(int32); + hash_ctl.entrysize = sizeof(PrivateRefCountArray); + hash_ctl.hash = oid_hash; /* a bit more efficient than tag_hash */ + + PrivateRefCountHash = hash_create("PrivateRefCount", 100, &hash_ctl, + HASH_ELEM | HASH_FUNCTION); } /* @@ -1757,26 +1997,12 @@ AtProcExit_Buffers(int code, Datum arg) AbortBufferIO(); UnlockBuffers(); -#ifdef USE_ASSERT_CHECKING - if (assert_enabled) - { - int RefCountErrors = 0; - Buffer b; - - for (b = 1; b <= NBuffers; b++) - { - if (PrivateRefCount[b - 1] != 0) - { - PrintBufferLeakWarning(b); - RefCountErrors++; - } - } - Assert(RefCountErrors == 0); - } -#endif + AssertNoPinnedBuffers(); /* localbuf.c needs a chance too */ AtProcExit_LocalBuffers(); + + Assert(PrivateRefCountOverflowed == 0); } /* @@ -1800,7 +2026,7 @@ PrintBufferLeakWarning(Buffer buffer) else { buf = &BufferDescriptors[buffer - 1]; - loccount = PrivateRefCount[buffer - 1]; + loccount = GetPrivateRefCount(buffer); backend = InvalidBackendId; } @@ -2340,7 +2566,7 @@ PrintBufferDescs(void) i, buf->freeNext, relpathbackend(buf->tag.rnode, InvalidBackendId, buf->tag.forkNum), buf->tag.blockNum, buf->flags, - buf->refcount, PrivateRefCount[i]); + buf->refcount, GetPrivateRefCount(i)); } } #endif @@ -2354,7 +2580,7 @@ PrintPinnedBufs(void) for (i = 0; i < NBuffers; ++i, ++buf) { - if (PrivateRefCount[i] > 0) + if (GetPrivateRefCount(i + 1) > 0) { /* theoretically we should lock the bufhdr here */ elog(LOG, @@ -2363,7 +2589,7 @@ PrintPinnedBufs(void) i, buf->freeNext, relpath(buf->tag.rnode, buf->tag.forkNum), buf->tag.blockNum, buf->flags, - buf->refcount, PrivateRefCount[i]); + buf->refcount, GetPrivateRefCount(i + 1)); } } } @@ -2520,6 +2746,7 @@ void ReleaseBuffer(Buffer buffer) { volatile BufferDesc *bufHdr; + PrivateRefCount *ref; if (!BufferIsValid(buffer)) elog(ERROR, "bad buffer ID: %d", buffer); @@ -2535,10 +2762,12 @@ ReleaseBuffer(Buffer buffer) bufHdr = &BufferDescriptors[buffer - 1]; - Assert(PrivateRefCount[buffer - 1] > 0); + ref = GetPrivateRefCountEntry(buffer, false); + Assert(ref != NULL); + Assert(ref->refcount > 0); - if (PrivateRefCount[buffer - 1] > 1) - PrivateRefCount[buffer - 1]--; + if (ref->refcount > 1) + ref->refcount--; else UnpinBuffer(bufHdr, false); } @@ -2572,7 +2801,12 @@ IncrBufferRefCount(Buffer buffer) if (BufferIsLocal(buffer)) LocalRefCount[-buffer - 1]++; else - PrivateRefCount[buffer - 1]++; + { + PrivateRefCount *ref; + ref = GetPrivateRefCountEntry(buffer, false); + Assert(ref != NULL); + ref->refcount++; + } } /* @@ -2606,7 +2840,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std) bufHdr = &BufferDescriptors[buffer - 1]; - Assert(PrivateRefCount[buffer - 1] > 0); + Assert(GetPrivateRefCount(buffer) > 0); /* here, either share or exclusive lock is OK */ Assert(LWLockHeldByMe(bufHdr->content_lock)); @@ -2823,9 +3057,9 @@ LockBufferForCleanup(Buffer buffer) } /* There should be exactly one local pin */ - if (PrivateRefCount[buffer - 1] != 1) + if (GetPrivateRefCount(buffer) != 1) elog(ERROR, "incorrect local pin count: %d", - PrivateRefCount[buffer - 1]); + GetPrivateRefCount(buffer)); bufHdr = &BufferDescriptors[buffer - 1]; @@ -2890,7 +3124,7 @@ HoldingBufferPinThatDelaysRecovery(void) if (bufid < 0) return false; - if (PrivateRefCount[bufid] > 0) + if (GetPrivateRefCount(bufid + 1) > 0) return true; return false; @@ -2920,8 +3154,8 @@ ConditionalLockBufferForCleanup(Buffer buffer) } /* There should be exactly one local pin */ - Assert(PrivateRefCount[buffer - 1] > 0); - if (PrivateRefCount[buffer - 1] != 1) + Assert(GetPrivateRefCount(buffer) > 0); + if (GetPrivateRefCount(buffer) != 1) return false; /* Try to acquire lock */ diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h index 89447d0..42d9120 100644 --- a/src/include/storage/bufmgr.h +++ b/src/include/storage/bufmgr.h @@ -55,7 +55,6 @@ extern int target_prefetch_pages; /* in buf_init.c */ extern PGDLLIMPORT char *BufferBlocks; -extern PGDLLIMPORT int32 *PrivateRefCount; /* in localbuf.c */ extern PGDLLIMPORT int NLocBuffer; @@ -102,24 +101,6 @@ extern PGDLLIMPORT int32 *LocalRefCount; ) /* - * BufferIsPinned - * True iff the buffer is pinned (also checks for valid buffer number). - * - * NOTE: what we check here is that *this* backend holds a pin on - * the buffer. We do not care whether some other backend does. - */ -#define BufferIsPinned(bufnum) \ -( \ - !BufferIsValid(bufnum) ? \ - false \ - : \ - BufferIsLocal(bufnum) ? \ - (LocalRefCount[-(bufnum) - 1] > 0) \ - : \ - (PrivateRefCount[(bufnum) - 1] > 0) \ -) - -/* * BufferGetBlock * Returns a reference to a disk page image associated with a buffer. * -- 1.8.3.251.g1462b67
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers