On 2015-01-28 17:08:46 +0100, Andres Freund wrote:
> I just have no idea whether it'd be beneficial to use more space on
> 32bit to pad the individual entries. Since this is mostly beneficial on
> multi-socket, highly concurrent workloads, I doubt it really matters.

> I personally still think that a comment above sbufdesc's definition
> would be sufficient for now. But whatever. I'll enforce 64byte padding
> on 64bit platforms, and do nothing on 32bit platforms.

Patch doing that is attached. For now I've dealt with the 32bit issue by:

#define BUFFERDESC_PADDED_SIZE  (sizeof(void*) == 8 ? 64 : sizeof(BufferDesc))
and
 * XXX: As this primarily matters in highly concurrent workloads which
 * probably all are 64bit these days, and the space wastage would be a bit
 * more noticeable on 32bit systems, we don't force the stride to be cache
 * line sized. If somebody does actual performance testing, we can reevaluate.
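
To make the padding trick concrete, here's a minimal standalone sketch
of the idiom from the patch - the abbreviated BufferDesc stand-in, the
_Static_assert and the demo main are mine, purely for illustration:

#include <stdio.h>

/* Abbreviated stand-in for the real BufferDesc - illustration only. */
typedef struct BufferDesc
{
	int			buf_id;
	/* ... tag, flags, usage_count, refcount, locks elided ... */
} BufferDesc;

/* Pad to a full cache line on 64bit, don't pad at all on 32bit. */
#define BUFFERDESC_PADDED_SIZE	(sizeof(void *) == 8 ? 64 : sizeof(BufferDesc))

/* The union forces sizeof(BufferDescPadded), i.e. the array stride,
 * up to BUFFERDESC_PADDED_SIZE. */
typedef union BufferDescPadded
{
	BufferDesc	bufferdesc;
	char		pad[BUFFERDESC_PADDED_SIZE];
} BufferDescPadded;

/* If BufferDesc ever grows past a cache line on 64bit, the padding
 * silently stops guaranteeing one descriptor per line - worth asserting. */
_Static_assert(sizeof(BufferDesc) <= BUFFERDESC_PADDED_SIZE,
			   "BufferDesc doesn't fit into BUFFERDESC_PADDED_SIZE");

int
main(void)
{
	/* On a 64bit platform this prints a stride of 64; on 32bit it
	 * prints the unpadded struct size. */
	printf("descriptor %zu bytes, array stride %zu bytes\n",
		   sizeof(BufferDesc), sizeof(BufferDescPadded));
	return 0;
}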

I've replaced direct accesses to the (Local)BufferDescriptors arrays
with wrapper macros, to hide the union indirection and allow potential
future changes to be made more easily.
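
Building on the sketch above, the accessors from the patch are just:

#define GetBufferDescriptor(id) (&BufferDescriptors[(id)].bufferdesc)
#define GetLocalBufferDescriptor(id) (&LocalBufferDescriptors[(id)])

so a call site like

	bufHdr = &BufferDescriptors[buf_id];

becomes

	bufHdr = GetBufferDescriptor(buf_id);

and never sees the BufferDescPadded union - if the padding scheme
changes again, only the macro has to be touched.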

Greetings,

Andres Freund

-- 
 Andres Freund                     http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
From caeb98e4ab48748556f4983fdaa52098a190aa57 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Wed, 28 Jan 2015 18:51:50 +0100
Subject: [PATCH 1/3] Fix #ifdef'ed out code to compile again.

---
 src/backend/storage/buffer/bufmgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 7eb2d22..f92d0ef 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -2764,7 +2764,7 @@ PrintPinnedBufs(void)
 				 "[%02d] (freeNext=%d, rel=%s, "
 				 "blockNum=%u, flags=0x%x, refcount=%u %d)",
 				 i, buf->freeNext,
-				 relpath(buf->tag.rnode, buf->tag.forkNum),
+				 relpathperm(buf->tag.rnode, buf->tag.forkNum),
 				 buf->tag.blockNum, buf->flags,
 				 buf->refcount, GetPrivateRefCount(i + 1));
 		}
-- 
2.0.0.rc2.4.g1dc51c6.dirty

From 2bb5f85d8c46cea30b3d1d80b781574c2f2a3903 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Wed, 28 Jan 2015 13:55:36 +0100
Subject: [PATCH 2/3] Align buffer descriptors to cache line boundaries.

Benchmarks have shown that aligning the buffer descriptor array to
cache lines is important for scalability, especially on bigger,
multi-socket machines.

Currently the array sometimes happens to be aligned by chance,
depending on how large previous shared memory allocations were. That
can lead to wildly varying performance results after minor
configuration changes.

In addition to aligning the start of the array, also force the size
of individual descriptors to the common cache line size (64
bytes). That happens to already be the case on 64bit platforms, but
this way we can change the definition more easily.

As the alignment primarily matters in highly concurrent workloads,
which these days probably all run on 64bit platforms, and the space
wastage of element alignment would be a bit more noticeable on 32bit
systems, we don't force the stride to be cache line sized for now. If
somebody does actual performance testing, we can reevaluate that
decision by changing the definition of BUFFERDESC_PADDED_SIZE.

Discussion: 20140202151319.gd32...@awork2.anarazel.de

Per discussion with Bruce Momjian, Robert Haas, Tom Lane and Peter
Geoghegan.
---
 contrib/pg_buffercache/pg_buffercache_pages.c |  6 ++-
 src/backend/storage/buffer/buf_init.c         | 19 ++++----
 src/backend/storage/buffer/bufmgr.c           | 66 ++++++++++++++-------------
 src/backend/storage/buffer/freelist.c         |  6 +--
 src/backend/storage/buffer/localbuf.c         | 12 ++---
 src/include/c.h                               |  1 +
 src/include/storage/buf_internals.h           | 34 +++++++++++++-
 7 files changed, 91 insertions(+), 53 deletions(-)

diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c
index d00f985..98016fc 100644
--- a/contrib/pg_buffercache/pg_buffercache_pages.c
+++ b/contrib/pg_buffercache/pg_buffercache_pages.c
@@ -73,7 +73,6 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
 	if (SRF_IS_FIRSTCALL())
 	{
 		int			i;
-		volatile BufferDesc *bufHdr;
 
 		funcctx = SRF_FIRSTCALL_INIT();
 
@@ -146,8 +145,11 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
 		 * Scan through all the buffers, saving the relevant fields in the
 		 * fctx->record structure.
 		 */
-		for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++)
+		for (i = 0; i < NBuffers; i++)
 		{
+			volatile BufferDesc *bufHdr;
+
+			bufHdr = GetBufferDescriptor(i);
 			/* Lock each buffer header before inspecting. */
 			LockBufHdr(bufHdr);
 
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index 4434bc3..c013e10 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -18,7 +18,7 @@
 #include "storage/buf_internals.h"
 
 
-BufferDesc *BufferDescriptors;
+BufferDescPadded *BufferDescriptors;
 char	   *BufferBlocks;
 
 
@@ -67,9 +67,11 @@ InitBufferPool(void)
 	bool		foundBufs,
 				foundDescs;
 
-	BufferDescriptors = (BufferDesc *)
+	/* Align descriptors to a cacheline boundary. */
+	BufferDescriptors = (BufferDescPadded *) CACHELINEALIGN(
 		ShmemInitStruct("Buffer Descriptors",
-						NBuffers * sizeof(BufferDesc), &foundDescs);
+						NBuffers * sizeof(BufferDescPadded) + PG_CACHE_LINE_SIZE,
+						&foundDescs));
 
 	BufferBlocks = (char *)
 		ShmemInitStruct("Buffer Blocks",
@@ -83,16 +85,15 @@ InitBufferPool(void)
 	}
 	else
 	{
-		BufferDesc *buf;
 		int			i;
 
-		buf = BufferDescriptors;
-
 		/*
 		 * Initialize all the buffer headers.
 		 */
-		for (i = 0; i < NBuffers; buf++, i++)
+		for (i = 0; i < NBuffers; i++)
 		{
+			BufferDesc *buf = GetBufferDescriptor(i);
+
 			CLEAR_BUFFERTAG(buf->tag);
 			buf->flags = 0;
 			buf->usage_count = 0;
@@ -114,7 +115,7 @@ InitBufferPool(void)
 		}
 
 		/* Correct last entry of linked list */
-		BufferDescriptors[NBuffers - 1].freeNext = FREENEXT_END_OF_LIST;
+		GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST;
 	}
 
 	/* Init other shared buffer-management stuff */
@@ -134,6 +135,8 @@ BufferShmemSize(void)
 
 	/* size of buffer descriptors */
 	size = add_size(size, mul_size(NBuffers, sizeof(BufferDesc)));
+	/* to allow aligning buffer descriptors */
+	size = add_size(size, PG_CACHE_LINE_SIZE);
 
 	/* size of data pages */
 	size = add_size(size, mul_size(NBuffers, BLCKSZ));
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index f92d0ef..7b543dd 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -898,7 +898,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 		 * buffer pool, and check to see if the correct data has been loaded
 		 * into the buffer.
 		 */
-		buf = &BufferDescriptors[buf_id];
+		buf = GetBufferDescriptor(buf_id);
 
 		valid = PinBuffer(buf, strategy);
 
@@ -1105,7 +1105,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 
 			/* remaining code should match code at top of routine */
 
-			buf = &BufferDescriptors[buf_id];
+			buf = GetBufferDescriptor(buf_id);
 
 			valid = PinBuffer(buf, strategy);
 
@@ -1328,7 +1328,7 @@ MarkBufferDirty(Buffer buffer)
 		return;
 	}
 
-	bufHdr = &BufferDescriptors[buffer - 1];
+	bufHdr = GetBufferDescriptor(buffer - 1);
 
 	Assert(BufferIsPinned(buffer));
 	/* unfortunately we can't check if the lock is held exclusively */
@@ -1380,7 +1380,7 @@ ReleaseAndReadBuffer(Buffer buffer,
 		Assert(BufferIsPinned(buffer));
 		if (BufferIsLocal(buffer))
 		{
-			bufHdr = &LocalBufferDescriptors[-buffer - 1];
+			bufHdr = GetLocalBufferDescriptor(-buffer - 1);
 			if (bufHdr->tag.blockNum == blockNum &&
 				RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
 				bufHdr->tag.forkNum == forkNum)
@@ -1390,7 +1390,7 @@ ReleaseAndReadBuffer(Buffer buffer,
 		}
 		else
 		{
-			bufHdr = &BufferDescriptors[buffer - 1];
+			bufHdr = GetBufferDescriptor(buffer - 1);
 			/* we have pin, so it's ok to examine tag without spinlock */
 			if (bufHdr->tag.blockNum == blockNum &&
 				RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
@@ -1609,7 +1609,7 @@ BufferSync(int flags)
 	num_to_write = 0;
 	for (buf_id = 0; buf_id < NBuffers; buf_id++)
 	{
-		volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
+		volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
 
 		/*
 		 * Header spinlock is enough to examine BM_DIRTY, see comment in
@@ -1644,7 +1644,7 @@ BufferSync(int flags)
 	num_written = 0;
 	while (num_to_scan-- > 0)
 	{
-		volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
+		volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
 
 		/*
 		 * We don't need to acquire the lock here, because we're only looking
@@ -2016,7 +2016,7 @@ BgBufferSync(void)
 static int
 SyncOneBuffer(int buf_id, bool skip_recently_used)
 {
-	volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
+	volatile BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
 	int			result = 0;
 
 	ReservePrivateRefCountEntry();
@@ -2196,13 +2196,13 @@ PrintBufferLeakWarning(Buffer buffer)
 	Assert(BufferIsValid(buffer));
 	if (BufferIsLocal(buffer))
 	{
-		buf = &LocalBufferDescriptors[-buffer - 1];
+		buf = GetLocalBufferDescriptor(-buffer - 1);
 		loccount = LocalRefCount[-buffer - 1];
 		backend = MyBackendId;
 	}
 	else
 	{
-		buf = &BufferDescriptors[buffer - 1];
+		buf = GetBufferDescriptor(buffer - 1);
 		loccount = GetPrivateRefCount(buffer);
 		backend = InvalidBackendId;
 	}
@@ -2265,9 +2265,9 @@ BufferGetBlockNumber(Buffer buffer)
 	Assert(BufferIsPinned(buffer));
 
 	if (BufferIsLocal(buffer))
-		bufHdr = &(LocalBufferDescriptors[-buffer - 1]);
+		bufHdr = GetLocalBufferDescriptor(-buffer - 1);
 	else
-		bufHdr = &BufferDescriptors[buffer - 1];
+		bufHdr = GetBufferDescriptor(buffer - 1);
 
 	/* pinned, so OK to read tag without spinlock */
 	return bufHdr->tag.blockNum;
@@ -2288,9 +2288,9 @@ BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum,
 	Assert(BufferIsPinned(buffer));
 
 	if (BufferIsLocal(buffer))
-		bufHdr = &(LocalBufferDescriptors[-buffer - 1]);
+		bufHdr = GetLocalBufferDescriptor(-buffer - 1);
 	else
-		bufHdr = &BufferDescriptors[buffer - 1];
+		bufHdr = GetBufferDescriptor(buffer - 1);
 
 	/* pinned, so OK to read tag without spinlock */
 	*rnode = bufHdr->tag.rnode;
@@ -2473,7 +2473,7 @@ BufferIsPermanent(Buffer buffer)
 	 * changing an aligned 2-byte BufFlags value is atomic, so we'll read the
 	 * old value or the new value, but not random garbage.
 	 */
-	bufHdr = &BufferDescriptors[buffer - 1];
+	bufHdr = GetBufferDescriptor(buffer - 1);
 	return (bufHdr->flags & BM_PERMANENT) != 0;
 }
 
@@ -2486,7 +2486,7 @@ BufferIsPermanent(Buffer buffer)
 XLogRecPtr
 BufferGetLSNAtomic(Buffer buffer)
 {
-	volatile BufferDesc *bufHdr = &BufferDescriptors[buffer - 1];
+	volatile BufferDesc *bufHdr = GetBufferDescriptor(buffer - 1);
 	char	   *page = BufferGetPage(buffer);
 	XLogRecPtr	lsn;
 
@@ -2549,7 +2549,7 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum,
 
 	for (i = 0; i < NBuffers; i++)
 	{
-		volatile BufferDesc *bufHdr = &BufferDescriptors[i];
+		volatile BufferDesc *bufHdr = GetBufferDescriptor(i);
 
 		/*
 		 * We can make this a tad faster by prechecking the buffer tag before
@@ -2639,7 +2639,7 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
 	for (i = 0; i < NBuffers; i++)
 	{
 		RelFileNode *rnode = NULL;
-		volatile BufferDesc *bufHdr = &BufferDescriptors[i];
+		volatile BufferDesc *bufHdr = GetBufferDescriptor(i);
 
 		/*
 		 * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
@@ -2703,7 +2703,7 @@ DropDatabaseBuffers(Oid dbid)
 
 	for (i = 0; i < NBuffers; i++)
 	{
-		volatile BufferDesc *bufHdr = &BufferDescriptors[i];
+		volatile BufferDesc *bufHdr = GetBufferDescriptor(i);
 
 		/*
 		 * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
@@ -2732,10 +2732,11 @@ void
 PrintBufferDescs(void)
 {
 	int			i;
-	volatile BufferDesc *buf = BufferDescriptors;
 
-	for (i = 0; i < NBuffers; ++i, ++buf)
+	for (i = 0; i < NBuffers; ++i)
 	{
+		volatile BufferDesc *buf = GetBufferDescriptor(i);
+
 		/* theoretically we should lock the bufhdr here */
 		elog(LOG,
 			 "[%02d] (freeNext=%d, rel=%s, "
@@ -2753,10 +2754,11 @@ void
 PrintPinnedBufs(void)
 {
 	int			i;
-	volatile BufferDesc *buf = BufferDescriptors;
 
-	for (i = 0; i < NBuffers; ++i, ++buf)
+	for (i = 0; i < NBuffers; ++i)
 	{
+		volatile BufferDesc *buf = GetBufferDescriptor(i);
+
 		if (GetPrivateRefCount(i + 1) > 0)
 		{
 			/* theoretically we should lock the bufhdr here */
@@ -2804,7 +2806,7 @@ FlushRelationBuffers(Relation rel)
 	{
 		for (i = 0; i < NLocBuffer; i++)
 		{
-			bufHdr = &LocalBufferDescriptors[i];
+			bufHdr = GetLocalBufferDescriptor(i);
 			if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node) &&
 				(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
 			{
@@ -2842,7 +2844,7 @@ FlushRelationBuffers(Relation rel)
 
 	for (i = 0; i < NBuffers; i++)
 	{
-		bufHdr = &BufferDescriptors[i];
+		bufHdr = GetBufferDescriptor(i);
 
 		/*
 		 * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
@@ -2894,7 +2896,7 @@ FlushDatabaseBuffers(Oid dbid)
 
 	for (i = 0; i < NBuffers; i++)
 	{
-		bufHdr = &BufferDescriptors[i];
+		bufHdr = GetBufferDescriptor(i);
 
 		/*
 		 * As in DropRelFileNodeBuffers, an unlocked precheck should be safe
@@ -2938,7 +2940,7 @@ ReleaseBuffer(Buffer buffer)
 		return;
 	}
 
-	UnpinBuffer(&BufferDescriptors[buffer - 1], true);
+	UnpinBuffer(GetBufferDescriptor(buffer - 1), true);
 }
 
 /*
@@ -3007,7 +3009,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
 		return;
 	}
 
-	bufHdr = &BufferDescriptors[buffer - 1];
+	bufHdr = GetBufferDescriptor(buffer - 1);
 
 	Assert(GetPrivateRefCount(buffer) > 0);
 	/* here, either share or exclusive lock is OK */
@@ -3161,7 +3163,7 @@ LockBuffer(Buffer buffer, int mode)
 	if (BufferIsLocal(buffer))
 		return;					/* local buffers need no lock */
 
-	buf = &(BufferDescriptors[buffer - 1]);
+	buf = GetBufferDescriptor(buffer - 1);
 
 	if (mode == BUFFER_LOCK_UNLOCK)
 		LWLockRelease(buf->content_lock);
@@ -3187,7 +3189,7 @@ ConditionalLockBuffer(Buffer buffer)
 	if (BufferIsLocal(buffer))
 		return true;			/* act as though we got it */
 
-	buf = &(BufferDescriptors[buffer - 1]);
+	buf = GetBufferDescriptor(buffer - 1);
 
 	return LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE);
 }
@@ -3231,7 +3233,7 @@ LockBufferForCleanup(Buffer buffer)
 		elog(ERROR, "incorrect local pin count: %d",
 			 GetPrivateRefCount(buffer));
 
-	bufHdr = &BufferDescriptors[buffer - 1];
+	bufHdr = GetBufferDescriptor(buffer - 1);
 
 	for (;;)
 	{
@@ -3332,7 +3334,7 @@ ConditionalLockBufferForCleanup(Buffer buffer)
 	if (!ConditionalLockBuffer(buffer))
 		return false;
 
-	bufHdr = &BufferDescriptors[buffer - 1];
+	bufHdr = GetBufferDescriptor(buffer - 1);
 	LockBufHdr(bufHdr);
 	Assert(bufHdr->refcount > 0);
 	if (bufHdr->refcount == 1)
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 3add619..0d1cbd1 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -259,7 +259,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
 				break;
 			}
 
-			buf = &BufferDescriptors[StrategyControl->firstFreeBuffer];
+			buf = GetBufferDescriptor(StrategyControl->firstFreeBuffer);
 			Assert(buf->freeNext != FREENEXT_NOT_IN_LIST);
 
 			/* Unconditionally remove buffer from freelist */
@@ -296,7 +296,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
 	for (;;)
 	{
 
-		buf = &BufferDescriptors[ClockSweepTick()];
+		buf = GetBufferDescriptor(ClockSweepTick());
 
 		/*
 		 * If the buffer is pinned or has a nonzero usage_count, we cannot use
@@ -614,7 +614,7 @@ GetBufferFromRing(BufferAccessStrategy strategy)
 	 * higher usage_count indicates someone else has touched the buffer, so we
 	 * shouldn't re-use it.
 	 */
-	buf = &BufferDescriptors[bufnum - 1];
+	buf = GetBufferDescriptor(bufnum - 1);
 	LockBufHdr(buf);
 	if (buf->refcount == 0 && buf->usage_count <= 1)
 	{
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 1fc0af3..3144afe 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -122,7 +122,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 	if (hresult)
 	{
 		b = hresult->id;
-		bufHdr = &LocalBufferDescriptors[b];
+		bufHdr = GetLocalBufferDescriptor(b);
 		Assert(BUFFERTAGS_EQUAL(bufHdr->tag, newTag));
 #ifdef LBDEBUG
 		fprintf(stderr, "LB ALLOC (%u,%d,%d) %d\n",
@@ -165,7 +165,7 @@ LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
 		if (++nextFreeLocalBuf >= NLocBuffer)
 			nextFreeLocalBuf = 0;
 
-		bufHdr = &LocalBufferDescriptors[b];
+		bufHdr = GetLocalBufferDescriptor(b);
 
 		if (LocalRefCount[b] == 0)
 		{
@@ -278,7 +278,7 @@ MarkLocalBufferDirty(Buffer buffer)
 
 	Assert(LocalRefCount[bufid] > 0);
 
-	bufHdr = &LocalBufferDescriptors[bufid];
+	bufHdr = GetLocalBufferDescriptor(bufid);
 
 	if (!(bufHdr->flags & BM_DIRTY))
 		pgBufferUsage.local_blks_dirtied++;
@@ -305,7 +305,7 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
 
 	for (i = 0; i < NLocBuffer; i++)
 	{
-		BufferDesc *bufHdr = &LocalBufferDescriptors[i];
+		BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
 		LocalBufferLookupEnt *hresult;
 
 		if ((bufHdr->flags & BM_TAG_VALID) &&
@@ -347,7 +347,7 @@ DropRelFileNodeAllLocalBuffers(RelFileNode rnode)
 
 	for (i = 0; i < NLocBuffer; i++)
 	{
-		BufferDesc *bufHdr = &LocalBufferDescriptors[i];
+		BufferDesc *bufHdr = GetLocalBufferDescriptor(i);
 		LocalBufferLookupEnt *hresult;
 
 		if ((bufHdr->flags & BM_TAG_VALID) &&
@@ -400,7 +400,7 @@ InitLocalBuffers(void)
 	/* initialize fields that need to start off nonzero */
 	for (i = 0; i < nbufs; i++)
 	{
-		BufferDesc *buf = &LocalBufferDescriptors[i];
+		BufferDesc *buf = GetLocalBufferDescriptor(i);
 
 		/*
 		 * negative to indicate local buffer. This is tricky: shared buffers
diff --git a/src/include/c.h b/src/include/c.h
index c929d3d..b187520 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -537,6 +537,7 @@ typedef NameData *Name;
 #define MAXALIGN(LEN)			TYPEALIGN(MAXIMUM_ALIGNOF, (LEN))
 /* MAXALIGN covers only built-in types, not buffers */
 #define BUFFERALIGN(LEN)		TYPEALIGN(ALIGNOF_BUFFER, (LEN))
+#define CACHELINEALIGN(LEN)		TYPEALIGN(PG_CACHE_LINE_SIZE, (LEN))
 
 #define TYPEALIGN_DOWN(ALIGNVAL,LEN)  \
 	(((uintptr_t) (LEN)) & ~((uintptr_t) ((ALIGNVAL) - 1)))
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 9b8ace5..1e2f8fd 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -134,7 +134,7 @@ typedef struct buftag
  * We use this same struct for local buffer headers, but the lock fields
  * are not used and not all of the flag bits are useful either.
  */
-typedef struct sbufdesc
+typedef struct BufferDesc
 {
 	BufferTag	tag;			/* ID of page contained in buffer */
 	BufFlags	flags;			/* see bit definitions above */
@@ -151,6 +151,36 @@ typedef struct sbufdesc
 	LWLock	   *content_lock;	/* to lock access to buffer contents */
 } BufferDesc;
 
+/*
+ * Concurrent access to buffer headers has proven to be more efficient if
+ * they're cache line aligned. So we force the start of the BufferDescriptors
+ * array to be on a cache line boundary and force the elements to be cache
+ * line sized.
+ *
+ * XXX: As this primarily matters in highly concurrent workloads which
+ * probably all are 64bit these days, and the space wastage would be a bit
+ * more noticeable on 32bit systems, we don't force the stride to be cache
+ * line sized. If somebody does actual performance testing, we can reevaluate.
+ *
+ * Note that the local buffer descriptor array isn't forced to be aligned -
+ * as there's no concurrent access to it, alignment is unlikely to be
+ * beneficial.
+ *
+ * We use 64 bytes as the cache line size here, because that's the most common
+ * size. Making it bigger would be a waste of memory. Even if running on a
+ * platform with either 32 or 128 byte line sizes, it's good to align to
+ * boundaries and avoid false sharing.
+ */
+#define BUFFERDESC_PADDED_SIZE	(sizeof(void*) == 8 ? 64 : sizeof(BufferDesc))
+
+typedef union BufferDescPadded
+{
+	BufferDesc	bufferdesc;
+	char		pad[BUFFERDESC_PADDED_SIZE];
+} BufferDescPadded;
+
+#define GetBufferDescriptor(id) (&BufferDescriptors[(id)].bufferdesc)
+#define GetLocalBufferDescriptor(id) (&LocalBufferDescriptors[(id)])
+
 #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
 
 /*
@@ -174,7 +204,7 @@ typedef struct sbufdesc
 
 
 /* in buf_init.c */
-extern PGDLLIMPORT BufferDesc *BufferDescriptors;
+extern PGDLLIMPORT BufferDescPadded *BufferDescriptors;
 
 /* in localbuf.c */
 extern BufferDesc *LocalBufferDescriptors;
-- 
2.0.0.rc2.4.g1dc51c6.dirty
