diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index 0cd2530..cd9cd86 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -86,6 +86,7 @@ InitBufferPool(void)
 	else
 	{
 		int			i;
+		int			bufid = 0;
 
 		/*
 		 * Initialize all the buffer headers.
@@ -110,8 +111,12 @@ InitBufferPool(void)
 			 */
 			buf->freeNext = i + 1;
 
-			buf->io_in_progress_lock = LWLockAssign();
-			buf->content_lock = LWLockAssign();
+			/*
+			 * We used to use LWLockAssign() here, but the main array pads
+			 * each lock to a cache line, too wasteful for 2 * NBuffers locks.
+			 */
+			buf->io_in_progress_lock = &BufferLWLockArray[bufid++].lock;
+			buf->content_lock = &BufferLWLockArray[bufid++].lock;
 		}
 
 		/* Correct last entry of linked list */
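
Since bufid advances twice per loop iteration, the slot assignment is
fixed: buffer i's I/O lock lands in BufferLWLockArray[2*i] and its content
lock in BufferLWLockArray[2*i + 1].  A minimal sketch of that mapping as
accessor helpers (hypothetical; not part of the patch):

	#include "storage/lwlock.h"

	/* I/O-in-progress lock: first of the buffer's two consecutive slots */
	static inline LWLock *
	GetBufferIOLock(int buf_id)
	{
		return &BufferLWLockArray[2 * buf_id].lock;
	}

	/* content lock: second slot of the pair */
	static inline LWLock *
	GetBufferContentLock(int buf_id)
	{
		return &BufferLWLockArray[2 * buf_id + 1].lock;
	}
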
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 1acd2f0..32f4dd4 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -130,6 +130,14 @@ LWLockPadded *MainLWLockArray = NULL;
 static LWLockTranche MainLWLockTranche;
 
 /*
+ * This points to the buffer LWLocks in shared memory.  Backends inherit
+ * the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
+ * where we have special measures to pass it down).
+ */
+LWLockMinSize *BufferLWLockArray = NULL;
+static LWLockTranche BufferLWLockTranche;
+
+/*
  * We use this structure to keep track of locked LWLocks for release
  * during error recovery.  Normally, only a few will be held at once, but
  * occasionally the number can be much higher; for example, the pg_buffercache
@@ -335,8 +343,7 @@ NumLWLocks(void)
 	/* Predefined LWLocks */
 	numLocks = NUM_FIXED_LWLOCKS;
 
-	/* bufmgr.c needs two for each shared buffer */
-	numLocks += 2 * NBuffers;
+	/* bufmgr.c's locks are now allocated separately; see BufferLWLockArray */
 
 	/* proc.c needs one for each backend or auxiliary process */
 	numLocks += MaxBackends + NUM_AUXILIARY_PROCS;
@@ -373,6 +380,7 @@ NumLWLocks(void)
 	return numLocks;
 }
 
+#define NumBufferLWLocks	(2 * NBuffers)	/* an I/O and a content lock per buffer */
 
 /*
  * RequestAddinLWLocks
@@ -403,9 +411,16 @@ LWLockShmemSize(void)
 	Size		size;
 	int			numLocks = NumLWLocks();
 
-	/* Space for the LWLock array. */
+	/* Space for the main LWLock array. */
 	size = mul_size(numLocks, sizeof(LWLockPadded));
 
+	/*
+	 * Space for the buffer LWLock array, a separate tranche with a
+	 * smaller stride.  This lets us pad the main array out to whole
+	 * cache lines without spending a cache line on every buffer lock.
+	 */
+	size = add_size(size, mul_size(NumBufferLWLocks, sizeof(LWLockMinSize)));
+
 	/* Space for dynamic allocation counter, plus room for alignment. */
 	size = add_size(size, 3 * sizeof(int) + LWLOCK_PADDED_SIZE);
 
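
To make the space savings concrete: assuming the default PG_CACHE_LINE_SIZE
of 128 bytes and a 32-byte LWLOCK_MINIMAL_SIZE, with shared_buffers = 128MB
(NBuffers = 16384) the 2 * 16384 = 32768 buffer locks take 32768 * 32 bytes
= 1MB in the packed array, versus 32768 * 128 bytes = 4MB had they stayed
in the cache-line-padded main array.
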
@@ -425,9 +440,11 @@ CreateLWLocks(void)
 
 	if (!IsUnderPostmaster)
 	{
-		int			numLocks = NumLWLocks();
+		int			numMainLocks = NumLWLocks();
+		int			numBufLocks = NumBufferLWLocks;
 		Size		spaceLocks = LWLockShmemSize();
 		LWLockPadded *lock;
+		LWLockMinSize *buflock;
 		int		   *LWLockCounter;
 		char	   *ptr;
 		int			id;
@@ -443,10 +460,18 @@ CreateLWLocks(void)
 
 		MainLWLockArray = (LWLockPadded *) ptr;
 
+		ptr += numMainLocks * sizeof(LWLockPadded);
+
+		BufferLWLockArray = (LWLockMinSize *) ptr;
+
 		/* Initialize all LWLocks in main array */
-		for (id = 0, lock = MainLWLockArray; id < numLocks; id++, lock++)
+		for (id = 0, lock = MainLWLockArray; id < numMainLocks; id++, lock++)
 			LWLockInitialize(&lock->lock, 0);
 
+		/* Initialize all LWLocks in buffer array */
+		for (id = 0, buflock = BufferLWLockArray; id < numBufLocks; id++, buflock++)
+			LWLockInitialize(&buflock->lock, 0);
+
 		/*
 		 * Initialize the dynamic-allocation counters, which are stored just
 		 * before the first LWLock.  LWLockCounter[0] is the allocation
@@ -456,8 +481,8 @@ CreateLWLocks(void)
 		 */
 		LWLockCounter = (int *) ((char *) MainLWLockArray - 3 * sizeof(int));
 		LWLockCounter[0] = NUM_FIXED_LWLOCKS;
-		LWLockCounter[1] = numLocks;
-		LWLockCounter[2] = 1;	/* 0 is the main array */
+		LWLockCounter[1] = numMainLocks;
+		LWLockCounter[2] = 2;	/* 0 is the main array, 1 is for buffers */
 	}
 
 	if (LWLockTrancheArray == NULL)
@@ -472,6 +497,11 @@ CreateLWLocks(void)
 	MainLWLockTranche.array_base = MainLWLockArray;
 	MainLWLockTranche.array_stride = sizeof(LWLockPadded);
 	LWLockRegisterTranche(0, &MainLWLockTranche);
+
+	BufferLWLockTranche.name = "buffer";
+	BufferLWLockTranche.array_base = BufferLWLockArray;
+	BufferLWLockTranche.array_stride = sizeof(LWLockMinSize);
+	LWLockRegisterTranche(1, &BufferLWLockTranche);
 }
 
 /*
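
With array_base and array_stride recorded per tranche, any LWLock pointer
can be mapped back to a name and index; that is what lwlock.c's existing
T_NAME/T_ID macros compute.  A simplified sketch of the resolution for a
lock from either tranche (assumes it lives in lwlock.c, where the static
LWLockTrancheArray is visible):

	static void
	report_lock_identity(LWLock *lock)
	{
		LWLockTranche *tranche = LWLockTrancheArray[lock->tranche];
		int			id;

		/* index = byte offset from the tranche's base / its stride */
		id = (int) (((char *) lock - (char *) tranche->array_base) /
					tranche->array_stride);

		elog(DEBUG1, "lwlock %s:%d", tranche->name, id);
	}
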
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index cff3b99..7592315 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -72,13 +72,12 @@ typedef struct LWLock
  * (Of course, we have to also ensure that the array start address is suitably
  * aligned.)
  *
- * On a 32-bit platforms a LWLock will these days fit into 16 bytes, but since
- * that didn't use to be the case and cramming more lwlocks into a cacheline
- * might be detrimental performancewise we still use 32 byte alignment
- * there. So, both on 32 and 64 bit platforms, it should fit into 32 bytes
- * unless slock_t is really big.  We allow for that just in case.
+ * We pad the main LWLocks out to one per cache line to minimize
+ * contention.  Buffer LWLocks are far more numerous and individually
+ * much less contended, so we keep them more tightly packed.
  */
-#define LWLOCK_PADDED_SIZE	(sizeof(LWLock) <= 32 ? 32 : 64)
+#define LWLOCK_PADDED_SIZE	PG_CACHE_LINE_SIZE
+#define LWLOCK_MINIMAL_SIZE	(sizeof(LWLock) <= 32 ? 32 : 64)
 
 typedef union LWLockPadded
 {
@@ -87,6 +86,13 @@ typedef union LWLockPadded
 } LWLockPadded;
 extern PGDLLIMPORT LWLockPadded *MainLWLockArray;
 
+typedef union LWLockMinSize
+{
+	LWLock		lock;
+	char		pad[LWLOCK_MINIMAL_SIZE];
+} LWLockMinSize;
+extern PGDLLIMPORT LWLockMinSize *BufferLWLockArray;
+
 /*
  * Some commonly-used locks have predefined positions within MainLWLockArray;
  * defining macros here makes it much easier to keep track of these.  If you

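
Two assumptions sit behind the pair of strides: an LWLock actually fits in
LWLOCK_MINIMAL_SIZE (the old comment's "unless slock_t is really big"
caveat), and the padded stride is a multiple of the minimal one, so the
packed buffer array laid out right after the cache-line-padded main array
stays suitably aligned.  A sketch of compile-time checks one could add to
CreateLWLocks() using the existing StaticAssertStmt macro:

	StaticAssertStmt(sizeof(LWLock) <= LWLOCK_MINIMAL_SIZE,
					 "LWLock does not fit into an LWLockMinSize slot");
	StaticAssertStmt(LWLOCK_PADDED_SIZE % LWLOCK_MINIMAL_SIZE == 0,
					 "padded stride must be a multiple of the minimal stride");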