Please find attached a patch to mark a shared buffer as read-write or
read-only using mprotect().  The idea is to catch violations of shared
buffer access rules.  This patch was useful to detect the access
violations reported in this thread.  The mprotect() calls are enabled
by the -DMPROTECT_BUFFERS compile-time flag.
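
For anyone who wants to see the mechanism in isolation before reading the
patch, here is a minimal standalone sketch (my illustration only, not part of
the patch; it assumes a POSIX system where posix_memalign() and
sysconf(_SC_PAGESIZE) are available).  It toggles protection on a single page
the same way the patch does around buffer access: PROT_NONE while "unpinned",
PROT_READ | PROT_WRITE while the "content lock" is held, and PROT_READ after
the lock is released.  Any access that breaks those rules dies with SIGSEGV,
which is exactly the signal the facility relies on.

/* mprotect_sketch.c -- standalone illustration only, not part of the patch */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void)
{
	long		pagesize = sysconf(_SC_PAGESIZE);
	char	   *buf;

	/* mprotect() needs a page-aligned address and length */
	if (posix_memalign((void **) &buf, pagesize, pagesize) != 0)
	{
		perror("posix_memalign");
		return 1;
	}

	/* "buffer not pinned": neither reads nor writes allowed */
	if (mprotect(buf, pagesize, PROT_NONE) != 0)
	{
		perror("mprotect PROT_NONE");
		return 1;
	}

	/* "content lock acquired": read/write allowed */
	if (mprotect(buf, pagesize, PROT_READ | PROT_WRITE) != 0)
	{
		perror("mprotect PROT_READ|PROT_WRITE");
		return 1;
	}
	memset(buf, 'x', pagesize);			/* legitimate write succeeds */

	/* "content lock released, buffer still pinned": reads only */
	if (mprotect(buf, pagesize, PROT_READ) != 0)
	{
		perror("mprotect PROT_READ");
		return 1;
	}
	printf("first byte: %c\n", buf[0]);	/* read succeeds */
	/* buf[0] = 'y';  <-- would SIGSEGV here, exposing the violation */

	/* restore write access before handing the block back to the allocator */
	mprotect(buf, pagesize, PROT_READ | PROT_WRITE);
	free(buf);
	return 0;
}

Uncommenting the final write makes the sketch crash immediately, which is the
behaviour the patch provokes inside the server whenever a caller touches a
shared buffer it has not pinned or locked appropriately.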

Asim
From ed7dcf633600b3a527ed52ffacd1b779da8b0235 Mon Sep 17 00:00:00 2001
From: Asim R P <aprav...@pivotal.io>
Date: Wed, 18 Jul 2018 18:32:40 -0700
Subject: [PATCH 1/2] Facility to detect shared buffer access violations

Using mprotect() to allow/disallow read/write access to one or more
shared buffers, this patch enables detection of shared buffer access
violations.  A new compile-time flag "MPROTECT_BUFFERS"
(CFLAGS=-DMPROTECT_BUFFERS) is introduced to enable the detection.

A couple of violations have already been caught and fixed using this
facility: 130beba36d6dd46b8c527646f9f2433347cbfb11
---
 src/backend/storage/buffer/buf_init.c |  31 +++++++
 src/backend/storage/buffer/bufmgr.c   | 122 ++++++++++++++++++++++----
 src/backend/storage/ipc/shmem.c       |  18 ++++
 3 files changed, 153 insertions(+), 18 deletions(-)

diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index 144a2cee6f..22e3d2821c 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -14,6 +14,11 @@
  */
 #include "postgres.h"
 
+#ifdef MPROTECT_BUFFERS
+#include <sys/mman.h>
+#include "miscadmin.h"
+#endif
+
 #include "storage/bufmgr.h"
 #include "storage/buf_internals.h"
 
@@ -24,6 +29,28 @@ LWLockMinimallyPadded *BufferIOLWLockArray = NULL;
 WritebackContext BackendWritebackContext;
 CkptSortItem *CkptBufferIds;
 
+#ifdef MPROTECT_BUFFERS
+/*
+ * Protect the entire shared buffers region such that neither read nor write
+ * is allowed.  Protection will change for specific buffers when accessed
+ * through the buffer manager's interface.  The intent is to catch violations
+ * of the buffer access rules.
+ */
+static void
+ProtectMemoryPoolBuffers(void)
+{
+	Size bufferBlocksTotalSize = mul_size((Size)NBuffers, (Size) BLCKSZ);
+	if (IsUnderPostmaster && IsNormalProcessingMode() &&
+		mprotect(BufferBlocks, bufferBlocksTotalSize, PROT_NONE))
+	{
+		ereport(ERROR,
+				(errmsg("unable to set memory protection to %d, error %d, "
+						"allocation size %u, ptr %p", PROT_NONE,
+						errno, (unsigned int) bufferBlocksTotalSize,
+						(void *) BufferBlocks)));
+	}
+}
+#endif
 
 /*
  * Data Structures:
@@ -149,6 +176,10 @@ InitBufferPool(void)
 	/* Initialize per-backend file flush context */
 	WritebackContextInit(&BackendWritebackContext,
 						 &backend_flush_after);
+
+#ifdef MPROTECT_BUFFERS
+	ProtectMemoryPoolBuffers();
+#endif
 }
 
 /*
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 01eabe5706..2ef3c75b6a 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -31,6 +31,9 @@
 #include "postgres.h"
 
 #include <sys/file.h>
+#ifdef MPROTECT_BUFFERS
+#include <sys/mman.h>
+#endif
 #include <unistd.h>
 
 #include "access/xlog.h"
@@ -177,6 +180,75 @@ static PrivateRefCountEntry *GetPrivateRefCountEntry(Buffer buffer, bool do_move
 static inline int32 GetPrivateRefCount(Buffer buffer);
 static void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref);
 
+#ifdef MPROTECT_BUFFERS
+#define ShouldMemoryProtect(buf) (IsUnderPostmaster &&		  \
+								  IsNormalProcessingMode() && \
+								  !BufferIsLocal(buf->buf_id+1) && \
+								  !BufferIsInvalid(buf->buf_id+1))
+
+static inline void BufferMProtect(volatile BufferDesc *bufHdr, int protectionLevel)
+{
+	if (ShouldMemoryProtect(bufHdr))
+	{
+		if (mprotect(BufHdrGetBlock(bufHdr), BLCKSZ, protectionLevel))
+		{
+			ereport(ERROR,
+					(errmsg("unable to set memory protection to %d, error %d, "
+							"block size %d, ptr %p", protectionLevel,
+							errno, BLCKSZ, (void *) BufHdrGetBlock(bufHdr))));
+		}
+	}
+}
+#endif
+
+static inline void ReleaseContentLock(volatile BufferDesc *buf)
+{
+	LWLockRelease(BufferDescriptorGetContentLock(buf));
+
+#ifdef MPROTECT_BUFFERS
+	/* make the buffer read-only after releasing content lock */
+	if (!LWLockHeldByMe(BufferDescriptorGetContentLock(buf)))
+		BufferMProtect(buf, PROT_READ);
+#endif
+}
+
+
+static inline void AcquireContentLock(volatile BufferDesc *buf, LWLockMode mode)
+{
+#ifdef MPROTECT_BUFFERS
+	const bool newAcquisition =
+		!LWLockHeldByMe(BufferDescriptorGetContentLock(buf));
+
+	LWLockAcquire(BufferDescriptorGetContentLock(buf), mode);
+
+	/* new acquisition of content lock, allow read/write memory access */
+	if (newAcquisition)
+		BufferMProtect(buf, PROT_READ|PROT_WRITE);
+#else
+	LWLockAcquire(BufferDescriptorGetContentLock(buf), mode);
+#endif
+}
+
+static inline bool ConditionalAcquireContentLock(volatile BufferDesc *buf,
+												 LWLockMode mode)
+{
+#ifdef MPROTECT_BUFFERS
+	const bool newAcquisition =
+		!LWLockHeldByMe(BufferDescriptorGetContentLock(buf));
+
+	if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf), mode))
+	{
+		/* new acquisition of lock, allow read/write memory access */
+		if (newAcquisition)
+			BufferMProtect(buf, PROT_READ | PROT_WRITE);
+		return true;
+	}
+	return false;
+#else
+	return LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf), mode);
+#endif
+}
+
 /*
  * Ensure that the PrivateRefCountArray has sufficient space to store one more
  * entry. This has to be called before using NewPrivateRefCountEntry() to fill
@@ -779,8 +851,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			if (!isLocalBuf)
 			{
 				if (mode == RBM_ZERO_AND_LOCK)
-					LWLockAcquire(BufferDescriptorGetContentLock(bufHdr),
-								  LW_EXCLUSIVE);
+					AcquireContentLock(bufHdr, LW_EXCLUSIVE);
 				else if (mode == RBM_ZERO_AND_CLEANUP_LOCK)
 					LockBufferForCleanup(BufferDescriptorGetBuffer(bufHdr));
 			}
@@ -932,7 +1003,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 	if ((mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK) &&
 		!isLocalBuf)
 	{
-		LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE);
+		AcquireContentLock(bufHdr, LW_EXCLUSIVE);
 	}
 
 	if (isLocalBuf)
@@ -1102,8 +1173,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 			 * happens to be trying to split the page the first one got from
 			 * StrategyGetBuffer.)
 			 */
-			if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
-										 LW_SHARED))
+			if (ConditionalAcquireContentLock(buf, LW_SHARED))
 			{
 				/*
 				 * If using a nondefault strategy, and writing the buffer
@@ -1125,7 +1195,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 						StrategyRejectBuffer(strategy, buf))
 					{
 						/* Drop lock/pin and loop around for another buffer */
-						LWLockRelease(BufferDescriptorGetContentLock(buf));
+						ReleaseContentLock(buf);
 						UnpinBuffer(buf, true);
 						continue;
 					}
@@ -1138,7 +1208,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 														  smgr->smgr_rnode.node.relNode);
 
 				FlushBuffer(buf, NULL);
-				LWLockRelease(BufferDescriptorGetContentLock(buf));
+				ReleaseContentLock(buf);
 
 				ScheduleBufferTagForWriteback(&BackendWritebackContext,
 											  &buf->tag);
@@ -1618,6 +1688,9 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy)
 				break;
 			}
 		}
+#ifdef MPROTECT_BUFFERS
+		BufferMProtect(buf, PROT_READ);
+#endif
 	}
 	else
 	{
@@ -1672,6 +1745,9 @@ PinBuffer_Locked(BufferDesc *buf)
 	buf_state = pg_atomic_read_u32(&buf->state);
 	Assert(buf_state & BM_LOCKED);
 	buf_state += BUF_REFCOUNT_ONE;
+#ifdef MPROTECT_BUFFERS
+	BufferMProtect(buf, PROT_READ);
+#endif
 	UnlockBufHdr(buf, buf_state);
 
 	b = BufferDescriptorGetBuffer(buf);
@@ -1747,6 +1823,9 @@ UnpinBuffer(BufferDesc *buf, bool fixOwner)
 			 */
 			buf_state = LockBufHdr(buf);
 
+#ifdef MPROTECT_BUFFERS
+			BufferMProtect(buf, PROT_NONE);
+#endif
 			if ((buf_state & BM_PIN_COUNT_WAITER) &&
 				BUF_STATE_GET_REFCOUNT(buf_state) == 1)
 			{
@@ -2389,11 +2468,11 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
 	 * buffer is clean by the time we've locked it.)
 	 */
 	PinBuffer_Locked(bufHdr);
-	LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
+	AcquireContentLock(bufHdr, LW_SHARED);
 
 	FlushBuffer(bufHdr, NULL);
 
-	LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
+	ReleaseContentLock(bufHdr);
 
 	tag = bufHdr->tag;
 
@@ -3217,9 +3296,9 @@ FlushRelationBuffers(Relation rel)
 			(buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
 		{
 			PinBuffer_Locked(bufHdr);
-			LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
+			AcquireContentLock(bufHdr, LW_SHARED);
 			FlushBuffer(bufHdr, rel->rd_smgr);
-			LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
+			ReleaseContentLock(bufHdr);
 			UnpinBuffer(bufHdr, true);
 		}
 		else
@@ -3271,9 +3350,9 @@ FlushDatabaseBuffers(Oid dbid)
 			(buf_state & (BM_VALID | BM_DIRTY)) == (BM_VALID | BM_DIRTY))
 		{
 			PinBuffer_Locked(bufHdr);
-			LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
+			AcquireContentLock(bufHdr, LW_SHARED);
 			FlushBuffer(bufHdr, NULL);
-			LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
+			ReleaseContentLock(bufHdr);
 			UnpinBuffer(bufHdr, true);
 		}
 		else
@@ -3554,11 +3633,11 @@ LockBuffer(Buffer buffer, int mode)
 	buf = GetBufferDescriptor(buffer - 1);
 
 	if (mode == BUFFER_LOCK_UNLOCK)
-		LWLockRelease(BufferDescriptorGetContentLock(buf));
+		ReleaseContentLock(buf);
 	else if (mode == BUFFER_LOCK_SHARE)
-		LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
+		AcquireContentLock(buf, LW_SHARED);
 	else if (mode == BUFFER_LOCK_EXCLUSIVE)
-		LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
+		AcquireContentLock(buf, LW_EXCLUSIVE);
 	else
 		elog(ERROR, "unrecognized buffer lock mode: %d", mode);
 }
@@ -3579,8 +3658,7 @@ ConditionalLockBuffer(Buffer buffer)
 
 	buf = GetBufferDescriptor(buffer - 1);
 
-	return LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
-									LW_EXCLUSIVE);
+	return ConditionalAcquireContentLock(buf, LW_EXCLUSIVE);
 }
 
 /*
@@ -3913,6 +3991,9 @@ StartBufferIO(BufferDesc *buf, bool forInput)
 	}
 
 	buf_state |= BM_IO_IN_PROGRESS;
+#ifdef MPROTECT_BUFFERS
+	BufferMProtect(buf, forInput ? PROT_READ | PROT_WRITE : PROT_READ);
+#endif
 	UnlockBufHdr(buf, buf_state);
 
 	InProgressBuf = buf;
@@ -3958,6 +4039,11 @@ TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits)
 
 	InProgressBuf = NULL;
 
+#ifdef MPROTECT_BUFFERS
+	/* XXX: should this be PROT_NONE if called from AbortBufferIO? */
+	if (!LWLockHeldByMe(BufferDescriptorGetContentLock(buf)))
+		BufferMProtect(buf, PROT_READ);
+#endif
 	LWLockRelease(BufferDescriptorGetIOLock(buf));
 }
 
diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index 7893c01983..ce429626bc 100644
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -65,6 +65,10 @@
 
 #include "postgres.h"
 
+#ifdef MPROTECT_BUFFERS
+#include <unistd.h>
+#endif
+
 #include "access/transam.h"
 #include "miscadmin.h"
 #include "storage/lwlock.h"
@@ -198,6 +202,20 @@ ShmemAllocNoError(Size size)
 
 	newStart = ShmemSegHdr->freeoffset;
 
+#ifdef MPROTECT_BUFFERS
+	/*
+	 * Align the shared buffers start address to the system page size because
+	 * mprotect() only works on page-aligned addresses.
+	 */
+	if (size >= BLCKSZ)
+	{
+		long systemPageSize = sysconf(_SC_PAGESIZE);
+		if (systemPageSize <= 1 || (systemPageSize & (systemPageSize - 1)))
+			elog(ERROR, "invalid page size %ld", systemPageSize);
+		newStart = TYPEALIGN(systemPageSize, newStart);
+	}
+#endif
+
 	newFree = newStart + size;
 	if (newFree <= ShmemSegHdr->totalsize)
 	{
-- 
2.17.1

From 9d5e5c61d4c954a71acca07ec37066b8ca5b4822 Mon Sep 17 00:00:00 2001
From: Asim R P <aprav...@pivotal.io>
Date: Wed, 18 Jul 2018 18:30:41 -0700
Subject: [PATCH 2/2] Fix known but not problematic buffer access violations

This is needed to make regression tests pass with
CFLAGS=-DMPROTECT_BUFFERS.
---
 src/backend/commands/sequence.c           |  3 ++-
 src/backend/storage/freespace/freespace.c | 11 ++++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index 89122d4ad7..f44fb8c563 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -348,13 +348,14 @@ fill_seq_with_data(Relation rel, HeapTuple tuple)
 
 	page = BufferGetPage(buf);
 
+	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+
 	PageInit(page, BufferGetPageSize(buf), sizeof(sequence_magic));
 	sm = (sequence_magic *) PageGetSpecialPointer(page);
 	sm->magic = SEQ_MAGIC;
 
 	/* Now insert sequence tuple */
 
-	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
 
 	/*
 	 * Since VACUUM does not process sequences, we have to force the tuple to
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 8d0ee7fc93..df813d7416 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -901,8 +901,17 @@ fsm_vacuum_page(Relation rel, FSMAddress addr,
 	 * relation.  We don't bother with a lock here, nor with marking the page
 	 * dirty if it wasn't already, since this is just a hint.
 	 */
+#ifdef MPROTECT_BUFFERS
+	/*
+	 * When mprotect() is used to detect shared buffer access violations, the
+	 * lock must be acquired so that write access to this buffer is allowed.
+	 */
+	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+#endif
 	((FSMPage) PageGetContents(page))->fp_next_slot = 0;
-
+#ifdef MPROTECT_BUFFERS
+	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+#endif
 	ReleaseBuffer(buf);
 
 	return max_avail;
-- 
2.17.1
