On Sat, Jan 21, 2012 at 1:57 PM, Robert Haas <robertmh...@gmail.com> wrote:
> On Fri, Jan 20, 2012 at 10:44 AM, Robert Haas <robertmh...@gmail.com> wrote:
>>>> D'oh.  You're right.  Looks like I accidentally tried to apply this to
>>>> the 9.1 sources.  Sigh...
>>>
>>> No worries. It's Friday.
>
> Server passed 'make check' with this patch, but when I tried to fire
> it up for some test runs, it fell over with:
>
> FATAL:  no more LWLockIds available
>
> I assume that it must be dependent on the config settings used.  Here are 
> mine:
>
> shared_buffers = 8GB
> maintenance_work_mem = 1GB
> synchronous_commit = off
> checkpoint_segments = 300
> checkpoint_timeout = 15min
> checkpoint_completion_target = 0.9
> wal_writer_delay = 20ms

Yes, it was. Sorry about that. New version attached, retesting while
you read this.

-- 
 Simon Riggs                   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 69b6ef3..6ff6894 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -37,6 +37,7 @@
 #include "access/transam.h"
 #include "miscadmin.h"
 #include "pg_trace.h"
+#include "utils/snapmgr.h"
 
 /*
  * Defines for CLOG page sizes.  A page is the same BLCKSZ as is used
@@ -70,10 +71,17 @@
 
 /*
  * Link to shared-memory data structures for CLOG control
+ *
+ * As of 9.2, we have 2 structures for commit log data.
+ * ClogCtl manages the main read/write part of the commit log, while
+ * the ClogHistoryCtl manages the now read-only, older part. ClogHistory
+ * removes contention from the path of transaction commits.
  */
 static SlruCtlData ClogCtlData;
+static SlruCtlData ClogHistoryCtlData;
 
-#define ClogCtl (&ClogCtlData)
+#define ClogCtl 		(&ClogCtlData)
+#define ClogHistoryCtl	(&ClogHistoryCtlData)
 
 
 static int	ZeroCLOGPage(int pageno, bool writeXlog);
@@ -296,6 +304,10 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
 
 		/* ... then the main transaction */
 		TransactionIdSetStatusBit(xid, status, lsn, slotno);
+
+		/* When we commit, advance ClogCtl's shared RecentXminPageno if needed */
+		if (ClogCtl->shared->RecentXminPageno < TransactionIdToPage(RecentXmin))
+			ClogCtl->shared->RecentXminPageno = TransactionIdToPage(RecentXmin);
 	}
 
 	/* Set the subtransactions */
@@ -387,6 +399,8 @@ TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i
 XidStatus
 TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
 {
+	SlruCtl		clog = ClogCtl;
+	bool		useClogHistory = true;
 	int			pageno = TransactionIdToPage(xid);
 	int			byteno = TransactionIdToByte(xid);
 	int			bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
@@ -397,15 +411,35 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
 
 	/* lock is acquired by SimpleLruReadPage_ReadOnly */
 
-	slotno = SimpleLruReadPage_ReadOnly(ClogCtl, pageno, xid);
-	byteptr = ClogCtl->shared->page_buffer[slotno] + byteno;
+	/*
+	 * Decide whether to use main Clog or read-only ClogHistory.
+	 *
+	 * Our knowledge of the boundary between the two may be a little out
+	 * of date, so if we try Clog and can't find it we need to try again
+	 * against ClogHistory.
+	 */
+	if (pageno >= ClogCtl->recent_oldest_active_page_number)
+	{
+		slotno = SimpleLruReadPage_ReadOnly(clog, pageno, xid);
+		if (slotno >= 0)
+			useClogHistory = false;
+	}
+
+	if (useClogHistory)
+	{
+		clog = ClogHistoryCtl;
+		slotno = SimpleLruReadPage_ReadOnly(clog, pageno, xid);
+		Assert(slotno >= 0);
+	}
+
+	byteptr = clog->shared->page_buffer[slotno] + byteno;
 
 	status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
 
 	lsnindex = GetLSNIndex(slotno, xid);
-	*lsn = ClogCtl->shared->group_lsn[lsnindex];
+	*lsn = clog->shared->group_lsn[lsnindex];
 
-	LWLockRelease(CLogControlLock);
+	LWLockRelease(clog->shared->ControlLock);
 
 	return status;
 }
@@ -445,15 +479,19 @@ CLOGShmemBuffers(void)
 Size
 CLOGShmemSize(void)
 {
-	return SimpleLruShmemSize(CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE);
+	/* Reserve shmem for both ClogCtl and ClogHistoryCtl */
+	return SimpleLruShmemSize(2 * CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE);
 }
 
 void
 CLOGShmemInit(void)
 {
 	ClogCtl->PagePrecedes = CLOGPagePrecedes;
+	ClogHistoryCtl->PagePrecedes = CLOGPagePrecedes;
 	SimpleLruInit(ClogCtl, "CLOG Ctl", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE,
 				  CLogControlLock, "pg_clog");
+	SimpleLruInit(ClogHistoryCtl, "CLOG History Ctl", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE,
+				  CLogHistoryControlLock, "pg_clog");
 }
 
 /*
@@ -592,6 +630,16 @@ CheckPointCLOG(void)
 	TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true);
 	SimpleLruFlush(ClogCtl, true);
 	TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true);
+
+	/*
+	 * Now that we've written out all dirty buffers the only pages that
+	 * will get dirty again will be pages with active transactions on them.
+	 * So we can move forward the oldest_active_page_number and allow
+	 * read only operations via ClogHistoryCtl.
+	 */
+	LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
+	ClogCtl->shared->oldest_active_page_number = ClogCtl->shared->RecentXminPageno;
+	LWLockRelease(CLogControlLock);
 }
 
 
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 30538ff..2cebdf9 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -188,6 +188,9 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
 
 		shared->cur_lru_count = 0;
 
+		shared->oldest_active_page_number = -1;
+		shared->RecentXminPageno = -1;
+
 		/* shared->latest_page_number will be set later */
 
 		ptr = (char *) shared;
@@ -476,6 +479,16 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid)
 	LWLockRelease(shared->ControlLock);
 	LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE);
 
+	/* update local state while we have the lock */
+	ctl->recent_oldest_active_page_number = shared->oldest_active_page_number;
+
+	/* Check if our cached boundary information was out of date */
+	if (pageno < ctl->recent_oldest_active_page_number)
+	{
+		LWLockRelease(shared->ControlLock);
+		return -1;
+	}
+
 	return SimpleLruReadPage(ctl, pageno, true, xid);
 }
 
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index cc41568..353f101 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -170,8 +170,8 @@ NumLWLocks(void)
 	/* proc.c needs one for each backend or auxiliary process */
 	numLocks += MaxBackends + NUM_AUXILIARY_PROCS;
 
-	/* clog.c needs one per CLOG buffer */
-	numLocks += CLOGShmemBuffers();
+	/* clog.c needs one per CLOG buffer for each of ClogCtl and ClogHistoryCtl */
+	numLocks += 2 * CLOGShmemBuffers();
 
 	/* subtrans.c needs one per SubTrans buffer */
 	numLocks += NUM_SUBTRANS_BUFFERS;
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index 41cd484..f7b0d87 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -99,6 +99,15 @@ typedef struct SlruSharedData
 	 * the latest page.
 	 */
 	int			latest_page_number;
+
+	/*
+	 * RecentXminPageno is the oldest page that any active
+	 * transaction would ever wish to write to.
+	 * oldest_active_page_number is the oldest dirty page, or the
+	 * RecentXminPageno, whichever is lower. It is advanced at checkpoint.
+	 */
+	int			oldest_active_page_number;
+	int			RecentXminPageno;
 } SlruSharedData;
 
 typedef SlruSharedData *SlruShared;
@@ -125,6 +134,11 @@ typedef struct SlruCtlData
 	bool		(*PagePrecedes) (int, int);
 
 	/*
+	 * Local cached value of oldest_active_page_number.
+	 */
+	int			recent_oldest_active_page_number;
+
+	/*
 	 * Dir is set during SimpleLruInit and does not change thereafter. Since
 	 * it's always the same, it doesn't need to be in shared memory.
 	 */
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index df3df29..3d8838f 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -79,6 +79,7 @@ typedef enum LWLockId
 	SerializablePredicateLockListLock,
 	OldSerXidLock,
 	SyncRepLock,
+	CLogHistoryControlLock,
 	/* Individual lock IDs end here */
 	FirstBufMappingLock,
 	FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to