On Sun, Jan 29, 2012 at 9:41 PM, Jeff Janes <jeff.ja...@gmail.com> wrote:
> If I cast to a int, then I see advancement:

I'll initialise it as 0 rather than -1, and then we don't have a problem in any circumstance.

>> I've specifically designed the pgbench changes required to simulate
>> conditions of clog contention to help in the evaluation of this patch.
>
> Yep, I've used that one for the testing.

Most of the current patch is just bookkeeping to keep track of the point at which we can look at history in a read-only manner.

I've isolated the code better to allow you to explore various implementation options. I don't really see any performance difference between any of them, but you're welcome to look.

Please, everybody, note that the clog history doesn't even become active until the first checkpoint, so this is dead code until we've hit the first checkpoint cycle and completed a million transactions since startup. So it's designed to tune for real-world situations, and is not easy to benchmark. (Maybe we could start earlier, but having extra code just for the first few minutes seems a waste of energy, especially since we must also hit a million xids.)

--
Simon Riggs http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 69b6ef3..8ab1b3c 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -37,6 +37,7 @@ #include "access/transam.h" #include "miscadmin.h" #include "pg_trace.h" +#include "utils/snapmgr.h" /* * Defines for CLOG page sizes. A page is the same BLCKSZ as is used @@ -70,12 +71,19 @@ /* * Link to shared-memory data structures for CLOG control + * + * As of 9.2, we have 2 structures for commit log data. + * ClogCtl manages the main read/write part of the commit log, while + * the ClogHistoryCtl manages the now read-only, older part. ClogHistory + * removes contention from the path of transaction commits. */ static SlruCtlData ClogCtlData; +static SlruCtlData ClogHistoryCtlData; -#define ClogCtl (&ClogCtlData) - +#define ClogCtl (&ClogCtlData) +#define ClogHistoryCtl (&ClogHistoryCtlData) +static XidStatus TransactionIdGetStatusHistory(TransactionId xid); static int ZeroCLOGPage(int pageno, bool writeXlog); static bool CLOGPagePrecedes(int page1, int page2); static void WriteZeroPageXlogRec(int pageno); @@ -296,6 +304,10 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids, /* ... 
then the main transaction */ TransactionIdSetStatusBit(xid, status, lsn, slotno); + + /* When we commit advance ClogCtl's shared RecentXminPageno if needed */ + if (ClogCtl->shared->RecentXminPageno < TransactionIdToPage(RecentXmin)) + ClogCtl->shared->RecentXminPageno = TransactionIdToPage(RecentXmin); } /* Set the subtransactions */ @@ -387,6 +399,7 @@ TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i XidStatus TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn) { + bool useClogHistory = true; int pageno = TransactionIdToPage(xid); int byteno = TransactionIdToByte(xid); int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; @@ -397,15 +410,64 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn) /* lock is acquired by SimpleLruReadPage_ReadOnly */ - slotno = SimpleLruReadPage_ReadOnly(ClogCtl, pageno, xid); - byteptr = ClogCtl->shared->page_buffer[slotno] + byteno; + /* + * Decide whether to use main Clog or read-only ClogHistory. + * + * Our knowledge of the boundary between the two may be a little out + * of date, so if we try Clog and can't find it we need to try again + * against ClogHistory. + */ + if (pageno >= ClogCtl->recent_oldest_active_page_number) + { + slotno = SimpleLruReadPage_ReadOnly(ClogCtl, pageno, xid); + if (slotno >= 0) + useClogHistory = false; + } + + if (useClogHistory) + return TransactionIdGetStatusHistory(xid); + + byteptr = clog->shared->page_buffer[slotno] + byteno; status = (*byteptr >> bshift) & CLOG_XACT_BITMASK; lsnindex = GetLSNIndex(slotno, xid); - *lsn = ClogCtl->shared->group_lsn[lsnindex]; + *lsn = clog->shared->group_lsn[lsnindex]; - LWLockRelease(CLogControlLock); + LWLockRelease(clog->shared->ControlLock); + + return status; +} + +/* + * Get state of a transaction from the read-only portion of the clog, + * which we refer to as the clog history. + * + * Code isolated here to more easily allow various implementation options. 
+ */ +static XidStatus +TransactionIdGetStatusHistory(TransactionId xid) +{ + SlruCtl clog = ClogHistoryCtl; + int pageno = TransactionIdToPage(xid); + int byteno = TransactionIdToByte(xid); + int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; + int slotno; + char *byteptr; + XidStatus status; + + slotno = SimpleLruReadPage_ReadOnly(clog, pageno, xid); + + byteptr = clog->shared->page_buffer[slotno] + byteno; + status = (*byteptr >> bshift) & CLOG_XACT_BITMASK; + + /* + * No need to check LSN in aged transactions, since the page has + * been flushed from main clog long ago, so WAL flush has already + * occurred. + */ + + LWLockRelease(clog->shared->ControlLock); return status; } @@ -445,15 +507,19 @@ CLOGShmemBuffers(void) Size CLOGShmemSize(void) { - return SimpleLruShmemSize(CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE); + /* Reserve shmem for both ClogCtl and ClogHistoryCtl */ + return SimpleLruShmemSize(2 * CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE); } void CLOGShmemInit(void) { ClogCtl->PagePrecedes = CLOGPagePrecedes; + ClogHistoryCtl->PagePrecedes = CLOGPagePrecedes; SimpleLruInit(ClogCtl, "CLOG Ctl", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE, CLogControlLock, "pg_clog"); + SimpleLruInit(ClogHistoryCtl, "CLOG History Ctl", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE, + CLogHistoryControlLock, "pg_clog"); } /* @@ -592,6 +658,16 @@ CheckPointCLOG(void) TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true); SimpleLruFlush(ClogCtl, true); TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true); + + /* + * Now that we've written out all dirty buffers the only pages that + * will get dirty again will be pages with active transactions on them. + * So we can move forward the oldest_active_page_number and allow + * read only operations via clog history. 
+ */ + LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); + ClogCtl->shared->oldest_active_page_number = ClogCtl->shared->RecentXminPageno; + LWLockRelease(CLogControlLock); } diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 30538ff..dd38213 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -188,6 +188,12 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, shared->cur_lru_count = 0; + /* + * Start page number from zero. If used, updates after every checkpoint. + */ + shared->oldest_active_page_number = 0; + shared->RecentXminPageno = 0; + /* shared->latest_page_number will be set later */ ptr = (char *) shared; @@ -476,6 +482,16 @@ SimpleLruReadPage_ReadOnly(SlruCtl ctl, int pageno, TransactionId xid) LWLockRelease(shared->ControlLock); LWLockAcquire(shared->ControlLock, LW_EXCLUSIVE); + /* update local state while we have the lock */ + ctl->recent_oldest_active_page_number = shared->oldest_active_page_number; + + /* Check if our cached boundary information was out of date */ + if (pageno < ctl->recent_oldest_active_page_number) + { + LWLockRelease(shared->ControlLock); + return -1; + } + return SimpleLruReadPage(ctl, pageno, true, xid); } diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index cc41568..353f101 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -170,8 +170,8 @@ NumLWLocks(void) /* proc.c needs one for each backend or auxiliary process */ numLocks += MaxBackends + NUM_AUXILIARY_PROCS; - /* clog.c needs one per CLOG buffer */ - numLocks += CLOGShmemBuffers(); + /* clog.c needs two per CLOG buffer */ + numLocks += 2 * CLOGShmemBuffers(); /* subtrans.c needs one per SubTrans buffer */ numLocks += NUM_SUBTRANS_BUFFERS; diff --git a/src/include/access/slru.h b/src/include/access/slru.h index 41cd484..f7b0d87 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -99,6 
+99,15 @@ typedef struct SlruSharedData * the latest page. */ int latest_page_number; + + /* + * RecentXminPageno is the oldest page that any active + * transaction would ever wish to write to. + * oldest_active_page_number is the oldest dirty page, or the + * RecentXminPageno, whichever is lower. We advance oldest at checkpoint. + */ + int oldest_active_page_number; + int RecentXminPageno; } SlruSharedData; typedef SlruSharedData *SlruShared; @@ -125,6 +134,11 @@ typedef struct SlruCtlData bool (*PagePrecedes) (int, int); /* + * Local cached value of oldest_active_page_number. + */ + int recent_oldest_active_page_number; + + /* * Dir is set during SimpleLruInit and does not change thereafter. Since * it's always the same, it doesn't need to be in shared memory. */ diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index df3df29..3d8838f 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -79,6 +79,7 @@ typedef enum LWLockId SerializablePredicateLockListLock, OldSerXidLock, SyncRepLock, + CLogHistoryControlLock, /* Individual lock IDs end here */ FirstBufMappingLock, FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers