On Sat, Feb 4, 2012 at 6:37 PM, Simon Riggs <[email protected]> wrote:
> Patch to do that attached
--
Simon Riggs http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 99a431a..4758931 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -4590,6 +4590,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
int ndead;
int nunused;
Size freespace;
+ bool hit;
/*
* We're about to remove tuples. In Hot Standby mode, ensure that there's
@@ -4608,7 +4609,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
if (record->xl_info & XLR_BKP_BLOCK_1)
return;
- buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL);
+ buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL, &hit);
if (!BufferIsValid(buffer))
return;
LockBufferForCleanup(buffer);
@@ -4664,6 +4665,7 @@ heap_xlog_freeze(XLogRecPtr lsn, XLogRecord *record)
TransactionId cutoff_xid = xlrec->cutoff_xid;
Buffer buffer;
Page page;
+ bool hit;
/*
* In Hot Standby mode, ensure that there's no queries running which still
@@ -4677,7 +4679,7 @@ heap_xlog_freeze(XLogRecPtr lsn, XLogRecord *record)
if (record->xl_info & XLR_BKP_BLOCK_1)
return;
- buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL);
+ buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL, &hit);
if (!BufferIsValid(buffer))
return;
LockBufferForCleanup(buffer);
@@ -4728,6 +4730,7 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
xl_heap_visible *xlrec = (xl_heap_visible *) XLogRecGetData(record);
Buffer buffer;
Page page;
+ bool hit;
/*
* Read the heap page, if it still exists. If the heap file has been
@@ -4736,7 +4739,7 @@ heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
* will have to be cleared out at the same time.
*/
buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block,
- RBM_NORMAL);
+ RBM_NORMAL, &hit);
if (!BufferIsValid(buffer))
return;
page = (Page) BufferGetPage(buffer);
@@ -4806,13 +4809,14 @@ heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
xl_heap_newpage *xlrec = (xl_heap_newpage *) XLogRecGetData(record);
Buffer buffer;
Page page;
+ bool hit;
/*
* Note: the NEWPAGE log record is used for both heaps and indexes, so do
* not do anything that assumes we are touching a heap.
*/
buffer = XLogReadBufferExtended(xlrec->node, xlrec->forknum, xlrec->blkno,
- RBM_ZERO);
+ RBM_ZERO, &hit);
Assert(BufferIsValid(buffer));
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
page = (Page) BufferGetPage(buffer);
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index 0f5c113..d10b0b8 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -466,6 +466,7 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
Buffer buffer;
Page page;
BTPageOpaque opaque;
+ bool hit;
xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
@@ -491,7 +492,7 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
* Another simple optimization would be to check if there's any
* backends running; if not, we could just skip this.
*/
- buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno, RBM_NORMAL);
+ buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno, RBM_NORMAL, &hit);
if (BufferIsValid(buffer))
{
LockBufferForCleanup(buffer);
@@ -513,7 +514,7 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
* Like in btvacuumpage(), we need to take a cleanup lock on every leaf
* page. See nbtree/README for details.
*/
- buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL);
+ buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, xlrec->block, RBM_NORMAL, &hit);
if (!BufferIsValid(buffer))
return;
LockBufferForCleanup(buffer);
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index cce87a3..3f4842d 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3687,6 +3687,7 @@ RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup)
BkpBlock bkpb;
char *blk;
int i;
+ bool hit;
if (!(record->xl_info & XLR_BKP_BLOCK_MASK))
return;
@@ -3700,8 +3701,21 @@ RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup)
memcpy(&bkpb, blk, sizeof(BkpBlock));
blk += sizeof(BkpBlock);
+ hit = false;
buffer = XLogReadBufferExtended(bkpb.node, bkpb.fork, bkpb.block,
- RBM_ZERO);
+ RBM_ZERO, &hit);
+
+ /*
+ * If we found the block in shared buffers and we have already
+ * reached consistency, skip applying the backup block. The block
+ * was already removable anyway, so skipping it causes no problems.
+ * This saves us from having to take a cleanup lock every time we
+ * apply a backup block; otherwise we would need one because user
+ * queries expect data on a block to remain constant while they read it.
+ */
+ if (reachedConsistency && hit)
+ continue;
+
Assert(BufferIsValid(buffer));
if (cleanup)
LockBufferForCleanup(buffer);
@@ -3716,9 +3730,9 @@ RestoreBkpBlocks(XLogRecPtr lsn, XLogRecord *record, bool cleanup)
}
else
{
- /* must zero-fill the hole */
- MemSet((char *) page, 0, BLCKSZ);
memcpy((char *) page, blk, bkpb.hole_offset);
+ /* must zero-fill the hole */
+ MemSet((char *) page + bkpb.hole_offset, 0, bkpb.hole_length);
memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length),
blk + bkpb.hole_offset,
BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index f286cdf..b7b9ec8 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -263,9 +263,10 @@ Buffer
XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
{
Buffer buf;
+ bool hit;
buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno,
- init ? RBM_ZERO : RBM_NORMAL);
+ init ? RBM_ZERO : RBM_NORMAL, &hit);
if (BufferIsValid(buf))
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
@@ -290,7 +291,7 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
*/
Buffer
XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
- BlockNumber blkno, ReadBufferMode mode)
+ BlockNumber blkno, ReadBufferMode mode, bool *hit)
{
BlockNumber lastblock;
Buffer buffer;
@@ -317,7 +318,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
{
/* page exists in file */
buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno,
- mode, NULL);
+ mode, NULL, hit);
}
else
{
@@ -336,7 +337,7 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
if (buffer != InvalidBuffer)
ReleaseBuffer(buffer);
buffer = ReadBufferWithoutRelcache(rnode, forknum,
- P_NEW, mode, NULL);
+ P_NEW, mode, NULL, hit);
lastblock++;
}
Assert(BufferGetBlockNumber(buffer) == blkno);
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 1adb6d3..64293ae 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -264,14 +264,12 @@ ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
Buffer
ReadBufferWithoutRelcache(RelFileNode rnode, ForkNumber forkNum,
BlockNumber blockNum, ReadBufferMode mode,
- BufferAccessStrategy strategy)
+ BufferAccessStrategy strategy, bool *hit)
{
- bool hit;
-
SMgrRelation smgr = smgropen(rnode, InvalidBackendId);
return ReadBuffer_common(smgr, RELPERSISTENCE_PERMANENT, forkNum, blockNum,
- mode, strategy, &hit);
+ mode, strategy, hit);
}
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 7840adb..0aa099a 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -202,13 +202,14 @@ XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
BlockNumber blkno;
Buffer buf;
Page page;
+ bool hit;
/* Get the location of the FSM byte representing the heap block */
addr = fsm_get_location(heapBlk, &slot);
blkno = fsm_logical_to_physical(addr);
/* If the page doesn't exist already, extend */
- buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR);
+ buf = XLogReadBufferExtended(rnode, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR, &hit);
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
page = BufferGetPage(buf);
diff --git a/src/include/access/xlogutils.h b/src/include/access/xlogutils.h
index 6ade476..038f94f 100644
--- a/src/include/access/xlogutils.h
+++ b/src/include/access/xlogutils.h
@@ -24,7 +24,7 @@ extern void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init);
extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
- BlockNumber blkno, ReadBufferMode mode);
+ BlockNumber blkno, ReadBufferMode mode, bool *hit);
extern Relation CreateFakeRelcacheEntry(RelFileNode rnode);
extern void FreeFakeRelcacheEntry(Relation fakerel);
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index de1bbd0..aa8f77c 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -165,8 +165,8 @@ extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
BlockNumber blockNum, ReadBufferMode mode,
BufferAccessStrategy strategy);
extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode,
- ForkNumber forkNum, BlockNumber blockNum,
- ReadBufferMode mode, BufferAccessStrategy strategy);
+ ForkNumber forkNum, BlockNumber blockNum,
+ ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit);
extern void ReleaseBuffer(Buffer buffer);
extern void UnlockReleaseBuffer(Buffer buffer);
extern void MarkBufferDirty(Buffer buffer);
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers