Hi,

I'm trying to see if it makes sense to do the double-buffering of page
writes before going further ahead with CRC checking.  I came up with the
attached patch; it does the double-buffering inconditionally, because as
it was said, it allows releasing the io_in_progress lock (and resetting
BM_IO_IN_PROGRESS) early.

So far I have not managed to convince me that this is a correct change
to make; the io_in_progress bits are pretty convoluted -- for example, I
wonder how does "releasing" the buffer early (before actually sending
the write to the kernel and marking it not dirty) interact with
checkpoint and a possible full-page image.

Basically the change is to take the unsetting of BM_DIRTY out of
TerminateBufferIO and into its own routine; and in FlushBuffer, release
io_in_progress just after copying the buffer contents elsewhere, and
mark the buffer not dirty after actually doing the write.

Thoughts?

-- 
Alvaro Herrera                                http://www.CommandPrompt.com/
PostgreSQL Replication, Consulting, Custom Development, 24x7 support
Index: src/backend/storage/buffer/bufmgr.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/storage/buffer/bufmgr.c,v
retrieving revision 1.239
diff -c -p -r1.239 bufmgr.c
*** src/backend/storage/buffer/bufmgr.c	20 Oct 2008 21:11:15 -0000	1.239
--- src/backend/storage/buffer/bufmgr.c	21 Oct 2008 15:30:38 -0000
*************** static void BufferSync(int flags);
*** 84,91 ****
  static int	SyncOneBuffer(int buf_id, bool skip_recently_used);
  static void WaitIO(volatile BufferDesc *buf);
  static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
! static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
! 				  int set_flag_bits);
  static void buffer_write_error_callback(void *arg);
  static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
  			BlockNumber blockNum,
--- 84,91 ----
  static int	SyncOneBuffer(int buf_id, bool skip_recently_used);
  static void WaitIO(volatile BufferDesc *buf);
  static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
! static void TerminateBufferIO(volatile BufferDesc *buf, int set_flag_bits);
! static void MarkBufferNotDirty(volatile BufferDesc *buf);
  static void buffer_write_error_callback(void *arg);
  static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
  			BlockNumber blockNum,
*************** ReadBuffer_common(SMgrRelation smgr, boo
*** 395,401 ****
  	else
  	{
  		/* Set BM_VALID, terminate IO, and wake up any waiters */
! 		TerminateBufferIO(bufHdr, false, BM_VALID);
  	}
  
  	if (VacuumCostActive)
--- 395,401 ----
  	else
  	{
  		/* Set BM_VALID, terminate IO, and wake up any waiters */
! 		TerminateBufferIO(bufHdr, BM_VALID);
  	}
  
  	if (VacuumCostActive)
*************** FlushBuffer(volatile BufferDesc *buf, SM
*** 1792,1797 ****
--- 1792,1798 ----
  {
  	XLogRecPtr	recptr;
  	ErrorContextCallback errcontext;
+ 	char		dblbuf[BLCKSZ];
  
  	/*
  	 * Acquire the buffer's io_in_progress lock.  If StartBufferIO returns
*************** FlushBuffer(volatile BufferDesc *buf, SM
*** 1834,1856 ****
  	buf->flags &= ~BM_JUST_DIRTIED;
  	UnlockBufHdr(buf);
  
  	smgrwrite(reln,
  			  buf->tag.forkNum,
  			  buf->tag.blockNum,
! 			  (char *) BufHdrGetBlock(buf),
  			  false);
  
  	BufferFlushCount++;
  
  	TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(reln->smgr_rnode.spcNode,
  		 reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode);
  
- 	/*
- 	 * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and
- 	 * end the io_in_progress state.
- 	 */
- 	TerminateBufferIO(buf, true, 0);
- 
  	/* Pop the error context stack */
  	error_context_stack = errcontext.previous;
  }
--- 1835,1863 ----
  	buf->flags &= ~BM_JUST_DIRTIED;
  	UnlockBufHdr(buf);
  
+ 	/*
+ 	 * We make a copy of the buffer to write.  This allows us to release the
+ 	 * io_in_progress lock early, before actually doing the write.
+ 	 */
+ 	memcpy(dblbuf, BufHdrGetBlock(buf), BLCKSZ);
+ 
+ 	/* End the io_in_progress state. */
+ 	TerminateBufferIO(buf, 0);
+ 
  	smgrwrite(reln,
  			  buf->tag.forkNum,
  			  buf->tag.blockNum,
! 			  (char *) dblbuf,
  			  false);
  
+ 	/* Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) */
+ 	MarkBufferNotDirty(buf);
+ 
  	BufferFlushCount++;
  
  	TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(reln->smgr_rnode.spcNode,
  		 reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode);
  
  	/* Pop the error context stack */
  	error_context_stack = errcontext.previous;
  }
*************** StartBufferIO(volatile BufferDesc *buf, 
*** 2578,2595 ****
   *	We hold the buffer's io_in_progress lock
   *	The buffer is Pinned
   *
-  * If clear_dirty is TRUE and BM_JUST_DIRTIED is not set, we clear the
-  * buffer's BM_DIRTY flag.  This is appropriate when terminating a
-  * successful write.  The check on BM_JUST_DIRTIED is necessary to avoid
-  * marking the buffer clean if it was re-dirtied while we were writing.
-  *
   * set_flag_bits gets ORed into the buffer's flags.  It must include
   * BM_IO_ERROR in a failure case.  For successful completion it could
   * be 0, or BM_VALID if we just finished reading in the page.
   */
  static void
! TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
! 				  int set_flag_bits)
  {
  	Assert(buf == InProgressBuf);
  
--- 2585,2596 ----
   *	We hold the buffer's io_in_progress lock
   *	The buffer is Pinned
   *
   * set_flag_bits gets ORed into the buffer's flags.  It must include
   * BM_IO_ERROR in a failure case.  For successful completion it could
   * be 0, or BM_VALID if we just finished reading in the page.
   */
  static void
! TerminateBufferIO(volatile BufferDesc *buf, int set_flag_bits)
  {
  	Assert(buf == InProgressBuf);
  
*************** TerminateBufferIO(volatile BufferDesc *b
*** 2597,2604 ****
  
  	Assert(buf->flags & BM_IO_IN_PROGRESS);
  	buf->flags &= ~(BM_IO_IN_PROGRESS | BM_IO_ERROR);
- 	if (clear_dirty && !(buf->flags & BM_JUST_DIRTIED))
- 		buf->flags &= ~(BM_DIRTY | BM_CHECKPOINT_NEEDED);
  	buf->flags |= set_flag_bits;
  
  	UnlockBufHdr(buf);
--- 2598,2603 ----
*************** TerminateBufferIO(volatile BufferDesc *b
*** 2609,2614 ****
--- 2608,2631 ----
  }
  
  /*
+  * Clear the buffer's BM_DIRTY flag, unless BM_JUST_DIRTIED is set.
+  *
+  * This is appropriate when terminating a successful write.  The check on
+  * BM_JUST_DIRTIED is necessary to avoid marking the buffer clean if it was
+  * re-dirtied while we were writing.
+  */
+ static void
+ MarkBufferNotDirty(volatile BufferDesc *buf)
+ {
+ 	LockBufHdr(buf);
+ 
+ 	if (!(buf->flags & BM_JUST_DIRTIED))
+ 		buf->flags &= ~(BM_DIRTY | BM_CHECKPOINT_NEEDED);
+ 
+ 	UnlockBufHdr(buf);
+ }
+ 
+ /*
   * AbortBufferIO: Clean up any active buffer I/O after an error.
   *
   *	All LWLocks we might have held have been released,
*************** AbortBufferIO(void)
*** 2662,2668 ****
  						 errdetail("Multiple failures --- write error might be permanent.")));
  			}
  		}
! 		TerminateBufferIO(buf, false, BM_IO_ERROR);
  	}
  }
  
--- 2679,2685 ----
  						 errdetail("Multiple failures --- write error might be permanent.")));
  			}
  		}
! 		TerminateBufferIO(buf, BM_IO_ERROR);
  	}
  }
  
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to