Hello,

Patch 0001 gets rid of the unconditional lseek() calls for SLRU I/O,
as a small follow-up to commit c24dcd0c.  Patch 0002 gets rid of a few
places that usually do a good job of avoiding lseek() calls while
reading and writing WAL, but it seems better to have no code at all.

-- 
Thomas Munro
https://enterprisedb.com
From 68c1ec0531f802d39c80b8fa5c310c3c70d795d9 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Sat, 30 Mar 2019 17:04:08 +1300
Subject: [PATCH 1/2] Use pg_pread() and pg_pwrite() in slru.c.

This avoids lseek() system calls at every SLRU I/O, as was
done for relation files in commit c24dcd0c.

Author: Thomas Munro
Discussion:
---
 src/backend/access/transam/slru.c | 25 ++++---------------------
 1 file changed, 4 insertions(+), 21 deletions(-)

diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 974d42fc86..6527cfce3b 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -647,7 +647,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
 	SlruShared	shared = ctl->shared;
 	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
 	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
-	int			offset = rpageno * BLCKSZ;
+	off_t		offset = rpageno * BLCKSZ;
 	char		path[MAXPGPATH];
 	int			fd;
 
@@ -677,17 +677,9 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
 		return true;
 	}
 
-	if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
-	{
-		slru_errcause = SLRU_SEEK_FAILED;
-		slru_errno = errno;
-		CloseTransientFile(fd);
-		return false;
-	}
-
 	errno = 0;
 	pgstat_report_wait_start(WAIT_EVENT_SLRU_READ);
-	if (read(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
+	if (pg_pread(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
 	{
 		pgstat_report_wait_end();
 		slru_errcause = SLRU_READ_FAILED;
@@ -727,7 +719,7 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
 	SlruShared	shared = ctl->shared;
 	int			segno = pageno / SLRU_PAGES_PER_SEGMENT;
 	int			rpageno = pageno % SLRU_PAGES_PER_SEGMENT;
-	int			offset = rpageno * BLCKSZ;
+	off_t		offset = rpageno * BLCKSZ;
 	char		path[MAXPGPATH];
 	int			fd = -1;
 
@@ -837,18 +829,9 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
 		}
 	}
 
-	if (lseek(fd, (off_t) offset, SEEK_SET) < 0)
-	{
-		slru_errcause = SLRU_SEEK_FAILED;
-		slru_errno = errno;
-		if (!fdata)
-			CloseTransientFile(fd);
-		return false;
-	}
-
 	errno = 0;
 	pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
-	if (write(fd, shared->page_buffer[slotno], BLCKSZ) != BLCKSZ)
+	if (pg_pwrite(fd, shared->page_buffer[slotno], BLCKSZ, offset) != BLCKSZ)
 	{
 		pgstat_report_wait_end();
 		/* if write didn't set errno, assume problem is no disk space */
-- 
2.21.0

From 446007aed7fb1bafb125eaca5569f62acdf41069 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Sat, 30 Mar 2019 17:33:30 +1300
Subject: [PATCH 2/2] Use pg_pread() and pg_pwrite() in various places.

Remove several copies of some code that usually does a good job of
avoiding lseek() calls, but it seems better to have no code at all.
Also remove a simple case of an unconditional lseek() call.

Author: Thomas Munro
Discussion:
---
 src/backend/access/heap/rewriteheap.c  |  9 +--------
 src/backend/access/transam/xlogutils.c | 25 ++-----------------------
 src/backend/replication/walreceiver.c  | 21 +++------------------
 src/backend/replication/walsender.c    | 23 ++++-------------------
 src/bin/pg_waldump/pg_waldump.c        | 25 +++----------------------
 5 files changed, 13 insertions(+), 90 deletions(-)

diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index bce4274362..c121e44719 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -1165,13 +1165,6 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
 						path, (uint32) xlrec->offset)));
 	pgstat_report_wait_end();
 
-	/* now seek to the position we want to write our data to */
-	if (lseek(fd, xlrec->offset, SEEK_SET) != xlrec->offset)
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not seek to end of file \"%s\": %m",
-						path)));
-
 	data = XLogRecGetData(r) + sizeof(*xlrec);
 
 	len = xlrec->num_mappings * sizeof(LogicalRewriteMappingData);
@@ -1179,7 +1172,7 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
 	/* write out tail end of mapping file (again) */
 	errno = 0;
 	pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_MAPPING_WRITE);
-	if (write(fd, data, len) != len)
+	if (pg_pwrite(fd, data, len, xlrec->offset) != len)
 	{
 		/* if write didn't set errno, assume problem is no disk space */
 		if (errno == 0)
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index 10a663bae6..a63bfed3a6 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -664,7 +664,6 @@ XLogRead(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr,
 	static int	sendFile = -1;
 	static XLogSegNo sendSegNo = 0;
 	static TimeLineID sendTLI = 0;
-	static uint32 sendOff = 0;
 
 	Assert(segsize == wal_segment_size);
 
@@ -708,28 +707,9 @@ XLogRead(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr,
 							 errmsg("could not open file \"%s\": %m",
 									path)));
 			}
-			sendOff = 0;
 			sendTLI = tli;
 		}
 
-		/* Need to seek in the file? */
-		if (sendOff != startoff)
-		{
-			if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
-			{
-				char		path[MAXPGPATH];
-				int			save_errno = errno;
-
-				XLogFilePath(path, tli, sendSegNo, segsize);
-				errno = save_errno;
-				ereport(ERROR,
-						(errcode_for_file_access(),
-						 errmsg("could not seek in log segment %s to offset %u: %m",
-								path, startoff)));
-			}
-			sendOff = startoff;
-		}
-
 		/* How many bytes are within this segment? */
 		if (nbytes > (segsize - startoff))
 			segbytes = segsize - startoff;
@@ -737,7 +717,7 @@ XLogRead(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr,
 			segbytes = nbytes;
 
 		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-		readbytes = read(sendFile, p, segbytes);
+		readbytes = pg_pread(sendFile, p, segbytes, (off_t) startoff);
 		pgstat_report_wait_end();
 		if (readbytes <= 0)
 		{
@@ -749,13 +729,12 @@ XLogRead(char *buf, int segsize, TimeLineID tli, XLogRecPtr startptr,
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not read from log segment %s, offset %u, length %lu: %m",
-							path, sendOff, (unsigned long) segbytes)));
+							path, startoff, (unsigned long) segbytes)));
 		}
 
 		/* Update state for read */
 		recptr += readbytes;
 
-		sendOff += readbytes;
 		nbytes -= readbytes;
 		p += readbytes;
 	}
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index f32cf91ffb..d2c0f964d0 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -83,14 +83,13 @@ WalReceiverFunctionsType *WalReceiverFunctions = NULL;
 #define NAPTIME_PER_CYCLE 100	/* max sleep time between cycles (100ms) */
 
 /*
- * These variables are used similarly to openLogFile/SegNo/Off,
+ * These variables are used similarly to openLogFile/SegNo,
  * but for walreceiver to write the XLOG. recvFileTLI is the TimeLineID
  * corresponding the filename of recvFile.
  */
 static int	recvFile = -1;
 static TimeLineID recvFileTLI = 0;
 static XLogSegNo recvSegNo = 0;
-static uint32 recvOff = 0;
 
 /*
  * Flags set by interrupt handlers of walreceiver for later service in the
@@ -971,7 +970,6 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
 			use_existent = true;
 			recvFile = XLogFileInit(recvSegNo, &use_existent, true);
 			recvFileTLI = ThisTimeLineID;
-			recvOff = 0;
 		}
 
 		/* Calculate the start offset of the received logs */
@@ -982,22 +980,10 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
 		else
 			segbytes = nbytes;
 
-		/* Need to seek in the file? */
-		if (recvOff != startoff)
-		{
-			if (lseek(recvFile, (off_t) startoff, SEEK_SET) < 0)
-				ereport(PANIC,
-						(errcode_for_file_access(),
-						 errmsg("could not seek in log segment %s to offset %u: %m",
-								XLogFileNameP(recvFileTLI, recvSegNo),
-								startoff)));
-			recvOff = startoff;
-		}
-
 		/* OK to write the logs */
 		errno = 0;
 
-		byteswritten = write(recvFile, buf, segbytes);
+		byteswritten = pg_pwrite(recvFile, buf, segbytes, (off_t) startoff);
 		if (byteswritten <= 0)
 		{
 			/* if write didn't set errno, assume no disk space */
@@ -1008,13 +994,12 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
 					 errmsg("could not write to log segment %s "
 							"at offset %u, length %lu: %m",
 							XLogFileNameP(recvFileTLI, recvSegNo),
-							recvOff, (unsigned long) segbytes)));
+							startoff, (unsigned long) segbytes)));
 		}
 
 		/* Update state for write */
 		recptr += byteswritten;
 
-		recvOff += byteswritten;
 		nbytes -= byteswritten;
 		buf += byteswritten;
 
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 21f5c868f1..bc6d4bc500 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -129,12 +129,11 @@ bool		log_replication_commands = false;
 bool		wake_wal_senders = false;
 
 /*
- * These variables are used similarly to openLogFile/SegNo/Off,
+ * These variables are used similarly to openLogFile/SegNo,
  * but for walsender to read the XLOG.
  */
 static int	sendFile = -1;
 static XLogSegNo sendSegNo = 0;
-static uint32 sendOff = 0;
 
 /* Timeline ID of the currently open file */
 static TimeLineID curFileTimeLine = 0;
@@ -2441,19 +2440,6 @@ retry:
 							 errmsg("could not open file \"%s\": %m",
 									path)));
 			}
-			sendOff = 0;
-		}
-
-		/* Need to seek in the file? */
-		if (sendOff != startoff)
-		{
-			if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
-				ereport(ERROR,
-						(errcode_for_file_access(),
-						 errmsg("could not seek in log segment %s to offset %u: %m",
-								XLogFileNameP(curFileTimeLine, sendSegNo),
-								startoff)));
-			sendOff = startoff;
 		}
 
 		/* How many bytes are within this segment? */
@@ -2463,7 +2449,7 @@ retry:
 			segbytes = nbytes;
 
 		pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-		readbytes = read(sendFile, p, segbytes);
+		readbytes = pg_pread(sendFile, p, segbytes, (off_t) startoff);
 		pgstat_report_wait_end();
 		if (readbytes < 0)
 		{
@@ -2471,7 +2457,7 @@ retry:
 					(errcode_for_file_access(),
 					 errmsg("could not read from log segment %s, offset %u, length %zu: %m",
 							XLogFileNameP(curFileTimeLine, sendSegNo),
-							sendOff, (Size) segbytes)));
+							startoff, (Size) segbytes)));
 		}
 		else if (readbytes == 0)
 		{
@@ -2479,13 +2465,12 @@ retry:
 					(errcode(ERRCODE_DATA_CORRUPTED),
 					 errmsg("could not read from log segment %s, offset %u: read %d of %zu",
 							XLogFileNameP(curFileTimeLine, sendSegNo),
-							sendOff, readbytes, (Size) segbytes)));
+							startoff, readbytes, (Size) segbytes)));
 		}
 
 		/* Update state for read */
 		recptr += readbytes;
 
-		sendOff += readbytes;
 		nbytes -= readbytes;
 		p += readbytes;
 	}
diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c
index 1e5379eb3e..35216b4550 100644
--- a/src/bin/pg_waldump/pg_waldump.c
+++ b/src/bin/pg_waldump/pg_waldump.c
@@ -329,7 +329,6 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id,
 
 	static int	sendFile = -1;
 	static XLogSegNo sendSegNo = 0;
-	static uint32 sendOff = 0;
 
 	p = buf;
 	recptr = startptr;
@@ -384,23 +383,6 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id,
 			if (sendFile < 0)
 				fatal_error("could not find file \"%s\": %s",
 							fname, strerror(errno));
-			sendOff = 0;
-		}
-
-		/* Need to seek in the file? */
-		if (sendOff != startoff)
-		{
-			if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
-			{
-				int			err = errno;
-				char		fname[MAXPGPATH];
-
-				XLogFileName(fname, timeline_id, sendSegNo, WalSegSz);
-
-				fatal_error("could not seek in log file %s to offset %u: %s",
-							fname, startoff, strerror(err));
-			}
-			sendOff = startoff;
 		}
 
 		/* How many bytes are within this segment? */
@@ -409,7 +391,7 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id,
 		else
 			segbytes = nbytes;
 
-		readbytes = read(sendFile, p, segbytes);
+		readbytes = pg_pread(sendFile, p, segbytes, (off_t) startoff);
 		if (readbytes <= 0)
 		{
 			int			err = errno;
@@ -421,16 +403,15 @@ XLogDumpXLogRead(const char *directory, TimeLineID timeline_id,
 
 			if (readbytes < 0)
 				fatal_error("could not read from log file %s, offset %u, length %d: %s",
-							fname, sendOff, segbytes, strerror(err));
+							fname, startoff, segbytes, strerror(err));
 			else if (readbytes == 0)
 				fatal_error("could not read from log file %s, offset %u: read %d of %zu",
-							fname, sendOff, readbytes, (Size) segbytes);
+							fname, startoff, readbytes, (Size) segbytes);
 		}
 
 		/* Update state for read */
 		recptr += readbytes;
 
-		sendOff += readbytes;
 		nbytes -= readbytes;
 		p += readbytes;
 	}
-- 
2.21.0

Reply via email to