On Tue, Dec 03, 2019 at 09:35:00AM -0300, Alvaro Herrera wrote:
> You didn't attach anything, but I concur about the low probability
> aspect: the assertion failure does not occur in production builds
> (obviously); and only an out-of-memory situation is a real problem
> when
> an fsync fails.  Anyway this should be a very localized fix, right?

Sorry.  You get something like the attached.  The recent refactoring
work you committed in this area causes already conflicts on
REL_12_STABLE.

> I'm not sure that the internationalization stuff in issue_xlog_fsync
> is correct.  I think the _() should be gettext_noop(), or alternatively
> the errmsg() should be errmsg_internal(); otherwise the translation is
> invoked twice.  (I didn't verify this.)

Hmm.  We actually do both in tablecmds.c:ATWrongRelkindError(), and
that's the code I was looking at yesterday when thinking about the
problem..  However, parse_agg.c, parse_expr.c and parse_func.c among
others like vacuumlazy.c use directly errmsg_internal() without
translating the string first.  So there is indeed duplicated work for
both.  Does the attached patch look correct to you?
--
Michael
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 6bc1a6b46d..01ca07b552 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -10171,7 +10171,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
 		errno = save_errno;
 		ereport(PANIC,
 				(errcode_for_file_access(),
-				 errmsg(msg, xlogfname)));
+				 errmsg_internal(msg, xlogfname)));
 	}
 
 	pgstat_report_wait_end();
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 5440eb9015..3d98af47b1 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -5230,7 +5230,7 @@ ATWrongRelkindError(Relation rel, int allowed_targets)
 
 	ereport(ERROR,
 			(errcode(ERRCODE_WRONG_OBJECT_TYPE),
-			 errmsg(msg, RelationGetRelationName(rel))));
+			 errmsg_internal(msg, RelationGetRelationName(rel))));
 }
 
 /*
From 491faa2e92b5f637bb8b76eb1a9c2e93a6eec3a6 Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@paquier.xyz>
Date: Wed, 4 Dec 2019 13:57:18 +0900
Subject: [PATCH] Remove use of XLogFileNameP() in error string generation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

XLogFileNameP() is a wrapper routine able to build a palloc'd string for
a WAL segment name, which is used in error strings.  There were several
code paths where it is called in a critical section, where memory
allocation is not allowed, triggering an assertion failure on PANIC
instead of generating a proper message.  Note that this could be a
problem on OOM when calling the routine, as any failure would be
escalated to a PANIC as well.  This removes all the callers of this
routine, to fix existing mistakes and prevent future ones, replacing the
incorrect locations with a logic using a fixed-size buffer.

Author: Masahiko Sawada
Reviewed-by: Michael Paquier, Álvaro Herrera
Discussion: https://postgr.es/m/ca+fd4k5gc9h4uowmlg9k_qfnrnkkdew+-afveob9yx7z8jn...@mail.gmail.com
---
 src/backend/access/transam/xlog.c     | 78 ++++++++++++++++++++-------
 src/backend/replication/walreceiver.c | 30 ++++++++---
 src/backend/replication/walsender.c   | 39 +++++++++++---
 3 files changed, 112 insertions(+), 35 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 1a87042223..58c4ed9083 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2499,14 +2499,21 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
 				pgstat_report_wait_end();
 				if (written <= 0)
 				{
+					char		xlogfname[MAXFNAMELEN];
+					int			save_errno;
+
 					if (errno == EINTR)
 						continue;
+
+					save_errno = errno;
+					XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo,
+								 wal_segment_size);
+					errno = save_errno;
 					ereport(PANIC,
 							(errcode_for_file_access(),
 							 errmsg("could not write to log file %s "
 									"at offset %u, length %zu: %m",
-									XLogFileNameP(ThisTimeLineID, openLogSegNo),
-									startoffset, nleft)));
+									xlogfname, startoffset, nleft)));
 				}
 				nleft -= written;
 				from += written;
@@ -3792,10 +3799,17 @@ XLogFileClose(void)
 #endif
 
 	if (close(openLogFile))
+	{
+		char		xlogfname[MAXFNAMELEN];
+		int			save_errno = errno;
+
+		XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo, wal_segment_size);
+		errno = save_errno;
 		ereport(PANIC,
 				(errcode_for_file_access(),
-				 errmsg("could not close file \"%s\": %m",
-						XLogFileNameP(ThisTimeLineID, openLogSegNo))));
+				 errmsg("could not close file \"%s\": %m", xlogfname)));
+	}
+
 	openLogFile = -1;
 }
 
@@ -5527,10 +5541,17 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog)
 		fd = XLogFileInit(startLogSegNo, &use_existent, true);
 
 		if (close(fd))
+		{
+			char		xlogfname[MAXFNAMELEN];
+			int			save_errno = errno;
+
+			XLogFileName(xlogfname, ThisTimeLineID, startLogSegNo,
+						 wal_segment_size);
+			errno = save_errno;
 			ereport(ERROR,
 					(errcode_for_file_access(),
-					 errmsg("could not close file \"%s\": %m",
-							XLogFileNameP(ThisTimeLineID, startLogSegNo))));
+					 errmsg("could not close file \"%s\": %m", xlogfname)));
+		}
 	}
 
 	/*
@@ -10078,10 +10099,19 @@ assign_xlog_sync_method(int new_sync_method, void *extra)
 		{
 			pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN);
 			if (pg_fsync(openLogFile) != 0)
+			{
+				char		xlogfname[MAXFNAMELEN];
+				int			save_errno;
+
+				save_errno = errno;
+				XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo,
+							 wal_segment_size);
+				errno = save_errno;
 				ereport(PANIC,
 						(errcode_for_file_access(),
-						 errmsg("could not fsync file \"%s\": %m",
-								XLogFileNameP(ThisTimeLineID, openLogSegNo))));
+						 errmsg("could not fsync file \"%s\": %m", xlogfname)));
+			}
+
 			pgstat_report_wait_end();
 			if (get_sync_bit(sync_method) != get_sync_bit(new_sync_method))
 				XLogFileClose();
@@ -10099,32 +10129,25 @@ assign_xlog_sync_method(int new_sync_method, void *extra)
 void
 issue_xlog_fsync(int fd, XLogSegNo segno)
 {
+	char	   *msg = NULL;
+
 	pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
 	switch (sync_method)
 	{
 		case SYNC_METHOD_FSYNC:
 			if (pg_fsync_no_writethrough(fd) != 0)
-				ereport(PANIC,
-						(errcode_for_file_access(),
-						 errmsg("could not fsync file \"%s\": %m",
-								XLogFileNameP(ThisTimeLineID, segno))));
+				msg = _("could not fsync file \"%s\": %m");
 			break;
 #ifdef HAVE_FSYNC_WRITETHROUGH
 		case SYNC_METHOD_FSYNC_WRITETHROUGH:
 			if (pg_fsync_writethrough(fd) != 0)
-				ereport(PANIC,
-						(errcode_for_file_access(),
-						 errmsg("could not fsync write-through file \"%s\": %m",
-								XLogFileNameP(ThisTimeLineID, segno))));
+				msg = _("could not fsync write-through file \"%s\": %m");
 			break;
 #endif
 #ifdef HAVE_FDATASYNC
 		case SYNC_METHOD_FDATASYNC:
 			if (pg_fdatasync(fd) != 0)
-				ereport(PANIC,
-						(errcode_for_file_access(),
-						 errmsg("could not fdatasync file \"%s\": %m",
-								XLogFileNameP(ThisTimeLineID, segno))));
+				msg = _("could not fdatasync file \"%s\": %m");
 			break;
 #endif
 		case SYNC_METHOD_OPEN:
@@ -10135,6 +10158,21 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
 			elog(PANIC, "unrecognized wal_sync_method: %d", sync_method);
 			break;
 	}
+
+	/* PANIC if failed to fsync */
+	if (msg)
+	{
+		char		xlogfname[MAXFNAMELEN];
+		int			save_errno = errno;
+
+		XLogFileName(xlogfname, ThisTimeLineID, segno,
+					 wal_segment_size);
+		errno = save_errno;
+		ereport(PANIC,
+				(errcode_for_file_access(),
+				 errmsg(msg, xlogfname)));
+	}
+
 	pgstat_report_wait_end();
 }
 
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 6abc780778..dbba96af6c 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -576,17 +576,17 @@ WalReceiverMain(void)
 			char		xlogfname[MAXFNAMELEN];
 
 			XLogWalRcvFlush(false);
+			XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size);
 			if (close(recvFile) != 0)
 				ereport(PANIC,
 						(errcode_for_file_access(),
 						 errmsg("could not close log segment %s: %m",
-								XLogFileNameP(recvFileTLI, recvSegNo))));
+								xlogfname)));
 
 			/*
 			 * Create .done file forcibly to prevent the streamed segment from
 			 * being archived later.
 			 */
-			XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size);
 			if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)
 				XLogArchiveForceDone(xlogfname);
 			else
@@ -911,6 +911,8 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
 
 				XLogWalRcvFlush(false);
 
+				XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size);
+
 				/*
 				 * XLOG segment files will be re-read by recovery in startup
 				 * process soon, so we don't advise the OS to release cache
@@ -920,13 +922,12 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
 					ereport(PANIC,
 							(errcode_for_file_access(),
 							 errmsg("could not close log segment %s: %m",
-									XLogFileNameP(recvFileTLI, recvSegNo))));
+									xlogfname)));
 
 				/*
 				 * Create .done file forcibly to prevent the streamed segment
 				 * from being archived later.
 				 */
-				XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size);
 				if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS)
 					XLogArchiveForceDone(xlogfname);
 				else
@@ -954,11 +955,18 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
 		if (recvOff != startoff)
 		{
 			if (lseek(recvFile, (off_t) startoff, SEEK_SET) < 0)
+			{
+				char		xlogfname[MAXFNAMELEN];
+				int			save_errno = errno;
+
+				XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size);
+				errno = save_errno;
 				ereport(PANIC,
 						(errcode_for_file_access(),
 						 errmsg("could not seek in log segment %s to offset %u: %m",
-								XLogFileNameP(recvFileTLI, recvSegNo),
-								startoff)));
+								xlogfname, startoff)));
+			}
+
 			recvOff = startoff;
 		}
 
@@ -968,15 +976,21 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr)
 		byteswritten = write(recvFile, buf, segbytes);
 		if (byteswritten <= 0)
 		{
+			char		xlogfname[MAXFNAMELEN];
+			int			save_errno;
+
 			/* if write didn't set errno, assume no disk space */
 			if (errno == 0)
 				errno = ENOSPC;
+
+			save_errno = errno;
+			XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size);
+			errno = save_errno;
 			ereport(PANIC,
 					(errcode_for_file_access(),
 					 errmsg("could not write to log segment %s "
 							"at offset %u, length %lu: %m",
-							XLogFileNameP(recvFileTLI, recvSegNo),
-							recvOff, (unsigned long) segbytes)));
+							xlogfname, recvOff, (unsigned long) segbytes)));
 		}
 
 		/* Update state for write */
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 356073ed4a..813d7d92cc 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -2440,10 +2440,19 @@ retry:
 				 * removed or recycled.
 				 */
 				if (errno == ENOENT)
+				{
+					char		xlogfname[MAXFNAMELEN];
+					int			save_errno = errno;
+
+					XLogFileName(xlogfname, curFileTimeLine, sendSegNo,
+								 wal_segment_size);
+					errno = save_errno;
+
 					ereport(ERROR,
 							(errcode_for_file_access(),
 							 errmsg("requested WAL segment %s has already been removed",
-									XLogFileNameP(curFileTimeLine, sendSegNo))));
+									xlogfname)));
+				}
 				else
 					ereport(ERROR,
 							(errcode_for_file_access(),
@@ -2457,11 +2466,18 @@ retry:
 		if (sendOff != startoff)
 		{
 			if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0)
+			{
+				char		xlogfname[MAXFNAMELEN];
+				int			save_errno = errno;
+
+				XLogFileName(xlogfname, curFileTimeLine, sendSegNo,
+							 wal_segment_size);
+				errno = save_errno;
 				ereport(ERROR,
 						(errcode_for_file_access(),
 						 errmsg("could not seek in log segment %s to offset %u: %m",
-								XLogFileNameP(curFileTimeLine, sendSegNo),
-								startoff)));
+								xlogfname, startoff)));
+			}
 			sendOff = startoff;
 		}
 
@@ -2476,19 +2492,28 @@ retry:
 		pgstat_report_wait_end();
 		if (readbytes < 0)
 		{
+			char		xlogfname[MAXFNAMELEN];
+			int			save_errno = errno;
+
+			XLogFileName(xlogfname, curFileTimeLine, sendSegNo,
+						 wal_segment_size);
+			errno = save_errno;
+
 			ereport(ERROR,
 					(errcode_for_file_access(),
 					 errmsg("could not read from log segment %s, offset %u, length %zu: %m",
-							XLogFileNameP(curFileTimeLine, sendSegNo),
-							sendOff, (Size) segbytes)));
+							xlogfname, sendOff, (Size) segbytes)));
 		}
 		else if (readbytes == 0)
 		{
+			char		xlogfname[MAXFNAMELEN];
+
+			XLogFileName(xlogfname, curFileTimeLine, sendSegNo,
+						 wal_segment_size);
 			ereport(ERROR,
 					(errcode(ERRCODE_DATA_CORRUPTED),
 					 errmsg("could not read from log segment %s, offset %u: read %d of %zu",
-							XLogFileNameP(curFileTimeLine, sendSegNo),
-							sendOff, readbytes, (Size) segbytes)));
+							xlogfname, sendOff, readbytes, (Size) segbytes)));
 		}
 
 		/* Update state for read */
-- 
2.24.0

Attachment: signature.asc
Description: PGP signature

Reply via email to