On Tue, Dec 03, 2019 at 09:35:00AM -0300, Alvaro Herrera wrote: > You didn't attach anything, but I concur about the low probability > aspect: the assertion failure does not occur in production builds > (obviously); and only an out-of-memory situation is a real problem > when > an fsync fails. Anyway this should be a very localized fix, right?
Sorry. You get something like the attached. The recent refactoring work you committed in this area causes already conflicts on REL_12_STABLE. > I'm not sure that the internationalization stuff in issue_xlog_fsync > is correct. I think the _() should be gettext_noop(), or alternatively > the errmsg() should be errmsg_internal(); otherwise the translation is > invoked twice. (I didn't verify this.) Hmm. We actually do both in tablecmds.c:ATWrongRelkindError(), and that's the code I was looking at yesterday when thinking about the problem.. However, parse_agg.c, parse_expr.c and parse_func.c among others like vacuumlazy.c use directly errmsg_internal() without translating the string first. So there is indeed duplicated work for both. Does the attached patch look correct to you? -- Michael
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 6bc1a6b46d..01ca07b552 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -10171,7 +10171,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno) errno = save_errno; ereport(PANIC, (errcode_for_file_access(), - errmsg(msg, xlogfname))); + errmsg_internal(msg, xlogfname))); } pgstat_report_wait_end(); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 5440eb9015..3d98af47b1 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -5230,7 +5230,7 @@ ATWrongRelkindError(Relation rel, int allowed_targets) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg(msg, RelationGetRelationName(rel)))); + errmsg_internal(msg, RelationGetRelationName(rel)))); } /*
From 491faa2e92b5f637bb8b76eb1a9c2e93a6eec3a6 Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@paquier.xyz> Date: Wed, 4 Dec 2019 13:57:18 +0900 Subject: [PATCH] Remove use of XLogFileNameP() in error string generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit XLogFileNameP() is a wrapper routine able to build a palloc'd string for a WAL segment name, which is used in error strings. There were several code paths where it is called in a critical section, where memory allocation is not allowed, triggering an assertion failure on PANIC instead of generating a proper message. Note that this could be a problem on OOM when calling the routine, as any failure would be escalated to a PANIC as well. This removes all the callers of this routine, to fix existing mistakes and prevent future ones, replacing the incorrect locations with a logic using a fixed-size buffer. Author: Masahiko Sawada Reviewed-by: Michael Paquier, Álvaro Herrera Discussion: https://postgr.es/m/ca+fd4k5gc9h4uowmlg9k_qfnrnkkdew+-afveob9yx7z8jn...@mail.gmail.com --- src/backend/access/transam/xlog.c | 78 ++++++++++++++++++++------- src/backend/replication/walreceiver.c | 30 ++++++++--- src/backend/replication/walsender.c | 39 +++++++++++--- 3 files changed, 112 insertions(+), 35 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 1a87042223..58c4ed9083 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -2499,14 +2499,21 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible) pgstat_report_wait_end(); if (written <= 0) { + char xlogfname[MAXFNAMELEN]; + int save_errno; + if (errno == EINTR) continue; + + save_errno = errno; + XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo, + wal_segment_size); + errno = save_errno; ereport(PANIC, (errcode_for_file_access(), errmsg("could not write to log file %s " "at offset %u, length %zu: %m", - XLogFileNameP(ThisTimeLineID, openLogSegNo), - startoffset, nleft))); + xlogfname, startoffset, nleft))); } nleft -= written; from += written; @@ -3792,10 +3799,17 @@ XLogFileClose(void) #endif if (close(openLogFile)) + { + char xlogfname[MAXFNAMELEN]; + int save_errno = errno; + + XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo, wal_segment_size); + errno = save_errno; ereport(PANIC, (errcode_for_file_access(), - errmsg("could not close file \"%s\": %m", - XLogFileNameP(ThisTimeLineID, openLogSegNo)))); + errmsg("could not close file \"%s\": %m", xlogfname))); + } + openLogFile = -1; } @@ -5527,10 +5541,17 @@ exitArchiveRecovery(TimeLineID endTLI, XLogRecPtr endOfLog) fd = XLogFileInit(startLogSegNo, &use_existent, true); if (close(fd)) + { + char xlogfname[MAXFNAMELEN]; + int save_errno = errno; + + XLogFileName(xlogfname, ThisTimeLineID, startLogSegNo, + wal_segment_size); + errno = save_errno; ereport(ERROR, (errcode_for_file_access(), - errmsg("could not close file \"%s\": %m", - XLogFileNameP(ThisTimeLineID, startLogSegNo)))); + errmsg("could not close file \"%s\": %m", xlogfname))); + } } /* @@ -10078,10 +10099,19 @@ assign_xlog_sync_method(int new_sync_method, void *extra) { pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN); if (pg_fsync(openLogFile) != 0) + { + char xlogfname[MAXFNAMELEN]; + int save_errno; + + save_errno = errno; + XLogFileName(xlogfname, ThisTimeLineID, openLogSegNo, + wal_segment_size); + errno = save_errno; ereport(PANIC, (errcode_for_file_access(), - errmsg("could not fsync file \"%s\": %m", - XLogFileNameP(ThisTimeLineID, openLogSegNo)))); + errmsg("could not fsync file \"%s\": %m", xlogfname))); + } + pgstat_report_wait_end(); if (get_sync_bit(sync_method) != get_sync_bit(new_sync_method)) XLogFileClose(); @@ -10099,32 +10129,25 @@ assign_xlog_sync_method(int new_sync_method, void *extra) void issue_xlog_fsync(int fd, XLogSegNo segno) { + char *msg = NULL; + pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC); switch (sync_method) { case SYNC_METHOD_FSYNC: if (pg_fsync_no_writethrough(fd) != 0) - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not fsync file \"%s\": %m", - XLogFileNameP(ThisTimeLineID, segno)))); + msg = _("could not fsync file \"%s\": %m"); break; #ifdef HAVE_FSYNC_WRITETHROUGH case SYNC_METHOD_FSYNC_WRITETHROUGH: if (pg_fsync_writethrough(fd) != 0) - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not fsync write-through file \"%s\": %m", - XLogFileNameP(ThisTimeLineID, segno)))); + msg = _("could not fsync write-through file \"%s\": %m"); break; #endif #ifdef HAVE_FDATASYNC case SYNC_METHOD_FDATASYNC: if (pg_fdatasync(fd) != 0) - ereport(PANIC, - (errcode_for_file_access(), - errmsg("could not fdatasync file \"%s\": %m", - XLogFileNameP(ThisTimeLineID, segno)))); + msg = _("could not fdatasync file \"%s\": %m"); break; #endif case SYNC_METHOD_OPEN: @@ -10135,6 +10158,21 @@ issue_xlog_fsync(int fd, XLogSegNo segno) elog(PANIC, "unrecognized wal_sync_method: %d", sync_method); break; } + + /* PANIC if failed to fsync */ + if (msg) + { + char xlogfname[MAXFNAMELEN]; + int save_errno = errno; + + XLogFileName(xlogfname, ThisTimeLineID, segno, + wal_segment_size); + errno = save_errno; + ereport(PANIC, + (errcode_for_file_access(), + errmsg(msg, xlogfname))); + } + pgstat_report_wait_end(); } diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index 6abc780778..dbba96af6c 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -576,17 +576,17 @@ WalReceiverMain(void) char xlogfname[MAXFNAMELEN]; XLogWalRcvFlush(false); + XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size); if (close(recvFile) != 0) ereport(PANIC, (errcode_for_file_access(), errmsg("could not close log segment %s: %m", - XLogFileNameP(recvFileTLI, recvSegNo)))); + xlogfname))); /* * Create .done file forcibly to prevent the streamed segment from * being archived later. */ - XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size); if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS) XLogArchiveForceDone(xlogfname); else @@ -911,6 +911,8 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) XLogWalRcvFlush(false); + XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size); + /* * XLOG segment files will be re-read by recovery in startup * process soon, so we don't advise the OS to release cache @@ -920,13 +922,12 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) ereport(PANIC, (errcode_for_file_access(), errmsg("could not close log segment %s: %m", - XLogFileNameP(recvFileTLI, recvSegNo)))); + xlogfname))); /* * Create .done file forcibly to prevent the streamed segment * from being archived later. */ - XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size); if (XLogArchiveMode != ARCHIVE_MODE_ALWAYS) XLogArchiveForceDone(xlogfname); else @@ -954,11 +955,18 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) if (recvOff != startoff) { if (lseek(recvFile, (off_t) startoff, SEEK_SET) < 0) + { + char xlogfname[MAXFNAMELEN]; + int save_errno = errno; + + XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size); + errno = save_errno; ereport(PANIC, (errcode_for_file_access(), errmsg("could not seek in log segment %s to offset %u: %m", - XLogFileNameP(recvFileTLI, recvSegNo), - startoff))); + xlogfname, startoff))); + } + recvOff = startoff; } @@ -968,15 +976,21 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr) byteswritten = write(recvFile, buf, segbytes); if (byteswritten <= 0) { + char xlogfname[MAXFNAMELEN]; + int save_errno; + /* if write didn't set errno, assume no disk space */ if (errno == 0) errno = ENOSPC; + + save_errno = errno; + XLogFileName(xlogfname, recvFileTLI, recvSegNo, wal_segment_size); + errno = save_errno; ereport(PANIC, (errcode_for_file_access(), errmsg("could not write to log segment %s " "at offset %u, length %lu: %m", - XLogFileNameP(recvFileTLI, recvSegNo), - recvOff, (unsigned long) segbytes))); + xlogfname, recvOff, (unsigned long) segbytes))); } /* Update state for write */ diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 356073ed4a..813d7d92cc 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -2440,10 +2440,19 @@ retry: * removed or recycled. */ if (errno == ENOENT) + { + char xlogfname[MAXFNAMELEN]; + int save_errno = errno; + + XLogFileName(xlogfname, curFileTimeLine, sendSegNo, + wal_segment_size); + errno = save_errno; + ereport(ERROR, (errcode_for_file_access(), errmsg("requested WAL segment %s has already been removed", - XLogFileNameP(curFileTimeLine, sendSegNo)))); + xlogfname))); + } else ereport(ERROR, (errcode_for_file_access(), @@ -2457,11 +2466,18 @@ retry: if (sendOff != startoff) { if (lseek(sendFile, (off_t) startoff, SEEK_SET) < 0) + { + char xlogfname[MAXFNAMELEN]; + int save_errno = errno; + + XLogFileName(xlogfname, curFileTimeLine, sendSegNo, + wal_segment_size); + errno = save_errno; ereport(ERROR, (errcode_for_file_access(), errmsg("could not seek in log segment %s to offset %u: %m", - XLogFileNameP(curFileTimeLine, sendSegNo), - startoff))); + xlogfname, startoff))); + } sendOff = startoff; } @@ -2476,19 +2492,28 @@ retry: pgstat_report_wait_end(); if (readbytes < 0) { + char xlogfname[MAXFNAMELEN]; + int save_errno = errno; + + XLogFileName(xlogfname, curFileTimeLine, sendSegNo, + wal_segment_size); + errno = save_errno; + ereport(ERROR, (errcode_for_file_access(), errmsg("could not read from log segment %s, offset %u, length %zu: %m", - XLogFileNameP(curFileTimeLine, sendSegNo), - sendOff, (Size) segbytes))); + xlogfname, sendOff, (Size) segbytes))); } else if (readbytes == 0) { + char xlogfname[MAXFNAMELEN]; + + XLogFileName(xlogfname, curFileTimeLine, sendSegNo, + wal_segment_size); ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("could not read from log segment %s, offset %u: read %d of %zu", - XLogFileNameP(curFileTimeLine, sendSegNo), - sendOff, readbytes, (Size) segbytes))); + xlogfname, sendOff, readbytes, (Size) segbytes))); } /* Update state for read */ -- 2.24.0
signature.asc
Description: PGP signature