Thank you for the comment.

At Fri, 25 May 2018 09:05:21 +0300, Heikki Linnakangas <hlinn...@iki.fi> wrote 
in <466a3c6d-7986-8cb1-d908-e85aa6a09...@iki.fi>
> On 25/05/18 07:45, Kyotaro HORIGUCHI wrote:
> > Hello.
> > I happened to see the following in XLogWrite.
> > 
> >>   ereport(PANIC,
> >>       (errcode_for_file_access(),
> >>        errmsg("could not seek in log file %s to offset %u: %m",
> >>           XLogFileNameP(ThisTimeLineID, openLogSegNo),
> >>           startoffset)));
> > where XLogFileNameP calls palloc within, and it is within a
> > critical section there. So it ends with assertion failure hiding
> > the PANIC message. We should use XLogFileName instead. The
> > problem has existed at least since 9.3. The code is frequently
> > revised so the patch needed to vary into four files.
> 
> Hmm, that's rather annoying, it sure would be handy if we could do
> small palloc()s like this in error messages safely.
> 
> I wonder if we could switch to ErrorContext in errstart()? That way,
> any small allocations in the ereport() arguments, like what
> XLogFileNameP() does, would be allocated in ErrorContext.

It already controlling error context per-recursion-level basis
but it doesn't work for the top-level errmsg(). I'm not sure the
basis of edata->assoc_context, it seems always set to
ErrorContext.

As a PoC, just moving to and restore from ErrorContext at the top
level seems working fine. (The first attached and it changes only
ereport.)

# The second is less invasive version of the previous patch..

regards.

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index 16531f7a0f..0e4877240f 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -154,6 +154,8 @@ static bool saved_timeval_set = false;
 static char formatted_start_time[FORMATTED_TS_LEN];
 static char formatted_log_time[FORMATTED_TS_LEN];
 
+/* Store memory context to restore after error reporting */
+MemoryContext	elog_oldcontext = NULL;
 
 /* Macro for checking errordata_stack_depth is reasonable */
 #define CHECK_STACK_DEPTH() \
@@ -396,8 +398,11 @@ errstart(int elevel, const char *filename, int lineno,
 	 * Any allocations for this error state level should go into ErrorContext
 	 */
 	edata->assoc_context = ErrorContext;
-
 	recursion_depth--;
+
+	/* Set memory context to ErrorContext if this is the toplevel */
+	if (recursion_depth == 0)
+		elog_oldcontext = MemoryContextSwitchTo(ErrorContext);
 	return true;
 }
 
@@ -414,19 +419,12 @@ errfinish(int dummy,...)
 {
 	ErrorData  *edata = &errordata[errordata_stack_depth];
 	int			elevel;
-	MemoryContext oldcontext;
 	ErrorContextCallback *econtext;
 
 	recursion_depth++;
 	CHECK_STACK_DEPTH();
 	elevel = edata->elevel;
 
-	/*
-	 * Do processing in ErrorContext, which we hope has enough reserved space
-	 * to report an error.
-	 */
-	oldcontext = MemoryContextSwitchTo(ErrorContext);
-
 	/*
 	 * Call any context callback functions.  Errors occurring in callback
 	 * functions will be treated as recursive errors --- this ensures we will
@@ -509,9 +507,12 @@ errfinish(int dummy,...)
 	errordata_stack_depth--;
 
 	/* Exit error-handling context */
-	MemoryContextSwitchTo(oldcontext);
 	recursion_depth--;
 
+	/* Restore memory context if this is the top-level */
+	if (recursion_depth == 0 && elog_oldcontext)
+		MemoryContextSwitchTo(elog_oldcontext);
+
 	/*
 	 * Perform error recovery action as specified by elevel.
 	 */
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index adbd6a2126..9cd7a106ba 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2477,7 +2477,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
 					ereport(PANIC,
 							(errcode_for_file_access(),
 							 errmsg("could not seek in log file %s to offset %u: %m",
-									XLogFileNameP(ThisTimeLineID, openLogSegNo),
+									XLogFileNameEP(ThisTimeLineID, openLogSegNo),
 									startoffset)));
 				openLogOff = startoffset;
 			}
@@ -2500,7 +2500,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
 							(errcode_for_file_access(),
 							 errmsg("could not write to log file %s "
 									"at offset %u, length %zu: %m",
-									XLogFileNameP(ThisTimeLineID, openLogSegNo),
+									XLogFileNameEP(ThisTimeLineID, openLogSegNo),
 									openLogOff, nbytes)));
 				}
 				nleft -= written;
@@ -3743,7 +3743,8 @@ XLogFileClose(void)
 		ereport(PANIC,
 				(errcode_for_file_access(),
 				 errmsg("could not close log file %s: %m",
-						XLogFileNameP(ThisTimeLineID, openLogSegNo))));
+						XLogFileNameEP(ThisTimeLineID, openLogSegNo))));
+
 	openLogFile = -1;
 }
 
@@ -10160,7 +10161,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
 				ereport(PANIC,
 						(errcode_for_file_access(),
 						 errmsg("could not fsync log file %s: %m",
-								XLogFileNameP(ThisTimeLineID, segno))));
+								XLogFileNameEP(ThisTimeLineID, segno))));
 			break;
 #ifdef HAVE_FSYNC_WRITETHROUGH
 		case SYNC_METHOD_FSYNC_WRITETHROUGH:
@@ -10168,7 +10169,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
 				ereport(PANIC,
 						(errcode_for_file_access(),
 						 errmsg("could not fsync write-through log file %s: %m",
-								XLogFileNameP(ThisTimeLineID, segno))));
+								XLogFileNameEP(ThisTimeLineID, segno))));
 			break;
 #endif
 #ifdef HAVE_FDATASYNC
@@ -10177,7 +10178,7 @@ issue_xlog_fsync(int fd, XLogSegNo segno)
 				ereport(PANIC,
 						(errcode_for_file_access(),
 						 errmsg("could not fdatasync log file %s: %m",
-								XLogFileNameP(ThisTimeLineID, segno))));
+								XLogFileNameEP(ThisTimeLineID, segno))));
 			break;
 #endif
 		case SYNC_METHOD_OPEN:
@@ -10202,6 +10203,21 @@ XLogFileNameP(TimeLineID tli, XLogSegNo segno)
 	return result;
 }
 
+/*
+ * Do the same thing with XLogFileNameP but using ErrorContext.
+ *
+ * This is intended to be used in the parameters of ereport invoked in a
+ * critical section.
+ */
+char *
+XLogFileNameEP(TimeLineID tli, XLogSegNo segno)
+{
+	char	   *result = MemoryContextAlloc(ErrorContext, MAXFNAMELEN);
+
+	XLogFileName(result, tli, segno, wal_segment_size);
+	return result;
+}
+
 /*
  * do_pg_start_backup is the workhorse of the user-visible pg_start_backup()
  * function. It creates the necessary starting checkpoint and constructs the
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 421ba6d775..e0ec1c0c91 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -253,6 +253,7 @@ extern void SetRecoveryPause(bool recoveryPause);
 extern TimestampTz GetLatestXTime(void);
 extern TimestampTz GetCurrentChunkReplayStartTime(void);
 extern char *XLogFileNameP(TimeLineID tli, XLogSegNo segno);
+extern char *XLogFileNameEP(TimeLineID tli, XLogSegNo segno);
 
 extern void UpdateControlFile(void);
 extern uint64 GetSystemIdentifier(void);

Reply via email to