Here is a new version of the patch with feedback addressed.

-- 
Nathan Bossart
Amazon Web Services: https://aws.amazon.com
>From 3cda5bb87c82738ad6f8a082ef5dfeb49cd51392 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <nat...@postgresql.org>
Date: Tue, 28 Nov 2023 11:17:13 -0600
Subject: [PATCH v3 1/1] archiver exception handling

---
 contrib/basic_archive/basic_archive.c | 134 +-------------------------
 doc/src/sgml/archive-modules.sgml     |  11 ++-
 src/backend/archive/shell_archive.c   |   5 -
 src/backend/postmaster/pgarch.c       |  93 +++++++++++++++++-
 4 files changed, 107 insertions(+), 136 deletions(-)

diff --git a/contrib/basic_archive/basic_archive.c b/contrib/basic_archive/basic_archive.c
index 4d78c31859..d575faab93 100644
--- a/contrib/basic_archive/basic_archive.c
+++ b/contrib/basic_archive/basic_archive.c
@@ -40,26 +40,18 @@
 
 PG_MODULE_MAGIC;
 
-typedef struct BasicArchiveData
-{
-	MemoryContext context;
-} BasicArchiveData;
-
 static char *archive_directory = NULL;
 
-static void basic_archive_startup(ArchiveModuleState *state);
 static bool basic_archive_configured(ArchiveModuleState *state);
 static bool basic_archive_file(ArchiveModuleState *state, const char *file, const char *path);
-static void basic_archive_file_internal(const char *file, const char *path);
 static bool check_archive_directory(char **newval, void **extra, GucSource source);
 static bool compare_files(const char *file1, const char *file2);
-static void basic_archive_shutdown(ArchiveModuleState *state);
 
 static const ArchiveModuleCallbacks basic_archive_callbacks = {
-	.startup_cb = basic_archive_startup,
+	.startup_cb = NULL,
 	.check_configured_cb = basic_archive_configured,
 	.archive_file_cb = basic_archive_file,
-	.shutdown_cb = basic_archive_shutdown
+	.shutdown_cb = NULL
 };
 
 /*
@@ -93,24 +85,6 @@ _PG_archive_module_init(void)
 	return &basic_archive_callbacks;
 }
 
-/*
- * basic_archive_startup
- *
- * Creates the module's memory context.
- */
-void
-basic_archive_startup(ArchiveModuleState *state)
-{
-	BasicArchiveData *data;
-
-	data = (BasicArchiveData *) MemoryContextAllocZero(TopMemoryContext,
-													   sizeof(BasicArchiveData));
-	data->context = AllocSetContextCreate(TopMemoryContext,
-										  "basic_archive",
-										  ALLOCSET_DEFAULT_SIZES);
-	state->private_data = (void *) data;
-}
-
 /*
  * check_archive_directory
  *
@@ -171,74 +145,6 @@ basic_archive_configured(ArchiveModuleState *state)
  */
 static bool
 basic_archive_file(ArchiveModuleState *state, const char *file, const char *path)
-{
-	sigjmp_buf	local_sigjmp_buf;
-	MemoryContext oldcontext;
-	BasicArchiveData *data = (BasicArchiveData *) state->private_data;
-	MemoryContext basic_archive_context = data->context;
-
-	/*
-	 * We run basic_archive_file_internal() in our own memory context so that
-	 * we can easily reset it during error recovery (thus avoiding memory
-	 * leaks).
-	 */
-	oldcontext = MemoryContextSwitchTo(basic_archive_context);
-
-	/*
-	 * Since the archiver operates at the bottom of the exception stack,
-	 * ERRORs turn into FATALs and cause the archiver process to restart.
-	 * However, using ereport(ERROR, ...) when there are problems is easy to
-	 * code and maintain.  Therefore, we create our own exception handler to
-	 * catch ERRORs and return false instead of restarting the archiver
-	 * whenever there is a failure.
-	 */
-	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
-	{
-		/* Since not using PG_TRY, must reset error stack by hand */
-		error_context_stack = NULL;
-
-		/* Prevent interrupts while cleaning up */
-		HOLD_INTERRUPTS();
-
-		/* Report the error and clear ErrorContext for next time */
-		EmitErrorReport();
-		FlushErrorState();
-
-		/* Close any files left open by copy_file() or compare_files() */
-		AtEOSubXact_Files(false, InvalidSubTransactionId, InvalidSubTransactionId);
-
-		/* Reset our memory context and switch back to the original one */
-		MemoryContextSwitchTo(oldcontext);
-		MemoryContextReset(basic_archive_context);
-
-		/* Remove our exception handler */
-		PG_exception_stack = NULL;
-
-		/* Now we can allow interrupts again */
-		RESUME_INTERRUPTS();
-
-		/* Report failure so that the archiver retries this file */
-		return false;
-	}
-
-	/* Enable our exception handler */
-	PG_exception_stack = &local_sigjmp_buf;
-
-	/* Archive the file! */
-	basic_archive_file_internal(file, path);
-
-	/* Remove our exception handler */
-	PG_exception_stack = NULL;
-
-	/* Reset our memory context and switch back to the original one */
-	MemoryContextSwitchTo(oldcontext);
-	MemoryContextReset(basic_archive_context);
-
-	return true;
-}
-
-static void
-basic_archive_file_internal(const char *file, const char *path)
 {
 	char		destination[MAXPGPATH];
 	char		temp[MAXPGPATH + 256];
@@ -272,7 +178,7 @@ basic_archive_file_internal(const char *file, const char *path)
 			fsync_fname(destination, false);
 			fsync_fname(archive_directory, true);
 
-			return;
+			return true;
 		}
 
 		ereport(ERROR,
@@ -312,6 +218,8 @@ basic_archive_file_internal(const char *file, const char *path)
 
 	ereport(DEBUG1,
 			(errmsg("archived \"%s\" via basic_archive", file)));
+
+	return true;
 }
 
 /*
@@ -394,35 +302,3 @@ compare_files(const char *file1, const char *file2)
 
 	return ret;
 }
-
-/*
- * basic_archive_shutdown
- *
- * Frees our allocated state.
- */
-static void
-basic_archive_shutdown(ArchiveModuleState *state)
-{
-	BasicArchiveData *data = (BasicArchiveData *) state->private_data;
-	MemoryContext basic_archive_context;
-
-	/*
-	 * If we didn't get to storing the pointer to our allocated state, we
-	 * don't have anything to clean up.
-	 */
-	if (data == NULL)
-		return;
-
-	basic_archive_context = data->context;
-	Assert(CurrentMemoryContext != basic_archive_context);
-
-	if (MemoryContextIsValid(basic_archive_context))
-		MemoryContextDelete(basic_archive_context);
-	data->context = NULL;
-
-	/*
-	 * Finally, free the state.
-	 */
-	pfree(data);
-	state->private_data = NULL;
-}
diff --git a/doc/src/sgml/archive-modules.sgml b/doc/src/sgml/archive-modules.sgml
index 7064307d9e..f9b8d6c740 100644
--- a/doc/src/sgml/archive-modules.sgml
+++ b/doc/src/sgml/archive-modules.sgml
@@ -128,12 +128,21 @@ typedef bool (*ArchiveFileCB) (ArchiveModuleState *state, const char *file, cons
 
     If <literal>true</literal> is returned, the server proceeds as if the file
     was successfully archived, which may include recycling or removing the
-    original WAL file.  If <literal>false</literal> is returned, the server will
+    original WAL file.  If <literal>false</literal> is returned or an error is thrown, the server will
     keep the original WAL file and retry archiving later.
     <replaceable>file</replaceable> will contain just the file name of the WAL
     file to archive, while <replaceable>path</replaceable> contains the full
     path of the WAL file (including the file name).
    </para>
+
+   <note>
+    <para>
+     The <function>archive_file_cb</function> callback is called in a
+     short-lived memory context that will be reset between invocations.  If you
+     need longer-lived storage, create a memory context in the module's
+     <function>startup_cb</function> callback.
+    </para>
+   </note>
   </sect2>
 
   <sect2 id="archive-module-shutdown">
diff --git a/src/backend/archive/shell_archive.c b/src/backend/archive/shell_archive.c
index 157ca4c751..793f607db8 100644
--- a/src/backend/archive/shell_archive.c
+++ b/src/backend/archive/shell_archive.c
@@ -66,9 +66,6 @@ shell_archive_file(ArchiveModuleState *state, const char *file,
 											   "archive_command", "fp",
 											   file, nativePath);
 
-	if (nativePath)
-		pfree(nativePath);
-
 	ereport(DEBUG3,
 			(errmsg_internal("executing archive command \"%s\"",
 							 xlogarchcmd)));
@@ -127,8 +124,6 @@ shell_archive_file(ArchiveModuleState *state, const char *file,
 		return false;
 	}
 
-	pfree(xlogarchcmd);
-
 	elog(DEBUG1, "archived write-ahead log file \"%s\"", file);
 	return true;
 }
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index 46af349564..8bb15ac960 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -38,6 +38,7 @@
 #include "pgstat.h"
 #include "postmaster/interrupt.h"
 #include "postmaster/pgarch.h"
+#include "storage/condition_variable.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/latch.h"
@@ -49,6 +50,8 @@
 #include "utils/guc.h"
 #include "utils/memutils.h"
 #include "utils/ps_status.h"
+#include "utils/resowner.h"
+#include "utils/timeout.h"
 
 
 /* ----------
@@ -101,6 +104,7 @@ static time_t last_sigterm_time = 0;
 static PgArchData *PgArch = NULL;
 static const ArchiveModuleCallbacks *ArchiveCallbacks;
 static ArchiveModuleState *archive_module_state;
+static MemoryContext archive_context;
 
 
 /*
@@ -252,6 +256,11 @@ PgArchiverMain(void)
 	arch_files->arch_heap = binaryheap_allocate(NUM_FILES_PER_DIRECTORY_SCAN,
 												ready_file_comparator, NULL);
 
+	/* Initialize our memory context. */
+	archive_context = AllocSetContextCreate(TopMemoryContext,
+											"archiver",
+											ALLOCSET_DEFAULT_SIZES);
+
 	/* Load the archive_library. */
 	LoadArchiveLibrary();
 
@@ -501,6 +510,8 @@ pgarch_ArchiverCopyLoop(void)
 static bool
 pgarch_archiveXlog(char *xlog)
 {
+	sigjmp_buf	local_sigjmp_buf;
+	MemoryContext oldcontext;
 	char		pathname[MAXPGPATH];
 	char		activitymsg[MAXFNAMELEN + 16];
 	bool		ret;
@@ -511,7 +522,87 @@ pgarch_archiveXlog(char *xlog)
 	snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog);
 	set_ps_display(activitymsg);
 
-	ret = ArchiveCallbacks->archive_file_cb(archive_module_state, xlog, pathname);
+	oldcontext = MemoryContextSwitchTo(archive_context);
+
+	/*
+	 * Since the archiver operates at the bottom of the exception stack,
+	 * ERRORs turn into FATALs and cause the archiver process to restart.
+	 * However, using ereport(ERROR, ...) when there are problems is easy to
+	 * code and maintain.  Therefore, we create our own exception handler to
+	 * catch ERRORs and return false instead of restarting the archiver
+	 * whenever there is a failure.
+	 *
+	 * We assume ERRORs from the archiving callback are the most common
+	 * exceptions experienced by the archiver, so we opt to handle exceptions
+	 * here instead of PgArchiverMain() to avoid reinitializing the archiver
+	 * too frequently.  We could instead add a sigsetjmp() block to
+	 * PgArchiverMain() and use PG_TRY/PG_CATCH here, but the extra code to
+	 * avoid the odd archiver restart doesn't seem worth it.
+	 */
+	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
+	{
+		/* Since not using PG_TRY, must reset error stack by hand */
+		error_context_stack = NULL;
+
+		/* Prevent interrupts while cleaning up */
+		HOLD_INTERRUPTS();
+
+		/* Report the error to the server log. */
+		EmitErrorReport();
+
+		/*
+		 * Try to clean up anything the archive module left behind.  We try to
+		 * cover anything that an archive module could conceivably have left
+		 * behind, but it is of course possible that modules could be doing
+		 * unexpected things that require additional cleanup.  Module authors
+		 * should be sure to do any extra required cleanup in a PG_CATCH block
+		 * within the archiving callback, and they are encouraged to notify
+		 * the pgsql-hackers mailing list so that we can add it here.
+		 */
+		disable_all_timeouts(false);
+		LWLockReleaseAll();
+		ConditionVariableCancelSleep();
+		pgstat_report_wait_end();
+		ReleaseAuxProcessResources(false);
+		AtEOXact_Files(false);
+		AtEOXact_HashTables(false);
+
+		/*
+		 * Return to the original memory context and clear ErrorContext for
+		 * next time.
+		 */
+		MemoryContextSwitchTo(oldcontext);
+		FlushErrorState();
+
+		/* Flush any leaked data */
+		MemoryContextReset(archive_context);
+
+		/* Remove our exception handler */
+		PG_exception_stack = NULL;
+
+		/* Now we can allow interrupts again */
+		RESUME_INTERRUPTS();
+
+		/* Report failure so that the archiver retries this file */
+		ret = false;
+	}
+	else
+	{
+		/* Enable our exception handler */
+		PG_exception_stack = &local_sigjmp_buf;
+
+		/* Archive the file! */
+		ret = ArchiveCallbacks->archive_file_cb(archive_module_state,
+												xlog, pathname);
+
+		/* Remove our exception handler */
+		PG_exception_stack = NULL;
+
+		/* Reset our memory context and switch back to the original one */
+		MemoryContextSwitchTo(oldcontext);
+		MemoryContextReset(archive_context);
+	}
+
 	if (ret)
 		snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog);
 	else
-- 
2.25.1

Reply via email to