Here is a new version of the patch with feedback addressed. -- Nathan Bossart Amazon Web Services: https://aws.amazon.com
>From 3cda5bb87c82738ad6f8a082ef5dfeb49cd51392 Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nat...@postgresql.org> Date: Tue, 28 Nov 2023 11:17:13 -0600 Subject: [PATCH v3 1/1] archiver exception handling
--- contrib/basic_archive/basic_archive.c | 134 +------------------------- doc/src/sgml/archive-modules.sgml | 11 ++- src/backend/archive/shell_archive.c | 5 - src/backend/postmaster/pgarch.c | 93 +++++++++++++++++- 4 files changed, 107 insertions(+), 136 deletions(-) diff --git a/contrib/basic_archive/basic_archive.c b/contrib/basic_archive/basic_archive.c index 4d78c31859..d575faab93 100644 --- a/contrib/basic_archive/basic_archive.c +++ b/contrib/basic_archive/basic_archive.c @@ -40,26 +40,18 @@ PG_MODULE_MAGIC; -typedef struct BasicArchiveData -{ - MemoryContext context; -} BasicArchiveData; - static char *archive_directory = NULL; -static void basic_archive_startup(ArchiveModuleState *state); static bool basic_archive_configured(ArchiveModuleState *state); static bool basic_archive_file(ArchiveModuleState *state, const char *file, const char *path); -static void basic_archive_file_internal(const char *file, const char *path); static bool check_archive_directory(char **newval, void **extra, GucSource source); static bool compare_files(const char *file1, const char *file2); -static void basic_archive_shutdown(ArchiveModuleState *state); static const ArchiveModuleCallbacks basic_archive_callbacks = { - .startup_cb = basic_archive_startup, + .startup_cb = NULL, .check_configured_cb = basic_archive_configured, .archive_file_cb = basic_archive_file, - .shutdown_cb = basic_archive_shutdown + .shutdown_cb = NULL }; /* @@ -93,24 +85,6 @@ _PG_archive_module_init(void) return &basic_archive_callbacks; } -/* - * basic_archive_startup - * - * Creates the module's memory context. - */ -void -basic_archive_startup(ArchiveModuleState *state) -{ - BasicArchiveData *data; - - data = (BasicArchiveData *) MemoryContextAllocZero(TopMemoryContext, - sizeof(BasicArchiveData)); - data->context = AllocSetContextCreate(TopMemoryContext, - "basic_archive", - ALLOCSET_DEFAULT_SIZES); - state->private_data = (void *) data; -} - /* * check_archive_directory * @@ -171,74 +145,6 @@ basic_archive_configured(ArchiveModuleState *state) */ static bool basic_archive_file(ArchiveModuleState *state, const char *file, const char *path) -{ - sigjmp_buf local_sigjmp_buf; - MemoryContext oldcontext; - BasicArchiveData *data = (BasicArchiveData *) state->private_data; - MemoryContext basic_archive_context = data->context; - - /* - * We run basic_archive_file_internal() in our own memory context so that - * we can easily reset it during error recovery (thus avoiding memory - * leaks). - */ - oldcontext = MemoryContextSwitchTo(basic_archive_context); - - /* - * Since the archiver operates at the bottom of the exception stack, - * ERRORs turn into FATALs and cause the archiver process to restart. - * However, using ereport(ERROR, ...) when there are problems is easy to - * code and maintain. Therefore, we create our own exception handler to - * catch ERRORs and return false instead of restarting the archiver - * whenever there is a failure. - */ - if (sigsetjmp(local_sigjmp_buf, 1) != 0) - { - /* Since not using PG_TRY, must reset error stack by hand */ - error_context_stack = NULL; - - /* Prevent interrupts while cleaning up */ - HOLD_INTERRUPTS(); - - /* Report the error and clear ErrorContext for next time */ - EmitErrorReport(); - FlushErrorState(); - - /* Close any files left open by copy_file() or compare_files() */ - AtEOSubXact_Files(false, InvalidSubTransactionId, InvalidSubTransactionId); - - /* Reset our memory context and switch back to the original one */ - MemoryContextSwitchTo(oldcontext); - MemoryContextReset(basic_archive_context); - - /* Remove our exception handler */ - PG_exception_stack = NULL; - - /* Now we can allow interrupts again */ - RESUME_INTERRUPTS(); - - /* Report failure so that the archiver retries this file */ - return false; - } - - /* Enable our exception handler */ - PG_exception_stack = &local_sigjmp_buf; - - /* Archive the file! */ - basic_archive_file_internal(file, path); - - /* Remove our exception handler */ - PG_exception_stack = NULL; - - /* Reset our memory context and switch back to the original one */ - MemoryContextSwitchTo(oldcontext); - MemoryContextReset(basic_archive_context); - - return true; -} - -static void -basic_archive_file_internal(const char *file, const char *path) { char destination[MAXPGPATH]; char temp[MAXPGPATH + 256]; @@ -272,7 +178,7 @@ basic_archive_file_internal(const char *file, const char *path) fsync_fname(destination, false); fsync_fname(archive_directory, true); - return; + return true; } ereport(ERROR, @@ -312,6 +218,8 @@ basic_archive_file_internal(const char *file, const char *path) ereport(DEBUG1, (errmsg("archived \"%s\" via basic_archive", file))); + + return true; } /* @@ -394,35 +302,3 @@ compare_files(const char *file1, const char *file2) return ret; } - -/* - * basic_archive_shutdown - * - * Frees our allocated state. - */ -static void -basic_archive_shutdown(ArchiveModuleState *state) -{ - BasicArchiveData *data = (BasicArchiveData *) state->private_data; - MemoryContext basic_archive_context; - - /* - * If we didn't get to storing the pointer to our allocated state, we - * don't have anything to clean up. - */ - if (data == NULL) - return; - - basic_archive_context = data->context; - Assert(CurrentMemoryContext != basic_archive_context); - - if (MemoryContextIsValid(basic_archive_context)) - MemoryContextDelete(basic_archive_context); - data->context = NULL; - - /* - * Finally, free the state. - */ - pfree(data); - state->private_data = NULL; -} diff --git a/doc/src/sgml/archive-modules.sgml b/doc/src/sgml/archive-modules.sgml index 7064307d9e..f9b8d6c740 100644 --- a/doc/src/sgml/archive-modules.sgml +++ b/doc/src/sgml/archive-modules.sgml @@ -128,12 +128,21 @@ typedef bool (*ArchiveFileCB) (ArchiveModuleState *state, const char *file, cons If <literal>true</literal> is returned, the server proceeds as if the file was successfully archived, which may include recycling or removing the - original WAL file. If <literal>false</literal> is returned, the server will + original WAL file. If <literal>false</literal> is returned or an error is thrown, the server will keep the original WAL file and retry archiving later. <replaceable>file</replaceable> will contain just the file name of the WAL file to archive, while <replaceable>path</replaceable> contains the full path of the WAL file (including the file name). </para> + + <note> + <para> + The <function>archive_file_cb</function> callback is called in a + short-lived memory context that will be reset between invocations. If you + need longer-lived storage, create a memory context in the module's + <function>startup_cb</function> callback. + </para> + </note> </sect2> <sect2 id="archive-module-shutdown"> diff --git a/src/backend/archive/shell_archive.c b/src/backend/archive/shell_archive.c index 157ca4c751..793f607db8 100644 --- a/src/backend/archive/shell_archive.c +++ b/src/backend/archive/shell_archive.c @@ -66,9 +66,6 @@ shell_archive_file(ArchiveModuleState *state, const char *file, "archive_command", "fp", file, nativePath); - if (nativePath) - pfree(nativePath); - ereport(DEBUG3, (errmsg_internal("executing archive command \"%s\"", xlogarchcmd))); @@ -127,8 +124,6 @@ shell_archive_file(ArchiveModuleState *state, const char *file, return false; } - pfree(xlogarchcmd); - elog(DEBUG1, "archived write-ahead log file \"%s\"", file); return true; } diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index 46af349564..8bb15ac960 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -38,6 +38,7 @@ #include "pgstat.h" #include "postmaster/interrupt.h" #include "postmaster/pgarch.h" +#include "storage/condition_variable.h" #include "storage/fd.h" #include "storage/ipc.h" #include "storage/latch.h" @@ -49,6 +50,8 @@ #include "utils/guc.h" #include "utils/memutils.h" #include "utils/ps_status.h" +#include "utils/resowner.h" +#include "utils/timeout.h" /* ---------- @@ -101,6 +104,7 @@ static time_t last_sigterm_time = 0; static PgArchData *PgArch = NULL; static const ArchiveModuleCallbacks *ArchiveCallbacks; static ArchiveModuleState *archive_module_state; +static MemoryContext archive_context; /* @@ -252,6 +256,11 @@ PgArchiverMain(void) arch_files->arch_heap = binaryheap_allocate(NUM_FILES_PER_DIRECTORY_SCAN, ready_file_comparator, NULL); + /* Initialize our memory context. */ + archive_context = AllocSetContextCreate(TopMemoryContext, + "archiver", + ALLOCSET_DEFAULT_SIZES); + /* Load the archive_library. */ LoadArchiveLibrary(); @@ -501,6 +510,8 @@ pgarch_ArchiverCopyLoop(void) static bool pgarch_archiveXlog(char *xlog) { + sigjmp_buf local_sigjmp_buf; + MemoryContext oldcontext; char pathname[MAXPGPATH]; char activitymsg[MAXFNAMELEN + 16]; bool ret; @@ -511,7 +522,87 @@ pgarch_archiveXlog(char *xlog) snprintf(activitymsg, sizeof(activitymsg), "archiving %s", xlog); set_ps_display(activitymsg); - ret = ArchiveCallbacks->archive_file_cb(archive_module_state, xlog, pathname); + oldcontext = MemoryContextSwitchTo(archive_context); + + /* + * Since the archiver operates at the bottom of the exception stack, + * ERRORs turn into FATALs and cause the archiver process to restart. + * However, using ereport(ERROR, ...) when there are problems is easy to + * code and maintain. Therefore, we create our own exception handler to + * catch ERRORs and return false instead of restarting the archiver + * whenever there is a failure. + * + * We assume ERRORs from the archiving callback are the most common + * exceptions experienced by the archiver, so we opt to handle exceptions + * here instead of PgArchiverMain() to avoid reinitializing the archiver + * too frequently. We could instead add a sigsetjmp() block to + * PgArchiverMain() and use PG_TRY/PG_CATCH here, but the extra code to + * avoid the odd archiver restart doesn't seem worth it. + */ + if (sigsetjmp(local_sigjmp_buf, 1) != 0) + { + /* Since not using PG_TRY, must reset error stack by hand */ + error_context_stack = NULL; + + /* Prevent interrupts while cleaning up */ + HOLD_INTERRUPTS(); + + /* Report the error to the server log. */ + EmitErrorReport(); + + /* + * Try to clean up anything the archive module left behind. We try to + * cover anything that an archive module could conceivably have left + * behind, but it is of course possible that modules could be doing + * unexpected things that require additional cleanup. Module authors + * should be sure to do any extra required cleanup in a PG_CATCH block + * within the archiving callback, and they are encouraged to notify + * the pgsql-hackers mailing list so that we can add it here. + */ + disable_all_timeouts(false); + LWLockReleaseAll(); + ConditionVariableCancelSleep(); + pgstat_report_wait_end(); + ReleaseAuxProcessResources(false); + AtEOXact_Files(false); + AtEOXact_HashTables(false); + + /* + * Return to the original memory context and clear ErrorContext for + * next time. + */ + MemoryContextSwitchTo(oldcontext); + FlushErrorState(); + + /* Flush any leaked data */ + MemoryContextReset(archive_context); + + /* Remove our exception handler */ + PG_exception_stack = NULL; + + /* Now we can allow interrupts again */ + RESUME_INTERRUPTS(); + + /* Report failure so that the archiver retries this file */ + ret = false; + } + else + { + /* Enable our exception handler */ + PG_exception_stack = &local_sigjmp_buf; + + /* Archive the file! */ + ret = ArchiveCallbacks->archive_file_cb(archive_module_state, + xlog, pathname); + + /* Remove our exception handler */ + PG_exception_stack = NULL; + + /* Reset our memory context and switch back to the original one */ + MemoryContextSwitchTo(oldcontext); + MemoryContextReset(archive_context); + } + if (ret) snprintf(activitymsg, sizeof(activitymsg), "last was %s", xlog); else -- 2.25.1