From 0fede8c102afbc91f8178129b86d61c0f5be3821 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Thu, 29 Sep 2022 08:21:48 +0000
Subject: [PATCH v3] Avoid memory leaks during backups

Postgres currently can leak memory if a failure occurs during base
backup in do_pg_backup_start() or do_pg_backup_stop() or
perform_base_backup(). The palloc'd memory such as backup_state or
tablespace_map in xlogfuncs.c or basebackup.c or tablespaceinfo in
perform_base_backup() or any other, is left-out which may cause
memory bloat on the server eventually. To experience this issue,
run pg_basebackup with --label name longer than 1024 characters and
observe memory with watch command, the memory usage goes up. It looks
like the memory leak issue has been there for quite some time.

To solve the above problem, we create separate memory context for
backup and add clean up callback in PostgresMain()'s error handling
code using sigsetjmp().

The design of the patch is as follows:

For backup functions or on-line backups, a new memory context is
created as a child of TopMemoryContext in pg_backup_start() which
gets deleted upon any error or at the end of pg_backup_stop().

For perform_base_backup() or base backups, a new memory context is
created as a child of CurrentMemoryContext which gets deleted upon
any error or at the end of perform_base_backup().
---
 src/backend/access/transam/xlogfuncs.c | 73 +++++++++++++++++---------
 src/backend/backup/basebackup.c        | 40 ++++++++++++++
 src/backend/replication/walsender.c    |  2 +
 src/backend/tcop/postgres.c            |  4 ++
 src/include/access/xlogbackup.h        |  1 +
 src/include/backup/basebackup.h        |  1 +
 6 files changed, 97 insertions(+), 24 deletions(-)

diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index a801a94fe8..346570100e 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -45,6 +45,24 @@
 static BackupState *backup_state = NULL;
 static StringInfo tablespace_map = NULL;
 
+/* A workspace for on-line backup. */
+static MemoryContext backupcontext = NULL;
+
+/*
+ * Clean up backup memory context we created.
+ */
+void
+BackupMemoryContextCleanup(void)
+{
+	if (backupcontext == NULL)
+		return;		/* Nothing to do. */
+
+	MemoryContextDelete(backupcontext);
+	backupcontext = NULL;
+	backup_state = NULL;
+	tablespace_map = NULL;
+}
+
 /*
  * pg_backup_start: set up for taking an on-line backup dump
  *
@@ -72,33 +90,32 @@ pg_backup_start(PG_FUNCTION_ARGS)
 
 	/*
 	 * backup_state and tablespace_map need to be long-lived as they are used
-	 * in pg_backup_stop().
+	 * in pg_backup_stop(). Create a special memory context as a direct child
+	 * of TopMemoryContext so that the memory allocated is carried till the
+	 * pg_backup_stop() and cleaned in case of errors (see error handling code
+	 * using sigsetjmp() in PostgresMain()).
 	 */
-	oldcontext = MemoryContextSwitchTo(TopMemoryContext);
-
-	/* Allocate backup state or reset it, if it comes from a previous run */
-	if (backup_state == NULL)
-		backup_state = (BackupState *) palloc0(sizeof(BackupState));
-	else
-		MemSet(backup_state, 0, sizeof(BackupState));
+	if (backupcontext == NULL)
+		backupcontext = AllocSetContextCreate(TopMemoryContext,
+											  "on-line backup context",
+											  ALLOCSET_DEFAULT_SIZES);
 
 	/*
-	 * tablespace_map may have been created in a previous backup, so take this
-	 * occasion to clean it.
+	 * We switch to backup memory context to clean up the allocated memory even
+	 * if an error occurs.
 	 */
-	if (tablespace_map != NULL)
-	{
-		pfree(tablespace_map->data);
-		pfree(tablespace_map);
-		tablespace_map = NULL;
-	}
+	oldcontext = MemoryContextSwitchTo(backupcontext);
 
+	Assert(backup_state == NULL);
+	Assert(tablespace_map == NULL);
+	backup_state = (BackupState *) palloc0(sizeof(BackupState));
 	tablespace_map = makeStringInfo();
-	MemoryContextSwitchTo(oldcontext);
 
 	register_persistent_abort_backup_handler();
 	do_pg_backup_start(backupidstr, fast, NULL, backup_state, tablespace_map);
 
+	MemoryContextSwitchTo(oldcontext);
+
 	PG_RETURN_LSN(backup_state->startpoint);
 }
 
@@ -132,6 +149,7 @@ pg_backup_stop(PG_FUNCTION_ARGS)
 	bool		waitforarchive = PG_GETARG_BOOL(0);
 	char	   *backup_label;
 	SessionBackupState status = get_backup_status();
+	MemoryContext oldcontext;
 
 	/* Initialize attributes information in the tuple descriptor */
 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
@@ -145,6 +163,13 @@ pg_backup_stop(PG_FUNCTION_ARGS)
 
 	Assert(backup_state != NULL);
 	Assert(tablespace_map != NULL);
+	Assert(backupcontext != NULL);
+
+	/*
+	 * We switch to backup memory context to clean up the allocated memory even
+	 * if an error occurs.
+	 */
+	oldcontext = MemoryContextSwitchTo(backupcontext);
 
 	/* Stop the backup */
 	do_pg_backup_stop(backup_state, waitforarchive);
@@ -152,17 +177,17 @@ pg_backup_stop(PG_FUNCTION_ARGS)
 	/* Build the contents of backup_label */
 	backup_label = build_backup_content(backup_state, false);
 
+	/*
+	 * We switch back from backup memory context so that the results are
+	 * created in right memory context.
+	 */
+	MemoryContextSwitchTo(oldcontext);
+
 	values[0] = LSNGetDatum(backup_state->stoppoint);
 	values[1] = CStringGetTextDatum(backup_label);
 	values[2] = CStringGetTextDatum(tablespace_map->data);
 
-	/* Deallocate backup-related variables */
-	pfree(backup_state);
-	backup_state = NULL;
-	pfree(tablespace_map->data);
-	pfree(tablespace_map);
-	tablespace_map = NULL;
-	pfree(backup_label);
+	BackupMemoryContextCleanup();
 
 	/* Returns the record as Datum */
 	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c
index e252ad7421..fe07a3dc8f 100644
--- a/src/backend/backup/basebackup.c
+++ b/src/backend/backup/basebackup.c
@@ -42,6 +42,7 @@
 #include "storage/reinit.h"
 #include "utils/builtins.h"
 #include "utils/guc.h"
+#include "utils/memutils.h"
 #include "utils/ps_status.h"
 #include "utils/relcache.h"
 #include "utils/resowner.h"
@@ -75,6 +76,9 @@ typedef struct
 	pg_checksum_type manifest_checksum_type;
 } basebackup_options;
 
+/* A workspace for base backup. */
+static MemoryContext backupcontext = NULL;
+
 static int64 sendTablespace(bbsink *sink, char *path, char *spcoid, bool sizeonly,
 							struct backup_manifest_info *manifest);
 static int64 sendDir(bbsink *sink, const char *path, int basepathlen, bool sizeonly,
@@ -220,6 +224,19 @@ static const struct exclude_list_item noChecksumFiles[] = {
 	{NULL, false}
 };
 
+/*
+ * Clean up base backup memory context we created.
+ */
+void
+BaseBackupMemoryContextCleanup(void)
+{
+	if (backupcontext == NULL)
+		return;		/* Nothing to do. */
+
+	MemoryContextDelete(backupcontext);
+	backupcontext = NULL;
+}
+
 /*
  * Actually do a base backup for the specified tablespaces.
  *
@@ -235,6 +252,7 @@ perform_base_backup(basebackup_options *opt, bbsink *sink)
 	backup_manifest_info manifest;
 	BackupState *backup_state;
 	StringInfo	tablespace_map;
+	MemoryContext oldcontext;
 
 	/* Initial backup state, insofar as we know it now. */
 	state.tablespaces = NIL;
@@ -252,6 +270,23 @@ perform_base_backup(basebackup_options *opt, bbsink *sink)
 	InitializeBackupManifest(&manifest, opt->manifest,
 							 opt->manifest_checksum_type);
 
+	/*
+	 * Perform the base backup in a special memory context to not leak any
+	 * memory in case of errors (see error handling code using sigsetjmp() in
+	 * PostgresMain()).
+	 */
+	if (backupcontext == NULL)
+		backupcontext = AllocSetContextCreate(CurrentMemoryContext,
+											  "base backup context",
+											  ALLOCSET_DEFAULT_SIZES);
+
+	/*
+	 * We switch to base backup memory context only after
+	 * InitializeBackupManifest() as it uses BufFile which needs to be in the
+	 * ResourceOwner's memory context.
+	 */
+	oldcontext = MemoryContextSwitchTo(backupcontext);
+
 	total_checksum_failures = 0;
 
 	/* Allocate backup related varilables. */
@@ -660,6 +695,11 @@ perform_base_backup(basebackup_options *opt, bbsink *sink)
 	 */
 	FreeBackupManifest(&manifest);
 
+	MemoryContextSwitchTo(oldcontext);
+
+	/* clean up the base backup memory context we created */
+	BaseBackupMemoryContextCleanup();
+
 	/* clean up the resource owner we created */
 	WalSndResourceCleanup(true);
 
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index e9ba500a15..c58091253e 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -328,6 +328,8 @@ WalSndErrorCleanup(void)
 
 	replication_active = false;
 
+	BaseBackupMemoryContextCleanup();
+
 	/*
 	 * If there is a transaction in progress, it will clean up our
 	 * ResourceOwner, but if a replication command set up a resource owner
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 5352d5f4c6..1ef345dd9d 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -31,6 +31,7 @@
 #include "access/parallel.h"
 #include "access/printtup.h"
 #include "access/xact.h"
+#include "access/xlogbackup.h"
 #include "catalog/pg_type.h"
 #include "commands/async.h"
 #include "commands/prepare.h"
@@ -4277,6 +4278,9 @@ PostgresMain(const char *dbname, const char *username)
 		 */
 		AbortCurrentTransaction();
 
+		/* Clean up the backup memory context if created */
+		BackupMemoryContextCleanup();
+
 		if (am_walsender)
 			WalSndErrorCleanup();
 
diff --git a/src/include/access/xlogbackup.h b/src/include/access/xlogbackup.h
index 8ec3d88b0a..1f88511d61 100644
--- a/src/include/access/xlogbackup.h
+++ b/src/include/access/xlogbackup.h
@@ -37,5 +37,6 @@ typedef struct BackupState
 
 extern char *build_backup_content(BackupState *state,
 								  bool ishistoryfile);
+extern void BackupMemoryContextCleanup(void);
 
 #endif							/* XLOG_BACKUP_H */
diff --git a/src/include/backup/basebackup.h b/src/include/backup/basebackup.h
index 593479afdc..33ef95ada8 100644
--- a/src/include/backup/basebackup.h
+++ b/src/include/backup/basebackup.h
@@ -35,5 +35,6 @@ typedef struct
 } tablespaceinfo;
 
 extern void SendBaseBackup(BaseBackupCmd *cmd);
+extern void BaseBackupMemoryContextCleanup(void);
 
 #endif							/* _BASEBACKUP_H */
-- 
2.34.1

