Hi,

it seems there's a fairly annoying memory leak in trigger code,
introduced by

  commit fc22b6623b6b3bab3cb057ccd282c2bfad1a0b30
  Author: Peter Eisentraut <pe...@eisentraut.org>
  Date:   Sat Mar 30 08:13:09 2019 +0100

    Generated columns
    ...

which added GetAllUpdatedColumns() and uses it many places instead of
the original GetUpdatedColumns():

   #define GetUpdatedColumns(relinfo, estate) \
           (exec_rt_fetch((relinfo)->ri_RangeTableIndex,
                          estate)->updatedCols)

   #define GetAllUpdatedColumns(relinfo, estate) \
        (bms_union(exec_rt_fetch((relinfo)->ri_RangeTableIndex,
                                 estate)->updatedCols, \
                   exec_rt_fetch((relinfo)->ri_RangeTableIndex,
                                 estate)->extraUpdatedCols))

Notice this creates a new bitmap on every calls. That's a bit of a
problem, because we call this from:

  - ExecASUpdateTriggers
  - ExecARUpdateTriggers
  - ExecBSUpdateTriggers
  - ExecBRUpdateTriggers
  - ExecUpdateLockMode

This means that for an UPDATE with triggers, we may end up calling this
for each row, possibly multiple bitmaps. And those bitmaps are allocated
in ExecutorState, so won't be freed until the end of the query :-(

The bitmaps are typically fairly small (a couple bytes), but for wider
tables it can be a couple dozen bytes. But it's primarily driven by
number of updated rows.

It's easy to leak gigabytes when updating ~10M rows. I've seen cases
with a couple tens of GBs leaked, though, but in that case it seems to
be caused by UPDATE ... FROM missing a join condition (so in fact the
memory leak is proportional to number of rows in the join result, not
the number we end up updating).

Attached is a patch, restoring the pre-12 behavior for me.


While looking for other places allocating stuff in ExecutorState (for
the UPDATE case) and leaving it there, I found two more cases:

1) copy_plpgsql_datums

2) make_expanded_record_from_tupdesc
   make_expanded_record_from_exprecord

All of this is calls from plpgsql_exec_trigger.

Fixing the copy_plpgsql_datums case seems fairly simple, the space
allocated for local copies can be freed during the cleanup. That's what
0002 does.

I'm not sure what to do about the expanded records. My understanding of
the expanded record lifecycle is fairly limited, so my (rather naive)
attempt to free the memory failed ...


I wonder how much we should care about these cases. On the one hand we
often leave the cleanup up to the memory context, but the assumption is
the context is not unnecessarily long-lived. And ExecutorState is not
that. And leaking memory per-row does not seem great either.


regards

-- 
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
From 85439e2587f035040c82e7303b96b887e650b01f Mon Sep 17 00:00:00 2001
From: Tomas Vondra <tomas.von...@postgresql.org>
Date: Tue, 23 May 2023 17:45:47 +0200
Subject: [PATCH 1/2] Free updatedCols bitmaps

---
 src/backend/commands/trigger.c  | 22 ++++++++++++++++++++--
 src/backend/executor/execMain.c |  9 +++++++--
 2 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 4b295f8da5e..fb11203c473 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -2916,6 +2916,8 @@ ExecBSUpdateTriggers(EState *estate, ResultRelInfo *relinfo)
 					(errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
 					 errmsg("BEFORE STATEMENT trigger cannot return a value")));
 	}
+
+	bms_free(updatedCols);
 }
 
 void
@@ -2928,12 +2930,18 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
 	Assert(relinfo->ri_RootResultRelInfo == NULL);
 
 	if (trigdesc && trigdesc->trig_update_after_statement)
+	{
+		Bitmapset *updatedCols = ExecGetAllUpdatedCols(relinfo, estate);
+
 		AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
 							  TRIGGER_EVENT_UPDATE,
 							  false, NULL, NULL, NIL,
-							  ExecGetAllUpdatedCols(relinfo, estate),
+							  updatedCols,
 							  transition_capture,
 							  false);
+
+		bms_free(updatedCols);
+	}
 }
 
 bool
@@ -3043,6 +3051,9 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
 				heap_freetuple(trigtuple);
 			if (should_free_new)
 				heap_freetuple(oldtuple);
+
+			bms_free(updatedCols);
+
 			return false;		/* "do nothing" */
 		}
 		else if (newtuple != oldtuple)
@@ -3068,6 +3079,8 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
 	if (should_free_trig)
 		heap_freetuple(trigtuple);
 
+	bms_free(updatedCols);
+
 	return true;
 }
 
@@ -3107,6 +3120,7 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
 		 */
 		TupleTableSlot *oldslot;
 		ResultRelInfo *tupsrc;
+		Bitmapset *updatedCols;
 
 		Assert((src_partinfo != NULL && dst_partinfo != NULL) ||
 			   !is_crosspart_update);
@@ -3129,14 +3143,18 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
 		else
 			ExecClearTuple(oldslot);
 
+		updatedCols = ExecGetAllUpdatedCols(relinfo, estate);
+
 		AfterTriggerSaveEvent(estate, relinfo,
 							  src_partinfo, dst_partinfo,
 							  TRIGGER_EVENT_UPDATE,
 							  true,
 							  oldslot, newslot, recheckIndexes,
-							  ExecGetAllUpdatedCols(relinfo, estate),
+							  updatedCols,
 							  transition_capture,
 							  is_crosspart_update);
+
+		bms_free(updatedCols);
 	}
 }
 
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index c76fdf59ec4..98502b956c2 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -2367,6 +2367,7 @@ ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo)
 {
 	Bitmapset  *keyCols;
 	Bitmapset  *updatedCols;
+	LockTupleMode	lockMode;
 
 	/*
 	 * Compute lock mode to use.  If columns that are part of the key have not
@@ -2378,9 +2379,13 @@ ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo)
 										 INDEX_ATTR_BITMAP_KEY);
 
 	if (bms_overlap(keyCols, updatedCols))
-		return LockTupleExclusive;
+		lockMode = LockTupleExclusive;
+	else
+		lockMode = LockTupleNoKeyExclusive;
+
+	bms_free(updatedCols);
 
-	return LockTupleNoKeyExclusive;
+	return lockMode;
 }
 
 /*
-- 
2.40.1

From 16a7075569fa56951971592b237929f49e414737 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <to...@2ndquadrant.com>
Date: Sat, 13 May 2023 13:19:49 +0200
Subject: [PATCH 2/2] Free space allocated by copy_plpgsql_datums

During PL/pgSQL execution, we allocate a local copy for calls. When
executing triggers, this gets allocated in ExecutorState, so it'd be
kept until query completes.

Free it explicitly, as part of cleanup.
---
 src/pl/plpgsql/src/pl_exec.c | 39 +++++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index 4b76f7699a6..99283d354f4 100644
--- a/src/pl/plpgsql/src/pl_exec.c
+++ b/src/pl/plpgsql/src/pl_exec.c
@@ -264,6 +264,8 @@ static void coerce_function_result_tuple(PLpgSQL_execstate *estate,
 static void plpgsql_exec_error_callback(void *arg);
 static void copy_plpgsql_datums(PLpgSQL_execstate *estate,
 								PLpgSQL_function *func);
+static void free_plpgsql_datums(PLpgSQL_execstate *estate,
+								PLpgSQL_function *func);
 static void plpgsql_fulfill_promise(PLpgSQL_execstate *estate,
 									PLpgSQL_var *var);
 static MemoryContext get_stmt_mcontext(PLpgSQL_execstate *estate);
@@ -788,6 +790,8 @@ plpgsql_exec_function(PLpgSQL_function *func, FunctionCallInfo fcinfo,
 	exec_eval_cleanup(&estate);
 	/* stmt_mcontext will be destroyed when function's main context is */
 
+	free_plpgsql_datums(&estate, func);
+
 	/*
 	 * Pop the error context stack
 	 */
@@ -1142,6 +1146,8 @@ plpgsql_exec_trigger(PLpgSQL_function *func,
 	exec_eval_cleanup(&estate);
 	/* stmt_mcontext will be destroyed when function's main context is */
 
+	free_plpgsql_datums(&estate, func);
+
 	/*
 	 * Pop the error context stack
 	 */
@@ -1217,6 +1223,8 @@ plpgsql_exec_event_trigger(PLpgSQL_function *func, EventTriggerData *trigdata)
 	exec_eval_cleanup(&estate);
 	/* stmt_mcontext will be destroyed when function's main context is */
 
+	free_plpgsql_datums(&estate, func);
+
 	/*
 	 * Pop the error context stack
 	 */
@@ -1304,15 +1312,20 @@ copy_plpgsql_datums(PLpgSQL_execstate *estate,
 	char	   *ws_next;
 	int			i;
 
-	/* Allocate local datum-pointer array */
-	estate->datums = (PLpgSQL_datum **)
-		palloc(sizeof(PLpgSQL_datum *) * ndatums);
-
 	/*
+	 * Allocate local datum-pointer array, followed by space for the values.
+	 *
 	 * To reduce palloc overhead, we make a single palloc request for all the
 	 * space needed for locally-instantiated datums.
 	 */
-	workspace = palloc(func->copiable_size);
+	estate->datums = (PLpgSQL_datum **)
+		palloc(MAXALIGN(sizeof(PLpgSQL_datum *) * ndatums) + func->copiable_size);
+
+	/*
+	 * Space for values starts after the datum-pointer array, but we need to make
+	 * sure it's aligned properly.
+	 */
+	workspace = (char *) estate->datums + MAXALIGN(sizeof(PLpgSQL_datum *) * ndatums);
 	ws_next = workspace;
 
 	/* Fill datum-pointer array, copying datums into workspace as needed */
@@ -1362,6 +1375,22 @@ copy_plpgsql_datums(PLpgSQL_execstate *estate,
 	Assert(ws_next == workspace + func->copiable_size);
 }
 
+/*
+ * Free the local execution variables. We've allocated all of the space at once
+ * so we can simply free the main pointer.
+ */
+static void
+free_plpgsql_datums(PLpgSQL_execstate *estate,
+					PLpgSQL_function *func)
+{
+	if (estate->datums != NULL)
+	{
+		pfree(estate->datums);
+		estate->datums = NULL;
+	}
+
+}
+
 /*
  * If the variable has an armed "promise", compute the promised value
  * and assign it to the variable.
-- 
2.40.1

Reply via email to