On 16/11/2023 01:08, Tom Lane wrote:
> Heikki Linnakangas <hlinn...@iki.fi> writes:
>> On 09/03/2023 20:51, Tom Lane wrote:
>>> After further thought that seems like a pretty ad-hoc solution.
>>> We probably can do no better in the back branches, but shouldn't
>>> we start treating WaitEventSets as ResourceOwner-managed resources?
>>> Otherwise, transient WaitEventSets are going to be a permanent
>>> source of headaches.
>> Let's change it so that it's always allocated in TopMemoryContext, but
>> pass a ResourceOwner instead:
>> WaitEventSet *
>> CreateWaitEventSet(ResourceOwner owner, int nevents)
>> And use owner == NULL to mean session lifetime.
> WFM. (I didn't study your back-branch patch.)
And here is a patch to implement that on master.
--
Heikki Linnakangas
Neon (https://neon.tech)
From cc88af75011208fc7e9a2bba6b27e437edfab952 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Thu, 16 Nov 2023 11:16:58 +0100
Subject: [PATCH v1 1/1] Use ResourceOwner to track WaitEventSets.
A WaitEventSet holds file descriptors or event handles (on Windows).
If FreeWaitEventSet is not called, those fds or handles are leaked.
Use ResourceOwners to track WaitEventSets, to clean those up
automatically on error.
This was a live bug in async Append nodes if an FDW's
ForeignAsyncRequest function failed. (In back branches, I will apply a
more localized fix for that based on PG_TRY-PG_FINALLY.) The added
test doesn't check for leaking resources, so it passed even before
this commit. But at least it covers the code path.
In passing, fix a misleading comment on what the 'nevents' argument
to WaitEventSetWait means.
Report by Alexander Lakhin, analysis and suggestion for the fix by
Tom Lane. Fixes bug #17828.
Discussion: https://www.postgresql.org/message-id/472235.1678387...@sss.pgh.pa.us
---
.../postgres_fdw/expected/postgres_fdw.out | 7 +++
contrib/postgres_fdw/sql/postgres_fdw.sql | 6 ++
src/backend/executor/nodeAppend.c | 5 +-
src/backend/libpq/pqcomm.c | 2 +-
src/backend/postmaster/postmaster.c | 2 +-
src/backend/postmaster/syslogger.c | 2 +-
src/backend/storage/ipc/latch.c | 63 +++++++++++++++++--
src/include/storage/latch.h | 4 +-
src/include/utils/resowner.h | 1 +
9 files changed, 82 insertions(+), 10 deletions(-)
diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 64bcc66b8d..22cae37a1e 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -10809,6 +10809,13 @@ SELECT * FROM result_tbl ORDER BY a;
(2 rows)
DELETE FROM result_tbl;
+-- Test error handling, if accessing one of the foreign partitions errors out
+CREATE FOREIGN TABLE async_p_broken PARTITION OF async_pt FOR VALUES FROM (10000) TO (10001)
+ SERVER loopback OPTIONS (table_name 'non_existent_table');
+SELECT * FROM async_pt;
+ERROR: relation "public.non_existent_table" does not exist
+CONTEXT: remote SQL command: SELECT a, b, c FROM public.non_existent_table
+DROP FOREIGN TABLE async_p_broken;
-- Check case where multiple partitions use the same connection
CREATE TABLE base_tbl3 (a int, b int, c text);
CREATE FOREIGN TABLE async_p3 PARTITION OF async_pt FOR VALUES FROM (3000) TO (4000)
diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql
index 2d14eeadb5..075da4ff86 100644
--- a/contrib/postgres_fdw/sql/postgres_fdw.sql
+++ b/contrib/postgres_fdw/sql/postgres_fdw.sql
@@ -3607,6 +3607,12 @@ INSERT INTO result_tbl SELECT a, b, 'AAA' || c FROM async_pt WHERE b === 505;
SELECT * FROM result_tbl ORDER BY a;
DELETE FROM result_tbl;
+-- Test error handling, if accessing one of the foreign partitions errors out
+CREATE FOREIGN TABLE async_p_broken PARTITION OF async_pt FOR VALUES FROM (10000) TO (10001)
+ SERVER loopback OPTIONS (table_name 'non_existent_table');
+SELECT * FROM async_pt;
+DROP FOREIGN TABLE async_p_broken;
+
-- Check case where multiple partitions use the same connection
CREATE TABLE base_tbl3 (a int, b int, c text);
CREATE FOREIGN TABLE async_p3 PARTITION OF async_pt FOR VALUES FROM (3000) TO (4000)
diff --git a/src/backend/executor/nodeAppend.c b/src/backend/executor/nodeAppend.c
index 609df6b9e6..af8e37205f 100644
--- a/src/backend/executor/nodeAppend.c
+++ b/src/backend/executor/nodeAppend.c
@@ -1025,7 +1025,8 @@ ExecAppendAsyncEventWait(AppendState *node)
/* We should never be called when there are no valid async subplans. */
Assert(node->as_nasyncremain > 0);
- node->as_eventset = CreateWaitEventSet(CurrentMemoryContext, nevents);
+ Assert(node->as_eventset == NULL);
+ node->as_eventset = CreateWaitEventSet(CurrentResourceOwner, nevents);
AddWaitEventToSet(node->as_eventset, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
NULL, NULL);
@@ -1050,7 +1051,7 @@ ExecAppendAsyncEventWait(AppendState *node)
return;
}
- /* We wait on at most EVENT_BUFFER_SIZE events. */
+ /* Return at most EVENT_BUFFER_SIZE events in one call. */
if (nevents > EVENT_BUFFER_SIZE)
nevents = EVENT_BUFFER_SIZE;
diff --git a/src/backend/libpq/pqcomm.c b/src/backend/libpq/pqcomm.c
index 522584e597..2802efc63f 100644
--- a/src/backend/libpq/pqcomm.c
+++ b/src/backend/libpq/pqcomm.c
@@ -207,7 +207,7 @@ pq_init(void)
elog(FATAL, "fcntl(F_SETFD) failed on socket: %m");
#endif
- FeBeWaitSet = CreateWaitEventSet(TopMemoryContext, FeBeWaitSetNEvents);
+ FeBeWaitSet = CreateWaitEventSet(NULL, FeBeWaitSetNEvents);
socket_pos = AddWaitEventToSet(FeBeWaitSet, WL_SOCKET_WRITEABLE,
MyProcPort->sock, NULL, NULL);
latch_pos = AddWaitEventToSet(FeBeWaitSet, WL_LATCH_SET, PGINVALID_SOCKET,
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 7b6b613c4a..7a5cd06c5c 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -1695,7 +1695,7 @@ ConfigurePostmasterWaitSet(bool accept_connections)
FreeWaitEventSet(pm_wait_set);
pm_wait_set = NULL;
- pm_wait_set = CreateWaitEventSet(CurrentMemoryContext,
+ pm_wait_set = CreateWaitEventSet(NULL,
accept_connections ? (1 + NumListenSockets) : 1);
AddWaitEventToSet(pm_wait_set, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch,
NULL);
diff --git a/src/backend/postmaster/syslogger.c b/src/backend/postmaster/syslogger.c
index 858a2f6b2b..96dd03d9e0 100644
--- a/src/backend/postmaster/syslogger.c
+++ b/src/backend/postmaster/syslogger.c
@@ -311,7 +311,7 @@ SysLoggerMain(int argc, char *argv[])
* syslog pipe, which implies that all other backends have exited
* (including the postmaster).
*/
- wes = CreateWaitEventSet(CurrentMemoryContext, 2);
+ wes = CreateWaitEventSet(NULL, 2);
AddWaitEventToSet(wes, WL_LATCH_SET, PGINVALID_SOCKET, MyLatch, NULL);
#ifndef WIN32
AddWaitEventToSet(wes, WL_SOCKET_READABLE, syslogPipe[0], NULL, NULL);
diff --git a/src/backend/storage/ipc/latch.c b/src/backend/storage/ipc/latch.c
index 2fd386a4ed..b5c6b1e9b2 100644
--- a/src/backend/storage/ipc/latch.c
+++ b/src/backend/storage/ipc/latch.c
@@ -62,6 +62,7 @@
#include "storage/pmsignal.h"
#include "storage/shmem.h"
#include "utils/memutils.h"
+#include "utils/resowner.h"
/*
* Select the fd readiness primitive to use. Normally the "most modern"
@@ -101,6 +102,8 @@
/* typedef in latch.h */
struct WaitEventSet
{
+ ResourceOwner owner;
+
int nevents; /* number of registered events */
int nevents_space; /* maximum number of events in this set */
@@ -195,6 +198,30 @@ static void WaitEventAdjustWin32(WaitEventSet *set, WaitEvent *event);
static inline int WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
WaitEvent *occurred_events, int nevents);
+/* ResourceOwner support to hold WaitEventSets */
+static void ResOwnerReleaseWaitEventSet(Datum res);
+
+static const ResourceOwnerDesc wait_event_set_resowner_desc =
+{
+ .name = "WaitEventSet",
+ .release_phase = RESOURCE_RELEASE_AFTER_LOCKS,
+ .release_priority = RELEASE_PRIO_WAITEVENTSETS,
+ .ReleaseResource = ResOwnerReleaseWaitEventSet,
+ .DebugPrint = NULL
+};
+
+static inline void
+ResourceOwnerRememberWaitEventSet(ResourceOwner owner, WaitEventSet *set)
+{
+ ResourceOwnerRemember(owner, PointerGetDatum(set), &wait_event_set_resowner_desc);
+}
+static inline void
+ResourceOwnerForgetWaitEventSet(ResourceOwner owner, WaitEventSet *set)
+{
+ ResourceOwnerForget(owner, PointerGetDatum(set), &wait_event_set_resowner_desc);
+}
+
+
/*
* Initialize the process-local latch infrastructure.
*
@@ -323,7 +350,7 @@ InitializeLatchWaitSet(void)
Assert(LatchWaitSet == NULL);
/* Set up the WaitEventSet used by WaitLatch(). */
- LatchWaitSet = CreateWaitEventSet(TopMemoryContext, 2);
+ LatchWaitSet = CreateWaitEventSet(NULL, 2);
latch_pos = AddWaitEventToSet(LatchWaitSet, WL_LATCH_SET, PGINVALID_SOCKET,
MyLatch, NULL);
if (IsUnderPostmaster)
@@ -541,7 +568,7 @@ WaitLatchOrSocket(Latch *latch, int wakeEvents, pgsocket sock,
int ret = 0;
int rc;
WaitEvent event;
- WaitEventSet *set = CreateWaitEventSet(CurrentMemoryContext, 3);
+ WaitEventSet *set = CreateWaitEventSet(CurrentResourceOwner, 3);
if (wakeEvents & WL_TIMEOUT)
Assert(timeout >= 0);
@@ -716,9 +743,12 @@ ResetLatch(Latch *latch)
*
* These events can then be efficiently waited upon together, using
* WaitEventSetWait().
+ *
+ * The WaitEventSet is tracked by the given 'resowner'. Use NULL for session
+ * lifetime.
*/
WaitEventSet *
-CreateWaitEventSet(MemoryContext context, int nevents)
+CreateWaitEventSet(ResourceOwner resowner, int nevents)
{
WaitEventSet *set;
char *data;
@@ -744,7 +774,10 @@ CreateWaitEventSet(MemoryContext context, int nevents)
sz += MAXALIGN(sizeof(HANDLE) * (nevents + 1));
#endif
- data = (char *) MemoryContextAllocZero(context, sz);
+ if (resowner != NULL)
+ ResourceOwnerEnlarge(resowner);
+
+ data = (char *) MemoryContextAllocZero(TopMemoryContext, sz);
set = (WaitEventSet *) data;
data += MAXALIGN(sizeof(WaitEventSet));
@@ -770,6 +803,12 @@ CreateWaitEventSet(MemoryContext context, int nevents)
set->nevents_space = nevents;
set->exit_on_postmaster_death = false;
+ if (resowner != NULL)
+ {
+ ResourceOwnerRememberWaitEventSet(resowner, set);
+ set->owner = resowner;
+ }
+
#if defined(WAIT_USE_EPOLL)
if (!AcquireExternalFD())
{
@@ -834,6 +873,12 @@ CreateWaitEventSet(MemoryContext context, int nevents)
void
FreeWaitEventSet(WaitEventSet *set)
{
+ if (set->owner)
+ {
+ ResourceOwnerForgetWaitEventSet(set->owner, set);
+ set->owner = NULL;
+ }
+
#if defined(WAIT_USE_EPOLL)
close(set->epoll_fd);
ReleaseExternalFD();
@@ -2300,3 +2345,13 @@ drain(void)
}
#endif
+
+static void
+ResOwnerReleaseWaitEventSet(Datum res)
+{
+ WaitEventSet *set = (WaitEventSet *) DatumGetPointer(res);
+
+ Assert(set->owner != NULL);
+ set->owner = NULL;
+ FreeWaitEventSet(set);
+}
diff --git a/src/include/storage/latch.h b/src/include/storage/latch.h
index 99cc47874a..9efc33add8 100644
--- a/src/include/storage/latch.h
+++ b/src/include/storage/latch.h
@@ -102,6 +102,8 @@
#include <signal.h>
+#include "utils/resowner.h"
+
/*
* Latch structure should be treated as opaque and only accessed through
* the public functions. It is defined here to allow embedding Latches as
@@ -173,7 +175,7 @@ extern void SetLatch(Latch *latch);
extern void ResetLatch(Latch *latch);
extern void ShutdownLatchSupport(void);
-extern WaitEventSet *CreateWaitEventSet(MemoryContext context, int nevents);
+extern WaitEventSet *CreateWaitEventSet(ResourceOwner resowner, int nevents);
extern void FreeWaitEventSet(WaitEventSet *set);
extern void FreeWaitEventSetAfterFork(WaitEventSet *set);
extern int AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd,
diff --git a/src/include/utils/resowner.h b/src/include/utils/resowner.h
index 0735480214..ddbf19d8da 100644
--- a/src/include/utils/resowner.h
+++ b/src/include/utils/resowner.h
@@ -74,6 +74,7 @@ typedef uint32 ResourceReleasePriority;
#define RELEASE_PRIO_TUPDESC_REFS 400
#define RELEASE_PRIO_SNAPSHOT_REFS 500
#define RELEASE_PRIO_FILES 600
+#define RELEASE_PRIO_WAITEVENTSETS 700
/* 0 is considered invalid */
#define RELEASE_PRIO_FIRST 1
--
2.39.2