On Tue, Mar 25, 2025 at 11:58 AM Andres Freund <and...@anarazel.de> wrote:
>
> > Another thought on complete_shared running on other backends: I wonder if we
> > should push an ErrorContextCallback that adds "CONTEXT: completing I/O of
> > other process" or similar, so people wonder less about how "SELECT FROM a"
> > led to a log message about IO on table "b".
>
> I've been wondering about that as well, and yes, we probably should.
>
> I'd add the pid of the backend that started the IO to the message - although
> need to check whether we're trying to keep PIDs of other processes from
> unprivileged users.
>
> I think we probably should add a similar, but not equivalent, context in io
> workers. Maybe "I/O worker executing I/O on behalf of process %d".
I think this has not yet been done. The attached patch is an attempt to add
error context both for IO completions done for another backend when using
io_uring and for IO processing in general by an IO worker. It seems to
work -- that is, when running the test_aio tests, you can see the context in
the logs.

I'm not certain that I did this in the way you both were envisioning, though.

- Melanie
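P.S. For anyone reading along who isn't familiar with the mechanism: the patch
uses the usual ErrorContextCallback push/pop idiom from elog.h. A minimal,
simplified sketch of that pattern follows -- the names my_error_callback and
process_items are made up for illustration and are not part of the patch; only
the struct fields (callback, previous, arg), error_context_stack, and
errcontext() are the existing API.

/* In a backend translation unit; postgres.h pulls in elog.h. */
#include "postgres.h"

static void
my_error_callback(void *arg)
{
	/* arg points at whatever the loop below is currently working on */
	if (arg != NULL)
		errcontext("processing item %d", *(int *) arg);
}

static void
process_items(int *items, int nitems)
{
	ErrorContextCallback errcallback = {0};

	/* push: link our callback onto the error context stack */
	errcallback.callback = my_error_callback;
	errcallback.previous = error_context_stack;
	error_context_stack = &errcallback;

	for (int i = 0; i < nitems; i++)
	{
		/* point the callback at the item currently being processed */
		errcallback.arg = &items[i];

		/* ... work that might ereport(ERROR, ...) ... */

		errcallback.arg = NULL;
	}

	/* pop: restore the previous error context stack */
	error_context_stack = errcallback.previous;
}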
From 79d7e930de510cad6aef31532b06ba679a72d94a Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplage...@gmail.com>
Date: Sun, 30 Mar 2025 14:03:55 -0400
Subject: [PATCH] Add errcontext for processing I/Os for another backend

Push an ErrorContextCallback adding additional detail about the process
performing the I/O and the owner of the I/O when those are not the same.

For io_method worker, this adds context specifying which process owns
the I/O that the I/O worker is processing.

For io_method io_uring, this adds context only when a backend is
*completing* I/O for another backend. It specifies the pid of the
owning process.

Discussion: https://postgr.es/m/20250325141120.8e.nmisch%40google.com
---
 src/backend/storage/aio/aio.c              |  1 +
 src/backend/storage/aio/method_io_uring.c | 31 +++++++++++++++++++++++
 src/backend/storage/aio/method_worker.c   | 29 +++++++++++++++++++++
 3 files changed, 61 insertions(+)

diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c
index 43f1e2a7785..3d5cf726e24 100644
--- a/src/backend/storage/aio/aio.c
+++ b/src/backend/storage/aio/aio.c
@@ -42,6 +42,7 @@
 #include "port/atomics.h"
 #include "storage/aio_internal.h"
 #include "storage/aio_subsys.h"
+#include "storage/proc.h"
 #include "storage/procnumber.h"
 #include "utils/guc.h"
 #include "utils/guc_hooks.h"
diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c
index 3b299dcf388..244918e1883 100644
--- a/src/backend/storage/aio/method_io_uring.c
+++ b/src/backend/storage/aio/method_io_uring.c
@@ -303,14 +303,41 @@ pgaio_uring_submit(uint16 num_staged_ios, PgAioHandle **staged_ios)
 	return num_staged_ios;
 }
 
+static void
+pgaio_uring_completion_error_callback(void *arg)
+{
+	ProcNumber	owner;
+	PGPROC	   *owner_proc;
+	int32		owner_pid;
+	PgAioHandle *ioh = arg;
+
+	if (!ioh)
+		return;
+
+	/* No need for context if a backend is completing the IO for itself */
+	if (ioh->owner_procno == MyProcNumber)
+		return;
+
+	owner = ioh->owner_procno;
+	owner_proc = GetPGProcByNumber(owner);
+	owner_pid = owner_proc->pid;
+
+	errcontext("completing I/O on behalf of process %d", owner_pid);
+}
+
 static void
 pgaio_uring_drain_locked(PgAioUringContext *context)
 {
 	int			ready;
 	int			orig_ready;
+	ErrorContextCallback errcallback = {0};
 
 	Assert(LWLockHeldByMeInMode(&context->completion_lock, LW_EXCLUSIVE));
 
+	errcallback.callback = pgaio_uring_completion_error_callback;
+	errcallback.previous = error_context_stack;
+	error_context_stack = &errcallback;
+
 	/*
 	 * Don't drain more events than available right now. Otherwise it's
 	 * plausible that one backend could get stuck, for a while, receiving CQEs
@@ -338,9 +365,11 @@ pgaio_uring_drain_locked(PgAioUringContext *context)
 			PgAioHandle *ioh;
 
 			ioh = io_uring_cqe_get_data(cqe);
+			errcallback.arg = ioh;
 			io_uring_cqe_seen(&context->io_uring_ring, cqe);
 
 			pgaio_io_process_completion(ioh, cqe->res);
+			errcallback.arg = NULL;
 		}
 
 		END_CRIT_SECTION();
@@ -349,6 +378,8 @@ pgaio_uring_drain_locked(PgAioUringContext *context)
 					"drained %d/%d, now expecting %d",
 					ncqes, orig_ready, io_uring_cq_ready(&context->io_uring_ring));
 	}
+
+	error_context_stack = errcallback.previous;
 }
 
 static void
diff --git a/src/backend/storage/aio/method_worker.c b/src/backend/storage/aio/method_worker.c
index 5ea00d8a89e..52f901ed4c2 100644
--- a/src/backend/storage/aio/method_worker.c
+++ b/src/backend/storage/aio/method_worker.c
@@ -361,11 +361,33 @@ pgaio_worker_register(void)
 	on_shmem_exit(pgaio_worker_die, 0);
 }
 
+static void
+pgaio_worker_error_callback(void *arg)
+{
+	ProcNumber	owner;
+	PGPROC	   *owner_proc;
+	int32		owner_pid;
+	PgAioHandle *ioh = arg;
+
+	if (!ioh)
+		return;
+
+	Assert(ioh->owner_procno != MyProcNumber);
+	Assert(MyBackendType == B_IO_WORKER);
+
+	owner = ioh->owner_procno;
+	owner_proc = GetPGProcByNumber(owner);
+	owner_pid = owner_proc->pid;
+
+	errcontext("I/O worker executing I/O on behalf of process %d", owner_pid);
+}
+
 void
 IoWorkerMain(const void *startup_data, size_t startup_data_len)
 {
 	sigjmp_buf	local_sigjmp_buf;
 	PgAioHandle *volatile error_ioh = NULL;
+	ErrorContextCallback errcallback = {0};
 	volatile int error_errno = 0;
 	char		cmd[128];
 
@@ -392,6 +414,10 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
 	sprintf(cmd, "%d", MyIoWorkerId);
 	set_ps_display(cmd);
 
+	errcallback.callback = pgaio_worker_error_callback;
+	errcallback.previous = error_context_stack;
+	error_context_stack = &errcallback;
+
 	/* see PostgresMain() */
 	if (sigsetjmp(local_sigjmp_buf, 1) != 0)
 	{
@@ -475,6 +501,7 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
 		ioh = &pgaio_ctl->io_handles[io_index];
 		error_ioh = ioh;
+		errcallback.arg = ioh;
 
 		pgaio_debug_io(DEBUG4, ioh,
 					   "worker %d processing IO",
@@ -515,6 +542,7 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
 			pgaio_io_perform_synchronously(ioh);
 
 			RESUME_INTERRUPTS();
+			errcallback.arg = NULL;
 		}
 		else
 		{
@@ -526,6 +554,7 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
 		CHECK_FOR_INTERRUPTS();
 	}
 
+	error_context_stack = errcallback.previous;
 	proc_exit(0);
 }
 
-- 
2.34.1