31.05.2025 06:00, Alexander Lakhin wrote:
Hello Thomas,
It looks like I managed to restore all the conditions needed to reproduce
that Assert more or less reliably (within a couple of hours), so I can
continue experiments.
I've added the following debugging:
...
With additional logging (the patch is attached), I can see the following:
!!!pgaio_io_reclaim [63817]| ioh: 0x1046b5660, ioh->op: 1, ioh->state: 6,
ioh->result: 8192, ioh->num_callbacks: 2
!!!AsyncReadBuffers [63817] (1)| blocknum: 15, ioh: 0x1046b5660, ioh->op: 0, ioh->state: 1, ioh->result: 0,
ioh->num_callbacks: 0
!!!pgaio_io_start_readv [63817]| ioh: 0x1046b5660, ioh->op: 1
!!!pgaio_io_reclaim [63817]| ioh: 0x1046b5660, ioh->op: 1, ioh->state: 6,
ioh->result: 8192, ioh->num_callbacks: 2
!!!AsyncReadBuffers [63817] (1)| blocknum: 16, ioh: 0x1046b5660, ioh->op: 0, ioh->state: 1, ioh->result: 0,
ioh->num_callbacks: 0
!!!pgaio_io_start_readv [63817]| ioh: 0x1046b5660, ioh->op: 1
!!!pgaio_io_reclaim [63817]| ioh: 0x1046b5660, ioh->op: 1, ioh->state: 6,
ioh->result: 8192, ioh->num_callbacks: 2
!!!AsyncReadBuffers [63817] (1)| blocknum: 17, ioh: 0x1046b5660, ioh->op: 0, ioh->state: 1, ioh->result: 0,
ioh->num_callbacks: 0
!!!pgaio_io_start_readv [63817]| ioh: 0x1046b5660, ioh->op: 1
!!!pgaio_io_reclaim [63817]| ioh: 0x1046b5660, ioh->op: 1, ioh->state: 6,
ioh->result: 8192, ioh->num_callbacks: 2
!!!AsyncReadBuffers [63817] (1)| blocknum: 18, ioh: 0x1046b5660, ioh->op: 0, ioh->state: 1, ioh->result: 0,
ioh->num_callbacks: 0
2025-06-02 00:45:11.391 EDT [63817:14] pg_regress/brin LOG: !!!pgaio_io_before_start| ioh: 0x1046b5660, ioh->op: 1,
ioh->state: 1, ioh->result: 0, ioh->num_callbacks: 2
TRAP: failed Assert("ioh->op == PGAIO_OP_INVALID"), File: "aio_io.c", Line:
164, PID: 63817
0 postgres 0x0000000102fdb60c ExceptionalCondition
+ 236
1 postgres 0x0000000102ce233c
pgaio_io_before_start + 504
2 postgres 0x0000000102ce20a8 pgaio_io_start_readv
+ 36
3 postgres 0x0000000102d01200 FileStartReadV + 252
4 postgres 0x0000000102d544e0 mdstartreadv + 668
5 postgres 0x0000000102d57ac8 smgrstartreadv + 116
6 postgres 0x0000000102cea474 AsyncReadBuffers +
1996
7 postgres 0x0000000102ce9430 StartReadBuffersImpl
+ 1196
8 postgres 0x0000000102ce8f74 StartReadBuffers + 64
9 postgres 0x0000000102ce657c
read_stream_start_pending_read + 1204
10 postgres 0x0000000102ce5c60
read_stream_look_ahead + 812
11 postgres 0x0000000102ce57f8
read_stream_next_buffer + 2356
12 postgres 0x00000001026da1b8
heap_fetch_next_buffer + 284
13 postgres 0x00000001026caf54 heapgettup_pagemode
+ 192
14 postgres 0x00000001026cb524 heap_getnextslot + 84
15 postgres 0x0000000102a4d984
table_scan_getnextslot + 340
16 postgres 0x0000000102a4d790 SeqNext + 148
17 postgres 0x0000000102a4de64 ExecScanFetch + 772
18 postgres 0x0000000102a4da38 ExecScanExtended +
164
19 postgres 0x0000000102a4d374
ExecSeqScanWithQualProject + 248
20 postgres 0x0000000102a13248 ExecProcNode + 68
21 postgres 0x0000000102a12e70 fetch_input_tuple +
180
22 postgres 0x0000000102a12bb4 agg_retrieve_direct
+ 1664
23 postgres 0x0000000102a0f914 ExecAgg + 240
...
2025-06-02 00:45:11.409 EDT [57192:4] LOG: client backend (PID 63817) was
terminated by signal 6: Abort trap: 6
2025-06-02 00:45:11.409 EDT [57192:5] DETAIL: Failed process was running: DO
$x$
DECLARE
r record;
r2 record;
cond text;
idx_ctids tid[];
ss_ctids tid[];
count int;
plan_ok bool;
plan_line text;
BEGIN
...
Best regards,
Alexander Lakhin
Neon (https://neon.tech)
diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c
index c64d815ebd1..49ecb264d9d 100644
--- a/src/backend/storage/aio/aio.c
+++ b/src/backend/storage/aio/aio.c
@@ -668,6 +668,7 @@ pgaio_io_reclaim(PgAioHandle *ioh)
Assert(ioh->owner_procno == MyProcNumber);
Assert(ioh->state != PGAIO_HS_IDLE);
+fprintf(stderr, "!!!pgaio_io_reclaim [%d]| ioh: %p, ioh->op: %d, ioh->state: %d, ioh->result: %d, ioh->num_callbacks: %d\n", getpid(), ioh, ioh->op, ioh->state, ioh->result, ioh->num_callbacks);
/* see comment in function header */
HOLD_INTERRUPTS();
diff --git a/src/backend/storage/aio/aio_io.c b/src/backend/storage/aio/aio_io.c
index 00e176135a6..2ae78217871 100644
--- a/src/backend/storage/aio/aio_io.c
+++ b/src/backend/storage/aio/aio_io.c
@@ -85,6 +85,7 @@ pgaio_io_start_readv(PgAioHandle *ioh,
ioh->op_data.read.iov_length = iovcnt;
pgaio_io_stage(ioh, PGAIO_OP_READV);
+fprintf(stderr, "!!!pgaio_io_start_readv [%d]| ioh: %p, ioh->op: %d\n", getpid(), ioh, ioh->op);
}
void
@@ -158,6 +159,7 @@ pgaio_io_before_start(PgAioHandle *ioh)
Assert(ioh->state == PGAIO_HS_HANDED_OUT);
Assert(pgaio_my_backend->handed_out_io == ioh);
Assert(pgaio_io_has_target(ioh));
+if (ioh->op != PGAIO_OP_INVALID) elog(LOG, "!!!pgaio_io_before_start| ioh: %p, ioh->op: %d, ioh->state: %d, ioh->result: %d, ioh->num_callbacks: %d", ioh, ioh->op, ioh->state, ioh->result, ioh->num_callbacks);
Assert(ioh->op == PGAIO_OP_INVALID);
/*
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index f93131a645e..4bb428b2875 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -67,6 +67,7 @@
#include "utils/resowner.h"
#include "utils/timestamp.h"
+#include "storage/aio_internal.h"
/* Note: these two macros only work on shared buffers, not local ones! */
#define BufHdrGetBlock(bufHdr) ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
@@ -1855,7 +1856,10 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
pgaio_submit_staged();
ioh = pgaio_io_acquire(CurrentResourceOwner, &operation->io_return);
+fprintf(stderr, "!!!AsyncReadBuffers [%d] (2)| blocknum: %d, ioh: %p, ioh->op: %d, ioh->state: %d, ioh->result: %d, ioh->num_callbacks: %d\n", getpid(), blocknum, ioh, ((PgAioHandle *)ioh)->op, ((PgAioHandle *)ioh)->state, ((PgAioHandle *)ioh)->result, ((PgAioHandle *)ioh)->num_callbacks);
}
+else
+fprintf(stderr, "!!!AsyncReadBuffers [%d] (1)| blocknum: %d, ioh: %p, ioh->op: %d, ioh->state: %d, ioh->result: %d, ioh->num_callbacks: %d\n", getpid(), blocknum, ioh, ((PgAioHandle *)ioh)->op, ((PgAioHandle *)ioh)->state, ((PgAioHandle *)ioh)->result, ((PgAioHandle *)ioh)->num_callbacks);
/*
* Check if we can start IO on the first to-be-read buffer.
diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm
index 1c11750ac1d..1135d6dc399 100644
--- a/src/test/perl/PostgreSQL/Test/Cluster.pm
+++ b/src/test/perl/PostgreSQL/Test/Cluster.pm
@@ -712,7 +712,7 @@ sub init
print $conf "wal_log_hints = on\n";
print $conf "hot_standby = on\n";
# conservative settings to ensure we can run multiple postmasters:
- print $conf "shared_buffers = 1MB\n";
+ print $conf "shared_buffers = 512kB\n";
print $conf "max_connections = 10\n";
# limit disk space consumption, too:
print $conf "max_wal_size = 128MB\n";