31.05.2025 06:00, Alexander Lakhin wrote:
Hello Thomas,

It looks like I managed to restore all the conditions needed to reproduce
that Assert more or less reliably (within a couple of hours), so I can
continue experiments.

I've added the following debugging:
...

With additional logging (the patch is attached), I can see the following:
!!!pgaio_io_reclaim [63817]| ioh: 0x1046b5660, ioh->op: 1, ioh->state: 6, 
ioh->result: 8192, ioh->num_callbacks: 2
!!!AsyncReadBuffers [63817] (1)| blocknum: 15, ioh: 0x1046b5660, ioh->op: 0, ioh->state: 1, ioh->result: 0, ioh->num_callbacks: 0
!!!pgaio_io_start_readv [63817]| ioh: 0x1046b5660, ioh->op: 1
!!!pgaio_io_reclaim [63817]| ioh: 0x1046b5660, ioh->op: 1, ioh->state: 6, 
ioh->result: 8192, ioh->num_callbacks: 2
!!!AsyncReadBuffers [63817] (1)| blocknum: 16, ioh: 0x1046b5660, ioh->op: 0, ioh->state: 1, ioh->result: 0, ioh->num_callbacks: 0
!!!pgaio_io_start_readv [63817]| ioh: 0x1046b5660, ioh->op: 1
!!!pgaio_io_reclaim [63817]| ioh: 0x1046b5660, ioh->op: 1, ioh->state: 6, 
ioh->result: 8192, ioh->num_callbacks: 2
!!!AsyncReadBuffers [63817] (1)| blocknum: 17, ioh: 0x1046b5660, ioh->op: 0, ioh->state: 1, ioh->result: 0, ioh->num_callbacks: 0
!!!pgaio_io_start_readv [63817]| ioh: 0x1046b5660, ioh->op: 1
!!!pgaio_io_reclaim [63817]| ioh: 0x1046b5660, ioh->op: 1, ioh->state: 6, 
ioh->result: 8192, ioh->num_callbacks: 2
!!!AsyncReadBuffers [63817] (1)| blocknum: 18, ioh: 0x1046b5660, ioh->op: 0, ioh->state: 1, ioh->result: 0, ioh->num_callbacks: 0 2025-06-02 00:45:11.391 EDT [63817:14] pg_regress/brin LOG: !!!pgaio_io_before_start| ioh: 0x1046b5660, ioh->op: 1, ioh->state: 1, ioh->result: 0, ioh->num_callbacks: 2

TRAP: failed Assert("ioh->op == PGAIO_OP_INVALID"), File: "aio_io.c", Line: 
164, PID: 63817
0   postgres                            0x0000000102fdb60c ExceptionalCondition 
+ 236
1   postgres                            0x0000000102ce233c 
pgaio_io_before_start + 504
2   postgres                            0x0000000102ce20a8 pgaio_io_start_readv 
+ 36
3   postgres                            0x0000000102d01200 FileStartReadV + 252
4   postgres                            0x0000000102d544e0 mdstartreadv + 668
5   postgres                            0x0000000102d57ac8 smgrstartreadv + 116
6   postgres                            0x0000000102cea474 AsyncReadBuffers + 
1996
7   postgres                            0x0000000102ce9430 StartReadBuffersImpl 
+ 1196
8   postgres                            0x0000000102ce8f74 StartReadBuffers + 64
9   postgres                            0x0000000102ce657c 
read_stream_start_pending_read + 1204
10  postgres                            0x0000000102ce5c60 
read_stream_look_ahead + 812
11  postgres                            0x0000000102ce57f8 
read_stream_next_buffer + 2356
12  postgres                            0x00000001026da1b8 
heap_fetch_next_buffer + 284
13  postgres                            0x00000001026caf54 heapgettup_pagemode 
+ 192
14  postgres                            0x00000001026cb524 heap_getnextslot + 84
15  postgres                            0x0000000102a4d984 
table_scan_getnextslot + 340
16  postgres                            0x0000000102a4d790 SeqNext + 148
17  postgres                            0x0000000102a4de64 ExecScanFetch + 772
18  postgres                            0x0000000102a4da38 ExecScanExtended + 
164
19  postgres                            0x0000000102a4d374 
ExecSeqScanWithQualProject + 248
20  postgres                            0x0000000102a13248 ExecProcNode + 68
21  postgres                            0x0000000102a12e70 fetch_input_tuple + 
180
22  postgres                            0x0000000102a12bb4 agg_retrieve_direct 
+ 1664
23  postgres                            0x0000000102a0f914 ExecAgg + 240
...
2025-06-02 00:45:11.409 EDT [57192:4] LOG:  client backend (PID 63817) was 
terminated by signal 6: Abort trap: 6
2025-06-02 00:45:11.409 EDT [57192:5] DETAIL:  Failed process was running: DO 
$x$
        DECLARE
                r record;
                r2 record;
                cond text;
                idx_ctids tid[];
                ss_ctids tid[];
                count int;
                plan_ok bool;
                plan_line text;
        BEGIN
...

Best regards,
Alexander Lakhin
Neon (https://neon.tech)
diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c
index c64d815ebd1..49ecb264d9d 100644
--- a/src/backend/storage/aio/aio.c
+++ b/src/backend/storage/aio/aio.c
@@ -668,6 +668,7 @@ pgaio_io_reclaim(PgAioHandle *ioh)
 	Assert(ioh->owner_procno == MyProcNumber);
 	Assert(ioh->state != PGAIO_HS_IDLE);
 
+fprintf(stderr, "!!!pgaio_io_reclaim [%d]| ioh: %p, ioh->op: %d, ioh->state: %d, ioh->result: %d, ioh->num_callbacks: %d\n", getpid(), ioh, ioh->op, ioh->state, ioh->result, ioh->num_callbacks);
 	/* see comment in function header */
 	HOLD_INTERRUPTS();
 
diff --git a/src/backend/storage/aio/aio_io.c b/src/backend/storage/aio/aio_io.c
index 00e176135a6..2ae78217871 100644
--- a/src/backend/storage/aio/aio_io.c
+++ b/src/backend/storage/aio/aio_io.c
@@ -85,6 +85,7 @@ pgaio_io_start_readv(PgAioHandle *ioh,
 	ioh->op_data.read.iov_length = iovcnt;
 
 	pgaio_io_stage(ioh, PGAIO_OP_READV);
+fprintf(stderr, "!!!pgaio_io_start_readv [%d]| ioh: %p, ioh->op: %d\n", getpid(), ioh, ioh->op);
 }
 
 void
@@ -158,6 +159,7 @@ pgaio_io_before_start(PgAioHandle *ioh)
 	Assert(ioh->state == PGAIO_HS_HANDED_OUT);
 	Assert(pgaio_my_backend->handed_out_io == ioh);
 	Assert(pgaio_io_has_target(ioh));
+if (ioh->op != PGAIO_OP_INVALID) elog(LOG, "!!!pgaio_io_before_start| ioh: %p, ioh->op: %d, ioh->state: %d, ioh->result: %d, ioh->num_callbacks: %d", ioh, ioh->op, ioh->state, ioh->result, ioh->num_callbacks);
 	Assert(ioh->op == PGAIO_OP_INVALID);
 
 	/*
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index f93131a645e..4bb428b2875 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -67,6 +67,7 @@
 #include "utils/resowner.h"
 #include "utils/timestamp.h"
 
+#include "storage/aio_internal.h"
 
 /* Note: these two macros only work on shared buffers, not local ones! */
 #define BufHdrGetBlock(bufHdr)	((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
@@ -1855,7 +1856,10 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
 		pgaio_submit_staged();
 
 		ioh = pgaio_io_acquire(CurrentResourceOwner, &operation->io_return);
+fprintf(stderr, "!!!AsyncReadBuffers [%d] (2)| blocknum: %d, ioh: %p, ioh->op: %d, ioh->state: %d, ioh->result: %d, ioh->num_callbacks: %d\n", getpid(), blocknum, ioh, ((PgAioHandle *)ioh)->op, ((PgAioHandle *)ioh)->state, ((PgAioHandle *)ioh)->result, ((PgAioHandle *)ioh)->num_callbacks);
 	}
+else
+fprintf(stderr, "!!!AsyncReadBuffers [%d] (1)| blocknum: %d, ioh: %p, ioh->op: %d, ioh->state: %d, ioh->result: %d, ioh->num_callbacks: %d\n", getpid(), blocknum, ioh, ((PgAioHandle *)ioh)->op, ((PgAioHandle *)ioh)->state, ((PgAioHandle *)ioh)->result, ((PgAioHandle *)ioh)->num_callbacks);
 
 	/*
 	 * Check if we can start IO on the first to-be-read buffer.
diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm
index 1c11750ac1d..1135d6dc399 100644
--- a/src/test/perl/PostgreSQL/Test/Cluster.pm
+++ b/src/test/perl/PostgreSQL/Test/Cluster.pm
@@ -712,7 +712,7 @@ sub init
 		print $conf "wal_log_hints = on\n";
 		print $conf "hot_standby = on\n";
 		# conservative settings to ensure we can run multiple postmasters:
-		print $conf "shared_buffers = 1MB\n";
+		print $conf "shared_buffers = 512kB\n";
 		print $conf "max_connections = 10\n";
 		# limit disk space consumption, too:
 		print $conf "max_wal_size = 128MB\n";

Reply via email to