Hello Andres,
22.07.2025 02:19, Andres Freund wrote:
Hi,
On 2025-06-19 10:16:12 -0500, Nico Williams wrote:
On Thu, Jun 19, 2025 at 05:05:25PM +0200, Daniel Gustafsson wrote:
I also dug out an archeologically old MacBook Pro running macOS High Sierra
10.13.6 with an i5 using Apple LLVM version 10.0.0 (clang-1000.10.44.4), and it
too fails to reproduce any issue.
It's not going to be reproducible on x86_64 macs. It has to be ARM
macs.
In addition to various other folks that had reported in, I had various macs,
both real and virtualized, run this test in a loop. For weeks - without
reproducing the issue once [1]. Since this can only be reproduced on
Alexander and Konstantin's laptops, running an older macos [2] and running
MDM, I'm for now considering this issue to not be reproducible outside of that
environment. That's deeply unsatisfying, but I just don't see what else I
could do.
I got access to that M1 laptop the past week and reproduced the failure
again. Then I just copied all the postgres directory to my MacBook M4 Pro
and ran the same tests there. And the issue reproduced! This fact made me
believe that that's not the old CPU-specific issue. Armed with this belief,
I managed to reproduce this with a build made on M4 too.
So for now I have pretty stable reproducer which I'd like to share (please
forgive me some rough edges):
git apply AIO-debugging.patch
CC=/opt/homebrew/Cellar/llvm@17/17.0.6/bin/clang-17 CFLAGS="" ./configure --enable-cassert --enable-debug
--enable-tap-tests --without-icu --without-zlib --prefix=/tmp/postgres -q && make -s -j8 && make -s install && make -s check
for i in {1..10}; do np=$((20 + $RANDOM % 10)); echo "iteration $i: $np"; time parallel -j40 --linebuffer --tag
/tmp/repro-AIO-Assert.sh {} ::: `seq $np` || break; sleep $(($RANDOM % 20)); done; echo -e "\007"
(For M1 it works with ~6 servers.)
The script executes opr_sanity.sql from the regress suite, but it can be
reduced, of course. Though reducing the number of queries seemingly leads
to decrease of the failure's probability, so I decided to leave it as-is
for now. Anyway, I can work on simplifying the repro, if needed.
This fails for me as below:
Run 1:
iteration 3: 29
...
24 TRAP: failed Assert("ioh->op == PGAIO_OP_INVALID"), File: "aio_io.c",
Line: 167, PID: 56420
...
real 12m3.018s
server24 log contains:
!!!AsyncReadBuffers [56420] (1)| blocknum: 73, ioh: 0x102588640, ioh->op: 0, ioh->state: 1, ioh->target: 0, ioh->result:
0, ioh->num_callbacks: 0, ioh->generation: 16293
!!!AsyncReadBuffers [56420] before smgrstartreadv| blocknum: 73, ioh: 0x102588640, ioh->op: 170, pre_op: 170,
ioh->state: 1, ioh->target: 0, ioh->result: 0, ioh->num_callbacks: 1, ioh->generation: 16293
2025-08-20 19:20:23.121 EEST [56420] LOG: !!!FileStartReadV| ioh: 0x102588640, ioh->op: 170, ioh->state: 1,
ioh->target: 1, ioh->result: 0, ioh->num_callbacks: 2, ioh->generation: 16293
2025-08-20 19:20:23.121 EEST [56420] STATEMENT: SELECT DISTINCT
p1.proargtypes[7]::regtype, p2.proargtypes[7]::regtype
FROM pg_proc AS p1, pg_proc AS p2
WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
p1.prokind != 'a' AND p2.prokind != 'a' AND
(p1.proargtypes[7] < p2.proargtypes[7])
ORDER BY 1, 2;
2025-08-20 19:20:23.121 EEST [56420] LOG: !!!pgaio_io_before_start| ioh: 0x102588640, ioh->op: 170, ioh->state: 1,
ioh->target: 1, ioh->result: 0, ioh->num_callbacks: 2, ioh->generation: 16293
2025-08-20 19:20:23.121 EEST [56420] STATEMENT: SELECT DISTINCT
p1.proargtypes[7]::regtype, p2.proargtypes[7]::regtype
FROM pg_proc AS p1, pg_proc AS p2
WHERE p1.oid != p2.oid AND
p1.prosrc = p2.prosrc AND
p1.prolang = 12 AND p2.prolang = 12 AND
p1.prokind != 'a' AND p2.prokind != 'a' AND
(p1.proargtypes[7] < p2.proargtypes[7])
ORDER BY 1, 2;
TRAP: failed Assert("ioh->op == PGAIO_OP_INVALID"), File: "aio_io.c", Line:
167, PID: 56420
Run 2:
iteration 3: 21
...
21 TRAP: failed Assert("ioh->op == PGAIO_OP_INVALID"), File: "aio_io.c",
Line: 167, PID: 378
...
real 8m32.370s
server21.log contains:
!!!pgaio_io_reclaim [378]| ioh: 0x103f533d0, ioh->op: 0, ioh->state: 0,
ioh->target: 0, ioh->generation: 13609
!!!pgaio_io_acquire_nb [378]| ioh: 0x103f533d0, ioh->op: 170, pre_op: 170
!!!AsyncReadBuffers [378] (1)| blocknum: 55, ioh: 0x103f533d0, ioh->op: 170, ioh->state: 1, ioh->target: 0, ioh->result:
0, ioh->num_callbacks: 0, ioh->generation: 13609
!!!AsyncReadBuffers [378] before smgrstartreadv| blocknum: 55, ioh: 0x103f533d0, ioh->op: 170, pre_op: 170, ioh->state:
1, ioh->target: 0, ioh->result: 0, ioh->num_callbacks: 1, ioh->generation: 13609
!!!pgaio_io_reclaim [379]| ioh: 0x103f53340, ioh->op: 0, ioh->state: 1, ioh->target: 0, ioh->result: 0,
ioh->num_callbacks: 0, ioh->generation: 13457
!!!pgaio_io_reclaim [379]| ioh: 0x103f53340, ioh->op: 0, ioh->state: 0,
ioh->target: 0, ioh->generation: 13458
2025-08-20 19:53:56.726 EEST [378] LOG: !!!FileStartReadV| ioh: 0x103f533d0, ioh->op: 170, ioh->state: 1, ioh->target:
1, ioh->result: 0, ioh->num_callbacks: 2, ioh->generation: 13609
2025-08-20 19:53:56.726 EEST [378] STATEMENT: SELECT p1.oid, p1.proname
FROM pg_proc as p1
WHERE p1.prolang = 0 OR p1.prorettype = 0 OR
p1.pronargs < 0 OR
p1.pronargdefaults < 0 OR
p1.pronargdefaults > p1.pronargs OR
array_lower(p1.proargtypes, 1) != 0 OR
array_upper(p1.proargtypes, 1) != p1.pronargs-1 OR
0::oid = ANY (p1.proargtypes) OR
procost <= 0 OR
CASE WHEN proretset THEN prorows <= 0 ELSE prorows != 0 END OR
prokind NOT IN ('f', 'a', 'w', 'p') OR
provolatile NOT IN ('i', 's', 'v') OR
proparallel NOT IN ('s', 'r', 'u');
2025-08-20 19:53:56.726 EEST [378] LOG: !!!pgaio_io_before_start| ioh: 0x103f533d0, ioh->op: 170, ioh->state: 1,
ioh->target: 1, ioh->result: 0, ioh->num_callbacks: 2, ioh->generation: 13609
2025-08-20 19:53:56.726 EEST [378] STATEMENT: SELECT p1.oid, p1.proname
FROM pg_proc as p1
WHERE p1.prolang = 0 OR p1.prorettype = 0 OR
p1.pronargs < 0 OR
p1.pronargdefaults < 0 OR
p1.pronargdefaults > p1.pronargs OR
array_lower(p1.proargtypes, 1) != 0 OR
array_upper(p1.proargtypes, 1) != p1.pronargs-1 OR
0::oid = ANY (p1.proargtypes) OR
procost <= 0 OR
CASE WHEN proretset THEN prorows <= 0 ELSE prorows != 0 END OR
prokind NOT IN ('f', 'a', 'w', 'p') OR
provolatile NOT IN ('i', 's', 'v') OR
proparallel NOT IN ('s', 'r', 'u');
TRAP: failed Assert("ioh->op == PGAIO_OP_INVALID"), File: "aio_io.c", Line:
167, PID: 378
Run 3:
iteration 3: 29
...
2 TRAP: failed Assert("ioh->op == PGAIO_OP_INVALID"), File: "aio_io.c",
Line: 167, PID: 39023
...
real 11m51.809s
server2.log contains:
!!!pgaio_io_reclaim [39023]| ioh: 0x1045ff220, ioh->op: 0, ioh->state: 0,
ioh->target: 0, ioh->generation: 5268
!!!pgaio_io_acquire_nb [39023]| ioh: 0x1045ff220, ioh->op: 170, pre_op: 170
!!!AsyncReadBuffers [39023] (1)| blocknum: 67, ioh: 0x1045ff220, ioh->op: 170, ioh->state: 1, ioh->target: 0,
ioh->result: 0, ioh->num_callbacks: 0, ioh->generation: 5268
!!!AsyncReadBuffers [39023] before smgrstartreadv| blocknum: 67, ioh: 0x1045ff220, ioh->op: 170, pre_op: 170,
ioh->state: 1, ioh->target: 0, ioh->result: 0, ioh->num_callbacks: 1, ioh->generation: 5268
2025-08-20 20:24:41.767 EEST [39023] LOG: !!!FileStartReadV| ioh: 0x1045ff220, ioh->op: 170, ioh->state: 1,
ioh->target: 1, ioh->result: 0, ioh->num_callbacks: 2, ioh->generation: 5268
2025-08-20 20:24:41.767 EEST [39023] STATEMENT: SELECT p1.oid, p1.proname
FROM pg_proc as p1 LEFT JOIN pg_description as d
ON p1.tableoid = d.classoid and p1.oid = d.objoid and d.objsubid = 0
WHERE d.classoid IS NULL AND p1.oid <= 9999;
2025-08-20 20:24:41.767 EEST [39023] LOG: !!!pgaio_io_before_start| ioh: 0x1045ff220, ioh->op: 170, ioh->state: 1,
ioh->target: 1, ioh->result: 0, ioh->num_callbacks: 2, ioh->generation: 5268
2025-08-20 20:24:41.767 EEST [39023] STATEMENT: SELECT p1.oid, p1.proname
FROM pg_proc as p1 LEFT JOIN pg_description as d
ON p1.tableoid = d.classoid and p1.oid = d.objoid and d.objsubid = 0
WHERE d.classoid IS NULL AND p1.oid <= 9999;
TRAP: failed Assert("ioh->op == PGAIO_OP_INVALID"), File: "aio_io.c", Line:
167, PID: 39023
Best regards,
Alexander
diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c
index 3643f27ad6e..bc5202227da 100644
--- a/src/backend/storage/aio/aio.c
+++ b/src/backend/storage/aio/aio.c
@@ -206,6 +206,7 @@ pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
dlist_node *ion = dclist_pop_head_node(&pgaio_my_backend->idle_ios);
ioh = dclist_container(PgAioHandle, node, ion);
+int pre_op = ioh->op;
Assert(ioh->state == PGAIO_HS_IDLE);
Assert(ioh->owner_procno == MyProcNumber);
@@ -221,6 +222,11 @@ pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
ioh->report_return = ret;
ret->result.status = PGAIO_RS_UNKNOWN;
}
+
+if (ioh->op != PGAIO_OP_INVALID)
+{
+fprintf(stderr, "!!!pgaio_io_acquire_nb [%d]| ioh: %p, ioh->op: %d, pre_op: %d\n", getpid(), ioh, ioh->op, pre_op);
+}
}
RESUME_INTERRUPTS();
@@ -675,6 +681,7 @@ pgaio_io_reclaim(PgAioHandle *ioh)
Assert(ioh->owner_procno == MyProcNumber);
Assert(ioh->state != PGAIO_HS_IDLE);
+fprintf(stderr, "!!!pgaio_io_reclaim [%d]| ioh: %p, ioh->op: %d, ioh->state: %d, ioh->target: %d, ioh->result: %d, ioh->num_callbacks: %d, ioh->generation: %llu\n", getpid(), ioh, ioh->op, ioh->state, ioh->target, ioh->result, ioh->num_callbacks, ioh->generation);
/* see comment in function header */
HOLD_INTERRUPTS();
@@ -747,6 +754,7 @@ pgaio_io_reclaim(PgAioHandle *ioh)
dclist_push_head(&pgaio_my_backend->idle_ios, &ioh->node);
RESUME_INTERRUPTS();
+fprintf(stderr, "!!!pgaio_io_reclaim [%d]| ioh: %p, ioh->op: %d, ioh->state: %d, ioh->target: %d, ioh->generation: %llu\n", getpid(), ioh, ioh->op, ioh->state, ioh->target, ioh->generation);
}
/*
diff --git a/src/backend/storage/aio/aio_io.c b/src/backend/storage/aio/aio_io.c
index 520b5077df2..ffd7edcc82b 100644
--- a/src/backend/storage/aio/aio_io.c
+++ b/src/backend/storage/aio/aio_io.c
@@ -85,6 +85,7 @@ pgaio_io_start_readv(PgAioHandle *ioh,
ioh->op_data.read.iov_length = iovcnt;
pgaio_io_stage(ioh, PGAIO_OP_READV);
+fprintf(stderr, "!!!pgaio_io_start_readv [%d]| ioh: %p, ioh->op: %d, ioh->state: %d, ioh->target: %d, ioh->generation: %llu\n", getpid(), ioh, ioh->op, ioh->state, ioh->target, ioh->generation);
}
void
@@ -158,6 +159,11 @@ pgaio_io_before_start(PgAioHandle *ioh)
Assert(ioh->state == PGAIO_HS_HANDED_OUT);
Assert(pgaio_my_backend->handed_out_io == ioh);
Assert(pgaio_io_has_target(ioh));
+if (ioh->op != PGAIO_OP_INVALID)
+{
+elog(LOG, "!!!pgaio_io_before_start| ioh: %p, ioh->op: %d, ioh->state: %d, ioh->target: %d, ioh->result: %d, ioh->num_callbacks: %d, ioh->generation: %llu", ioh, ioh->op, ioh->state, ioh->target, ioh->result, ioh->num_callbacks, ioh->generation);
+}
+
Assert(ioh->op == PGAIO_OP_INVALID);
/*
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index fd7e21d96d3..6d9d1c60882 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -67,6 +67,7 @@
#include "utils/resowner.h"
#include "utils/timestamp.h"
+#include "storage/aio_internal.h"
/* Note: these two macros only work on shared buffers, not local ones! */
#define BufHdrGetBlock(bufHdr) ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
@@ -1850,7 +1851,10 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
pgaio_submit_staged();
ioh = pgaio_io_acquire(CurrentResourceOwner, &operation->io_return);
+fprintf(stderr, "!!!AsyncReadBuffers [%d] (2)| blocknum: %d, ioh: %p, ioh->op: %d, ioh->state: %d, ioh->target: %d, ioh->result: %d, ioh->num_callbacks: %d, ioh->generation: %llu\n", getpid(), blocknum, ioh, ((PgAioHandle *)ioh)->op, ((PgAioHandle *)ioh)->state, ((PgAioHandle *)ioh)->target, ((PgAioHandle *)ioh)->result, ((PgAioHandle *)ioh)->num_callbacks, ((PgAioHandle *)ioh)->generation);
}
+else
+fprintf(stderr, "!!!AsyncReadBuffers [%d] (1)| blocknum: %d, ioh: %p, ioh->op: %d, ioh->state: %d, ioh->target: %d, ioh->result: %d, ioh->num_callbacks: %d, ioh->generation: %llu\n", getpid(), blocknum, ioh, ((PgAioHandle *)ioh)->op, ((PgAioHandle *)ioh)->state, ((PgAioHandle *)ioh)->target, ((PgAioHandle *)ioh)->result, ((PgAioHandle *)ioh)->num_callbacks, ((PgAioHandle *)ioh)->generation);
/*
* Check if we can start IO on the first to-be-read buffer.
@@ -1902,6 +1906,7 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
}
else
{
+int pre_op = ioh->op;
instr_time io_start;
/* We found a buffer that we need to read in. */
@@ -1951,6 +1956,10 @@ AsyncReadBuffers(ReadBuffersOperation *operation, int *nblocks_progress)
* ---
*/
io_start = pgstat_prepare_io_time(track_io_timing);
+if (ioh->op != PGAIO_OP_INVALID)
+{
+fprintf(stderr, "!!!AsyncReadBuffers [%d] before smgrstartreadv| blocknum: %d, ioh: %p, ioh->op: %d, pre_op: %d, ioh->state: %d, ioh->target: %d, ioh->result: %d, ioh->num_callbacks: %d, ioh->generation: %llu\n", getpid(), blocknum, ioh, ((PgAioHandle *)ioh)->op, pre_op, ((PgAioHandle *)ioh)->state, ((PgAioHandle *)ioh)->target, ((PgAioHandle *)ioh)->result, ((PgAioHandle *)ioh)->num_callbacks, ((PgAioHandle *)ioh)->generation);
+}
smgrstartreadv(ioh, operation->smgr, forknum,
blocknum + nblocks_done,
io_pages, io_buffers_len);
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index a4ec7959f31..3ba3a3c0e62 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -102,6 +102,8 @@
#include "utils/resowner.h"
#include "utils/varlena.h"
+#include "storage/aio_internal.h"
+
/* Define PG_FLUSH_DATA_WORKS if we have an implementation for pg_flush_data */
#if defined(HAVE_SYNC_FILE_RANGE)
#define PG_FLUSH_DATA_WORKS 1
@@ -2235,6 +2237,10 @@ FileStartReadV(PgAioHandle *ioh, File file,
vfdP = &VfdCache[file];
+if (ioh->op != PGAIO_OP_INVALID)
+{
+elog(LOG, "!!!FileStartReadV| ioh: %p, ioh->op: %d, ioh->state: %d, ioh->target: %d, ioh->result: %d, ioh->num_callbacks: %d, ioh->generation: %llu", ioh, ((PgAioHandle *)ioh)->op, ((PgAioHandle *)ioh)->state, ((PgAioHandle *)ioh)->target, ((PgAioHandle *)ioh)->result, ((PgAioHandle *)ioh)->num_callbacks, ((PgAioHandle *)ioh)->generation);
+}
pgaio_io_start_readv(ioh, vfdP->fd, iovcnt, offset);
return 0;
diff --git a/src/include/storage/aio.h b/src/include/storage/aio.h
index 2933eea0649..7e2215bddfd 100644
--- a/src/include/storage/aio.h
+++ b/src/include/storage/aio.h
@@ -89,8 +89,8 @@ typedef enum PgAioOp
/* intentionally the zero value, to help catch zeroed memory etc */
PGAIO_OP_INVALID = 0,
- PGAIO_OP_READV,
- PGAIO_OP_WRITEV,
+ PGAIO_OP_READV = 0xaa,
+ PGAIO_OP_WRITEV = 0xbb,
/**
* In the near term we'll need at least:
#!/bin/bash
# Choose server id
[ -n "$1" ] && SID=$1 || SID=01
PGBIN=`pwd`/tmp_install/tmp/postgres/bin
PGDATA=`pwd`/tmpdb$SID
export PGDATA
export PATH="$PGBIN:$PATH"
export LD_LIBRARY_PATH="$PGBIN/../lib"
rm server$SID.log
pg_ctl stop -m immediate >/dev/null 2>&1
rm -rf "$PGDATA"; initdb >initdb$SID.log 2>&1
export PGPORT=$((15430 + 10#$SID))
echo "
port=$PGPORT
fsync = off
autovacuum = off
restart_after_crash = off
shared_buffers = 128kB
" > $PGDATA/postgresql.auto.conf
pg_ctl start -l server$SID.log >/dev/null
export PGDATABASE=regression
createdb regression
# Define the function to execute opr_sanity cleanly without test_setup
psql -q -c "CREATE FUNCTION binary_coercible(oid, oid) RETURNS bool LANGUAGE
SQL RETURN true"
res=0
for i in {1..100}; do
echo "ITERATION $i" >psql${SID}.log
psql -f src/test/regress/sql/opr_sanity.sql >>psql${SID}_1.log &
psql -f src/test/regress/sql/opr_sanity.sql >>psql${SID}_2.log &
wait
grep TRAP server$SID.log && { res=1; break; }
done
pg_ctl stop -m immediate >/dev/null
exit $res