And here's a script reproducing the issue. It creates the clusters,
sets up the logical replication and runs the necessary query to leave
FPI_FOR_HINT as the last written record.

If the reproduction succeeds, the final `pg_ctl stop` will hang at 'waiting for
server to shut down.......', with the walsender stuck at 100% CPU.
#!/bin/bash
# Reproducer: leave an FPI_FOR_HINT record as the last record in the primary's
# WAL while a logical replication walsender is attached, then stop the primary.
# The final `pg_ctl stop` is expected to hang with the walsender spinning.
#
# Creates two clusters in ./primary and ./replica (relative to the current
# directory) and the log files ./primary.log and ./replica.log.
set -eu

export PGDATABASE=postgres

# init_cluster DATADIR CONFIG
#   Runs initdb -k in DATADIR, overwrites DATADIR/postgresql.conf with CONFIG
#   (a trailing newline is appended), and starts the server with its log
#   going to DATADIR.log.
init_cluster() {
  local datadir=$1
  local config=$2

  initdb -k -D "$datadir"
  printf '%s\n' "$config" > "$datadir/postgresql.conf"
  pg_ctl -D "$datadir" -l "$datadir.log" -U postgres start
}

# Run one SQL command string against the primary (default connection port).
sql_primary() {
  psql -c "$1"
}

# Run one SQL command string against the replica (port 5433).
sql_replica() {
  psql -p 5433 -c "$1"
}

# The whole postgresql.conf of each cluster; the timeouts are disabled so a
# debugger can be attached without the replication connection being dropped.
primary_conf='port = 5432
wal_level = logical
# Just make it easier to gdb into the walsender without getting it killed
wal_receiver_status_interval = 0
wal_sender_timeout = 0'

replica_conf='port = 5433
wal_receiver_timeout = 0'

# Bring up the primary first, then the replica.
init_cluster primary "$primary_conf"
init_cluster replica "$replica_conf"

# Empty pgbench schema on both sides (init steps: drop, create tables,
# primary keys -- no data generation).
pgbench -i -Idtp
pgbench -i -Idtp -p 5433

# Publish pgbench_accounts on the primary and subscribe from the replica.
sql_primary 'CREATE PUBLICATION pgbench_accounts_replication for table pgbench_accounts;'
sql_replica "CREATE SUBSCRIPTION my_subscription CONNECTION 'host=127.0.0.1 port=5432' PUBLICATION pgbench_accounts_replication;"

# Insert enough rows to fill the first heap page.
sql_primary "INSERT INTO pgbench_accounts SELECT *, *, *, '' FROM generate_series(0, 62);"

# A rolled-back UPDATE on the full page sets the page-full hint bit.
sql_primary "BEGIN; UPDATE pgbench_accounts SET bid=4 where aid=1; ROLLBACK;"

# After a checkpoint the next modification of the page must log a full-page
# image.
sql_primary "CHECKPOINT;"

# A plain read now emits FPI_FOR_HINT, which ends up as the last WAL record.
sql_primary "BEGIN; SELECT ctid, * FROM pgbench_accounts WHERE aid=2; ROLLBACK;"

# Shut the primary down; this is the step expected to block while the
# walsender sits at 100% CPU.
pg_ctl stop -D primary

Reply via email to