#!/bin/bash
# Deterministic reproduction: multixact compat logic crash-restart bug
#
# Primary: pg_old (16.11 with sleep injection in MultiXactIdCreateFromMembers)
#   Sleep 60s after GetNewMultiXactId() but before XLogInsert(CREATE_ID:N),
#   only when multi N is last entry on offset page (entry 2047).
#
# Standby: pg_new (16.13)
#
# The sleep creates a window where CHECKPOINT captures nextMXact=N+1
# but CREATE_ID:N hasn't been written to WAL yet. After standby
# crash-restart, StartupMultiXact sets latest_page_number=page(N+1),
# causing the compat logic to skip page initialization -> FATAL.
#

# Strict mode: stop on command errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Installation prefixes of the two PostgreSQL builds used by this script.
PG_OLD="/home/postgres/pg_old"
PG_NEW="/home/postgres/pg_new"

# Scratch directory; both data directories and both server logs live in it.
TESTDIR="/home/postgres/test_mxact"
PRIMARY_DATA="${TESTDIR}/primary"
STANDBY_DATA="${TESTDIR}/standby"
PRIMARY_LOG="${TESTDIR}/primary.log"
STANDBY_LOG="${TESTDIR}/standby.log"

# Ports the primary and the standby listen on.
PRIMARY_PORT=15432
STANDBY_PORT=15433

# Wrappers that run a tool from one specific installation with that
# installation's lib directory on LD_LIBRARY_PATH, so the old and new builds
# never pick up each other's shared libraries.

# Run a tool from the pg_old installation.
# Arguments: $1 - tool name under $PG_OLD/bin; remaining arguments are passed
#            through to the tool unchanged.
# Returns:   the tool's exit status, or 2 if no tool name was given.
pg_old() {
    if [ "$#" -eq 0 ]; then
        echo "pg_old: missing tool name" >&2
        return 2
    fi
    local tool=$1
    shift
    LD_LIBRARY_PATH="$PG_OLD/lib" "$PG_OLD/bin/$tool" "$@"
}

# Run a tool from the pg_new installation (same contract as pg_old).
pg_new() {
    if [ "$#" -eq 0 ]; then
        echo "pg_new: missing tool name" >&2
        return 2
    fi
    local tool=$1
    shift
    LD_LIBRARY_PATH="$PG_NEW/lib" "$PG_NEW/bin/$tool" "$@"
}

# psql from pg_old against the primary's port, database "postgres".
# Arguments: extra psql arguments.
psql_pri() {
    LD_LIBRARY_PATH="$PG_OLD/lib" "$PG_OLD/bin/psql" -p "$PRIMARY_PORT" -d postgres "$@"
}

# psql from pg_new against the standby's port, database "postgres".
# Arguments: extra psql arguments.
psql_stb() {
    LD_LIBRARY_PATH="$PG_NEW/lib" "$PG_NEW/bin/psql" -p "$STANDBY_PORT" -d postgres "$@"
}

# Best-effort teardown, installed as the EXIT trap so it runs on every exit
# path. Every step tolerates failure: nothing here may prevent the two servers
# from being stopped.
# Globals: TESTDIR, PG_OLD, PG_NEW, PRIMARY_DATA, STANDBY_DATA (read)
cleanup() {
    local fifo job_pids

    # Remove the switch file that enables the sleep injection.
    rm -f /tmp/mxact_sleep_enabled 2>/dev/null || true

    # Ask any psql session fed through a FIFO to quit.
    for fifo in "$TESTDIR"/fifo_*; do
        # An unmatched glob stays literal; writing to it would create a stray
        # regular file, so only real FIFOs are touched.
        [ -p "$fifo" ] || continue
        # Open read-write instead of write-only: a write-only open blocks
        # until a reader exists and would hang this trap forever if the
        # reader is already gone. On Linux an O_RDWR open of a FIFO never
        # blocks.
        { printf '%s\n' '\q' 1<>"$fifo"; } 2>/dev/null || true
    done

    # Kill whatever background jobs are still around (nothing to do if none).
    job_pids=$(jobs -p)
    if [ -n "$job_pids" ]; then
        # shellcheck disable=SC2086 -- one PID per word is intended
        kill $job_pids 2>/dev/null || true
    fi
    sleep 1
    rm -f "$TESTDIR"/fifo_* 2>/dev/null || true

    # Stop both servers without waiting for a clean shutdown.
    LD_LIBRARY_PATH="$PG_OLD/lib" "$PG_OLD/bin/pg_ctl" -D "$PRIMARY_DATA" -m immediate stop 2>/dev/null || true
    LD_LIBRARY_PATH="$PG_NEW/lib" "$PG_NEW/bin/pg_ctl" -D "$STANDBY_DATA" -m immediate stop 2>/dev/null || true
}
trap cleanup EXIT

# Poll once per second until the standby's replay LSN equals the primary's
# current WAL LSN.
# Arguments: $1 - maximum number of polls (default 120)
# Returns:   0 as soon as both LSNs are equal and non-empty,
#            1 if that never happened within the allowed polls.
wait_standby_catchup() {
    local timeout=${1:-120}
    local attempt primary_lsn standby_lsn

    for ((attempt = 1; attempt <= timeout; attempt++)); do
        # Assignments are kept separate from 'local' so a failing query is
        # actually seen; a failed or empty query simply counts as "not caught
        # up yet" and is retried after the pause below.
        if primary_lsn=$(psql_pri -tAc "SELECT pg_current_wal_lsn()" 2>/dev/null) &&
            standby_lsn=$(psql_stb -tAc "SELECT pg_last_wal_replay_lsn()" 2>/dev/null) &&
            [ -n "$standby_lsn" ] && [ "$standby_lsn" = "$primary_lsn" ]; then
            return 0
        fi
        sleep 1
    done
    return 1
}

# Start from an empty scratch directory. The two commands are separate
# statements so that a failing rm aborts the script under 'set -e' instead of
# silently skipping mkdir; ':?' refuses to run rm -rf on an empty path.
rm -rf -- "${TESTDIR:?}"
mkdir -p -- "$TESTDIR"
# Make sure the sleep injection is disabled until Step 4 enables it.
rm -f /tmp/mxact_sleep_enabled 2>/dev/null || true

echo "=== Multixact compat logic crash-restart bug repro ==="
echo "  Primary: pg_old 16.11 (sleep injection), Standby: pg_new 16.13"
echo ""

# ============================================================
# Step 1: Initialize primary
# ============================================================
printf '%s\n' "--- Step 1: Init primary (pg_old 16.11 with sleep injection)"

# Build a fresh cluster; only initdb's stdout is discarded.
pg_old initdb -D "$PRIMARY_DATA" --no-locale -E UTF8 > /dev/null

# Append the primary's settings: its port, replication support, a long
# checkpoint interval, and debug-level logging collected into $TESTDIR.
printf '%s\n' \
    "port = $PRIMARY_PORT" \
    "wal_level = replica" \
    "max_wal_senders = 5" \
    "hot_standby = on" \
    "checkpoint_timeout = 600" \
    "log_min_messages = debug1" \
    "logging_collector = on" \
    "log_directory = '$TESTDIR'" \
    "log_filename = 'primary.log'" \
    >> "$PRIMARY_DATA/postgresql.conf"

# Allow passwordless replication connections over the local socket and from
# localhost.
printf '%s\n' \
    'local   replication     all                     trust' \
    'host    replication     all     127.0.0.1/32    trust' \
    >> "$PRIMARY_DATA/pg_hba.conf"

pg_old pg_ctl -D "$PRIMARY_DATA" -l "$PRIMARY_LOG" start
sleep 2

# Create the test table with ten rows, then the replication slot the standby
# will use.
for sql in \
    "CREATE TABLE t(id int PRIMARY KEY); INSERT INTO t SELECT generate_series(1,10);" \
    "SELECT pg_create_physical_replication_slot('standby_slot');"; do
    psql_pri -c "$sql"
done

printf '  Primary running on port %s\n' "$PRIMARY_PORT"
echo

# ============================================================
# Step 2: Generate multixacts until entry = 2047
# ============================================================
printf '%s\n' "--- Step 2: Generate multixacts until nextMulti entry = 2047"

# Print next_multixact_id as reported by pg_control_checkpoint() on the
# primary. psql's stderr is discarded; its exit status is passed through.
# NOTE(review): the value presumably only moves when a checkpoint is taken,
# which is why callers in this script issue CHECKPOINT before re-reading it.
get_next_multi() {
    local query="SELECT next_multixact_id FROM pg_control_checkpoint()"
    psql_pri -tAc "$query" 2>/dev/null
}

# Print the position of a multixact ID within a group of 2048 consecutive IDs
# (2047 is the last entry on an offsets page, per the header of this script).
# Arguments: $1 - multixact ID
get_entry() {
    local multi=$1
    printf '%d\n' "$(( multi % 2048 ))"
}

# pgbench script: one transaction that share-locks row 1 twice. Run from
# several concurrent clients, this is what produces multixacts.
cat > "$TESTDIR/mxact.sql" <<'SQL'
BEGIN;
SELECT * FROM t WHERE id = 1 FOR SHARE;
SELECT * FROM t WHERE id = 1 FOR SHARE;
COMMIT;
SQL

# Run the pgbench script with $1 clients and $2 transactions per client, then
# take a CHECKPOINT so get_next_multi sees the new counter value.
advance_multixacts() {
    pg_old pgbench -p "$PRIMARY_PORT" -d postgres \
        -n -f "$TESTDIR/mxact.sql" -c "$1" -t "$2" > /dev/null 2>&1
    psql_pri -c "CHECKPOINT" > /dev/null 2>&1
}

NEXT=$(get_next_multi)
ENTRY=$(get_entry $NEXT)
echo "  Current: nextMulti=$NEXT, entry=$ENTRY"

# Batch phase: large pgbench runs until the entry index is within the last
# few slots of a page (>= 2040).
ENTRY=$(get_entry $(get_next_multi))
until [ "$ENTRY" -ge 2040 ]; do
    advance_multixacts 10 50
    ENTRY=$(get_entry $(get_next_multi))
done

# Fine-tune phase: minimal pgbench runs (2 clients, 1 transaction each) until
# the entry index is exactly 2047.
while :; do
    NEXT=$(get_next_multi)
    ENTRY=$(get_entry $NEXT)
    echo "  nextMulti=$NEXT, entry=$ENTRY"
    [ "$ENTRY" -eq 2047 ] && break
    advance_multixacts 2 1
done

NEXT=$(get_next_multi)
ENTRY=$(get_entry $NEXT)
echo "  Ready: nextMulti=$NEXT, entry=$ENTRY (last entry on page)"
echo

# ============================================================
# Step 3: Create standby via pg_basebackup
# ============================================================
printf '%s\n' "--- Step 3: Create standby (pg_new 16.13 with compat logic)"

# Clone the primary into the standby's data directory: plain format, WAL
# streamed alongside, recovery configuration written, fast checkpoint, using
# the slot created in Step 1.
pg_old pg_basebackup \
    --pgdata="$STANDBY_DATA" \
    --port="$PRIMARY_PORT" \
    --format=plain \
    --wal-method=stream \
    --write-recovery-conf \
    --checkpoint=fast \
    --slot=standby_slot

# Standby-specific overrides, appended after the settings copied from the
# primary: own port, own log file, and a 30 s checkpoint_timeout.
printf '%s\n' \
    "port = $STANDBY_PORT" \
    "hot_standby = on" \
    "checkpoint_timeout = 30" \
    "log_min_messages = debug1" \
    "logging_collector = on" \
    "log_directory = '$TESTDIR'" \
    "log_filename = 'standby.log'" \
    >> "$STANDBY_DATA/postgresql.conf"

pg_new pg_ctl -D "$STANDBY_DATA" -l "$STANDBY_LOG" start
sleep 2

printf '%s\n' "  Waiting for standby to catch up..."
# A timeout here returns non-zero and, under 'set -e', ends the script.
wait_standby_catchup 60
printf '%s\n' "  Standby caught up"

# Give the standby longer than its 30 s checkpoint_timeout so that it can
# perform a restartpoint.
printf '%s\n' "  Waiting 40s for standby restartpoint..."
sleep 40
echo

# ============================================================
# Step 4: Trigger WAL disorder
# ============================================================
echo "--- Step 4: Trigger WAL disorder via sleep injection"

# Session A: dirty the heap buffer
# A long-lived psql session is driven through a FIFO: 'tail -f' keeps reading
# the FIFO and forwards everything to psql, presumably so that psql does not
# see end-of-input when each 'echo' below closes its end of the FIFO.
FIFO_A=$TESTDIR/fifo_a
mkfifo "$FIFO_A"
(tail -f "$FIFO_A" | psql_pri -v ON_ERROR_STOP=1 > /dev/null 2>&1) &
# PID of the background subshell; not referenced again in the visible script.
PIDA=$!
# BEGIN without COMMIT: the transaction, and with it the share lock on row 1,
# stays open until the session is told to quit in Step 6.
echo "BEGIN; SELECT * FROM t WHERE id=1 FOR SHARE;" > "$FIFO_A"
sleep 2

# Flush the dirty buffer so checkpoint during sleep won't block
psql_pri -c "CHECKPOINT" > /dev/null 2>&1
echo "  Intermediate CHECKPOINT done (flushed dirty buffer)"
sleep 1

# Session B: trigger multixact creation -> 60s sleep
# Enable sleep injection now (file switch)
# The switch file is created only at this point, so the multixacts generated
# in Step 2 ran without the injected sleep.
touch /tmp/mxact_sleep_enabled
FIFO_B=$TESTDIR/fifo_b
mkfifo "$FIFO_B"
(tail -f "$FIFO_B" | psql_pri -v ON_ERROR_STOP=1 > /dev/null 2>&1) &
# PID of the background subshell; not referenced again in the visible script.
PIDB=$!
# Second share-locker on the row session A already holds; per the header of
# this script, this is the multixact creation that hits the injected sleep.
echo "SELECT * FROM t WHERE id=1 FOR SHARE;" > "$FIFO_B"
# Presumably gives session B time to reach the sleep before the checkpoint.
sleep 3

echo "  Session B triggered multixact creation (sleeping 60s in backend)"

# CHECKPOINT during sleep window
psql_pri -c "CHECKPOINT" > /dev/null 2>&1
echo "  CHECKPOINT during sleep done (captured nextMulti=N+1)"

# Verify CREATE_ID:N not yet in WAL
# PREV is the multixact ID just below the checkpoint's next_multixact_id,
# i.e. the "N" of the header comment; Steps 5 and 6 reuse it.
NEXT=$(get_next_multi)
PREV=$((NEXT - 1))
WAL_CHECK=""
# Scan every WAL segment file of the primary; stop at the first one that
# contains the record.
for wf in "$PRIMARY_DATA/pg_wal"/0000000*; do
    # The trailing space in the pattern keeps ID $PREV from matching longer
    # IDs that merely start with the same digits. '|| true' keeps a "no match"
    # (or a pg_waldump error) from aborting the script under errexit/pipefail.
    WAL_CHECK=$(LD_LIBRARY_PATH=$PG_OLD/lib $PG_OLD/bin/pg_waldump "$wf" -r MultiXact 2>/dev/null | grep "CREATE_ID $PREV " || true)
    [ -n "$WAL_CHECK" ] && break
done
# This check is informational only: a record that is already present yields a
# warning, not an exit.
if [ -z "$WAL_CHECK" ]; then
    echo "  CREATE_ID:$PREV not in WAL yet (sleep is working)"
else
    echo "  WARNING: CREATE_ID:$PREV already in WAL, sleep may not have triggered"
fi
echo ""

# ============================================================
# Step 5: Crash standby
# ============================================================
echo "--- Step 5: Crash standby (immediate stop)"

# Wait for standby to replay the checkpoint and do restartpoint
# (40 s is longer than the standby's 30 s checkpoint_timeout).
echo "  Waiting 40s for standby to process checkpoint + restartpoint..."
sleep 40

# An immediate stop simulates a crash.
pg_new pg_ctl -D "$STANDBY_DATA" -m immediate stop
echo "  Standby crashed"

# Verify page P+1 not on disk
# Step 2 can leave multixact N (= $PREV, set in Step 4) on any offsets page,
# so the segment file and required size are derived from $PREV rather than
# assumed to be file 0000 / page 1.
# Layout assumed (PostgreSQL 16 defaults — confirm if built with a non-default
# block size): 2048 offsets per 8192-byte page, 32 pages per SLRU segment
# file, segment files named with 4 upper-case hex digits.
MXOFF_NEXT_PAGE=$(( PREV / 2048 + 1 ))
MXOFF_SEGNO=$(( MXOFF_NEXT_PAGE / 32 ))
MXOFF_NEEDED=$(( (MXOFF_NEXT_PAGE % 32 + 1) * 8192 ))
OFFSET_FILE="$STANDBY_DATA/pg_multixact/offsets/$(printf '%04X' "$MXOFF_SEGNO")"
if [ -f "$OFFSET_FILE" ]; then
    FSIZE=$(stat -c%s "$OFFSET_FILE")
    echo "  Offset file ${OFFSET_FILE##*/} size: $FSIZE bytes (page $MXOFF_NEXT_PAGE needs file size >= $MXOFF_NEEDED)"
    if [ "$FSIZE" -lt "$MXOFF_NEEDED" ]; then
        echo "  Page P+1 not on disk (file too small)"
    else
        echo "  NOTE: page P+1 already on disk; the bug may not reproduce"
    fi
else
    # A page that starts a new segment lives in a file that does not exist
    # yet when the page has never been written.
    echo "  Offset file ${OFFSET_FILE##*/} does not exist (page $MXOFF_NEXT_PAGE not on disk)"
fi
echo ""

# ============================================================
# Step 6: Wait for sleep to end
# ============================================================
echo "--- Step 6: Wait for sleep to end and CREATE_ID to be written"

# Steps 4 and 5 already waited about 3 s + 40 s after session B was started;
# another 30 s takes the total past the 60 s injected sleep.
sleep 30
# Tell both FIFO-driven psql sessions to quit; failures are ignored because
# the sessions may already be gone.
echo "\\q" > "$FIFO_A" 2>/dev/null || true
echo "\\q" > "$FIFO_B" 2>/dev/null || true
sleep 5

# Verify CREATE_ID:N now in WAL
# Same scan as in Step 4, using the $PREV computed there; this time the
# record is required to be present.
WAL_CHECK=""
for wf in "$PRIMARY_DATA/pg_wal"/0000000*; do
    # '|| true' keeps a "no match" (or a pg_waldump error) on one segment from
    # aborting the script under errexit/pipefail.
    WAL_CHECK=$(LD_LIBRARY_PATH=$PG_OLD/lib $PG_OLD/bin/pg_waldump "$wf" -r MultiXact 2>/dev/null | grep "CREATE_ID $PREV " || true)
    [ -n "$WAL_CHECK" ] && break
done
if [ -n "$WAL_CHECK" ]; then
    echo "  CREATE_ID:$PREV now in WAL"
else
    # Without the record the scenario is not set up; stop here (the EXIT trap
    # still runs cleanup).
    echo "  ERROR: CREATE_ID:$PREV still not in WAL"
    exit 1
fi
echo ""

# ============================================================
# Step 7: Restart standby -> expect FATAL
# ============================================================
echo "--- Step 7: Restart standby (expect FATAL with buggy compat logic)"

# A failed start is one of the expected outcomes, so it must not abort the
# script.
pg_new pg_ctl -D "$STANDBY_DATA" -l "$STANDBY_LOG" start || true
sleep 5

# Check if standby is running
if pg_new pg_ctl -D "$STANDBY_DATA" status > /dev/null 2>&1; then
    echo "  Standby is running (fix is working or bug not triggered)"
    # Check for the debug message indicating compat logic fired
    if grep -q "next offsets page is not initialized" "$TESTDIR/standby.log"; then
        echo "  PASS: compat logic initialized the page"
    else
        # Report the inconclusive case explicitly instead of staying silent.
        echo "  NOTE: compat-logic debug message not found in standby log; the bug path may not have been exercised"
    fi
else
    echo "  Standby is NOT running (crashed)"
    if grep -q "read too few bytes" "$TESTDIR/standby.log"; then
        echo "  FAIL: FATAL - read too few bytes (compat logic bug confirmed)"
        echo ""
        echo "  Relevant log lines:"
        # '|| true': an empty grep result must not abort the script through
        # pipefail before the final banner.
        grep -E "(FATAL|DETAIL|CONTEXT|next MultiXactId)" "$TESTDIR/standby.log" | tail -10 || true
    else
        # The standby is down for some other reason; show why rather than
        # ending without any explanation.
        echo "  NOTE: expected FATAL \"read too few bytes\" not found; last standby log lines:"
        tail -n 20 "$TESTDIR/standby.log" 2>/dev/null || true
    fi
fi

echo ""
echo "=== Done ==="
