Raising stop and warn limits

Noah Misch Sun, 21 Jun 2020 01:35:49 -0700

In brief, I'm proposing to raise xidWrapLimit-xidStopLimit to 3M and
xidWrapLimit-xidWarnLimit to 40M.  Likewise for mxact counterparts.



PostgreSQL has three "stop limit" values beyond which only single-user mode
will assign new values of a certain counter:

- xidStopLimit protects pg_xact, pg_commit_ts, pg_subtrans, and pg_serial.
  SetTransactionIdLimit() withholds a million XIDs, and warnings start ten
  million before that.
- multiStopLimit protects pg_multixact/offsets.  SetMultiXactIdLimit()
  withholds 100 mxacts, and warnings start ten million before that.
- offsetStopLimit protects pg_multixact/members.  SetOffsetVacuumLimit()
  withholds [1,2) SLRU segments thereof (50k-100k member XIDs).  No warning
  phase for this one.

Reasons to like a larger stop limit:

1. VACUUM, to advance a limit, may assign IDs subject to one of the limits.
   VACUUM formerly consumed XIDs, not mxacts.  It now consumes mxacts, not
   XIDs.  I think a DBA can suppress VACUUM's mxact consumption by stopping
   all transactions older than vacuum_freeze_min_age, including prepared
   transactions.

2. We currently have edge-case bugs when assigning values in the last few
   dozen pages before the wrap limit
   (https://postgr.es/m/20190214072623.ga1139...@rfd.leadboat.com and
   https://postgr.es/m/20200525070033.ga1591...@rfd.leadboat.com).  A higher
   stop limit could make this class of bug unreachable outside of single-user
   mode.  That's valuable against undiscovered bugs of this class.

3. Any bug in stop limit enforcement is less likely to have user impact.  For
   a live example, see the XXX comment that
   https://postgr.es/m/attachment/111084/slru-truncate-modulo-v3.patch adds to
   CheckPointPredicate().

Raising a stop limit prompts an examination of warn limits, which represent
the time separating the initial torrent of warnings from the service outage.
The current limits appeared in 2005; transaction rates have grown, while human
reaction times have not.  I like warnings starting when an SLRU is 98%
consumed (40M XIDs or mxacts remaining).  That doesn't change things enough to
make folks reconfigure VACUUM, and it buys back some of the grace period DBAs
had in 2005.  I considered 95-97%, but the max_val of
autovacuum_freeze_max_age would then start the warnings before the autovacuum.
While that wouldn't rule out a value lower than 98%, 98% felt fine anyhow.

For the new stop limits, I propose allowing 99.85% SLRU fullness (stop with 3M
XIDs or mxacts remaining).  If stopping this early will bother users, an
alternative is 3M for XIDs and 0.2M for others.  Either way leaves at least
two completely-idle segments for each SLRU, which I expect to mitigate present
and future edge-case bugs.

Changing this does threaten clusters that experience pg_upgrade when close to
a limit.  pg_upgrade can fail or, worse, yield a cluster that spews warnings
shortly after the upgrade.  I could implement countermeasures, but they would
take effect only when one upgrades a cluster having a 98%-full SLRU.  I
propose not to change pg_upgrade; some sites may find cause to do a
whole-cluster VACUUM before pg_upgrade.  Do you agree or disagree with that
choice?  I am attaching a patch (not for commit) that demonstrates the
pg_upgrade behavior that nearly-full-SLRU clusters would see.

Thanks,
nm

diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index f3da40a..c43ebbf 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -795,6 +795,10 @@ TrimCLOG(void)
                int                     slotno;
                char       *byteptr;
 
+               /* hack for pg_resetwal moving us to the middle of a page */
+               if (!SimpleLruDoesPhysicalPageExist(XactCtl, pageno))
+                       SimpleLruZeroPage(XactCtl, pageno);
+
                slotno = SimpleLruReadPage(XactCtl, pageno, false, xid);
                byteptr = XactCtl->shared->page_buffer[slotno] + byteno;
 
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index ce84dac..ed47ce0 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -2042,6 +2042,10 @@ TrimMultiXact(void)
                int                     slotno;
                MultiXactOffset *offptr;
 
+               /* hack for pg_resetwal moving us to the middle of a page */
+               if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
+                       SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
+
                slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, 
nextMXact);
                offptr = (MultiXactOffset *) 
MultiXactOffsetCtl->shared->page_buffer[slotno];
                offptr += entryno;
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c
index 2334418..454ff8b 100644
--- a/src/bin/pg_resetwal/pg_resetwal.c
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -434,6 +434,8 @@ main(int argc, char *argv[])
                        
FullTransactionIdFromEpochAndXid(EpochFromFullTransactionId(ControlFile.checkPointCopy.nextFullXid),
                                                                                
         set_xid);
 
+               /* ruins testing of cases close to wraparound */
+#if 0
                /*
                 * For the moment, just set oldestXid to a value that will force
                 * immediate autovacuum-for-wraparound.  It's not clear whether adding
@@ -445,6 +447,7 @@ main(int argc, char *argv[])
                if (ControlFile.checkPointCopy.oldestXid < 
FirstNormalTransactionId)
                        ControlFile.checkPointCopy.oldestXid += 
FirstNormalTransactionId;
                ControlFile.checkPointCopy.oldestXidDB = InvalidOid;
+#endif
        }
 
        if (set_oldest_commit_ts_xid != 0)
diff --git a/src/bin/pg_upgrade/test.sh b/src/bin/pg_upgrade/test.sh
index 7ff06de..befa5bb 100644
--- a/src/bin/pg_upgrade/test.sh
+++ b/src/bin/pg_upgrade/test.sh
@@ -28,6 +28,7 @@ standard_initdb() {
        then
                cat "$TEMP_CONFIG" >> "$PGDATA/postgresql.conf"
        fi
+       echo "backtrace_functions = 'SlruReportIOError'" >> 
"$PGDATA/postgresql.conf"
        ../../test/regress/pg_regress --config-auth "$PGDATA"
 }
 
@@ -202,7 +203,39 @@ if "$MAKE" -C "$oldsrc" installcheck-parallel; then
 else
        make_installcheck_status=$?
 fi
+
+# multixact alone between stop limit and wrap limit: no error
+reset_args_sql="
+select format('--multixact-ids=%s,%s',
+  oldest_multi_xid::text::bigint + (1::bigint << 31) - 50,
+  oldest_multi_xid)
+from pg_control_checkpoint();
+"
+# xid and multixact between warn limit and stop limit, noisy but successful:
+reset_args_sql="
+select format('--next-transaction-id=%s --multixact-ids=%s,%s',
+  oldest_xid::text::bigint       + (1::bigint << 31) - 5e6,
+  oldest_multi_xid::text::bigint + (1::bigint << 31) - 5e6,
+  oldest_multi_xid)
+from pg_control_checkpoint();
+"
+# xid and multixact between stop limit and wrap limit will fail:
+#   Setting frozenxid and minmxid counters in new cluster       SQL command failed
+#   UPDATE pg_catalog.pg_database SET       datfrozenxid = '2147484095'
+#   ERROR:  database is not accepting commands to avoid wraparound data loss in database "template0"
+reset_args_sql="
+select format('--next-transaction-id=%s --multixact-ids=%s,%s',
+  oldest_xid::text::bigint       + (1::bigint << 31) - 50,
+  oldest_multi_xid::text::bigint + (1::bigint << 31) - 50,
+  oldest_multi_xid)
+from pg_control_checkpoint();
+"
+reset_args=`psql -X -A -t -d template1 -c "$reset_args_sql"`
 "$oldbindir"/pg_ctl -m fast stop
+"$oldbindir"/pg_controldata | tee "$logdir"/before | grep 'Next.*X'
+"$oldbindir"/pg_resetwal $reset_args $PGDATA
+"$oldbindir"/pg_controldata | tee "$logdir"/after  | grep 'Next.*X'
+
 if [ -n "$createdb_status" ]; then
        exit 1
 fi

Raising stop and warn limits

Reply via email to