Hi, On 2020-04-01 11:04:43 -0700, Peter Geoghegan wrote: > On Wed, Apr 1, 2020 at 10:28 AM Robert Haas <robertmh...@gmail.com> wrote: > > Is there any chance that you're planning to look into the details? > > That would certainly be welcome from my perspective.
+1 This definitely needs more eyes. I am not even close to understanding the code fully. > I had a few other things that I was going to work on this week, but > those seems less urgent. I'll take a look into it, and report back > what I find. Thanks you! I attached a slightly evolved version of my debugging patch. Greetings, Andres Freund
diff --git i/src/backend/utils/time/snapmgr.c w/src/backend/utils/time/snapmgr.c index 1c063c592ce..6a722863bcf 100644 --- i/src/backend/utils/time/snapmgr.c +++ w/src/backend/utils/time/snapmgr.c @@ -123,6 +123,7 @@ typedef struct OldSnapshotControlData int head_offset; /* subscript of oldest tracked time */ TimestampTz head_timestamp; /* time corresponding to head xid */ int count_used; /* how many slots are in use */ + TimestampTz starttime; // rounded to minute TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER]; } OldSnapshotControlData; @@ -290,6 +291,8 @@ SnapMgrInit(void) oldSnapshotControl->head_offset = 0; oldSnapshotControl->head_timestamp = 0; oldSnapshotControl->count_used = 0; + oldSnapshotControl->starttime = + AlignTimestampToMinuteBoundary(GetCurrentTimestamp()); } } @@ -1762,6 +1765,39 @@ SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit) SpinLockRelease(&oldSnapshotControl->mutex_threshold); } +static void PrintOldSnapshotMapping(const char *head, bool already_locked) +{ + StringInfoData s; + + initStringInfo(&s); + + if (!already_locked) + LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED); + + + appendStringInfo(&s, "old snapshot mapping at \"%s\" with head ts: %lu, current entries: %d max entries: %d, offset: %d\n", + head, + (oldSnapshotControl->head_timestamp - oldSnapshotControl->starttime) / USECS_PER_MINUTE, + oldSnapshotControl->count_used, + OLD_SNAPSHOT_TIME_MAP_ENTRIES, + oldSnapshotControl->head_offset); + + for (int off = 0; off < oldSnapshotControl->count_used; off++) + { + int ringoff = (off + oldSnapshotControl->head_offset) % OLD_SNAPSHOT_TIME_MAP_ENTRIES; + + appendStringInfo(&s, " entry %d (ring %d): min %ld: xid %d\n", + off, ringoff, + (oldSnapshotControl->head_timestamp - oldSnapshotControl->starttime) / USECS_PER_MINUTE + off, + oldSnapshotControl->xid_by_minute[ringoff]); + } + + if (!already_locked) + LWLockRelease(OldSnapshotTimeMapLock); + + elog(WARNING, "%s", s.data); +} + /* * TransactionIdLimitedForOldSnapshots * @@ -1826,9 +1862,15 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, { if (ts == update_ts) { + PrintOldSnapshotMapping("non cached limit via update_ts", false); + xlimit = latest_xmin; if (NormalTransactionIdFollows(xlimit, recentXmin)) + { + elog(LOG, "increasing threshold from %u to %u (via update_ts)", + recentXmin, xlimit); SetOldSnapshotThresholdTimestamp(ts, xlimit); + } } else { @@ -1839,6 +1881,8 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, { int offset; + PrintOldSnapshotMapping("non cached limit via bins", true); + offset = ((ts - oldSnapshotControl->head_timestamp) / USECS_PER_MINUTE); if (offset > oldSnapshotControl->count_used - 1) @@ -1848,7 +1892,15 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, xlimit = oldSnapshotControl->xid_by_minute[offset]; if (NormalTransactionIdFollows(xlimit, recentXmin)) + { + elog(LOG, "increasing threshold from %u to %u (via bins)", + recentXmin, xlimit); SetOldSnapshotThresholdTimestamp(ts, xlimit); + } + } + else + { + // currently debugging output here is pretty darn verbose } LWLockRelease(OldSnapshotTimeMapLock); @@ -1869,7 +1921,11 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, xlimit = latest_xmin; if (NormalTransactionIdFollows(xlimit, recentXmin)) + { + elog(LOG, "increasing prune threshold from %u to %u", + recentXmin, xlimit); return xlimit; + } } return recentXmin; @@ -1923,14 +1979,14 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin) */ if (whenTaken < 0) { - elog(DEBUG1, + elog(PANIC, "MaintainOldSnapshotTimeMapping called with negative whenTaken = %ld", (long) whenTaken); return; } if (!TransactionIdIsNormal(xmin)) { - elog(DEBUG1, + elog(PANIC, "MaintainOldSnapshotTimeMapping called with xmin = %lu", (unsigned long) xmin); return; @@ -1938,6 +1994,8 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin) LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE); + PrintOldSnapshotMapping("before update", true); + Assert(oldSnapshotControl->head_offset >= 0); Assert(oldSnapshotControl->head_offset < OLD_SNAPSHOT_TIME_MAP_ENTRIES); Assert((oldSnapshotControl->head_timestamp % USECS_PER_MINUTE) == 0); @@ -1956,7 +2014,7 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin) { /* old ts; log it at DEBUG */ LWLockRelease(OldSnapshotTimeMapLock); - elog(DEBUG1, + elog(PANIC, "MaintainOldSnapshotTimeMapping called with old whenTaken = %ld", (long) whenTaken); return; @@ -1971,6 +2029,12 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin) / USECS_PER_MINUTE)) % OLD_SNAPSHOT_TIME_MAP_ENTRIES; + elog(WARNING, "head %ld min: updating existing bucket %d for whenTaken %ld min, with xmin %u", + (oldSnapshotControl->head_timestamp - oldSnapshotControl->starttime) / USECS_PER_MINUTE, + bucket, + (ts - oldSnapshotControl->starttime) / USECS_PER_MINUTE, + xmin); + if (TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[bucket], xmin)) oldSnapshotControl->xid_by_minute[bucket] = xmin; } @@ -1980,6 +2044,13 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin) int advance = ((ts - oldSnapshotControl->head_timestamp) / USECS_PER_MINUTE); + elog(WARNING, "head %ld min: filling %d buckets starting at %d for whenTaken %ld min, with xmin %u", + (oldSnapshotControl->head_timestamp - oldSnapshotControl->starttime) / USECS_PER_MINUTE, + advance, + oldSnapshotControl->head_offset, + (ts - oldSnapshotControl->starttime) / USECS_PER_MINUTE, + xmin); + oldSnapshotControl->head_timestamp = ts; if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES) @@ -2021,6 +2092,8 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin) } } + PrintOldSnapshotMapping("after update", true); + LWLockRelease(OldSnapshotTimeMapLock); }