Hi,

On 2020-04-01 11:04:43 -0700, Peter Geoghegan wrote:
> On Wed, Apr 1, 2020 at 10:28 AM Robert Haas <robertmh...@gmail.com> wrote:
> > Is there any chance that you're planning to look into the details?
> > That would certainly be welcome from my perspective.

+1

This definitely needs more eyes. I am not even close to understanding
the code fully.


> I had a few other things that I was going to work on this week, but
> those seems less urgent. I'll take a look into it, and report back
> what I find.

Thanks you!

I attached a slightly evolved version of my debugging patch.


Greetings,

Andres Freund
diff --git i/src/backend/utils/time/snapmgr.c w/src/backend/utils/time/snapmgr.c
index 1c063c592ce..6a722863bcf 100644
--- i/src/backend/utils/time/snapmgr.c
+++ w/src/backend/utils/time/snapmgr.c
@@ -123,6 +123,7 @@ typedef struct OldSnapshotControlData
 	int			head_offset;	/* subscript of oldest tracked time */
 	TimestampTz head_timestamp; /* time corresponding to head xid */
 	int			count_used;		/* how many slots are in use */
+	TimestampTz starttime; // rounded to minute
 	TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
 } OldSnapshotControlData;
 
@@ -290,6 +291,8 @@ SnapMgrInit(void)
 		oldSnapshotControl->head_offset = 0;
 		oldSnapshotControl->head_timestamp = 0;
 		oldSnapshotControl->count_used = 0;
+		oldSnapshotControl->starttime =
+			AlignTimestampToMinuteBoundary(GetCurrentTimestamp());
 	}
 }
 
@@ -1762,6 +1765,39 @@ SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
 	SpinLockRelease(&oldSnapshotControl->mutex_threshold);
 }
 
+static void PrintOldSnapshotMapping(const char *head, bool already_locked)
+{
+	StringInfoData s;
+
+	initStringInfo(&s);
+
+	if (!already_locked)
+		LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
+
+
+	appendStringInfo(&s, "old snapshot mapping at \"%s\" with head ts: %lu, current entries: %d max entries: %d, offset: %d\n",
+					 head,
+					 (oldSnapshotControl->head_timestamp - oldSnapshotControl->starttime) / USECS_PER_MINUTE,
+					 oldSnapshotControl->count_used,
+					 OLD_SNAPSHOT_TIME_MAP_ENTRIES,
+					 oldSnapshotControl->head_offset);
+
+	for (int off = 0; off < oldSnapshotControl->count_used; off++)
+	{
+		int ringoff = (off + oldSnapshotControl->head_offset) % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
+
+		appendStringInfo(&s, "  entry %d (ring %d): min %ld: xid %d\n",
+						 off, ringoff,
+						 (oldSnapshotControl->head_timestamp - oldSnapshotControl->starttime) / USECS_PER_MINUTE + off,
+						 oldSnapshotControl->xid_by_minute[ringoff]);
+	}
+
+	if (!already_locked)
+		LWLockRelease(OldSnapshotTimeMapLock);
+
+	elog(WARNING, "%s", s.data);
+}
+
 /*
  * TransactionIdLimitedForOldSnapshots
  *
@@ -1826,9 +1862,15 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
 		{
 			if (ts == update_ts)
 			{
+				PrintOldSnapshotMapping("non cached limit via update_ts", false);
+
 				xlimit = latest_xmin;
 				if (NormalTransactionIdFollows(xlimit, recentXmin))
+				{
+					elog(LOG, "increasing threshold from %u to %u (via update_ts)",
+						 recentXmin, xlimit);
 					SetOldSnapshotThresholdTimestamp(ts, xlimit);
+				}
 			}
 			else
 			{
@@ -1839,6 +1881,8 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
 				{
 					int			offset;
 
+					PrintOldSnapshotMapping("non cached limit via bins", true);
+
 					offset = ((ts - oldSnapshotControl->head_timestamp)
 							  / USECS_PER_MINUTE);
 					if (offset > oldSnapshotControl->count_used - 1)
@@ -1848,7 +1892,15 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
 					xlimit = oldSnapshotControl->xid_by_minute[offset];
 
 					if (NormalTransactionIdFollows(xlimit, recentXmin))
+					{
+						elog(LOG, "increasing threshold from %u to %u (via bins)",
+							 recentXmin, xlimit);
 						SetOldSnapshotThresholdTimestamp(ts, xlimit);
+					}
+				}
+				else
+				{
+					// currently debugging output here is pretty darn verbose
 				}
 
 				LWLockRelease(OldSnapshotTimeMapLock);
@@ -1869,7 +1921,11 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
 			xlimit = latest_xmin;
 
 		if (NormalTransactionIdFollows(xlimit, recentXmin))
+		{
+			elog(LOG, "increasing prune threshold from %u to %u",
+				 recentXmin, xlimit);
 			return xlimit;
+		}
 	}
 
 	return recentXmin;
@@ -1923,14 +1979,14 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 	 */
 	if (whenTaken < 0)
 	{
-		elog(DEBUG1,
+		elog(PANIC,
 			 "MaintainOldSnapshotTimeMapping called with negative whenTaken = %ld",
 			 (long) whenTaken);
 		return;
 	}
 	if (!TransactionIdIsNormal(xmin))
 	{
-		elog(DEBUG1,
+		elog(PANIC,
 			 "MaintainOldSnapshotTimeMapping called with xmin = %lu",
 			 (unsigned long) xmin);
 		return;
@@ -1938,6 +1994,8 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 
 	LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
 
+	PrintOldSnapshotMapping("before update", true);
+
 	Assert(oldSnapshotControl->head_offset >= 0);
 	Assert(oldSnapshotControl->head_offset < OLD_SNAPSHOT_TIME_MAP_ENTRIES);
 	Assert((oldSnapshotControl->head_timestamp % USECS_PER_MINUTE) == 0);
@@ -1956,7 +2014,7 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 	{
 		/* old ts; log it at DEBUG */
 		LWLockRelease(OldSnapshotTimeMapLock);
-		elog(DEBUG1,
+		elog(PANIC,
 			 "MaintainOldSnapshotTimeMapping called with old whenTaken = %ld",
 			 (long) whenTaken);
 		return;
@@ -1971,6 +2029,12 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 								 / USECS_PER_MINUTE))
 		% OLD_SNAPSHOT_TIME_MAP_ENTRIES;
 
+		elog(WARNING, "head %ld min: updating existing bucket %d for whenTaken %ld min, with xmin %u",
+			 (oldSnapshotControl->head_timestamp - oldSnapshotControl->starttime) / USECS_PER_MINUTE,
+			 bucket,
+			 (ts - oldSnapshotControl->starttime) / USECS_PER_MINUTE,
+			 xmin);
+
 		if (TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[bucket], xmin))
 			oldSnapshotControl->xid_by_minute[bucket] = xmin;
 	}
@@ -1980,6 +2044,13 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 		int			advance = ((ts - oldSnapshotControl->head_timestamp)
 							   / USECS_PER_MINUTE);
 
+		elog(WARNING, "head %ld min: filling %d buckets starting at %d for whenTaken %ld min, with xmin %u",
+			 (oldSnapshotControl->head_timestamp - oldSnapshotControl->starttime) / USECS_PER_MINUTE,
+			 advance,
+			 oldSnapshotControl->head_offset,
+			 (ts - oldSnapshotControl->starttime) / USECS_PER_MINUTE,
+			 xmin);
+
 		oldSnapshotControl->head_timestamp = ts;
 
 		if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES)
@@ -2021,6 +2092,8 @@ MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
 		}
 	}
 
+	PrintOldSnapshotMapping("after update", true);
+
 	LWLockRelease(OldSnapshotTimeMapLock);
 }
 

Reply via email to