sorry, forgot to add a patch to the letter


чт, 20 мая 2021 г. в 13:52, Кирилл Решке <reshkekir...@gmail.com>:

> Hi,
> I recently ran into a problem in one of our production postgresql cluster.
> I had noticed lock contention on procarray lock on standby, which causes
> WAL replay lag growth.
> To reproduce this, you can do the following:
>
> 1) set max_connections to big number, like 100000
> 2) begin a transaction on primary
> 3) start pgbench workload on primary and on standby
>
> After a while it will be possible to see KnownAssignedXidsGetAndSetXmin in
> perf top consuming abount 75 % of CPU.
>
> %%
>   PerfTop:    1060 irqs/sec  kernel: 0.0%  exact:  0.0% [4000Hz cycles:u],
>  (target_pid: 273361)
>
> -------------------------------------------------------------------------------
>
>     73.92%  postgres       [.] KnownAssignedXidsGetAndSetXmin
>      1.40%  postgres       [.] base_yyparse
>      0.96%  postgres       [.] LWLockAttemptLock
>      0.84%  postgres       [.] hash_search_with_hash_value
>      0.84%  postgres       [.] AtEOXact_GUC
>      0.72%  postgres       [.] ResetAllOptions
>      0.70%  postgres       [.] AllocSetAlloc
>      0.60%  postgres       [.] _bt_compare
>      0.55%  postgres       [.] core_yylex
>      0.42%  libc-2.27.so   [.] __strlen_avx2
>      0.23%  postgres       [.] LWLockRelease
>      0.19%  postgres       [.] MemoryContextAllocZeroAligned
>      0.18%  postgres       [.] expression_tree_walker.part.3
>      0.18%  libc-2.27.so   [.] __memmove_avx_unaligned_erms
>      0.17%  postgres       [.] PostgresMain
>      0.17%  postgres       [.] palloc
>      0.17%  libc-2.27.so   [.] _int_malloc
>      0.17%  postgres       [.] set_config_option
>      0.17%  postgres       [.] ScanKeywordLookup
>      0.16%  postgres       [.] _bt_checkpage
>
> %%
>
>
> We have tried to fix this by using BitMapSet instead of boolean array
> KnownAssignedXidsValid, but this does not help too much.
>
> Instead, using a doubly linked list helps a little more, we got +1000 tps
> on pgbench workload with patched postgresql. The general idea of this patch
> is that, instead of memorizing which elements in KnownAssignedXids are
> valid, lets maintain a doubly linked list of them. This  solution will work
> in exactly the same way, except that taking a snapshot on the replica is
> now O(running transaction) instead of O(head - tail) which is significantly
> faster under some workloads. The patch helps to reduce CPU usage of
> KnownAssignedXidsGetAndSetXmin to ~48% instead of ~74%, but does eliminate
> it from perf top.
>
> The problem is better reproduced on PG13 since PG14 has some snapshot
> optimization.
>
> Thanks!
>
> Best regards, reshke
>
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 5ff8cab394..165cf56ea9 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -255,9 +255,18 @@ static PGPROC *allProcs;
  * Bookkeeping for tracking emulated transactions in recovery
  */
 static TransactionId *KnownAssignedXids;
-static bool *KnownAssignedXidsValid;
+
+typedef struct {
+	int prv;
+	int nxt;
+} KnownAssignedXidValidNode;
+
+// Doubly linked list of valid xids
+static KnownAssignedXidValidNode *KnownAssignedXidsValidDLL;
 static TransactionId latestObservedXid = InvalidTransactionId;
 
+
+
 /*
  * If we're in STANDBY_SNAPSHOT_PENDING state, standbySnapshotPendingXmin is
  * the highest xid that might still be running that we don't have in
@@ -348,6 +357,9 @@ static void GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons);
 /*
  * Report shared-memory space needed by CreateSharedProcArray.
  */
+
+static KnownAssignedXidValidNode KAX_E_INVALID;
+
 Size
 ProcArrayShmemSize(void)
 {
@@ -380,13 +392,20 @@ ProcArrayShmemSize(void)
 		size = add_size(size,
 						mul_size(sizeof(TransactionId),
 								 TOTAL_MAX_CACHED_SUBXIDS));
-		size = add_size(size,
-						mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS));
+                size = add_size(size,
+						mul_size(sizeof(KnownAssignedXidValidNode), TOTAL_MAX_CACHED_SUBXIDS));
 	}
 
+				KAX_E_INVALID.prv = -1;
+				KAX_E_INVALID.nxt = TOTAL_MAX_CACHED_SUBXIDS;
+
 	return size;
 }
 
+#define KAX_DLL_INDEX_VALID(i) (-1 < i && i < TOTAL_MAX_CACHED_SUBXIDS)
+#define KAX_DLL_ENTRY_INVALID(i) (-1 == KnownAssignedXidsValidDLL[i].prv && KnownAssignedXidsValidDLL[i].nxt == TOTAL_MAX_CACHED_SUBXIDS)
+
+
 /*
  * Initialize the shared PGPROC array during postmaster startup.
  */
@@ -431,10 +450,15 @@ CreateSharedProcArray(void)
 							mul_size(sizeof(TransactionId),
 									 TOTAL_MAX_CACHED_SUBXIDS),
 							&found);
-		KnownAssignedXidsValid = (bool *)
-			ShmemInitStruct("KnownAssignedXidsValid",
-							mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS),
+		
+                KnownAssignedXidsValidDLL = (KnownAssignedXidValidNode*)
+			ShmemInitStruct("KnownAssignedXidsValidDLL",
+							mul_size(sizeof(KnownAssignedXidValidNode), TOTAL_MAX_CACHED_SUBXIDS),
 							&found);
+
+							for (int i = 0; i < TOTAL_MAX_CACHED_SUBXIDS; ++ i) {
+										KnownAssignedXidsValidDLL[i] = KAX_E_INVALID;
+							}
 	}
 }
 
@@ -4545,15 +4569,17 @@ KnownAssignedXidsCompress(bool force)
 	 * re-aligning data to 0th element.
 	 */
 	compress_index = 0;
-	for (i = tail; i < head; i++)
+        int prv = -1;
+	for (i = tail; i < head; i = KnownAssignedXidsValidDLL[i].nxt)
 	{
-		if (KnownAssignedXidsValid[i])
-		{
-			KnownAssignedXids[compress_index] = KnownAssignedXids[i];
-			KnownAssignedXidsValid[compress_index] = true;
-			compress_index++;
-		}
+                KnownAssignedXids[compress_index] = KnownAssignedXids[i];
+                KnownAssignedXidsValidDLL[compress_index].prv = prv;
+                KnownAssignedXidsValidDLL[compress_index].nxt = compress_index + 1;
+
+                compress_index++;
 	}
+        // fix last entry
+        KnownAssignedXidsValidDLL[compress_index - 1].nxt = TOTAL_MAX_CACHED_SUBXIDS;
 
 	pArray->tailKnownAssignedXids = 0;
 	pArray->headKnownAssignedXids = compress_index;
@@ -4652,7 +4678,8 @@ KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid,
 	for (i = 0; i < nxids; i++)
 	{
 		KnownAssignedXids[head] = next_xid;
-		KnownAssignedXidsValid[head] = true;
+		KnownAssignedXidsValidDLL[head].nxt = 1 + head;
+                KnownAssignedXidsValidDLL[head + 1].prv = head; 
 		TransactionIdAdvance(next_xid);
 		head++;
 	}
@@ -4741,12 +4768,23 @@ KnownAssignedXidsSearch(TransactionId xid, bool remove)
 	if (result_index < 0)
 		return false;			/* not in array */
 
-	if (!KnownAssignedXidsValid[result_index])
+	if (KAX_DLL_ENTRY_INVALID(result_index))
 		return false;			/* in array, but invalid */
 
 	if (remove)
 	{
-		KnownAssignedXidsValid[result_index] = false;
+                int prv = KnownAssignedXidsValidDLL[result_index].prv;
+                int nxt = KnownAssignedXidsValidDLL[result_index].nxt;
+
+                KnownAssignedXidsValidDLL[result_index] = KAX_E_INVALID;
+                
+                if (KAX_DLL_INDEX_VALID(prv)) {
+                        KnownAssignedXidsValidDLL[prv].nxt = nxt;
+                }
+
+                if (KAX_DLL_INDEX_VALID(nxt)) {
+                        KnownAssignedXidsValidDLL[nxt].prv = prv;
+                }
 
 		pArray->numKnownAssignedXids--;
 		Assert(pArray->numKnownAssignedXids >= 0);
@@ -4757,9 +4795,7 @@ KnownAssignedXidsSearch(TransactionId xid, bool remove)
 		 */
 		if (result_index == tail)
 		{
-			tail++;
-			while (tail < head && !KnownAssignedXidsValid[tail])
-				tail++;
+			tail = KnownAssignedXidsValidDLL[tail].nxt;
 			if (tail >= head)
 			{
 				/* Array is empty, so we can reset both pointers */
@@ -4868,21 +4904,23 @@ KnownAssignedXidsRemovePreceding(TransactionId removeXid)
 	tail = pArray->tailKnownAssignedXids;
 	head = pArray->headKnownAssignedXids;
 
-	for (i = tail; i < head; i++)
+	for (i = tail; i < head; i = KnownAssignedXidsValidDLL[i].nxt)
 	{
-		if (KnownAssignedXidsValid[i])
-		{
-			TransactionId knownXid = KnownAssignedXids[i];
+                TransactionId knownXid = KnownAssignedXids[i];
 
-			if (TransactionIdFollowsOrEquals(knownXid, removeXid))
-				break;
+                if (TransactionIdFollowsOrEquals(knownXid, removeXid))
+                        break;
 
-			if (!StandbyTransactionIdIsPrepared(knownXid))
-			{
-				KnownAssignedXidsValid[i] = false;
-				count++;
-			}
-		}
+                if (!StandbyTransactionIdIsPrepared(knownXid))
+                {
+                        int nxt = KnownAssignedXidsValidDLL[i].nxt;
+                        KnownAssignedXidsValidDLL[i] = KAX_E_INVALID;
+
+                        if (KAX_DLL_INDEX_VALID(nxt)) {
+                                KnownAssignedXidsValidDLL[i].prv = -1;
+                        }
+                        count++;
+                }
 	}
 
 	pArray->numKnownAssignedXids -= count;
@@ -4891,21 +4929,20 @@ KnownAssignedXidsRemovePreceding(TransactionId removeXid)
 	/*
 	 * Advance the tail pointer if we've marked the tail item invalid.
 	 */
-	for (i = tail; i < head; i++)
-	{
-		if (KnownAssignedXidsValid[i])
-			break;
-	}
-	if (i >= head)
-	{
-		/* Array is empty, so we can reset both pointers */
-		pArray->headKnownAssignedXids = 0;
-		pArray->tailKnownAssignedXids = 0;
-	}
-	else
-	{
-		pArray->tailKnownAssignedXids = i;
-	}
+        if (KAX_DLL_ENTRY_INVALID(tail)) {
+                tail = KnownAssignedXidsValidDLL[tail].nxt;
+
+                if (!KAX_DLL_INDEX_VALID(tail))
+                {
+                        /* Array is empty, so we can reset both pointers */
+                        pArray->headKnownAssignedXids = 0;
+                        pArray->tailKnownAssignedXids = 0;
+                }
+                else
+                {
+                        pArray->tailKnownAssignedXids = tail;
+                }
+        }
 
 	/* Opportunistically compress the array */
 	KnownAssignedXidsCompress(false);
@@ -4957,32 +4994,29 @@ KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
 	head = procArray->headKnownAssignedXids;
 	SpinLockRelease(&procArray->known_assigned_xids_lck);
 
-	for (i = tail; i < head; i++)
+	for (i = tail; i < head; i = KnownAssignedXidsValidDLL[i].nxt)
 	{
 		/* Skip any gaps in the array */
-		if (KnownAssignedXidsValid[i])
-		{
-			TransactionId knownXid = KnownAssignedXids[i];
-
-			/*
-			 * Update xmin if required.  Only the first XID need be checked,
-			 * since the array is sorted.
-			 */
-			if (count == 0 &&
-				TransactionIdPrecedes(knownXid, *xmin))
-				*xmin = knownXid;
-
-			/*
-			 * Filter out anything >= xmax, again relying on sorted property
-			 * of array.
-			 */
-			if (TransactionIdIsValid(xmax) &&
-				TransactionIdFollowsOrEquals(knownXid, xmax))
-				break;
-
-			/* Add knownXid into output array */
-			xarray[count++] = knownXid;
-		}
+                TransactionId knownXid = KnownAssignedXids[i];
+
+                /*
+                 * Update xmin if required.  Only the first XID need be checked,
+                 * since the array is sorted.
+                 */
+                if (count == 0 &&
+                        TransactionIdPrecedes(knownXid, *xmin))
+                        *xmin = knownXid;
+
+                /*
+                 * Filter out anything >= xmax, again relying on sorted property
+                 * of array.
+                 */
+                if (TransactionIdIsValid(xmax) &&
+                        TransactionIdFollowsOrEquals(knownXid, xmax))
+                        break;
+
+                /* Add knownXid into output array */
+                xarray[count++] = knownXid;
 	}
 
 	return count;
@@ -5007,12 +5041,12 @@ KnownAssignedXidsGetOldestXmin(void)
 	head = procArray->headKnownAssignedXids;
 	SpinLockRelease(&procArray->known_assigned_xids_lck);
 
-	for (i = tail; i < head; i++)
-	{
-		/* Skip any gaps in the array */
-		if (KnownAssignedXidsValid[i])
-			return KnownAssignedXids[i];
-	}
+        i = KnownAssignedXidsValidDLL[i].nxt;
+ 
+        /* Skip any gaps in the array */
+        if (i < head) {
+                return KnownAssignedXids[i];
+        }
 
 	return InvalidTransactionId;
 }
@@ -5042,13 +5076,10 @@ KnownAssignedXidsDisplay(int trace_level)
 
 	initStringInfo(&buf);
 
-	for (i = tail; i < head; i++)
+	for (i = tail; i < head; i = KnownAssignedXidsValidDLL[i].nxt)
 	{
-		if (KnownAssignedXidsValid[i])
-		{
-			nxids++;
-			appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]);
-		}
+                nxids++;
+                appendStringInfo(&buf, "[%d]=%u ", i, KnownAssignedXids[i]);
 	}
 
 	elog(trace_level, "%d KnownAssignedXids (num=%d tail=%d head=%d) %s",

Reply via email to