From 5216df17cfdba975ab3264ad484dc5d0830a9188 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Tue, 22 Apr 2025 12:25:11 -0700
Subject: [PATCH v3 4/4] Use batch TIDs lookup in btree index bulk-deletion.

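Instead of invoking the bulk-delete callback once per table TID,
btvacuumpage() now gathers the TIDs of all regular (non-posting list)
index tuples on a page and checks them with a single callback call.
Likewise, btreevacuumposting() passes the whole TID array of a posting
list to the callback in one call.

TIDs in a posting list are sorted, but the TIDs gathered from regular
index tuples are not sorted.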

Author:
Reviewed-by:
Discussion: https://postgr.es/m/
Backpatch-through:
---
 src/backend/access/nbtree/nbtree.c | 116 +++++++++++++++++++----------
 1 file changed, 77 insertions(+), 39 deletions(-)

diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 342dad0ef91..a459c700446 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -23,6 +23,7 @@
 #include "access/stratnum.h"
 #include "commands/progress.h"
 #include "commands/vacuum.h"
+#include "common/int.h"
 #include "nodes/execnodes.h"
 #include "pgstat.h"
 #include "storage/bulk_write.h"
@@ -1310,6 +1311,20 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 		IndexFreeSpaceMapVacuum(rel);
 }
 
+/* sort_template.h comparator: compares two OffsetNumbers via pg_cmp_u16() */
+static inline int
+cmpOffsetNumbers(const void *a, const void *b)
+{
+	return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
+}
+
+#define ST_SORT sort_vacuumpage_offnum
+#define ST_ELEMENT_TYPE OffsetNumber
+#define ST_COMPARE(a, b) cmpOffsetNumbers(a, b)
+#define ST_SCOPE static
+#define ST_DEFINE
+#include "lib/sort_template.h"
+
 /*
  * btvacuumpage --- VACUUM one page
  *
@@ -1473,6 +1488,10 @@ backtrack:
 		nhtidslive = 0;
 		if (callback)
 		{
+			OffsetNumber workbuf_offs[MaxIndexTuplesPerPage];
+			ItemPointerData workbuf_htids[MaxIndexTuplesPerPage];
+			int			workbuf_nitem = 0;
+
 			/* btbulkdelete callback tells us what to delete (or update) */
 			for (offnum = minoff;
 				 offnum <= maxoff;
@@ -1486,16 +1505,13 @@ backtrack:
 				Assert(!BTreeTupleIsPivot(itup));
 				if (!BTreeTupleIsPosting(itup))
 				{
-					bool		dead;
-
-					/* Regular tuple, standard table TID representation */
-					if (callback(&itup->t_tid, 1, &dead, callback_state) > 0)
-					{
-						deletable[ndeletable++] = offnum;
-						nhtidsdead++;
-					}
-					else
-						nhtidslive++;
+					/*
+					 * Regular tuple, standard table TID representation. Will
+					 * verify them as a whole later.
+					 */
+					workbuf_offs[workbuf_nitem] = offnum;
+					workbuf_htids[workbuf_nitem] = itup->t_tid;
+					workbuf_nitem++;
 				}
 				else
 				{
@@ -1542,6 +1558,38 @@ backtrack:
 					nhtidslive += nremaining;
 				}
 			}
+
+			if (workbuf_nitem > 0)
+			{
+				bool		workbuf_deletable[MaxIndexTuplesPerPage];
+				bool		need_sort;
+				int			ndels;
+
+				/*
+				 * We will sort the deletable array if there are existing
+				 * offsets as it should be sorted in ascending order (see
+				 * _bt_delitems_vacuum()).
+				 */
+				need_sort = (ndeletable > 0);
+
+				ndels = callback(workbuf_htids, workbuf_nitem, workbuf_deletable,
+								 callback_state);
+				if (ndels > 0)
+				{
+					for (int i = 0; i < workbuf_nitem; i++)
+					{
+						if (workbuf_deletable[i])
+							deletable[ndeletable++] = workbuf_offs[i];
+					}
+
+					if (need_sort)
+						sort_vacuumpage_offnum(deletable, ndeletable);
+
+					nhtidsdead += ndels;
+				}
+
+				nhtidslive += workbuf_nitem - ndels;
+			}
 		}
 
 		/*
@@ -1666,45 +1714,35 @@ static BTVacuumPosting
 btreevacuumposting(BTVacState *vstate, IndexTuple posting,
 				   OffsetNumber updatedoffset, int *nremaining)
 {
-	int			live = 0;
+	int			ndeletable;
 	int			nitem = BTreeTupleGetNPosting(posting);
 	ItemPointer items = BTreeTupleGetPosting(posting);
+	bool		deletable[MaxTIDsPerBTreePage];	/* one flag per posting TID */
 	BTVacuumPosting vacposting = NULL;
 
-	for (int i = 0; i < nitem; i++)
+	ndeletable = vstate->callback(items, nitem, deletable, vstate->callback_state);
+
+	/* All items are live: nothing to update, report every TID as remaining */
+	if (ndeletable == 0)
 	{
-		bool		dead;
+		*nremaining = nitem;
+		return NULL;
+	}
 
-		if (vstate->callback(items + i, 1, &dead, vstate->callback_state) == 0)
-		{
-			/* Live table TID */
-			live++;
-		}
-		else if (vacposting == NULL)
-		{
-			/*
-			 * First dead table TID encountered.
-			 *
-			 * It's now clear that we need to delete one or more dead table
-			 * TIDs, so start maintaining metadata describing how to update
-			 * existing posting list tuple.
-			 */
-			vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
-								nitem * sizeof(uint16));
+	vacposting = palloc(offsetof(BTVacuumPostingData, deletetids) +
+						ndeletable * sizeof(uint16));
+	vacposting->itup = posting;
+	vacposting->updatedoffset = updatedoffset;
+	vacposting->ndeletedtids = 0;
 
-			vacposting->itup = posting;
-			vacposting->updatedoffset = updatedoffset;
-			vacposting->ndeletedtids = 0;
-			vacposting->deletetids[vacposting->ndeletedtids++] = i;
-		}
-		else
-		{
-			/* Second or subsequent dead table TID */
+	for (int i = 0; i < nitem; i++)
+	{
+		if (deletable[i])
 			vacposting->deletetids[vacposting->ndeletedtids++] = i;
-		}
 	}
+	Assert(ndeletable == vacposting->ndeletedtids);
 
-	*nremaining = live;
+	*nremaining = nitem - ndeletable;
 	return vacposting;
 }
 
-- 
2.43.5

