Should we use BTreeInnerTupleGetDownLink() as soon as we use
BTreeInnerTupleSetDownLink() for setting this?
Or even invent BTreeInnerTupleDownLinkIsValid() macro?
I am not sure. Here we actually store UP link - to top parent to remove. I'm afraid using BTreeInnerTupleGetDownLink/BTreeInnerTupleSetDownLink could cause a confusion, in other hand, introducing TreeInnerTupleGetUpLink/BTreeInnerTupleSetUpLink seems over-engineering


After close look I change my opinion. To have a clean code it's much better to have new pair get/set macroses specialy to manage link to top pare during page deletion. This removes last naked usage of ItemPointer(SetInvalid/IsInvalid/GetBlockNumberNoCheck) and uses self-described macroses. Patch is attached.


--
Teodor Sigaev                                   E-mail: teo...@sigaev.ru
                                                   WWW: http://www.sigaev.ru/
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index beef089ba8..3be229db1f 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -1602,10 +1602,9 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
 	MemSet(&trunctuple, 0, sizeof(IndexTupleData));
 	trunctuple.t_info = sizeof(IndexTupleData);
 	if (target != leafblkno)
-		ItemPointerSetBlockNumber(&trunctuple.t_tid, target);
+		BTreeTupleSetTopParent(&trunctuple, target);
 	else
-		ItemPointerSetInvalid(&trunctuple.t_tid);
-	BTreeTupleSetNAtts(&trunctuple, 0);
+		BTreeTupleSetTopParent(&trunctuple, InvalidBlockNumber);
 
 	if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
 					false, false) == InvalidOffsetNumber)
@@ -1690,7 +1689,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
 	BTPageOpaque opaque;
 	bool		rightsib_is_rightmost;
 	int			targetlevel;
-	ItemPointer leafhikey;
+	IndexTuple	leafhikey;
 	BlockNumber nextchild;
 
 	page = BufferGetPage(leafbuf);
@@ -1702,7 +1701,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
 	 * Remember some information about the leaf page.
 	 */
 	itemid = PageGetItemId(page, P_HIKEY);
-	leafhikey = &((IndexTuple) PageGetItem(page, itemid))->t_tid;
+	leafhikey = (IndexTuple) PageGetItem(page, itemid);
 	leafleftsib = opaque->btpo_prev;
 	leafrightsib = opaque->btpo_next;
 
@@ -1714,9 +1713,10 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
 	 * parent in the branch.  Set 'target' and 'buf' to reference the page
 	 * actually being unlinked.
 	 */
-	if (ItemPointerIsValid(leafhikey))
+	target = BTreeTupleGetTopParent(leafhikey);
+
+	if (target != InvalidBlockNumber)
 	{
-		target = ItemPointerGetBlockNumberNoCheck(leafhikey);
 		Assert(target != leafblkno);
 
 		/* fetch the block number of the topmost parent's left sibling */
@@ -1919,12 +1919,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
 	 * branch.
 	 */
 	if (target != leafblkno)
-	{
-		if (nextchild == InvalidBlockNumber)
-			ItemPointerSetInvalid(leafhikey);
-		else
-			ItemPointerSetBlockNumber(leafhikey, nextchild);
-	}
+		BTreeTupleSetTopParent(leafhikey, nextchild);
 
 	/*
 	 * Mark the page itself deleted.  It can be recycled when all current
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index fb8c769df9..67a94cb80a 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -800,11 +800,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
 	 */
 	MemSet(&trunctuple, 0, sizeof(IndexTupleData));
 	trunctuple.t_info = sizeof(IndexTupleData);
-	if (xlrec->topparent != InvalidBlockNumber)
-		ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent);
-	else
-		ItemPointerSetInvalid(&trunctuple.t_tid);
-	BTreeTupleSetNAtts(&trunctuple, 0);
+	BTreeTupleSetTopParent(&trunctuple, xlrec->topparent);
 
 	if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
 					false, false) == InvalidOffsetNumber)
@@ -912,11 +908,7 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
 		/* Add a dummy hikey item */
 		MemSet(&trunctuple, 0, sizeof(IndexTupleData));
 		trunctuple.t_info = sizeof(IndexTupleData);
-		if (xlrec->topparent != InvalidBlockNumber)
-			ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent);
-		else
-			ItemPointerSetInvalid(&trunctuple.t_tid);
-		BTreeTupleSetNAtts(&trunctuple, 0);
+		BTreeTupleSetTopParent(&trunctuple, xlrec->topparent);
 
 		if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
 						false, false) == InvalidOffsetNumber)
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 1194be9281..34c71978d5 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -226,6 +226,20 @@ typedef struct BTMetaPageData
 #define BTreeInnerTupleSetDownLink(itup, blkno) \
 	ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno))
 
+/*
+ * Get/set link to top parent to remove. It will be better to have assertion
+ * BTreeTupleGetNAtts() == 0 but pre-v11 version hasn't set INDEX_ALT_TID_MASK
+ * bit and has P_HIKEY as value of t_tid.
+ */
+#define BTreeTupleGetTopParent(itup) \
+	ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid))
+
+#define BTreeTupleSetTopParent(itup, blkno)	\
+	do { \
+		ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno)); \
+		BTreeTupleSetNAtts((itup), 0); \
+	} while(0)
+
 /*
  * Get/set number of attributes within B-tree index tuple. Asserts should be
  * removed when BT_RESERVED_OFFSET_MASK bits will be used.
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index fe5b698669..fc81088d4b 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -3052,6 +3052,16 @@ explain (costs off)
          Filter: (NOT b)
 (4 rows)
 
+--
+-- Test for multilevel page deletion
+--
+CREATE TABLE delete_test_table (a bigint, b bigint, c bigint, d bigint);
+INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,80000) i;
+ALTER TABLE delete_test_table ADD PRIMARY KEY (a,b,c,d);
+DELETE FROM delete_test_table WHERE a > 40000;
+VACUUM delete_test_table;
+DELETE FROM delete_test_table WHERE a > 10;
+VACUUM delete_test_table;
 --
 -- REINDEX (VERBOSE)
 --
diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out
index 8afb1f2f7e..0aa5357917 100644
--- a/src/test/regress/expected/sanity_check.out
+++ b/src/test/regress/expected/sanity_check.out
@@ -38,6 +38,7 @@ d_star|f
 date_tbl|f
 default_tbl|f
 defaultexpr_tbl|f
+delete_test_table|t
 dept|f
 dupindexcols|t
 e_star|f
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index f7731265a0..f9e7118f0d 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -1061,6 +1061,17 @@ explain (costs off)
 explain (costs off)
   select * from boolindex where not b order by i limit 10;
 
+--
+-- Test for multilevel page deletion
+--
+CREATE TABLE delete_test_table (a bigint, b bigint, c bigint, d bigint);
+INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,80000) i;
+ALTER TABLE delete_test_table ADD PRIMARY KEY (a,b,c,d);
+DELETE FROM delete_test_table WHERE a > 40000;
+VACUUM delete_test_table;
+DELETE FROM delete_test_table WHERE a > 10;
+VACUUM delete_test_table;
+
 --
 -- REINDEX (VERBOSE)
 --

Reply via email to