Should we use BTreeInnerTupleGetDownLink() as soon as we use
BTreeInnerTupleSetDownLink() for setting this?
Or even invent BTreeInnerTupleDownLinkIsValid() macro?
I am not sure. Here we actually store UP link - to top parent to remove. I'm
afraid using BTreeInnerTupleGetDownLink/BTreeInnerTupleSetDownLink could cause a
confusion, in other hand, introducing
TreeInnerTupleGetUpLink/BTreeInnerTupleSetUpLink seems over-engineering
After close look I change my opinion. To have a clean code it's much better to
have new pair get/set macroses specialy to manage link to top pare during page
deletion. This removes last naked usage of
ItemPointer(SetInvalid/IsInvalid/GetBlockNumberNoCheck) and uses self-described
macroses. Patch is attached.
--
Teodor Sigaev E-mail: teo...@sigaev.ru
WWW: http://www.sigaev.ru/
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index beef089ba8..3be229db1f 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -1602,10 +1602,9 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
MemSet(&trunctuple, 0, sizeof(IndexTupleData));
trunctuple.t_info = sizeof(IndexTupleData);
if (target != leafblkno)
- ItemPointerSetBlockNumber(&trunctuple.t_tid, target);
+ BTreeTupleSetTopParent(&trunctuple, target);
else
- ItemPointerSetInvalid(&trunctuple.t_tid);
- BTreeTupleSetNAtts(&trunctuple, 0);
+ BTreeTupleSetTopParent(&trunctuple, InvalidBlockNumber);
if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
false, false) == InvalidOffsetNumber)
@@ -1690,7 +1689,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
BTPageOpaque opaque;
bool rightsib_is_rightmost;
int targetlevel;
- ItemPointer leafhikey;
+ IndexTuple leafhikey;
BlockNumber nextchild;
page = BufferGetPage(leafbuf);
@@ -1702,7 +1701,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
* Remember some information about the leaf page.
*/
itemid = PageGetItemId(page, P_HIKEY);
- leafhikey = &((IndexTuple) PageGetItem(page, itemid))->t_tid;
+ leafhikey = (IndexTuple) PageGetItem(page, itemid);
leafleftsib = opaque->btpo_prev;
leafrightsib = opaque->btpo_next;
@@ -1714,9 +1713,10 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
* parent in the branch. Set 'target' and 'buf' to reference the page
* actually being unlinked.
*/
- if (ItemPointerIsValid(leafhikey))
+ target = BTreeTupleGetTopParent(leafhikey);
+
+ if (target != InvalidBlockNumber)
{
- target = ItemPointerGetBlockNumberNoCheck(leafhikey);
Assert(target != leafblkno);
/* fetch the block number of the topmost parent's left sibling */
@@ -1919,12 +1919,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
* branch.
*/
if (target != leafblkno)
- {
- if (nextchild == InvalidBlockNumber)
- ItemPointerSetInvalid(leafhikey);
- else
- ItemPointerSetBlockNumber(leafhikey, nextchild);
- }
+ BTreeTupleSetTopParent(leafhikey, nextchild);
/*
* Mark the page itself deleted. It can be recycled when all current
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index fb8c769df9..67a94cb80a 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -800,11 +800,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
*/
MemSet(&trunctuple, 0, sizeof(IndexTupleData));
trunctuple.t_info = sizeof(IndexTupleData);
- if (xlrec->topparent != InvalidBlockNumber)
- ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent);
- else
- ItemPointerSetInvalid(&trunctuple.t_tid);
- BTreeTupleSetNAtts(&trunctuple, 0);
+ BTreeTupleSetTopParent(&trunctuple, xlrec->topparent);
if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
false, false) == InvalidOffsetNumber)
@@ -912,11 +908,7 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
/* Add a dummy hikey item */
MemSet(&trunctuple, 0, sizeof(IndexTupleData));
trunctuple.t_info = sizeof(IndexTupleData);
- if (xlrec->topparent != InvalidBlockNumber)
- ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent);
- else
- ItemPointerSetInvalid(&trunctuple.t_tid);
- BTreeTupleSetNAtts(&trunctuple, 0);
+ BTreeTupleSetTopParent(&trunctuple, xlrec->topparent);
if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
false, false) == InvalidOffsetNumber)
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 1194be9281..34c71978d5 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -226,6 +226,20 @@ typedef struct BTMetaPageData
#define BTreeInnerTupleSetDownLink(itup, blkno) \
ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno))
+/*
+ * Get/set link to top parent to remove. It will be better to have assertion
+ * BTreeTupleGetNAtts() == 0 but pre-v11 version hasn't set INDEX_ALT_TID_MASK
+ * bit and has P_HIKEY as value of t_tid.
+ */
+#define BTreeTupleGetTopParent(itup) \
+ ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid))
+
+#define BTreeTupleSetTopParent(itup, blkno) \
+ do { \
+ ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno)); \
+ BTreeTupleSetNAtts((itup), 0); \
+ } while(0)
+
/*
* Get/set number of attributes within B-tree index tuple. Asserts should be
* removed when BT_RESERVED_OFFSET_MASK bits will be used.
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index fe5b698669..fc81088d4b 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -3052,6 +3052,16 @@ explain (costs off)
Filter: (NOT b)
(4 rows)
+--
+-- Test for multilevel page deletion
+--
+CREATE TABLE delete_test_table (a bigint, b bigint, c bigint, d bigint);
+INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,80000) i;
+ALTER TABLE delete_test_table ADD PRIMARY KEY (a,b,c,d);
+DELETE FROM delete_test_table WHERE a > 40000;
+VACUUM delete_test_table;
+DELETE FROM delete_test_table WHERE a > 10;
+VACUUM delete_test_table;
--
-- REINDEX (VERBOSE)
--
diff --git a/src/test/regress/expected/sanity_check.out b/src/test/regress/expected/sanity_check.out
index 8afb1f2f7e..0aa5357917 100644
--- a/src/test/regress/expected/sanity_check.out
+++ b/src/test/regress/expected/sanity_check.out
@@ -38,6 +38,7 @@ d_star|f
date_tbl|f
default_tbl|f
defaultexpr_tbl|f
+delete_test_table|t
dept|f
dupindexcols|t
e_star|f
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index f7731265a0..f9e7118f0d 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -1061,6 +1061,17 @@ explain (costs off)
explain (costs off)
select * from boolindex where not b order by i limit 10;
+--
+-- Test for multilevel page deletion
+--
+CREATE TABLE delete_test_table (a bigint, b bigint, c bigint, d bigint);
+INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,80000) i;
+ALTER TABLE delete_test_table ADD PRIMARY KEY (a,b,c,d);
+DELETE FROM delete_test_table WHERE a > 40000;
+VACUUM delete_test_table;
+DELETE FROM delete_test_table WHERE a > 10;
+VACUUM delete_test_table;
+
--
-- REINDEX (VERBOSE)
--