From 21bda3f9707db1bfab74fe3a7a3a6e1e88df5ee4 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 11 Mar 2024 13:21:10 +0700
Subject: [PATCH v6901 5/5] WIP: 3 embeddable TIDs

---
 src/backend/access/common/tidstore.c          | 87 ++++++++++++++++---
 src/include/lib/radixtree.h                   | 19 ++++
 .../test_tidstore/expected/test_tidstore.out  | 31 ++-----
 .../test_tidstore/sql/test_tidstore.sql       |  2 +-
 4 files changed, 100 insertions(+), 39 deletions(-)

diff --git a/src/backend/access/common/tidstore.c b/src/backend/access/common/tidstore.c
index dcdbd12c2e..8c04e7734a 100644
--- a/src/backend/access/common/tidstore.c
+++ b/src/backend/access/common/tidstore.c
@@ -47,7 +47,25 @@
  */
 typedef struct BlocktableEntry
 {
-	uint16		nwords;
+	union
+	{
+		struct
+		{
+#ifndef WORDS_BIGENDIAN
+			uint8		flags;
+			int8		nwords;
+#endif
+
+			OffsetNumber full_offsets[3];
+
+#ifdef WORDS_BIGENDIAN
+			int8		nwords;
+			uint8		flags;
+#endif
+		} ;
+		uintptr_t ptr;
+	} header;
+
 	bitmapword	words[FLEXIBLE_ARRAY_MEMBER];
 } BlocktableEntry;
 #define MaxBlocktableEntrySize \
@@ -61,7 +79,8 @@ typedef struct BlocktableEntry
 #define RT_VALUE_TYPE BlocktableEntry
 #define RT_VARLEN_VALUE_SIZE(page) \
 	(offsetof(BlocktableEntry, words) + \
-	sizeof(bitmapword) * (page)->nwords)
+	sizeof(bitmapword) * (page)->header.nwords)
+#define RT_RUNTIME_EMBEDDABLE_VALUE
 #include "lib/radixtree.h"
 
 #define RT_PREFIX shared_rt
@@ -72,7 +91,8 @@ typedef struct BlocktableEntry
 #define RT_VALUE_TYPE BlocktableEntry
 #define RT_VARLEN_VALUE_SIZE(page) \
 	(offsetof(BlocktableEntry, words) + \
-	sizeof(bitmapword) * (page)->nwords)
+	sizeof(bitmapword) * (page)->header.nwords)
+#define RT_RUNTIME_EMBEDDABLE_VALUE
 #include "lib/radixtree.h"
 
 /* Per-backend state for a TidStore */
@@ -307,7 +327,17 @@ TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets,
 	bool	found PG_USED_FOR_ASSERTS_ONLY;
 
 	Assert(num_offsets > 0);
+	memset(page, 0, sizeof(BlocktableEntry));
 
+	if (num_offsets <= 3)
+	{
+		for (int i = 0; i < num_offsets; i++)
+			page->header.full_offsets[i] = offsets[i];
+
+		page->header.nwords = 0;
+	}
+	else
+	{
 	for (wordnum = 0, next_word_threshold = BITS_PER_BITMAPWORD;
 		wordnum <= WORDNUM(offsets[num_offsets - 1]);
 		wordnum++, next_word_threshold += BITS_PER_BITMAPWORD)
@@ -333,8 +363,9 @@ TidStoreSetBlockOffsets(TidStore *ts, BlockNumber blkno, OffsetNumber *offsets,
 		page->words[wordnum] = word;
 	}
 
-	page->nwords = wordnum;
-	Assert(page->nwords == WORDS_PER_PAGE(offsets[num_offsets - 1]));
+	page->header.nwords = wordnum;
+	Assert(page->header.nwords == WORDS_PER_PAGE(offsets[num_offsets - 1]));
+	}
 
 	if (TidStoreIsShared(ts))
 		found = shared_rt_set(ts->tree.shared, blkno, page);
@@ -370,17 +401,37 @@ TidStoreIsMember(TidStore *ts, ItemPointer tid)
 
 	/* no entry for the blk */
 	if (page == NULL)
-		return false;
+	{
+		ret = false;
+		goto finish;
+	}
 
 	wordnum = WORDNUM(off);
 	bitnum = BITNUM(off);
 
-	/* no bitmap for the off */
-	if (wordnum >= page->nwords)
-		return false;
-
-	ret = (page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0;
+	if (page->header.nwords == 0)
+	{
+		/* we have offsets in the header */
+		for (int i = 0; i < lengthof(page->header.full_offsets); i++)
+		{
+			if (page->header.full_offsets[i] == off)
+			{
+				ret = true;
+				goto finish;
+			}
+		}
+		ret = false;
+	}
+	else
+	{
+	/* No bitmap for the off */
+	if (wordnum >= page->header.nwords)
+		ret = false;
+	else
+		ret = (page->words[wordnum] & ((bitmapword) 1 << bitnum)) != 0;
+	}
 
+finish:
 #ifdef TIDSTORE_DEBUG
 	if (!TidStoreIsShared(ts))
 	{
@@ -388,6 +439,7 @@ TidStoreIsMember(TidStore *ts, ItemPointer tid)
 		Assert(ret == ret_debug);
 	}
 #endif
+
 	return ret;
 }
 
@@ -504,7 +556,18 @@ tidstore_iter_extract_tids(TidStoreIter *iter, BlocktableEntry *page)
 	TidStoreIterResult *result = (&iter->output);
 	int			wordnum;
 
-	for (wordnum = 0; wordnum < page->nwords; wordnum++)
+	if (page->header.nwords == 0)
+	{
+		/* we have offsets in the header */
+		for (int i = 0; i < 3; i++)
+		{
+			if (OffsetNumberIsValid(page->header.full_offsets[i]))
+				result->offsets[result->num_offsets++] = page->header.full_offsets[i];
+		}
+		return;
+	}
+
+	for (wordnum = 0; wordnum < page->header.nwords; wordnum++)
 	{
 		bitmapword	w = page->words[wordnum];
 
diff --git a/src/include/lib/radixtree.h b/src/include/lib/radixtree.h
index b8ad51c14d..108f21aa08 100644
--- a/src/include/lib/radixtree.h
+++ b/src/include/lib/radixtree.h
@@ -437,7 +437,13 @@ static inline bool
 RT_VALUE_IS_EMBEDDABLE(RT_VALUE_TYPE * value_p)
 {
 #ifdef RT_VARLEN_VALUE_SIZE
+
+#ifdef RT_RUNTIME_EMBEDDABLE_VALUE
+	return RT_GET_VALUE_SIZE(value_p) <= sizeof(RT_PTR_ALLOC);
+#else
 	return false;
+#endif
+
 #else
 	return RT_GET_VALUE_SIZE(value_p) <= sizeof(RT_PTR_ALLOC);
 #endif
@@ -451,7 +457,14 @@ static inline bool
 RT_CHILDPTR_IS_VALUE(RT_PTR_ALLOC child)
 {
 #ifdef RT_VARLEN_VALUE_SIZE
+
+#ifdef RT_RUNTIME_EMBEDDABLE_VALUE
+	/* check for pointer tag */
+	return ((uintptr_t) child) & 1;
+#else
 	return false;
+#endif
+
 #else
 	return sizeof(RT_VALUE_TYPE) <= sizeof(RT_PTR_ALLOC);
 #endif
@@ -1727,6 +1740,11 @@ have_slot:
 	{
 		/* store value directly in child pointer slot */
 		memcpy(slot, value_p, value_sz);
+
+#ifdef RT_RUNTIME_EMBEDDABLE_VALUE
+		/* tag child pointer */
+		*((uintptr_t *) slot) |= 1;
+#endif
 	}
 	else
 	{
@@ -2878,6 +2896,7 @@ RT_DUMP_NODE(RT_NODE * node)
 #undef RT_DEFINE
 #undef RT_VALUE_TYPE
 #undef RT_VARLEN_VALUE_SIZE
+#undef RT_RUNTIME_EMBEDDABLE_VALUE
 #undef RT_SHMEM
 #undef RT_USE_DELETE
 #undef RT_DEBUG
diff --git a/src/test/modules/test_tidstore/expected/test_tidstore.out b/src/test/modules/test_tidstore/expected/test_tidstore.out
index 5e0517f3c3..054cd57ad2 100644
--- a/src/test/modules/test_tidstore/expected/test_tidstore.out
+++ b/src/test/modules/test_tidstore/expected/test_tidstore.out
@@ -51,7 +51,7 @@ WITH blocks (blk) AS(
 VALUES (0), (1), (:maxblkno - 1), (:maxblkno / 2), (:maxblkno)
 ),
 offsets (off) AS (
-VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset)
+VALUES (1), (2)--, (:maxoffset / 2), (:maxoffset - 1), (:maxoffset)
 )
 SELECT row_number() over(ORDER BY blk),
      tidstore_set_block_offsets(blk, array_agg(offsets.off)::int2[])
@@ -68,19 +68,13 @@ SELECT row_number() over(ORDER BY blk),
 
 -- Lookup test and dump (sorted) tids.
 SELECT lookup_test();
-   lookup_test    
-------------------
+  lookup_test   
+----------------
  (0,1)
  (0,2)
- (0,256)
- (0,511)
- (0,512)
  (4294967295,1)
  (4294967295,2)
- (4294967295,256)
- (4294967295,511)
- (4294967295,512)
-(10 rows)
+(4 rows)
 
 SELECT tidstore_is_full();
  tidstore_is_full 
@@ -93,30 +87,15 @@ SELECT tidstore_dump_tids();
 --------------------
  (0,1)
  (0,2)
- (0,256)
- (0,511)
- (0,512)
  (1,1)
  (1,2)
- (1,256)
- (1,511)
- (1,512)
  (2147483647,1)
  (2147483647,2)
- (2147483647,256)
- (2147483647,511)
- (2147483647,512)
  (4294967294,1)
  (4294967294,2)
- (4294967294,256)
- (4294967294,511)
- (4294967294,512)
  (4294967295,1)
  (4294967295,2)
- (4294967295,256)
- (4294967295,511)
- (4294967295,512)
-(25 rows)
+(10 rows)
 
 -- cleanup
 SELECT tidstore_destroy();
diff --git a/src/test/modules/test_tidstore/sql/test_tidstore.sql b/src/test/modules/test_tidstore/sql/test_tidstore.sql
index 4e626b74e3..b29c6e797e 100644
--- a/src/test/modules/test_tidstore/sql/test_tidstore.sql
+++ b/src/test/modules/test_tidstore/sql/test_tidstore.sql
@@ -41,7 +41,7 @@ WITH blocks (blk) AS(
 VALUES (0), (1), (:maxblkno - 1), (:maxblkno / 2), (:maxblkno)
 ),
 offsets (off) AS (
-VALUES (1), (2), (:maxoffset / 2), (:maxoffset - 1), (:maxoffset)
+VALUES (1), (2)--, (:maxoffset / 2), (:maxoffset - 1), (:maxoffset)
 )
 SELECT row_number() over(ORDER BY blk),
      tidstore_set_block_offsets(blk, array_agg(offsets.off)::int2[])
-- 
2.44.0

