Hi,
On 2022-09-22 08:19:09 -0700, Andres Freund wrote:
> Hi,
>
> On 2022-08-17 17:28:02 +0500, Andrey Borodin wrote:
> > Here's v13. Changes:
> > 1. Fixed passing through downlink in GIN index
> > 2. Fixed GIN tests (one test case was not working)
> >
> > Thanks to Vitaliy Kukharik for trying this patches.
>
> Due to the merge of the meson based build, this patch needs to be
> adjusted. See
> https://cirrus-ci.com/build/6637154947301376
>
> The changes should be fairly simple, just mirroring the Makefile ones.
Here's an updated patch adding meson compat.
I didn't fix the following warnings:
[25/28 3 89%] Compiling C object contrib/amcheck/amcheck.dll.p/amcheck.c.obj
../../home/andres/src/postgresql/contrib/amcheck/amcheck.c: In function
‘amcheck_lock_relation_and_check’:
../../home/andres/src/postgresql/contrib/amcheck/amcheck.c:81:20: warning:
implicit declaration of function ‘NewGUCNestLevel’
[-Wimplicit-function-declaration]
81 | save_nestlevel = NewGUCNestLevel();
| ^~~~~~~~~~~~~~~
../../home/andres/src/postgresql/contrib/amcheck/amcheck.c:124:2: warning:
implicit declaration of function ‘AtEOXact_GUC’; did you mean ‘AtEOXact_SMgr’?
[-Wimplicit-function-declaration]
124 | AtEOXact_GUC(false, save_nestlevel);
| ^~~~~~~~~~~~
| AtEOXact_SMgr
[26/28 2 92%] Compiling C object contrib/amcheck/amcheck.dll.p/verify_gin.c.obj
../../home/andres/src/postgresql/contrib/amcheck/verify_gin.c: In function
‘gin_check_parent_keys_consistency’:
../../home/andres/src/postgresql/contrib/amcheck/verify_gin.c:423:8: warning:
unused variable ‘heapallindexed’ [-Wunused-variable]
423 | bool heapallindexed = *((bool*)callback_state);
| ^~~~~~~~~~~~~~
[28/28 1 100%] Linking target contrib/amcheck/amcheck.dll
Greetings,
Andres Freund
>From 9c2919df1419216e596819e9a3e23616d431d8d0 Mon Sep 17 00:00:00 2001
From: "Andrey M. Borodin" <[email protected]>
Date: Sat, 23 Jul 2022 14:08:10 +0500
Subject: [PATCH v14 1/3] Refactor amcheck to extract common locking routines
---
contrib/amcheck/Makefile | 2 +
contrib/amcheck/amcheck.c | 188 +++++++++++++++++++
contrib/amcheck/amcheck.h | 27 +++
contrib/amcheck/meson.build | 1 +
contrib/amcheck/verify_nbtree.c | 308 ++++++++------------------------
5 files changed, 297 insertions(+), 229 deletions(-)
create mode 100644 contrib/amcheck/amcheck.c
create mode 100644 contrib/amcheck/amcheck.h
diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index b82f221e50b..f10fd9d89d5 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -3,11 +3,13 @@
MODULE_big = amcheck
OBJS = \
$(WIN32RES) \
+ amcheck.o \
verify_heapam.o \
verify_nbtree.o
EXTENSION = amcheck
DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql
+
PGFILEDESC = "amcheck - function for verifying relation integrity"
REGRESS = check check_btree check_heap
diff --git a/contrib/amcheck/amcheck.c b/contrib/amcheck/amcheck.c
new file mode 100644
index 00000000000..0194ef0d7a2
--- /dev/null
+++ b/contrib/amcheck/amcheck.c
@@ -0,0 +1,188 @@
+/*-------------------------------------------------------------------------
+ *
+ * amcheck.c
+ * Utility functions common to all access methods.
+ *
+ * Copyright (c) 2017-2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/amcheck.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/table.h"
+#include "access/tableam.h"
+#include "amcheck.h"
+#include "catalog/index.h"
+#include "commands/tablecmds.h"
+
+
+static bool
+amcheck_index_mainfork_expected(Relation rel);
+
+/*
+ * Check if index relation should have a file for its main relation
+ * fork. Verification uses this to skip unlogged indexes when in hot standby
+ * mode, where there is simply nothing to verify.
+ *
+ * NB: Caller should call index_checkable()
+ * before calling here.
+ */
+static bool
+amcheck_index_mainfork_expected(Relation rel)
+{
+ if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED ||
+ !RecoveryInProgress())
+ return true;
+
+ ereport(NOTICE,
+ (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
+ errmsg("cannot verify unlogged index \"%s\" during recovery, skipping",
+ RelationGetRelationName(rel))));
+
+ return false;
+}
+
+void
+amcheck_lock_relation_and_check(Oid indrelid, IndexCheckableCallback checkable,
+ IndexDoCheckCallback check, LOCKMODE lockmode, void *state)
+{
+ Oid heapid;
+ Relation indrel;
+ Relation heaprel;
+ Oid save_userid;
+ int save_sec_context;
+ int save_nestlevel;
+
+ /*
+ * We must lock table before index to avoid deadlocks. However, if the
+ * passed indrelid isn't an index then IndexGetRelation() will fail.
+ * Rather than emitting a not-very-helpful error message, postpone
+ * complaining, expecting that the is-it-an-index test below will fail.
+ *
+ * In hot standby mode this will raise an error when parentcheck is true.
+ */
+ heapid = IndexGetRelation(indrelid, true);
+ if (OidIsValid(heapid))
+ {
+ heaprel = table_open(heapid, lockmode);
+
+ /*
+ * Switch to the table owner's userid, so that any index functions are
+ * run as that user. Also lock down security-restricted operations
+ * and arrange to make GUC variable changes local to this command.
+ */
+ GetUserIdAndSecContext(&save_userid, &save_sec_context);
+ SetUserIdAndSecContext(heaprel->rd_rel->relowner,
+ save_sec_context | SECURITY_RESTRICTED_OPERATION);
+ save_nestlevel = NewGUCNestLevel();
+ }
+ else
+ {
+ heaprel = NULL;
+ /* for "gcc -Og" https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78394 */
+ save_userid = InvalidOid;
+ save_sec_context = -1;
+ save_nestlevel = -1;
+ }
+
+ /*
+ * Open the target index relations separately (like relation_openrv(), but
+ * with heap relation locked first to prevent deadlocking). In hot
+ * standby mode this will raise an error when parentcheck is true.
+ *
+ * There is no need for the usual indcheckxmin usability horizon test
+ * here, even in the heapallindexed case, because index undergoing
+ * verification only needs to have entries for a new transaction snapshot.
+ * (If this is a parentcheck verification, there is no question about
+ * committed or recently dead heap tuples lacking index entries due to
+ * concurrent activity.)
+ */
+ indrel = index_open(indrelid, lockmode);
+
+ /*
+ * Since we did the IndexGetRelation call above without any lock, it's
+ * barely possible that a race against an index drop/recreation could have
+ * netted us the wrong table.
+ */
+ if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_TABLE),
+ errmsg("could not open parent table of index \"%s\"",
+ RelationGetRelationName(indrel))));
+
+ /* Relation suitable for checking */
+ checkable(indrel);
+
+ if (amcheck_index_mainfork_expected(indrel))
+ check(indrel, heaprel, state);
+
+ /* Roll back any GUC changes executed by index functions */
+ AtEOXact_GUC(false, save_nestlevel);
+
+ /* Restore userid and security context */
+ SetUserIdAndSecContext(save_userid, save_sec_context);
+
+ /*
+ * Release locks early. That's ok here because nothing in the called
+ * routines will trigger shared cache invalidations to be sent, so we can
+ * relax the usual pattern of only releasing locks after commit.
+ */
+ index_close(indrel, lockmode);
+ if (heaprel)
+ table_close(heaprel, lockmode);
+}
+
+/*
+ * PageGetItemId() wrapper that validates returned line pointer.
+ *
+ * Buffer page/page item access macros generally trust that line pointers are
+ * not corrupt, which might cause problems for verification itself. For
+ * example, there is no bounds checking in PageGetItem(). Passing it a
+ * corrupt line pointer can cause it to return a tuple/pointer that is unsafe
+ * to dereference.
+ *
+ * Validating line pointers before tuples avoids undefined behavior and
+ * assertion failures with corrupt indexes, making the verification process
+ * more robust and predictable.
+ */
+ItemId
+PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
+ OffsetNumber offset, size_t opaquesize)
+{
+ ItemId itemid = PageGetItemId(page, offset);
+
+ Assert(opaquesize == MAXALIGN(opaquesize));
+
+ if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
+ BLCKSZ - MAXALIGN(opaquesize))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("line pointer points past end of tuple space in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ /*
+ * Verify that line pointer isn't LP_REDIRECT or LP_UNUSED, since nbtree and gist
+ * never uses either. Verify that line pointer has storage, too, since
+ * even LP_DEAD items should.
+ */
+ if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
+ ItemIdGetLength(itemid) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("invalid line pointer storage in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ return itemid;
+}
diff --git a/contrib/amcheck/amcheck.h b/contrib/amcheck/amcheck.h
new file mode 100644
index 00000000000..10906efd8a5
--- /dev/null
+++ b/contrib/amcheck/amcheck.h
@@ -0,0 +1,27 @@
+/*-------------------------------------------------------------------------
+ *
+ * amcheck.h
+ * Shared routines for amcheck verifications.
+ *
+ * Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/amcheck.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "storage/lockdefs.h"
+#include "utils/relcache.h"
+#include "miscadmin.h"
+
+/* Typedefs for callback functions for amcheck_lock_relation */
+typedef void (*IndexCheckableCallback) (Relation index);
+typedef void (*IndexDoCheckCallback) (Relation rel, Relation heaprel, void* state);
+
+extern void amcheck_lock_relation_and_check(Oid indrelid,
+ IndexCheckableCallback checkable,
+ IndexDoCheckCallback check,
+ LOCKMODE lockmode, void *state);
+
+extern ItemId PageGetItemIdCareful(Relation rel, BlockNumber block,
+ Page page, OffsetNumber offset, size_t opaquesize);
\ No newline at end of file
diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build
index 1db3d20349e..227e68ff834 100644
--- a/contrib/amcheck/meson.build
+++ b/contrib/amcheck/meson.build
@@ -1,4 +1,5 @@
amcheck = shared_module('amcheck', [
+ 'amcheck.c',
'verify_heapam.c',
'verify_nbtree.c',
],
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index 9021d156eb7..950014f19d5 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -41,6 +41,8 @@
#include "utils/memutils.h"
#include "utils/snapmgr.h"
+#include "amcheck.h"
+
PG_MODULE_MAGIC;
@@ -138,10 +140,8 @@ typedef struct BtreeLevel
PG_FUNCTION_INFO_V1(bt_index_check);
PG_FUNCTION_INFO_V1(bt_index_parent_check);
-static void bt_index_check_internal(Oid indrelid, bool parentcheck,
- bool heapallindexed, bool rootdescend);
+static void bt_index_check_internal_callback(Relation indrel, Relation heaprel, void* state);
static inline void btree_index_checkable(Relation rel);
-static inline bool btree_index_mainfork_expected(Relation rel);
static void bt_check_every_level(Relation rel, Relation heaprel,
bool heapkeyspace, bool readonly, bool heapallindexed,
bool rootdescend);
@@ -184,12 +184,17 @@ static inline bool invariant_l_nontarget_offset(BtreeCheckState *state,
static Page palloc_btree_page(BtreeCheckState *state, BlockNumber blocknum);
static inline BTScanInsert bt_mkscankey_pivotsearch(Relation rel,
IndexTuple itup);
-static ItemId PageGetItemIdCareful(BtreeCheckState *state, BlockNumber block,
- Page page, OffsetNumber offset);
static inline ItemPointer BTreeTupleGetHeapTIDCareful(BtreeCheckState *state,
IndexTuple itup, bool nonpivot);
static inline ItemPointer BTreeTupleGetPointsToTID(IndexTuple itup);
+typedef struct BTCheckCallbackState
+{
+ bool parentcheck;
+ bool heapallindexed;
+ bool rootdescend;
+} BTCheckCallbackState;
+
/*
* bt_index_check(index regclass, heapallindexed boolean)
*
@@ -203,12 +208,17 @@ Datum
bt_index_check(PG_FUNCTION_ARGS)
{
Oid indrelid = PG_GETARG_OID(0);
- bool heapallindexed = false;
+ BTCheckCallbackState args;
- if (PG_NARGS() == 2)
- heapallindexed = PG_GETARG_BOOL(1);
+ args.heapallindexed = false;
+ args.rootdescend = false;
+ args.parentcheck = false;
- bt_index_check_internal(indrelid, false, heapallindexed, false);
+ if (PG_NARGS() >= 2)
+ args.heapallindexed = PG_GETARG_BOOL(1);
+
+ amcheck_lock_relation_and_check(indrelid, btree_index_checkable,
+ bt_index_check_internal_callback, AccessShareLock, &args);
PG_RETURN_VOID();
}
@@ -226,15 +236,18 @@ Datum
bt_index_parent_check(PG_FUNCTION_ARGS)
{
Oid indrelid = PG_GETARG_OID(0);
- bool heapallindexed = false;
- bool rootdescend = false;
+ BTCheckCallbackState args;
+ args.heapallindexed = false;
+ args.rootdescend = false;
+ args.parentcheck = true;
if (PG_NARGS() >= 2)
- heapallindexed = PG_GETARG_BOOL(1);
+ args.heapallindexed = PG_GETARG_BOOL(1);
if (PG_NARGS() == 3)
- rootdescend = PG_GETARG_BOOL(2);
+ args.rootdescend = PG_GETARG_BOOL(2);
- bt_index_check_internal(indrelid, true, heapallindexed, rootdescend);
+ amcheck_lock_relation_and_check(indrelid, btree_index_checkable,
+ bt_index_check_internal_callback, ShareLock, &args);
PG_RETURN_VOID();
}
@@ -242,126 +255,35 @@ bt_index_parent_check(PG_FUNCTION_ARGS)
/*
* Helper for bt_index_[parent_]check, coordinating the bulk of the work.
*/
-static void
-bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed,
- bool rootdescend)
+static void bt_index_check_internal_callback(Relation indrel, Relation heaprel, void* state)
{
- Oid heapid;
- Relation indrel;
- Relation heaprel;
- LOCKMODE lockmode;
- Oid save_userid;
- int save_sec_context;
- int save_nestlevel;
-
- if (parentcheck)
- lockmode = ShareLock;
- else
- lockmode = AccessShareLock;
-
- /*
- * We must lock table before index to avoid deadlocks. However, if the
- * passed indrelid isn't an index then IndexGetRelation() will fail.
- * Rather than emitting a not-very-helpful error message, postpone
- * complaining, expecting that the is-it-an-index test below will fail.
- *
- * In hot standby mode this will raise an error when parentcheck is true.
- */
- heapid = IndexGetRelation(indrelid, true);
- if (OidIsValid(heapid))
- {
- heaprel = table_open(heapid, lockmode);
-
- /*
- * Switch to the table owner's userid, so that any index functions are
- * run as that user. Also lock down security-restricted operations
- * and arrange to make GUC variable changes local to this command.
- */
- GetUserIdAndSecContext(&save_userid, &save_sec_context);
- SetUserIdAndSecContext(heaprel->rd_rel->relowner,
- save_sec_context | SECURITY_RESTRICTED_OPERATION);
- save_nestlevel = NewGUCNestLevel();
- }
- else
- {
- heaprel = NULL;
- /* Set these just to suppress "uninitialized variable" warnings */
- save_userid = InvalidOid;
- save_sec_context = -1;
- save_nestlevel = -1;
- }
-
- /*
- * Open the target index relations separately (like relation_openrv(), but
- * with heap relation locked first to prevent deadlocking). In hot
- * standby mode this will raise an error when parentcheck is true.
- *
- * There is no need for the usual indcheckxmin usability horizon test
- * here, even in the heapallindexed case, because index undergoing
- * verification only needs to have entries for a new transaction snapshot.
- * (If this is a parentcheck verification, there is no question about
- * committed or recently dead heap tuples lacking index entries due to
- * concurrent activity.)
- */
- indrel = index_open(indrelid, lockmode);
-
- /*
- * Since we did the IndexGetRelation call above without any lock, it's
- * barely possible that a race against an index drop/recreation could have
- * netted us the wrong table.
- */
- if (heaprel == NULL || heapid != IndexGetRelation(indrelid, false))
- ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_TABLE),
- errmsg("could not open parent table of index \"%s\"",
- RelationGetRelationName(indrel))));
-
- /* Relation suitable for checking as B-Tree? */
- btree_index_checkable(indrel);
-
- if (btree_index_mainfork_expected(indrel))
- {
- bool heapkeyspace,
+ BTCheckCallbackState* args = (BTCheckCallbackState*) state;
+ bool heapkeyspace,
allequalimage;
- if (!smgrexists(RelationGetSmgr(indrel), MAIN_FORKNUM))
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("index \"%s\" lacks a main relation fork",
- RelationGetRelationName(indrel))));
+ if (!smgrexists(RelationGetSmgr(indrel), MAIN_FORKNUM))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" lacks a main relation fork",
+ RelationGetRelationName(indrel))));
- /* Extract metadata from metapage, and sanitize it in passing */
- _bt_metaversion(indrel, &heapkeyspace, &allequalimage);
- if (allequalimage && !heapkeyspace)
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("index \"%s\" metapage has equalimage field set on unsupported nbtree version",
- RelationGetRelationName(indrel))));
- if (allequalimage && !_bt_allequalimage(indrel, false))
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("index \"%s\" metapage incorrectly indicates that deduplication is safe",
- RelationGetRelationName(indrel))));
+ /* Extract metadata from metapage, and sanitize it in passing */
+ _bt_metaversion(indrel, &heapkeyspace, &allequalimage);
+ if (allequalimage && !heapkeyspace)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" metapage has equalimage field set on unsupported nbtree version",
+ RelationGetRelationName(indrel))));
+ if (allequalimage && !_bt_allequalimage(indrel, false))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" metapage incorrectly indicates that deduplication is safe",
+ RelationGetRelationName(indrel))));
- /* Check index, possibly against table it is an index on */
- bt_check_every_level(indrel, heaprel, heapkeyspace, parentcheck,
- heapallindexed, rootdescend);
- }
+ /* Check index, possibly against table it is an index on */
+ bt_check_every_level(indrel, heaprel, heapkeyspace, args->parentcheck,
+ args->heapallindexed, args->rootdescend);
- /* Roll back any GUC changes executed by index functions */
- AtEOXact_GUC(false, save_nestlevel);
-
- /* Restore userid and security context */
- SetUserIdAndSecContext(save_userid, save_sec_context);
-
- /*
- * Release locks early. That's ok here because nothing in the called
- * routines will trigger shared cache invalidations to be sent, so we can
- * relax the usual pattern of only releasing locks after commit.
- */
- index_close(indrel, lockmode);
- if (heaprel)
- table_close(heaprel, lockmode);
}
/*
@@ -398,29 +320,6 @@ btree_index_checkable(Relation rel)
errdetail("Index is not valid.")));
}
-/*
- * Check if B-Tree index relation should have a file for its main relation
- * fork. Verification uses this to skip unlogged indexes when in hot standby
- * mode, where there is simply nothing to verify. We behave as if the
- * relation is empty.
- *
- * NB: Caller should call btree_index_checkable() before calling here.
- */
-static inline bool
-btree_index_mainfork_expected(Relation rel)
-{
- if (rel->rd_rel->relpersistence != RELPERSISTENCE_UNLOGGED ||
- !RecoveryInProgress())
- return true;
-
- ereport(DEBUG1,
- (errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
- errmsg("cannot verify unlogged index \"%s\" during recovery, skipping",
- RelationGetRelationName(rel))));
-
- return false;
-}
-
/*
* Main entry point for B-Tree SQL-callable functions. Walks the B-Tree in
* logical order, verifying invariants as it goes. Optionally, verification
@@ -793,9 +692,9 @@ bt_check_level_from_leftmost(BtreeCheckState *state, BtreeLevel level)
ItemId itemid;
/* Internal page -- downlink gets leftmost on next level */
- itemid = PageGetItemIdCareful(state, state->targetblock,
+ itemid = PageGetItemIdCareful(state->rel, state->targetblock,
state->target,
- P_FIRSTDATAKEY(opaque));
+ P_FIRSTDATAKEY(opaque), sizeof(BTPageOpaqueData));
itup = (IndexTuple) PageGetItem(state->target, itemid);
nextleveldown.leftmost = BTreeTupleGetDownLink(itup);
nextleveldown.level = opaque->btpo_level - 1;
@@ -875,8 +774,8 @@ nextpage:
IndexTuple itup;
ItemId itemid;
- itemid = PageGetItemIdCareful(state, state->targetblock,
- state->target, P_HIKEY);
+ itemid = PageGetItemIdCareful(state->rel, state->targetblock,
+ state->target, P_HIKEY, sizeof(BTPageOpaqueData));
itup = (IndexTuple) PageGetItem(state->target, itemid);
state->lowkey = MemoryContextAlloc(oldcontext, IndexTupleSize(itup));
@@ -1093,8 +992,8 @@ bt_target_page_check(BtreeCheckState *state)
IndexTuple itup;
/* Verify line pointer before checking tuple */
- itemid = PageGetItemIdCareful(state, state->targetblock,
- state->target, P_HIKEY);
+ itemid = PageGetItemIdCareful(state->rel, state->targetblock,
+ state->target, P_HIKEY, sizeof(BTPageOpaqueData));
if (!_bt_check_natts(state->rel, state->heapkeyspace, state->target,
P_HIKEY))
{
@@ -1129,8 +1028,8 @@ bt_target_page_check(BtreeCheckState *state)
CHECK_FOR_INTERRUPTS();
- itemid = PageGetItemIdCareful(state, state->targetblock,
- state->target, offset);
+ itemid = PageGetItemIdCareful(state->rel, state->targetblock,
+ state->target, offset, sizeof(BTPageOpaqueData));
itup = (IndexTuple) PageGetItem(state->target, itemid);
tupsize = IndexTupleSize(itup);
@@ -1442,9 +1341,9 @@ bt_target_page_check(BtreeCheckState *state)
OffsetNumberNext(offset));
/* Reuse itup to get pointed-to heap location of second item */
- itemid = PageGetItemIdCareful(state, state->targetblock,
+ itemid = PageGetItemIdCareful(state->rel, state->targetblock,
state->target,
- OffsetNumberNext(offset));
+ OffsetNumberNext(offset), sizeof(BTPageOpaqueData));
itup = (IndexTuple) PageGetItem(state->target, itemid);
tid = BTreeTupleGetPointsToTID(itup);
nhtid = psprintf("(%u,%u)",
@@ -1735,8 +1634,8 @@ bt_right_page_check_scankey(BtreeCheckState *state)
if (P_ISLEAF(opaque) && nline >= P_FIRSTDATAKEY(opaque))
{
/* Return first data item (if any) */
- rightitem = PageGetItemIdCareful(state, targetnext, rightpage,
- P_FIRSTDATAKEY(opaque));
+ rightitem = PageGetItemIdCareful(state->rel, targetnext, rightpage,
+ P_FIRSTDATAKEY(opaque), sizeof(BTPageOpaqueData));
}
else if (!P_ISLEAF(opaque) &&
nline >= OffsetNumberNext(P_FIRSTDATAKEY(opaque)))
@@ -1745,8 +1644,8 @@ bt_right_page_check_scankey(BtreeCheckState *state)
* Return first item after the internal page's "negative infinity"
* item
*/
- rightitem = PageGetItemIdCareful(state, targetnext, rightpage,
- OffsetNumberNext(P_FIRSTDATAKEY(opaque)));
+ rightitem = PageGetItemIdCareful(state->rel, targetnext, rightpage,
+ OffsetNumberNext(P_FIRSTDATAKEY(opaque)), sizeof(BTPageOpaqueData));
}
else
{
@@ -1865,8 +1764,8 @@ bt_child_highkey_check(BtreeCheckState *state,
if (OffsetNumberIsValid(target_downlinkoffnum))
{
- itemid = PageGetItemIdCareful(state, state->targetblock,
- state->target, target_downlinkoffnum);
+ itemid = PageGetItemIdCareful(state->rel, state->targetblock,
+ state->target, target_downlinkoffnum, sizeof(BTPageOpaqueData));
itup = (IndexTuple) PageGetItem(state->target, itemid);
downlink = BTreeTupleGetDownLink(itup);
}
@@ -1969,7 +1868,7 @@ bt_child_highkey_check(BtreeCheckState *state,
OffsetNumber pivotkey_offset;
/* Get high key */
- itemid = PageGetItemIdCareful(state, blkno, page, P_HIKEY);
+ itemid = PageGetItemIdCareful(state->rel, blkno, page, P_HIKEY, sizeof(BTPageOpaqueData));
highkey = (IndexTuple) PageGetItem(page, itemid);
/*
@@ -2020,8 +1919,8 @@ bt_child_highkey_check(BtreeCheckState *state,
LSN_FORMAT_ARGS(state->targetlsn))));
pivotkey_offset = P_HIKEY;
}
- itemid = PageGetItemIdCareful(state, state->targetblock,
- state->target, pivotkey_offset);
+ itemid = PageGetItemIdCareful(state->rel, state->targetblock,
+ state->target, pivotkey_offset, sizeof(BTPageOpaqueData));
itup = (IndexTuple) PageGetItem(state->target, itemid);
}
else
@@ -2107,8 +2006,8 @@ bt_child_check(BtreeCheckState *state, BTScanInsert targetkey,
BTPageOpaque copaque;
BTPageOpaque topaque;
- itemid = PageGetItemIdCareful(state, state->targetblock,
- state->target, downlinkoffnum);
+ itemid = PageGetItemIdCareful(state->rel, state->targetblock,
+ state->target, downlinkoffnum, sizeof(BTPageOpaqueData));
itup = (IndexTuple) PageGetItem(state->target, itemid);
childblock = BTreeTupleGetDownLink(itup);
@@ -2339,7 +2238,7 @@ bt_downlink_missing_check(BtreeCheckState *state, bool rightsplit,
RelationGetRelationName(state->rel));
level = opaque->btpo_level;
- itemid = PageGetItemIdCareful(state, blkno, page, P_FIRSTDATAKEY(opaque));
+ itemid = PageGetItemIdCareful(state->rel, blkno, page, P_FIRSTDATAKEY(opaque), sizeof(BTPageOpaqueData));
itup = (IndexTuple) PageGetItem(page, itemid);
childblk = BTreeTupleGetDownLink(itup);
for (;;)
@@ -2363,8 +2262,8 @@ bt_downlink_missing_check(BtreeCheckState *state, bool rightsplit,
level - 1, copaque->btpo_level)));
level = copaque->btpo_level;
- itemid = PageGetItemIdCareful(state, childblk, child,
- P_FIRSTDATAKEY(copaque));
+ itemid = PageGetItemIdCareful(state->rel, childblk, child,
+ P_FIRSTDATAKEY(copaque), sizeof(BTPageOpaqueData));
itup = (IndexTuple) PageGetItem(child, itemid);
childblk = BTreeTupleGetDownLink(itup);
/* Be slightly more pro-active in freeing this memory, just in case */
@@ -2412,7 +2311,7 @@ bt_downlink_missing_check(BtreeCheckState *state, bool rightsplit,
*/
if (P_ISHALFDEAD(copaque) && !P_RIGHTMOST(copaque))
{
- itemid = PageGetItemIdCareful(state, childblk, child, P_HIKEY);
+ itemid = PageGetItemIdCareful(state->rel, childblk, child, P_HIKEY, sizeof(BTPageOpaqueData));
itup = (IndexTuple) PageGetItem(child, itemid);
if (BTreeTupleGetTopParent(itup) == blkno)
return;
@@ -2782,8 +2681,8 @@ invariant_l_offset(BtreeCheckState *state, BTScanInsert key,
Assert(key->pivotsearch);
/* Verify line pointer before checking tuple */
- itemid = PageGetItemIdCareful(state, state->targetblock, state->target,
- upperbound);
+ itemid = PageGetItemIdCareful(state->rel, state->targetblock, state->target,
+ upperbound, sizeof(BTPageOpaqueData));
/* pg_upgrade'd indexes may legally have equal sibling tuples */
if (!key->heapkeyspace)
return invariant_leq_offset(state, key, upperbound);
@@ -2905,8 +2804,8 @@ invariant_l_nontarget_offset(BtreeCheckState *state, BTScanInsert key,
Assert(key->pivotsearch);
/* Verify line pointer before checking tuple */
- itemid = PageGetItemIdCareful(state, nontargetblock, nontarget,
- upperbound);
+ itemid = PageGetItemIdCareful(state->rel, nontargetblock, nontarget,
+ upperbound, sizeof(BTPageOpaqueData));
cmp = _bt_compare(state->rel, key, nontarget, upperbound);
/* pg_upgrade'd indexes may legally have equal sibling tuples */
@@ -3143,55 +3042,6 @@ bt_mkscankey_pivotsearch(Relation rel, IndexTuple itup)
return skey;
}
-/*
- * PageGetItemId() wrapper that validates returned line pointer.
- *
- * Buffer page/page item access macros generally trust that line pointers are
- * not corrupt, which might cause problems for verification itself. For
- * example, there is no bounds checking in PageGetItem(). Passing it a
- * corrupt line pointer can cause it to return a tuple/pointer that is unsafe
- * to dereference.
- *
- * Validating line pointers before tuples avoids undefined behavior and
- * assertion failures with corrupt indexes, making the verification process
- * more robust and predictable.
- */
-static ItemId
-PageGetItemIdCareful(BtreeCheckState *state, BlockNumber block, Page page,
- OffsetNumber offset)
-{
- ItemId itemid = PageGetItemId(page, offset);
-
- if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
- BLCKSZ - MAXALIGN(sizeof(BTPageOpaqueData)))
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("line pointer points past end of tuple space in index \"%s\"",
- RelationGetRelationName(state->rel)),
- errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
- block, offset, ItemIdGetOffset(itemid),
- ItemIdGetLength(itemid),
- ItemIdGetFlags(itemid))));
-
- /*
- * Verify that line pointer isn't LP_REDIRECT or LP_UNUSED, since nbtree
- * never uses either. Verify that line pointer has storage, too, since
- * even LP_DEAD items should within nbtree.
- */
- if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
- ItemIdGetLength(itemid) == 0)
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("invalid line pointer storage in index \"%s\"",
- RelationGetRelationName(state->rel)),
- errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
- block, offset, ItemIdGetOffset(itemid),
- ItemIdGetLength(itemid),
- ItemIdGetFlags(itemid))));
-
- return itemid;
-}
-
/*
* BTreeTupleGetHeapTID() wrapper that enforces that a heap TID is present in
* cases where that is mandatory (i.e. for non-pivot tuples)
--
2.37.3.542.gdd3f6c4cae
>From 5c6709a38561adf4cf2d6f72ad5fcd86fdf4bbde Mon Sep 17 00:00:00 2001
From: "Andrey M. Borodin" <[email protected]>
Date: Sat, 23 Jul 2022 14:17:44 +0500
Subject: [PATCH v14 2/3] Add gist_index_parent_check() function to verify GiST
index
---
contrib/amcheck/Makefile | 6 +-
contrib/amcheck/amcheck--1.3--1.4.sql | 14 +
contrib/amcheck/amcheck.control | 2 +-
contrib/amcheck/expected/check_gist.out | 119 ++++++
contrib/amcheck/meson.build | 3 +
contrib/amcheck/sql/check_gist.sql | 42 ++
contrib/amcheck/verify_gist.c | 520 ++++++++++++++++++++++++
doc/src/sgml/amcheck.sgml | 19 +
8 files changed, 722 insertions(+), 3 deletions(-)
create mode 100644 contrib/amcheck/amcheck--1.3--1.4.sql
create mode 100644 contrib/amcheck/expected/check_gist.out
create mode 100644 contrib/amcheck/sql/check_gist.sql
create mode 100644 contrib/amcheck/verify_gist.c
diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index f10fd9d89d5..a8174195817 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -4,15 +4,17 @@ MODULE_big = amcheck
OBJS = \
$(WIN32RES) \
amcheck.o \
+ verify_gist.o \
verify_heapam.o \
verify_nbtree.o
EXTENSION = amcheck
-DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql
+DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql \
+ amcheck--1.3--1.4.sql
PGFILEDESC = "amcheck - function for verifying relation integrity"
-REGRESS = check check_btree check_heap
+REGRESS = check check_btree check_heap check_gist
TAP_TESTS = 1
diff --git a/contrib/amcheck/amcheck--1.3--1.4.sql b/contrib/amcheck/amcheck--1.3--1.4.sql
new file mode 100644
index 00000000000..93297379efe
--- /dev/null
+++ b/contrib/amcheck/amcheck--1.3--1.4.sql
@@ -0,0 +1,14 @@
+/* contrib/amcheck/amcheck--1.3--1.4.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.4'" to load this file. \quit
+
+
+-- gist_index_parent_check()
+--
+CREATE FUNCTION gist_index_parent_check(index regclass, heapallindexed boolean)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'gist_index_parent_check'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION gist_index_parent_check(regclass, boolean) FROM PUBLIC;
\ No newline at end of file
diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control
index ab50931f754..e67ace01c99 100644
--- a/contrib/amcheck/amcheck.control
+++ b/contrib/amcheck/amcheck.control
@@ -1,5 +1,5 @@
# amcheck extension
comment = 'functions for verifying relation integrity'
-default_version = '1.3'
+default_version = '1.4'
module_pathname = '$libdir/amcheck'
relocatable = true
diff --git a/contrib/amcheck/expected/check_gist.out b/contrib/amcheck/expected/check_gist.out
new file mode 100644
index 00000000000..9749adfd340
--- /dev/null
+++ b/contrib/amcheck/expected/check_gist.out
@@ -0,0 +1,119 @@
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_parent_check('gist_check_idx1', false);
+ gist_index_parent_check
+-------------------------
+
+(1 row)
+
+SELECT gist_index_parent_check('gist_check_idx2', false);
+ gist_index_parent_check
+-------------------------
+
+(1 row)
+
+SELECT gist_index_parent_check('gist_check_idx1', true);
+ gist_index_parent_check
+-------------------------
+
+(1 row)
+
+SELECT gist_index_parent_check('gist_check_idx2', true);
+ gist_index_parent_check
+-------------------------
+
+(1 row)
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_parent_check('gist_check_idx1', false);
+ gist_index_parent_check
+-------------------------
+
+(1 row)
+
+SELECT gist_index_parent_check('gist_check_idx2', false);
+ gist_index_parent_check
+-------------------------
+
+(1 row)
+
+SELECT gist_index_parent_check('gist_check_idx1', true);
+ gist_index_parent_check
+-------------------------
+
+(1 row)
+
+SELECT gist_index_parent_check('gist_check_idx2', true);
+ gist_index_parent_check
+-------------------------
+
+(1 row)
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+SELECT gist_index_parent_check('gist_check_idx1', false);
+ gist_index_parent_check
+-------------------------
+
+(1 row)
+
+SELECT gist_index_parent_check('gist_check_idx2', false);
+ gist_index_parent_check
+-------------------------
+
+(1 row)
+
+SELECT gist_index_parent_check('gist_check_idx1', true);
+ gist_index_parent_check
+-------------------------
+
+(1 row)
+
+SELECT gist_index_parent_check('gist_check_idx2', true);
+ gist_index_parent_check
+-------------------------
+
+(1 row)
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_parent_check('gist_check_idx1', false);
+ gist_index_parent_check
+-------------------------
+
+(1 row)
+
+SELECT gist_index_parent_check('gist_check_idx2', false);
+ gist_index_parent_check
+-------------------------
+
+(1 row)
+
+SELECT gist_index_parent_check('gist_check_idx1', true);
+ gist_index_parent_check
+-------------------------
+
+(1 row)
+
+SELECT gist_index_parent_check('gist_check_idx2', true);
+ gist_index_parent_check
+-------------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gist_check;
diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build
index 227e68ff834..d84138f7fa6 100644
--- a/contrib/amcheck/meson.build
+++ b/contrib/amcheck/meson.build
@@ -1,5 +1,6 @@
amcheck = shared_module('amcheck', [
'amcheck.c',
+ 'verify_gist.c',
'verify_heapam.c',
'verify_nbtree.c',
],
@@ -13,6 +14,7 @@ install_data(
'amcheck--1.0--1.1.sql',
'amcheck--1.1--1.2.sql',
'amcheck--1.2--1.3.sql',
+ 'amcheck--1.3--1.4.sql',
kwargs: contrib_data_args,
)
@@ -25,6 +27,7 @@ tests += {
'check',
'check_btree',
'check_heap',
+ 'check_gist',
],
},
'tap': {
diff --git a/contrib/amcheck/sql/check_gist.sql b/contrib/amcheck/sql/check_gist.sql
new file mode 100644
index 00000000000..75b9ff4b436
--- /dev/null
+++ b/contrib/amcheck/sql/check_gist.sql
@@ -0,0 +1,42 @@
+
+SELECT setseed(1);
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_parent_check('gist_check_idx1', false);
+SELECT gist_index_parent_check('gist_check_idx2', false);
+SELECT gist_index_parent_check('gist_check_idx1', true);
+SELECT gist_index_parent_check('gist_check_idx2', true);
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_parent_check('gist_check_idx1', false);
+SELECT gist_index_parent_check('gist_check_idx2', false);
+SELECT gist_index_parent_check('gist_check_idx1', true);
+SELECT gist_index_parent_check('gist_check_idx2', true);
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+
+SELECT gist_index_parent_check('gist_check_idx1', false);
+SELECT gist_index_parent_check('gist_check_idx2', false);
+SELECT gist_index_parent_check('gist_check_idx1', true);
+SELECT gist_index_parent_check('gist_check_idx2', true);
+
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_parent_check('gist_check_idx1', false);
+SELECT gist_index_parent_check('gist_check_idx2', false);
+SELECT gist_index_parent_check('gist_check_idx1', true);
+SELECT gist_index_parent_check('gist_check_idx2', true);
+-- cleanup
+DROP TABLE gist_check;
diff --git a/contrib/amcheck/verify_gist.c b/contrib/amcheck/verify_gist.c
new file mode 100644
index 00000000000..db65880d871
--- /dev/null
+++ b/contrib/amcheck/verify_gist.c
@@ -0,0 +1,520 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_gist.c
+ * Verifies the integrity of GiST indexes based on invariants.
+ *
+ * Verification checks that all paths in GiST graph contain
+ * consistent keys: tuples on parent pages consistently include tuples
+ * from children pages. Also, verification checks graph invariants:
+ * internal page must have at least one downlinks, internal page can
+ * reference either only leaf pages or only internal pages.
+ *
+ *
+ * Copyright (c) 2017-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/verify_gist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gist_private.h"
+#include "access/tableam.h"
+#include "access/transam.h"
+#include "catalog/pg_am.h"
+#include "common/pg_prng.h"
+#include "catalog/index.h"
+#include "lib/bloomfilter.h"
+#include "miscadmin.h"
+#include "storage/lmgr.h"
+#include "storage/smgr.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+
+#include "amcheck.h"
+
+/*
+ * GistScanItem represents one item of depth-first scan of GiST index.
+ */
+typedef struct GistScanItem
+{
+ int depth;
+ IndexTuple parenttup;
+ BlockNumber parentblk;
+ XLogRecPtr parentlsn;
+ BlockNumber blkno;
+ struct GistScanItem *next;
+} GistScanItem;
+
+typedef struct GistCheckState
+{
+ /* Bloom filter fingerprints B-Tree index */
+ bloom_filter *filter;
+ /* Debug counter */
+ int64 heaptuplespresent;
+ /* GiST state */
+ GISTSTATE *state;
+
+ Snapshot snapshot;
+ Relation rel;
+ Relation heaprel;
+} GistCheckState;
+
+PG_FUNCTION_INFO_V1(gist_index_parent_check);
+
+static GistCheckState gist_init_heapallindexed(Relation rel);
+static void gist_index_checkable(Relation rel);
+static void gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void* callback_state);
+static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
+static IndexTuple gist_refind_parent(Relation rel, BlockNumber parentblkno,
+ BlockNumber childblkno,
+ BufferAccessStrategy strategy);
+static void gist_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
+ bool *isnull, bool tupleIsAlive, void *checkstate);
+
+/*
+ * gist_index_parent_check(index regclass)
+ *
+ * Verify integrity of GiST index.
+ *
+ * Acquires AccessShareLock on heap & index relations.
+ */
+Datum gist_index_parent_check(PG_FUNCTION_ARGS)
+{
+ Oid indrelid = PG_GETARG_OID(0);
+ bool heapallindexed = false;
+
+ if (PG_NARGS() >= 2)
+ heapallindexed = PG_GETARG_BOOL(1);
+
+ amcheck_lock_relation_and_check(indrelid, gist_index_checkable,
+ gist_check_parent_keys_consistency, AccessShareLock, &heapallindexed);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Check that relation is eligible for GiST verification
+ */
+static void
+gist_index_checkable(Relation rel)
+{
+ if (rel->rd_rel->relkind != RELKIND_INDEX ||
+ rel->rd_rel->relam != GIST_AM_OID)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("only GiST indexes are supported as targets for this verification"),
+ errdetail("Relation \"%s\" is not a GiST index.",
+ RelationGetRelationName(rel))));
+
+ if (RELATION_IS_OTHER_TEMP(rel))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot access temporary tables of other sessions"),
+ errdetail("Index \"%s\" is associated with temporary relation.",
+ RelationGetRelationName(rel))));
+
+ if (!rel->rd_index->indisvalid)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot check index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail("Index is not valid")));
+}
+
+static GistCheckState
+gist_init_heapallindexed(Relation rel)
+{
+ int64 total_pages;
+ int64 total_elems;
+ uint64 seed;
+ GistCheckState result;
+
+ /*
+ * Size Bloom filter based on estimated number of tuples in index.
+ * This logic is similar to B-tree, see verify_btree.c .
+ */
+ total_pages = RelationGetNumberOfBlocks(rel);
+ total_elems = Max(total_pages * (MaxOffsetNumber / 5),
+ (int64) rel->rd_rel->reltuples);
+ seed = pg_prng_uint64(&pg_global_prng_state);
+ result.filter = bloom_create(total_elems, maintenance_work_mem, seed);
+
+ result.snapshot = RegisterSnapshot(GetTransactionSnapshot());
+
+ /*
+ * GetTransactionSnapshot() always acquires a new MVCC snapshot in
+ * READ COMMITTED mode. A new snapshot is guaranteed to have all
+ * the entries it requires in the index.
+ *
+ * We must defend against the possibility that an old xact
+ * snapshot was returned at higher isolation levels when that
+ * snapshot is not safe for index scans of the target index. This
+ * is possible when the snapshot sees tuples that are before the
+ * index's indcheckxmin horizon. Throwing an error here should be
+ * very rare. It doesn't seem worth using a secondary snapshot to
+ * avoid this.
+ */
+ if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
+ !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
+ result.snapshot->xmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("index \"%s\" cannot be verified using transaction snapshot",
+ RelationGetRelationName(rel))));
+
+ return result;
+}
+
+/*
+ * Main entry point for GiST check. Allocates memory context and scans through
+ * GiST graph. This function verifies that tuples of internal pages cover all
+ * the key space of each tuple on leaf page. To do this we invoke
+ * gist_check_internal_page() for every internal page.
+ *
+ * gist_check_internal_page() in it's turn takes every tuple and tries to
+ * adjust it by tuples on referenced child page. Parent gist tuple should
+ * never require any adjustments.
+ */
+static void
+gist_check_parent_keys_consistency(Relation rel, Relation heaprel, void* callback_state)
+{
+ BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+ GistScanItem *stack;
+ MemoryContext mctx;
+ MemoryContext oldcontext;
+ GISTSTATE *state;
+ int leafdepth;
+ bool heapallindexed = *((bool*)callback_state);
+ GistCheckState check_state;
+
+ mctx = AllocSetContextCreate(CurrentMemoryContext,
+ "amcheck context",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(mctx);
+
+ state = initGISTstate(rel);
+
+ if (heapallindexed)
+ check_state = gist_init_heapallindexed(rel);
+ check_state.state = state;
+ check_state.rel = rel;
+ check_state.heaprel = heaprel;
+
+
+ /*
+ * We don't know the height of the tree yet, but as soon as we encounter a
+ * leaf page, we will set 'leafdepth' to its depth.
+ */
+ leafdepth = -1;
+
+ /* Start the scan at the root page */
+ stack = (GistScanItem *) palloc0(sizeof(GistScanItem));
+ stack->depth = 0;
+ stack->parenttup = NULL;
+ stack->parentblk = InvalidBlockNumber;
+ stack->parentlsn = InvalidXLogRecPtr;
+ stack->blkno = GIST_ROOT_BLKNO;
+
+ while (stack)
+ {
+ GistScanItem *stack_next;
+ Buffer buffer;
+ Page page;
+ OffsetNumber i, maxoff;
+ XLogRecPtr lsn;
+
+ CHECK_FOR_INTERRUPTS();
+
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+ RBM_NORMAL, strategy);
+ LockBuffer(buffer, GIST_SHARE);
+ page = (Page) BufferGetPage(buffer);
+ lsn = BufferGetLSNAtomic(buffer);
+
+ /* Do basic sanity checks on the page headers */
+ check_index_page(rel, buffer, stack->blkno);
+
+ /*
+ * It's possible that the page was split since we looked at the
+ * parent, so that we didn't missed the downlink of the right sibling
+ * when we scanned the parent. If so, add the right sibling to the
+ * stack now.
+ */
+ if (GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page))
+ {
+ /* split page detected, install right link to the stack */
+ GistScanItem *ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+
+ ptr->depth = stack->depth;
+ ptr->parenttup = CopyIndexTuple(stack->parenttup);
+ ptr->parentblk = stack->parentblk;
+ ptr->parentlsn = stack->parentlsn;
+ ptr->blkno = GistPageGetOpaque(page)->rightlink;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+
+ /* Check that the tree has the same height in all branches */
+ if (GistPageIsLeaf(page))
+ {
+ if (leafdepth == -1)
+ leafdepth = stack->depth;
+ else if (stack->depth != leafdepth)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+ RelationGetRelationName(rel), stack->blkno)));
+ }
+
+ /*
+ * Check that each tuple looks valid, and is consistent with the
+ * downlink we followed when we stepped on this page.
+ */
+ maxoff = PageGetMaxOffsetNumber(page);
+ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i, sizeof(GISTPageOpaqueData));
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+ /*
+ * Check that it's not a leftover invalid tuple from pre-9.1 See
+ * also gistdoinsert() and gistbulkdelete() handling of such
+ * tuples. We do consider it error here.
+ */
+ if (GistTupleIsInvalid(idxtuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("index \"%s\" contains an inner tuple marked as invalid, block %u, offset %u",
+ RelationGetRelationName(rel), stack->blkno, i),
+ errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
+ errhint("Please REINDEX it.")));
+
+ if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
+ RelationGetRelationName(rel), stack->blkno, i)));
+
+ /*
+ * Check if this tuple is consistent with the downlink in the
+ * parent.
+ */
+ if (stack->parenttup &&
+ gistgetadjusted(rel, stack->parenttup, idxtuple, state))
+ {
+ /*
+ * There was a discrepancy between parent and child tuples.
+ * We need to verify it is not a result of concurrent call of
+ * gistplacetopage(). So, lock parent and try to find downlink
+ * for current page. It may be missing due to concurrent page
+ * split, this is OK.
+ *
+ * Note that when we aquire parent tuple now we hold lock for
+ * both parent and child buffers. Thus parent tuple must
+ * include keyspace of the child.
+ */
+ pfree(stack->parenttup);
+ stack->parenttup = gist_refind_parent(rel, stack->parentblk,
+ stack->blkno, strategy);
+
+ /* We found it - make a final check before failing */
+ if (!stack->parenttup)
+ elog(NOTICE, "Unable to find parent tuple for block %u on block %u due to concurrent split",
+ stack->blkno, stack->parentblk);
+ else if (gistgetadjusted(rel, stack->parenttup, idxtuple, state))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+ RelationGetRelationName(rel), stack->blkno, i)));
+ else
+ {
+ /*
+ * But now it is properly adjusted - nothing to do here.
+ */
+ }
+ }
+
+
+ if (GistPageIsLeaf(page))
+ {
+ if (heapallindexed)
+ {
+ bloom_add_element(check_state.filter, (unsigned char *) idxtuple,
+ IndexTupleSize(idxtuple));
+ }
+ }
+ /* If this is an internal page, recurse into the child */
+ else
+ {
+ GistScanItem *ptr;
+
+ ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+ ptr->depth = stack->depth + 1;
+ ptr->parenttup = CopyIndexTuple(idxtuple);
+ ptr->parentblk = stack->blkno;
+ ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+ ptr->parentlsn = lsn;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+ }
+
+ LockBuffer(buffer, GIST_UNLOCK);
+ ReleaseBuffer(buffer);
+
+ /* Step to next item in the queue */
+ stack_next = stack->next;
+ if (stack->parenttup)
+ pfree(stack->parenttup);
+ pfree(stack);
+ stack = stack_next;
+ }
+
+ if (heapallindexed)
+ {
+ IndexInfo *indexinfo = BuildIndexInfo(rel);
+ TableScanDesc scan;
+
+ scan = table_beginscan_strat(heaprel, /* relation */
+ check_state.snapshot, /* snapshot */
+ 0, /* number of keys */
+ NULL, /* scan key */
+ true, /* buffer access strategy OK */
+ true); /* syncscan OK? */
+
+ /*
+ * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY.
+ */
+ indexinfo->ii_Concurrent = true;
+
+ indexinfo->ii_Unique = false;
+ indexinfo->ii_ExclusionOps = NULL;
+ indexinfo->ii_ExclusionProcs = NULL;
+ indexinfo->ii_ExclusionStrats = NULL;
+
+ elog(DEBUG1, "verifying that tuples from index \"%s\" are present in \"%s\"",
+ RelationGetRelationName(rel),
+ RelationGetRelationName(heaprel));
+
+ table_index_build_scan(heaprel, rel, indexinfo, true, false,
+ gist_tuple_present_callback, (void *) &check_state, scan);
+
+ ereport(DEBUG1,
+ (errmsg_internal("finished verifying presence of " INT64_FORMAT " tuples from table \"%s\" with bitset %.2f%% set",
+ check_state.heaptuplespresent, RelationGetRelationName(heaprel),
+ 100.0 * bloom_prop_bits_set(check_state.filter))));
+
+ UnregisterSnapshot(check_state.snapshot);
+ bloom_free(check_state.filter);
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(mctx);
+}
+
+static void
+gist_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
+ bool *isnull, bool tupleIsAlive, void *checkstate)
+{
+ GistCheckState *state = (GistCheckState *) checkstate;
+ IndexTuple itup = gistFormTuple(state->state, index, values, isnull, true);
+ itup->t_tid = *tid;
+ /* Probe Bloom filter -- tuple should be present */
+ if (bloom_lacks_element(state->filter, (unsigned char *) itup,
+ IndexTupleSize(itup)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
+ ItemPointerGetBlockNumber(&(itup->t_tid)),
+ ItemPointerGetOffsetNumber(&(itup->t_tid)),
+ RelationGetRelationName(state->heaprel),
+ RelationGetRelationName(state->rel))));
+
+ state->heaptuplespresent++;
+
+ pfree(itup);
+}
+
+static void
+check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
+{
+ Page page = BufferGetPage(buffer);
+
+ gistcheckpage(rel, buffer);
+
+ if (GistPageGetOpaque(page)->gist_page_id != GIST_PAGE_ID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has corrupted page %d",
+ RelationGetRelationName(rel), blockNo)));
+
+ if (GistPageIsDeleted(page))
+ {
+ if (!GistPageIsLeaf(page))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted internal page %d",
+ RelationGetRelationName(rel), blockNo)));
+ if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted page %d with tuples",
+ RelationGetRelationName(rel), blockNo)));
+ }
+ else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has page %d with exceeding count of tuples",
+ RelationGetRelationName(rel), blockNo)));
+}
+
+/*
+ * Try to re-find downlink pointing to 'blkno', in 'parentblkno'.
+ *
+ * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
+ * returns NULL.
+ */
+static IndexTuple
+gist_refind_parent(Relation rel, BlockNumber parentblkno,
+ BlockNumber childblkno, BufferAccessStrategy strategy)
+{
+ Buffer parentbuf;
+ Page parentpage;
+ OffsetNumber o,
+ parent_maxoff;
+ IndexTuple result = NULL;
+
+ parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
+ strategy);
+
+ LockBuffer(parentbuf, GIST_SHARE);
+ parentpage = BufferGetPage(parentbuf);
+
+ if (GistPageIsLeaf(parentpage))
+ {
+ UnlockReleaseBuffer(parentbuf);
+ return result;
+ }
+
+ parent_maxoff = PageGetMaxOffsetNumber(parentpage);
+ for (o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o))
+ {
+ ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o, sizeof(GISTPageOpaqueData));
+ IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid);
+
+ if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
+ {
+ /* Found it! Make copy and return it */
+ result = CopyIndexTuple(itup);
+ break;
+ }
+ }
+
+ UnlockReleaseBuffer(parentbuf);
+
+ return result;
+}
diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml
index 5d61a33936f..9397a69c6e5 100644
--- a/doc/src/sgml/amcheck.sgml
+++ b/doc/src/sgml/amcheck.sgml
@@ -179,6 +179,25 @@ ORDER BY c.relpages DESC LIMIT 10;
</para>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term>
+ <function>gist_index_parent_check(index regclass, heapallindexed boolean) returns void</function>
+ <indexterm>
+ <primary>gist_index_parent_check</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ <function>gist_index_parent_check</function> tests that its target GiST
+ has consistent parent-child tuples relations (no parent tuples
+ require tuple adjustement) and page graph respects balanced-tree
+ invariants (internal pages reference only leaf page or only internal
+ pages).
+ </para>
+ </listitem>
+ </varlistentry>
</variablelist>
<tip>
<para>
--
2.37.3.542.gdd3f6c4cae
>From 5af85c93cb961040c5e256f79db38d285c5f1fb7 Mon Sep 17 00:00:00 2001
From: "Andrey M. Borodin" <[email protected]>
Date: Sat, 23 Jul 2022 14:22:26 +0500
Subject: [PATCH v14 3/3] Add gin_index_parent_check() to verify GIN index
---
contrib/amcheck/Makefile | 3 +-
contrib/amcheck/amcheck--1.3--1.4.sql | 11 +-
contrib/amcheck/amcheck.c | 2 +-
contrib/amcheck/expected/check_gin.out | 64 ++
contrib/amcheck/meson.build | 2 +
contrib/amcheck/sql/check_gin.sql | 40 ++
contrib/amcheck/verify_gin.c | 800 +++++++++++++++++++++++++
doc/src/sgml/amcheck.sgml | 19 +
8 files changed, 938 insertions(+), 3 deletions(-)
create mode 100644 contrib/amcheck/expected/check_gin.out
create mode 100644 contrib/amcheck/sql/check_gin.sql
create mode 100644 contrib/amcheck/verify_gin.c
diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index a8174195817..ecb849a605b 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -5,6 +5,7 @@ OBJS = \
$(WIN32RES) \
amcheck.o \
verify_gist.o \
+ verify_gin.o \
verify_heapam.o \
verify_nbtree.o
@@ -14,7 +15,7 @@ DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck
PGFILEDESC = "amcheck - function for verifying relation integrity"
-REGRESS = check check_btree check_heap check_gist
+REGRESS = check check_btree check_heap check_gist check_gin
TAP_TESTS = 1
diff --git a/contrib/amcheck/amcheck--1.3--1.4.sql b/contrib/amcheck/amcheck--1.3--1.4.sql
index 93297379efe..c914e6d0baa 100644
--- a/contrib/amcheck/amcheck--1.3--1.4.sql
+++ b/contrib/amcheck/amcheck--1.3--1.4.sql
@@ -11,4 +11,13 @@ RETURNS VOID
AS 'MODULE_PATHNAME', 'gist_index_parent_check'
LANGUAGE C STRICT;
-REVOKE ALL ON FUNCTION gist_index_parent_check(regclass, boolean) FROM PUBLIC;
\ No newline at end of file
+REVOKE ALL ON FUNCTION gist_index_parent_check(regclass, boolean) FROM PUBLIC;
+
+-- gin_index_parent_check()
+--
+CREATE FUNCTION gin_index_parent_check(index regclass, heapallindexed boolean)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'gin_index_parent_check'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION gin_index_parent_check(regclass, boolean) FROM PUBLIC;
\ No newline at end of file
diff --git a/contrib/amcheck/amcheck.c b/contrib/amcheck/amcheck.c
index 0194ef0d7a2..1e0c875926c 100644
--- a/contrib/amcheck/amcheck.c
+++ b/contrib/amcheck/amcheck.c
@@ -83,7 +83,7 @@ amcheck_lock_relation_and_check(Oid indrelid, IndexCheckableCallback checkable,
else
{
heaprel = NULL;
- /* for "gcc -Og" https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78394 */
+ /* Set these just to suppress "uninitialized variable" warnings */
save_userid = InvalidOid;
save_sec_context = -1;
save_nestlevel = -1;
diff --git a/contrib/amcheck/expected/check_gin.out b/contrib/amcheck/expected/check_gin.out
new file mode 100644
index 00000000000..d98d525c660
--- /dev/null
+++ b/contrib/amcheck/expected/check_gin.out
@@ -0,0 +1,64 @@
+-- Test of index bulk load
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+CREATE TABLE "gin_check"("Column1" int[]);
+-- posting trees (frequently used entries)
+INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000;
+-- posting leaves (sparse entries)
+INSERT INTO gin_check select array_agg(255 + round(random()*100)) from generate_series(1, 100) as i group by i % 100;
+CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
+SELECT gin_index_parent_check('gin_check_idx', true);
+ gin_index_parent_check
+------------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gin_check;
+-- Test index inserts
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+CREATE TABLE "gin_check"("Column1" int[]);
+CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
+ALTER INDEX gin_check_idx SET (fastupdate = false);
+-- posting trees
+INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000;
+-- posting leaves
+INSERT INTO gin_check select array_agg(100 + round(random()*255)) from generate_series(1, 100) as i group by i % 100;
+SELECT gin_index_parent_check('gin_check_idx', true);
+ gin_index_parent_check
+------------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gin_check;
+-- Test GIN over text array
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+CREATE TABLE "gin_check_text_array"("Column1" text[]);
+-- posting trees
+INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300)::text)::text) from generate_series(1, 100000) as i group by i % 10000;
+-- posting leaves
+INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300 + 300)::text)::text) from generate_series(1, 10000) as i group by i % 100;
+CREATE INDEX gin_check_text_array_idx on "gin_check_text_array" USING GIN("Column1");
+SELECT gin_index_parent_check('gin_check_text_array_idx', true);
+ gin_index_parent_check
+------------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gin_check_text_array;
diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build
index d84138f7fa6..0d7d651b24a 100644
--- a/contrib/amcheck/meson.build
+++ b/contrib/amcheck/meson.build
@@ -1,5 +1,6 @@
amcheck = shared_module('amcheck', [
'amcheck.c',
+ 'verify_gin.c',
'verify_gist.c',
'verify_heapam.c',
'verify_nbtree.c',
@@ -28,6 +29,7 @@ tests += {
'check_btree',
'check_heap',
'check_gist',
+ 'check_gin',
],
},
'tap': {
diff --git a/contrib/amcheck/sql/check_gin.sql b/contrib/amcheck/sql/check_gin.sql
new file mode 100644
index 00000000000..789259e6629
--- /dev/null
+++ b/contrib/amcheck/sql/check_gin.sql
@@ -0,0 +1,40 @@
+-- Test of index bulk load
+SELECT setseed(1);
+CREATE TABLE "gin_check"("Column1" int[]);
+-- posting trees (frequently used entries)
+INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000;
+-- posting leaves (sparse entries)
+INSERT INTO gin_check select array_agg(255 + round(random()*100)) from generate_series(1, 100) as i group by i % 100;
+CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
+SELECT gin_index_parent_check('gin_check_idx', true);
+
+-- cleanup
+DROP TABLE gin_check;
+
+-- Test index inserts
+SELECT setseed(1);
+CREATE TABLE "gin_check"("Column1" int[]);
+CREATE INDEX gin_check_idx on "gin_check" USING GIN("Column1");
+ALTER INDEX gin_check_idx SET (fastupdate = false);
+-- posting trees
+INSERT INTO gin_check select array_agg(round(random()*255) ) from generate_series(1, 100000) as i group by i % 10000;
+-- posting leaves
+INSERT INTO gin_check select array_agg(100 + round(random()*255)) from generate_series(1, 100) as i group by i % 100;
+
+SELECT gin_index_parent_check('gin_check_idx', true);
+
+-- cleanup
+DROP TABLE gin_check;
+
+-- Test GIN over text array
+SELECT setseed(1);
+CREATE TABLE "gin_check_text_array"("Column1" text[]);
+-- posting trees
+INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300)::text)::text) from generate_series(1, 100000) as i group by i % 10000;
+-- posting leaves
+INSERT INTO gin_check_text_array select array_agg(md5(round(random()*300 + 300)::text)::text) from generate_series(1, 10000) as i group by i % 100;
+CREATE INDEX gin_check_text_array_idx on "gin_check_text_array" USING GIN("Column1");
+SELECT gin_index_parent_check('gin_check_text_array_idx', true);
+
+-- cleanup
+DROP TABLE gin_check_text_array;
diff --git a/contrib/amcheck/verify_gin.c b/contrib/amcheck/verify_gin.c
new file mode 100644
index 00000000000..90fe89501d8
--- /dev/null
+++ b/contrib/amcheck/verify_gin.c
@@ -0,0 +1,800 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_gin.c
+ * Verifies the integrity of GIN indexes based on invariants.
+ *
+ * Verification checks that all paths in GIN graph contain
+ * consistent keys: tuples on parent pages consistently include tuples
+ * from children pages. Also, verification checks graph invariants:
+ * internal page must have at least one downlinks, internal page can
+ * reference either only leaf pages or only internal pages.
+ *
+ *
+ * Copyright (c) 2017-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/verify_gin.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gin_private.h"
+#include "access/nbtree.h"
+#include "amcheck.h"
+#include "catalog/pg_am.h"
+#include "miscadmin.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "string.h"
+
+/*
+ * GinScanItem represents one item of depth-first scan of GIN index.
+ */
+typedef struct GinScanItem
+{
+ int depth;
+ IndexTuple parenttup;
+ BlockNumber parentblk;
+ XLogRecPtr parentlsn;
+ BlockNumber blkno;
+ struct GinScanItem *next;
+} GinScanItem;
+
+/*
+ * GinPostingTreeScanItem represents one item of depth-first scan of GIN posting tree.
+ */
+typedef struct GinPostingTreeScanItem
+{
+ int depth;
+ ItemPointerData parentkey;
+ BlockNumber parentblk;
+ BlockNumber blkno;
+ struct GinPostingTreeScanItem *next;
+} GinPostingTreeScanItem;
+
+
+PG_FUNCTION_INFO_V1(gin_index_parent_check);
+
+static void gin_index_checkable(Relation rel);
+static void gin_check_parent_keys_consistency(Relation rel, Relation heaprel, void* callback_state);
+static bool check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
+static IndexTuple gin_refind_parent(Relation rel, BlockNumber parentblkno,
+ BlockNumber childblkno,
+ BufferAccessStrategy strategy);
+
+/*
+ * gin_index_parent_check(index regclass)
+ *
+ * Verify integrity of GIN index.
+ *
+ * Acquires AccessShareLock on heap & index relations.
+ */
+Datum
+gin_index_parent_check(PG_FUNCTION_ARGS)
+{
+ Oid indrelid = PG_GETARG_OID(0);
+ bool heapallindexed = false;
+
+ if (PG_NARGS() >= 2)
+ heapallindexed = PG_GETARG_BOOL(1);
+
+ amcheck_lock_relation_and_check(indrelid, gin_index_checkable,
+ gin_check_parent_keys_consistency, AccessShareLock, &heapallindexed);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Read item pointers from leaf entry tuple.
+ *
+ * Returns a palloc'd array of ItemPointers. The number of items is returned
+ * in *nitems.
+ */
+static ItemPointer
+ginReadTupleWithoutState(IndexTuple itup, int *nitems)
+{
+ Pointer ptr = GinGetPosting(itup);
+ int nipd = GinGetNPosting(itup);
+ ItemPointer ipd;
+ int ndecoded;
+
+ if (GinItupIsCompressed(itup))
+ {
+ if (nipd > 0)
+ {
+ ipd = ginPostingListDecode((GinPostingList *) ptr, &ndecoded);
+ if (nipd != ndecoded)
+ elog(ERROR, "number of items mismatch in GIN entry tuple, %d in tuple header, %d decoded",
+ nipd, ndecoded);
+ }
+ else
+ {
+ ipd = palloc(0);
+ }
+ }
+ else
+ {
+ ipd = (ItemPointer) palloc(sizeof(ItemPointerData) * nipd);
+ memcpy(ipd, ptr, sizeof(ItemPointerData) * nipd);
+ }
+ *nitems = nipd;
+ return ipd;
+}
+
+
+/*
+ * Check that relation is eligible for GIN verification
+ */
+static void
+gin_index_checkable(Relation rel)
+{
+ if (rel->rd_rel->relkind != RELKIND_INDEX ||
+ rel->rd_rel->relam != GIN_AM_OID)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("only GIN indexes are supported as targets for this verification"),
+ errdetail("Relation \"%s\" is not a GIN index.",
+ RelationGetRelationName(rel))));
+
+ if (RELATION_IS_OTHER_TEMP(rel))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot access temporary tables of other sessions"),
+ errdetail("Index \"%s\" is associated with temporary relation.",
+ RelationGetRelationName(rel))));
+
+ if (!rel->rd_index->indisvalid)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot check index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail("Index is not valid")));
+}
+
+/*
+ * Allocates memory context and scans through postigTree graph
+ *
+ */
+static void
+gin_check_posting_tree_parent_keys_consistency(Relation rel, BlockNumber posting_tree_root)
+{
+ BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+ GinPostingTreeScanItem *stack;
+ MemoryContext mctx;
+ MemoryContext oldcontext;
+
+ int leafdepth;
+
+ mctx = AllocSetContextCreate(CurrentMemoryContext,
+ "amcheck context",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(mctx);
+
+ /*
+ * We don't know the height of the tree yet, but as soon as we encounter a
+ * leaf page, we will set 'leafdepth' to its depth.
+ */
+ leafdepth = -1;
+
+ /* Start the scan at the root page */
+ stack = (GinPostingTreeScanItem *) palloc0(sizeof(GinPostingTreeScanItem));
+ stack->depth = 0;
+ ItemPointerSetInvalid(&stack->parentkey);
+ stack->parentblk = InvalidBlockNumber;
+ stack->blkno = posting_tree_root;
+
+ elog(DEBUG3, "processing posting tree at blk %u", posting_tree_root);
+
+ while (stack)
+ {
+ GinPostingTreeScanItem *stack_next;
+ Buffer buffer;
+ Page page;
+ OffsetNumber i,
+ maxoff;
+
+ CHECK_FOR_INTERRUPTS();
+
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+ RBM_NORMAL, strategy);
+ LockBuffer(buffer, GIN_SHARE);
+ page = (Page) BufferGetPage(buffer);
+ Assert(GinPageIsData(page));
+
+ /* Check that the tree has the same height in all branches */
+ if (GinPageIsLeaf(page))
+ {
+ ItemPointerData minItem;
+ int nlist;
+ ItemPointerData *list;
+ char tidrange_buf[100];
+
+ ItemPointerSetMin(&minItem);
+
+ if (leafdepth == -1)
+ leafdepth = stack->depth;
+ else if (stack->depth != leafdepth)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+ RelationGetRelationName(rel), stack->blkno)));
+ list = GinDataLeafPageGetItems(page, &nlist, minItem);
+
+ if (nlist > 0)
+ {
+ snprintf(tidrange_buf, sizeof(tidrange_buf),
+ "%d tids (%u, %u) - (%u, %u)",
+ nlist,
+ ItemPointerGetBlockNumberNoCheck(&list[0]),
+ ItemPointerGetOffsetNumberNoCheck(&list[0]),
+ ItemPointerGetBlockNumberNoCheck(&list[nlist - 1]),
+ ItemPointerGetOffsetNumberNoCheck(&list[nlist - 1]));
+ } else {
+ snprintf(tidrange_buf, sizeof(tidrange_buf), "0 tids");
+ }
+
+ if (stack->parentblk != InvalidBlockNumber)
+ {
+ elog(DEBUG3, "blk %u: parent %u highkey (%u, %u), %s",
+ stack->blkno,
+ stack->parentblk,
+ ItemPointerGetBlockNumberNoCheck(&stack->parentkey),
+ ItemPointerGetOffsetNumberNoCheck(&stack->parentkey),
+ tidrange_buf);
+ }
+ else
+ {
+ elog(DEBUG3, "blk %u: root leaf, %s",
+ stack->blkno,
+ tidrange_buf);
+ }
+
+ if (stack->parentblk != InvalidBlockNumber &&
+ ItemPointerGetOffsetNumberNoCheck(&stack->parentkey) != InvalidOffsetNumber &&
+ nlist > 0 &&
+ ItemPointerCompare(&stack->parentkey, &list[nlist - 1]) < 0)
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": tid exceeds parent's high key in postingTree leaf on block %u",
+ RelationGetRelationName(rel), stack->blkno)));
+ }
+ }
+ else
+ {
+ LocationIndex pd_lower;
+ int lowersize;
+ ItemPointerData bound;
+
+ /*
+ * Check that tuples in each page are properly ordered and
+ * consistent with parent high key
+ */
+ maxoff = GinPageGetOpaque(page)->maxoff;
+ if (stack->parentblk != InvalidBlockNumber)
+ elog(DEBUG3, "blk %u: internal posting tree page with %u items, parent %u highkey (%u, %u)",
+ stack->blkno,
+ maxoff,
+ stack->parentblk,
+ ItemPointerGetBlockNumberNoCheck(&stack->parentkey),
+ ItemPointerGetOffsetNumberNoCheck(&stack->parentkey));
+ else
+ elog(DEBUG3, "blk %u: root internal posting tree page with %u items", stack->blkno, maxoff);
+
+ /*
+ * A GIN posting tree internal page stores PostingItems in the
+ * 'lower' part of the page. The 'upper' part is unused. The
+ * number of elements is stored in the opaque area (maxoff).
+ * Make sure the size of the 'lower' part agrees with 'maxoff'
+ *
+ * We didn't set pd_lower until PostgreSQL version 9.4, so if this
+ * check fails, it could also be because the index was binary-upgraded
+ * from an earlier version. That was a long time ago, though, so let's
+ * warn if it doesn't match.
+ */
+ pd_lower = ((PageHeader) page)->pd_lower;
+ lowersize = pd_lower - MAXALIGN(SizeOfPageHeaderData);
+ if ((lowersize - MAXALIGN(sizeof(ItemPointerData))) / sizeof(PostingItem) != maxoff)
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has unexpected pd_lower %u in posting tree block %u with maxoff %u)",
+ RelationGetRelationName(rel), pd_lower, stack->blkno, maxoff)));
+ }
+
+ /*
+ * Before the PostingItems, there's one ItemPointerData in the
+ * 'lower' part that stores the page's high key.
+ */
+ bound = *GinDataPageGetRightBound(page);
+
+ if (stack->parentblk != InvalidBlockNumber)
+ {
+ if (!ItemPointerEquals(&stack->parentkey, &bound))
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": posting tree page's high key (%u, %u) doesn't match the downlink on block %u (parent blk %u, key (%u, %u))",
+ RelationGetRelationName(rel),
+ ItemPointerGetBlockNumberNoCheck(&bound),
+ ItemPointerGetOffsetNumberNoCheck(&bound),
+ stack->blkno,
+ stack->parentblk,
+ ItemPointerGetBlockNumberNoCheck(&stack->parentkey),
+ ItemPointerGetOffsetNumberNoCheck(&stack->parentkey))));
+ }
+ }
+
+ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ PostingItem *posting_item = GinDataPageGetPostingItem(page, i);
+
+ elog(DEBUG3, "key (%u, %u) -> %u",
+ ItemPointerGetBlockNumber(&posting_item->key),
+ ItemPointerGetOffsetNumber(&posting_item->key),
+ BlockIdGetBlockNumber(&posting_item->child_blkno));
+
+ if (i == maxoff && GinPageGetOpaque(page)->rightlink == InvalidBlockNumber)
+ {
+ /* The rightmost item in the tree level has (0, 0) as the key */
+ if (ItemPointerGetBlockNumberNoCheck(&posting_item->key) != 0 ||
+ ItemPointerGetOffsetNumberNoCheck(&posting_item->key) != 0)
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": rightmost posting tree page (blk %u) has unexpected last key (%u, %u)",
+ RelationGetRelationName(rel),
+ stack->blkno,
+ ItemPointerGetBlockNumberNoCheck(&posting_item->key),
+ ItemPointerGetOffsetNumberNoCheck(&posting_item->key))));
+ }
+ }
+ else if (i != FirstOffsetNumber)
+ {
+ PostingItem *previous_posting_item = GinDataPageGetPostingItem(page, i - 1);
+
+ if (ItemPointerCompare(&posting_item->key, &previous_posting_item->key) < 0)
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has wrong tuple order in posting tree, block %u, offset %u",
+ RelationGetRelationName(rel), stack->blkno, i)));
+ }
+ }
+
+ /*
+ * Check if this tuple is consistent with the downlink in the
+ * parent.
+ */
+ if (stack->parentblk != InvalidBlockNumber && i == maxoff)
+ {
+ if (ItemPointerCompare(&stack->parentkey, &posting_item->key) < 0)
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": posting item exceeds parent's high key in postingTree internal page on block %u offset %u",
+ RelationGetRelationName(rel), stack->blkno, i)));
+
+ }
+ }
+
+ /* If this is an internal page, recurse into the child */
+ if (!GinPageIsLeaf(page))
+ {
+ GinPostingTreeScanItem *ptr;
+
+ ptr = (GinPostingTreeScanItem *) palloc(sizeof(GinPostingTreeScanItem));
+ ptr->depth = stack->depth + 1;
+ ptr->parentkey = posting_item->key;
+ ptr->parentblk = stack->blkno;
+ ptr->blkno = BlockIdGetBlockNumber(&posting_item->child_blkno);
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+
+ }
+ }
+ LockBuffer(buffer, GIN_UNLOCK);
+ ReleaseBuffer(buffer);
+
+ /* Step to next item in the queue */
+ stack_next = stack->next;
+ pfree(stack);
+ stack = stack_next;
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(mctx);
+}
+
+/*
+ * Main entry point for GIN check. Allocates memory context and scans through
+ * GIN graph.
+ */
+static void
+gin_check_parent_keys_consistency(Relation rel, Relation heaprel, void* callback_state)
+{
+ BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+ GinScanItem *stack;
+ MemoryContext mctx;
+ MemoryContext oldcontext;
+ GinState state;
+ bool heapallindexed = *((bool*)callback_state);
+
+ int leafdepth;
+
+ mctx = AllocSetContextCreate(CurrentMemoryContext,
+ "amcheck context",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(mctx);
+ initGinState(&state, rel);
+
+ /*
+ * We don't know the height of the tree yet, but as soon as we encounter a
+ * leaf page, we will set 'leafdepth' to its depth.
+ */
+ leafdepth = -1;
+
+ /* Start the scan at the root page */
+ stack = (GinScanItem *) palloc0(sizeof(GinScanItem));
+ stack->depth = 0;
+ stack->parenttup = NULL;
+ stack->parentblk = InvalidBlockNumber;
+ stack->parentlsn = InvalidXLogRecPtr;
+ stack->blkno = GIN_ROOT_BLKNO;
+
+ while (stack)
+ {
+ GinScanItem *stack_next;
+ Buffer buffer;
+ Page page;
+ OffsetNumber i,
+ maxoff;
+ XLogRecPtr lsn;
+ IndexTuple prev_tuple;
+
+ CHECK_FOR_INTERRUPTS();
+
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+ RBM_NORMAL, strategy);
+ LockBuffer(buffer, GIN_SHARE);
+ page = (Page) BufferGetPage(buffer);
+ lsn = BufferGetLSNAtomic(buffer);
+
+ /* Do basic sanity checks on the page headers */
+ if (!check_index_page(rel, buffer, stack->blkno))
+ {
+ goto nextpage;
+ }
+
+ /*
+ * It's possible that the page was split since we looked at the
+ * parent, so that we didn't missed the downlink of the right sibling
+ * when we scanned the parent. If so, add the right sibling to the
+ * stack now.
+ */
+ if (stack->parenttup != NULL)
+ {
+ GinNullCategory parent_key_category;
+ Datum parent_key = gintuple_get_key(&state, stack->parenttup, &parent_key_category);
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+ ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, maxoff, sizeof(GinPageOpaqueData));
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+ OffsetNumber attnum = gintuple_get_attrnum(&state, idxtuple);
+ GinNullCategory page_max_key_category;
+ Datum page_max_key = gintuple_get_key(&state, idxtuple, &page_max_key_category);
+
+ if (GinPageGetOpaque(page)->rightlink != InvalidBlockNumber &&
+ ginCompareEntries(&state, attnum, page_max_key, page_max_key_category, parent_key, parent_key_category) > 0)
+ {
+ /* split page detected, install right link to the stack */
+ GinScanItem *ptr;
+
+ elog(DEBUG3, "split detected");
+
+ ptr = (GinScanItem *) palloc(sizeof(GinScanItem));
+ ptr->depth = stack->depth;
+ ptr->parenttup = CopyIndexTuple(stack->parenttup);
+ ptr->parentblk = stack->parentblk;
+ ptr->parentlsn = stack->parentlsn;
+ ptr->blkno = GinPageGetOpaque(page)->rightlink;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+ }
+
+ /* Check that the tree has the same height in all branches */
+ if (GinPageIsLeaf(page))
+ {
+ if (leafdepth == -1)
+ leafdepth = stack->depth;
+ else if (stack->depth != leafdepth)
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+ RelationGetRelationName(rel), stack->blkno)));
+ goto nextpage;
+ }
+ }
+
+ /*
+ * Check that tuples in each page are properly ordered and consistent
+ * with parent high key
+ */
+ maxoff = PageGetMaxOffsetNumber(page);
+ prev_tuple = NULL;
+ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i, sizeof(GinPageOpaqueData));
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+ OffsetNumber attnum = gintuple_get_attrnum(&state, idxtuple);
+ GinNullCategory prev_key_category;
+ Datum prev_key;
+ GinNullCategory current_key_category;
+ Datum current_key;
+
+ if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
+ RelationGetRelationName(rel), stack->blkno, i)));
+ continue;
+ }
+
+ current_key = gintuple_get_key(&state, idxtuple, ¤t_key_category);
+
+ /* (apparently) first block is metadata, skip order check */
+ if (i != FirstOffsetNumber && stack->blkno != (BlockNumber) 1)
+ {
+ prev_key = gintuple_get_key(&state, prev_tuple, &prev_key_category);
+ if (ginCompareEntries(&state, attnum, prev_key, prev_key_category, current_key, current_key_category) >= 0)
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has wrong tuple order, block %u, offset %u",
+ RelationGetRelationName(rel), stack->blkno, i)));
+ }
+ }
+
+ /*
+ * Check if this tuple is consistent with the downlink in the
+ * parent.
+ */
+ if (stack->parenttup &&
+ i == maxoff)
+ {
+ GinNullCategory parent_key_category;
+ Datum parent_key = gintuple_get_key(&state, stack->parenttup, &parent_key_category);
+
+ if (ginCompareEntries(&state, attnum, current_key, current_key_category, parent_key, parent_key_category) > 0)
+ {
+ /*
+ * There was a discrepancy between parent and child
+ * tuples. We need to verify it is not a result of
+ * concurrent call of gistplacetopage(). So, lock parent
+ * and try to find downlink for current page. It may be
+ * missing due to concurrent page split, this is OK.
+ */
+ pfree(stack->parenttup);
+ stack->parenttup = gin_refind_parent(rel, stack->parentblk,
+ stack->blkno, strategy);
+
+ /* We found it - make a final check before failing */
+ if (!stack->parenttup)
+ elog(NOTICE, "Unable to find parent tuple for block %u on block %u due to concurrent split",
+ stack->blkno, stack->parentblk);
+ else
+ {
+ parent_key = gintuple_get_key(&state, stack->parenttup, &parent_key_category);
+ if (ginCompareEntries(&state, attnum, current_key, current_key_category, parent_key, parent_key_category) > 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+ RelationGetRelationName(rel), stack->blkno, i)));
+ else
+ {
+ /*
+ * But now it is properly adjusted - nothing to do
+ * here.
+ */
+ }
+ }
+ }
+ }
+
+ /* If this is an internal page, recurse into the child */
+ if (!GinPageIsLeaf(page))
+ {
+ GinScanItem *ptr;
+
+ ptr = (GinScanItem *) palloc(sizeof(GinScanItem));
+ ptr->depth = stack->depth + 1;
+ /* last tuple in layer has no high key */
+ if (i != maxoff && !GinPageGetOpaque(page)->rightlink)
+ {
+ ptr->parenttup = CopyIndexTuple(idxtuple);
+ }
+ else
+ {
+ ptr->parenttup = NULL;
+ }
+ ptr->parentblk = stack->blkno;
+ ptr->blkno = GinGetDownlink(idxtuple);
+ ptr->parentlsn = lsn;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+ /* If this item is a pointer to a posting tree, recurse into it */
+ else if (GinIsPostingTree(idxtuple))
+ {
+ BlockNumber rootPostingTree = GinGetPostingTree(idxtuple);
+
+ gin_check_posting_tree_parent_keys_consistency(rel, rootPostingTree);
+ }
+ else
+ {
+ ItemPointer ipd;
+ int nipd;
+
+ ipd = ginReadTupleWithoutState(idxtuple, &nipd);
+
+ for (int j = 0; j < nipd; j++)
+ {
+ if (!OffsetNumberIsValid(ItemPointerGetOffsetNumber(&ipd[j])))
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": posting list contains invalid heap pointer on block %u",
+ RelationGetRelationName(rel), stack->blkno)));
+ }
+ }
+ pfree(ipd);
+ }
+
+ prev_tuple = CopyIndexTuple(idxtuple);
+ }
+
+nextpage:
+ LockBuffer(buffer, GIN_UNLOCK);
+ ReleaseBuffer(buffer);
+
+ /* Step to next item in the queue */
+ stack_next = stack->next;
+ if (stack->parenttup)
+ pfree(stack->parenttup);
+ pfree(stack);
+ stack = stack_next;
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(mctx);
+}
+
+/*
+ * Verify that a freshly-read page looks sane.
+ */
+static bool
+gincheckpage(Relation rel, Buffer buf)
+{
+ Page page = BufferGetPage(buf);
+
+ /*
+ * ReadBuffer verifies that every newly-read page passes
+ * PageHeaderIsValid, which means it either contains a reasonably sane
+ * page header or is all-zero. We have to defend against the all-zero
+ * case, however.
+ */
+ if (PageIsNew(page))
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" contains unexpected zero page at block %u",
+ RelationGetRelationName(rel),
+ BufferGetBlockNumber(buf)),
+ errhint("Please REINDEX it.")));
+ return false;
+ }
+
+ /*
+ * Additionally check that the special area looks sane.
+ */
+ if (PageGetSpecialSize(page) != MAXALIGN(sizeof(GinPageOpaqueData)))
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" contains corrupted page at block %u",
+ RelationGetRelationName(rel),
+ BufferGetBlockNumber(buf)),
+ errhint("Please REINDEX it.")));
+ return false;
+ }
+ return true;
+}
+
+static bool
+check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
+{
+ Page page = BufferGetPage(buffer);
+
+ if (!gincheckpage(rel, buffer))
+ return false;
+
+ if (GinPageIsDeleted(page))
+ {
+ if (!GinPageIsLeaf(page))
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted internal page %d",
+ RelationGetRelationName(rel), blockNo)));
+ return false;
+ }
+ if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has deleted page %d with tuples",
+ RelationGetRelationName(rel), blockNo)));
+ return false;
+ }
+ }
+ else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has page %d with exceeding count of tuples",
+ RelationGetRelationName(rel), blockNo)));
+ return false;
+ }
+ return true;
+}
+
+/*
+ * Try to re-find downlink pointing to 'blkno', in 'parentblkno'.
+ *
+ * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
+ * returns NULL.
+ */
+static IndexTuple
+gin_refind_parent(Relation rel, BlockNumber parentblkno,
+ BlockNumber childblkno, BufferAccessStrategy strategy)
+{
+ Buffer parentbuf;
+ Page parentpage;
+ OffsetNumber o,
+ parent_maxoff;
+ IndexTuple result = NULL;
+
+ parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
+ strategy);
+
+ LockBuffer(parentbuf, GIN_SHARE);
+ parentpage = BufferGetPage(parentbuf);
+
+ if (GinPageIsLeaf(parentpage))
+ {
+ UnlockReleaseBuffer(parentbuf);
+ return result;
+ }
+
+ parent_maxoff = PageGetMaxOffsetNumber(parentpage);
+ for (o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o))
+ {
+ ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o, sizeof(GinPageOpaqueData));
+ IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid);
+
+ if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
+ {
+ /* Found it! Make copy and return it */
+ result = CopyIndexTuple(itup);
+ break;
+ }
+ }
+
+ UnlockReleaseBuffer(parentbuf);
+
+ return result;
+}
diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml
index 9397a69c6e5..7ffa36b2057 100644
--- a/doc/src/sgml/amcheck.sgml
+++ b/doc/src/sgml/amcheck.sgml
@@ -180,6 +180,25 @@ ORDER BY c.relpages DESC LIMIT 10;
</listitem>
</varlistentry>
+ <varlistentry>
+ <term>
+ <function>gin_index_parent_check(index regclass, heapallindexed boolean) returns void</function>
+ <indexterm>
+ <primary>gin_index_parent_check</primary>
+ </indexterm>
+ </term>
+
+ <listitem>
+ <para>
+ <function>gin_index_parent_check</function> tests that its target GIN index
+ has consistent parent-child tuples relations (no parent tuples
+ require tuple adjustement) and page graph respects balanced-tree
+ invariants (internal pages reference only leaf page or only internal
+ pages).
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry>
<term>
<function>gist_index_parent_check(index regclass, heapallindexed boolean) returns void</function>
--
2.37.3.542.gdd3f6c4cae