On 21/09/2020 17:19, Andrey M. Borodin wrote:
21 сент. 2020 г., в 18:29, Andrey M. Borodin <x4...@yandex-team.ru> написал(а):
It was a conscious decision with incorrect motivation. I was thinking that it will help
to reduce number of "false positive" inspecting right pages. But now I see that:
1. There should be no such "false positives" that we can avoid
2. Valid rightlinks could help to do amcheck verification in future
Well, point number 2 here is invalid. There exist one leaf page p, so that if
we start traversing rightlink from p we will reach all leaf pages. But we
practically have no means to find this page. This makes rightlinks not very
helpful in amcheck for GiST.
Well, if you store all the right links in a hash table or something, you
can "connect the dots" after you have scanned all the pages to see that
the chain is unbroken. Probably would not be worth the trouble, since
the rightlinks are not actually needed after concurrent scans have
completed.
But for consistency I think it worth to install them.
I agree. I did some testing with your patch. It seems that the
rightlinks are still not always set. I didn't try to debug why.
I wrote a couple of 'pageinspect' function to inspect GiST pages for
this. See attached. I then created a test table and index like this:
create table points (p point);
insert into points select point(x,y) from generate_series(-2000, 2000)
x, generate_series(-2000, 2000) y;
create index points_idx on points using gist (p);
And this is what the root page looks like:
postgres=# select * from gist_page_items(get_raw_page('points_idx', 0));
itemoffset | ctid | itemlen
------------+---------------+---------
1 | (27891,65535) | 40
2 | (55614,65535) | 40
3 | (83337,65535) | 40
4 | (97019,65535) | 40
(4 rows)
And the right links on the next level:
postgres=# select * from (VALUES (27891), (55614), (83337), (97019)) b
(blkno), lateral gist_page_opaque_info(get_raw_page('points_idx', blkno));
blkno | lsn | nsn | rightlink | flags
-------+-----+-----+------------+-------
27891 | 0/1 | 0/0 | 4294967295 | {}
55614 | 0/1 | 0/0 | 4294967295 | {}
83337 | 0/1 | 0/0 | 27891 | {}
97019 | 0/1 | 0/0 | 55614 | {}
(4 rows)
I expected there to be only one page with invalid right link, but there
are two.
- Heikki
>From c388e458b196454d3535d84f6f7a617f0f3f819a Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakan...@iki.fi>
Date: Mon, 28 Sep 2020 11:01:45 +0300
Subject: [PATCH 1/1] Add functions to 'pageinspect' to inspect GiST indexes.
---
contrib/pageinspect/Makefile | 4 +-
contrib/pageinspect/gistfuncs.c | 173 ++++++++++++++++++
contrib/pageinspect/pageinspect--1.8--1.9.sql | 27 +++
contrib/pageinspect/pageinspect.control | 2 +-
4 files changed, 204 insertions(+), 2 deletions(-)
create mode 100644 contrib/pageinspect/gistfuncs.c
create mode 100644 contrib/pageinspect/pageinspect--1.8--1.9.sql
diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile
index d9d8177116b..0f9561616e1 100644
--- a/contrib/pageinspect/Makefile
+++ b/contrib/pageinspect/Makefile
@@ -7,12 +7,14 @@ OBJS = \
btreefuncs.o \
fsmfuncs.o \
ginfuncs.o \
+ gistfuncs.o \
hashfuncs.o \
heapfuncs.o \
rawpage.o
EXTENSION = pageinspect
-DATA = pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \
+DATA = pageinspect--1.8--1.9.sql \
+ pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \
pageinspect--1.5.sql pageinspect--1.5--1.6.sql \
pageinspect--1.4--1.5.sql pageinspect--1.3--1.4.sql \
pageinspect--1.2--1.3.sql pageinspect--1.1--1.2.sql \
diff --git a/contrib/pageinspect/gistfuncs.c b/contrib/pageinspect/gistfuncs.c
new file mode 100644
index 00000000000..8517b4c241f
--- /dev/null
+++ b/contrib/pageinspect/gistfuncs.c
@@ -0,0 +1,173 @@
+/*
+ * gistfuncs.c
+ * Functions to investigate the content of GiST indexes
+ *
+ * Copyright (c) 2014-2020, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/pageinspect/gitfuncs.c
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/htup.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "pageinspect.h"
+#include "storage/itemptr.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/pg_lsn.h"
+
+PG_FUNCTION_INFO_V1(gist_page_opaque_info);
+PG_FUNCTION_INFO_V1(gist_page_items);
+
+#define ItemPointerGetDatum(X) PointerGetDatum(X)
+
+
+Datum
+gist_page_opaque_info(PG_FUNCTION_ARGS)
+{
+ bytea *raw_page = PG_GETARG_BYTEA_P(0);
+ TupleDesc tupdesc;
+ Page page;
+ GISTPageOpaque opaq;
+ HeapTuple resultTuple;
+ Datum values[4];
+ bool nulls[4];
+ Datum flags[16];
+ int nflags = 0;
+ uint16 flagbits;
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to use raw page functions")));
+
+ page = get_page_from_raw(raw_page);
+
+ opaq = (GISTPageOpaque) PageGetSpecialPointer(page);
+
+ /* Build a tuple descriptor for our result type */
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ /* Convert the flags bitmask to an array of human-readable names */
+ flagbits = opaq->flags;
+ if (flagbits & F_LEAF)
+ flags[nflags++] = CStringGetTextDatum("leaf");
+ if (flagbits & F_DELETED)
+ flags[nflags++] = CStringGetTextDatum("deleted");
+ if (flagbits & F_TUPLES_DELETED)
+ flags[nflags++] = CStringGetTextDatum("tuples_deleted");
+ if (flagbits & F_FOLLOW_RIGHT)
+ flags[nflags++] = CStringGetTextDatum("follow_right");
+ if (flagbits & F_HAS_GARBAGE)
+ flags[nflags++] = CStringGetTextDatum("has_garbage");
+ flagbits &= ~(F_LEAF | F_DELETED | F_TUPLES_DELETED | F_FOLLOW_RIGHT | F_HAS_GARBAGE);
+ if (flagbits)
+ {
+ /* any flags we don't recognize are printed in hex */
+ flags[nflags++] = DirectFunctionCall1(to_hex32, Int32GetDatum(flagbits));
+ }
+
+ memset(nulls, 0, sizeof(nulls));
+
+ values[0] = LSNGetDatum(PageGetLSN(page));
+ values[1] = LSNGetDatum(GistPageGetNSN(page));
+ values[2] = Int64GetDatum(opaq->rightlink);
+ values[3] = PointerGetDatum(construct_array(flags, nflags,
+ TEXTOID,
+ -1, false, TYPALIGN_INT));
+
+ /* Build and return the result tuple. */
+ resultTuple = heap_form_tuple(tupdesc, values, nulls);
+
+ return HeapTupleGetDatum(resultTuple);
+}
+
+typedef struct gist_page_items_state
+{
+ Page page;
+ TupleDesc tupd;
+ OffsetNumber offset;
+} gist_page_items_state;
+
+Datum
+gist_page_items(PG_FUNCTION_ARGS)
+{
+ bytea *raw_page = PG_GETARG_BYTEA_P(0);
+ FuncCallContext *fctx;
+ gist_page_items_state *inter_call_data;
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to use raw page functions")));
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ TupleDesc tupdesc;
+ MemoryContext mctx;
+ Page page;
+
+ fctx = SRF_FIRSTCALL_INIT();
+ mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+
+ page = get_page_from_raw(raw_page);
+
+ inter_call_data = palloc(sizeof(gist_page_items_state));
+
+ /* Build a tuple descriptor for our result type */
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ inter_call_data->page = page;
+ inter_call_data->tupd = tupdesc;
+ inter_call_data->offset = FirstOffsetNumber;
+
+ fctx->max_calls = PageGetMaxOffsetNumber(page);
+ fctx->user_fctx = inter_call_data;
+
+ MemoryContextSwitchTo(mctx);
+ }
+
+ fctx = SRF_PERCALL_SETUP();
+ inter_call_data = fctx->user_fctx;
+
+ if (fctx->call_cntr < fctx->max_calls)
+ {
+ Page page = inter_call_data->page;
+ OffsetNumber offset = inter_call_data->offset;
+ HeapTuple resultTuple;
+ Datum result;
+ Datum values[3];
+ bool nulls[3];
+ ItemId id;
+ IndexTuple itup;
+
+ id = PageGetItemId(page, offset);
+
+ if (!ItemIdIsValid(id))
+ elog(ERROR, "invalid ItemId");
+
+ itup = (IndexTuple) PageGetItem(page, id);
+
+ memset(nulls, 0, sizeof(nulls));
+
+ values[0] = DatumGetInt16(offset);
+ values[1] = ItemPointerGetDatum(&itup->t_tid);
+ values[2] = Int32GetDatum((int) IndexTupleSize(itup));
+
+ /* TODO: also print the keys */
+
+ /* Build and return the result tuple. */
+ resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
+ result = HeapTupleGetDatum(resultTuple);
+
+ inter_call_data->offset++;
+ SRF_RETURN_NEXT(fctx, result);
+ }
+
+ SRF_RETURN_DONE(fctx);
+}
diff --git a/contrib/pageinspect/pageinspect--1.8--1.9.sql b/contrib/pageinspect/pageinspect--1.8--1.9.sql
new file mode 100644
index 00000000000..e7a849cfedf
--- /dev/null
+++ b/contrib/pageinspect/pageinspect--1.8--1.9.sql
@@ -0,0 +1,27 @@
+/* contrib/pageinspect/pageinspect--1.8--1.9.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.9'" to load this file. \quit
+
+--
+-- gist_page_opaque_info()
+--
+CREATE FUNCTION gist_page_opaque_info(IN page bytea,
+ OUT lsn pg_lsn,
+ OUT nsn pg_lsn,
+ OUT rightlink bigint,
+ OUT flags text[])
+AS 'MODULE_PATHNAME', 'gist_page_opaque_info'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+
+--
+-- gist_page_items()
+--
+CREATE FUNCTION gist_page_items(IN page bytea,
+ OUT itemoffset smallint,
+ OUT ctid tid,
+ OUT itemlen smallint)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'gist_page_items'
+LANGUAGE C STRICT PARALLEL SAFE;
diff --git a/contrib/pageinspect/pageinspect.control b/contrib/pageinspect/pageinspect.control
index f8cdf526c65..bd716769a17 100644
--- a/contrib/pageinspect/pageinspect.control
+++ b/contrib/pageinspect/pageinspect.control
@@ -1,5 +1,5 @@
# pageinspect extension
comment = 'inspect the contents of database pages at a low level'
-default_version = '1.8'
+default_version = '1.9'
module_pathname = '$libdir/pageinspect'
relocatable = true
--
2.20.1