Hello Arseniy, I finally got time to look at this more closely, and do some testing.
Are there any cases when the current code incorrectly reports corruption for a valid index? So far I've been unable to find such case. Or am I wrong? It seems to me all the proposed changes are "tightening" the checks, in the sense that we might have missed certain types of issues before. This is supported by the fact that the new TAP test fails on master, i.e. master does not report the corruption the TAP introduces. (The TAP test is great, it would have been great to add something like this in the original commit.) Also, I've noticed that the TAP test passes even with some (most) of the verify_gin.c changes reverted. See the 0002 patch - this does not break the TAP test. Of course, that does not prove the changes are wrong and I'm not claiming that. But can we improve the TAP test to trigger this too? To show the current code (in master) misses this? Grigory, Andrey, Heikki, any opinions on the tweaks? regards -- Tomas Vondra
From 973de3eaeeca7ff2946a5b0f92f481d70ba5b78d Mon Sep 17 00:00:00 2001 From: Tomas Vondra <to...@vondra.me> Date: Mon, 26 May 2025 12:10:37 +0200 Subject: [PATCH v2 1/2] verify gin fixes and tests --- contrib/amcheck/expected/check_gin.out | 12 ++ contrib/amcheck/meson.build | 1 + contrib/amcheck/sql/check_gin.sql | 10 + contrib/amcheck/t/006_verify_gin.pl | 274 +++++++++++++++++++++++++ contrib/amcheck/verify_gin.c | 57 +++-- 5 files changed, 324 insertions(+), 30 deletions(-) create mode 100644 contrib/amcheck/t/006_verify_gin.pl diff --git a/contrib/amcheck/expected/check_gin.out b/contrib/amcheck/expected/check_gin.out index b4f0b110747..8dd01ced8d1 100644 --- a/contrib/amcheck/expected/check_gin.out +++ b/contrib/amcheck/expected/check_gin.out @@ -76,3 +76,15 @@ SELECT gin_index_check('gin_check_jsonb_idx'); -- cleanup DROP TABLE gin_check_jsonb; +-- Test GIN multicolumn index +CREATE TABLE "gin_check_multicolumn"(a text[], b text[]); +INSERT INTO gin_check_multicolumn (a,b) values ('{a,c,e}','{b,d,f}'); +CREATE INDEX "gin_check_multicolumn_idx" on gin_check_multicolumn USING GIN(a,b); +SELECT gin_index_check('gin_check_multicolumn_idx'); + gin_index_check +----------------- + +(1 row) + +-- cleanup +DROP TABLE gin_check_multicolumn; diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build index b33e8c9b062..1f0c347ed54 100644 --- a/contrib/amcheck/meson.build +++ b/contrib/amcheck/meson.build @@ -49,6 +49,7 @@ tests += { 't/003_cic_2pc.pl', 't/004_verify_nbtree_unique.pl', 't/005_pitr.pl', + 't/006_verify_gin.pl', ], }, } diff --git a/contrib/amcheck/sql/check_gin.sql b/contrib/amcheck/sql/check_gin.sql index 66f42c34311..11caed3d6a8 100644 --- a/contrib/amcheck/sql/check_gin.sql +++ b/contrib/amcheck/sql/check_gin.sql @@ -50,3 +50,13 @@ SELECT gin_index_check('gin_check_jsonb_idx'); -- cleanup DROP TABLE gin_check_jsonb; + +-- Test GIN multicolumn index +CREATE TABLE "gin_check_multicolumn"(a text[], b text[]); +INSERT INTO gin_check_multicolumn (a,b) values ('{a,c,e}','{b,d,f}'); +CREATE INDEX "gin_check_multicolumn_idx" on gin_check_multicolumn USING GIN(a,b); + +SELECT gin_index_check('gin_check_multicolumn_idx'); + +-- cleanup +DROP TABLE gin_check_multicolumn; diff --git a/contrib/amcheck/t/006_verify_gin.pl b/contrib/amcheck/t/006_verify_gin.pl new file mode 100644 index 00000000000..3b6077c6282 --- /dev/null +++ b/contrib/amcheck/t/006_verify_gin.pl @@ -0,0 +1,274 @@ + +# Copyright (c) 2021-2025, PostgreSQL Global Development Group + +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More; + +my $node; + +# +# Test set-up +# +$node = PostgreSQL::Test::Cluster->new('test'); +$node->init(no_data_checksums => 1); +$node->append_conf('postgresql.conf', 'autovacuum=off'); +$node->start; +$node->safe_psql('postgres', q(CREATE EXTENSION amcheck)); +$node->safe_psql( + 'postgres', q( + CREATE OR REPLACE FUNCTION random_string( INT ) RETURNS text AS $$ + SELECT string_agg(substring('0123456789bcdfghjkmnpqrstvwxyz', ceil(random() * 30)::integer, 1), '') from generate_series(1, $1); + $$ LANGUAGE SQL;)); + +# Tests +invalid_entry_order_leaf_page_test(); +invalid_entry_order_inner_page_test(); +invalid_entry_columns_order_test(); +inconsistent_with_parent_key_parent_key_corrupted_test(); +inconsistent_with_parent_key_child_key_corrupted_test(); + +sub invalid_entry_order_leaf_page_test +{ + my $relname = "test"; + my $indexname = "test_gin_idx"; + + $node->safe_psql( + 'postgres', qq( + DROP TABLE IF EXISTS $relname; + CREATE TABLE $relname (a text[]); + CREATE INDEX $indexname ON $relname USING gin (a); + INSERT INTO $relname (a) VALUES ('{aaaaa,bbbbb}'); + SELECT gin_clean_pending_list('$indexname'); + )); + my $relpath = relation_filepath($indexname); + + $node->stop; + + my $blksize = 8192; + my $blkno = 1; # root + + # produce wrong order by replacing aaaaa with ccccc + string_replace_block( + $relpath, + "aaaaa", + "ccccc", + $blksize, + $blkno + ); + + $node->start; + + my ($result, $stdout, $stderr) = $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); + ok($stderr =~ 'index "test_gin_idx" has wrong tuple order on entry tree page, block 1, offset 2, rightlink 4294967295'); +} + +sub invalid_entry_order_inner_page_test +{ + my $relname = "test"; + my $indexname = "test_gin_idx"; + + $node->safe_psql( + 'postgres', qq( + DROP TABLE IF EXISTS $relname; + CREATE TABLE $relname (a text[]); + CREATE INDEX $indexname ON $relname USING gin (a); + INSERT INTO $relname (a) VALUES (('{' || 'pppppppppp' || random_string(1870) ||'}')::text[]); + INSERT INTO $relname (a) VALUES (('{' || 'qqqqqqqqqq' || random_string(1870) ||'}')::text[]); + INSERT INTO $relname (a) VALUES (('{' || 'rrrrrrrrrr' || random_string(1870) ||'}')::text[]); + INSERT INTO $relname (a) VALUES (('{' || 'ssssssssss' || random_string(1870) ||'}')::text[]); + INSERT INTO $relname (a) VALUES (('{' || 'tttttttttt' || random_string(1870) ||'}')::text[]); + INSERT INTO $relname (a) VALUES (('{' || 'uuuuuuuuuu' || random_string(1870) ||'}')::text[]); + INSERT INTO $relname (a) VALUES (('{' || 'vvvvvvvvvv' || random_string(1870) ||'}')::text[]); + INSERT INTO $relname (a) VALUES (('{' || 'wwwwwwwwww' || random_string(1870) ||'}')::text[]); + SELECT gin_clean_pending_list('$indexname'); + )); + my $relpath = relation_filepath($indexname); + + $node->stop; + + my $blksize = 8192; + my $blkno = 1; # root + + # we have rrrrrrrrr... and tttttttttt... as keys in the root, so produce wrong order by replacing rrrrrrrrrr.... + string_replace_block( + $relpath, + "rrrrrrrrrr", + "zzzzzzzzzz", + $blksize, + $blkno + ); + + $node->start; + + my ($result, $stdout, $stderr) = $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); + ok($stderr =~ 'index "test_gin_idx" has wrong tuple order on entry tree page, block 1, offset 2, rightlink 4294967295'); +} + +sub invalid_entry_columns_order_test +{ + my $relname = "test"; + my $indexname = "test_gin_idx"; + + $node->safe_psql( + 'postgres', qq( + DROP TABLE IF EXISTS $relname; + CREATE TABLE $relname (a text[],b text[]); + CREATE INDEX $indexname ON $relname USING gin (a,b); + INSERT INTO $relname (a,b) VALUES ('{aaa}','{bbb}'); + SELECT gin_clean_pending_list('$indexname'); + )); + my $relpath = relation_filepath($indexname); + + $node->stop; + + my $blksize = 8192; + my $blkno = 1; # root + + # mess column numbers + # root items order before: (1,aaa), (2,bbb) + # root items order after: (2,aaa), (1,bbb) + my $find = pack('s', 1) . pack('c', 0x09) . "aaa"; + my $replace = pack('s', 2) . pack('c', 0x09) . "aaa"; + string_replace_block( + $relpath, + $find, + $replace, + $blksize, + $blkno + ); + + $find = pack('s', 2) . pack('c', 0x09) . "bbb"; + $replace = pack('s', 1) . pack('c', 0x09) . "bbb"; + string_replace_block( + $relpath, + $find, + $replace, + $blksize, + $blkno + ); + + $node->start; + + my ($result, $stdout, $stderr) = $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); + ok($stderr =~ 'index "test_gin_idx" has wrong tuple order on entry tree page, block 1, offset 2, rightlink 4294967295'); +} + +sub inconsistent_with_parent_key_parent_key_corrupted_test +{ + my $relname = "test"; + my $indexname = "test_gin_idx"; + + $node->safe_psql( + 'postgres', qq( + DROP TABLE IF EXISTS $relname; + CREATE TABLE $relname (a text[]); + CREATE INDEX $indexname ON $relname USING gin (a); + INSERT INTO $relname (a) VALUES (('{' || 'llllllllll' || random_string(1870) ||'}')::text[]); + INSERT INTO $relname (a) VALUES (('{' || 'mmmmmmmmmm' || random_string(1870) ||'}')::text[]); + INSERT INTO $relname (a) VALUES (('{' || 'nnnnnnnnnn' || random_string(1870) ||'}')::text[]); + INSERT INTO $relname (a) VALUES (('{' || 'xxxxxxxxxx' || random_string(1870) ||'}')::text[]); + INSERT INTO $relname (a) VALUES (('{' || 'yyyyyyyyyy' || random_string(1870) ||'}')::text[]); + SELECT gin_clean_pending_list('$indexname'); + )); + my $relpath = relation_filepath($indexname); + + $node->stop; + + my $blksize = 8192; + my $blkno = 1; # root + + # we have nnnnnnnnnn... as parent key in the root, so replace it with something smaller then child's keys + string_replace_block( + $relpath, + "nnnnnnnnnn", + "aaaaaaaaaa", + $blksize, + $blkno + ); + + $node->start; + + my ($result, $stdout, $stderr) = $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); + ok($stderr =~ 'index "test_gin_idx" has inconsistent records on page 5 offset 3'); +} + +sub inconsistent_with_parent_key_child_key_corrupted_test +{ + my $relname = "test"; + my $indexname = "test_gin_idx"; + + $node->safe_psql( + 'postgres', qq( + DROP TABLE IF EXISTS $relname; + CREATE TABLE $relname (a text[]); + CREATE INDEX $indexname ON $relname USING gin (a); + INSERT INTO $relname (a) VALUES (('{' || 'llllllllll' || random_string(1870) ||'}')::text[]); + INSERT INTO $relname (a) VALUES (('{' || 'mmmmmmmmmm' || random_string(1870) ||'}')::text[]); + INSERT INTO $relname (a) VALUES (('{' || 'nnnnnnnnnn' || random_string(1870) ||'}')::text[]); + INSERT INTO $relname (a) VALUES (('{' || 'xxxxxxxxxx' || random_string(1870) ||'}')::text[]); + INSERT INTO $relname (a) VALUES (('{' || 'yyyyyyyyyy' || random_string(1870) ||'}')::text[]); + SELECT gin_clean_pending_list('$indexname'); + )); + my $relpath = relation_filepath($indexname); + + $node->stop; + + my $blksize = 8192; + my $blkno = 5; # leaf + + # we have nnnnnnnnnn... as parent key in the root, so replace child key with something bigger + string_replace_block( + $relpath, + "nnnnnnnnnn", + "pppppppppp", + $blksize, + $blkno + ); + + $node->start; + + my ($result, $stdout, $stderr) = $node->psql('postgres', qq(SELECT gin_index_check('$indexname'))); + ok($stderr =~ 'index "test_gin_idx" has inconsistent records on page 5 offset 3'); +} + +# Returns the filesystem path for the named relation. +sub relation_filepath +{ + my ($relname) = @_; + + my $pgdata = $node->data_dir; + my $rel = $node->safe_psql('postgres', + qq(SELECT pg_relation_filepath('$relname'))); + die "path not found for relation $relname" unless defined $rel; + return "$pgdata/$rel"; +} + +sub string_replace_block { + my ($filename, $find, $replace, $blksize, $blkno) = @_; + + my $fh; + open($fh, '+<', $filename) or BAIL_OUT("open failed: $!"); + binmode $fh; + + my $offset = $blkno * $blksize; + my $buffer; + + sysseek($fh, $offset, 0) or BAIL_OUT("seek failed: $!"); + sysread($fh, $buffer, $blksize) or BAIL_OUT("read failed: $!"); + + $buffer =~ s/$find/$replace/g; + + sysseek($fh, $offset, 0) or BAIL_OUT("seek failed: $!"); + syswrite($fh, $buffer) or BAIL_OUT("write failed: $!"); + + close($fh) or BAIL_OUT("close failed: $!"); + + return; +} + +done_testing(); \ No newline at end of file diff --git a/contrib/amcheck/verify_gin.c b/contrib/amcheck/verify_gin.c index b5f363562e3..427cf1669a6 100644 --- a/contrib/amcheck/verify_gin.c +++ b/contrib/amcheck/verify_gin.c @@ -346,7 +346,7 @@ gin_check_posting_tree_parent_keys_consistency(Relation rel, BlockNumber posting * Check if this tuple is consistent with the downlink in the * parent. */ - if (stack->parentblk != InvalidBlockNumber && i == maxoff && + if (i == maxoff && ItemPointerIsValid(&stack->parentkey) && ItemPointerCompare(&stack->parentkey, &posting_item->key) < 0) ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), @@ -359,14 +359,10 @@ gin_check_posting_tree_parent_keys_consistency(Relation rel, BlockNumber posting ptr->depth = stack->depth + 1; /* - * Set rightmost parent key to invalid item pointer. Its value - * is 'Infinity' and not explicitly stored. + * The rightmost parent key is always invalid item pointer. + * Its value is 'Infinity' and not explicitly stored. */ - if (rightlink == InvalidBlockNumber) - ItemPointerSetInvalid(&ptr->parentkey); - else - ptr->parentkey = posting_item->key; - + ptr->parentkey = posting_item->key; ptr->parentblk = stack->blkno; ptr->blkno = BlockIdGetBlockNumber(&posting_item->child_blkno); ptr->next = stack->next; @@ -463,17 +459,18 @@ gin_check_parent_keys_consistency(Relation rel, Datum parent_key = gintuple_get_key(&state, stack->parenttup, &parent_key_category); + OffsetNumber parent_key_attnum = gintuple_get_attrnum(&state, stack->parenttup); ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, maxoff); IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid); - OffsetNumber attnum = gintuple_get_attrnum(&state, idxtuple); + OffsetNumber page_max_key_attnum = gintuple_get_attrnum(&state, idxtuple); GinNullCategory page_max_key_category; Datum page_max_key = gintuple_get_key(&state, idxtuple, &page_max_key_category); if (rightlink != InvalidBlockNumber && - ginCompareEntries(&state, attnum, page_max_key, - page_max_key_category, parent_key, - parent_key_category) > 0) + ginCompareAttEntries(&state, page_max_key_attnum, page_max_key, + page_max_key_category, parent_key_attnum, + parent_key, parent_key_category) < 0) { /* split page detected, install right link to the stack */ GinScanItem *ptr; @@ -528,20 +525,18 @@ gin_check_parent_keys_consistency(Relation rel, current_key = gintuple_get_key(&state, idxtuple, ¤t_key_category); /* - * First block is metadata, skip order check. Also, never check - * for high key on rightmost page, as this key is not really - * stored explicitly. + * Never check for high key on rightmost inner page, as this key + * is not really stored explicitly. * * Also make sure to not compare entries for different attnums, * which may be stored on the same page. */ - if (i != FirstOffsetNumber && attnum == prev_attnum && stack->blkno != GIN_ROOT_BLKNO && - !(i == maxoff && rightlink == InvalidBlockNumber)) + if (i != FirstOffsetNumber && !(i == maxoff && rightlink == InvalidBlockNumber && !GinPageIsLeaf(page))) { prev_key = gintuple_get_key(&state, prev_tuple, &prev_key_category); - if (ginCompareEntries(&state, attnum, prev_key, - prev_key_category, current_key, - current_key_category) >= 0) + if (ginCompareAttEntries(&state, prev_attnum, prev_key, + prev_key_category, attnum, + current_key, current_key_category) >= 0) ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg("index \"%s\" has wrong tuple order on entry tree page, block %u, offset %u, rightlink %u", @@ -556,13 +551,14 @@ gin_check_parent_keys_consistency(Relation rel, i == maxoff) { GinNullCategory parent_key_category; + OffsetNumber parent_key_attnum = gintuple_get_attrnum(&state, stack->parenttup); Datum parent_key = gintuple_get_key(&state, stack->parenttup, &parent_key_category); - if (ginCompareEntries(&state, attnum, current_key, - current_key_category, parent_key, - parent_key_category) > 0) + if (ginCompareAttEntries(&state, attnum, current_key, + current_key_category, parent_key_attnum, + parent_key, parent_key_category) > 0) { /* * There was a discrepancy between parent and child @@ -581,6 +577,7 @@ gin_check_parent_keys_consistency(Relation rel, stack->blkno, stack->parentblk); else { + parent_key_attnum = gintuple_get_attrnum(&state, stack->parenttup); parent_key = gintuple_get_key(&state, stack->parenttup, &parent_key_category); @@ -589,9 +586,9 @@ gin_check_parent_keys_consistency(Relation rel, * Check if it is properly adjusted. If succeed, * proceed to the next key. */ - if (ginCompareEntries(&state, attnum, current_key, - current_key_category, parent_key, - parent_key_category) > 0) + if (ginCompareAttEntries(&state, attnum, current_key, + current_key_category, parent_key_attnum, + parent_key, parent_key_category) > 0) ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg("index \"%s\" has inconsistent records on page %u offset %u", @@ -608,10 +605,10 @@ gin_check_parent_keys_consistency(Relation rel, ptr = (GinScanItem *) palloc(sizeof(GinScanItem)); ptr->depth = stack->depth + 1; /* last tuple in layer has no high key */ - if (i != maxoff && !GinPageGetOpaque(page)->rightlink) - ptr->parenttup = CopyIndexTuple(idxtuple); - else + if (i == maxoff && rightlink == InvalidBlockNumber) ptr->parenttup = NULL; + else + ptr->parenttup = CopyIndexTuple(idxtuple); ptr->parentblk = stack->blkno; ptr->blkno = GinGetDownlink(idxtuple); ptr->parentlsn = lsn; @@ -749,7 +746,7 @@ gin_refind_parent(Relation rel, BlockNumber parentblkno, ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o); IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid); - if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno) + if (GinGetDownlink(itup) == childblkno) { /* Found it! Make copy and return it */ result = CopyIndexTuple(itup); -- 2.49.0
From 1495281bb00810ed3c5c3ea20bbb3b626c73b580 Mon Sep 17 00:00:00 2001 From: Tomas Vondra <to...@vondra.me> Date: Mon, 26 May 2025 12:24:21 +0200 Subject: [PATCH v2 2/2] undo --- contrib/amcheck/verify_gin.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/contrib/amcheck/verify_gin.c b/contrib/amcheck/verify_gin.c index 427cf1669a6..8f6a5410cb7 100644 --- a/contrib/amcheck/verify_gin.c +++ b/contrib/amcheck/verify_gin.c @@ -346,7 +346,7 @@ gin_check_posting_tree_parent_keys_consistency(Relation rel, BlockNumber posting * Check if this tuple is consistent with the downlink in the * parent. */ - if (i == maxoff && ItemPointerIsValid(&stack->parentkey) && + if (stack->parentblk != InvalidBlockNumber && i == maxoff && ItemPointerCompare(&stack->parentkey, &posting_item->key) < 0) ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), @@ -359,10 +359,14 @@ gin_check_posting_tree_parent_keys_consistency(Relation rel, BlockNumber posting ptr->depth = stack->depth + 1; /* - * The rightmost parent key is always invalid item pointer. - * Its value is 'Infinity' and not explicitly stored. + * Set rightmost parent key to invalid item pointer. Its value + * is 'Infinity' and not explicitly stored. */ - ptr->parentkey = posting_item->key; + if (rightlink == InvalidBlockNumber) + ItemPointerSetInvalid(&ptr->parentkey); + else + ptr->parentkey = posting_item->key; + ptr->parentblk = stack->blkno; ptr->blkno = BlockIdGetBlockNumber(&posting_item->child_blkno); ptr->next = stack->next; -- 2.49.0