The attached patch allows the vacuum to continue by emitting WARNING for the corrupted tuple instead of immediately error out as discussed at [1].
Basically, it provides a new GUC called vacuum_tolerate_damage, to control whether to continue the vacuum or to stop on the occurrence of a corrupted tuple. So if the vacuum_tolerate_damage is set then in all the cases in heap_prepare_freeze_tuple where the corrupted xid is detected, it will emit a warning and return that nothing is changed in the tuple and the 'tuple_totally_frozen' will also be set to false. Since we are returning false the caller will not try to freeze such tuple and the tuple_totally_frozen is also set to false so that the page will not be marked to all frozen even if all other tuples in the page are frozen. Alternatively, we can try to freeze other XIDs in the tuple which is not corrupted but I don't think we will gain anything from this, because if one of the xmin or xmax is wrong then next time also if we run the vacuum then we are going to get the same WARNING or the ERROR. Is there any other opinion on this? [1] http://postgr.es/m/ca+tgmoazwzhtffu6nujgeap6adds-qwfnoxpzgqpzmmm0vt...@mail.gmail.com -- Regards, Dilip Kumar EnterpriseDB: http://www.enterprisedb.com
From 5421c567a168705272bde425a02eb248c4468cb0 Mon Sep 17 00:00:00 2001 From: dilipkumar <dilipbalaut@gmail.com> Date: Thu, 16 Jul 2020 11:41:28 +0530 Subject: [PATCH v1] Provide a GUC to allow vacuum to continue on corrupted tuple A new GUC to control whether the vacuum will error out immediately on detection of a corrupted tuple or it will just emit a WARNING for each such instance and complete the rest of the vacuuming. --- doc/src/sgml/config.sgml | 21 ++++++++ src/backend/access/heap/heapam.c | 28 +++++++++-- src/backend/commands/vacuum.c | 15 ++++++ src/backend/utils/misc/guc.c | 9 ++++ src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/commands/vacuum.h | 2 + .../test_misc/t/002_vacuum_tolerate_damage.pl | 49 +++++++++++++++++++ 7 files changed, 120 insertions(+), 5 deletions(-) create mode 100644 src/test/modules/test_misc/t/002_vacuum_tolerate_damage.pl diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index b353c61683..3c1e647e27 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -8250,6 +8250,27 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; </listitem> </varlistentry> + <varlistentry id="guc-vacuum-tolerate-damage" xreflabel="vacuum_tolerate_damage"> + <term><varname>vacuum_tolerate_damage</varname> (<type>boolean</type>) + <indexterm> + <primary><varname>vacuum_tolerate_damage</varname> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + When set to off, which is the default, the <command>VACUUM</command> will raise + a ERROR-level error if detected a corrupted tuple. This causes the operation to + stop immediately. + </para> + + <para> + If set to on, the <command>VACUUM</command> will instead emit a WARNING for each + such tuple but the operation will continue. + vacuuming. + </para> + </listitem> + </varlistentry> + <varlistentry id="guc-bytea-output" xreflabel="bytea_output"> <term><varname>bytea_output</varname> (<type>enum</type>) <indexterm> diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index e09c8101e7..fc31799da5 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -51,6 +51,7 @@ #include "access/xloginsert.h" #include "access/xlogutils.h" #include "catalog/catalog.h" +#include "commands/vacuum.h" #include "miscadmin.h" #include "pgstat.h" #include "port/atomics.h" @@ -6123,6 +6124,8 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, frz->t_infomask = tuple->t_infomask; frz->xmax = HeapTupleHeaderGetRawXmax(tuple); + *totally_frozen_p = false; + /* * Process xmin. xmin_frozen has two slightly different meanings: in the * !XidIsNormal case, it means "the xmin doesn't need any freezing" (it's @@ -6137,19 +6140,25 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, else { if (TransactionIdPrecedes(xid, relfrozenxid)) - ereport(ERROR, + { + ereport(vacuum_damage_elevel(), (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("found xmin %u from before relfrozenxid %u", xid, relfrozenxid))); + return false; + } xmin_frozen = TransactionIdPrecedes(xid, cutoff_xid); if (xmin_frozen) { if (!TransactionIdDidCommit(xid)) - ereport(ERROR, + { + ereport(vacuum_damage_elevel(), (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("uncommitted xmin %u from before xid cutoff %u needs to be frozen", xid, cutoff_xid))); + return false; + } frz->t_infomask |= HEAP_XMIN_FROZEN; changed = true; @@ -6218,10 +6227,13 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, else if (TransactionIdIsNormal(xid)) { if (TransactionIdPrecedes(xid, relfrozenxid)) - ereport(ERROR, + { + ereport(vacuum_damage_elevel(), (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("found xmax %u from before relfrozenxid %u", xid, relfrozenxid))); + return false; + } if (TransactionIdPrecedes(xid, cutoff_xid)) { @@ -6233,10 +6245,13 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, */ if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) && TransactionIdDidCommit(xid)) - ereport(ERROR, + { + ereport(vacuum_damage_elevel(), (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("cannot freeze committed xmax %u", xid))); + return false; + } freeze_xmax = true; } else @@ -6249,10 +6264,13 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, xmax_already_frozen = true; } else - ereport(ERROR, + { + ereport(vacuum_damage_elevel(), (errcode(ERRCODE_DATA_CORRUPTED), errmsg_internal("found xmax %u (infomask 0x%04x) not frozen, not multi, not normal", xid, tuple->t_infomask))); + return false; + } if (freeze_xmax) { diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 576c7e63e9..5a093c231c 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -63,6 +63,8 @@ int vacuum_freeze_table_age; int vacuum_multixact_freeze_min_age; int vacuum_multixact_freeze_table_age; +/* Whether to continue with the vacuuming after detecting a corruped tuple */ +bool vacuum_tolerate_damage = false; /* A few variables that don't seem worth passing around as parameters */ static MemoryContext vac_context = NULL; @@ -2102,3 +2104,16 @@ get_vacopt_ternary_value(DefElem *def) { return defGetBoolean(def) ? VACOPT_TERNARY_ENABLED : VACOPT_TERNARY_DISABLED; } + +/* + * vacuum_damage_elevel + * + * Return the error level for reporting the corrupted tuple while trying to + * freeze the tuple during vacuum. Based on the GUC value, it will return + * whether to report with ERROR or to report with WARNING. + */ +int +vacuum_damage_elevel(void) +{ + return vacuum_tolerate_damage ? WARNING : ERROR; +} diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 031ca0327f..683812dd4a 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2041,6 +2041,15 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"vacuum_tolerate_damage", PGC_USERSET, ERROR_HANDLING_OPTIONS, + gettext_noop("Whether to continue running vacuum after detecting corrupted tuple."), + }, + &vacuum_tolerate_damage, + false, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index e430e33c7b..88654f4983 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -666,6 +666,7 @@ #vacuum_cleanup_index_scale_factor = 0.1 # fraction of total number of tuples # before index cleanup, 0 always performs # index cleanup +#vacuum_tolerate_damage = false #bytea_output = 'hex' # hex, escape #xmlbinary = 'base64' #xmloption = 'content' diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index a4cd721400..5e711832bc 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -237,6 +237,7 @@ extern int vacuum_freeze_min_age; extern int vacuum_freeze_table_age; extern int vacuum_multixact_freeze_min_age; extern int vacuum_multixact_freeze_table_age; +extern bool vacuum_tolerate_damage; /* Variables for cost-based parallel vacuum */ extern pg_atomic_uint32 *VacuumSharedCostBalance; @@ -278,6 +279,7 @@ extern bool vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple, int options); extern Relation vacuum_open_relation(Oid relid, RangeVar *relation, int options, bool verbose, LOCKMODE lmode); +extern int vacuum_damage_elevel(void); /* in commands/analyze.c */ extern void analyze_rel(Oid relid, RangeVar *relation, diff --git a/src/test/modules/test_misc/t/002_vacuum_tolerate_damage.pl b/src/test/modules/test_misc/t/002_vacuum_tolerate_damage.pl new file mode 100644 index 0000000000..acf1266ab1 --- /dev/null +++ b/src/test/modules/test_misc/t/002_vacuum_tolerate_damage.pl @@ -0,0 +1,49 @@ +# Verify the vacuum_tolerate_damage GUC + +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 2; + +# Initialize a test cluster +my $node = get_new_node('primary'); +$node->init(); +$node->start; + +# Run a SQL command and return psql's stderr +sub run_sql_command +{ + my $sql = shift; + my $stderr; + + $node->psql( + 'postgres', + $sql, + stderr => \$stderr); + return $stderr; +} + +my $output; + +# Insert 2 tuple in the table and update the relfrozenxid for the table to +# the future xid. +run_sql_command( + "create table test_vac (a int) WITH (autovacuum_enabled = off); + insert into test_vac values (1), (2); + update pg_class set relfrozenxid=txid_current()::text::xid where relname='test_vac';"); + +$output = run_sql_command('vacuum(freeze, disable_page_skipping) test_vac;'); +ok( $output =~ m/ERROR:.*found xmin.*before relfrozenxid/); + +# set the vacuum_tolerate_damage and run again +$output = run_sql_command('set vacuum_tolerate_damage=true; + vacuum(freeze, disable_page_skipping) test_vac;'); + + +# this time we should get WARNING for both the tuples +ok( scalar( @{[ $output=~/WARNING:.*found xmin.*before relfrozenxid/gi ]}) == 2); + +run_sql_command('DROP TABLE test_vac;'); + +$node->stop('fast'); -- 2.23.0