The attached patch allows the vacuum to continue by emitting WARNING
for the corrupted tuple instead of immediately error out as discussed
at [1].

Basically, it provides a new GUC called vacuum_tolerate_damage, to
control whether to continue the vacuum or to stop on the occurrence of
a corrupted tuple.  So if the vacuum_tolerate_damage is set then in
all the cases in heap_prepare_freeze_tuple where the corrupted xid is
detected, it will emit a warning and return that nothing is changed in
the tuple and the 'tuple_totally_frozen' will also be set to false.
Since we are returning false the caller will not try to freeze such
tuple and the tuple_totally_frozen is also set to false so that the
page will not be marked to all frozen even if all other tuples in the
page are frozen.

Alternatively,  we can try to freeze other XIDs in the tuple which is
not corrupted but I don't think we will gain anything from this,
because if one of the xmin or xmax is wrong then next time also if we
run the vacuum then we are going to get the same WARNING or the ERROR.
Is there any other opinion on this?

[1] 
http://postgr.es/m/ca+tgmoazwzhtffu6nujgeap6adds-qwfnoxpzgqpzmmm0vt...@mail.gmail.com

--
Regards,
Dilip Kumar
EnterpriseDB: http://www.enterprisedb.com
From 5421c567a168705272bde425a02eb248c4468cb0 Mon Sep 17 00:00:00 2001
From: dilipkumar <dilipbalaut@gmail.com>
Date: Thu, 16 Jul 2020 11:41:28 +0530
Subject: [PATCH v1] Provide a GUC to allow vacuum to continue on corrupted
 tuple

A new GUC to control whether the vacuum will error out immediately
on detection of a corrupted tuple or it will just emit a WARNING for
each such instance and complete the rest of the vacuuming.
---
 doc/src/sgml/config.sgml                      | 21 ++++++++
 src/backend/access/heap/heapam.c              | 28 +++++++++--
 src/backend/commands/vacuum.c                 | 15 ++++++
 src/backend/utils/misc/guc.c                  |  9 ++++
 src/backend/utils/misc/postgresql.conf.sample |  1 +
 src/include/commands/vacuum.h                 |  2 +
 .../test_misc/t/002_vacuum_tolerate_damage.pl | 49 +++++++++++++++++++
 7 files changed, 120 insertions(+), 5 deletions(-)
 create mode 100644 src/test/modules/test_misc/t/002_vacuum_tolerate_damage.pl

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index b353c61683..3c1e647e27 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8250,6 +8250,27 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       </listitem>
      </varlistentry>
 
+      <varlistentry id="guc-vacuum-tolerate-damage" xreflabel="vacuum_tolerate_damage">
+      <term><varname>vacuum_tolerate_damage</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>vacuum_tolerate_damage</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        When set to off, which is the default, the <command>VACUUM</command> will raise
+        a ERROR-level error if detected a corrupted tuple.  This causes the operation to
+        stop immediately.
+       </para>
+
+       <para>
+        If set to on, the <command>VACUUM</command> will instead emit a WARNING for each
+        such tuple but the operation will continue.
+        vacuuming.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-bytea-output" xreflabel="bytea_output">
       <term><varname>bytea_output</varname> (<type>enum</type>)
       <indexterm>
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index e09c8101e7..fc31799da5 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -51,6 +51,7 @@
 #include "access/xloginsert.h"
 #include "access/xlogutils.h"
 #include "catalog/catalog.h"
+#include "commands/vacuum.h"
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "port/atomics.h"
@@ -6123,6 +6124,8 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 	frz->t_infomask = tuple->t_infomask;
 	frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
 
+	*totally_frozen_p = false;
+
 	/*
 	 * Process xmin.  xmin_frozen has two slightly different meanings: in the
 	 * !XidIsNormal case, it means "the xmin doesn't need any freezing" (it's
@@ -6137,19 +6140,25 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 	else
 	{
 		if (TransactionIdPrecedes(xid, relfrozenxid))
-			ereport(ERROR,
+		{
+			ereport(vacuum_damage_elevel(),
 					(errcode(ERRCODE_DATA_CORRUPTED),
 					 errmsg_internal("found xmin %u from before relfrozenxid %u",
 									 xid, relfrozenxid)));
+			return false;
+		}
 
 		xmin_frozen = TransactionIdPrecedes(xid, cutoff_xid);
 		if (xmin_frozen)
 		{
 			if (!TransactionIdDidCommit(xid))
-				ereport(ERROR,
+			{
+				ereport(vacuum_damage_elevel(),
 						(errcode(ERRCODE_DATA_CORRUPTED),
 						 errmsg_internal("uncommitted xmin %u from before xid cutoff %u needs to be frozen",
 										 xid, cutoff_xid)));
+				return false;
+			}
 
 			frz->t_infomask |= HEAP_XMIN_FROZEN;
 			changed = true;
@@ -6218,10 +6227,13 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 	else if (TransactionIdIsNormal(xid))
 	{
 		if (TransactionIdPrecedes(xid, relfrozenxid))
-			ereport(ERROR,
+		{
+			ereport(vacuum_damage_elevel(),
 					(errcode(ERRCODE_DATA_CORRUPTED),
 					 errmsg_internal("found xmax %u from before relfrozenxid %u",
 									 xid, relfrozenxid)));
+			return false;
+		}
 
 		if (TransactionIdPrecedes(xid, cutoff_xid))
 		{
@@ -6233,10 +6245,13 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 			 */
 			if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask) &&
 				TransactionIdDidCommit(xid))
-				ereport(ERROR,
+			{
+				ereport(vacuum_damage_elevel(),
 						(errcode(ERRCODE_DATA_CORRUPTED),
 						 errmsg_internal("cannot freeze committed xmax %u",
 										 xid)));
+				return false;
+			}
 			freeze_xmax = true;
 		}
 		else
@@ -6249,10 +6264,13 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 		xmax_already_frozen = true;
 	}
 	else
-		ereport(ERROR,
+	{
+		ereport(vacuum_damage_elevel(),
 				(errcode(ERRCODE_DATA_CORRUPTED),
 				 errmsg_internal("found xmax %u (infomask 0x%04x) not frozen, not multi, not normal",
 								 xid, tuple->t_infomask)));
+		return false;
+	}
 
 	if (freeze_xmax)
 	{
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 576c7e63e9..5a093c231c 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -63,6 +63,8 @@ int			vacuum_freeze_table_age;
 int			vacuum_multixact_freeze_min_age;
 int			vacuum_multixact_freeze_table_age;
 
+/* Whether to continue with the vacuuming after detecting a corruped tuple */
+bool vacuum_tolerate_damage = false;
 
 /* A few variables that don't seem worth passing around as parameters */
 static MemoryContext vac_context = NULL;
@@ -2102,3 +2104,16 @@ get_vacopt_ternary_value(DefElem *def)
 {
 	return defGetBoolean(def) ? VACOPT_TERNARY_ENABLED : VACOPT_TERNARY_DISABLED;
 }
+
+/*
+ * vacuum_damage_elevel
+ *
+ * Return the error level for reporting the corrupted tuple while trying to
+ * freeze the tuple during vacuum.  Based on the GUC value, it will return
+ * whether to report with ERROR or to report with WARNING.
+ */
+int
+vacuum_damage_elevel(void)
+{
+	return vacuum_tolerate_damage ? WARNING : ERROR;
+}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 031ca0327f..683812dd4a 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -2041,6 +2041,15 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"vacuum_tolerate_damage", PGC_USERSET, ERROR_HANDLING_OPTIONS,
+			gettext_noop("Whether to continue running vacuum after detecting corrupted tuple."),
+		},
+		&vacuum_tolerate_damage,
+		false,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index e430e33c7b..88654f4983 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -666,6 +666,7 @@
 #vacuum_cleanup_index_scale_factor = 0.1	# fraction of total number of tuples
 						# before index cleanup, 0 always performs
 						# index cleanup
+#vacuum_tolerate_damage = false
 #bytea_output = 'hex'			# hex, escape
 #xmlbinary = 'base64'
 #xmloption = 'content'
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index a4cd721400..5e711832bc 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -237,6 +237,7 @@ extern int	vacuum_freeze_min_age;
 extern int	vacuum_freeze_table_age;
 extern int	vacuum_multixact_freeze_min_age;
 extern int	vacuum_multixact_freeze_table_age;
+extern bool vacuum_tolerate_damage;
 
 /* Variables for cost-based parallel vacuum */
 extern pg_atomic_uint32 *VacuumSharedCostBalance;
@@ -278,6 +279,7 @@ extern bool vacuum_is_relation_owner(Oid relid, Form_pg_class reltuple,
 									 int options);
 extern Relation vacuum_open_relation(Oid relid, RangeVar *relation,
 									 int options, bool verbose, LOCKMODE lmode);
+extern int vacuum_damage_elevel(void);
 
 /* in commands/analyze.c */
 extern void analyze_rel(Oid relid, RangeVar *relation,
diff --git a/src/test/modules/test_misc/t/002_vacuum_tolerate_damage.pl b/src/test/modules/test_misc/t/002_vacuum_tolerate_damage.pl
new file mode 100644
index 0000000000..acf1266ab1
--- /dev/null
+++ b/src/test/modules/test_misc/t/002_vacuum_tolerate_damage.pl
@@ -0,0 +1,49 @@
+# Verify the vacuum_tolerate_damage GUC
+
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 2;
+
+# Initialize a test cluster
+my $node = get_new_node('primary');
+$node->init();
+$node->start;
+
+# Run a SQL command and return psql's stderr
+sub run_sql_command
+{
+	my $sql = shift;
+	my $stderr;
+
+	$node->psql(
+		'postgres',
+		$sql,
+		stderr        => \$stderr);
+	return $stderr;
+}
+
+my $output;
+
+# Insert 2 tuple in the table and update the relfrozenxid for the table to
+# the future xid.
+run_sql_command(
+	"create table test_vac (a int) WITH (autovacuum_enabled = off);
+	 insert into test_vac values (1), (2);
+	 update pg_class set relfrozenxid=txid_current()::text::xid where relname='test_vac';");
+
+$output = run_sql_command('vacuum(freeze, disable_page_skipping) test_vac;');
+ok( $output =~ m/ERROR:.*found xmin.*before relfrozenxid/);
+
+# set the vacuum_tolerate_damage and run again
+$output = run_sql_command('set vacuum_tolerate_damage=true;
+						   vacuum(freeze, disable_page_skipping) test_vac;');
+
+
+# this time we should get WARNING for both the tuples
+ok( scalar( @{[ $output=~/WARNING:.*found xmin.*before relfrozenxid/gi ]}) == 2);
+
+run_sql_command('DROP TABLE test_vac;');
+
+$node->stop('fast');
-- 
2.23.0

Reply via email to