hi.
attached  patch trying to speedup ALTER TABLE ADD CHECK CONSTRAINT.
demo:

drop table if exists t7;
create table t7 (a int not  null, b int, c int);
insert into t7 select g, g+1, g %100 from generate_series(1,1_000_000) g;
create index on t7(a,b);

alter table t7 add check (a > 0);
patch: Time: 4.281 ms
master: Time: 491.689 ms
----------------------------
implementation:

step1.  during execute command `ALTER TABLE ADD CHECK CONSTRAINT`
in AddRelationNewConstraints->StoreRelCheck
after StoreRelCheck add a CommandCounterIncrement();
so previously added CHECK pg_constraint can be seen by us.
we need to use pg_get_constraintdef to retrieve the CHECK constraint definition.

step2. check if this relation has any suitable index (not expression
index, not predicate index)
--whole patch hinges on SPI query can use indexscan to quickly
retrieve certain information.

step3: construct and execute these three SPI query:
("set enable_seqscan to off")
(SELECT 1 FROM the_table WHERE NOT (check_constraint_def)
AND check_constraint_def IS NOT NULL LIMIT 1)
("reset enable_seqscan")

the SPI query will be faster, if the qual(check_constraint_def) can be
utilized by indexscan.
if SPI_processed == 0 means we toggle this check constraint as
"already_validated" (bool) is true.
we stored already_validated in CookedConstraint. later pass it to
AlteredTableInfo->constraints.
then phrase 3 within ATRewriteTable, we can do
```
if (constraint->already_validated)
    needscan = false;
```

----------------------------
concern:
only certain kinds of check constraint expressions can be used for
this optimization.
i do all the CHECK constraint expressions filter in checkconstraint_expr_walker.

use contain_volatile_functions to filter out volatile expressions,
add (check_constraint_def IS NOT NULL) in the above SPI query, i think
null handling is correct?

ALTER TABLE ADD CHECK CONSTRAINT is using ACCESS EXCLUSIVE lock.
but i need to think more about concurrently related issues.

idea come from this thread:
https://postgr.es/m/canwftzk2mz7js_56v+zclxzyh1utbzx4ueg03p7cee86k2r...@mail.gmail.com
From 5a4039d058d34467696fd6e6238fdb1132d9af14 Mon Sep 17 00:00:00 2001
From: jian he <jian.universal...@gmail.com>
Date: Sat, 30 Nov 2024 19:20:22 +0800
Subject: [PATCH v1 1/1] speedup alter table add check constraint

dicussion: https://postgr.es/m/
---
 src/backend/catalog/heap.c                | 218 ++++++++++++++++++++++
 src/backend/catalog/index.c               |  74 ++++++++
 src/backend/catalog/pg_constraint.c       |   1 +
 src/backend/commands/tablecmds.c          |   9 +-
 src/include/catalog/heap.h                |   1 +
 src/include/catalog/index.h               |   1 +
 src/test/regress/expected/constraints.out |  20 ++
 src/test/regress/sql/constraints.sql      |  22 +++
 8 files changed, 345 insertions(+), 1 deletion(-)

diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 003af4bf21..5eb3450208 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -57,6 +57,7 @@
 #include "commands/tablecmds.h"
 #include "commands/typecmds.h"
 #include "common/int.h"
+#include "executor/spi.h"
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/optimizer.h"
@@ -115,6 +116,17 @@ static Node *cookConstraint(ParseState *pstate,
 							Node *raw_constraint,
 							char *relname);
 
+typedef struct checkexpr_context
+{
+	Bitmapset  *bms_indexattno;
+	bool		cannot_be_indexed;
+} checkexpr_context;
+static bool
+checkconstraint_expr_walker(Node *node, checkexpr_context *context);
+static bool
+index_validate_check_constraint(Oid constrOid, Relation rel,
+								Node *expr,
+								const char *constrname);
 
 /* ----------------------------------------------------------------
  *				XXX UGLY HARD CODED BADNESS FOLLOWS XXX
@@ -2409,6 +2421,7 @@ AddRelationNewConstraints(Relation rel,
 		cooked->is_local = is_local;
 		cooked->inhcount = is_local ? 0 : 1;
 		cooked->is_no_inherit = false;
+		cooked->already_validated = false;
 		cookedConstraints = lappend(cookedConstraints, cooked);
 	}
 
@@ -2527,6 +2540,8 @@ AddRelationNewConstraints(Relation rel,
 				StoreRelCheck(rel, ccname, expr, cdef->initially_valid, is_local,
 							  is_local ? 0 : 1, cdef->is_no_inherit, is_internal);
 
+			/* we need this for index_validate_check_constraint*/
+			CommandCounterIncrement();
 			numchecks++;
 
 			cooked = (CookedConstraint *) palloc(sizeof(CookedConstraint));
@@ -2539,6 +2554,13 @@ AddRelationNewConstraints(Relation rel,
 			cooked->is_local = is_local;
 			cooked->inhcount = is_local ? 0 : 1;
 			cooked->is_no_inherit = cdef->is_no_inherit;
+			/* internal, readd check constraint cannot be prevalidated */
+			if (is_internal)
+				cooked->already_validated = false;
+			else
+				cooked->already_validated =
+					index_validate_check_constraint(constrOid, rel, expr, ccname);
+
 			cookedConstraints = lappend(cookedConstraints, cooked);
 		}
 		else if (cdef->contype == CONSTR_NOTNULL)
@@ -2609,6 +2631,7 @@ AddRelationNewConstraints(Relation rel,
 			nncooked->is_local = is_local;
 			nncooked->inhcount = inhcount;
 			nncooked->is_no_inherit = cdef->is_no_inherit;
+			nncooked->already_validated = false;
 
 			cookedConstraints = lappend(cookedConstraints, nncooked);
 		}
@@ -2626,6 +2649,201 @@ AddRelationNewConstraints(Relation rel,
 	return cookedConstraints;
 }
 
+static bool
+index_validate_check_constraint(Oid constrOid, Relation rel, Node *expr, const char *constrname)
+{
+	StringInfoData querybuf;
+	BMS_Comparison cmp;
+	text	   	*constrdef_text;
+	Bitmapset	*idx_attnums = NULL;
+	char	   	*constrdef = NULL;
+	char	   	*constrdef_value = NULL;
+	bool		check_constraint_ok = false;
+
+	checkexpr_context	context = {0};
+	context.bms_indexattno = NULL;
+	context.cannot_be_indexed = false;
+
+	/*
+	 * we use CHECK constraint definition for constructing SQL query, later we
+	 * using SPI to execute it.
+	 */
+	constrdef_text = DatumGetTextPP(DirectFunctionCall2(pg_get_constraintdef,
+													ObjectIdGetDatum(constrOid),
+													BoolGetDatum(false)));
+	constrdef = text_to_cstring(constrdef_text);
+
+	if (constrdef == NULL)
+		return false;
+
+	/* CHECK constraint is a plain Var node, var type should only be bool */
+	if (IsA(expr, Var))
+	{
+		Var		   *var = (Var *) expr;
+		if (var->vartype == BOOLOID)
+			return true;
+		else
+			return false;
+	}
+
+	idx_attnums = relation_get_indexattnums(rel);
+	/* no appropriate index on this relation, so gave up */
+	if (idx_attnums == NULL)
+		return false;
+
+	checkconstraint_expr_walker(expr, &context);
+	if (context.cannot_be_indexed || context.bms_indexattno == NULL)
+		return false;
+
+	/* all the referenced vars in the expr should have index on it */
+	cmp = bms_subset_compare(context.bms_indexattno, idx_attnums);
+	if (cmp == BMS_SUBSET2 || cmp == BMS_DIFFERENT)
+		return false;
+
+	/*
+	 * set enable_seqscan to off to force optimizer using index scan.
+	 * we already checked that check constraint expr varnode is not-null
+	 * and have appropriate index on it.
+	 */
+	SPI_connect();
+	if (SPI_execute("set enable_seqscan to off", false, 0) != SPI_OK_UTILITY)
+		elog(ERROR, "SPI_exec failed for query: \"%s\"", "set enable_seqscan to off");
+
+	initStringInfo(&querybuf);
+	constrdef_value  = pstrdup(constrdef + 5); /*check constraint first word is CHECK, that's why offset 5*/
+	appendStringInfo(&querybuf, "SELECT 1 FROM %s WHERE (NOT %s) AND %s IS NOT NULL LIMIT 1",
+								RelationGetRelationName(rel),
+								constrdef_value,
+								constrdef_value);
+
+	if (SPI_execute(querybuf.data, false, 1) != SPI_OK_SELECT)
+		elog(ERROR, "SPI_exec failed: %s", querybuf.data);
+
+	check_constraint_ok = (SPI_processed == 0);
+
+	/*we need reset enable_seqscan GUC */
+	if (SPI_execute("reset enable_seqscan;", false, 0) != SPI_OK_UTILITY)
+		elog(ERROR, "SPI_exec failed for query: \"%s\"", "reset enable_seqscan");
+	SPI_finish();
+
+	return check_constraint_ok;
+}
+
+static bool
+checkconstraint_expr_walker(Node *node, checkexpr_context *context)
+{
+	Node	   *leftop = NULL;
+	Node	   *rightop = NULL;
+
+	if (node == NULL)
+		return false;
+
+	/* volatile check expression will influence the result of to be executed SPI
+	 * query. so we forbiden it.
+	 */
+	if (contain_volatile_functions(node))
+	{
+		context->cannot_be_indexed = true;
+		return false;
+	}
+
+	if (IsA(node, Var))
+	{
+		Var		   *var = (Var *) node;
+
+		/* wholerow expression cannot be used for indexscan */
+		if (var->varattno == 0)
+		{
+			context->cannot_be_indexed = true;
+			return false;
+		}
+		context->bms_indexattno = bms_add_member(context->bms_indexattno,
+												var->varattno);
+	}
+
+	if (IsA(node, OpExpr))
+	{
+		bool		leftop_have_var;
+		bool		rightop_have_var;
+		OpExpr	   *opexpr = (OpExpr *) node;
+
+		if (opexpr->opresulttype != BOOLOID
+			|| !OidIsValid(get_negator(opexpr->opno)))
+		{
+			context->cannot_be_indexed = true;
+			return false;
+		}
+
+		/*
+		* Check the expression form: (Var node operator constant) or (constant
+		* operator Var).
+		*/
+		leftop = (Node *) linitial(opexpr->args);
+		rightop = (Node *) lsecond(opexpr->args);
+
+		if (IsA(leftop, RelabelType))
+			leftop = (Node *) ((RelabelType *) leftop)->arg;
+		if (IsA(rightop, RelabelType))
+			rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+		/*
+		 * cannot both side contain Var node. current index scan cannot handle
+		 * qual like `where col1 < colb`. we also skip case where bothside don't
+		 * have var clause.
+		 */
+		leftop_have_var = contain_var_clause(leftop);
+		rightop_have_var = contain_var_clause(rightop);
+		if ((leftop_have_var && rightop_have_var) ||
+		   (!leftop_have_var && !rightop_have_var))
+		{
+			context->cannot_be_indexed = true;
+			return false;
+		}
+
+		if (IsA(leftop, Const) && !OidIsValid(get_commutator(opexpr->opno)))
+		{
+			/* commutator doesn't exist, we can't reverse the order */
+			context->cannot_be_indexed = true;
+			return false;
+		}
+
+		/* maybe this is not necessary? skip row/composite type first.*/
+		if (type_is_rowtype(exprType(leftop)) ||
+			type_is_rowtype(exprType(rightop)))
+		{
+			context->cannot_be_indexed = true;
+			return false;
+		}
+	}
+
+	if (IsA(node, ScalarArrayOpExpr))
+	{
+		ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
+
+		leftop = (Node *) linitial(saop->args);
+		if (leftop && IsA(leftop, RelabelType))
+			leftop = (Node *) ((RelabelType *) leftop)->arg;
+
+		if (!IsA(leftop, Var))
+		{
+			context->cannot_be_indexed = true;
+			return false;
+		}
+
+		rightop = (Node *) lsecond(saop->args);
+		if (rightop && IsA(rightop, RelabelType))
+			rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+		if (!IsA(rightop, Const))
+		{
+			context->cannot_be_indexed = true;
+			return false;
+		}
+	}
+
+	return expression_tree_walker(node, checkconstraint_expr_walker,
+								  (void *) context);
+}
 /*
  * Check for a pre-existing check constraint that conflicts with a proposed
  * new one, and either adjust its conislocal/coninhcount settings or throw
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index f9bb721c5f..89c8873389 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -4257,3 +4257,77 @@ RestoreReindexState(const void *reindexstate)
 	/* Note the worker has its own transaction nesting level */
 	reindexingNestLevel = GetCurrentTransactionNestLevel();
 }
+
+/*
+ * relation_get_indexattnums
+ * 		Returns a Bitmapsets with member of attribute number (ref:
+ * 		pg_attribute.attnum) that have suitable index on it.
+*/
+Bitmapset *
+relation_get_indexattnums(Relation rel)
+{
+	Bitmapset *attnums = NULL;
+
+	Relation	pg_index;
+	HeapTuple	indexTuple;
+	SysScanDesc scan;
+	ScanKeyData skey;
+
+	/* Scan pg_index for unique index of the target rel */
+	pg_index = table_open(IndexRelationId, AccessShareLock);
+
+	ScanKeyInit(&skey,
+				Anum_pg_index_indrelid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(RelationGetRelid(rel)));
+	scan = systable_beginscan(pg_index, IndexIndrelidIndexId, true,
+							  NULL, 1, &skey);
+
+	while (HeapTupleIsValid(indexTuple = systable_getnext(scan)))
+	{
+		Datum		adatum;
+		bool		isNull;
+		Datum	   *keys;
+		int			nelems;
+
+		Form_pg_index index = (Form_pg_index) GETSTRUCT(indexTuple);
+		if (!index->indislive)
+			continue;
+
+		/* Skip invalid, exclusion index or deferred index */
+		if (!index->indisvalid || index->indisexclusion || !index->indimmediate)
+			continue;
+
+		/* Skip expression index or predicate index */
+		if (!heap_attisnull(indexTuple, Anum_pg_index_indpred, NULL) ||
+			!heap_attisnull(indexTuple, Anum_pg_index_indexprs, NULL))
+			continue;
+
+		/* Extract the pg_index->indkey int2vector */
+		adatum = heap_getattr(indexTuple, Anum_pg_index_indkey,
+							RelationGetDescr(pg_index), &isNull);
+		if (isNull)
+			elog(ERROR, "null int2vector for index %u", index->indexrelid);
+
+		deconstruct_array_builtin(DatumGetArrayTypeP(adatum), INT2OID,
+								  &keys,
+								  NULL,
+								  &nelems);
+
+		if (nelems != index->indnatts)
+			elog(ERROR, "corrupted int2vector for index %u", index->indexrelid);
+
+		Assert(nelems >= index->indnkeyatts);
+
+		for (int i = 0; i < index->indnkeyatts; i++)
+		{
+			int	attno = DatumGetInt16(keys[i]);
+
+			attnums = bms_add_member(attnums, attno);
+		}
+	}
+	systable_endscan(scan);
+
+	table_close(pg_index, AccessShareLock);
+	return attnums;
+}
\ No newline at end of file
diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c
index 9c05a98d28..27f164ad75 100644
--- a/src/backend/catalog/pg_constraint.c
+++ b/src/backend/catalog/pg_constraint.c
@@ -826,6 +826,7 @@ RelationGetNotNullConstraints(Oid relid, bool cooked, bool include_noinh)
 			cooked->is_local = true;
 			cooked->inhcount = 0;
 			cooked->is_no_inherit = conForm->connoinherit;
+			cooked->already_validated = false;
 
 			notnulls = lappend(notnulls, cooked);
 		}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 1af2e2bffb..8608c57356 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -218,6 +218,7 @@ typedef struct NewConstraint
 	Oid			refrelid;		/* PK rel, if FOREIGN */
 	Oid			refindid;		/* OID of PK's index, if FOREIGN */
 	bool		conwithperiod;	/* Whether the new FOREIGN KEY uses PERIOD */
+	bool		already_validated;	/* already validated for check constraint */
 	Oid			conid;			/* OID of pg_constraint entry, if FOREIGN */
 	Node	   *qual;			/* Check expr or CONSTR_FOREIGN Constraint */
 	ExprState  *qualstate;		/* Execution state for CHECK expr */
@@ -977,6 +978,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId,
 			cooked->is_local = true;	/* not used for defaults */
 			cooked->inhcount = 0;	/* ditto */
 			cooked->is_no_inherit = false;
+			cooked->already_validated = false;
 			cookedDefaults = lappend(cookedDefaults, cooked);
 			attr->atthasdef = true;
 		}
@@ -6093,6 +6095,8 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode)
 		{
 			case CONSTR_CHECK:
 				needscan = true;
+				if(con->already_validated)
+					needscan = false;
 				con->qualstate = ExecPrepareExpr((Expr *) con->qual, estate);
 				break;
 			case CONSTR_FOREIGN:
@@ -9597,7 +9601,7 @@ ATAddCheckNNConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
 			newcon->name = ccon->name;
 			newcon->contype = ccon->contype;
 			newcon->qual = ccon->expr;
-
+			newcon->already_validated = ccon->already_validated;
 			tab->constraints = lappend(tab->constraints, newcon);
 		}
 
@@ -10718,6 +10722,7 @@ addFkRecurseReferencing(List **wqueue, Constraint *fkconstraint, Relation rel,
 			newcon->refindid = indexOid;
 			newcon->conid = parentConstr;
 			newcon->conwithperiod = fkconstraint->fk_with_period;
+			newcon->already_validated = false;
 			newcon->qual = (Node *) fkconstraint;
 
 			tab->constraints = lappend(tab->constraints, newcon);
@@ -12027,6 +12032,7 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
 			newcon->refindid = con->conindid;
 			newcon->conid = con->oid;
 			newcon->qual = (Node *) fkconstraint;
+			newcon->already_validated = false;
 
 			/* Find or create work queue entry for this table */
 			tab = ATGetQueueEntry(wqueue, rel);
@@ -12095,6 +12101,7 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
 			newcon->refrelid = InvalidOid;
 			newcon->refindid = InvalidOid;
 			newcon->conid = con->oid;
+			newcon->already_validated = false;
 
 			val = SysCacheGetAttrNotNull(CONSTROID, tuple,
 										 Anum_pg_constraint_conbin);
diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h
index 8c278f202b..766ff00e6f 100644
--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -45,6 +45,7 @@ typedef struct CookedConstraint
 	int16		inhcount;		/* number of times constraint is inherited */
 	bool		is_no_inherit;	/* constraint has local def and cannot be
 								 * inherited */
+	bool		already_validated; /* already validated for check constraint */
 } CookedConstraint;
 
 extern Relation heap_create(const char *relname,
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index 2dea96f47c..a4386a6f52 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -175,6 +175,7 @@ extern void RestoreReindexState(const void *reindexstate);
 
 extern void IndexSetParentIndex(Relation partitionIdx, Oid parentOid);
 
+extern Bitmapset *relation_get_indexattnums(Relation rel);
 
 /*
  * itemptr_encode - Encode ItemPointer as int64/int8
diff --git a/src/test/regress/expected/constraints.out b/src/test/regress/expected/constraints.out
index 71200c90ed..5504bd7edd 100644
--- a/src/test/regress/expected/constraints.out
+++ b/src/test/regress/expected/constraints.out
@@ -112,6 +112,26 @@ SELECT * from CHECK2_TBL;
  7 | check ok |  7
 (2 rows)
 
+create table t1(a int not null, b int not null, c bool not null, d int[] not null);
+create index t1_idx_a on t1(a);
+create index t1_idx_b on t1(b);
+create index t1_idx_c on t1(c);
+create index t1_idx_d on t1(d);
+--empty table, can be optimized
+alter table t1 add constraint nc check(a>0);
+insert into t1 values(1,2, true, '{1,2}');
+insert into t1 values(1,2, true, '{1,3,4}');
+insert into t1 values(11,2, true, '{1,3,5}');
+alter table t1 add  check(a>0);
+alter table t1 add  check(a <> 12);
+alter table t1 add  check(a>0 and a < 12);
+alter table t1 add  check((a>0 and a < 12) or (a < 13 or a > -1) or b = 2);
+alter table t1 add  check((a>0 and a < 12) or (d = '{13,12}'));
+alter table t1 add  check((a = any('{1,11}')));
+alter table t1 add  check((b = all('{2}')));
+--cannot optimzied for now.
+alter table t1 add  check((d[1] = 1));
+drop table t1;
 --
 -- Check constraints on INSERT
 --
diff --git a/src/test/regress/sql/constraints.sql b/src/test/regress/sql/constraints.sql
index e607eb1fdd..87e237f50d 100644
--- a/src/test/regress/sql/constraints.sql
+++ b/src/test/regress/sql/constraints.sql
@@ -82,6 +82,28 @@ INSERT INTO CHECK2_TBL VALUES (7, 'check ok', 7);
 
 SELECT * from CHECK2_TBL;
 
+create table t1(a int not null, b int not null, c bool not null, d int[] not null);
+create index t1_idx_a on t1(a);
+create index t1_idx_b on t1(b);
+create index t1_idx_c on t1(c);
+create index t1_idx_d on t1(d);
+--empty table, can be optimized
+alter table t1 add constraint nc check(a>0);
+insert into t1 values(1,2, true, '{1,2}');
+insert into t1 values(1,2, true, '{1,3,4}');
+insert into t1 values(11,2, true, '{1,3,5}');
+
+alter table t1 add  check(a>0);
+alter table t1 add  check(a <> 12);
+alter table t1 add  check(a>0 and a < 12);
+alter table t1 add  check((a>0 and a < 12) or (a < 13 or a > -1) or b = 2);
+alter table t1 add  check((a>0 and a < 12) or (d = '{13,12}'));
+alter table t1 add  check((a = any('{1,11}')));
+alter table t1 add  check((b = all('{2}')));
+--cannot optimzied for now.
+alter table t1 add  check((d[1] = 1));
+drop table t1;
+
 --
 -- Check constraints on INSERT
 --
-- 
2.34.1

Reply via email to