hi. attached patch trying to speedup ALTER TABLE ADD CHECK CONSTRAINT. demo:
drop table if exists t7; create table t7 (a int not null, b int, c int); insert into t7 select g, g+1, g %100 from generate_series(1,1_000_000) g; create index on t7(a,b); alter table t7 add check (a > 0); patch: Time: 4.281 ms master: Time: 491.689 ms ---------------------------- implementation: step1. during execute command `ALTER TABLE ADD CHECK CONSTRAINT` in AddRelationNewConstraints->StoreRelCheck after StoreRelCheck add a CommandCounterIncrement(); so previously added CHECK pg_constraint can be seen by us. we need to use pg_get_constraintdef to retrieve the CHECK constraint definition. step2. check if this relation has any suitable index (not expression index, not predicate index) --whole patch hinges on SPI query can use indexscan to quickly retrieve certain information. step3: construct and execute these three SPI query: ("set enable_seqscan to off") (SELECT 1 FROM the_table WHERE NOT (check_constraint_def) AND check_constraint_def IS NOT NULL LIMIT 1) ("reset enable_seqscan") the SPI query will be faster, if the qual(check_constraint_def) can be utilized by indexscan. if SPI_processed == 0 means we toggle this check constraint as "already_validated" (bool) is true. we stored already_validated in CookedConstraint. later pass it to AlteredTableInfo->constraints. then phrase 3 within ATRewriteTable, we can do ``` if (constraint->already_validated) needscan = false; ``` ---------------------------- concern: only certain kinds of check constraint expressions can be used for this optimization. i do all the CHECK constraint expressions filter in checkconstraint_expr_walker. use contain_volatile_functions to filter out volatile expressions, add (check_constraint_def IS NOT NULL) in the above SPI query, i think null handling is correct? ALTER TABLE ADD CHECK CONSTRAINT is using ACCESS EXCLUSIVE lock. but i need to think more about concurrently related issues. idea come from this thread: https://postgr.es/m/canwftzk2mz7js_56v+zclxzyh1utbzx4ueg03p7cee86k2r...@mail.gmail.com
From 5a4039d058d34467696fd6e6238fdb1132d9af14 Mon Sep 17 00:00:00 2001 From: jian he <jian.universal...@gmail.com> Date: Sat, 30 Nov 2024 19:20:22 +0800 Subject: [PATCH v1 1/1] speedup alter table add check constraint dicussion: https://postgr.es/m/ --- src/backend/catalog/heap.c | 218 ++++++++++++++++++++++ src/backend/catalog/index.c | 74 ++++++++ src/backend/catalog/pg_constraint.c | 1 + src/backend/commands/tablecmds.c | 9 +- src/include/catalog/heap.h | 1 + src/include/catalog/index.h | 1 + src/test/regress/expected/constraints.out | 20 ++ src/test/regress/sql/constraints.sql | 22 +++ 8 files changed, 345 insertions(+), 1 deletion(-) diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index 003af4bf21..5eb3450208 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -57,6 +57,7 @@ #include "commands/tablecmds.h" #include "commands/typecmds.h" #include "common/int.h" +#include "executor/spi.h" #include "miscadmin.h" #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" @@ -115,6 +116,17 @@ static Node *cookConstraint(ParseState *pstate, Node *raw_constraint, char *relname); +typedef struct checkexpr_context +{ + Bitmapset *bms_indexattno; + bool cannot_be_indexed; +} checkexpr_context; +static bool +checkconstraint_expr_walker(Node *node, checkexpr_context *context); +static bool +index_validate_check_constraint(Oid constrOid, Relation rel, + Node *expr, + const char *constrname); /* ---------------------------------------------------------------- * XXX UGLY HARD CODED BADNESS FOLLOWS XXX @@ -2409,6 +2421,7 @@ AddRelationNewConstraints(Relation rel, cooked->is_local = is_local; cooked->inhcount = is_local ? 0 : 1; cooked->is_no_inherit = false; + cooked->already_validated = false; cookedConstraints = lappend(cookedConstraints, cooked); } @@ -2527,6 +2540,8 @@ AddRelationNewConstraints(Relation rel, StoreRelCheck(rel, ccname, expr, cdef->initially_valid, is_local, is_local ? 0 : 1, cdef->is_no_inherit, is_internal); + /* we need this for index_validate_check_constraint*/ + CommandCounterIncrement(); numchecks++; cooked = (CookedConstraint *) palloc(sizeof(CookedConstraint)); @@ -2539,6 +2554,13 @@ AddRelationNewConstraints(Relation rel, cooked->is_local = is_local; cooked->inhcount = is_local ? 0 : 1; cooked->is_no_inherit = cdef->is_no_inherit; + /* internal, readd check constraint cannot be prevalidated */ + if (is_internal) + cooked->already_validated = false; + else + cooked->already_validated = + index_validate_check_constraint(constrOid, rel, expr, ccname); + cookedConstraints = lappend(cookedConstraints, cooked); } else if (cdef->contype == CONSTR_NOTNULL) @@ -2609,6 +2631,7 @@ AddRelationNewConstraints(Relation rel, nncooked->is_local = is_local; nncooked->inhcount = inhcount; nncooked->is_no_inherit = cdef->is_no_inherit; + nncooked->already_validated = false; cookedConstraints = lappend(cookedConstraints, nncooked); } @@ -2626,6 +2649,201 @@ AddRelationNewConstraints(Relation rel, return cookedConstraints; } +static bool +index_validate_check_constraint(Oid constrOid, Relation rel, Node *expr, const char *constrname) +{ + StringInfoData querybuf; + BMS_Comparison cmp; + text *constrdef_text; + Bitmapset *idx_attnums = NULL; + char *constrdef = NULL; + char *constrdef_value = NULL; + bool check_constraint_ok = false; + + checkexpr_context context = {0}; + context.bms_indexattno = NULL; + context.cannot_be_indexed = false; + + /* + * we use CHECK constraint definition for constructing SQL query, later we + * using SPI to execute it. + */ + constrdef_text = DatumGetTextPP(DirectFunctionCall2(pg_get_constraintdef, + ObjectIdGetDatum(constrOid), + BoolGetDatum(false))); + constrdef = text_to_cstring(constrdef_text); + + if (constrdef == NULL) + return false; + + /* CHECK constraint is a plain Var node, var type should only be bool */ + if (IsA(expr, Var)) + { + Var *var = (Var *) expr; + if (var->vartype == BOOLOID) + return true; + else + return false; + } + + idx_attnums = relation_get_indexattnums(rel); + /* no appropriate index on this relation, so gave up */ + if (idx_attnums == NULL) + return false; + + checkconstraint_expr_walker(expr, &context); + if (context.cannot_be_indexed || context.bms_indexattno == NULL) + return false; + + /* all the referenced vars in the expr should have index on it */ + cmp = bms_subset_compare(context.bms_indexattno, idx_attnums); + if (cmp == BMS_SUBSET2 || cmp == BMS_DIFFERENT) + return false; + + /* + * set enable_seqscan to off to force optimizer using index scan. + * we already checked that check constraint expr varnode is not-null + * and have appropriate index on it. + */ + SPI_connect(); + if (SPI_execute("set enable_seqscan to off", false, 0) != SPI_OK_UTILITY) + elog(ERROR, "SPI_exec failed for query: \"%s\"", "set enable_seqscan to off"); + + initStringInfo(&querybuf); + constrdef_value = pstrdup(constrdef + 5); /*check constraint first word is CHECK, that's why offset 5*/ + appendStringInfo(&querybuf, "SELECT 1 FROM %s WHERE (NOT %s) AND %s IS NOT NULL LIMIT 1", + RelationGetRelationName(rel), + constrdef_value, + constrdef_value); + + if (SPI_execute(querybuf.data, false, 1) != SPI_OK_SELECT) + elog(ERROR, "SPI_exec failed: %s", querybuf.data); + + check_constraint_ok = (SPI_processed == 0); + + /*we need reset enable_seqscan GUC */ + if (SPI_execute("reset enable_seqscan;", false, 0) != SPI_OK_UTILITY) + elog(ERROR, "SPI_exec failed for query: \"%s\"", "reset enable_seqscan"); + SPI_finish(); + + return check_constraint_ok; +} + +static bool +checkconstraint_expr_walker(Node *node, checkexpr_context *context) +{ + Node *leftop = NULL; + Node *rightop = NULL; + + if (node == NULL) + return false; + + /* volatile check expression will influence the result of to be executed SPI + * query. so we forbiden it. + */ + if (contain_volatile_functions(node)) + { + context->cannot_be_indexed = true; + return false; + } + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + /* wholerow expression cannot be used for indexscan */ + if (var->varattno == 0) + { + context->cannot_be_indexed = true; + return false; + } + context->bms_indexattno = bms_add_member(context->bms_indexattno, + var->varattno); + } + + if (IsA(node, OpExpr)) + { + bool leftop_have_var; + bool rightop_have_var; + OpExpr *opexpr = (OpExpr *) node; + + if (opexpr->opresulttype != BOOLOID + || !OidIsValid(get_negator(opexpr->opno))) + { + context->cannot_be_indexed = true; + return false; + } + + /* + * Check the expression form: (Var node operator constant) or (constant + * operator Var). + */ + leftop = (Node *) linitial(opexpr->args); + rightop = (Node *) lsecond(opexpr->args); + + if (IsA(leftop, RelabelType)) + leftop = (Node *) ((RelabelType *) leftop)->arg; + if (IsA(rightop, RelabelType)) + rightop = (Node *) ((RelabelType *) rightop)->arg; + + /* + * cannot both side contain Var node. current index scan cannot handle + * qual like `where col1 < colb`. we also skip case where bothside don't + * have var clause. + */ + leftop_have_var = contain_var_clause(leftop); + rightop_have_var = contain_var_clause(rightop); + if ((leftop_have_var && rightop_have_var) || + (!leftop_have_var && !rightop_have_var)) + { + context->cannot_be_indexed = true; + return false; + } + + if (IsA(leftop, Const) && !OidIsValid(get_commutator(opexpr->opno))) + { + /* commutator doesn't exist, we can't reverse the order */ + context->cannot_be_indexed = true; + return false; + } + + /* maybe this is not necessary? skip row/composite type first.*/ + if (type_is_rowtype(exprType(leftop)) || + type_is_rowtype(exprType(rightop))) + { + context->cannot_be_indexed = true; + return false; + } + } + + if (IsA(node, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node; + + leftop = (Node *) linitial(saop->args); + if (leftop && IsA(leftop, RelabelType)) + leftop = (Node *) ((RelabelType *) leftop)->arg; + + if (!IsA(leftop, Var)) + { + context->cannot_be_indexed = true; + return false; + } + + rightop = (Node *) lsecond(saop->args); + if (rightop && IsA(rightop, RelabelType)) + rightop = (Node *) ((RelabelType *) rightop)->arg; + + if (!IsA(rightop, Const)) + { + context->cannot_be_indexed = true; + return false; + } + } + + return expression_tree_walker(node, checkconstraint_expr_walker, + (void *) context); +} /* * Check for a pre-existing check constraint that conflicts with a proposed * new one, and either adjust its conislocal/coninhcount settings or throw diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index f9bb721c5f..89c8873389 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -4257,3 +4257,77 @@ RestoreReindexState(const void *reindexstate) /* Note the worker has its own transaction nesting level */ reindexingNestLevel = GetCurrentTransactionNestLevel(); } + +/* + * relation_get_indexattnums + * Returns a Bitmapsets with member of attribute number (ref: + * pg_attribute.attnum) that have suitable index on it. +*/ +Bitmapset * +relation_get_indexattnums(Relation rel) +{ + Bitmapset *attnums = NULL; + + Relation pg_index; + HeapTuple indexTuple; + SysScanDesc scan; + ScanKeyData skey; + + /* Scan pg_index for unique index of the target rel */ + pg_index = table_open(IndexRelationId, AccessShareLock); + + ScanKeyInit(&skey, + Anum_pg_index_indrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(rel))); + scan = systable_beginscan(pg_index, IndexIndrelidIndexId, true, + NULL, 1, &skey); + + while (HeapTupleIsValid(indexTuple = systable_getnext(scan))) + { + Datum adatum; + bool isNull; + Datum *keys; + int nelems; + + Form_pg_index index = (Form_pg_index) GETSTRUCT(indexTuple); + if (!index->indislive) + continue; + + /* Skip invalid, exclusion index or deferred index */ + if (!index->indisvalid || index->indisexclusion || !index->indimmediate) + continue; + + /* Skip expression index or predicate index */ + if (!heap_attisnull(indexTuple, Anum_pg_index_indpred, NULL) || + !heap_attisnull(indexTuple, Anum_pg_index_indexprs, NULL)) + continue; + + /* Extract the pg_index->indkey int2vector */ + adatum = heap_getattr(indexTuple, Anum_pg_index_indkey, + RelationGetDescr(pg_index), &isNull); + if (isNull) + elog(ERROR, "null int2vector for index %u", index->indexrelid); + + deconstruct_array_builtin(DatumGetArrayTypeP(adatum), INT2OID, + &keys, + NULL, + &nelems); + + if (nelems != index->indnatts) + elog(ERROR, "corrupted int2vector for index %u", index->indexrelid); + + Assert(nelems >= index->indnkeyatts); + + for (int i = 0; i < index->indnkeyatts; i++) + { + int attno = DatumGetInt16(keys[i]); + + attnums = bms_add_member(attnums, attno); + } + } + systable_endscan(scan); + + table_close(pg_index, AccessShareLock); + return attnums; +} \ No newline at end of file diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c index 9c05a98d28..27f164ad75 100644 --- a/src/backend/catalog/pg_constraint.c +++ b/src/backend/catalog/pg_constraint.c @@ -826,6 +826,7 @@ RelationGetNotNullConstraints(Oid relid, bool cooked, bool include_noinh) cooked->is_local = true; cooked->inhcount = 0; cooked->is_no_inherit = conForm->connoinherit; + cooked->already_validated = false; notnulls = lappend(notnulls, cooked); } diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 1af2e2bffb..8608c57356 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -218,6 +218,7 @@ typedef struct NewConstraint Oid refrelid; /* PK rel, if FOREIGN */ Oid refindid; /* OID of PK's index, if FOREIGN */ bool conwithperiod; /* Whether the new FOREIGN KEY uses PERIOD */ + bool already_validated; /* already validated for check constraint */ Oid conid; /* OID of pg_constraint entry, if FOREIGN */ Node *qual; /* Check expr or CONSTR_FOREIGN Constraint */ ExprState *qualstate; /* Execution state for CHECK expr */ @@ -977,6 +978,7 @@ DefineRelation(CreateStmt *stmt, char relkind, Oid ownerId, cooked->is_local = true; /* not used for defaults */ cooked->inhcount = 0; /* ditto */ cooked->is_no_inherit = false; + cooked->already_validated = false; cookedDefaults = lappend(cookedDefaults, cooked); attr->atthasdef = true; } @@ -6093,6 +6095,8 @@ ATRewriteTable(AlteredTableInfo *tab, Oid OIDNewHeap, LOCKMODE lockmode) { case CONSTR_CHECK: needscan = true; + if(con->already_validated) + needscan = false; con->qualstate = ExecPrepareExpr((Expr *) con->qual, estate); break; case CONSTR_FOREIGN: @@ -9597,7 +9601,7 @@ ATAddCheckNNConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel, newcon->name = ccon->name; newcon->contype = ccon->contype; newcon->qual = ccon->expr; - + newcon->already_validated = ccon->already_validated; tab->constraints = lappend(tab->constraints, newcon); } @@ -10718,6 +10722,7 @@ addFkRecurseReferencing(List **wqueue, Constraint *fkconstraint, Relation rel, newcon->refindid = indexOid; newcon->conid = parentConstr; newcon->conwithperiod = fkconstraint->fk_with_period; + newcon->already_validated = false; newcon->qual = (Node *) fkconstraint; tab->constraints = lappend(tab->constraints, newcon); @@ -12027,6 +12032,7 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName, newcon->refindid = con->conindid; newcon->conid = con->oid; newcon->qual = (Node *) fkconstraint; + newcon->already_validated = false; /* Find or create work queue entry for this table */ tab = ATGetQueueEntry(wqueue, rel); @@ -12095,6 +12101,7 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName, newcon->refrelid = InvalidOid; newcon->refindid = InvalidOid; newcon->conid = con->oid; + newcon->already_validated = false; val = SysCacheGetAttrNotNull(CONSTROID, tuple, Anum_pg_constraint_conbin); diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h index 8c278f202b..766ff00e6f 100644 --- a/src/include/catalog/heap.h +++ b/src/include/catalog/heap.h @@ -45,6 +45,7 @@ typedef struct CookedConstraint int16 inhcount; /* number of times constraint is inherited */ bool is_no_inherit; /* constraint has local def and cannot be * inherited */ + bool already_validated; /* already validated for check constraint */ } CookedConstraint; extern Relation heap_create(const char *relname, diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h index 2dea96f47c..a4386a6f52 100644 --- a/src/include/catalog/index.h +++ b/src/include/catalog/index.h @@ -175,6 +175,7 @@ extern void RestoreReindexState(const void *reindexstate); extern void IndexSetParentIndex(Relation partitionIdx, Oid parentOid); +extern Bitmapset *relation_get_indexattnums(Relation rel); /* * itemptr_encode - Encode ItemPointer as int64/int8 diff --git a/src/test/regress/expected/constraints.out b/src/test/regress/expected/constraints.out index 71200c90ed..5504bd7edd 100644 --- a/src/test/regress/expected/constraints.out +++ b/src/test/regress/expected/constraints.out @@ -112,6 +112,26 @@ SELECT * from CHECK2_TBL; 7 | check ok | 7 (2 rows) +create table t1(a int not null, b int not null, c bool not null, d int[] not null); +create index t1_idx_a on t1(a); +create index t1_idx_b on t1(b); +create index t1_idx_c on t1(c); +create index t1_idx_d on t1(d); +--empty table, can be optimized +alter table t1 add constraint nc check(a>0); +insert into t1 values(1,2, true, '{1,2}'); +insert into t1 values(1,2, true, '{1,3,4}'); +insert into t1 values(11,2, true, '{1,3,5}'); +alter table t1 add check(a>0); +alter table t1 add check(a <> 12); +alter table t1 add check(a>0 and a < 12); +alter table t1 add check((a>0 and a < 12) or (a < 13 or a > -1) or b = 2); +alter table t1 add check((a>0 and a < 12) or (d = '{13,12}')); +alter table t1 add check((a = any('{1,11}'))); +alter table t1 add check((b = all('{2}'))); +--cannot optimzied for now. +alter table t1 add check((d[1] = 1)); +drop table t1; -- -- Check constraints on INSERT -- diff --git a/src/test/regress/sql/constraints.sql b/src/test/regress/sql/constraints.sql index e607eb1fdd..87e237f50d 100644 --- a/src/test/regress/sql/constraints.sql +++ b/src/test/regress/sql/constraints.sql @@ -82,6 +82,28 @@ INSERT INTO CHECK2_TBL VALUES (7, 'check ok', 7); SELECT * from CHECK2_TBL; +create table t1(a int not null, b int not null, c bool not null, d int[] not null); +create index t1_idx_a on t1(a); +create index t1_idx_b on t1(b); +create index t1_idx_c on t1(c); +create index t1_idx_d on t1(d); +--empty table, can be optimized +alter table t1 add constraint nc check(a>0); +insert into t1 values(1,2, true, '{1,2}'); +insert into t1 values(1,2, true, '{1,3,4}'); +insert into t1 values(11,2, true, '{1,3,5}'); + +alter table t1 add check(a>0); +alter table t1 add check(a <> 12); +alter table t1 add check(a>0 and a < 12); +alter table t1 add check((a>0 and a < 12) or (a < 13 or a > -1) or b = 2); +alter table t1 add check((a>0 and a < 12) or (d = '{13,12}')); +alter table t1 add check((a = any('{1,11}'))); +alter table t1 add check((b = all('{2}'))); +--cannot optimzied for now. +alter table t1 add check((d[1] = 1)); +drop table t1; + -- -- Check constraints on INSERT -- -- 2.34.1