Hello everyone, > After some more thoughts about the diference of the two ideas, then I > find we are resolving two different issues, just that in the wrong index > choose cases, both of them should work generally.
Here is the formal version for the attribute reloptions direction. commit 0d842e39275710a544b11033f5eec476147daf06 (HEAD -> force_generic) Author: yizhi.fzh <yizhi....@alibaba-inc.com> Date: Sun Mar 31 11:51:28 2024 +0800 Add a attopt to disable MCV when estimating for Var = Const As of current code, when calculating the selectivity for Var = Const, planner first checks if the Const is an most common value and if not, it takes out all the portions of MCV's selectivity and num of distinct value, and treat the selectivity for Const equal for the rest n_distinct. This logic works great when the optimizer statistic is up to date, however if the known most common value has taken up most of the selectivity at the last run of analyze, and the new most common value in reality has not been gathered, the estimation for the new MCV will be pretty bad. A common case for this would be created_at = {current_date}; To overcome this issue, we provides a new syntax: ALTER TABLE tablename ALTER COLUMN created_at SET (force_generic=on); After this, planner ignores the value of MCV for this column when estimating for Var = Const and treating all the values equally. This would cause some badness if the values for a column are pretty not equal which is what MCV is designed for, however this patch just provide one more option to user and let user make the decision. Here is an example about its user case. create table t(a int, b int, c int) with (autovacuum_enabled=off); create index on t(a, b); create index on t(a, c); create table t2 (id int primary key, a int); insert into t2 select i , i from generate_series(1, 800)i; insert into t select floor(random() * 100 + 1)::int, i, i from generate_series(1, 100000) i; analyze t,t2; insert into t select floor(random() * 10 + 1)::int + 100 , i, i from generate_series(1, 10000) i; explain (costs off) select * from t where a = 109 and b = 8; explain (costs off, analyze) select * from t join t2 on t.c = t2.id where t.a = 109; ALTER TABLE t ALTER COLUMN a SET (force_generic=on); -- We will see some good result now. explain (costs off) select * from t where a = 109 and b = 8; explain (costs off, analyze) select * from t join t2 on t.c = t2.id where t.a = 109; I will add this to our commitfest application, any feedback is welcome! -- Best Regards Andy Fan
>From 0d842e39275710a544b11033f5eec476147daf06 Mon Sep 17 00:00:00 2001 From: "yizhi.fzh" <yizhi....@alibaba-inc.com> Date: Sun, 31 Mar 2024 11:51:28 +0800 Subject: [PATCH v1 1/1] Add a attopt to disable MCV when estimating for Var = Const As of current code, when calculating the selectivity for Var = Const, planner first checks if the Const is an most common value and if not, it takes out all the portions of MCV's selectivity and num of distinct value, and treat the selectivity for Const equal for the rest n_distinct. This logic works great when the optimizer statistic is up to date, however if the known most common value has taken up most of the selectivity at the last run of analyze, and the new most common value in reality has not been gathered, the estimation for the new MCV will be pretty bad. A common case for this would be created_at = {current_date}; To overcome this issue, we provides a new syntax: ALTER TABLE tablename ALTER COLUMN created_at SET (force_generic=on); After this, planner ignores the value of MCV for this column when estimating for Var = Const and treating all the values equally. This would cause some badness if the values for a column are pretty not equal which is what MCV is designed for, however this patch just provide one more option to user and let user make the decision. --- src/backend/access/common/reloptions.c | 12 +++++++- src/backend/utils/adt/selfuncs.c | 36 +++++++++++++++++++---- src/bin/psql/tab-complete.c | 2 +- src/include/utils/attoptcache.h | 1 + src/test/regress/expected/alter_table.out | 2 ++ src/test/regress/sql/alter_table.sql | 2 ++ 6 files changed, 48 insertions(+), 7 deletions(-) diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index d6eb5d8559..2c74a3fcf9 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -129,6 +129,15 @@ static relopt_bool boolRelOpts[] = }, true }, + { + { + "force_generic", + "estimate the selectivity like generic plan", + RELOPT_KIND_ATTRIBUTE, + ShareUpdateExclusiveLock + }, + false + }, { { "security_barrier", @@ -2070,7 +2079,8 @@ attribute_reloptions(Datum reloptions, bool validate) { static const relopt_parse_elt tab[] = { {"n_distinct", RELOPT_TYPE_REAL, offsetof(AttributeOpts, n_distinct)}, - {"n_distinct_inherited", RELOPT_TYPE_REAL, offsetof(AttributeOpts, n_distinct_inherited)} + {"n_distinct_inherited", RELOPT_TYPE_REAL, offsetof(AttributeOpts, n_distinct_inherited)}, + {"force_generic", RELOPT_TYPE_BOOL, offsetof(AttributeOpts, force_generic)} }; return (bytea *) build_reloptions(reloptions, validate, diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index cea777e9d4..b3c6bf1e45 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -125,6 +125,7 @@ #include "storage/bufmgr.h" #include "utils/acl.h" #include "utils/array.h" +#include "utils/attoptcache.h" #include "utils/builtins.h" #include "utils/date.h" #include "utils/datum.h" @@ -213,7 +214,7 @@ static bool get_actual_variable_endpoint(Relation heapRel, MemoryContext outercontext, Datum *endpointDatum); static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids); - +static bool est_selectivity_generic(PlannerInfo *root, VariableStatData *vardata); /* * eqsel - Selectivity of "=" for any data types. @@ -268,11 +269,11 @@ eqsel_internal(PG_FUNCTION_ARGS, bool negate) return negate ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL; /* - * We can do a lot better if the something is a constant. (Note: the - * Const might result from estimation rather than being a simple constant - * in the query.) + * We can do a lot better if the something is a constant unless user disallow + * that. (Note: the Const might result from estimation rather than being a + * simple constant in the query.) */ - if (IsA(other, Const)) + if (IsA(other, Const) && !est_selectivity_generic(root, &vardata)) selec = var_eq_const(&vardata, operator, collation, ((Const *) other)->constvalue, ((Const *) other)->constisnull, @@ -8141,3 +8142,28 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count, *indexPages = index->pages; } + +/* + * Treat the vardata as generic, so that all the Const in the qual is treated + * as parameter. This logic is helpful when the interested statistics is likely + * missing. + */ +static bool +est_selectivity_generic(PlannerInfo *root, VariableStatData *vardata) +{ + Var *var; + Oid relid; + AttributeOpts *opts; + + if (!IsA(vardata->var, Var)) + return false; + + var = castNode(Var, vardata->var); + + relid = root->simple_rte_array[var->varno]->relid; + + opts = get_attribute_options(relid, var->varattno); + + return opts ? opts->force_generic : false; + +} diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c index aa1acf8523..f93892c46c 100644 --- a/src/bin/psql/tab-complete.c +++ b/src/bin/psql/tab-complete.c @@ -2511,7 +2511,7 @@ psql_completion(const char *text, int start, int end) /* ALTER TABLE ALTER [COLUMN] <foo> SET ( */ else if (Matches("ALTER", "TABLE", MatchAny, "ALTER", "COLUMN", MatchAny, "SET", "(") || Matches("ALTER", "TABLE", MatchAny, "ALTER", MatchAny, "SET", "(")) - COMPLETE_WITH("n_distinct", "n_distinct_inherited"); + COMPLETE_WITH("n_distinct", "n_distinct_inherited", "force_generic"); /* ALTER TABLE ALTER [COLUMN] <foo> SET COMPRESSION */ else if (Matches("ALTER", "TABLE", MatchAny, "ALTER", "COLUMN", MatchAny, "SET", "COMPRESSION") || Matches("ALTER", "TABLE", MatchAny, "ALTER", MatchAny, "SET", "COMPRESSION")) diff --git a/src/include/utils/attoptcache.h b/src/include/utils/attoptcache.h index a1a9bfc0fb..584cd099e4 100644 --- a/src/include/utils/attoptcache.h +++ b/src/include/utils/attoptcache.h @@ -21,6 +21,7 @@ typedef struct AttributeOpts int32 vl_len_; /* varlena header (do not touch directly!) */ float8 n_distinct; float8 n_distinct_inherited; + bool force_generic; } AttributeOpts; extern AttributeOpts *get_attribute_options(Oid attrelid, int attnum); diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out index 7666c76238..294bcc64da 100644 --- a/src/test/regress/expected/alter_table.out +++ b/src/test/regress/expected/alter_table.out @@ -4475,7 +4475,9 @@ CREATE TABLE attmp(i integer); INSERT INTO attmp VALUES (1); ALTER TABLE attmp ALTER COLUMN i SET (n_distinct = 1, n_distinct_inherited = 2); ALTER TABLE attmp ALTER COLUMN i RESET (n_distinct_inherited); +ALTER TABLE attmp ALTER COLUMN i SET (force_generic=on); ANALYZE attmp; +ALTER TABLE attmp ALTER COLUMN i RESET (force_generic); DROP TABLE attmp; DROP USER regress_alter_table_user1; -- check that violating rows are correctly reported when attaching as the diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql index 9df5a63bdf..cb2b566ba5 100644 --- a/src/test/regress/sql/alter_table.sql +++ b/src/test/regress/sql/alter_table.sql @@ -2885,7 +2885,9 @@ CREATE TABLE attmp(i integer); INSERT INTO attmp VALUES (1); ALTER TABLE attmp ALTER COLUMN i SET (n_distinct = 1, n_distinct_inherited = 2); ALTER TABLE attmp ALTER COLUMN i RESET (n_distinct_inherited); +ALTER TABLE attmp ALTER COLUMN i SET (force_generic=on); ANALYZE attmp; +ALTER TABLE attmp ALTER COLUMN i RESET (force_generic); DROP TABLE attmp; DROP USER regress_alter_table_user1; -- 2.34.1