Hi, I realized there's one more thing that probably needs discussing. Essentially, these two clause types are the same:
a IN (1, 2, 3) (a = 1 OR a = 2 OR a = 3) but with 8f321bd1 we only recognize the first one as compatible with functional dependencies. It was always the case that we estimated those two clauses a bit differently, but the differences were usually small. But now that we recognize IN as compatible with dependencies, the difference may be much larger, which bugs me a bit ... So I wonder if we should recognize the special form of an OR clause, with all Vars referencing to the same attribute etc. and treat this as supported by functional dependencies - the attached patch does that. MCV lists there's already no difference because OR clauses are supported. The question is whether we want to do this, and whether we should also teach the per-column estimates to recognize this special case of IN clause. That would allow producing exactly the same estimates even with functional dependencies - recognizing the OR clause as supported gets us only half-way there, because we still use estimates for each clause (and those will be slightly different). regards -- Tomas Vondra http://www.2ndQuadrant.com PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index 72dc1cd1bd..5f9b43bf7f 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -753,24 +753,27 @@ pg_dependencies_send(PG_FUNCTION_ARGS) static bool dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) { - RestrictInfo *rinfo = (RestrictInfo *) clause; Var *var; - if (!IsA(rinfo, RestrictInfo)) - return false; + if (IsA(clause, RestrictInfo)) + { + RestrictInfo *rinfo = (RestrictInfo *) clause; - /* Pseudoconstants are not interesting (they couldn't contain a Var) */ - if (rinfo->pseudoconstant) - return false; + /* Pseudoconstants are not interesting (they couldn't contain a Var) */ + if (rinfo->pseudoconstant) + return false; - /* Clauses referencing multiple, or no, varnos are incompatible */ - if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON) - return false; + /* Clauses referencing multiple, or no, varnos are incompatible */ + if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON) + return false; - if (is_opclause(rinfo->clause)) + clause = (Node *) rinfo->clause; + } + + if (is_opclause(clause)) { /* If it's an opclause, check for Var = Const or Const = Var. */ - OpExpr *expr = (OpExpr *) rinfo->clause; + OpExpr *expr = (OpExpr *) clause; /* Only expressions with two arguments are candidates. */ if (list_length(expr->args) != 2) @@ -801,10 +804,10 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) /* OK to proceed with checking "var" */ } - else if (IsA(rinfo->clause, ScalarArrayOpExpr)) + else if (IsA(clause, ScalarArrayOpExpr)) { /* If it's an scalar array operator, check for Var IN Const. */ - ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) rinfo->clause; + ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) clause; /* * Reject ALL() variant, we only care about ANY/IN. @@ -839,13 +842,43 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) /* OK to proceed with checking "var" */ } - else if (is_notclause(rinfo->clause)) + else if (is_orclause(clause)) + { + BoolExpr *expr = (BoolExpr *) clause; + ListCell *lc; + + /* start with no attribute number */ + *attnum = InvalidAttrNumber; + + foreach(lc, expr->args) + { + AttrNumber clause_attnum; + + /* + * Had we found incompatible clause in the arguments, treat the + * whole clause as incompatible. + */ + if (!dependency_is_compatible_clause((Node *) lfirst(lc), + relid, &clause_attnum)) + return false; + + if (*attnum == InvalidAttrNumber) + *attnum = clause_attnum; + + if (*attnum != clause_attnum) + return false; + } + + /* the Var is already checked by the recursive call */ + return true; + } + else if (is_notclause(clause)) { /* * "NOT x" can be interpreted as "x = false", so get the argument and * proceed with seeing if it's a suitable Var. */ - var = (Var *) get_notclausearg(rinfo->clause); + var = (Var *) get_notclausearg(clause); } else { @@ -853,7 +886,7 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum) * A boolean expression "x" can be interpreted as "x = true", so * proceed with seeing if it's a suitable Var. */ - var = (Var *) rinfo->clause; + var = (Var *) clause; } /* diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 9fa659c71d..74da75c3c6 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -458,6 +458,32 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE 3 | 400 (1 row) +-- OR clauses referencing the same attribute +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1'''); + estimated | actual +-----------+-------- + 2 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')'); + estimated | actual +-----------+-------- + 4 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')'); + estimated | actual +-----------+-------- + 8 | 200 +(1 row) + +-- OR clauses referencing different attributes +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1'''); + estimated | actual +-----------+-------- + 3 | 100 +(1 row) + -- ANY SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1'''); estimated | actual @@ -585,6 +611,32 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE 400 | 400 (1 row) +-- OR clauses referencing the same attribute +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1'''); + estimated | actual +-----------+-------- + 99 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')'); + estimated | actual +-----------+-------- + 99 | 100 +(1 row) + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')'); + estimated | actual +-----------+-------- + 197 | 200 +(1 row) + +-- OR clauses referencing different attributes are incompatible +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1'''); + estimated | actual +-----------+-------- + 3 | 100 +(1 row) + -- ANY SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1'''); estimated | actual diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index 0ece39a279..d9c935896b 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -286,6 +286,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)'); +-- OR clauses referencing the same attribute +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1'''); + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')'); + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')'); + +-- OR clauses referencing different attributes +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1'''); + -- ANY SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1'''); @@ -335,6 +345,16 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a IN (1, 2, 26, 27, 51, 52, 76, 77) AND b IN (''1'', ''2'', ''26'', ''27'') AND c IN (1, 2)'); +-- OR clauses referencing the same attribute +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND b = ''1'''); + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 51) AND (b = ''1'' OR b = ''2'')'); + +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR a = 2 OR a = 51 OR a = 52) AND (b = ''1'' OR b = ''2'')'); + +-- OR clauses referencing different attributes are incompatible +SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a = 1 OR b = ''1'') AND b = ''1'''); + -- ANY SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = ANY (ARRAY[1, 51]) AND b = ''1''');