Hi hackers,
I'd like to submit a patch that improves the estimated rows for queries
containing (Var op Var) clauses by applying extended MCV statistics.
*New functions:*
* mcv_clauselist_selectivity_var_op_var() - calculates the selectivity
for (Var op Var) clauses.
* is_opclause_var_op_var() - Checks whether a clause is of the (Var op
Var) form.
*Implementation Details:*
* A new 'if' statement was added to the 'clause_selectivity_ext()'
function to handle (Var op Var) clauses. This allows the process to
locate matching MCV extended statistics and calculate selectivity
using the newly introduced function.
* Additionally, I added 'if' statement
in statext_is_compatible_clause_internal() function to determine
which columns are included in the clause, find matching extended
statistics, and then calculate selectivity through the new function.
I did the same in mcv_get_match_bitmap() to check what values are
true for (Var op Var).
* To support this, I created a new enum type to differentiate between
OR/AND and (Var op Var) clauses.
*Examples:*
create table t (a int, b int);
insert into t select mod(i,10), mod(i,10)+1 from
generate_series(1,100000) s(i);
analyze t;
explain select * from t where a < b;
`
Estimated: 33333
Actual: 100000
explain select * from t where a > b;
`
Estimated: 33333
Actual: 100000
create statistics s (mcv) on a,b from t;
analyze t;
explain select * from t where a < b;
`
Estimated without patch: 33333
Estimated with patch: 100000
Actual: 100000
explain select * from t where a > b;
`
Estimated without patch: 33333
Estimated with patch: 100000
Actual: 100000
If you want to see more examples, see regress tests in the patch.
*Previous thread:*
This feature was originally developed two years ago in [1], and at that
time, the approach was almost the same. My implementation uses dedicated
functions and 'if' statements directly for better readability and
maintainability. Additionally, there was a bug in the previous approach
that has been resolved with my patch. Here’s an example of the bug and
its fix:
CREATE TABLE foo (a int, b int);
INSERT INTO foo SELECT x/10+1, x FROM generate_series(1,10000) g(x);
ANALYZE foo;
EXPLAIN ANALYZE SELECT * FROM foo WHERE a = 1 OR (b > 0 AND b < 10);
`
Estimated: 18
Actual: 9
CREATE STATISTICS foo_s (mcv) ON a,b FROM foo;
ANALYZE foo;
EXPLAIN ANALYZE SELECT * FROM foo WHERE a = 1 OR (b > 0 AND b < 10);
`
Estimated previous patch: 18
Estimated current patch: 9
Actual: 9
[1]:
https://www.postgresql.org/message-id/flat/9e0a12e0-c05f-b193-ed3d-fe88bc1e5fe1%40enterprisedb.com
I look forward to any feedback or suggestions from the community.
Best regars,
Ilia Evdokimov
Tantor Labs LLC.
From fd468972f0ce27291523a28fdf0d9966c2fdf6e1 Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <ilya.evdoki...@tantorlabs.ru>
Date: Sat, 10 Aug 2024 14:35:25 +0300
Subject: [PATCH] Add support for (Var op Var) clause in extended MCV
statistics
Added a new leaf to the existing clauses tree, allowing the calculation
of selectivity specifically for (Var op Var) clauses. The new function
for this selectivity calculation has been integratedinto
the extended statistics mechanism, ensuring accurate application
during query planning.
---
src/backend/optimizer/path/clausesel.c | 25 +-
src/backend/statistics/README | 6 +-
src/backend/statistics/README.mcv | 6 +-
src/backend/statistics/extended_stats.c | 108 +++-
src/backend/statistics/mcv.c | 184 ++++--
.../statistics/extended_stats_internal.h | 6 +
src/include/statistics/statistics.h | 11 +-
src/test/regress/expected/stats_ext.out | 553 ++++++++++++++++++
src/test/regress/sql/stats_ext.sql | 166 ++++++
9 files changed, 982 insertions(+), 83 deletions(-)
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 0ab021c1e8..eec6c6182c 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -152,7 +152,7 @@ clauselist_selectivity_ext(PlannerInfo *root,
*/
s1 = statext_clauselist_selectivity(root, clauses, varRelid,
jointype, sjinfo, rel,
- &estimatedclauses, false);
+ &estimatedclauses, AND_TYPE);
}
/*
@@ -384,7 +384,7 @@ clauselist_selectivity_or(PlannerInfo *root,
*/
s1 = statext_clauselist_selectivity(root, clauses, varRelid,
jointype, sjinfo, rel,
- &estimatedclauses, true);
+ &estimatedclauses, OR_TYPE);
}
/*
@@ -691,6 +691,7 @@ clause_selectivity_ext(PlannerInfo *root,
Selectivity s1 = 0.5; /* default for any unhandled clause type */
RestrictInfo *rinfo = NULL;
bool cacheable = false;
+ Node *src = clause;
if (clause == NULL) /* can this still happen? */
return s1;
@@ -832,6 +833,7 @@ clause_selectivity_ext(PlannerInfo *root,
{
OpExpr *opclause = (OpExpr *) clause;
Oid opno = opclause->opno;
+ List *clauses = list_make1(src);
if (treat_as_join_clause(root, clause, rinfo, varRelid, sjinfo))
{
@@ -842,6 +844,25 @@ clause_selectivity_ext(PlannerInfo *root,
jointype,
sjinfo);
}
+ else if(use_extended_stats)
+ {
+ /* Check whether clauses are from one relation */
+ RelOptInfo *rel = find_single_rel_for_clauses(root, clauses);
+ Bitmapset *estimatedclauses = NULL;
+ if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
+ s1 = statext_clauselist_selectivity(root, clauses, varRelid,
+ jointype, sjinfo, rel,
+ &estimatedclauses, VAR_OP_VAR_TYPE);
+
+ if (bms_num_members(estimatedclauses) != 1)
+ {
+ /* If there is no multi-column MCV statistics */
+ s1 = restriction_selectivity(root, opno,
+ opclause->args,
+ opclause->inputcollid,
+ varRelid);
+ }
+ }
else
{
/* Estimate selectivity for a restriction clause. */
diff --git a/src/backend/statistics/README b/src/backend/statistics/README
index 13a97a3566..d33b039aad 100644
--- a/src/backend/statistics/README
+++ b/src/backend/statistics/README
@@ -28,11 +28,7 @@ Each type of statistics may be used to estimate some subset of clause types.
(a) functional dependencies - equality clauses (AND), possibly IS NULL
- (b) MCV lists - equality and inequality clauses (AND, OR, NOT), IS [NOT] NULL
-
-Currently, only OpExprs in the form Var op Const, or Const op Var are
-supported, however it's feasible to expand the code later to also estimate the
-selectivities on clauses such as Var op Var.
+ (b) MCV lists - equality and inequality clauses (AND, OR, NOT), IS [NOT] NULL, Var op Var
Complex clauses
diff --git a/src/backend/statistics/README.mcv b/src/backend/statistics/README.mcv
index a918fb5634..0ba169b26c 100644
--- a/src/backend/statistics/README.mcv
+++ b/src/backend/statistics/README.mcv
@@ -39,12 +39,10 @@ Currently MCV lists support estimation of the following clause types:
(b) inequality clauses WHERE (a < 1) AND (b >= 2)
(c) NULL clauses WHERE (a IS NULL) AND (b IS NOT NULL)
(d) OR clauses WHERE (a < 1) OR (b >= 2)
-
-It's possible to add support for additional clauses, for example:
-
(e) multi-var clauses WHERE (a > b)
-and possibly others. These are tasks for the future, not yet implemented.
+It's possible to add support for additional clauses, and possibly others.
+These are tasks for the future, not yet implemented.
Hashed MCV (not yet implemented)
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 99fdf208db..cb7d6d04ce 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1309,8 +1309,10 @@ choose_best_statistics(List *stats, char requiredkind, bool inh,
* (d) ScalarArrayOpExprs of the form (Var/Expr op ANY (Const)) or
* (Var/Expr op ALL (Const))
*
+ * (e) (Var op Var)
+ *
* In the future, the range of supported clauses may be expanded to more
- * complex cases, for example (Var op Var).
+ * complex cases.
*
* Arguments:
* clause: (sub)clause to be inspected (bare clause, not a RestrictInfo)
@@ -1365,15 +1367,13 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
RangeTblEntry *rte = root->simple_rte_array[relid];
OpExpr *expr = (OpExpr *) clause;
Node *clause_expr;
+ Node *clause_expr_left = NULL,
+ *clause_expr_right = NULL;
/* Only expressions with two arguments are considered compatible. */
if (list_length(expr->args) != 2)
return false;
- /* Check if the expression has the right shape */
- if (!examine_opclause_args(expr->args, &clause_expr, NULL, NULL))
- return false;
-
/*
* If it's not one of the supported operators ("=", "<", ">", etc.),
* just ignore the clause, as it's not compatible with MCV lists.
@@ -1411,14 +1411,27 @@ statext_is_compatible_clause_internal(PlannerInfo *root, Node *clause,
!get_func_leakproof(get_opcode(expr->opno)))
return false;
- /* Check (Var op Const) or (Const op Var) clauses by recursing. */
- if (IsA(clause_expr, Var))
- return statext_is_compatible_clause_internal(root, clause_expr,
+ if (examine_opclause_args(expr->args, &clause_expr, NULL, NULL))
+ {
+ /* Check (Var op Const) or (Const op Var) clauses by recursing. */
+ if (IsA(clause_expr, Var))
+ return statext_is_compatible_clause_internal(root, clause_expr,
relid, attnums, exprs);
- /* Otherwise we have (Expr op Const) or (Const op Expr). */
- *exprs = lappend(*exprs, clause_expr);
- return true;
+ /* Otherwise we have (Expr op Const) or (Const op Expr). */
+ *exprs = lappend(*exprs, clause_expr);
+ return true;
+ }
+ else if (is_opclause_var_op_var(expr->args, &clause_expr_left, &clause_expr_right))
+ {
+ /* Check (Var op Var) clauses by recursing. */
+ if (!statext_is_compatible_clause_internal(root, clause_expr_left, relid, attnums, exprs))
+ return false;
+ if (!statext_is_compatible_clause_internal(root, clause_expr_right, relid, attnums, exprs))
+ return false;
+
+ return true;
+ }
}
/* Var/Expr IN Array */
@@ -1716,13 +1729,13 @@ static Selectivity
statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
JoinType jointype, SpecialJoinInfo *sjinfo,
RelOptInfo *rel, Bitmapset **estimatedclauses,
- bool is_or)
+ enum ClauseType clause_type)
{
ListCell *l;
Bitmapset **list_attnums; /* attnums extracted from the clause */
List **list_exprs; /* expressions matched to any statistic */
int listidx;
- Selectivity sel = (is_or) ? 0.0 : 1.0;
+ Selectivity sel = (clause_type == OR_TYPE) ? 0.0 : 1.0;
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
/* check if there's any stats that might be useful for us. */
@@ -1857,7 +1870,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
list_exprs[listidx] = NULL;
}
- if (is_or)
+ if (clause_type == OR_TYPE)
{
bool *or_matches = NULL;
Selectivity simple_or_sel = 0.0,
@@ -1957,7 +1970,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
*/
sel = sel + stat_sel - sel * stat_sel;
}
- else /* Implicitly-ANDed list of clauses */
+ else if (clause_type == AND_TYPE)
{
Selectivity simple_sel,
mcv_sel,
@@ -1991,6 +2004,16 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
/* Factor this into the overall result */
sel *= stat_sel;
}
+ else if (clause_type == VAR_OP_VAR_TYPE)
+ {
+ /*
+ * Multi-column estimate using MCV statistics, along with base and
+ * total selectivities.
+ */
+ sel = mcv_clauselist_selectivity_var_op_var(root, stat, stat_clauses,
+ varRelid, jointype, sjinfo,
+ rel);
+ }
}
return sel;
@@ -2004,19 +2027,19 @@ Selectivity
statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
JoinType jointype, SpecialJoinInfo *sjinfo,
RelOptInfo *rel, Bitmapset **estimatedclauses,
- bool is_or)
+ enum ClauseType clause_type)
{
Selectivity sel;
/* First, try estimating clauses using a multivariate MCV list. */
sel = statext_mcv_clauselist_selectivity(root, clauses, varRelid, jointype,
- sjinfo, rel, estimatedclauses, is_or);
+ sjinfo, rel, estimatedclauses, clause_type);
/*
* Functional dependencies only work for clauses connected by AND, so for
* OR clauses we're done.
*/
- if (is_or)
+ if (clause_type == OR_TYPE)
return sel;
/*
@@ -2102,6 +2125,55 @@ examine_opclause_args(List *args, Node **exprp, Const **cstp,
return true;
}
+/*
+ * is_opclause_var_op_var
+ * Split an operator expression's arguments into Var and Var parts.
+ *
+ * Attempts to match the arguments to (Var op Var), possibly with
+ * a RelabelType on top. When the expression matches this
+ * form, returns true, otherwise returns false.
+ *
+ * Optionally returns pointers to the extracted Var nodes, when passed
+ * non-null pointers (exprp_left, exprp_right).
+ */
+bool
+is_opclause_var_op_var(List *args, Node **exprp_left, Node **exprp_right)
+{
+ Node *leftop,
+ *rightop;
+ Node *expr_left,
+ *expr_right;
+
+
+ /* enforced by statext_is_compatible_clause_internal */
+ Assert(list_length(args) == 2);
+
+ leftop = linitial(args);
+ rightop = lsecond(args);
+
+ if (IsA(leftop, RelabelType))
+ leftop = (Node *) ((RelabelType *) leftop)->arg;
+
+ if (IsA(rightop, RelabelType))
+ rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+ if (IsA(rightop, Var) && IsA(leftop, Var))
+ {
+ expr_left = (Node *) leftop;
+ expr_right = (Node *) rightop;
+ }
+ else
+ return false;
+
+ /* return pointers to the extracted parts if requested */
+ if (exprp_left && exprp_right)
+ {
+ *exprp_left = expr_left;
+ *exprp_right = expr_right;
+ }
+
+ return true;
+}
/*
* Compute statistics about expressions of a relation.
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index b0e9aead84..2e2c028e0f 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -1610,6 +1610,7 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
Assert(mcvlist->nitems <= STATS_MCVLIST_MAX_ITEMS);
matches = palloc(sizeof(bool) * mcvlist->nitems);
+
memset(matches, !is_or, sizeof(bool) * mcvlist->nitems);
/*
@@ -1639,72 +1640,110 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
bool expronleft;
int idx;
Oid collid;
+ Node *clause_expr_left,
+ *clause_expr_right;
fmgr_info(get_opcode(expr->opno), &opproc);
/* extract the var/expr and const from the expression */
- if (!examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
- elog(ERROR, "incompatible clause");
+ if (examine_opclause_args(expr->args, &clause_expr, &cst, &expronleft))
+ {
+ /* match the attribute/expression to a dimension of the statistic */
+ idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
- /* match the attribute/expression to a dimension of the statistic */
- idx = mcv_match_expression(clause_expr, keys, exprs, &collid);
+ /*
+ * Walk through the MCV items and evaluate the current clause. We
+ * can skip items that were already ruled out, and terminate if
+ * there are no remaining MCV items that might possibly match.
+ */
+ for (int i = 0; i < mcvlist->nitems; i++)
+ {
+ bool match = true;
+ MCVItem *item = &mcvlist->items[i];
- /*
- * Walk through the MCV items and evaluate the current clause. We
- * can skip items that were already ruled out, and terminate if
- * there are no remaining MCV items that might possibly match.
- */
- for (int i = 0; i < mcvlist->nitems; i++)
+ Assert(idx >= 0);
+
+ /*
+ * When the MCV item or the Const value is NULL we can treat
+ * this as a mismatch. We must not call the operator because
+ * of strictness.
+ */
+ if (item->isnull[idx] || cst->constisnull)
+ {
+ matches[i] = RESULT_MERGE(matches[i], is_or, false);
+ continue;
+ }
+
+ /*
+ * Skip MCV items that can't change result in the bitmap. Once
+ * the value gets false for AND-lists, or true for OR-lists,
+ * we don't need to look at more clauses.
+ */
+ if (RESULT_IS_FINAL(matches[i], is_or))
+ continue;
+
+ /*
+ * First check whether the constant is below the lower
+ * boundary (in that case we can skip the bucket, because
+ * there's no overlap).
+ *
+ * We don't store collations used to build the statistics, but
+ * we can use the collation for the attribute itself, as
+ * stored in varcollid. We do reset the statistics after a
+ * type change (including collation change), so this is OK.
+ * For expressions, we use the collation extracted from the
+ * expression itself.
+ */
+ if (expronleft)
+ match = DatumGetBool(FunctionCall2Coll(&opproc,
+ collid,
+ item->values[idx],
+ cst->constvalue));
+ else
+ match = DatumGetBool(FunctionCall2Coll(&opproc,
+ collid,
+ cst->constvalue,
+ item->values[idx]));
+
+ /* update the match bitmap with the result */
+ matches[i] = RESULT_MERGE(matches[i], is_or, match);
+ }
+ }
+ else if(is_opclause_var_op_var(expr->args, &clause_expr_left, &clause_expr_right))
{
- bool match = true;
- MCVItem *item = &mcvlist->items[i];
+ /* extract the var/expr and const from the expression */
+ int idx_left = mcv_match_expression(clause_expr_left, keys, exprs, &collid);
+ int idx_right = mcv_match_expression(clause_expr_right, keys, exprs, &collid);
- Assert(idx >= 0);
+ Assert((idx_left >= 0) && (idx_left < bms_num_members(keys) + list_length(exprs)));
+ Assert((idx_right >= 0) && (idx_right < bms_num_members(keys) + list_length(exprs)));
- /*
- * When the MCV item or the Const value is NULL we can treat
- * this as a mismatch. We must not call the operator because
- * of strictness.
- */
- if (item->isnull[idx] || cst->constisnull)
+ for (int i = 0; i < mcvlist->nitems; i++)
{
- matches[i] = RESULT_MERGE(matches[i], is_or, false);
- continue;
- }
+ MCVItem *item = &mcvlist->items[i];
- /*
- * Skip MCV items that can't change result in the bitmap. Once
- * the value gets false for AND-lists, or true for OR-lists,
- * we don't need to look at more clauses.
- */
- if (RESULT_IS_FINAL(matches[i], is_or))
- continue;
+ /*
+ * When either of the MCV items is NULL we can treat this
+ * as a mismatch. We must not call the operator because
+ * of strictness.
+ */
+ if (item->isnull[idx_left] || item->isnull[idx_right])
+ {
+ matches[i] = false;
+ continue;
+ }
- /*
- * First check whether the constant is below the lower
- * boundary (in that case we can skip the bucket, because
- * there's no overlap).
- *
- * We don't store collations used to build the statistics, but
- * we can use the collation for the attribute itself, as
- * stored in varcollid. We do reset the statistics after a
- * type change (including collation change), so this is OK.
- * For expressions, we use the collation extracted from the
- * expression itself.
- */
- if (expronleft)
- match = DatumGetBool(FunctionCall2Coll(&opproc,
- collid,
- item->values[idx],
- cst->constvalue));
- else
- match = DatumGetBool(FunctionCall2Coll(&opproc,
+ /*
+ * We don't store collations used to build the statistics,
+ * but we can use the collation for the attribute itself,
+ * as stored in varcollid. We do reset the statistics after
+ * a type change (including collation change), so this is OK.
+ */
+ matches[i] = DatumGetBool(FunctionCall2Coll(&opproc,
collid,
- cst->constvalue,
- item->values[idx]));
-
- /* update the match bitmap with the result */
- matches[i] = RESULT_MERGE(matches[i], is_or, match);
+ item->values[idx_left],
+ item->values[idx_right]));
+ }
}
}
else if (IsA(clause, ScalarArrayOpExpr))
@@ -2083,6 +2122,45 @@ mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
return s;
}
+/*
+ * mcv_clauselist_selectivity_var_op_var
+ * Use MCV statistics to estimate the selectivity of (Var op Var) clause.
+ *
+ * This determines which MCV items match clause and returns the sum of
+ * the frequencies of those items.
+ */
+Selectivity
+mcv_clauselist_selectivity_var_op_var(PlannerInfo *root, StatisticExtInfo *stat,
+ List *clauses, int varRelid,
+ JoinType jointype, SpecialJoinInfo *sjinfo,
+ RelOptInfo *rel)
+{
+ int i;
+ MCVList *mcv;
+ Selectivity s = 0.0;
+ RangeTblEntry *rte = root->simple_rte_array[rel->relid];
+
+ /* match/mismatch bitmap for each MCV item */
+ bool *matches = NULL;
+
+ /* load the MCV list stored in the statistics object */
+ mcv = statext_mcv_load(stat->statOid, rte->inh);
+
+ /* build a match bitmap for the clause */
+ matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
+ mcv, false);
+
+ /* sum frequencies for all the matching MCV items */
+ for (i = 0; i < mcv->nitems; i++)
+ {
+ if (matches[i] != false)
+ {
+ s += mcv->items[i].frequency;
+ }
+ }
+
+ return s;
+}
/*
* mcv_clause_selectivity_or
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index 8eed9b338d..66b8b0d0b6 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -127,4 +127,10 @@ extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root,
Selectivity *overlap_basesel,
Selectivity *totalsel);
+extern Selectivity mcv_clauselist_selectivity_var_op_var(PlannerInfo *root,
+ StatisticExtInfo *stat,
+ List *clauses, int varRelid,
+ JoinType jointype, SpecialJoinInfo *sjinfo,
+ RelOptInfo *rel);
+
#endif /* EXTENDED_STATS_INTERNAL_H */
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
index 7f2bf18716..d36b866436 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -94,6 +94,14 @@ typedef struct MCVList
MCVItem items[FLEXIBLE_ARRAY_MEMBER]; /* array of MCV items */
} MCVList;
+/* Clause types of MCV lists */
+typedef enum ClauseType
+{
+ OR_TYPE, /* OR-clause */
+ AND_TYPE, /* AND-clause */
+ VAR_OP_VAR_TYPE, /* Var op Var */
+} ClauseType;
+
extern MVNDistinct *statext_ndistinct_load(Oid mvoid, bool inh);
extern MVDependencies *statext_dependencies_load(Oid mvoid, bool inh);
extern MCVList *statext_mcv_load(Oid mvoid, bool inh);
@@ -118,7 +126,7 @@ extern Selectivity statext_clauselist_selectivity(PlannerInfo *root,
SpecialJoinInfo *sjinfo,
RelOptInfo *rel,
Bitmapset **estimatedclauses,
- bool is_or);
+ enum ClauseType clause_type);
extern bool has_stats_of_kind(List *stats, char requiredkind);
extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind,
bool inh,
@@ -126,5 +134,6 @@ extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind,
List **clause_exprs,
int nclauses);
extern HeapTuple statext_expressions_load(Oid stxoid, bool inh, int idx);
+extern bool is_opclause_var_op_var(List *args, Node **exprp_left, Node **exprp_right);
#endif /* STATISTICS_H */
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 8c4da95508..5a76ddbc72 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -2005,6 +2005,42 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
343 | 200
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = c');
+ estimated | actual
+-----------+--------
+ 25 | 1250
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a != c');
+ estimated | actual
+-----------+--------
+ 4975 | 3750
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+ estimated | actual
+-----------+--------
+ 1667 | 3750
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a >= c');
+ estimated | actual
+-----------+--------
+ 1667 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+ estimated | actual
+-----------+--------
+ 1667 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= c');
+ estimated | actual
+-----------+--------
+ 1667 | 1250
+(1 row)
+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
estimated | actual
-----------+--------
@@ -2158,6 +2194,42 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
200 | 200
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = c');
+ estimated | actual
+-----------+--------
+ 1250 | 1250
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a != c');
+ estimated | actual
+-----------+--------
+ 3750 | 3750
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+ estimated | actual
+-----------+--------
+ 3750 | 3750
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a >= c');
+ estimated | actual
+-----------+--------
+ 5000 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= c');
+ estimated | actual
+-----------+--------
+ 1250 | 1250
+(1 row)
+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
estimated | actual
-----------+--------
@@ -2573,6 +2645,109 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND
3750 | 2500
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = c');
+ estimated | actual
+-----------+--------
+ 25 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a != c');
+ estimated | actual
+-----------+--------
+ 4975 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+ estimated | actual
+-----------+--------
+ 1667 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a >= c');
+ estimated | actual
+-----------+--------
+ 1667 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+ estimated | actual
+-----------+--------
+ 1667 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= c');
+ estimated | actual
+-----------+--------
+ 1667 | 0
+(1 row)
+
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+ estimated | actual
+-----------+--------
+ 25 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b != d');
+ estimated | actual
+-----------+--------
+ 4975 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b < d');
+ estimated | actual
+-----------+--------
+ 1667 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b <= d');
+ estimated | actual
+-----------+--------
+ 1667 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b > d');
+ estimated | actual
+-----------+--------
+ 1667 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b >= d');
+ estimated | actual
+-----------+--------
+ 1667 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b != d');
+ estimated | actual
+-----------+--------
+ 4975 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b < d');
+ estimated | actual
+-----------+--------
+ 1667 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b <= d');
+ estimated | actual
+-----------+--------
+ 1667 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b > d');
+ estimated | actual
+-----------+--------
+ 1667 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b >= d');
+ estimated | actual
+-----------+--------
+ 1667 | 2500
+(1 row)
+
-- create statistics
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, d FROM mcv_lists;
ANALYZE mcv_lists;
@@ -2606,6 +2781,72 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND
2500 | 2500
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+ estimated | actual
+-----------+--------
+ 2500 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b != d');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b < d');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b <= d');
+ estimated | actual
+-----------+--------
+ 2500 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b > d');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b >= d');
+ estimated | actual
+-----------+--------
+ 2500 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b != d');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b < d');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b <= d');
+ estimated | actual
+-----------+--------
+ 2500 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b > d');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b >= d');
+ estimated | actual
+-----------+--------
+ 2500 | 2500
+(1 row)
+
-- mcv with pass-by-ref fixlen types, e.g. uuid
CREATE TABLE mcv_lists_uuid (
a UUID,
@@ -2700,6 +2941,78 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND
1094 | 0
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b');
+ estimated | actual
+-----------+--------
+ 9950 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a != b');
+ estimated | actual
+-----------+--------
+ 50 | 7500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a < b');
+ estimated | actual
+-----------+--------
+ 3333 | 10000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a <= b');
+ estimated | actual
+-----------+--------
+ 3333 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a > b');
+ estimated | actual
+-----------+--------
+ 3333 | 7500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a >= b');
+ estimated | actual
+-----------+--------
+ 3333 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b AND b = c');
+ estimated | actual
+-----------+--------
+ 50 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a != b AND b != c');
+ estimated | actual
+-----------+--------
+ 50 | 1250
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a < b AND b < c');
+ estimated | actual
+-----------+--------
+ 1111 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a <= b AND b <= c');
+ estimated | actual
+-----------+--------
+ 1111 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a > b AND b > c');
+ estimated | actual
+-----------+--------
+ 1111 | 1250
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a >= b AND b >= c');
+ estimated | actual
+-----------+--------
+ 1111 | 0
+(1 row)
+
CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c
FROM mcv_lists_bool;
ANALYZE mcv_lists_bool;
@@ -2727,6 +3040,78 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND
1 | 0
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b');
+ estimated | actual
+-----------+--------
+ 2500 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a != b');
+ estimated | actual
+-----------+--------
+ 7500 | 7500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a < b');
+ estimated | actual
+-----------+--------
+ 10000 | 10000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a <= b');
+ estimated | actual
+-----------+--------
+ 2500 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a > b');
+ estimated | actual
+-----------+--------
+ 7500 | 7500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a >= b');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b AND b = c');
+ estimated | actual
+-----------+--------
+ 8750 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a != b AND b != c');
+ estimated | actual
+-----------+--------
+ 7500 | 1250
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a < b AND b < c');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a <= b AND b <= c');
+ estimated | actual
+-----------+--------
+ 8750 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a > b AND b > c');
+ estimated | actual
+-----------+--------
+ 1250 | 1250
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a >= b AND b >= c');
+ estimated | actual
+-----------+--------
+ 10000 | 0
+(1 row)
+
-- mcv covering just a small fraction of data
CREATE TABLE mcv_lists_partial (
a INT,
@@ -2914,6 +3299,90 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR
2649 | 1572
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b AND c = d');
+ estimated | actual
+-----------+--------
+ 1 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a != b AND c != d');
+ estimated | actual
+-----------+--------
+ 4950 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b AND c > d');
+ estimated | actual
+-----------+--------
+ 556 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b AND c >= d');
+ estimated | actual
+-----------+--------
+ 556 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b OR c = d');
+ estimated | actual
+-----------+--------
+ 50 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a != b OR c != d');
+ estimated | actual
+-----------+--------
+ 5000 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b OR c > d');
+ estimated | actual
+-----------+--------
+ 2778 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b OR c >= d');
+ estimated | actual
+-----------+--------
+ 2778 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b AND d >= c');
+ estimated | actual
+-----------+--------
+ 556 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b OR d >= c');
+ estimated | actual
+-----------+--------
+ 2778 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b');
+ estimated | actual
+-----------+--------
+ 1667 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a > b');
+ estimated | actual
+-----------+--------
+ 1667 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b');
+ estimated | actual
+-----------+--------
+ 1667 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a >= b');
+ estimated | actual
+-----------+--------
+ 1667 | 5000
+(1 row)
+
-- create separate MCV statistics
CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi;
CREATE STATISTICS mcv_lists_multi_2 (mcv) ON c, d FROM mcv_lists_multi;
@@ -2960,6 +3429,90 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR
1571 | 1572
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b AND c = d');
+ estimated | actual
+-----------+--------
+ 5000 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a != b AND c != d');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b AND c > d');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b AND c >= d');
+ estimated | actual
+-----------+--------
+ 5000 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b OR c = d');
+ estimated | actual
+-----------+--------
+ 5000 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a != b OR c != d');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b OR c > d');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b OR c >= d');
+ estimated | actual
+-----------+--------
+ 5000 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b AND d >= c');
+ estimated | actual
+-----------+--------
+ 5000 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b OR d >= c');
+ estimated | actual
+-----------+--------
+ 5000 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a > b');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b');
+ estimated | actual
+-----------+--------
+ 5000 | 5000
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a >= b');
+ estimated | actual
+-----------+--------
+ 5000 | 5000
+(1 row)
+
DROP TABLE mcv_lists_multi;
-- statistics on integer expressions
CREATE TABLE expr_stats (a int, b int, c int);
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 0c08a6cc42..34513224ed 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -999,6 +999,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a != c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a >= c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= c');
+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)');
@@ -1054,6 +1066,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a != c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a >= c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= c');
+
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)');
@@ -1255,6 +1279,40 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND (b = ''x'' OR d = ''x'')');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a != c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a >= c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a <= c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b != d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b < d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b <= d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b > d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b >= d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b != d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b < d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b <= d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b > d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b >= d');
+
-- create statistics
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, d FROM mcv_lists;
@@ -1273,6 +1331,28 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IS NULL AND (b = ''x'' OR d = ''x'')');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b != d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b < d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b <= d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b > d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b >= d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b != d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b < d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b <= d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b > d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b >= d');
+
-- mcv with pass-by-ref fixlen types, e.g. uuid
CREATE TABLE mcv_lists_uuid (
a UUID,
@@ -1348,6 +1428,30 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a != b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a < b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a <= b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a > b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a >= b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b AND b = c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a != b AND b != c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a < b AND b < c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a <= b AND b <= c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a > b AND b > c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a >= b AND b >= c');
+
CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c
FROM mcv_lists_bool;
@@ -1361,6 +1465,30 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a != b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a < b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a <= b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a > b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a >= b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = b AND b = c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a != b AND b != c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a < b AND b < c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a <= b AND b <= c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a > b AND b > c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a >= b AND b >= c');
+
-- mcv covering just a small fraction of data
CREATE TABLE mcv_lists_partial (
a INT,
@@ -1461,6 +1589,25 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AN
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 AND b = 0) OR (c = 0 AND d = 0)');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b AND c = d');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a != b AND c != d');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b AND c > d');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b AND c >= d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b OR c = d');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a != b OR c != d');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b OR c > d');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b OR c >= d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b AND d >= c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b OR d >= c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a > b');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a >= b');
+
-- create separate MCV statistics
CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi;
CREATE STATISTICS mcv_lists_multi_2 (mcv) ON c, d FROM mcv_lists_multi;
@@ -1475,6 +1622,25 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AN
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 AND b = 0) OR (c = 0 AND d = 0)');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b AND c = d');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a != b AND c != d');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b AND c > d');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b AND c >= d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b OR c = d');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a != b OR c != d');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b OR c > d');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b OR c >= d');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b AND d >= c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b OR d >= c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a < b');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a > b');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a <= b');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a >= b');
+
DROP TABLE mcv_lists_multi;
--
2.34.1