On Fri, Mar 06, 2020 at 01:15:56AM +0100, Tomas Vondra wrote:
Hi,
Here is a rebased version of this patch series. I've polished the first
two parts a bit - estimation of OR clauses and (Var op Var) clauses, and
added a bunch of regression tests to exercise this code. It's not quite
there yet, but I think it's feasible to get this committed for PG13.
The last part (extended stats on expressions) is far from complete, and
it's not feasible to get it into PG13. There's too much missing stuff.
Meh, the last part with stats on expression is not quite right and it
breaks the cputube tester, so here are the first two parts only. I don't
plan to pursue the 0003 part for PG13 anyway, as mentioned.
regards
--
Tomas Vondra http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From d7f639b6150fe9fd179066af2a536465d877842a Mon Sep 17 00:00:00 2001
From: Tomas Vondra <to...@2ndquadrant.com>
Date: Mon, 2 Dec 2019 23:02:17 +0100
Subject: [PATCH 1/3] Support using extended stats for parts of OR clauses
---
src/backend/optimizer/path/clausesel.c | 109 +++++++++++++++---
src/backend/statistics/extended_stats.c | 45 +++++++-
src/backend/statistics/mcv.c | 5 +-
.../statistics/extended_stats_internal.h | 3 +-
src/include/statistics/statistics.h | 3 +-
src/test/regress/expected/stats_ext.out | 3 +-
src/test/regress/sql/stats_ext.sql | 1 -
7 files changed, 138 insertions(+), 31 deletions(-)
diff --git a/src/backend/optimizer/path/clausesel.c
b/src/backend/optimizer/path/clausesel.c
index a3ebe10592..8c1a404ce2 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -92,7 +92,7 @@ clauselist_selectivity(PlannerInfo *root,
*/
s1 *= statext_clauselist_selectivity(root, clauses, varRelid,
jointype, sjinfo, rel,
-
&estimatedclauses);
+
&estimatedclauses, false);
}
/*
@@ -104,6 +104,89 @@ clauselist_selectivity(PlannerInfo *root,
estimatedclauses);
}
+/*
+ * clauselist_selectivity_or -
+ * Compute the selectivity of an implicitly-ORed list of boolean
+ * expression clauses. The list can be empty, in which case 0.0
+ * must be returned. List elements may be either RestrictInfos
+ * or bare expression clauses --- the former is preferred since
+ * it allows caching of results.
+ *
+ * See clause_selectivity() for the meaning of the additional parameters.
+ *
+ * The basic approach is to apply extended statistics first, on as many
+ * clauses as possible, in order to capture cross-column dependencies etc.
+ * The remaining clauses are then estimated using regular statistics tracked
+ * for individual columns. This is done by simply passing the clauses to
+ * clauselist_selectivity and then combining the selectivities using the
+ * regular formula (s1+s2 - s1*s2).
+ */
+static Selectivity
+clauselist_selectivity_or(PlannerInfo *root,
+ List *clauses,
+ int varRelid,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo)
+{
+ ListCell *lc;
+ Selectivity s1 = 0.0;
+ RelOptInfo *rel;
+ Bitmapset *estimatedclauses = NULL;
+ int listidx;
+
+ /*
+ * Determine if these clauses reference a single relation. If so, and
if
+ * it has extended statistics, try to apply those.
+ */
+ rel = find_single_rel_for_clauses(root, clauses);
+ if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
+ {
+ /*
+ * Estimate as many clauses as possible using extended
statistics.
+ *
+ * 'estimatedclauses' tracks the 0-based list position index of
+ * clauses that we've estimated using extended statistics, and
that
+ * should be ignored.
+ *
+ * XXX We can't multiply with current value, because for OR
clauses
+ * we start with 0.0, so we simply assign to s1 directly.
+ */
+ s1 = statext_clauselist_selectivity(root, clauses, varRelid,
+
jointype, sjinfo, rel,
+
&estimatedclauses, true);
+ }
+
+ /*
+ * Selectivities of the remaining clauses for an OR clause are computed
+ * as s1+s2 - s1*s2 to account for the probable overlap of selected
tuple
+ * sets. The clauses estimated using extended statistics are effectively
+ * treated as a single clause.
+ *
+ * XXX is this too conservative?
+ */
+ listidx = -1;
+ foreach(lc, clauses)
+ {
+ Selectivity s2;
+
+ listidx++;
+
+ /* skip already estimated clauses */
+ if (bms_is_member(listidx, estimatedclauses))
+ continue;
+
+ s2 = clause_selectivity(root,
+ (Node *)
lfirst(lc),
+ varRelid,
+ jointype,
+ sjinfo);
+
+ s1 = s1 + s2 - s1 * s2;
+ }
+
+ return s1;
+}
+
/*
* clauselist_selectivity_simple -
* Compute the selectivity of an implicitly-ANDed list of boolean
@@ -735,24 +818,14 @@ clause_selectivity(PlannerInfo *root,
else if (is_orclause(clause))
{
/*
- * Selectivities for an OR clause are computed as s1+s2 - s1*s2
to
- * account for the probable overlap of selected tuple sets.
- *
- * XXX is this too conservative?
+ * Almost the same thing as clauselist_selectivity, but with the
+ * clauses connected by OR.
*/
- ListCell *arg;
-
- s1 = 0.0;
- foreach(arg, ((BoolExpr *) clause)->args)
- {
- Selectivity s2 = clause_selectivity(root,
-
(Node *) lfirst(arg),
-
varRelid,
-
jointype,
-
sjinfo);
-
- s1 = s1 + s2 - s1 * s2;
- }
+ s1 = clauselist_selectivity_or(root,
+
((BoolExpr *) clause)->args,
+
varRelid,
+
jointype,
+
sjinfo);
}
else if (is_opclause(clause) || IsA(clause, DistinctExpr))
{
diff --git a/src/backend/statistics/extended_stats.c
b/src/backend/statistics/extended_stats.c
index 03e69d057f..24ece6f99c 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1225,7 +1225,8 @@ statext_is_compatible_clause(PlannerInfo *root, Node
*clause, Index relid,
static Selectivity
statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int
varRelid,
JoinType
jointype, SpecialJoinInfo *sjinfo,
- RelOptInfo
*rel, Bitmapset **estimatedclauses)
+ RelOptInfo
*rel, Bitmapset **estimatedclauses,
+ bool is_or)
{
ListCell *l;
Bitmapset **list_attnums;
@@ -1317,8 +1318,32 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root,
List *clauses, int varReli
* columns/clauses. We'll then use the various selectivities
computed from
* MCV list to improve it.
*/
- simple_sel = clauselist_selectivity_simple(root, stat_clauses,
varRelid,
-
jointype, sjinfo, NULL);
+ if (is_or)
+ {
+ ListCell *lc;
+ Selectivity s1 = 0.0,
+ s2;
+
+ /*
+ * Selectivities of OR clauses are computed s1+s2 -
s1*s2 to account
+ * for the probable overlap of selected tuple sets.
+ */
+ foreach(lc, stat_clauses)
+ {
+ s2 = clause_selectivity(root,
+
(Node *) lfirst(lc),
+
varRelid,
+
jointype,
+
sjinfo);
+
+ s1 = s1 + s2 - s1 * s2;
+ }
+
+ simple_sel = s1;
+ }
+ else
+ simple_sel = clauselist_selectivity_simple(root,
stat_clauses, varRelid,
+
jointype, sjinfo, NULL);
/*
* Now compute the multi-column estimate from the MCV list,
along with the
@@ -1326,7 +1351,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root,
List *clauses, int varReli
*/
mcv_sel = mcv_clauselist_selectivity(root, stat, stat_clauses,
varRelid,
jointype, sjinfo, rel,
-
&mcv_basesel, &mcv_totalsel);
+
&mcv_basesel, &mcv_totalsel, is_or);
/* Estimated selectivity of values not covered by MCV matches */
other_sel = simple_sel - mcv_basesel;
@@ -1354,13 +1379,21 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root,
List *clauses, int varReli
Selectivity
statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
JoinType jointype,
SpecialJoinInfo *sjinfo,
- RelOptInfo *rel,
Bitmapset **estimatedclauses)
+ RelOptInfo *rel,
Bitmapset **estimatedclauses,
+ bool is_or)
{
Selectivity sel;
/* First, try estimating clauses using a multivariate MCV list. */
sel = statext_mcv_clauselist_selectivity(root, clauses, varRelid,
jointype,
-
sjinfo, rel, estimatedclauses);
+
sjinfo, rel, estimatedclauses, is_or);
+
+ /*
+ * Functional dependencies only work for clauses connected by AND, so
for
+ * OR clauses we're done.
+ */
+ if (is_or)
+ return sel;
/*
* Then, apply functional dependencies on the remaining clauses by
calling
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 87e232fdd4..3f42713aa2 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -1795,7 +1795,8 @@ mcv_clauselist_selectivity(PlannerInfo *root,
StatisticExtInfo *stat,
List *clauses, int varRelid,
JoinType jointype,
SpecialJoinInfo *sjinfo,
RelOptInfo *rel,
- Selectivity *basesel,
Selectivity *totalsel)
+ Selectivity *basesel,
Selectivity *totalsel,
+ bool is_or)
{
int i;
MCVList *mcv;
@@ -1808,7 +1809,7 @@ mcv_clauselist_selectivity(PlannerInfo *root,
StatisticExtInfo *stat,
mcv = statext_mcv_load(stat->statOid);
/* build a match bitmap for the clauses */
- matches = mcv_get_match_bitmap(root, clauses, stat->keys, mcv, false);
+ matches = mcv_get_match_bitmap(root, clauses, stat->keys, mcv, is_or);
/* sum frequencies for all the matching MCV items */
*basesel = 0.0;
diff --git a/src/include/statistics/extended_stats_internal.h
b/src/include/statistics/extended_stats_internal.h
index b512ee908a..5171895bba 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -107,6 +107,7 @@ extern Selectivity mcv_clauselist_selectivity(PlannerInfo
*root,
SpecialJoinInfo *sjinfo,
RelOptInfo *rel,
Selectivity *basesel,
-
Selectivity *totalsel);
+
Selectivity *totalsel,
+
bool is_or);
#endif /*
EXTENDED_STATS_INTERNAL_H */
diff --git a/src/include/statistics/statistics.h
b/src/include/statistics/statistics.h
index f5d9b6c73a..e18c9a6539 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -116,7 +116,8 @@ extern Selectivity
statext_clauselist_selectivity(PlannerInfo *root,
JoinType jointype,
SpecialJoinInfo *sjinfo,
RelOptInfo *rel,
-
Bitmapset **estimatedclauses);
+
Bitmapset **estimatedclauses,
+
bool is_or);
extern bool has_stats_of_kind(List *stats, char requiredkind);
extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind,
Bitmapset **clause_attnums,
diff --git a/src/test/regress/expected/stats_ext.out
b/src/test/regress/expected/stats_ext.out
index 61237dfb11..5344b70cf4 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -648,11 +648,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM
mcv_lists WHERE a = 1 OR b = '
200 | 200
(1 row)
--- we can't use the statistic for OR clauses that are not fully covered
(missing 'd' attribute)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b =
''1'' OR c = 1 OR d IS NOT NULL');
estimated | actual
-----------+--------
- 343 | 200
+ 200 | 200
(1 row)
-- check change of unrelated column type does not reset the MCV statistics
diff --git a/src/test/regress/sql/stats_ext.sql
b/src/test/regress/sql/stats_ext.sql
index 84f13e8814..fa989fccb0 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -400,7 +400,6 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists
WHERE a <= 4 AND b <
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b =
''1'' OR c = 1');
--- we can't use the statistic for OR clauses that are not fully covered
(missing 'd' attribute)
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b =
''1'' OR c = 1 OR d IS NOT NULL');
-- check change of unrelated column type does not reset the MCV statistics
--
2.21.1
>From af5921a73a71a8c6adf454c35e2b8e911c94cee7 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <t...@fuzzy.cz>
Date: Mon, 11 Nov 2019 01:34:11 +0100
Subject: [PATCH 2/3] Support clauses of the form Var op Var
---
src/backend/statistics/extended_stats.c | 63 ++++++++++++----
src/backend/statistics/mcv.c | 75 ++++++++++++++++++-
.../statistics/extended_stats_internal.h | 2 +-
src/test/regress/expected/stats_ext.out | 72 ++++++++++++++++++
src/test/regress/sql/stats_ext.sql | 22 ++++++
5 files changed, 217 insertions(+), 17 deletions(-)
diff --git a/src/backend/statistics/extended_stats.c
b/src/backend/statistics/extended_stats.c
index 24ece6f99c..1872cd4529 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -986,14 +986,18 @@ statext_is_compatible_clause_internal(PlannerInfo *root,
Node *clause,
{
RangeTblEntry *rte = root->simple_rte_array[relid];
OpExpr *expr = (OpExpr *) clause;
- Var *var;
+ Var *var,
+ *var2;
/* Only expressions with two arguments are considered
compatible. */
if (list_length(expr->args) != 2)
return false;
- /* Check if the expression the right shape (one Var, one Const)
*/
- if (!examine_opclause_expression(expr, &var, NULL, NULL))
+ /*
+ * Check if the expression the right shape (one Var and one
Const,
+ * or two Vars).
+ */
+ if (!examine_opclause_expression(expr, &var, &var2, NULL, NULL))
return false;
/*
@@ -1033,7 +1037,20 @@ statext_is_compatible_clause_internal(PlannerInfo *root,
Node *clause,
!get_func_leakproof(get_opcode(expr->opno)))
return false;
- return statext_is_compatible_clause_internal(root, (Node *) var,
+ /*
+ * Check compatibility of the first Var - we get this one for
both
+ * types of supported expressions (Var op Const) and (Var op
Var).
+ */
+ if (!statext_is_compatible_clause_internal(root, (Node *) var,
+
relid, attnums))
+ return false;
+
+ /* For (Var op Const) we don't get the second Var, and we're
done. */
+ if (!var2)
+ return true;
+
+ /* For (Var op Var) check compatibility of the second Var. */
+ return statext_is_compatible_clause_internal(root, (Node *)
var2,
relid, attnums);
}
@@ -1419,19 +1436,21 @@ statext_clauselist_selectivity(PlannerInfo *root, List
*clauses, int varRelid,
* examine_opclause_expression
* Split expression into Var and Const parts.
*
- * Attempts to match the arguments to either (Var op Const) or (Const op Var),
- * possibly with a RelabelType on top. When the expression matches this form,
- * returns true, otherwise returns false.
+ * Attempts to match the arguments to either (Var op Const) or (Const op Var)
+ * or (Var op Var), possibly with a RelabelType on top. When the expression
+ * matches this form, returns true, otherwise returns false.
*
* Optionally returns pointers to the extracted Var/Const nodes, when passed
* non-null pointers (varp, cstp and varonleftp). The varonleftp flag specifies
* on which side of the operator we found the Var node.
*/
bool
-examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool
*varonleftp)
+examine_opclause_expression(OpExpr *expr, Var **var1p, Var **var2p,
+ Const **cstp, bool
*varonleftp)
{
- Var *var;
- Const *cst;
+ Var *var1 = NULL;
+ Var *var2 = NULL;
+ Const *cst = NULL;
bool varonleft;
Node *leftop,
*rightop;
@@ -1451,22 +1470,38 @@ examine_opclause_expression(OpExpr *expr, Var **varp,
Const **cstp, bool *varonl
if (IsA(leftop, Var) && IsA(rightop, Const))
{
- var = (Var *) leftop;
+ var1 = (Var *) leftop;
cst = (Const *) rightop;
varonleft = true;
}
else if (IsA(leftop, Const) && IsA(rightop, Var))
{
- var = (Var *) rightop;
+ var1 = (Var *) rightop;
cst = (Const *) leftop;
varonleft = false;
}
+ else if (IsA(leftop, Var) && IsA(rightop, Var))
+ {
+ var1 = (Var *) leftop;
+ var2 = (Var *) rightop;
+ varonleft = false;
+
+ /*
+ * Both variables have to be for the same relation (otherwise
it's
+ * a join clause, and we don't deal with those yet.
+ */
+ if (var1->varno != var2->varno)
+ return false;
+ }
else
return false;
/* return pointers to the extracted parts if requested */
- if (varp)
- *varp = var;
+ if (var1p)
+ *var1p = var1;
+
+ if (var2p)
+ *var2p = var2;
if (cstp)
*cstp = cst;
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 3f42713aa2..97d3083451 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -1581,16 +1581,25 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
/* valid only after examine_opclause_expression returns
true */
Var *var;
+ Var *var2;
Const *cst;
bool varonleft;
fmgr_info(get_opcode(expr->opno), &opproc);
- /* extract the var and const from the expression */
- if (examine_opclause_expression(expr, &var, &cst,
&varonleft))
+ /* extract the vars and const from the expression */
+ if (!examine_opclause_expression(expr, &var, &var2,
&cst, &varonleft))
+ continue; /* XXX Can this actually
happen? */
+
+ /* We should always get at least one Var. */
+ Assert(var);
+
+ if (cst)
{
int idx;
+ Assert(!var2);
+
/* match the attribute to a dimension of the
statistic */
idx = bms_member_index(keys, var->varattno);
@@ -1651,6 +1660,68 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
matches[i] = RESULT_MERGE(matches[i],
is_or, match);
}
}
+ else
+ {
+ int idx;
+ int idx2;
+
+ Assert(var2);
+
+ /* match the attribute to a dimension of the
statistic */
+ idx = bms_member_index(keys, var->varattno);
+ idx2 = bms_member_index(keys, var2->varattno);
+
+ /*
+ * Walk through the MCV items and evaluate the
current clause.
+ * We can skip items that were already ruled
out, and
+ * terminate if there are no remaining MCV
items that might
+ * possibly match.
+ */
+ for (i = 0; i < mcvlist->nitems; i++)
+ {
+ bool match = true;
+ MCVItem *item = &mcvlist->items[i];
+
+ /*
+ * When either of the MCV items is NULL
we can treat this
+ * as a mismatch. We must not call the
operator because
+ * of strictness.
+ */
+ if (item->isnull[idx] ||
item->isnull[idx2])
+ {
+ matches[i] =
RESULT_MERGE(matches[i], is_or, false);
+ continue;
+ }
+
+ /*
+ * Skip MCV items that can't change
result in the bitmap.
+ * Once the value gets false for
AND-lists, or true for
+ * OR-lists, we don't need to look at
more clauses.
+ */
+ if (RESULT_IS_FINAL(matches[i], is_or))
+ continue;
+
+ /*
+ * First check whether the constant is
below the lower
+ * boundary (in that case we can skip
the bucket, because
+ * there's no overlap).
+ *
+ * We don't store collations used to
build the statistics,
+ * but we can use the collation for the
attribute itself,
+ * as stored in varcollid. We do reset
the statistics after
+ * a type change (including collation
change), so this is
+ * OK. We may need to relax this after
allowing extended
+ * statistics on expressions.
+ */
+ match =
DatumGetBool(FunctionCall2Coll(&opproc,
+
var->varcollid,
+
item->values[idx],
+
item->values[idx2]));
+
+ /* update the match bitmap with the
result */
+ matches[i] = RESULT_MERGE(matches[i],
is_or, match);
+ }
+ }
}
else if (IsA(clause, NullTest))
{
diff --git a/src/include/statistics/extended_stats_internal.h
b/src/include/statistics/extended_stats_internal.h
index 5171895bba..804089bc57 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -96,7 +96,7 @@ extern SortItem *build_sorted_items(int numrows, int *nitems,
HeapTuple *rows,
TupleDesc tdesc, MultiSortSupport mss,
int
numattrs, AttrNumber *attnums);
-extern bool examine_opclause_expression(OpExpr *expr, Var **varp,
+extern bool examine_opclause_expression(OpExpr *expr, Var **var1p, Var **var2p,
Const **cstp, bool *varonleftp);
extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root,
diff --git a/src/test/regress/expected/stats_ext.out
b/src/test/regress/expected/stats_ext.out
index 5344b70cf4..4c078ae61f 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -603,6 +603,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM
mcv_lists WHERE a = 1 OR b = '
343 | 200
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+ estimated | actual
+-----------+--------
+ 1667 | 3750
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+ estimated | actual
+-----------+--------
+ 1667 | 0
+(1 row)
+
-- create statistics
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
ANALYZE mcv_lists;
@@ -654,6 +666,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM
mcv_lists WHERE a = 1 OR b = '
200 | 200
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+ estimated | actual
+-----------+--------
+ 3750 | 3750
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+ estimated | actual
+-----------+--------
+ 1 | 0
+(1 row)
+
-- check change of unrelated column type does not reset the MCV statistics
ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
SELECT d.stxdmcv IS NOT NULL
@@ -749,6 +773,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM
mcv_lists WHERE b = ''x'' OR d
3750 | 2500
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+ estimated | actual
+-----------+--------
+ 25 | 2500
+(1 row)
+
-- create statistics
CREATE STATISTICS mcv_lists_stats (mcv) ON b, d FROM mcv_lists;
ANALYZE mcv_lists;
@@ -758,6 +788,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM
mcv_lists WHERE b = ''x'' OR d
2500 | 2500
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+ estimated | actual
+-----------+--------
+ 2500 | 2500
+(1 row)
+
-- mcv with arrays
CREATE TABLE mcv_lists_arrays (
a TEXT[],
@@ -808,6 +844,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM
mcv_lists_bool WHERE NOT a AND
1094 | 0
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a =
b');
+ estimated | actual
+-----------+--------
+ 9950 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a =
b AND b = c');
+ estimated | actual
+-----------+--------
+ 50 | 2500
+(1 row)
+
CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c
FROM mcv_lists_bool;
ANALYZE mcv_lists_bool;
@@ -835,6 +883,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM
mcv_lists_bool WHERE NOT a AND
1 | 0
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a =
b');
+ estimated | actual
+-----------+--------
+ 2500 | 2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a =
b AND b = c');
+ estimated | actual
+-----------+--------
+ 2500 | 2500
+(1 row)
+
-- check the ability to use multiple MCV lists
CREATE TABLE mcv_lists_multi (
a INTEGER,
@@ -869,6 +929,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM
mcv_lists_multi WHERE a = 0 AN
4 | 142
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b
AND c = d');
+ estimated | actual
+-----------+--------
+ 1 | 5000
+(1 row)
+
-- create separate MCV statistics
CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi;
CREATE STATISTICS mcv_lists_multi_2 (mcv) ON c, d FROM mcv_lists_multi;
@@ -891,6 +957,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM
mcv_lists_multi WHERE a = 0 AN
143 | 142
(1 row)
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b
AND c = d');
+ estimated | actual
+-----------+--------
+ 5000 | 5000
+(1 row)
+
DROP TABLE mcv_lists_multi;
-- Permission tests. Users should not be able to see specific data values in
-- the extended statistics, if they lack permission to see those values in
diff --git a/src/test/regress/sql/stats_ext.sql
b/src/test/regress/sql/stats_ext.sql
index fa989fccb0..b7519b275b 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -381,6 +381,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM
mcv_lists WHERE a = 1 OR b = '
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b =
''1'' OR c = 1 OR d IS NOT NULL');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+
-- create statistics
CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
@@ -402,6 +406,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM
mcv_lists WHERE a = 1 OR b = '
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b =
''1'' OR c = 1 OR d IS NOT NULL');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+
-- check change of unrelated column type does not reset the MCV statistics
ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
@@ -473,6 +481,8 @@ ANALYZE mcv_lists;
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = ''x'' OR
d = ''x''');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+
-- create statistics
CREATE STATISTICS mcv_lists_stats (mcv) ON b, d FROM mcv_lists;
@@ -480,6 +490,8 @@ ANALYZE mcv_lists;
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = ''x'' OR
d = ''x''');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+
-- mcv with arrays
CREATE TABLE mcv_lists_arrays (
a TEXT[],
@@ -521,6 +533,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM
mcv_lists_bool WHERE NOT a AND
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a
AND b AND NOT c');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a =
b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a =
b AND b = c');
+
CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c
FROM mcv_lists_bool;
@@ -534,6 +550,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM
mcv_lists_bool WHERE NOT a AND
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a
AND b AND NOT c');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a =
b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a =
b AND b = c');
+
-- check the ability to use multiple MCV lists
CREATE TABLE mcv_lists_multi (
a INTEGER,
@@ -556,6 +576,7 @@ ANALYZE mcv_lists_multi;
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0
AND b = 0');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE c = 0
AND d = 0');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0
AND b = 0 AND c = 0 AND d = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b
AND c = d');
-- create separate MCV statistics
CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi;
@@ -566,6 +587,7 @@ ANALYZE mcv_lists_multi;
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0
AND b = 0');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE c = 0
AND d = 0');
SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0
AND b = 0 AND c = 0 AND d = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b
AND c = d');
DROP TABLE mcv_lists_multi;
--
2.21.1