Hi,

Here is a rebased version of this patch series. I've polished the first
two parts a bit - estimation of OR clauses and (Var op Var) clauses, and
added a bunch of regression tests to exercise this code. It's not quite
there yet, but I think it's feasible to get this committed for PG13.

The last part (extended stats on expressions) is far from complete, and
it's not feasible to get it into PG13. There's too much missing stuff.

regards

--
Tomas Vondra                  http://www.2ndQuadrant.com
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From d7f639b6150fe9fd179066af2a536465d877842a Mon Sep 17 00:00:00 2001
From: Tomas Vondra <to...@2ndquadrant.com>
Date: Mon, 2 Dec 2019 23:02:17 +0100
Subject: [PATCH 1/3] Support using extended stats for parts of OR clauses

---
 src/backend/optimizer/path/clausesel.c        | 109 +++++++++++++++---
 src/backend/statistics/extended_stats.c       |  45 +++++++-
 src/backend/statistics/mcv.c                  |   5 +-
 .../statistics/extended_stats_internal.h      |   3 +-
 src/include/statistics/statistics.h           |   3 +-
 src/test/regress/expected/stats_ext.out       |   3 +-
 src/test/regress/sql/stats_ext.sql            |   1 -
 7 files changed, 138 insertions(+), 31 deletions(-)

diff --git a/src/backend/optimizer/path/clausesel.c 
b/src/backend/optimizer/path/clausesel.c
index a3ebe10592..8c1a404ce2 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -92,7 +92,7 @@ clauselist_selectivity(PlannerInfo *root,
                 */
                s1 *= statext_clauselist_selectivity(root, clauses, varRelid,
                                                                                
         jointype, sjinfo, rel,
-                                                                               
         &estimatedclauses);
+                                                                               
         &estimatedclauses, false);
        }
 
        /*
@@ -104,6 +104,89 @@ clauselist_selectivity(PlannerInfo *root,
                                                                                
          estimatedclauses);
 }
 
+/*
+ * clauselist_selectivity_or -
+ *       Compute the selectivity of an implicitly-ORed list of boolean
+ *       expression clauses.  The list can be empty, in which case 0.0
+ *       must be returned.  List elements may be either RestrictInfos
+ *       or bare expression clauses --- the former is preferred since
+ *       it allows caching of results.
+ *
+ * See clause_selectivity() for the meaning of the additional parameters.
+ *
+ * The basic approach is to apply extended statistics first, on as many
+ * clauses as possible, in order to capture cross-column dependencies etc.
+ * The remaining clauses are then estimated using regular statistics tracked
+ * for individual columns.  This is done by passing each remaining clause to
+ * clause_selectivity and then combining the selectivities using the
+ * regular formula (s1+s2 - s1*s2).
+ */
+static Selectivity
+clauselist_selectivity_or(PlannerInfo *root,
+                                                 List *clauses,
+                                                 int varRelid,
+                                                 JoinType jointype,
+                                                 SpecialJoinInfo *sjinfo)
+{
+       ListCell   *lc;
+       Selectivity     s1 = 0.0;
+       RelOptInfo *rel;
+       Bitmapset  *estimatedclauses = NULL;
+       int                     listidx;
+
+       /*
+        * Determine if these clauses reference a single relation.  If so, and 
if
+        * it has extended statistics, try to apply those.
+        */
+       rel = find_single_rel_for_clauses(root, clauses);
+       if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
+       {
+               /*
+                * Estimate as many clauses as possible using extended 
statistics.
+                *
+                * 'estimatedclauses' tracks the 0-based list position index of
+                * clauses that we've estimated using extended statistics, and 
that
+                * should be ignored.
+                *
+                * XXX We can't multiply with current value, because for OR 
clauses
+                * we start with 0.0, so we simply assign to s1 directly.
+                */
+               s1 = statext_clauselist_selectivity(root, clauses, varRelid,
+                                                                               
        jointype, sjinfo, rel,
+                                                                               
        &estimatedclauses, true);
+       }
+
+       /*
+        * Selectivities of the remaining clauses for an OR clause are computed
+        * as s1+s2 - s1*s2 to account for the probable overlap of selected 
tuple
+        * sets. The clauses estimated using extended statistics are effectively
+        * treated as a single clause.
+        *
+        * XXX is this too conservative?
+        */
+       listidx = -1;
+       foreach(lc, clauses)
+       {
+               Selectivity s2;
+
+               listidx++;
+
+               /* skip already estimated clauses */
+               if (bms_is_member(listidx, estimatedclauses))
+                       continue;
+
+               s2 = clause_selectivity(root,
+                                                               (Node *) 
lfirst(lc),
+                                                               varRelid,
+                                                               jointype,
+                                                               sjinfo);
+
+               s1 = s1 + s2 - s1 * s2;
+       }
+
+       return s1;
+}
+
 /*
  * clauselist_selectivity_simple -
  *       Compute the selectivity of an implicitly-ANDed list of boolean
@@ -735,24 +818,14 @@ clause_selectivity(PlannerInfo *root,
        else if (is_orclause(clause))
        {
                /*
-                * Selectivities for an OR clause are computed as s1+s2 - s1*s2 
to
-                * account for the probable overlap of selected tuple sets.
-                *
-                * XXX is this too conservative?
+                * Almost the same thing as clauselist_selectivity, but with the
+                * clauses connected by OR.
                 */
-               ListCell   *arg;
-
-               s1 = 0.0;
-               foreach(arg, ((BoolExpr *) clause)->args)
-               {
-                       Selectivity s2 = clause_selectivity(root,
-                                                                               
                (Node *) lfirst(arg),
-                                                                               
                varRelid,
-                                                                               
                jointype,
-                                                                               
                sjinfo);
-
-                       s1 = s1 + s2 - s1 * s2;
-               }
+               s1 = clauselist_selectivity_or(root,
+                                                                          
((BoolExpr *) clause)->args,
+                                                                          
varRelid,
+                                                                          
jointype,
+                                                                          
sjinfo);
        }
        else if (is_opclause(clause) || IsA(clause, DistinctExpr))
        {
diff --git a/src/backend/statistics/extended_stats.c 
b/src/backend/statistics/extended_stats.c
index 03e69d057f..24ece6f99c 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1225,7 +1225,8 @@ statext_is_compatible_clause(PlannerInfo *root, Node 
*clause, Index relid,
 static Selectivity
 statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int 
varRelid,
                                                                   JoinType 
jointype, SpecialJoinInfo *sjinfo,
-                                                                  RelOptInfo 
*rel, Bitmapset **estimatedclauses)
+                                                                  RelOptInfo 
*rel, Bitmapset **estimatedclauses,
+                                                                  bool is_or)
 {
        ListCell   *l;
        Bitmapset **list_attnums;
@@ -1317,8 +1318,32 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, 
List *clauses, int varReli
                 * columns/clauses. We'll then use the various selectivities 
computed from
                 * MCV list to improve it.
                 */
-               simple_sel = clauselist_selectivity_simple(root, stat_clauses, 
varRelid,
-                                                                               
                jointype, sjinfo, NULL);
+               if (is_or)
+               {
+                       ListCell   *lc;
+                       Selectivity     s1 = 0.0,
+                                               s2;
+
+                       /*
+                        * Selectivities of OR clauses are computed as s1+s2 - 
s1*s2 to account
+                        * for the probable overlap of selected tuple sets.
+                        */
+                       foreach(lc, stat_clauses)
+                       {
+                               s2 = clause_selectivity(root,
+                                                                               
(Node *) lfirst(lc),
+                                                                               
varRelid,
+                                                                               
jointype,
+                                                                               
sjinfo);
+
+                               s1 = s1 + s2 - s1 * s2;
+                       }
+
+                       simple_sel = s1;
+               }
+               else
+                       simple_sel = clauselist_selectivity_simple(root, 
stat_clauses, varRelid,
+                                                                               
                           jointype, sjinfo, NULL);
 
                /*
                 * Now compute the multi-column estimate from the MCV list, 
along with the
@@ -1326,7 +1351,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, 
List *clauses, int varReli
                 */
                mcv_sel = mcv_clauselist_selectivity(root, stat, stat_clauses, 
varRelid,
                                                                                
         jointype, sjinfo, rel,
-                                                                               
         &mcv_basesel, &mcv_totalsel);
+                                                                               
         &mcv_basesel, &mcv_totalsel, is_or);
 
                /* Estimated selectivity of values not covered by MCV matches */
                other_sel = simple_sel - mcv_basesel;
@@ -1354,13 +1379,21 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, 
List *clauses, int varReli
 Selectivity
 statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
                                                           JoinType jointype, 
SpecialJoinInfo *sjinfo,
-                                                          RelOptInfo *rel, 
Bitmapset **estimatedclauses)
+                                                          RelOptInfo *rel, 
Bitmapset **estimatedclauses,
+                                                          bool is_or)
 {
        Selectivity sel;
 
        /* First, try estimating clauses using a multivariate MCV list. */
        sel = statext_mcv_clauselist_selectivity(root, clauses, varRelid, 
jointype,
-                                                                               
         sjinfo, rel, estimatedclauses);
+                                                                               
         sjinfo, rel, estimatedclauses, is_or);
+
+       /*
+        * Functional dependencies only work for clauses connected by AND, so 
for
+        * OR clauses we're done.
+        */
+       if (is_or)
+               return sel;
 
        /*
         * Then, apply functional dependencies on the remaining clauses by 
calling
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 87e232fdd4..3f42713aa2 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -1795,7 +1795,8 @@ mcv_clauselist_selectivity(PlannerInfo *root, 
StatisticExtInfo *stat,
                                                   List *clauses, int varRelid,
                                                   JoinType jointype, 
SpecialJoinInfo *sjinfo,
                                                   RelOptInfo *rel,
-                                                  Selectivity *basesel, 
Selectivity *totalsel)
+                                                  Selectivity *basesel, 
Selectivity *totalsel,
+                                                  bool is_or)
 {
        int                     i;
        MCVList    *mcv;
@@ -1808,7 +1809,7 @@ mcv_clauselist_selectivity(PlannerInfo *root, 
StatisticExtInfo *stat,
        mcv = statext_mcv_load(stat->statOid);
 
        /* build a match bitmap for the clauses */
-       matches = mcv_get_match_bitmap(root, clauses, stat->keys, mcv, false);
+       matches = mcv_get_match_bitmap(root, clauses, stat->keys, mcv, is_or);
 
        /* sum frequencies for all the matching MCV items */
        *basesel = 0.0;
diff --git a/src/include/statistics/extended_stats_internal.h 
b/src/include/statistics/extended_stats_internal.h
index b512ee908a..5171895bba 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -107,6 +107,7 @@ extern Selectivity mcv_clauselist_selectivity(PlannerInfo 
*root,
                                                                                
          SpecialJoinInfo *sjinfo,
                                                                                
          RelOptInfo *rel,
                                                                                
          Selectivity *basesel,
-                                                                               
          Selectivity *totalsel);
+                                                                               
          Selectivity *totalsel,
+                                                                               
          bool is_or);
 
 #endif                                                 /* 
EXTENDED_STATS_INTERNAL_H */
diff --git a/src/include/statistics/statistics.h 
b/src/include/statistics/statistics.h
index f5d9b6c73a..e18c9a6539 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -116,7 +116,8 @@ extern Selectivity 
statext_clauselist_selectivity(PlannerInfo *root,
                                                                                
                  JoinType jointype,
                                                                                
                  SpecialJoinInfo *sjinfo,
                                                                                
                  RelOptInfo *rel,
-                                                                               
                  Bitmapset **estimatedclauses);
+                                                                               
                  Bitmapset **estimatedclauses,
+                                                                               
                  bool is_or);
 extern bool has_stats_of_kind(List *stats, char requiredkind);
 extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind,
                                                                                
                Bitmapset **clause_attnums,
diff --git a/src/test/regress/expected/stats_ext.out 
b/src/test/regress/expected/stats_ext.out
index 61237dfb11..5344b70cf4 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -648,11 +648,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists WHERE a = 1 OR b = '
        200 |    200
 (1 row)
 
--- we can't use the statistic for OR clauses that are not fully covered 
(missing 'd' attribute)
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = 
''1'' OR c = 1 OR d IS NOT NULL');
  estimated | actual 
 -----------+--------
-       343 |    200
+       200 |    200
 (1 row)
 
 -- check change of unrelated column type does not reset the MCV statistics
diff --git a/src/test/regress/sql/stats_ext.sql 
b/src/test/regress/sql/stats_ext.sql
index 84f13e8814..fa989fccb0 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -400,7 +400,6 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists 
WHERE a <= 4 AND b <
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = 
''1'' OR c = 1');
 
--- we can't use the statistic for OR clauses that are not fully covered 
(missing 'd' attribute)
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = 
''1'' OR c = 1 OR d IS NOT NULL');
 
 -- check change of unrelated column type does not reset the MCV statistics
-- 
2.21.1

>From af5921a73a71a8c6adf454c35e2b8e911c94cee7 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <t...@fuzzy.cz>
Date: Mon, 11 Nov 2019 01:34:11 +0100
Subject: [PATCH 2/3] Support clauses of the form Var op Var

---
 src/backend/statistics/extended_stats.c       | 63 ++++++++++++----
 src/backend/statistics/mcv.c                  | 75 ++++++++++++++++++-
 .../statistics/extended_stats_internal.h      |  2 +-
 src/test/regress/expected/stats_ext.out       | 72 ++++++++++++++++++
 src/test/regress/sql/stats_ext.sql            | 22 ++++++
 5 files changed, 217 insertions(+), 17 deletions(-)

diff --git a/src/backend/statistics/extended_stats.c 
b/src/backend/statistics/extended_stats.c
index 24ece6f99c..1872cd4529 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -986,14 +986,18 @@ statext_is_compatible_clause_internal(PlannerInfo *root, 
Node *clause,
        {
                RangeTblEntry *rte = root->simple_rte_array[relid];
                OpExpr     *expr = (OpExpr *) clause;
-               Var                *var;
+               Var                *var,
+                                  *var2;
 
                /* Only expressions with two arguments are considered 
compatible. */
                if (list_length(expr->args) != 2)
                        return false;
 
-               /* Check if the expression the right shape (one Var, one Const) 
*/
-               if (!examine_opclause_expression(expr, &var, NULL, NULL))
+               /*
+                * Check if the expression has the right shape (one Var and one 
Const,
+                * or two Vars).
+                */
+               if (!examine_opclause_expression(expr, &var, &var2, NULL, NULL))
                        return false;
 
                /*
@@ -1033,7 +1037,20 @@ statext_is_compatible_clause_internal(PlannerInfo *root, 
Node *clause,
                        !get_func_leakproof(get_opcode(expr->opno)))
                        return false;
 
-               return statext_is_compatible_clause_internal(root, (Node *) var,
+               /*
+                * Check compatibility of the first Var - we get this one for 
both
+                * types of supported expressions (Var op Const) and (Var op 
Var).
+                */
+               if (!statext_is_compatible_clause_internal(root, (Node *) var,
+                                                                               
                   relid, attnums))
+                       return false;
+
+               /* For (Var op Const) we don't get the second Var, and we're 
done. */
+               if (!var2)
+                       return true;
+
+               /* For (Var op Var) check compatibility of the second Var. */
+               return statext_is_compatible_clause_internal(root, (Node *) 
var2,
                                                                                
                         relid, attnums);
        }
 
@@ -1419,19 +1436,21 @@ statext_clauselist_selectivity(PlannerInfo *root, List 
*clauses, int varRelid,
  * examine_opclause_expression
  *             Split expression into Var and Const parts.
  *
- * Attempts to match the arguments to either (Var op Const) or (Const op Var),
- * possibly with a RelabelType on top. When the expression matches this form,
- * returns true, otherwise returns false.
+ * Attempts to match the arguments to either (Var op Const) or (Const op Var)
+ * or (Var op Var), possibly with a RelabelType on top. When the expression
+ * matches this form, returns true, otherwise returns false.
  *
  * Optionally returns pointers to the extracted Var/Const nodes, when passed
  * non-null pointers (varp, cstp and varonleftp). The varonleftp flag specifies
  * on which side of the operator we found the Var node.
  */
 bool
-examine_opclause_expression(OpExpr *expr, Var **varp, Const **cstp, bool 
*varonleftp)
+examine_opclause_expression(OpExpr *expr, Var **var1p, Var **var2p,
+                                                       Const **cstp, bool 
*varonleftp)
 {
-       Var        *var;
-       Const  *cst;
+       Var        *var1 = NULL;
+       Var        *var2 = NULL;
+       Const  *cst = NULL;
        bool    varonleft;
        Node   *leftop,
                   *rightop;
@@ -1451,22 +1470,38 @@ examine_opclause_expression(OpExpr *expr, Var **varp, 
Const **cstp, bool *varonl
 
        if (IsA(leftop, Var) && IsA(rightop, Const))
        {
-               var = (Var *) leftop;
+               var1 = (Var *) leftop;
                cst = (Const *) rightop;
                varonleft = true;
        }
        else if (IsA(leftop, Const) && IsA(rightop, Var))
        {
-               var = (Var *) rightop;
+               var1 = (Var *) rightop;
                cst = (Const *) leftop;
                varonleft = false;
        }
+       else if (IsA(leftop, Var) && IsA(rightop, Var))
+       {
+               var1 = (Var *) leftop;
+               var2 = (Var *) rightop;
+               varonleft = false;
+
+               /*
+                * Both variables have to be for the same relation (otherwise 
it's
+                * a join clause, and we don't deal with those yet).
+                */
+               if (var1->varno != var2->varno)
+                       return false;
+       }
        else
                return false;
 
        /* return pointers to the extracted parts if requested */
-       if (varp)
-               *varp = var;
+       if (var1p)
+               *var1p = var1;
+
+       if (var2p)
+               *var2p = var2;
 
        if (cstp)
                *cstp = cst;
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 3f42713aa2..97d3083451 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -1581,16 +1581,25 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 
                        /* valid only after examine_opclause_expression returns 
true */
                        Var                *var;
+                       Var                *var2;
                        Const      *cst;
                        bool            varonleft;
 
                        fmgr_info(get_opcode(expr->opno), &opproc);
 
-                       /* extract the var and const from the expression */
-                       if (examine_opclause_expression(expr, &var, &cst, 
&varonleft))
+                       /* extract the vars and const from the expression */
+                       if (!examine_opclause_expression(expr, &var, &var2, 
&cst, &varonleft))
+                               continue;       /* XXX Can this actually 
happen? */
+
+                       /* We should always get at least one Var. */
+                       Assert(var);
+
+                       if (cst)
                        {
                                int                     idx;
 
+                               Assert(!var2);
+
                                /* match the attribute to a dimension of the 
statistic */
                                idx = bms_member_index(keys, var->varattno);
 
@@ -1651,6 +1660,68 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
                                        matches[i] = RESULT_MERGE(matches[i], 
is_or, match);
                                }
                        }
+                       else
+                       {
+                               int                     idx;
+                               int                     idx2;
+
+                               Assert(var2);
+
+                               /* match the attribute to a dimension of the 
statistic */
+                               idx = bms_member_index(keys, var->varattno);
+                               idx2 = bms_member_index(keys, var2->varattno);
+
+                               /*
+                                * Walk through the MCV items and evaluate the 
current clause.
+                                * We can skip items that were already ruled 
out, and
+                                * terminate if there are no remaining MCV 
items that might
+                                * possibly match.
+                                */
+                               for (i = 0; i < mcvlist->nitems; i++)
+                               {
+                                       bool            match = true;
+                                       MCVItem    *item = &mcvlist->items[i];
+
+                                       /*
+                                        * When either of the two values in the 
MCV item is NULL we
+                                        * can treat this as a mismatch. We must 
not call the operator
+                                        * because of strictness.
+                                       if (item->isnull[idx] || 
item->isnull[idx2])
+                                       {
+                                               matches[i] = 
RESULT_MERGE(matches[i], is_or, false);
+                                               continue;
+                                       }
+
+                                       /*
+                                        * Skip MCV items that can't change 
result in the bitmap.
+                                        * Once the value gets false for 
AND-lists, or true for
+                                        * OR-lists, we don't need to look at 
more clauses.
+                                        */
+                                       if (RESULT_IS_FINAL(matches[i], is_or))
+                                               continue;
+
+                                       /*
+                                        * Evaluate the operator on the two 
attribute values stored
+                                        * in this MCV item (a Var op Var clause 
has no constant to
+                                        * compare against).
+                                        *
+                                        * We don't store collations used to 
build the statistics,
+                                        * but we can use the collation for the 
attribute itself,
+                                        * as stored in varcollid. We do reset 
the statistics after
+                                        * a type change (including collation 
change), so this is
+                                        * OK. We may need to relax this after 
allowing extended
+                                        * statistics on expressions.
+                                        */
+                                       match = 
DatumGetBool(FunctionCall2Coll(&opproc,
+                                                                               
                                   var->varcollid,
+                                                                               
                                   item->values[idx],
+                                                                               
                                   item->values[idx2]));
+
+                                       /* update the match bitmap with the 
result */
+                                       matches[i] = RESULT_MERGE(matches[i], 
is_or, match);
+                               }
+                       }
                }
                else if (IsA(clause, NullTest))
                {
diff --git a/src/include/statistics/extended_stats_internal.h 
b/src/include/statistics/extended_stats_internal.h
index 5171895bba..804089bc57 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -96,7 +96,7 @@ extern SortItem *build_sorted_items(int numrows, int *nitems, 
HeapTuple *rows,
                                                                        
TupleDesc tdesc, MultiSortSupport mss,
                                                                        int 
numattrs, AttrNumber *attnums);
 
-extern bool examine_opclause_expression(OpExpr *expr, Var **varp,
+extern bool examine_opclause_expression(OpExpr *expr, Var **var1p, Var **var2p,
                                                                                
Const **cstp, bool *varonleftp);
 
 extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root,
diff --git a/src/test/regress/expected/stats_ext.out 
b/src/test/regress/expected/stats_ext.out
index 5344b70cf4..4c078ae61f 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -603,6 +603,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists WHERE a = 1 OR b = '
        343 |    200
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+ estimated | actual 
+-----------+--------
+      1667 |   3750
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+ estimated | actual 
+-----------+--------
+      1667 |      0
+(1 row)
+
 -- create statistics
 CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
 ANALYZE mcv_lists;
@@ -654,6 +666,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists WHERE a = 1 OR b = '
        200 |    200
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+ estimated | actual 
+-----------+--------
+      3750 |   3750
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
 -- check change of unrelated column type does not reset the MCV statistics
 ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
 SELECT d.stxdmcv IS NOT NULL
@@ -749,6 +773,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists WHERE b = ''x'' OR d
       3750 |   2500
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+ estimated | actual 
+-----------+--------
+        25 |   2500
+(1 row)
+
 -- create statistics
 CREATE STATISTICS mcv_lists_stats (mcv) ON b, d FROM mcv_lists;
 ANALYZE mcv_lists;
@@ -758,6 +788,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists WHERE b = ''x'' OR d
       2500 |   2500
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+ estimated | actual 
+-----------+--------
+      2500 |   2500
+(1 row)
+
 -- mcv with arrays
 CREATE TABLE mcv_lists_arrays (
     a TEXT[],
@@ -808,6 +844,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists_bool WHERE NOT a AND
       1094 |      0
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = 
b');
+ estimated | actual 
+-----------+--------
+      9950 |   2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = 
b AND b = c');
+ estimated | actual 
+-----------+--------
+        50 |   2500
+(1 row)
+
 CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c
   FROM mcv_lists_bool;
 ANALYZE mcv_lists_bool;
@@ -835,6 +883,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists_bool WHERE NOT a AND
          1 |      0
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = 
b');
+ estimated | actual 
+-----------+--------
+      2500 |   2500
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = 
b AND b = c');
+ estimated | actual 
+-----------+--------
+      2500 |   2500
+(1 row)
+
 -- check the ability to use multiple MCV lists
 CREATE TABLE mcv_lists_multi (
        a INTEGER,
@@ -869,6 +929,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists_multi WHERE a = 0 AN
          4 |    142
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b 
AND c = d');
+ estimated | actual 
+-----------+--------
+         1 |   5000
+(1 row)
+
 -- create separate MCV statistics
 CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi;
 CREATE STATISTICS mcv_lists_multi_2 (mcv) ON c, d FROM mcv_lists_multi;
@@ -891,6 +957,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists_multi WHERE a = 0 AN
        143 |    142
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b 
AND c = d');
+ estimated | actual 
+-----------+--------
+      5000 |   5000
+(1 row)
+
 DROP TABLE mcv_lists_multi;
 -- Permission tests. Users should not be able to see specific data values in
 -- the extended statistics, if they lack permission to see those values in
diff --git a/src/test/regress/sql/stats_ext.sql 
b/src/test/regress/sql/stats_ext.sql
index fa989fccb0..b7519b275b 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -381,6 +381,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists WHERE a = 1 OR b = '
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = 
''1'' OR c = 1 OR d IS NOT NULL');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+
 -- create statistics
 CREATE STATISTICS mcv_lists_stats (mcv) ON a, b, c FROM mcv_lists;
 
@@ -402,6 +406,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists WHERE a = 1 OR b = '
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = 
''1'' OR c = 1 OR d IS NOT NULL');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a > c');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < c');
+
 -- check change of unrelated column type does not reset the MCV statistics
 ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
 
@@ -473,6 +481,8 @@ ANALYZE mcv_lists;
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = ''x'' OR 
d = ''x''');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+
 -- create statistics
 CREATE STATISTICS mcv_lists_stats (mcv) ON b, d FROM mcv_lists;
 
@@ -480,6 +490,8 @@ ANALYZE mcv_lists;
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = ''x'' OR 
d = ''x''');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE b = d');
+
 -- mcv with arrays
 CREATE TABLE mcv_lists_arrays (
     a TEXT[],
@@ -521,6 +533,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists_bool WHERE NOT a AND
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a 
AND b AND NOT c');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = 
b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = 
b AND b = c');
+
 CREATE STATISTICS mcv_lists_bool_stats (mcv) ON a, b, c
   FROM mcv_lists_bool;
 
@@ -534,6 +550,10 @@ SELECT * FROM check_estimated_rows('SELECT * FROM 
mcv_lists_bool WHERE NOT a AND
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a 
AND b AND NOT c');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = 
b');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a = 
b AND b = c');
+
 -- check the ability to use multiple MCV lists
 CREATE TABLE mcv_lists_multi (
        a INTEGER,
@@ -556,6 +576,7 @@ ANALYZE mcv_lists_multi;
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 
AND b = 0');
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE c = 0 
AND d = 0');
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 
AND b = 0 AND c = 0 AND d = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b 
AND c = d');
 
 -- create separate MCV statistics
 CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi;
@@ -566,6 +587,7 @@ ANALYZE mcv_lists_multi;
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 
AND b = 0');
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE c = 0 
AND d = 0');
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 
AND b = 0 AND c = 0 AND d = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = b 
AND c = d');
 
 DROP TABLE mcv_lists_multi;
 
-- 
2.21.1

>From 7957f0cc7bd96981fcbc34b4f5eb5948538769ae Mon Sep 17 00:00:00 2001
From: Tomas Vondra <to...@2ndquadrant.com>
Date: Thu, 5 Mar 2020 22:36:03 +0100
Subject: [PATCH 3/3] Support for extended statistics on expressions

---
 src/backend/commands/statscmds.c              | 190 +++++--
 src/backend/nodes/copyfuncs.c                 |  14 +
 src/backend/nodes/equalfuncs.c                |  13 +
 src/backend/nodes/outfuncs.c                  |  12 +
 src/backend/optimizer/util/plancat.c          |  40 ++
 src/backend/parser/gram.y                     |  31 +-
 src/backend/parser/parse_agg.c                |  10 +
 src/backend/parser/parse_expr.c               |   6 +
 src/backend/parser/parse_func.c               |   3 +
 src/backend/parser/parse_utilcmd.c            |  89 ++-
 src/backend/statistics/dependencies.c         | 159 +++++-
 src/backend/statistics/extended_stats.c       | 532 +++++++++++++++++-
 src/backend/statistics/mcv.c                  |  17 +-
 src/backend/statistics/mvdistinct.c           |  51 +-
 src/backend/tcop/utility.c                    |  16 +-
 src/backend/utils/adt/ruleutils.c             |  59 ++
 src/backend/utils/adt/selfuncs.c              |  11 +
 src/bin/psql/describe.c                       |   1 +
 src/include/catalog/pg_statistic_ext.h        |   3 +
 src/include/nodes/nodes.h                     |   1 +
 src/include/nodes/parsenodes.h                |  16 +
 src/include/nodes/pathnodes.h                 |   1 +
 src/include/parser/parse_node.h               |   1 +
 src/include/parser/parse_utilcmd.h            |   2 +
 .../statistics/extended_stats_internal.h      |  13 +-
 25 files changed, 1191 insertions(+), 100 deletions(-)

diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
index 988cdba6f5..56559a1e91 100644
--- a/src/backend/commands/statscmds.c
+++ b/src/backend/commands/statscmds.c
@@ -29,6 +29,8 @@
 #include "commands/comment.h"
 #include "commands/defrem.h"
 #include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
 #include "statistics/statistics.h"
 #include "utils/builtins.h"
 #include "utils/fmgroids.h"
@@ -42,6 +44,7 @@
 static char *ChooseExtendedStatisticName(const char *name1, const char *name2,
                                                                                
 const char *label, Oid namespaceid);
 static char *ChooseExtendedStatisticNameAddition(List *exprs);
+static bool CheckMutability(Expr *expr);
 
 
 /* qsort comparator for the attnums in CreateStatistics */
@@ -62,6 +65,7 @@ ObjectAddress
 CreateStatistics(CreateStatsStmt *stmt)
 {
        int16           attnums[STATS_MAX_DIMENSIONS];
+       int                     nattnums = 0;
        int                     numcols = 0;
        char       *namestr;
        NameData        stxname;
@@ -74,6 +78,8 @@ CreateStatistics(CreateStatsStmt *stmt)
        Datum           datavalues[Natts_pg_statistic_ext_data];
        bool            datanulls[Natts_pg_statistic_ext_data];
        int2vector *stxkeys;
+       List       *stxexprs = NIL;
+       Datum           exprsDatum;
        Relation        statrel;
        Relation        datarel;
        Relation        rel = NULL;
@@ -192,56 +198,95 @@ CreateStatistics(CreateStatsStmt *stmt)
        foreach(cell, stmt->exprs)
        {
                Node       *expr = (Node *) lfirst(cell);
-               ColumnRef  *cref;
-               char       *attname;
+               StatsElem  *selem;
                HeapTuple       atttuple;
                Form_pg_attribute attForm;
                TypeCacheEntry *type;
 
-               if (!IsA(expr, ColumnRef))
+               if (!IsA(expr, StatsElem))
                        ereport(ERROR,
                                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                         errmsg("only simple column references 
are allowed in CREATE STATISTICS")));
-               cref = (ColumnRef *) expr;
+               selem = (StatsElem *) expr;
 
-               if (list_length(cref->fields) != 1)
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                        errmsg("only simple column references 
are allowed in CREATE STATISTICS")));
-               attname = strVal((Value *) linitial(cref->fields));
-
-               atttuple = SearchSysCacheAttName(relid, attname);
-               if (!HeapTupleIsValid(atttuple))
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_UNDEFINED_COLUMN),
-                                        errmsg("column \"%s\" does not exist",
-                                                       attname)));
-               attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
-
-               /* Disallow use of system attributes in extended stats */
-               if (attForm->attnum <= 0)
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                        errmsg("statistics creation on system 
columns is not supported")));
-
-               /* Disallow data types without a less-than operator */
-               type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
-               if (type->lt_opr == InvalidOid)
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                        errmsg("column \"%s\" cannot be used 
in statistics because its type %s has no default btree operator class",
-                                                       attname, 
format_type_be(attForm->atttypid))));
-
-               /* Make sure no more than STATS_MAX_DIMENSIONS columns are used 
*/
-               if (numcols >= STATS_MAX_DIMENSIONS)
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_TOO_MANY_COLUMNS),
-                                        errmsg("cannot have more than %d 
columns in statistics",
-                                                       STATS_MAX_DIMENSIONS)));
-
-               attnums[numcols] = attForm->attnum;
-               numcols++;
-               ReleaseSysCache(atttuple);
+               if (selem->name)        /* column reference */
+               {
+                       char       *attname;
+                       attname = selem->name;
+
+                       atttuple = SearchSysCacheAttName(relid, attname);
+                       if (!HeapTupleIsValid(atttuple))
+                               ereport(ERROR,
+                                               
(errcode(ERRCODE_UNDEFINED_COLUMN),
+                                                errmsg("column \"%s\" does not 
exist",
+                                                               attname)));
+                       attForm = (Form_pg_attribute) GETSTRUCT(atttuple);
+
+                       /* Disallow use of system attributes in extended stats 
*/
+                       if (attForm->attnum <= 0)
+                               ereport(ERROR,
+                                               
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                                errmsg("statistics creation on 
system columns is not supported")));
+
+                       /* Disallow data types without a less-than operator */
+                       type = lookup_type_cache(attForm->atttypid, 
TYPECACHE_LT_OPR);
+                       if (type->lt_opr == InvalidOid)
+                               ereport(ERROR,
+                                               
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                                errmsg("column \"%s\" cannot 
be used in statistics because its type %s has no default btree operator class",
+                                                               attname, 
format_type_be(attForm->atttypid))));
+
+                       /* Make sure no more than STATS_MAX_DIMENSIONS columns 
are used */
+                       if (numcols >= STATS_MAX_DIMENSIONS)
+                               ereport(ERROR,
+                                               
(errcode(ERRCODE_TOO_MANY_COLUMNS),
+                                                errmsg("cannot have more than 
%d columns in statistics",
+                                                               
STATS_MAX_DIMENSIONS)));
+
+                       attnums[nattnums] = attForm->attnum;
+                       nattnums++;
+                       numcols++;
+                       ReleaseSysCache(atttuple);
+               }
+               else    /* expression */
+               {
+                       Node       *expr = selem->expr;
+                       TypeCacheEntry *type;
+                       Oid                     atttype;
+
+                       Assert(expr != NULL);
+
+                       /*
+                        * An expression using mutable functions is probably 
wrong,
+                        * since if you aren't going to get the same result for 
the
+                        * same data every time, it's not clear what the 
statistics
+                        * mean at all.
+                        */
+                       if (CheckMutability((Expr *) expr))
+                               ereport(ERROR,
+                                               
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                                                errmsg("functions in 
statistics expression must be marked IMMUTABLE")));
+
+                       /* Disallow data types without a less-than operator */
+                       atttype = exprType(expr);
+                       type = lookup_type_cache(atttype, TYPECACHE_LT_OPR);
+                       if (type->lt_opr == InvalidOid)
+                               ereport(ERROR,
+                                               
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                                errmsg("expression cannot be 
used in statistics because its type %s has no default btree operator class",
+                                                               
format_type_be(atttype))));
+
+                       /* Make sure no more than STATS_MAX_DIMENSIONS columns 
are used */
+                       if (numcols >= STATS_MAX_DIMENSIONS)
+                               ereport(ERROR,
+                                               
(errcode(ERRCODE_TOO_MANY_COLUMNS),
+                                                errmsg("cannot have more than 
%d columns in statistics",
+                                                               
STATS_MAX_DIMENSIONS)));
+
+                       numcols++;
+
+                       stxexprs = lappend(stxexprs, expr);
+               }
        }
 
        /*
@@ -258,13 +303,13 @@ CreateStatistics(CreateStatsStmt *stmt)
         * it does not hurt (it does not affect the efficiency, unlike for
         * indexes, for example).
         */
-       qsort(attnums, numcols, sizeof(int16), compare_int16);
+       qsort(attnums, nattnums, sizeof(int16), compare_int16);
 
        /*
         * Check for duplicates in the list of columns. The attnums are sorted 
so
         * just check consecutive elements.
         */
-       for (i = 1; i < numcols; i++)
+       for (i = 1; i < nattnums; i++)
        {
                if (attnums[i] == attnums[i - 1])
                        ereport(ERROR,
@@ -273,7 +318,7 @@ CreateStatistics(CreateStatsStmt *stmt)
        }
 
        /* Form an int2vector representation of the sorted column list */
-       stxkeys = buildint2vector(attnums, numcols);
+       stxkeys = buildint2vector(attnums, nattnums);
 
        /*
         * Parse the statistics kinds.
@@ -325,6 +370,18 @@ CreateStatistics(CreateStatsStmt *stmt)
        Assert(ntypes > 0 && ntypes <= lengthof(types));
        stxkind = construct_array(types, ntypes, CHAROID, 1, true, 
TYPALIGN_CHAR);
 
+       /* convert the expressions (if any) to a text datum */
+       if (stxexprs != NIL)
+       {
+               char       *exprsString;
+
+               exprsString = nodeToString(stxexprs);
+               exprsDatum = CStringGetTextDatum(exprsString);
+               pfree(exprsString);
+       }
+       else
+               exprsDatum = (Datum) 0;
+
        statrel = table_open(StatisticExtRelationId, RowExclusiveLock);
 
        /*
@@ -344,6 +401,15 @@ CreateStatistics(CreateStatsStmt *stmt)
        values[Anum_pg_statistic_ext_stxkeys - 1] = PointerGetDatum(stxkeys);
        values[Anum_pg_statistic_ext_stxkind - 1] = PointerGetDatum(stxkind);
 
+       values[Anum_pg_statistic_ext_stxexprs - 1] = exprsDatum;
+       if (exprsDatum == (Datum) 0)
+               nulls[Anum_pg_statistic_ext_stxexprs - 1] = true;
+
+       /*
+        * FIXME add dependencies on anything mentioned in the expressions,
+        * see recordDependencyOnSingleRelExpr in index_create
+        */
+
        /* insert it into pg_statistic_ext */
        htup = heap_form_tuple(statrel->rd_att, values, nulls);
        CatalogTupleInsert(statrel, htup);
@@ -387,7 +453,7 @@ CreateStatistics(CreateStatsStmt *stmt)
         */
        ObjectAddressSet(myself, StatisticExtRelationId, statoid);
 
-       for (i = 0; i < numcols; i++)
+       for (i = 0; i < nattnums; i++)
        {
                ObjectAddressSubSet(parentobject, RelationRelationId, relid, 
attnums[i]);
                recordDependencyOn(&myself, &parentobject, DEPENDENCY_AUTO);
@@ -722,14 +788,14 @@ ChooseExtendedStatisticNameAddition(List *exprs)
        buf[0] = '\0';
        foreach(lc, exprs)
        {
-               ColumnRef  *cref = (ColumnRef *) lfirst(lc);
+               StatsElem  *selem = (StatsElem *) lfirst(lc);
                const char *name;
 
                /* It should be one of these, but just skip if it happens not 
to be */
-               if (!IsA(cref, ColumnRef))
+               if (!IsA(selem, StatsElem))
                        continue;
 
-               name = strVal((Value *) linitial(cref->fields));
+               name = selem->name;
 
                if (buflen > 0)
                        buf[buflen++] = '_';    /* insert _ between names */
@@ -745,3 +811,29 @@ ChooseExtendedStatisticNameAddition(List *exprs)
        }
        return pstrdup(buf);
 }
+
+/*
+ * CheckMutability
+ *             Test whether the given expression contains mutable functions
+ */
+static bool
+CheckMutability(Expr *expr)
+{
+       /*
+        * First run the expression through the planner.  This has a couple of
+        * important consequences.  First, function default arguments will get
+        * inserted, which may affect volatility (consider "default now()").
+        * Second, inline-able functions will get inlined, which may allow us to
+        * conclude that the function is really less volatile than it's marked. 
As
+        * an example, polymorphic functions must be marked with the most 
volatile
+        * behavior that they have for any input type, but once we inline the
+        * function we may be able to conclude that it's not so volatile for the
+        * particular input type we're dealing with.
+        *
+        * We assume here that expression_planner() won't scribble on its input.
+        */
+       expr = expression_planner(expr);
+
+       /* Now we can search for non-immutable functions */
+       return contain_mutable_functions((Node *) expr);
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index e04c33e4ad..fee5d3b086 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -2883,6 +2883,17 @@ _copyIndexElem(const IndexElem *from)
        return newnode;
 }
 
+static StatsElem *
+_copyStatsElem(const StatsElem *from)
+{
+       StatsElem  *newnode = makeNode(StatsElem);
+
+       COPY_STRING_FIELD(name);
+       COPY_NODE_FIELD(expr);
+
+       return newnode;
+}
+
 static ColumnDef *
 _copyColumnDef(const ColumnDef *from)
 {
@@ -5566,6 +5577,9 @@ copyObjectImpl(const void *from)
                case T_IndexElem:
                        retval = _copyIndexElem(from);
                        break;
+               case T_StatsElem:
+                       retval = _copyStatsElem(from);
+                       break;
                case T_ColumnDef:
                        retval = _copyColumnDef(from);
                        break;
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 5b1ba143b1..956420cce9 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -2569,6 +2569,16 @@ _equalIndexElem(const IndexElem *a, const IndexElem *b)
        return true;
 }
 
+
+static bool
+_equalStatsElem(const StatsElem *a, const StatsElem *b)
+{
+       COMPARE_STRING_FIELD(name);
+       COMPARE_NODE_FIELD(expr);
+
+       return true;
+}
+
 static bool
 _equalColumnDef(const ColumnDef *a, const ColumnDef *b)
 {
@@ -3662,6 +3672,9 @@ equal(const void *a, const void *b)
                case T_IndexElem:
                        retval = _equalIndexElem(a, b);
                        break;
+               case T_StatsElem:
+                       retval = _equalStatsElem(a, b);
+                       break;
                case T_ColumnDef:
                        retval = _equalColumnDef(a, b);
                        break;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index e084c3f069..dabf62ed55 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -2873,6 +2873,15 @@ _outIndexElem(StringInfo str, const IndexElem *node)
        WRITE_ENUM_FIELD(nulls_ordering, SortByNulls);
 }
 
+static void
+_outStatsElem(StringInfo str, const StatsElem *node)
+{
+       WRITE_NODE_TYPE("STATSELEM");
+
+       WRITE_STRING_FIELD(name);
+       WRITE_NODE_FIELD(expr);
+}
+
 static void
 _outQuery(StringInfo str, const Query *node)
 {
@@ -4179,6 +4188,9 @@ outNode(StringInfo str, const void *obj)
                        case T_IndexElem:
                                _outIndexElem(str, obj);
                                break;
+                       case T_StatsElem:
+                               _outStatsElem(str, obj);
+                               break;
                        case T_Query:
                                _outQuery(str, obj);
                                break;
diff --git a/src/backend/optimizer/util/plancat.c 
b/src/backend/optimizer/util/plancat.c
index d82fc5ab8b..01130c5779 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -34,6 +34,7 @@
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
 #include "nodes/supportnodes.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
@@ -1304,6 +1305,7 @@ get_relation_statistics(RelOptInfo *rel, Relation 
relation)
                HeapTuple       dtup;
                Bitmapset  *keys = NULL;
                int                     i;
+               List       *exprs = NIL;
 
                htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
                if (!HeapTupleIsValid(htup))
@@ -1322,6 +1324,41 @@ get_relation_statistics(RelOptInfo *rel, Relation 
relation)
                for (i = 0; i < staForm->stxkeys.dim1; i++)
                        keys = bms_add_member(keys, staForm->stxkeys.values[i]);
 
+               /*
+                * preprocess expression (if any)
+                *
+                * FIXME we probably need to cache the result somewhere
+                */
+               {
+                       bool            isnull;
+                       Datum           datum;
+
+                       /* decode expression (if any) */
+                       datum = SysCacheGetAttr(STATEXTOID, htup,
+                                                                       
Anum_pg_statistic_ext_stxexprs, &isnull);
+
+                       if (!isnull)
+                       {
+                               char *exprsString;
+
+                               exprsString = TextDatumGetCString(datum);
+                               exprs = (List *) stringToNode(exprsString);
+                               pfree(exprsString);
+
+                               /*
+                                * Run the expressions through 
eval_const_expressions. This is not just an
+                                * optimization, but is necessary, because the 
planner will be comparing
+                                * them to similarly-processed qual clauses, 
and may fail to detect valid
+                                * matches without this.  We must not use 
canonicalize_qual, however,
+                                * since these aren't qual expressions.
+                                */
+                               exprs = (List *) eval_const_expressions(NULL, 
(Node *) exprs);
+
+                               /* May as well fix opfuncids too */
+                               fix_opfuncids((Node *) exprs);
+                       }
+               }
+
                /* add one StatisticExtInfo for each kind built */
                if (statext_is_kind_built(dtup, STATS_EXT_NDISTINCT))
                {
@@ -1331,6 +1368,7 @@ get_relation_statistics(RelOptInfo *rel, Relation 
relation)
                        info->rel = rel;
                        info->kind = STATS_EXT_NDISTINCT;
                        info->keys = bms_copy(keys);
+                       info->exprs = exprs;
 
                        stainfos = lappend(stainfos, info);
                }
@@ -1343,6 +1381,7 @@ get_relation_statistics(RelOptInfo *rel, Relation 
relation)
                        info->rel = rel;
                        info->kind = STATS_EXT_DEPENDENCIES;
                        info->keys = bms_copy(keys);
+                       info->exprs = exprs;
 
                        stainfos = lappend(stainfos, info);
                }
@@ -1355,6 +1394,7 @@ get_relation_statistics(RelOptInfo *rel, Relation 
relation)
                        info->rel = rel;
                        info->kind = STATS_EXT_MCV;
                        info->keys = bms_copy(keys);
+                       info->exprs = exprs;
 
                        stainfos = lappend(stainfos, info);
                }
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 96e7fdbcfe..90204b5768 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -225,6 +225,7 @@ static Node *makeRecursiveViewSelect(char *relname, List 
*aliases, Node *query);
        WindowDef                       *windef;
        JoinExpr                        *jexpr;
        IndexElem                       *ielem;
+       StatsElem                       *selem;
        Alias                           *alias;
        RangeVar                        *range;
        IntoClause                      *into;
@@ -386,7 +387,7 @@ static Node *makeRecursiveViewSelect(char *relname, List 
*aliases, Node *query);
                                old_aggr_definition old_aggr_list
                                oper_argtypes RuleActionList RuleActionMulti
                                opt_column_list columnList opt_name_list
-                               sort_clause opt_sort_clause sortby_list 
index_params
+                               sort_clause opt_sort_clause sortby_list 
index_params stats_params
                                opt_include opt_c_include index_including_params
                                name_list role_list from_clause from_list 
opt_array_bounds
                                qualified_name_list any_name any_name_list 
type_name_list
@@ -494,6 +495,7 @@ static Node *makeRecursiveViewSelect(char *relname, List 
*aliases, Node *query);
 %type <list>   func_alias_clause
 %type <sortby> sortby
 %type <ielem>  index_elem
+%type <selem>  stats_param
 %type <node>   table_ref
 %type <jexpr>  joined_table
 %type <range>  relation_expr
@@ -3982,7 +3984,7 @@ ExistingIndex:   USING INDEX index_name                   
        { $$ = $3; }
 
 CreateStatsStmt:
                        CREATE STATISTICS any_name
-                       opt_name_list ON expr_list FROM from_list
+                       opt_name_list ON stats_params FROM from_list
                                {
                                        CreateStatsStmt *n = 
makeNode(CreateStatsStmt);
                                        n->defnames = $3;
@@ -3994,7 +3996,7 @@ CreateStatsStmt:
                                        $$ = (Node *)n;
                                }
                        | CREATE STATISTICS IF_P NOT EXISTS any_name
-                       opt_name_list ON expr_list FROM from_list
+                       opt_name_list ON stats_params FROM from_list
                                {
                                        CreateStatsStmt *n = 
makeNode(CreateStatsStmt);
                                        n->defnames = $6;
@@ -4007,6 +4009,29 @@ CreateStatsStmt:
                                }
                        ;
 
+stats_params:  stats_param                                                     
{ $$ = list_make1($1); }
+                       | stats_params ',' stats_param                  { $$ = 
lappend($1, $3); }
+               ;
+
+stats_param:   ColId
+                               {
+                                       $$ = makeNode(StatsElem);
+                                       $$->name = $1;
+                                       $$->expr = NULL;
+                               }
+                       | func_expr_windowless
+                               {
+                                       $$ = makeNode(StatsElem);
+                                       $$->name = NULL;
+                                       $$->expr = $1;
+                               }
+                       | '(' a_expr ')'
+                               {
+                                       $$ = makeNode(StatsElem);
+                                       $$->name = NULL;
+                                       $$->expr = $2;
+                               }
+               ;
 
 /*****************************************************************************
  *
diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c
index f1cc5479e4..169a31bf37 100644
--- a/src/backend/parser/parse_agg.c
+++ b/src/backend/parser/parse_agg.c
@@ -484,6 +484,13 @@ check_agglevels_and_constraints(ParseState *pstate, Node 
*expr)
                        else
                                err = _("grouping operations are not allowed in 
index predicates");
 
+                       break;
+               case EXPR_KIND_STATS_EXPRESSION:
+                       if (isAgg)
+                               err = _("aggregate functions are not allowed in 
statistics expressions");
+                       else
+                               err = _("grouping operations are not allowed in 
statistics expressions");
+
                        break;
                case EXPR_KIND_ALTER_COL_TRANSFORM:
                        if (isAgg)
@@ -906,6 +913,9 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc 
*wfunc,
                case EXPR_KIND_INDEX_EXPRESSION:
                        err = _("window functions are not allowed in index 
expressions");
                        break;
+               case EXPR_KIND_STATS_EXPRESSION:
+                       err = _("window functions are not allowed in stats 
expressions");
+                       break;
                case EXPR_KIND_INDEX_PREDICATE:
                        err = _("window functions are not allowed in index 
predicates");
                        break;
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 831db4af95..6ddd839654 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -564,6 +564,7 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
                case EXPR_KIND_FUNCTION_DEFAULT:
                case EXPR_KIND_INDEX_EXPRESSION:
                case EXPR_KIND_INDEX_PREDICATE:
+               case EXPR_KIND_STATS_EXPRESSION:
                case EXPR_KIND_ALTER_COL_TRANSFORM:
                case EXPR_KIND_EXECUTE_PARAMETER:
                case EXPR_KIND_TRIGGER_WHEN:
@@ -1913,6 +1914,9 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
                case EXPR_KIND_INDEX_PREDICATE:
                        err = _("cannot use subquery in index predicate");
                        break;
+               case EXPR_KIND_STATS_EXPRESSION:
+                       err = _("cannot use subquery in statistics expression");
+                       break;
                case EXPR_KIND_ALTER_COL_TRANSFORM:
                        err = _("cannot use subquery in transform expression");
                        break;
@@ -3543,6 +3547,8 @@ ParseExprKindName(ParseExprKind exprKind)
                        return "index expression";
                case EXPR_KIND_INDEX_PREDICATE:
                        return "index predicate";
+               case EXPR_KIND_STATS_EXPRESSION:
+                       return "statistics expression";
                case EXPR_KIND_ALTER_COL_TRANSFORM:
                        return "USING";
                case EXPR_KIND_EXECUTE_PARAMETER:
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index 9c3b6ad916..cffc276de0 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -2495,6 +2495,9 @@ check_srf_call_placement(ParseState *pstate, Node 
*last_srf, int location)
                case EXPR_KIND_INDEX_PREDICATE:
                        err = _("set-returning functions are not allowed in 
index predicates");
                        break;
+               case EXPR_KIND_STATS_EXPRESSION:
+                       err = _("set-returning functions are not allowed in 
stats expressions");
+                       break;
                case EXPR_KIND_ALTER_COL_TRANSFORM:
                        err = _("set-returning functions are not allowed in 
transform expressions");
                        break;
diff --git a/src/backend/parser/parse_utilcmd.c 
b/src/backend/parser/parse_utilcmd.c
index af77f1890f..f63068e5fc 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -1747,14 +1747,15 @@ generateClonedExtStatsStmt(RangeVar *heapRel, Oid 
heapRelid,
        /* Determine which columns the statistics are on */
        for (i = 0; i < statsrec->stxkeys.dim1; i++)
        {
-               ColumnRef  *cref = makeNode(ColumnRef);
+               StatsElem  *selem = makeNode(StatsElem);
                AttrNumber      attnum = statsrec->stxkeys.values[i];
 
-               cref->fields = list_make1(makeString(get_attname(heapRelid,
-                                                                               
                                 attnum, false)));
-               cref->location = -1;
+               selem->name = get_attname(heapRelid, attnum, false);
+               selem->expr = NULL;
 
-               def_names = lappend(def_names, cref);
+               /* FIXME handle expressions properly */
+
+               def_names = lappend(def_names, selem);
        }
 
        /* finally, build the output node */
@@ -2699,6 +2700,84 @@ transformIndexStmt(Oid relid, IndexStmt *stmt, const 
char *queryString)
        return stmt;
 }
 
+/*
+ * transformStatsStmt - parse analysis for CREATE STATISTICS
+ *
+ * relid is the OID of the table the statistics are defined on, stmt is the
+ * statement to process, and queryString is stored in the ParseState as the
+ * source text.  Each StatsElem in stmt->exprs that carries an expression
+ * has that expression run through transformExpr and gets its collations
+ * assigned; elements without an expression are left alone.
+ *
+ * Returns stmt unchanged if it is already marked as transformed; otherwise
+ * returns a transformed copy, leaving the passed-in statement unmodified.
+ *
+ * To avoid race conditions, it's important that this function rely only on
+ * the passed-in relid (and not on stmt->relation) to determine the target
+ * relation.
+ */
+CreateStatsStmt *
+transformStatsStmt(Oid relid, CreateStatsStmt *stmt, const char *queryString)
+{
+       ParseState *pstate;
+       RangeTblEntry *rte;
+       ListCell   *l;
+       Relation        rel;
+
+       /* Nothing to do if statement already transformed. */
+       if (stmt->transformed)
+               return stmt;
+
+       /*
+        * We must not scribble on the passed-in CreateStatsStmt, so copy it.
+        * (This is overkill, but easy.)
+        */
+       stmt = copyObject(stmt);
+
+       /* Set up pstate */
+       pstate = make_parsestate(NULL);
+       pstate->p_sourcetext = queryString;
+
+       /*
+        * Put the parent table into the rtable so that the expressions can refer
+        * to its fields without qualification.  Caller is responsible for locking
+        * relation, but we still need to open it.
+        */
+       rel = relation_open(relid, NoLock);
+       rte = addRangeTableEntryForRelation(pstate, rel,
+                                                                               AccessShareLock,
+                                                                               NULL, false, true);
+
+       /* no to join list, yes to namespaces */
+       addRTEtoQuery(pstate, rte, false, true, true);
+
+       /* take care of any expressions */
+       foreach(l, stmt->exprs)
+       {
+               StatsElem  *selem = (StatsElem *) lfirst(l);
+
+               if (selem->expr)
+               {
+                       /* Now do parse transformation of the expression */
+                       selem->expr = transformExpr(pstate, selem->expr,
+                                                                               EXPR_KIND_STATS_EXPRESSION);
+
+                       /* We have to fix its collations too */
+                       assign_expr_collations(pstate, selem->expr);
+               }
+       }
+
+       /*
+        * Check that only the base rel is mentioned.  (This should be dead code
+        * now that add_missing_from is history.)
+        *
+        * The message talks about statistics, not indexes: this is CREATE
+        * STATISTICS, and there is no predicate to speak of.
+        */
+       if (list_length(pstate->p_rtable) != 1)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+                                errmsg("statistics expressions can refer only to the table being referenced")));
+
+       free_parsestate(pstate);
+
+       /* Close relation */
+       table_close(rel, NoLock);
+
+       /* Mark statement as successfully transformed */
+       stmt->transformed = true;
+
+       return stmt;
+}
+
 
 /*
  * transformRuleStmt -
diff --git a/src/backend/statistics/dependencies.c 
b/src/backend/statistics/dependencies.c
index e2f6c5bb97..76afb0ea2a 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -69,8 +69,10 @@ static void generate_dependencies(DependencyGenerator state);
 static DependencyGenerator DependencyGenerator_init(int n, int k);
 static void DependencyGenerator_free(DependencyGenerator state);
 static AttrNumber *DependencyGenerator_next(DependencyGenerator state);
-static double dependency_degree(int numrows, HeapTuple *rows, int k,
-                                                               AttrNumber 
*dependency, VacAttrStats **stats, Bitmapset *attrs);
+static double dependency_degree(int numrows, HeapTuple *rows,
+                                                               Datum 
*exprvals, bool *exprnulls, int nexprs, int k,
+                                                               AttrNumber 
*dependency, VacAttrStats **stats,
+                                                               Bitmapset 
*attrs);
 static bool dependency_is_fully_matched(MVDependency *dependency,
                                                                                
Bitmapset *attnums);
 static bool dependency_implies_attribute(MVDependency *dependency,
@@ -213,8 +215,8 @@ DependencyGenerator_next(DependencyGenerator state)
  * the last one.
  */
 static double
-dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
-                                 VacAttrStats **stats, Bitmapset *attrs)
+dependency_degree(int numrows, HeapTuple *rows, Datum *exprvals, bool 
*exprnulls,
+                                 int nexprs, int k, AttrNumber *dependency, 
VacAttrStats **stats, Bitmapset *attrs)
 {
        int                     i,
                                nitems;
@@ -283,8 +285,8 @@ dependency_degree(int numrows, HeapTuple *rows, int k, 
AttrNumber *dependency,
         * descriptor.  For now that assumption holds, but it might change in 
the
         * future for example if we support statistics on multiple tables.
         */
-       items = build_sorted_items(numrows, &nitems, rows, stats[0]->tupDesc,
-                                                          mss, k, attnums_dep);
+       items = build_sorted_items(numrows, &nitems, rows, exprvals, exprnulls,
+                                                          nexprs, 
stats[0]->tupDesc, mss, k, attnums_dep);
 
        /*
         * Walk through the sorted array, split it into rows according to the
@@ -354,7 +356,9 @@ dependency_degree(int numrows, HeapTuple *rows, int k, 
AttrNumber *dependency,
  *        (c) -> b
  */
 MVDependencies *
-statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
+statext_dependencies_build(int numrows, HeapTuple *rows,
+                                                  Datum *exprvals, bool 
*exprnulls,
+                                                  Bitmapset *attrs, List 
*exprs,
                                                   VacAttrStats **stats)
 {
        int                     i,
@@ -365,6 +369,15 @@ statext_dependencies_build(int numrows, HeapTuple *rows, 
Bitmapset *attrs,
        /* result */
        MVDependencies *dependencies = NULL;
 
+       /*
+        * Copy the bitmapset and add fake attnums representing expressions,
+        * starting above MaxHeapAttributeNumber.
+        */
+       attrs = bms_copy(attrs);
+
+       for (i = 1; i <= list_length(exprs); i++)
+               attrs = bms_add_member(attrs, MaxHeapAttributeNumber + i);
+
        /*
         * Transform the bms into an array, to make accessing i-th member 
easier.
         */
@@ -392,7 +405,9 @@ statext_dependencies_build(int numrows, HeapTuple *rows, 
Bitmapset *attrs,
                        MVDependency *d;
 
                        /* compute how valid the dependency seems */
-                       degree = dependency_degree(numrows, rows, k, 
dependency, stats, attrs);
+                       degree = dependency_degree(numrows, rows, exprvals, 
exprnulls,
+                                                                          
list_length(exprs), k, dependency,
+                                                                          
stats, attrs);
 
                        /*
                         * if the dependency seems entirely invalid, don't 
store it
@@ -435,6 +450,8 @@ statext_dependencies_build(int numrows, HeapTuple *rows, 
Bitmapset *attrs,
                DependencyGenerator_free(DependencyGenerator);
        }
 
+       pfree(attrs);
+
        return dependencies;
 }
 
@@ -914,6 +931,128 @@ find_strongest_dependency(MVDependencies **dependencies, 
int ndependencies,
        return strongest;
 }
 
+/*
+ * dependency_clause_matches_expression
+ *             Check whether a clause tests one of the expressions that extended
+ *             statistics in 'statlist' are defined on.
+ *
+ * Similar to dependency_is_compatible_clause, but don't enforce that the
+ * expression is a simple Var.  The clause must be a RestrictInfo that is
+ * not a pseudoconstant and references exactly one relation.  Three clause
+ * shapes are accepted:
+ *
+ *   - "expr op Const" or "Const op expr", where the operator's restriction
+ *     estimator is eqsel,
+ *   - "NOT expr",
+ *   - any other clause, which is taken as a bare boolean "expr".
+ *
+ * All Vars in the expression have to be user-defined attributes of 'relid'
+ * at the current query level.  Returns true if the expression (minus a
+ * top-level RelabelType) is equal() to an entry in the exprs list of some
+ * StatisticExtInfo in 'statlist', and false otherwise.
+ */
+static bool
+dependency_clause_matches_expression(Node *clause, Index relid, List *statlist)
+{
+       List       *vars;
+       ListCell   *lc, *lc2;
+
+       RestrictInfo *rinfo = (RestrictInfo *) clause;
+       Node               *clause_expr;
+
+       if (!IsA(rinfo, RestrictInfo))
+               return false;
+
+       /* Pseudoconstants are not interesting (they couldn't contain a Var) */
+       if (rinfo->pseudoconstant)
+               return false;
+
+       /* Clauses referencing multiple, or no, varnos are incompatible */
+       if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
+               return false;
+
+       if (is_opclause(rinfo->clause))
+       {
+               /* If it's an opclause, check for expr = Const or Const = expr. */
+               OpExpr     *expr = (OpExpr *) rinfo->clause;
+
+               /* Only expressions with two arguments are candidates. */
+               if (list_length(expr->args) != 2)
+                       return false;
+
+               /* Make sure non-selected argument is a pseudoconstant. */
+               if (is_pseudo_constant_clause(lsecond(expr->args)))
+                       clause_expr = linitial(expr->args);
+               else if (is_pseudo_constant_clause(linitial(expr->args)))
+                       clause_expr = lsecond(expr->args);
+               else
+                       return false;
+
+               /*
+                * If it's not an "=" operator, just ignore the clause, as it's not
+                * compatible with functional dependencies.
+                *
+                * This uses the function for estimating selectivity, not the operator
+                * directly (a bit awkward, but well ...).
+                *
+                * XXX this is pretty dubious; probably it'd be better to check btree
+                * or hash opclass membership, so as not to be fooled by custom
+                * selectivity functions, and to be more consistent with decisions
+                * elsewhere in the planner.
+                */
+               if (get_oprrest(expr->opno) != F_EQSEL)
+                       return false;
+
+               /* OK to proceed with checking the expression */
+       }
+       else if (is_notclause(rinfo->clause))
+       {
+               /*
+                * "NOT x" can be interpreted as "x = false", so get the argument and
+                * proceed with seeing if it's a suitable expression.
+                */
+               clause_expr = (Node *) get_notclausearg(rinfo->clause);
+       }
+       else
+       {
+               /*
+                * A boolean expression "x" can be interpreted as "x = true", so
+                * proceed with seeing if it's a suitable expression.
+                */
+               clause_expr = (Node *) rinfo->clause;
+       }
+
+       /*
+        * We may ignore any RelabelType node above the operand.  (There won't be
+        * more than one, since eval_const_expressions has been applied already.)
+        */
+       if (IsA(clause_expr, RelabelType))
+               clause_expr = (Node *) ((RelabelType *) clause_expr)->arg;
+
+       /*
+        * Inspect every Var the expression is built from.  The list only points
+        * into the expression tree, so freeing the list cells is all the cleanup
+        * needed once we're done looking.
+        */
+       vars = pull_var_clause(clause_expr, 0);
+
+       foreach (lc, vars)
+       {
+               Var *var = (Var *) lfirst(lc);
+
+               /*
+                * The Var has to come from the correct relation and from the
+                * current query level.  System attributes are ignored too (we don't
+                * allow stats on those).
+                */
+               if (var->varno != relid ||
+                       var->varlevelsup != 0 ||
+                       !AttrNumberIsForUserDefinedAttr(var->varattno))
+               {
+                       list_free(vars);
+                       return false;
+               }
+       }
+
+       list_free(vars);
+
+       /* Look for the expression in any of the statistics objects. */
+       foreach (lc, statlist)
+       {
+               StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
+
+               foreach (lc2, info->exprs)
+               {
+                       Node *expr = (Node *) lfirst(lc2);
+
+                       if (equal(clause_expr, expr))
+                               return true;
+               }
+       }
+
+       return false;
+}
+
 /*
  * dependencies_clauselist_selectivity
  *             Return the estimated selectivity of (a subset of) the given 
clauses
@@ -982,8 +1121,10 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
                Node       *clause = (Node *) lfirst(l);
                AttrNumber      attnum;
 
+               dependency_clause_matches_expression(clause, rel->relid, 
rel->statlist);
+
                if (!bms_is_member(listidx, *estimatedclauses) &&
-                       dependency_is_compatible_clause(clause, rel->relid, 
&attnum))
+                        dependency_is_compatible_clause(clause, rel->relid, 
&attnum))
                {
                        list_attnums[listidx] = bms_make_singleton(attnum);
                        clauses_attnums = bms_add_member(clauses_attnums, 
attnum);
diff --git a/src/backend/statistics/extended_stats.c 
b/src/backend/statistics/extended_stats.c
index 1872cd4529..9f70db7377 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -24,6 +24,7 @@
 #include "catalog/pg_collation.h"
 #include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_statistic_ext_data.h"
+#include "executor/executor.h"
 #include "commands/progress.h"
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
@@ -65,11 +66,12 @@ typedef struct StatExtEntry
        Bitmapset  *columns;            /* attribute numbers covered by the 
object */
        List       *types;                      /* 'char' list of enabled 
statistic kinds */
        int                     stattarget;             /* statistics target 
(-1 for default) */
+       List       *exprs;                      /* expressions */
 } StatExtEntry;
 
 
 static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
-static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
+static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs, 
List *exprs,
                                                                                
        int nvacatts, VacAttrStats **vacatts);
 static void statext_store(Oid relid,
                                                  MVNDistinct *ndistinct, 
MVDependencies *dependencies,
@@ -130,11 +132,15 @@ BuildRelationExtStatistics(Relation onerel, double 
totalrows,
                ListCell   *lc2;
                int                     stattarget;
 
+               /* evaluated expressions */
+               Datum      *exprvals = NULL;
+               bool       *exprnulls = NULL;
+
                /*
                 * Check if we can build these stats based on the column 
analyzed. If
                 * not, report this fact (except in autovacuum) and move on.
                 */
-               stats = lookup_var_attr_stats(onerel, stat->columns,
+               stats = lookup_var_attr_stats(onerel, stat->columns, 
stat->exprs,
                                                                          
natts, vacattrstats);
                if (!stats)
                {
@@ -150,8 +156,8 @@ BuildRelationExtStatistics(Relation onerel, double 
totalrows,
                }
 
                /* check allowed number of dimensions */
-               Assert(bms_num_members(stat->columns) >= 2 &&
-                          bms_num_members(stat->columns) <= 
STATS_MAX_DIMENSIONS);
+               Assert(bms_num_members(stat->columns) + 
list_length(stat->exprs) >= 2 &&
+                          bms_num_members(stat->columns) + 
list_length(stat->exprs) <= STATS_MAX_DIMENSIONS);
 
                /* compute statistics target for this statistics */
                stattarget = statext_compute_stattarget(stat->stattarget,
@@ -166,6 +172,78 @@ BuildRelationExtStatistics(Relation onerel, double 
totalrows,
                if (stattarget == 0)
                        continue;
 
+               if (stat->exprs)
+               {
+                       int                     i;
+                       int                     idx;
+                       TupleTableSlot *slot;
+                       EState     *estate;
+                       ExprContext *econtext;
+                       List       *exprstates = NIL;
+
+                       /*
+                        * Need an EState for evaluation of index expressions 
and
+                        * partial-index predicates.  Create it in the 
per-index context to be
+                        * sure it gets cleaned up at the bottom of the loop.
+                        */
+                       estate = CreateExecutorState();
+                       econtext = GetPerTupleExprContext(estate);
+                       /* Need a slot to hold the current heap tuple, too */
+                       slot = 
MakeSingleTupleTableSlot(RelationGetDescr(onerel),
+                                                                               
        &TTSOpsHeapTuple);
+
+                       /* Arrange for econtext's scan tuple to be the tuple 
under test */
+                       econtext->ecxt_scantuple = slot;
+
+                       /* Compute and save index expression values */
+                       exprvals = (Datum *) palloc(numrows * 
list_length(stat->exprs) * sizeof(Datum));
+                       exprnulls = (bool *) palloc(numrows * 
list_length(stat->exprs) * sizeof(bool));
+
+                       /* Set up expression evaluation state */
+                       exprstates = ExecPrepareExprList(stat->exprs, estate);
+
+                       idx = 0;
+                       for (i = 0; i < numrows; i++)
+                       {
+                               /*
+                                * Reset the per-tuple context each time, to 
reclaim any cruft
+                                * left behind by evaluating the predicate or 
index expressions.
+                                */
+                               ResetExprContext(econtext);
+
+                               /* Set up for predicate or expression 
evaluation */
+                               ExecStoreHeapTuple(rows[i], slot, false);
+
+                               foreach (lc2, exprstates)
+                               {
+                                       Datum   datum;
+                                       bool    isnull;
+                                       ExprState *exprstate = (ExprState *) 
lfirst(lc2);
+
+                                       datum = 
ExecEvalExprSwitchContext(exprstate,
+                                                                               
           GetPerTupleExprContext(estate),
+                                                                               
           &isnull);
+                                       if (isnull)
+                                       {
+                                               exprvals[idx] = (Datum) 0;
+                                               exprnulls[idx] = true;
+                                       }
+                                       else
+                                       {
+                                               exprvals[idx] = (Datum) datum;
+                                               exprnulls[idx] = false;
+                                       }
+
+                                       idx++;
+                               }
+                       }
+
+                       ExecDropSingleTupleTableSlot(slot);
+                       FreeExecutorState(estate);
+
+                       elog(WARNING, "idx = %d", idx);
+               }
+
                /* compute statistic of each requested type */
                foreach(lc2, stat->types)
                {
@@ -173,13 +251,19 @@ BuildRelationExtStatistics(Relation onerel, double 
totalrows,
 
                        if (t == STATS_EXT_NDISTINCT)
                                ndistinct = statext_ndistinct_build(totalrows, 
numrows, rows,
-                                                                               
                        stat->columns, stats);
+                                                                               
                        exprvals, exprnulls,
+                                                                               
                        stat->columns, stat->exprs,
+                                                                               
                        stats);
                        else if (t == STATS_EXT_DEPENDENCIES)
                                dependencies = 
statext_dependencies_build(numrows, rows,
-                                                                               
                                  stat->columns, stats);
+                                                                               
                                  exprvals, exprnulls,
+                                                                               
                                  stat->columns,
+                                                                               
                                  stat->exprs, stats);
                        else if (t == STATS_EXT_MCV)
-                               mcv = statext_mcv_build(numrows, rows, 
stat->columns, stats,
-                                                                               
totalrows, stattarget);
+                               mcv = statext_mcv_build(numrows, rows,
+                                                                               
exprvals, exprnulls,
+                                                                               
stat->columns, stat->exprs,
+                                                                               
stats, totalrows, stattarget);
                }
 
                /* store the statistics in the catalog */
@@ -240,7 +324,7 @@ ComputeExtStatisticsRows(Relation onerel,
                 * analyzed. If not, ignore it (don't report anything, we'll do 
that
                 * during the actual build BuildRelationExtStatistics).
                 */
-               stats = lookup_var_attr_stats(onerel, stat->columns,
+               stats = lookup_var_attr_stats(onerel, stat->columns, 
stat->exprs,
                                                                          
natts, vacattrstats);
 
                if (!stats)
@@ -387,6 +471,7 @@ fetch_statentries_for_relation(Relation pg_statext, Oid 
relid)
                ArrayType  *arr;
                char       *enabled;
                Form_pg_statistic_ext staForm;
+               List       *exprs = NIL;
 
                entry = palloc0(sizeof(StatExtEntry));
                staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
@@ -418,6 +503,34 @@ fetch_statentries_for_relation(Relation pg_statext, Oid 
relid)
                        entry->types = lappend_int(entry->types, (int) 
enabled[i]);
                }
 
+               /* decode expression (if any) */
+               datum = SysCacheGetAttr(STATEXTOID, htup,
+                                                               
Anum_pg_statistic_ext_stxexprs, &isnull);
+
+               if (!isnull)
+               {
+                       char *exprsString;
+
+                       exprsString = TextDatumGetCString(datum);
+                       exprs = (List *) stringToNode(exprsString);
+
+                       pfree(exprsString);
+
+                       /*
+                        * Run the expressions through eval_const_expressions. 
This is not just an
+                        * optimization, but is necessary, because the planner 
will be comparing
+                        * them to similarly-processed qual clauses, and may 
fail to detect valid
+                        * matches without this.  We must not use 
canonicalize_qual, however,
+                        * since these aren't qual expressions.
+                        */
+                       exprs = (List *) eval_const_expressions(NULL, (Node *) 
exprs);
+
+                       /* May as well fix opfuncids too */
+                       fix_opfuncids((Node *) exprs);
+               }
+
+               entry->exprs = exprs;
+
                result = lappend(result, entry);
        }
 
@@ -426,6 +539,89 @@ fetch_statentries_for_relation(Relation pg_statext, Oid 
relid)
        return result;
 }
 
+
+/*
+ * examine_attribute -- pre-analysis of a single statistics expression
+ *
+ * Build a VacAttrStats struct describing 'expr', as if it were a column.
+ * The attribute part of the struct is faked, with only the statistics
+ * target set (to -1), and the type, typmod and collation are taken from
+ * the expression tree.
+ *
+ * Returns the new struct, or NULL if the type's typanalyze function (or
+ * std_typanalyze, when the type has none) reports failure, leaves
+ * compute_stats unset, or asks for a non-positive number of rows.
+ */
+static VacAttrStats *
+examine_attribute(Node *expr)
+{
+       HeapTuple       typtuple;
+       VacAttrStats *stats;
+       int                     i;
+       bool            ok;
+
+       /*
+        * Create the VacAttrStats struct.  It starts out zeroed, so any field
+        * not assigned below is 0/NULL.
+        */
+       stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));
+
+       /* fake the attribute */
+       stats->attr = (Form_pg_attribute) palloc0(ATTRIBUTE_FIXED_PART_SIZE);
+       stats->attr->attstattarget = -1;
+
+       /*
+        * The faked attribute carries no type information, so the expression
+        * tree is the only source for it.  It's not clear whether anyone will
+        * care about the typmod, but we store that too just in case.
+        */
+       stats->attrtypid = exprType(expr);
+       stats->attrtypmod = exprTypmod(expr);
+       stats->attrcollid = exprCollation(expr);
+
+       typtuple = SearchSysCacheCopy1(TYPEOID,
+                                                                  ObjectIdGetDatum(stats->attrtypid));
+       if (!HeapTupleIsValid(typtuple))
+               elog(ERROR, "cache lookup failed for type %u", stats->attrtypid);
+       stats->attrtype = (Form_pg_type) GETSTRUCT(typtuple);
+
+       /*
+        * No dedicated analyze context is passed in, so point anl_context at
+        * the current one rather than leaving it NULL.
+        *
+        * XXX should this be something longer-lived?
+        */
+       stats->anl_context = CurrentMemoryContext;
+
+       /* an expression has no attribute number in the sampled tuples */
+       stats->tupattnum = InvalidAttrNumber;
+
+       /*
+        * The fields describing the stats->stavalues[n] element types default to
+        * the type of the data being analyzed, but the type-specific typanalyze
+        * function can change them if it wants to store something else.
+        */
+       for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+       {
+               stats->statypid[i] = stats->attrtypid;
+               stats->statyplen[i] = stats->attrtype->typlen;
+               stats->statypbyval[i] = stats->attrtype->typbyval;
+               stats->statypalign[i] = stats->attrtype->typalign;
+       }
+
+       /*
+        * Call the type-specific typanalyze function.  If none is specified, use
+        * std_typanalyze().
+        */
+       if (OidIsValid(stats->attrtype->typanalyze))
+               ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,
+                                                                                  PointerGetDatum(stats)));
+       else
+               ok = std_typanalyze(stats);
+
+       /* Not analyzable: release everything allocated above. */
+       if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)
+       {
+               heap_freetuple(typtuple);
+               pfree(stats->attr);
+               pfree(stats);
+               return NULL;
+       }
+
+       return stats;
+}
+
 /*
  * Using 'vacatts' of size 'nvacatts' as input data, return a newly built
  * VacAttrStats array which includes only the items corresponding to
@@ -434,15 +630,18 @@ fetch_statentries_for_relation(Relation pg_statext, Oid 
relid)
  * to the caller that the stats should not be built.
  */
 static VacAttrStats **
-lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
+lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs,
                                          int nvacatts, VacAttrStats **vacatts)
 {
        int                     i = 0;
        int                     x = -1;
+       int                     natts;
        VacAttrStats **stats;
+       ListCell   *lc;
 
-       stats = (VacAttrStats **)
-               palloc(bms_num_members(attrs) * sizeof(VacAttrStats *));
+       natts = bms_num_members(attrs) + list_length(exprs);
+
+       stats = (VacAttrStats **) palloc(natts * sizeof(VacAttrStats *));
 
        /* lookup VacAttrStats info for the requested columns (same attnum) */
        while ((x = bms_next_member(attrs, x)) >= 0)
@@ -476,6 +675,19 @@ lookup_var_attr_stats(Relation rel, Bitmapset *attrs,
                 */
                Assert(!stats[i]->attr->attisdropped);
 
+               elog(WARNING, "A: %d => %p", i, stats[i]);
+
+               i++;
+       }
+
+       foreach (lc, exprs)
+       {
+               Node *expr = (Node *) lfirst(lc);
+
+               stats[i] = examine_attribute(expr);
+
+               elog(WARNING, "B: %d => %p (%s)", i, stats[i], 
nodeToString(expr));
+
                i++;
        }
 
@@ -740,8 +952,10 @@ build_attnums_array(Bitmapset *attrs, int *numattrs)
  * can simply pfree the return value to release all of it.
  */
 SortItem *
-build_sorted_items(int numrows, int *nitems, HeapTuple *rows, TupleDesc tdesc,
-                                  MultiSortSupport mss, int numattrs, 
AttrNumber *attnums)
+build_sorted_items(int numrows, int *nitems, HeapTuple *rows,
+                                  Datum *exprvals, bool *exprnulls, int nexprs,
+                                  TupleDesc tdesc, MultiSortSupport mss,
+                                  int numattrs, AttrNumber *attnums)
 {
        int                     i,
                                j,
@@ -789,7 +1003,16 @@ build_sorted_items(int numrows, int *nitems, HeapTuple 
*rows, TupleDesc tdesc,
                        Datum           value;
                        bool            isnull;
 
-                       value = heap_getattr(rows[i], attnums[j], tdesc, 
&isnull);
+                       if (attnums[j] <= MaxHeapAttributeNumber)
+                               value = heap_getattr(rows[i], attnums[j], 
tdesc, &isnull);
+                       else
+                       {
+                               int     expridx = (attnums[j] - 
MaxHeapAttributeNumber - 1);
+                               int     idx = i * nexprs + expridx;
+
+                               value = exprvals[idx];
+                               isnull = exprnulls[idx];
+                       }
 
                        /*
                         * If this is a varlena value, check if it's too wide 
and if yes
@@ -1110,6 +1333,168 @@ statext_is_compatible_clause_internal(PlannerInfo 
*root, Node *clause,
        return false;
 }
 
+
+
+/*
+ * statext_extract_clause_internal
+ *             Extract the expressions referenced by a clause, provided the
+ *             clause is compatible with MCV lists.
+ *
+ * Does the heavy lifting of actually inspecting the clauses for
+ * statext_extract_clause. It needs to be split like this because of
+ * recursion.
+ *
+ * Returns a list of the Var nodes referenced by the clause (collected
+ * recursively from all sub-clauses), or NIL when the clause, or any of its
+ * sub-clauses, does not have a supported shape.
+ */
+static List *
+statext_extract_clause_internal(PlannerInfo *root, Node *clause, Index relid)
+{
+       List   *result = NIL;
+
+       /* Look inside any binary-compatible relabeling (as in examine_variable) */
+       if (IsA(clause, RelabelType))
+               clause = (Node *) ((RelabelType *) clause)->arg;
+
+       /* plain Var references (boolean Vars or recursive checks) */
+       if (IsA(clause, Var))
+       {
+               Var        *var = (Var *) clause;
+
+               /* Ensure var is from the correct relation */
+               if (var->varno != relid)
+                       return NIL;
+
+               /* we also better ensure the Var is from the current level */
+               if (var->varlevelsup > 0)
+                       return NIL;
+
+               /* Also skip system attributes (we don't allow stats on those). */
+               if (!AttrNumberIsForUserDefinedAttr(var->varattno))
+                       return NIL;
+
+               return lappend(result, clause);
+       }
+
+       /* (Var op Const), (Const op Var) or (Var op Var) */
+       if (is_opclause(clause))
+       {
+               RangeTblEntry *rte = root->simple_rte_array[relid];
+               OpExpr     *expr = (OpExpr *) clause;
+               Var        *var = NULL;
+               Var        *var2 = NULL;
+
+               /* Only expressions with two arguments are considered compatible. */
+               if (list_length(expr->args) != 2)
+                       return NIL;
+
+               /*
+                * Check if the expression has the right shape, i.e. it is accepted
+                * by either examine_opclause_expression (one Var, one Const) or
+                * examine_opclause_expression2 (which may also set a second Var).
+                */
+               if ((!examine_opclause_expression(expr, &var, NULL, NULL)) &&
+                       (!examine_opclause_expression2(expr, &var, &var2)))
+                       return NIL;
+
+               /*
+                * If it's not one of the supported operators ("=", "<", ">", etc.),
+                * just ignore the clause, as it's not compatible with MCV lists.
+                *
+                * This uses the function for estimating selectivity, not the
+                * operator directly (a bit awkward, but well ...).
+                */
+               switch (get_oprrest(expr->opno))
+               {
+                       case F_EQSEL:
+                       case F_NEQSEL:
+                       case F_SCALARLTSEL:
+                       case F_SCALARLESEL:
+                       case F_SCALARGTSEL:
+                       case F_SCALARGESEL:
+                               /* supported, will continue with inspection of the Var */
+                               break;
+
+                       default:
+                               /* other estimators are considered unknown/unsupported */
+                               return NIL;
+               }
+
+               /*
+                * If there are any securityQuals on the RTE from security barrier
+                * views or RLS policies, then the user may not have access to all
+                * the table's data, and we must check that the operator is
+                * leak-proof.
+                *
+                * If the operator is leaky, then we must ignore this clause for the
+                * purposes of estimating with MCV lists, otherwise the operator
+                * might reveal values from the MCV list that the user doesn't have
+                * permission to see.
+                */
+               if (rte->securityQuals != NIL &&
+                       !get_func_leakproof(get_opcode(expr->opno)))
+                       return NIL;
+
+               result = lappend(result, var);
+
+               if (var2)
+                       result = lappend(result, var2);
+
+               return result;
+       }
+
+       /* AND/OR/NOT clause */
+       if (is_andclause(clause) ||
+               is_orclause(clause) ||
+               is_notclause(clause))
+       {
+               /*
+                * AND/OR/NOT-clauses are supported if all sub-clauses are supported
+                *
+                * Perhaps we could improve this by handling mixed cases, when some
+                * of the clauses are supported and some are not. Selectivity for
+                * the supported subclauses would be computed using extended
+                * statistics, and the remaining clauses would be estimated using
+                * the traditional algorithm (product of selectivities).
+                *
+                * It however seems overly complex, and in a way we already do that
+                * because if we reject the whole clause as unsupported here, it
+                * will be eventually passed to clauselist_selectivity() which does
+                * exactly this (split into supported/unsupported clauses etc).
+                */
+               BoolExpr   *expr = (BoolExpr *) clause;
+               ListCell   *lc;
+
+               foreach(lc, expr->args)
+               {
+                       List   *exprs;
+
+                       exprs = statext_extract_clause_internal(root,
+                                                                                                       (Node *) lfirst(lc),
+                                                                                                       relid);
+
+                       /*
+                        * Had we found incompatible clause in the arguments, treat
+                        * the whole clause as incompatible.
+                        */
+                       if (exprs == NIL)
+                               return NIL;
+
+                       /* keep what the sub-clause references, don't throw it away */
+                       result = list_concat(result, exprs);
+               }
+
+               return result;
+       }
+
+       /* Var IS NULL */
+       if (IsA(clause, NullTest))
+       {
+               NullTest   *nt = (NullTest *) clause;
+
+               /*
+                * Only simple (Var IS NULL) expressions supported for now. Maybe we
+                * could use examine_variable to fix this?
+                */
+               if (!IsA(nt->arg, Var))
+                       return NIL;
+
+               return statext_extract_clause_internal(root, (Node *) (nt->arg),
+                                                                                          relid);
+       }
+
+       /* anything else is not supported */
+       return NIL;
+}
+
+
 /*
  * statext_is_compatible_clause
  *             Determines if the clause is compatible with MCV lists.
@@ -1184,6 +1569,51 @@ statext_is_compatible_clause(PlannerInfo *root, Node 
*clause, Index relid,
        return true;
 }
 
+/*
+ * statext_extract_clause
+ *             Extract the expressions referenced by a clause, provided the
+ *             clause is compatible with MCV lists.
+ *
+ * Currently, we only support these types of clauses:
+ *
+ * (a) OpExprs of the form (Var op Const), or (Const op Var), where the op
+ * is one of ("=", "<", ">", ">=", "<="), plus whatever two-Var shapes
+ * examine_opclause_expression2 accepts
+ *
+ * (b) (Var IS [NOT] NULL)
+ *
+ * (c) combinations using AND/OR/NOT
+ *
+ * In the future, the range of supported clauses may be expanded to more
+ * complex cases.
+ *
+ * The clause is expected to be a RestrictInfo. Returns the list of
+ * expressions extracted by statext_extract_clause_internal, or NIL when the
+ * clause is not a RestrictInfo, is a pseudoconstant, references more than
+ * one relation, or is otherwise not supported.
+ */
+static List *
+statext_extract_clause(PlannerInfo *root, Node *clause, Index relid)
+{
+       RestrictInfo *rinfo = (RestrictInfo *) clause;
+       List       *exprs;
+
+       if (!IsA(rinfo, RestrictInfo))
+               return NIL;
+
+       /* Pseudoconstants are not really interesting here. */
+       if (rinfo->pseudoconstant)
+               return NIL;
+
+       /* clauses referencing multiple varnos are incompatible */
+       if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
+               return NIL;
+
+       /* Inspect the clause and collect the expressions it references. */
+       exprs = statext_extract_clause_internal(root, (Node *) rinfo->clause,
+                                                                                       relid);
+
+       if (exprs == NIL)
+               return NIL;
+
+       /* FIXME do the same ACL check as in statext_is_compatible_clause */
+
+       /* If we reach here, the clause is OK */
+       return exprs;
+}
+
+
 /*
  * statext_mcv_clauselist_selectivity
  *             Estimate clauses using the best multi-column statistics.
@@ -1246,7 +1676,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, 
List *clauses, int varReli
                                                                   bool is_or)
 {
        ListCell   *l;
-       Bitmapset **list_attnums;
+       Bitmapset **list_attnums;       /* attnums extracted from the clause */
+       bool       *exact_clauses;      /* covered as-is by at least one 
statistic */
        int                     listidx;
        Selectivity     sel = 1.0;
 
@@ -1257,6 +1688,8 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, 
List *clauses, int varReli
        list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) *
                                                                                
 list_length(clauses));
 
+       exact_clauses = (bool *) palloc(sizeof(bool) * list_length(clauses));
+
        /*
         * Pre-process the clauses list to extract the attnums seen in each 
item.
         * We need to determine if there's any clauses which will be useful for
@@ -1274,11 +1707,76 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, 
List *clauses, int varReli
                Node       *clause = (Node *) lfirst(l);
                Bitmapset  *attnums = NULL;
 
+               /* the clause is considered incompatible by default */
+               list_attnums[listidx] = NULL;
+
+               /* and it's also not covered exactly by the statistic */
+               exact_clauses[listidx] = false;
+
+               /*
+                * First see if the clause is simple enough to be covered 
directly
+                * by the attributes. If not, see if there's at least one 
statistic
+                * object using the expression as-is.
+                */
                if (!bms_is_member(listidx, *estimatedclauses) &&
                        statext_is_compatible_clause(root, clause, rel->relid, 
&attnums))
+                       /* simple expression, covered through attnum(s) */
                        list_attnums[listidx] = attnums;
                else
-                       list_attnums[listidx] = NULL;
+               {
+                       ListCell   *lc;
+
+                       List *exprs = statext_extract_clause(root, clause, 
rel->relid);
+
+                       /* complex expression, search for statistic */
+                       foreach(lc, rel->statlist)
+                       {
+                               ListCell                   *lc2;
+                               StatisticExtInfo   *info = (StatisticExtInfo *) 
lfirst(lc);
+                               bool                            all_found = 
true;
+
+                               /* have we already found all expressions in a 
statistic? */
+                               Assert(!exact_clauses[listidx]);
+
+                               /* no expressions */
+                               if (!info->exprs)
+                                       continue;
+
+                               foreach (lc2, exprs)
+                               {
+                                       Node   *expr = (Node *) lfirst(lc2);
+
+                                       /*
+                                        * Walk the expressions, see if all 
expressions extracted from
+                                        * the clause are covered by the 
extended statistic object.
+                                        */
+                                       foreach (lc2, info->exprs)
+                                       {
+                                               Node   *stat_expr = (Node *) 
lfirst(lc2);
+                                               bool    expr_found = false;
+
+                                               if (equal(expr, stat_expr))
+                                               {
+                                                       expr_found = true;
+                                                       break;
+                                               }
+
+                                               if (!expr_found)
+                                               {
+                                                       all_found = false;
+                                                       break;
+                                               }
+                                       }
+                               }
+
+                               /* stop looking for another statistic */
+                               if (all_found)
+                               {
+                                       exact_clauses[listidx] = true;
+                                       break;
+                               }
+                       }
+               }
 
                listidx++;
        }
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 97d3083451..9334504714 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -180,7 +180,9 @@ get_mincount_for_mcv_list(int samplerows, double totalrows)
  *
  */
 MCVList *
-statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
+statext_mcv_build(int numrows, HeapTuple *rows,
+                                 Datum *exprvals, bool *exprnulls,
+                                 Bitmapset *attrs, List *exprs,
                                  VacAttrStats **stats, double totalrows, int 
stattarget)
 {
        int                     i,
@@ -194,13 +196,23 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset 
*attrs,
        MCVList    *mcvlist = NULL;
        MultiSortSupport mss;
 
+       /*
+        * Copy the bitmapset and add fake attnums representing expressions,
+        * starting above MaxHeapAttributeNumber.
+        */
+       attrs = bms_copy(attrs);
+
+       for (i = 1; i <= list_length(exprs); i++)
+               attrs = bms_add_member(attrs, MaxHeapAttributeNumber + i);
+
        attnums = build_attnums_array(attrs, &numattrs);
 
        /* comparator for all the columns */
        mss = build_mss(stats, numattrs);
 
        /* sort the rows */
-       items = build_sorted_items(numrows, &nitems, rows, stats[0]->tupDesc,
+       items = build_sorted_items(numrows, &nitems, rows, exprvals, exprnulls,
+                                                          list_length(exprs), 
stats[0]->tupDesc,
                                                           mss, numattrs, 
attnums);
 
        if (!items)
@@ -337,6 +349,7 @@ statext_mcv_build(int numrows, HeapTuple *rows, Bitmapset 
*attrs,
 
        pfree(items);
        pfree(groups);
+       pfree(attrs);
 
        return mcvlist;
 }
diff --git a/src/backend/statistics/mvdistinct.c 
b/src/backend/statistics/mvdistinct.c
index 977d6f3e2e..dd874c7a04 100644
--- a/src/backend/statistics/mvdistinct.c
+++ b/src/backend/statistics/mvdistinct.c
@@ -37,8 +37,10 @@
 #include "utils/typcache.h"
 
 static double ndistinct_for_combination(double totalrows, int numrows,
-                                                                               
HeapTuple *rows, VacAttrStats **stats,
-                                                                               
int k, int *combination);
+                                                                               
HeapTuple *rows, Datum *exprvals,
+                                                                               
bool *exprnulls, int nexprs,
+                                                                               
VacAttrStats **stats, int k,
+                                                                               
int *combination);
 static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
 static int     n_choose_k(int n, int k);
 static int     num_combinations(int n);
@@ -84,14 +86,26 @@ static void generate_combinations(CombinationGenerator 
*state);
  */
 MVNDistinct *
 statext_ndistinct_build(double totalrows, int numrows, HeapTuple *rows,
-                                               Bitmapset *attrs, VacAttrStats 
**stats)
+                                               Datum *exprvals, bool 
*exprnulls,
+                                               Bitmapset *attrs, List *exprs,
+                                               VacAttrStats **stats)
 {
        MVNDistinct *result;
+       int                     i;
        int                     k;
        int                     itemcnt;
-       int                     numattrs = bms_num_members(attrs);
+       int                     numattrs = bms_num_members(attrs) + 
list_length(exprs);
        int                     numcombs = num_combinations(numattrs);
 
+       /*
+        * Copy the bitmapset and add fake attnums representing expressions,
+        * starting above MaxHeapAttributeNumber.
+        */
+       attrs = bms_copy(attrs);
+
+       for (i = 1; i <= list_length(exprs); i++)
+               attrs = bms_add_member(attrs, MaxHeapAttributeNumber + i);
+
        result = palloc(offsetof(MVNDistinct, items) +
                                        numcombs * sizeof(MVNDistinctItem));
        result->magic = STATS_NDISTINCT_MAGIC;
@@ -114,10 +128,18 @@ statext_ndistinct_build(double totalrows, int numrows, 
HeapTuple *rows,
 
                        item->attrs = NULL;
                        for (j = 0; j < k; j++)
-                               item->attrs = bms_add_member(item->attrs,
-                                                                               
         stats[combination[j]]->attr->attnum);
+                       {
+                               if (combination[j] <= MaxHeapAttributeNumber)
+                                       item->attrs = 
bms_add_member(item->attrs,
+                                                                               
                 stats[combination[j]]->attr->attnum);
+                               else
+                                       item->attrs = 
bms_add_member(item->attrs, combination[j]);
+                       }
+
                        item->ndistinct =
                                ndistinct_for_combination(totalrows, numrows, 
rows,
+                                                                               
  exprvals, exprnulls,
+                                                                               
  list_length(exprs),
                                                                                
  stats, k, combination);
 
                        itemcnt++;
@@ -428,6 +450,7 @@ pg_ndistinct_send(PG_FUNCTION_ARGS)
  */
 static double
 ndistinct_for_combination(double totalrows, int numrows, HeapTuple *rows,
+                                                 Datum *exprvals, bool 
*exprnulls, int nexprs,
                                                  VacAttrStats **stats, int k, 
int *combination)
 {
        int                     i,
@@ -481,11 +504,17 @@ ndistinct_for_combination(double totalrows, int numrows, 
HeapTuple *rows,
                /* accumulate all the data for this dimension into the arrays */
                for (j = 0; j < numrows; j++)
                {
-                       items[j].values[i] =
-                               heap_getattr(rows[j],
-                                                        colstat->attr->attnum,
-                                                        colstat->tupDesc,
-                                                        &items[j].isnull[i]);
+                       if (combination[i] <= MaxHeapAttributeNumber)
+                               items[j].values[i] =
+                                       heap_getattr(rows[j],
+                                                                
colstat->attr->attnum,
+                                                                
colstat->tupDesc,
+                                                                
&items[j].isnull[i]);
+                       else
+                       {
+                               items[j].values[i] = exprvals[j * nexprs + 
combination[i] - MaxHeapAttributeNumber - 1];
+                               items[j].isnull[i] = exprnulls[j * nexprs + 
combination[i] - MaxHeapAttributeNumber - 1];
+                       }
                }
        }
 
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 1b460a2612..8c36f516e1 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -1781,7 +1781,21 @@ ProcessUtilitySlow(ParseState *pstate,
                                break;
 
                        case T_CreateStatsStmt:
-                               address = CreateStatistics((CreateStatsStmt *) 
parsetree);
+                               {
+                                       Oid                     relid;
+                                       CreateStatsStmt *stmt = 
(CreateStatsStmt *) parsetree;
+                                       RangeVar   *rel = (RangeVar *) 
linitial(stmt->relations);
+
+                                       relid = RangeVarGetRelidExtended(rel, 
ShareLock,
+                                                                               
                 0,
+                                                                               
                 RangeVarCallbackOwnsRelation,
+                                                                               
                 NULL);
+
+                                       /* Run parse analysis ... */
+                                       stmt = transformStatsStmt(relid, stmt, 
queryString);
+
+                                       address = CreateStatistics(stmt);
+                               }
                                break;
 
                        case T_AlterStatsStmt:
diff --git a/src/backend/utils/adt/ruleutils.c 
b/src/backend/utils/adt/ruleutils.c
index 5e63238f03..e811a54667 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -1524,6 +1524,9 @@ pg_get_statisticsobj_worker(Oid statextid, bool 
missing_ok)
        bool            dependencies_enabled;
        bool            mcv_enabled;
        int                     i;
+       List       *context;
+       ListCell   *lc;
+       List       *exprs = NIL;
 
        statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid));
 
@@ -1616,6 +1619,62 @@ pg_get_statisticsobj_worker(Oid statextid, bool 
missing_ok)
                appendStringInfoString(&buf, quote_identifier(attname));
        }
 
+       /* deparse expressions */
+
+       {
+                       bool            isnull;
+                       Datum           datum;
+
+                       /* decode expression (if any) */
+                       datum = SysCacheGetAttr(STATEXTOID, statexttup,
+                                                                       
Anum_pg_statistic_ext_stxexprs, &isnull);
+
+                       if (!isnull)
+                       {
+                               char *exprsString;
+
+                               exprsString = TextDatumGetCString(datum);
+                               exprs = (List *) stringToNode(exprsString);
+                               pfree(exprsString);
+
+                               /*
+                                * Run the expressions through 
eval_const_expressions. This is not just an
+                                * optimization, but is necessary, because the 
planner will be comparing
+                                * them to similarly-processed qual clauses, 
and may fail to detect valid
+                                * matches without this.  We must not use 
canonicalize_qual, however,
+                                * since these aren't qual expressions.
+                                */
+                               exprs = (List *) eval_const_expressions(NULL, 
(Node *) exprs);
+
+                               /* May as well fix opfuncids too */
+                               fix_opfuncids((Node *) exprs);
+                       }
+       }
+
+       context = deparse_context_for(get_relation_name(statextrec->stxrelid),
+                                                                 
statextrec->stxrelid);
+
+       foreach (lc, exprs)
+       {
+               Node       *expr = (Node *) lfirst(lc);
+               char       *str;
+               int                     prettyFlags = PRETTYFLAG_INDENT;
+
+               str = deparse_expression_pretty(expr, context, false, false,
+                                                                               
prettyFlags, 0);
+
+               if (colno > 0)
+                       appendStringInfoString(&buf, ", ");
+
+               /* Need parens if it's not a bare function call */
+               if (looks_like_function(expr))
+                       appendStringInfoString(&buf, str);
+               else
+                       appendStringInfo(&buf, "(%s)", str);
+
+               colno++;
+       }
+
        appendStringInfo(&buf, " FROM %s",
                                         
generate_relation_name(statextrec->stxrelid, NIL));
 
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 0be26fe037..7574a5395a 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3082,6 +3082,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, 
double input_rows,
                double          this_srf_multiplier;
                VariableStatData vardata;
                List       *varshere;
+               Relids          varnos;
                ListCell   *l2;
 
                /* is expression in this grouping set? */
@@ -3149,6 +3150,16 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, 
double input_rows,
                        continue;
                }
 
+               /*
+                * Are all the variables from the same relation? If yes, search 
for
+                * an extended statistic matching this expression exactly.
+                */
+               varnos = pull_varnos((Node *) varshere);
+               if (bms_membership(varnos) == BMS_SINGLETON)
+               {
+                       // FIXME try to match it to expressions in mvdistinct 
stats
+               }
+
                /*
                 * Else add variables to varinfos list
                 */
diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c
index f3c7eb96fa..92c2deb1ba 100644
--- a/src/bin/psql/describe.c
+++ b/src/bin/psql/describe.c
@@ -2671,6 +2671,7 @@ describeOneTableDetails(const char *schemaname,
                /* print any extended statistics */
                if (pset.sversion >= 100000)
                {
+                       /* FIXME improve this with printing expressions the 
statistics is defined on */
                        printfPQExpBuffer(&buf,
                                                          "SELECT oid, "
                                                          
"stxrelid::pg_catalog.regclass, "
diff --git a/src/include/catalog/pg_statistic_ext.h 
b/src/include/catalog/pg_statistic_ext.h
index e9491a0a87..dd0f41cd14 100644
--- a/src/include/catalog/pg_statistic_ext.h
+++ b/src/include/catalog/pg_statistic_ext.h
@@ -52,6 +52,9 @@ CATALOG(pg_statistic_ext,3381,StatisticExtRelationId)
 #ifdef CATALOG_VARLEN
        char            stxkind[1] BKI_FORCE_NOT_NULL;  /* statistics kinds 
requested
                                                                                
                 * to build */
+       pg_node_tree stxexprs;          /* expression trees for stats 
attributes that
+                                                                * are not 
simple column references; one for
+                                                                * each zero 
entry in stxkeys[] */
 #endif
 
 } FormData_pg_statistic_ext;
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index baced7eec0..72f6534ceb 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -448,6 +448,7 @@ typedef enum NodeTag
        T_TypeName,
        T_ColumnDef,
        T_IndexElem,
+       T_StatsElem,
        T_Constraint,
        T_DefElem,
        T_RangeTblEntry,
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index da0706add5..74e5a855ca 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -2807,8 +2807,24 @@ typedef struct CreateStatsStmt
        List       *relations;          /* rels to build stats on (list of 
RangeVar) */
        char       *stxcomment;         /* comment to apply to stats, or NULL */
        bool            if_not_exists;  /* do nothing if stats name already 
exists */
+       bool            transformed;    /* true when transformStatsStmt is 
finished */
 } CreateStatsStmt;
 
+/*
+ * StatsElem - statistics parameters (used in CREATE STATISTICS)
+ *
+ * For a plain attribute, 'name' is the name of the referenced table column
+ * and 'expr' is NULL.  For an expression, 'name' is NULL and 'expr' is the
+ * expression tree.
+ */
+typedef struct StatsElem
+{
+       NodeTag         type;
+       char       *name;                       /* name of the table column, 
or NULL */
+       Node       *expr;                       /* expression tree, or NULL 
*/
+} StatsElem;
+
+
 /* ----------------------
  *             Alter Statistics Statement
  * ----------------------
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 0ceb809644..7e9aeb409b 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -885,6 +885,7 @@ typedef struct StatisticExtInfo
        RelOptInfo *rel;                        /* back-link to statistic's 
table */
        char            kind;                   /* statistic kind of this entry 
*/
        Bitmapset  *keys;                       /* attnums of the columns 
covered */
+       List       *exprs;                      /* expressions */
 } StatisticExtInfo;
 
 /*
diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h
index d25819aa28..82e5190964 100644
--- a/src/include/parser/parse_node.h
+++ b/src/include/parser/parse_node.h
@@ -69,6 +69,7 @@ typedef enum ParseExprKind
        EXPR_KIND_FUNCTION_DEFAULT, /* default parameter value for function */
        EXPR_KIND_INDEX_EXPRESSION, /* index expression */
        EXPR_KIND_INDEX_PREDICATE,      /* index predicate */
+       EXPR_KIND_STATS_EXPRESSION, /* extended statistics expression */
        EXPR_KIND_ALTER_COL_TRANSFORM,  /* transform expr in ALTER COLUMN TYPE 
*/
        EXPR_KIND_EXECUTE_PARAMETER,    /* parameter value in EXECUTE */
        EXPR_KIND_TRIGGER_WHEN,         /* WHEN condition in CREATE TRIGGER */
diff --git a/src/include/parser/parse_utilcmd.h 
b/src/include/parser/parse_utilcmd.h
index 1a5e0b83a7..43247186b0 100644
--- a/src/include/parser/parse_utilcmd.h
+++ b/src/include/parser/parse_utilcmd.h
@@ -26,6 +26,8 @@ extern AlterTableStmt *transformAlterTableStmt(Oid relid, 
AlterTableStmt *stmt,
                                                                                
           List **afterStmts);
 extern IndexStmt *transformIndexStmt(Oid relid, IndexStmt *stmt,
                                                                         const 
char *queryString);
+extern CreateStatsStmt *transformStatsStmt(Oid relid, CreateStatsStmt *stmt,
+                                                                        const 
char *queryString);
 extern void transformRuleStmt(RuleStmt *stmt, const char *queryString,
                                                          List **actions, Node 
**whereClause);
 extern List *transformCreateSchemaStmt(CreateSchemaStmt *stmt);
diff --git a/src/include/statistics/extended_stats_internal.h 
b/src/include/statistics/extended_stats_internal.h
index 804089bc57..b159ea0313 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -59,17 +59,23 @@ typedef struct SortItem
 
 extern MVNDistinct *statext_ndistinct_build(double totalrows,
                                                                                
        int numrows, HeapTuple *rows,
-                                                                               
        Bitmapset *attrs, VacAttrStats **stats);
+                                                                               
        Datum *exprvals, bool *exprnulls,
+                                                                               
        Bitmapset *attrs, List *exprs,
+                                                                               
        VacAttrStats **stats);
 extern bytea *statext_ndistinct_serialize(MVNDistinct *ndistinct);
 extern MVNDistinct *statext_ndistinct_deserialize(bytea *data);
 
 extern MVDependencies *statext_dependencies_build(int numrows, HeapTuple *rows,
-                                                                               
                  Bitmapset *attrs, VacAttrStats **stats);
+                                                                               
                  Datum *exprvals, bool *exprnulls,
+                                                                               
                  Bitmapset *attrs, List *exprs,
+                                                                               
                  VacAttrStats **stats);
 extern bytea *statext_dependencies_serialize(MVDependencies *dependencies);
 extern MVDependencies *statext_dependencies_deserialize(bytea *data);
 
 extern MCVList *statext_mcv_build(int numrows, HeapTuple *rows,
-                                                                 Bitmapset 
*attrs, VacAttrStats **stats,
+                                                                 Datum 
*exprvals, bool *exprnulls,
+                                                                 Bitmapset 
*attrs, List *exprs,
+                                                                 VacAttrStats 
**stats,
                                                                  double 
totalrows, int stattarget);
 extern bytea *statext_mcv_serialize(MCVList *mcv, VacAttrStats **stats);
 extern MCVList *statext_mcv_deserialize(bytea *data);
@@ -93,6 +99,7 @@ extern void *bsearch_arg(const void *key, const void *base,
 extern AttrNumber *build_attnums_array(Bitmapset *attrs, int *numattrs);
 
 extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows,
+                                                                       Datum 
*exprvals, bool *exprnulls, int nexprs,
                                                                        
TupleDesc tdesc, MultiSortSupport mss,
                                                                        int 
numattrs, AttrNumber *attnums);
 
-- 
2.21.1

Reply via email to