On 11/13/19 7:28 AM, Tomas Vondra wrote:
Hi,
here's an updated patch, with some minor tweaks based on the review and
added tests (I ended up reworking those a bit, to make them more like
the existing ones).
Thanks, Tomas, for the new patch set!
Attached are my review comments so far, in the form of a patch applied
on top of yours.
--
Mark Dilger
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index a6ca11c675..ccf6e41b9c 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -850,7 +850,8 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
* find the strongest dependency on the attributes
*
* When applying functional dependencies, we start with the strongest
- * dependencies. That is, we select the dependency that:
+ * dependencies. That is, we select the best dependency in the following
+ * descending order of preference:
*
* (a) has all attributes covered by equality clauses
*
@@ -860,6 +861,9 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
*
* This guarantees that we eliminate the most redundant conditions first
* (see the comment in dependencies_clauselist_selectivity).
+ *
+ * TODO: Rename 'dependencies', as that name is what you'd expect for an
+ * argument of type (MVDependencies *), not (MVDependencies **)
*/
static MVDependency *
find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
@@ -897,11 +901,14 @@ find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
/* also skip weaker dependencies when attribute count matches */
if (strongest->nattributes == dependency->nattributes &&
- strongest->degree > dependency->degree)
+ strongest->degree > dependency->degree) /* TODO: Why not ">=" here? */
continue;
}
/*
+ * TODO: As coded, this dependency is "at least as strong as", not
+ * necessarily "stronger".
+ *
* this dependency is stronger, but we must still check that it's
* fully matched to these attnums. We perform this check last as it's
* slightly more expensive than the previous checks.
@@ -942,7 +949,7 @@ find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
*/
Selectivity
dependencies_clauselist_selectivity(PlannerInfo *root,
- List *clauses,
+ const List *clauses,
int varRelid,
JoinType jointype,
SpecialJoinInfo *sjinfo,
@@ -1084,6 +1091,11 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
* to a different Const in another clause then no rows will match
* anyway. If it happens to be compared to the same Const, then
* ignoring the additional clause is just the thing to do.
+ *
+ * TODO: Improve this code comment. Specifically, why would we
+ * ignore that no rows will match? It seems that such a discovery
+ * would allow us to return an estimate of 0 rows, and that would
+ * be useful.
*/
if (dependency_implies_attribute(dependency,
list_attnums[listidx]))
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index b55799b8b1..a1798a6b91 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1202,7 +1202,7 @@ statext_mcv_remaining_attnums(int nclauses, Bitmapset **estimatedclauses,
* already have a bit set.
*/
static Selectivity
-statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
+statext_mcv_clauselist_selectivity(PlannerInfo *root, const List *clauses, int varRelid,
JoinType jointype, SpecialJoinInfo *sjinfo,
RelOptInfo *rel, Bitmapset **estimatedclauses)
{
@@ -1340,7 +1340,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
* Estimate clauses using the best multi-column statistics.
*/
Selectivity
-statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
+statext_clauselist_selectivity(PlannerInfo *root, const List *clauses, int varRelid,
JoinType jointype, SpecialJoinInfo *sjinfo,
RelOptInfo *rel, Bitmapset **estimatedclauses)
{
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
index 588b6738b2..ebac910d72 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -104,14 +104,14 @@ extern int ComputeExtStatisticsRows(Relation onerel,
int natts, VacAttrStats **stats);
extern bool statext_is_kind_built(HeapTuple htup, char kind);
extern Selectivity dependencies_clauselist_selectivity(PlannerInfo *root,
- List *clauses,
+ const List *clauses,
int varRelid,
JoinType jointype,
SpecialJoinInfo *sjinfo,
RelOptInfo *rel,
Bitmapset **estimatedclauses);
extern Selectivity statext_clauselist_selectivity(PlannerInfo *root,
- List *clauses,
+ const List *clauses,
int varRelid,
JoinType jointype,
SpecialJoinInfo *sjinfo,
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index a35d8f2c2e..1892a856f9 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -451,7 +451,7 @@ CREATE TABLE functional_dependencies_multi (
c INTEGER,
d INTEGER
);
---
+--
INSERT INTO functional_dependencies_multi (a, b, c, d)
SELECT
mod(i,7),
@@ -812,7 +812,7 @@ CREATE TABLE mcv_lists_multi (
c INTEGER,
d INTEGER
);
---
+--
INSERT INTO mcv_lists_multi (a, b, c, d)
SELECT
mod(i,5),
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 69c3f726e6..bf700204d2 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -296,7 +296,7 @@ CREATE TABLE functional_dependencies_multi (
d INTEGER
);
---
+--
INSERT INTO functional_dependencies_multi (a, b, c, d)
SELECT
mod(i,7),
@@ -532,7 +532,7 @@ CREATE TABLE mcv_lists_multi (
d INTEGER
);
---
+--
INSERT INTO mcv_lists_multi (a, b, c, d)
SELECT
mod(i,5),