On 11/13/19 7:28 AM, Tomas Vondra wrote:
Hi,

here's an updated patch, with some minor tweaks based on the review and
added tests (I ended up reworking those a bit, to make them more like
the existing ones).

Thanks, Tomas, for the new patch set!

Attached are my review comments so far, in the form of a patch applied on top of yours.

--
Mark Dilger
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index a6ca11c675..ccf6e41b9c 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -850,7 +850,8 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
  *		find the strongest dependency on the attributes
  *
  * When applying functional dependencies, we start with the strongest
- * dependencies. That is, we select the dependency that:
+ * dependencies. That is, we select the best dependency in the following
+ * descending order of preference:
  *
  * (a) has all attributes covered by equality clauses
  *
@@ -860,6 +861,9 @@ dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
  *
  * This guarantees that we eliminate the most redundant conditions first
  * (see the comment in dependencies_clauselist_selectivity).
+ *
+ * TODO: Rename 'dependencies', as that name is what you'd expect for an
+ *       argument of type (MVDependencies *), not (MVDependencies **)
  */
 static MVDependency *
 find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
@@ -897,11 +901,14 @@ find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
 
 				/* also skip weaker dependencies when attribute count matches */
 				if (strongest->nattributes == dependency->nattributes &&
-					strongest->degree > dependency->degree)
+					strongest->degree > dependency->degree)		/* TODO: Why not ">=" here? */
 					continue;
 			}
 
 			/*
+			 * TODO: As coded, this dependency is "at least as strong as", not
+			 * necessarily "stronger".
+			 *
 			 * this dependency is stronger, but we must still check that it's
 			 * fully matched to these attnums. We perform this check last as it's
 			 * slightly more expensive than the previous checks.
@@ -942,7 +949,7 @@ find_strongest_dependency(MVDependencies **dependencies, int ndependencies,
  */
 Selectivity
 dependencies_clauselist_selectivity(PlannerInfo *root,
-									List *clauses,
+									const List *clauses,
 									int varRelid,
 									JoinType jointype,
 									SpecialJoinInfo *sjinfo,
@@ -1084,6 +1091,11 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 			 * to a different Const in another clause then no rows will match
 			 * anyway. If it happens to be compared to the same Const, then
 			 * ignoring the additional clause is just the thing to do.
+			 *
+			 * TODO: Improve this code comment.  Specifically, why would we
+			 * ignore that no rows will match?  It seems that such a discovery
+			 * would allow us to return an estimate of 0 rows, and that would
+			 * be useful.
 			 */
 			if (dependency_implies_attribute(dependency,
 											 list_attnums[listidx]))
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index b55799b8b1..a1798a6b91 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1202,7 +1202,7 @@ statext_mcv_remaining_attnums(int nclauses, Bitmapset **estimatedclauses,
  * already have a bit set.
  */
 static Selectivity
-statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
+statext_mcv_clauselist_selectivity(PlannerInfo *root, const List *clauses, int varRelid,
 								   JoinType jointype, SpecialJoinInfo *sjinfo,
 								   RelOptInfo *rel, Bitmapset **estimatedclauses)
 {
@@ -1340,7 +1340,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
  *		Estimate clauses using the best multi-column statistics.
  */
 Selectivity
-statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
+statext_clauselist_selectivity(PlannerInfo *root, const List *clauses, int varRelid,
 							   JoinType jointype, SpecialJoinInfo *sjinfo,
 							   RelOptInfo *rel, Bitmapset **estimatedclauses)
 {
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
index 588b6738b2..ebac910d72 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -104,14 +104,14 @@ extern int ComputeExtStatisticsRows(Relation onerel,
 									int natts, VacAttrStats **stats);
 extern bool statext_is_kind_built(HeapTuple htup, char kind);
 extern Selectivity dependencies_clauselist_selectivity(PlannerInfo *root,
-													   List *clauses,
+													   const List *clauses,
 													   int varRelid,
 													   JoinType jointype,
 													   SpecialJoinInfo *sjinfo,
 													   RelOptInfo *rel,
 													   Bitmapset **estimatedclauses);
 extern Selectivity statext_clauselist_selectivity(PlannerInfo *root,
-												  List *clauses,
+												  const List *clauses,
 												  int varRelid,
 												  JoinType jointype,
 												  SpecialJoinInfo *sjinfo,
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index a35d8f2c2e..1892a856f9 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -451,7 +451,7 @@ CREATE TABLE functional_dependencies_multi (
 	c INTEGER,
 	d INTEGER
 );
--- 
+--
 INSERT INTO functional_dependencies_multi (a, b, c, d)
     SELECT
          mod(i,7),
@@ -812,7 +812,7 @@ CREATE TABLE mcv_lists_multi (
 	c INTEGER,
 	d INTEGER
 );
--- 
+--
 INSERT INTO mcv_lists_multi (a, b, c, d)
     SELECT
          mod(i,5),
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 69c3f726e6..bf700204d2 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -296,7 +296,7 @@ CREATE TABLE functional_dependencies_multi (
 	d INTEGER
 );
 
--- 
+--
 INSERT INTO functional_dependencies_multi (a, b, c, d)
     SELECT
          mod(i,7),
@@ -532,7 +532,7 @@ CREATE TABLE mcv_lists_multi (
 	d INTEGER
 );
 
--- 
+--
 INSERT INTO mcv_lists_multi (a, b, c, d)
     SELECT
          mod(i,5),

Reply via email to