Re: Incremental Sort Cost Estimation Instability

Andrei Lepikhov Mon, 11 Nov 2024 06:11:33 -0800

Hi,

Revising the patch I found out that it may make sense to teachestimate_num_groups to process a PathKey node inside the presortedExprslist.In this case it can pass through the EquivalenceClass members and choosecorrect number of distinct values.

Here is a sketch of the solution (see in attachment).


--
regards, Andrei Lepikhov

From 3d97c3aa36170460a82e66a06af13f95ed4a37c0 Mon Sep 17 00:00:00 2001
From: "Andrei V. Lepikhov" <lepi...@gmail.com>
Date: Tue, 29 Oct 2024 08:49:33 +0700
Subject: [PATCH] Improve group number estimation.

Estimating GROUP BY x optimisation employs a distinct statistic for column x.
But if we have an expression like 'x=y' somewhere down the query tree, the
number of different values can't be more than the smaller distinct value on
columns 'x' and 'y'. That means it is possible to correct the estimation with
knowledge provided by the equivalence class.

In this commit, the estimate_num_groups routine is changed to include PathKey
nodes in the presortedExprs list. With the PathKey node, we can pass through
its equivalence class members and correct the distinct estimation.

To avoid multiple calls on statistic tuples, the em_ndistinct cache field
is introduced.
---
 src/backend/optimizer/path/costsize.c         | 40 ++-------
 src/backend/optimizer/path/equivclass.c       |  1 +
 src/backend/utils/adt/selfuncs.c              | 87 ++++++++++++++++++-
 src/include/nodes/pathnodes.h                 |  2 +
 .../regress/expected/incremental_sort.out     | 51 +++++++++++
 src/test/regress/sql/incremental_sort.sql     | 31 +++++++
 6 files changed, 180 insertions(+), 32 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 2bb6db1df7..fad63f694d 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2008,13 +2008,12 @@ cost_incremental_sort(Path *path,
 				run_cost,
 				input_run_cost = input_total_cost - input_startup_cost;
 	double		group_tuples,
-				input_groups;
+				input_groups,
+				estimated_groups;
 	Cost		group_startup_cost,
 				group_run_cost,
 				group_input_run_cost;
 	List	   *presortedExprs = NIL;
-	ListCell   *l;
-	bool		unknown_varno = false;
 
 	Assert(presorted_keys > 0 && presorted_keys < list_length(pathkeys));
 
@@ -2025,9 +2024,6 @@ cost_incremental_sort(Path *path,
 	if (input_tuples < 2.0)
 		input_tuples = 2.0;
 
-	/* Default estimate of number of groups, capped to one group per row. */
-	input_groups = Min(input_tuples, DEFAULT_NUM_DISTINCT);
-
 	/*
 	 * Extract presorted keys as list of expressions.
 	 *
@@ -2050,33 +2046,15 @@ cost_incremental_sort(Path *path,
 	 * anyway - from that standpoint the DEFAULT_NUM_DISTINCT is defensive
 	 * while maintaining lower startup cost.
 	 */
-	foreach(l, pathkeys)
-	{
-		PathKey    *key = (PathKey *) lfirst(l);
-		EquivalenceMember *member = (EquivalenceMember *)
-			linitial(key->pk_eclass->ec_members);
-
-		/*
-		 * Check if the expression contains Var with "varno 0" so that we
-		 * don't call estimate_num_groups in that case.
-		 */
-		if (bms_is_member(0, pull_varnos(root, (Node *) member->em_expr)))
-		{
-			unknown_varno = true;
-			break;
-		}
+	presortedExprs = list_copy_head(pathkeys, presorted_keys);
 
-		/* expression not containing any Vars with "varno 0" */
-		presortedExprs = lappend(presortedExprs, member->em_expr);
-
-		if (foreach_current_index(l) + 1 >= presorted_keys)
-			break;
-	}
-
-	/* Estimate the number of groups with equal presorted keys. */
-	if (!unknown_varno)
-		input_groups = estimate_num_groups(root, presortedExprs, input_tuples,
+	estimated_groups = estimate_num_groups(root, presortedExprs, input_tuples,
 										   NULL, NULL);
+	if (estimated_groups > 0.0)
+		input_groups = estimated_groups;
+	else
+		/* Default estimate of number of groups, capped to one group per row. */
+		input_groups = Min(input_tuples, DEFAULT_NUM_DISTINCT);
 
 	group_tuples = input_tuples / input_groups;
 	group_input_run_cost = input_run_cost / input_groups;
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index fae137dd82..3552614502 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -525,6 +525,7 @@ add_eq_member(EquivalenceClass *ec, Expr *expr, Relids relids,
 	em->em_datatype = datatype;
 	em->em_jdomain = jdomain;
 	em->em_parent = parent;
+	em->em_ndistinct = -1.0;
 
 	if (bms_is_empty(relids))
 	{
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 08fa6774d9..e49101f50e 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -213,6 +213,8 @@ static bool get_actual_variable_endpoint(Relation heapRel,
 										 MemoryContext outercontext,
 										 Datum *endpointDatum);
 static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
+static EquivalenceMember *identify_sort_ecmember(PlannerInfo *root,
+												 EquivalenceClass *ec);
 
 
 /*
@@ -3458,12 +3460,34 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
 	i = 0;
 	foreach(l, groupExprs)
 	{
-		Node	   *groupexpr = (Node *) lfirst(l);
+		Node	   *node = (Node *) lfirst(l);
+		Node	   *groupexpr;
 		double		this_srf_multiplier;
 		VariableStatData vardata;
 		List	   *varshere;
 		ListCell   *l2;
 
+		/* Find an expression beforehand */
+		if (IsA(node, PathKey))
+		{
+			PathKey			   *key = (PathKey *) node;
+			EquivalenceMember  *em = identify_sort_ecmember(root, key->pk_eclass);
+
+			/*
+			 * Check if the expression contains Var with "varno 0" so that we
+			 * don't call estimate_num_groups in that case.
+			 */
+			if (bms_is_member(0, pull_varnos(root, (Node *) em->em_expr)))
+			{
+				/* Return 'unknown' value */
+				return 0.0;
+			}
+
+			groupexpr = (Node *) em->em_expr;
+		}
+		else
+			groupexpr = node;
+
 		/* is expression in this grouping set? */
 		if (pgset && !list_member_int(*pgset, i++))
 			continue;
@@ -8197,3 +8221,64 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 
 	*indexPages = index->pages;
 }
+
+/*
+ * Find suitable member of the equivalence class.
+ * Passing through the list of EC members find the member with minimum of
+ * distinct values. Cache estimated number of distincts in the em_ndistinct
+ * field of each member.
+ */
+static EquivalenceMember *
+identify_sort_ecmember(PlannerInfo *root, EquivalenceClass *ec)
+{
+	EquivalenceMember  *candidate = linitial(ec->ec_members);
+
+	if (root == NULL)
+		/* Fast path */
+		return candidate;
+
+	foreach_node(EquivalenceMember, em, ec->ec_members)
+	{
+		VariableStatData	vardata;
+
+		if (em->em_ndistinct == 0.)
+			/* Nothing helpful */
+			continue;
+
+		if (em->em_is_child || em->em_is_const || bms_is_empty(em->em_relids) ||
+			bms_is_member(0, em->em_relids))
+		{
+			em->em_ndistinct = 0.;
+			continue;
+		}
+
+		if (em->em_ndistinct < 0.)
+		{
+			bool	isdefault = true;
+			double	ndist = 0.;
+
+			/* Let's check candidate's ndistinct value */
+			examine_variable(root, (Node *) em->em_expr, 0, &vardata);
+			if (HeapTupleIsValid(vardata.statsTuple))
+				ndist = get_variable_numdistinct(&vardata, &isdefault);
+			ReleaseVariableStats(vardata);
+
+			if (isdefault)
+			{
+				em->em_ndistinct = 0.;
+				continue;
+			}
+
+			em->em_ndistinct = ndist;
+		}
+
+		Assert(em->em_ndistinct > 0.);
+
+		if (candidate->em_ndistinct == 0. ||
+			em->em_ndistinct < candidate->em_ndistinct)
+			candidate = em;
+	}
+
+	Assert(candidate != NULL);
+	return candidate;
+}
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index add0f9e45f..4ef5256a7f 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1448,6 +1448,8 @@ typedef struct EquivalenceMember
 	JoinDomain *em_jdomain;		/* join domain containing the source clause */
 	/* if em_is_child is true, this links to corresponding EM for top parent */
 	struct EquivalenceMember *em_parent pg_node_attr(read_write_ignore);
+
+	double em_ndistinct; /* cached value of ndistinct: 0- default value or 'unknown'; -1 - not defined yet */
 } EquivalenceMember;
 
 /*
diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out
index 2df7a5db12..409eb8a4b8 100644
--- a/src/test/regress/expected/incremental_sort.out
+++ b/src/test/regress/expected/incremental_sort.out
@@ -1722,3 +1722,54 @@ order by t1.four, t1.two limit 1;
                ->  Seq Scan on tenk1 t2
 (12 rows)
 
+-- Check:
+-- commuting of sides in an expression doesn't influence cost estimation
+CREATE TABLE sort_ndist_t1 (x numeric, y numeric);
+CREATE TABLE sort_ndist_t2 (x numeric, y numeric);
+INSERT INTO sort_ndist_t1 (x,y)
+  SELECT gs%10, gs%10 FROM generate_series(1,1E4) AS gs;
+INSERT INTO sort_ndist_t2 (x,y)
+  SELECT gs, gs FROM generate_series(1,1E4) AS gs;
+CREATE INDEX t1_idx ON sort_ndist_t1 (x);
+CREATE INDEX t2_idx ON sort_ndist_t2 (x);
+VACUUM ANALYZE sort_ndist_t1, sort_ndist_t2;
+SET enable_hashjoin = 'off';
+-- Having lots of duplicates after the join it is more effective to use plain
+-- Sort instead of incremental sort: with small number of groups we do the same
+-- stuff like Sort but with extra penalty.
+EXPLAIN (COSTS OFF)
+SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2
+WHERE t1.x=t2.x
+ORDER BY t1.x,t1.y;
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Sort
+   Sort Key: t1.x, t1.y
+   ->  Nested Loop
+         ->  Seq Scan on sort_ndist_t1 t1
+         ->  Memoize
+               Cache Key: t1.x
+               Cache Mode: logical
+               ->  Index Only Scan using t2_idx on sort_ndist_t2 t2
+                     Index Cond: (x = t1.x)
+(9 rows)
+
+EXPLAIN (COSTS OFF) -- the plan must be the same as above
+SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2
+WHERE t2.x=t1.x
+ORDER BY t1.x,t1.y;
+                             QUERY PLAN                             
+--------------------------------------------------------------------
+ Sort
+   Sort Key: t1.x, t1.y
+   ->  Nested Loop
+         ->  Seq Scan on sort_ndist_t1 t1
+         ->  Memoize
+               Cache Key: t1.x
+               Cache Mode: logical
+               ->  Index Only Scan using t2_idx on sort_ndist_t2 t2
+                     Index Cond: (x = t1.x)
+(9 rows)
+
+RESET enable_hashjoin;
+DROP TABLE sort_ndist_t1, sort_ndist_t2;
diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql
index 98b20e17e1..af4d145395 100644
--- a/src/test/regress/sql/incremental_sort.sql
+++ b/src/test/regress/sql/incremental_sort.sql
@@ -298,3 +298,34 @@ explain (costs off)
 select * from
   (select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two
 order by t1.four, t1.two limit 1;
+
+-- Check:
+-- commuting of sides in an expression doesn't influence cost estimation
+CREATE TABLE sort_ndist_t1 (x numeric, y numeric);
+CREATE TABLE sort_ndist_t2 (x numeric, y numeric);
+
+INSERT INTO sort_ndist_t1 (x,y)
+  SELECT gs%10, gs%10 FROM generate_series(1,1E4) AS gs;
+INSERT INTO sort_ndist_t2 (x,y)
+  SELECT gs, gs FROM generate_series(1,1E4) AS gs;
+CREATE INDEX t1_idx ON sort_ndist_t1 (x);
+CREATE INDEX t2_idx ON sort_ndist_t2 (x);
+VACUUM ANALYZE sort_ndist_t1, sort_ndist_t2;
+
+SET enable_hashjoin = 'off';
+
+-- Having lots of duplicates after the join it is more effective to use plain
+-- Sort instead of incremental sort: with small number of groups we do the same
+-- stuff like Sort but with extra penalty.
+EXPLAIN (COSTS OFF)
+SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2
+WHERE t1.x=t2.x
+ORDER BY t1.x,t1.y;
+
+EXPLAIN (COSTS OFF) -- the plan must be the same as above
+SELECT t1.x, t1.y FROM sort_ndist_t1 t1, sort_ndist_t2 t2
+WHERE t2.x=t1.x
+ORDER BY t1.x,t1.y;
+
+RESET enable_hashjoin;
+DROP TABLE sort_ndist_t1, sort_ndist_t2;
-- 
2.39.5

Re: Incremental Sort Cost Estimation Instability

Reply via email to