Re: bitmaps and correlation

Justin Pryzby Sun, 12 Jan 2020 17:48:44 -0800

On Mon, Jan 06, 2020 at 11:26:06PM -0600, Justin Pryzby wrote:
> As Jeff has pointed out, high correlation has two effects in cost_index():
> 1) the number of pages read will be less;
> 2) the pages will be read more sequentially;
> 
> cost_index reuses the pages_fetched variable, so (1) isn't particularly clear,


I tried to make this more clear in 0001

> +               cost_per_page_corr = spc_random_page_cost -
> +                       (spc_random_page_cost - spc_seq_page_cost)
> +                       * (1-correlation*correlation);

And fixed bug: this should be c*c not 1-c*c.

>From d0819177ef1c6f86a588e3d2700ecff638f83b4a Mon Sep 17 00:00:00 2001
From: Justin Pryzby <[email protected]>
Date: Wed, 8 Jan 2020 19:23:51 -0600
Subject: [PATCH v4 1/2] Make more clear the computation of min/max IO..

..and specifically the double use and effect of correlation.

Avoid re-use of the "pages_fetched" variable
---
 src/backend/optimizer/path/costsize.c | 47 +++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 22 deletions(-)

diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b5a0033..bdc23a0 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -491,12 +491,13 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 				csquared;
 	double		spc_seq_page_cost,
 				spc_random_page_cost;
-	Cost		min_IO_cost,
+	double		min_pages_fetched,	/* The min and max page count based on index correlation */
+				max_pages_fetched;
+	Cost		min_IO_cost,	/* The min and max cost based on index correlation */
 				max_IO_cost;
 	QualCost	qpqual_cost;
 	Cost		cpu_per_tuple;
 	double		tuples_fetched;
-	double		pages_fetched;
 	double		rand_heap_pages;
 	double		index_pages;
 
@@ -579,7 +580,8 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 	 * (just after a CLUSTER, for example), the number of pages fetched should
 	 * be exactly selectivity * table_size.  What's more, all but the first
 	 * will be sequential fetches, not the random fetches that occur in the
-	 * uncorrelated case.  So if the number of pages is more than 1, we
+	 * uncorrelated case (the index is expected to read fewer pages, *and* each
+	 * page read is cheaper).  So if the number of pages is more than 1, we
 	 * ought to charge
 	 *		spc_random_page_cost + (pages_fetched - 1) * spc_seq_page_cost
 	 * For partially-correlated indexes, we ought to charge somewhere between
@@ -604,17 +606,17 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 		 * pro-rate the costs for one scan.  In this case we assume all the
 		 * fetches are random accesses.
 		 */
-		pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
+		max_pages_fetched = index_pages_fetched(tuples_fetched * loop_count,
 											baserel->pages,
 											(double) index->pages,
 											root);
 
 		if (indexonly)
-			pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
+			max_pages_fetched = ceil(max_pages_fetched * (1.0 - baserel->allvisfrac));
 
-		rand_heap_pages = pages_fetched;
+		rand_heap_pages = max_pages_fetched;
 
-		max_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
+		max_IO_cost = (max_pages_fetched * spc_random_page_cost) / loop_count;
 
 		/*
 		 * In the perfectly correlated case, the number of pages touched by
@@ -626,17 +628,17 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 		 * where such a plan is actually interesting, only one page would get
 		 * fetched per scan anyway, so it shouldn't matter much.)
 		 */
-		pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
+		min_pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
 
-		pages_fetched = index_pages_fetched(pages_fetched * loop_count,
+		min_pages_fetched = index_pages_fetched(min_pages_fetched * loop_count,
 											baserel->pages,
 											(double) index->pages,
 											root);
 
 		if (indexonly)
-			pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
+			min_pages_fetched = ceil(min_pages_fetched * (1.0 - baserel->allvisfrac));
 
-		min_IO_cost = (pages_fetched * spc_random_page_cost) / loop_count;
+		min_IO_cost = (min_pages_fetched * spc_random_page_cost) / loop_count;
 	}
 	else
 	{
@@ -644,30 +646,31 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 		 * Normal case: apply the Mackert and Lohman formula, and then
 		 * interpolate between that and the correlation-derived result.
 		 */
-		pages_fetched = index_pages_fetched(tuples_fetched,
+
+		/* For the perfectly uncorrelated case (csquared=0) */
+		max_pages_fetched = index_pages_fetched(tuples_fetched,
 											baserel->pages,
 											(double) index->pages,
 											root);
 
 		if (indexonly)
-			pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
+			max_pages_fetched = ceil(max_pages_fetched * (1.0 - baserel->allvisfrac));
 
-		rand_heap_pages = pages_fetched;
+		rand_heap_pages = max_pages_fetched;
 
-		/* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
-		max_IO_cost = pages_fetched * spc_random_page_cost;
+		max_IO_cost = max_pages_fetched * spc_random_page_cost;
 
-		/* min_IO_cost is for the perfectly correlated case (csquared=1) */
-		pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
+		/* For the perfectly correlated case (csquared=1) */
+		min_pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
 
 		if (indexonly)
-			pages_fetched = ceil(pages_fetched * (1.0 - baserel->allvisfrac));
+			min_pages_fetched = ceil(min_pages_fetched * (1.0 - baserel->allvisfrac));
 
-		if (pages_fetched > 0)
+		if (min_pages_fetched > 0)
 		{
 			min_IO_cost = spc_random_page_cost;
-			if (pages_fetched > 1)
-				min_IO_cost += (pages_fetched - 1) * spc_seq_page_cost;
+			if (min_pages_fetched > 1)
+				min_IO_cost += (min_pages_fetched - 1) * spc_seq_page_cost;
 		}
 		else
 			min_IO_cost = 0;
-- 
2.7.4

>From 2608f1743c750cefa7c9d7658ea8c2a4543aef5f Mon Sep 17 00:00:00 2001
From: Justin Pryzby <[email protected]>
Date: Tue, 1 Jan 2019 16:17:28 -0600
Subject: [PATCH v4 2/2] Use correlation statistic in costing bitmap scans..

Same as for an index scan, a correlated bitmap which accesses pages across a
small portion of the table should have a cost estimate much less than an
uncorrelated scan (like modulus) across the entire length of the table, the
latter having a high component of random access.

Note, Tom points out that there are cases where a column could be
tightly-"clumped" without being highly-ordered.  Since we have correlation
already, we use that until such time as someone implements a new statistic for
clumpiness.  This patch only intends to make costing of bitmap heap scan on par
with the same cost of index scan without bitmap.
---
 contrib/postgres_fdw/expected/postgres_fdw.out | 15 ++--
 src/backend/optimizer/path/costsize.c          | 94 +++++++++++++++++++++-----
 src/backend/optimizer/path/indxpath.c          | 10 ++-
 src/include/nodes/pathnodes.h                  |  3 +
 src/include/optimizer/cost.h                   |  2 +-
 src/test/regress/expected/create_index.out     | 16 ++---
 src/test/regress/expected/identity.out         |  2 +-
 src/test/regress/expected/join.out             | 42 +++++++-----
 src/test/regress/expected/select.out           | 16 ++---
 src/test/regress/sql/create_index.sql          |  8 ++-
 src/test/regress/sql/identity.sql              |  2 +-
 11 files changed, 140 insertions(+), 70 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 0912d6c..732e66c 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -2269,11 +2269,12 @@ SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = f
                                              ->  Foreign Scan on public.ft1
                                                    Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.*
                                                    Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) FOR UPDATE
-                                       ->  Materialize
+                                       ->  Sort
                                              Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft2.*
+                                             Sort Key: ft2.c1
                                              ->  Foreign Scan on public.ft2
                                                    Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft2.*
-                                                   Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) ORDER BY "C 1" ASC NULLS LAST FOR UPDATE
+                                                   Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) FOR UPDATE
                            ->  Sort
                                  Output: ft4.c1, ft4.c2, ft4.c3, ft4.*
                                  Sort Key: ft4.c1
@@ -2288,7 +2289,7 @@ SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = f
                                  Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 4" FOR UPDATE
          ->  Index Scan using local_tbl_pkey on public.local_tbl
                Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, local_tbl.ctid
-(47 rows)
+(48 rows)
 
 SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = ft4.c1
     AND ft1.c2 = ft5.c1 AND ft1.c2 = local_tbl.c1 AND ft1.c1 < 100 AND ft2.c1 < 100 FOR UPDATE;
@@ -3330,10 +3331,12 @@ select c2, sum from "S 1"."T 1" t1, lateral (select sum(t2.c1 + t1."C 1") sum fr
    Sort Key: t1.c2
    ->  Nested Loop
          Output: t1.c2, qry.sum
-         ->  Index Scan using t1_pkey on "S 1"."T 1" t1
+         ->  Bitmap Heap Scan on "S 1"."T 1" t1
                Output: t1."C 1", t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8
-               Index Cond: (t1."C 1" < 100)
+               Recheck Cond: (t1."C 1" < 100)
                Filter: (t1.c2 < 3)
+               ->  Bitmap Index Scan on t1_pkey
+                     Index Cond: (t1."C 1" < 100)
          ->  Subquery Scan on qry
                Output: qry.sum, t2.c1
                Filter: ((t1.c2 * 2) = qry.sum)
@@ -3341,7 +3344,7 @@ select c2, sum from "S 1"."T 1" t1, lateral (select sum(t2.c1 + t1."C 1") sum fr
                      Output: (sum((t2.c1 + t1."C 1"))), t2.c1
                      Relations: Aggregate on (public.ft2 t2)
                      Remote SQL: SELECT sum(("C 1" + $1::integer)), "C 1" FROM "S 1"."T 1" GROUP BY 2
-(16 rows)
+(18 rows)
 
 select c2, sum from "S 1"."T 1" t1, lateral (select sum(t2.c1 + t1."C 1") sum from ft2 t2 group by t2.c1) qry where t1.c2 * 2 = qry.sum and t1.c2 < 3 and t1."C 1" < 100 order by 1;
  c2 | sum 
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index bdc23a0..9cd314f 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -550,11 +550,13 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count,
 
 	/*
 	 * Save amcostestimate's results for possible use in bitmap scan planning.
-	 * We don't bother to save indexStartupCost or indexCorrelation, because a
-	 * bitmap scan doesn't care about either.
+	 * We don't bother to save indexStartupCost, because a bitmap scan
+	 * can't start until the index scan completes, so only cares about its
+	 * total cost.
 	 */
 	path->indextotalcost = indexTotalCost;
 	path->indexselectivity = indexSelectivity;
+	path->indexCorrelation = indexCorrelation;
 
 	/* all costs for touching index itself included here */
 	startup_cost += indexStartupCost;
@@ -989,12 +991,31 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
 	 * appropriate to charge spc_seq_page_cost apiece.  The effect is
 	 * nonlinear, too. For lack of a better idea, interpolate like this to
 	 * determine the cost per page.
+	 * Note this works at PAGE granularity, so even if we read 1% of a
+	 * table's tuples, if we have to read nearly every page, it should be
+	 * considered sequential.
 	 */
-	if (pages_fetched >= 2.0)
+	if (pages_fetched >= 2.0) {
+		double correlation = ((IndexPath *)bitmapqual)->indexCorrelation;
+		double cost_per_page_corr;
+		/*
+		 * Interpolate based on pages_fetched and correlation from seq_page_cost to rand_page_cost.
+		 * A highly correlated bitmap scan 1) likely reads fewer pages (handled in
+		 * compute_bitmap_pages); and, 2) at higher "density" (more sequential).
+		 */
 		cost_per_page = spc_random_page_cost -
 			(spc_random_page_cost - spc_seq_page_cost)
 			* sqrt(pages_fetched / T);
-	else
+		cost_per_page_corr = spc_random_page_cost -
+			(spc_random_page_cost - spc_seq_page_cost)
+			* (correlation*correlation);
+
+		/*
+		 * We expect sequential reads and low cost_per_page when *either*
+		 * T is high or correlation is high.
+		 */
+		cost_per_page = Min(cost_per_page,cost_per_page_corr);
+	} else
 		cost_per_page = spc_random_page_cost;
 
 	run_cost += pages_fetched * cost_per_page;
@@ -1038,15 +1059,18 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
 
 /*
  * cost_bitmap_tree_node
- *		Extract cost and selectivity from a bitmap tree node (index/and/or)
+ *		Extract cost, selectivity, and correlation from a bitmap tree node (index/and/or)
  */
 void
-cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec)
+cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec, double *correlation)
 {
 	if (IsA(path, IndexPath))
 	{
 		*cost = ((IndexPath *) path)->indextotalcost;
-		*selec = ((IndexPath *) path)->indexselectivity;
+		if (selec)
+			*selec = ((IndexPath *) path)->indexselectivity;
+		if (correlation)
+			*correlation = ((IndexPath *) path)->indexCorrelation;
 
 		/*
 		 * Charge a small amount per retrieved tuple to reflect the costs of
@@ -1059,12 +1083,18 @@ cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec)
 	else if (IsA(path, BitmapAndPath))
 	{
 		*cost = path->total_cost;
-		*selec = ((BitmapAndPath *) path)->bitmapselectivity;
+		if (selec)
+			*selec = ((BitmapAndPath *) path)->bitmapselectivity;
+		if (correlation)
+			*correlation = ((BitmapAndPath *) path)->bitmapcorrelation;
 	}
 	else if (IsA(path, BitmapOrPath))
 	{
 		*cost = path->total_cost;
-		*selec = ((BitmapOrPath *) path)->bitmapselectivity;
+		if (selec)
+			*selec = ((BitmapOrPath *) path)->bitmapselectivity;
+		if (correlation)
+			*correlation = ((BitmapOrPath *) path)->bitmapcorrelation;
 	}
 	else
 	{
@@ -1087,8 +1117,9 @@ void
 cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root)
 {
 	Cost		totalCost;
-	Selectivity selec;
+	Selectivity selec, minsubselec;
 	ListCell   *l;
+	double		correlation;
 
 	/*
 	 * We estimate AND selectivity on the assumption that the inputs are
@@ -1100,22 +1131,31 @@ cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root)
 	 * definitely too simplistic?
 	 */
 	totalCost = 0.0;
-	selec = 1.0;
+	minsubselec = selec = 1.0;
+	correlation = 0;
 	foreach(l, path->bitmapquals)
 	{
 		Path	   *subpath = (Path *) lfirst(l);
 		Cost		subCost;
 		Selectivity subselec;
+		double		subcorrelation;
 
-		cost_bitmap_tree_node(subpath, &subCost, &subselec);
+		cost_bitmap_tree_node(subpath, &subCost, &subselec, &subcorrelation);
 
 		selec *= subselec;
 
+		/* For an AND node, use the correlation of its most-selective subpath */
+		if (subselec <= minsubselec) {
+				correlation = subcorrelation;
+				minsubselec = subselec;
+		}
+
 		totalCost += subCost;
 		if (l != list_head(path->bitmapquals))
 			totalCost += 100.0 * cpu_operator_cost;
 	}
 	path->bitmapselectivity = selec;
+	path->bitmapcorrelation = correlation;
 	path->path.rows = 0;		/* per above, not used */
 	path->path.startup_cost = totalCost;
 	path->path.total_cost = totalCost;
@@ -1131,8 +1171,9 @@ void
 cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root)
 {
 	Cost		totalCost;
-	Selectivity selec;
+	Selectivity selec, maxsubselec;
 	ListCell   *l;
+	double		correlation;
 
 	/*
 	 * We estimate OR selectivity on the assumption that the inputs are
@@ -1145,23 +1186,32 @@ cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root)
 	 * optimized out when the inputs are BitmapIndexScans.
 	 */
 	totalCost = 0.0;
-	selec = 0.0;
+	maxsubselec = selec = 0.0;
+	correlation = 0;
 	foreach(l, path->bitmapquals)
 	{
 		Path	   *subpath = (Path *) lfirst(l);
 		Cost		subCost;
 		Selectivity subselec;
+		double		subcorrelation;
 
-		cost_bitmap_tree_node(subpath, &subCost, &subselec);
+		cost_bitmap_tree_node(subpath, &subCost, &subselec, &subcorrelation);
 
 		selec += subselec;
 
+		/* For an OR node, use the correlation of its least-selective subpath */
+		if (subselec >= maxsubselec) {
+				correlation = subcorrelation;
+				maxsubselec = subselec;
+		}
+
 		totalCost += subCost;
 		if (l != list_head(path->bitmapquals) &&
 			!IsA(subpath, IndexPath))
 			totalCost += 100.0 * cpu_operator_cost;
 	}
 	path->bitmapselectivity = Min(selec, 1.0);
+	path->bitmapcorrelation = correlation;
 	path->path.rows = 0;		/* per above, not used */
 	path->path.startup_cost = totalCost;
 	path->path.total_cost = totalCost;
@@ -5513,8 +5563,11 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
 {
 	Cost		indexTotalCost;
 	Selectivity indexSelectivity;
+	double		indexCorrelation;
 	double		T;
-	double		pages_fetched;
+	double		pages_fetched,
+				pages_fetchedMIN,
+				pages_fetchedMAX;
 	double		tuples_fetched;
 	double		heap_pages;
 	long		maxentries;
@@ -5523,7 +5576,7 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
 	 * Fetch total cost of obtaining the bitmap, as well as its total
 	 * selectivity.
 	 */
-	cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
+	cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity, &indexCorrelation);
 
 	/*
 	 * Estimate number of main-table pages fetched.
@@ -5537,7 +5590,12 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
 	 * the same as the Mackert and Lohman formula for the case T <= b (ie, no
 	 * re-reads needed).
 	 */
-	pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+	pages_fetchedMAX = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+
+	/* pages_fetchedMIN is for the perfectly correlated case (csquared=1) */
+	pages_fetchedMIN = ceil(indexSelectivity * (double) baserel->pages);
+
+	pages_fetched = pages_fetchedMAX + indexCorrelation*indexCorrelation*(pages_fetchedMIN - pages_fetchedMAX);
 
 	/*
 	 * Calculate the number of pages fetched from the heap.  Then based on
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 2a50272..8ace295 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -1464,11 +1464,9 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, List *paths)
 			/* duplicate clauseids, keep the cheaper one */
 			Cost		ncost;
 			Cost		ocost;
-			Selectivity nselec;
-			Selectivity oselec;
 
-			cost_bitmap_tree_node(pathinfo->path, &ncost, &nselec);
-			cost_bitmap_tree_node(pathinfoarray[i]->path, &ocost, &oselec);
+			cost_bitmap_tree_node(pathinfo->path, &ncost, NULL, NULL);
+			cost_bitmap_tree_node(pathinfoarray[i]->path, &ocost, NULL, NULL);
 			if (ncost < ocost)
 				pathinfoarray[i] = pathinfo;
 		}
@@ -1580,8 +1578,8 @@ path_usage_comparator(const void *a, const void *b)
 	Selectivity aselec;
 	Selectivity bselec;
 
-	cost_bitmap_tree_node(pa->path, &acost, &aselec);
-	cost_bitmap_tree_node(pb->path, &bcost, &bselec);
+	cost_bitmap_tree_node(pa->path, &acost, &aselec, NULL);
+	cost_bitmap_tree_node(pb->path, &bcost, &bselec, NULL);
 
 	/*
 	 * If costs are the same, sort by selectivity.
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 3d3be19..19ddf5b 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1183,6 +1183,7 @@ typedef struct IndexPath
 	ScanDirection indexscandir;
 	Cost		indextotalcost;
 	Selectivity indexselectivity;
+	double		indexCorrelation;
 } IndexPath;
 
 /*
@@ -1263,6 +1264,7 @@ typedef struct BitmapAndPath
 	Path		path;
 	List	   *bitmapquals;	/* IndexPaths and BitmapOrPaths */
 	Selectivity bitmapselectivity;
+	double		bitmapcorrelation;
 } BitmapAndPath;
 
 /*
@@ -1276,6 +1278,7 @@ typedef struct BitmapOrPath
 	Path		path;
 	List	   *bitmapquals;	/* IndexPaths and BitmapAndPaths */
 	Selectivity bitmapselectivity;
+	double		bitmapcorrelation;
 } BitmapOrPath;
 
 /*
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index cb012ba..7d5edb5 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -79,7 +79,7 @@ extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *bas
 								  Path *bitmapqual, double loop_count);
 extern void cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root);
 extern void cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root);
-extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec);
+extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec, double *correlation);
 extern void cost_tidscan(Path *path, PlannerInfo *root,
 						 RelOptInfo *baserel, List *tidquals, ParamPathInfo *param_info);
 extern void cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index 6446907..5bd8aae 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -1770,24 +1770,23 @@ DROP TABLE onek_with_null;
 --
 -- Check bitmap index path planning
 --
+SET cpu_operator_cost=0;
 EXPLAIN (COSTS OFF)
 SELECT * FROM tenk1
-  WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42);
-                                                               QUERY PLAN                                                                
------------------------------------------------------------------------------------------------------------------------------------------
+  WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 42);
+                                           QUERY PLAN                                            
+-------------------------------------------------------------------------------------------------
  Bitmap Heap Scan on tenk1
-   Recheck Cond: (((thousand = 42) AND (tenthous = 1)) OR ((thousand = 42) AND (tenthous = 3)) OR ((thousand = 42) AND (tenthous = 42)))
+   Recheck Cond: (((thousand = 42) AND (tenthous = 1)) OR ((thousand = 42) AND (tenthous = 42)))
    ->  BitmapOr
          ->  Bitmap Index Scan on tenk1_thous_tenthous
                Index Cond: ((thousand = 42) AND (tenthous = 1))
          ->  Bitmap Index Scan on tenk1_thous_tenthous
-               Index Cond: ((thousand = 42) AND (tenthous = 3))
-         ->  Bitmap Index Scan on tenk1_thous_tenthous
                Index Cond: ((thousand = 42) AND (tenthous = 42))
-(9 rows)
+(7 rows)
 
 SELECT * FROM tenk1
-  WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42);
+  WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 42);
  unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 
 ---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------
       42 |    5530 |   0 |    2 |   2 |      2 |      42 |       42 |          42 |        42 |       42 |  84 |   85 | QBAAAA   | SEIAAA   | OOOOxx
@@ -1818,6 +1817,7 @@ SELECT count(*) FROM tenk1
     10
 (1 row)
 
+RESET cpu_operator_cost;
 --
 -- Check behavior with duplicate index column contents
 --
diff --git a/src/test/regress/expected/identity.out b/src/test/regress/expected/identity.out
index 36a2393..2642740 100644
--- a/src/test/regress/expected/identity.out
+++ b/src/test/regress/expected/identity.out
@@ -316,7 +316,7 @@ SELECT * FROM itest6;
  102 | 
 (3 rows)
 
-SELECT table_name, column_name, is_identity, identity_generation FROM information_schema.columns WHERE table_name = 'itest6';
+SELECT table_name, column_name, is_identity, identity_generation FROM information_schema.columns WHERE table_name = 'itest6' ORDER BY 1,2;
  table_name | column_name | is_identity | identity_generation 
 ------------+-------------+-------------+---------------------
  itest6     | a           | YES         | BY DEFAULT
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 761376b..80cfab8 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -3144,22 +3144,26 @@ select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from
   left join tenk1 t2
   on (subq1.y1 = t2.unique1)
 where t1.unique2 < 42 and t1.stringu1 > t2.stringu2;
-                              QUERY PLAN                               
------------------------------------------------------------------------
+                         QUERY PLAN                         
+------------------------------------------------------------
  Nested Loop
    ->  Nested Loop
          Join Filter: (t1.stringu1 > t2.stringu2)
-         ->  Nested Loop
-               ->  Nested Loop
-                     ->  Seq Scan on onerow
-                     ->  Seq Scan on onerow onerow_1
-               ->  Index Scan using tenk1_unique2 on tenk1 t1
-                     Index Cond: ((unique2 = (11)) AND (unique2 < 42))
+         ->  Hash Join
+               Hash Cond: (t1.unique2 = (11))
+               ->  Bitmap Heap Scan on tenk1 t1
+                     Recheck Cond: (unique2 < 42)
+                     ->  Bitmap Index Scan on tenk1_unique2
+                           Index Cond: (unique2 < 42)
+               ->  Hash
+                     ->  Nested Loop
+                           ->  Seq Scan on onerow
+                           ->  Seq Scan on onerow onerow_1
          ->  Index Scan using tenk1_unique1 on tenk1 t2
                Index Cond: (unique1 = (3))
    ->  Seq Scan on int4_tbl i1
          Filter: (f1 = 0)
-(13 rows)
+(17 rows)
 
 select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from
   tenk1 t1
@@ -3212,18 +3216,22 @@ select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from
   left join tenk1 t2
   on (subq1.y1 = t2.unique1)
 where t1.unique2 < 42 and t1.stringu1 > t2.stringu2;
-                           QUERY PLAN                            
------------------------------------------------------------------
+                      QUERY PLAN                      
+------------------------------------------------------
  Nested Loop
    Join Filter: (t1.stringu1 > t2.stringu2)
-   ->  Nested Loop
-         ->  Seq Scan on int4_tbl i1
-               Filter: (f1 = 0)
-         ->  Index Scan using tenk1_unique2 on tenk1 t1
-               Index Cond: ((unique2 = (11)) AND (unique2 < 42))
+   ->  Hash Join
+         Hash Cond: (t1.unique2 = (11))
+         ->  Bitmap Heap Scan on tenk1 t1
+               Recheck Cond: (unique2 < 42)
+               ->  Bitmap Index Scan on tenk1_unique2
+                     Index Cond: (unique2 < 42)
+         ->  Hash
+               ->  Seq Scan on int4_tbl i1
+                     Filter: (f1 = 0)
    ->  Index Scan using tenk1_unique1 on tenk1 t2
          Index Cond: (unique1 = (3))
-(9 rows)
+(13 rows)
 
 select t1.unique2, t1.stringu1, t2.unique1, t2.stringu2 from
   tenk1 t1
diff --git a/src/test/regress/expected/select.out b/src/test/regress/expected/select.out
index c441049..675bf63 100644
--- a/src/test/regress/expected/select.out
+++ b/src/test/regress/expected/select.out
@@ -861,17 +861,13 @@ RESET enable_indexscan;
 explain (costs off)
 select unique1, unique2 from onek2
   where (unique2 = 11 or unique1 = 0) and stringu1 < 'B';
-                                   QUERY PLAN                                   
---------------------------------------------------------------------------------
+                 QUERY PLAN                  
+---------------------------------------------
  Bitmap Heap Scan on onek2
-   Recheck Cond: (((unique2 = 11) AND (stringu1 < 'B'::name)) OR (unique1 = 0))
-   Filter: (stringu1 < 'B'::name)
-   ->  BitmapOr
-         ->  Bitmap Index Scan on onek2_u2_prtl
-               Index Cond: (unique2 = 11)
-         ->  Bitmap Index Scan on onek2_u1_prtl
-               Index Cond: (unique1 = 0)
-(8 rows)
+   Recheck Cond: (stringu1 < 'B'::name)
+   Filter: ((unique2 = 11) OR (unique1 = 0))
+   ->  Bitmap Index Scan on onek2_u2_prtl
+(4 rows)
 
 select unique1, unique2 from onek2
   where (unique2 = 11 or unique1 = 0) and stringu1 < 'B';
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index 3c0c1cd..6607a7b 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -666,11 +666,13 @@ DROP TABLE onek_with_null;
 -- Check bitmap index path planning
 --
 
+SET cpu_operator_cost=0;
+
 EXPLAIN (COSTS OFF)
 SELECT * FROM tenk1
-  WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42);
+  WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 42);
 SELECT * FROM tenk1
-  WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42);
+  WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 42);
 
 EXPLAIN (COSTS OFF)
 SELECT count(*) FROM tenk1
@@ -678,6 +680,8 @@ SELECT count(*) FROM tenk1
 SELECT count(*) FROM tenk1
   WHERE hundred = 42 AND (thousand = 42 OR thousand = 99);
 
+RESET cpu_operator_cost;
+
 --
 -- Check behavior with duplicate index column contents
 --
diff --git a/src/test/regress/sql/identity.sql b/src/test/regress/sql/identity.sql
index 4b03d24..a19b99b 100644
--- a/src/test/regress/sql/identity.sql
+++ b/src/test/regress/sql/identity.sql
@@ -191,7 +191,7 @@ INSERT INTO itest6 DEFAULT VALUES;
 INSERT INTO itest6 DEFAULT VALUES;
 SELECT * FROM itest6;
 
-SELECT table_name, column_name, is_identity, identity_generation FROM information_schema.columns WHERE table_name = 'itest6';
+SELECT table_name, column_name, is_identity, identity_generation FROM information_schema.columns WHERE table_name = 'itest6' ORDER BY 1,2;
 
 ALTER TABLE itest6 ALTER COLUMN b SET INCREMENT BY 2;  -- fail, not identity
 
-- 
2.7.4

Re: bitmaps and correlation

Reply via email to