Find attached cleaned up patch. For now, I updated the regress/expected/, but I think the test maybe has to be updated to do what it was written to do.
>From 36f547d69b8fee25869d6ef3ef26d327a8ba1205 Mon Sep 17 00:00:00 2001 From: Justin Pryzby <pryz...@telsasoft.com> Date: Tue, 1 Jan 2019 16:17:28 -0600 Subject: [PATCH v1] Use correlation statistic in costing bitmap scans..
Same as for an index scan, a correlated bitmap which accesses pages across a small portion of the table should have a cost estimate much less than an uncorrelated scan (like modulus) across the entire length of the table, the latter having a high component of random access. Note, Tom points out that there are cases where a column could be tightly-"clumped" without being highly-ordered. Since we have correlation already, we use that until such time as someone implements a new statistic for clumpiness. This patch only intends to make costing of bitmap heap scan on par with the same cost of index scan without bitmap. --- contrib/postgres_fdw/expected/postgres_fdw.out | 15 ++-- src/backend/optimizer/path/costsize.c | 94 +++++++++++++++++++++----- src/backend/optimizer/path/indxpath.c | 10 ++- src/include/nodes/pathnodes.h | 3 + src/include/optimizer/cost.h | 2 +- src/test/regress/expected/create_index.out | 16 ++--- src/test/regress/expected/join.out | 18 +++-- src/test/regress/sql/create_index.sql | 8 ++- 8 files changed, 118 insertions(+), 48 deletions(-) diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index c915885..fb4c7f2 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -2257,11 +2257,12 @@ SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = f -> Foreign Scan on public.ft1 Output: ft1.c1, ft1.c2, ft1.c3, ft1.c4, ft1.c5, ft1.c6, ft1.c7, ft1.c8, ft1.* Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) FOR UPDATE - -> Materialize + -> Sort Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft2.* + Sort Key: ft2.c1 -> Foreign Scan on public.ft2 Output: ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft2.* - Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) ORDER BY "C 1" ASC NULLS LAST FOR UPDATE + Remote SQL: SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" WHERE (("C 1" < 100)) FOR UPDATE -> Sort Output: ft4.c1, ft4.c2, ft4.c3, ft4.* Sort Key: ft4.c1 @@ -2276,7 +2277,7 @@ SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = f Remote SQL: SELECT c1, c2, c3 FROM "S 1"."T 4" FOR UPDATE -> Index Scan using local_tbl_pkey on public.local_tbl Output: local_tbl.c1, local_tbl.c2, local_tbl.c3, local_tbl.ctid -(47 rows) +(48 rows) SELECT * FROM ft1, ft2, ft4, ft5, local_tbl WHERE ft1.c1 = ft2.c1 AND ft1.c2 = ft4.c1 AND ft1.c2 = ft5.c1 AND ft1.c2 = local_tbl.c1 AND ft1.c1 < 100 AND ft2.c1 < 100 FOR UPDATE; @@ -3318,10 +3319,12 @@ select c2, sum from "S 1"."T 1" t1, lateral (select sum(t2.c1 + t1."C 1") sum fr Sort Key: t1.c2 -> Nested Loop Output: t1.c2, qry.sum - -> Index Scan using t1_pkey on "S 1"."T 1" t1 + -> Bitmap Heap Scan on "S 1"."T 1" t1 Output: t1."C 1", t1.c2, t1.c3, t1.c4, t1.c5, t1.c6, t1.c7, t1.c8 - Index Cond: (t1."C 1" < 100) + Recheck Cond: (t1."C 1" < 100) Filter: (t1.c2 < 3) + -> Bitmap Index Scan on t1_pkey + Index Cond: (t1."C 1" < 100) -> Subquery Scan on qry Output: qry.sum, t2.c1 Filter: ((t1.c2 * 2) = qry.sum) @@ -3329,7 +3332,7 @@ select c2, sum from "S 1"."T 1" t1, lateral (select sum(t2.c1 + t1."C 1") sum fr Output: (sum((t2.c1 + t1."C 1"))), t2.c1 Relations: Aggregate on (public.ft2 t2) Remote SQL: SELECT sum(("C 1" + $1::integer)), "C 1" FROM "S 1"."T 1" GROUP BY 2 -(16 rows) +(18 rows) select c2, sum from "S 1"."T 1" t1, lateral (select sum(t2.c1 + t1."C 1") sum from ft2 t2 group by t2.c1) qry where t1.c2 * 2 = qry.sum and t1.c2 < 3 and t1."C 1" < 100 order by 1; c2 | sum diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index b5a0033..d39e12d 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -549,11 +549,13 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count, /* * Save amcostestimate's results for possible use in bitmap scan planning. - * We don't bother to save indexStartupCost or indexCorrelation, because a - * bitmap scan doesn't care about either. + * We don't bother to save indexStartupCost, because a bitmap scan + * can't start until the index scan completes, so only cares about its + * total cost. */ path->indextotalcost = indexTotalCost; path->indexselectivity = indexSelectivity; + path->indexCorrelation = indexCorrelation; /* all costs for touching index itself included here */ startup_cost += indexStartupCost; @@ -986,12 +988,31 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, * appropriate to charge spc_seq_page_cost apiece. The effect is * nonlinear, too. For lack of a better idea, interpolate like this to * determine the cost per page. + * Note this works at PAGE granularity, so even if we read 1% of a + * table's tuples, if we have to read nearly every page, it should be + * considered sequential. */ - if (pages_fetched >= 2.0) + if (pages_fetched >= 2.0) { + double correlation = ((IndexPath *)bitmapqual)->indexCorrelation; + double cost_per_page_corr; + /* + * Interpolate based on pages_fetched and correlation from seq_page_cost to rand_page_cost. + * A highly correlated bitmap scan 1) likely reads fewer pages (handled in + * compute_bitmap_pages); and, 2) at higher "density" (more sequential). + */ cost_per_page = spc_random_page_cost - (spc_random_page_cost - spc_seq_page_cost) * sqrt(pages_fetched / T); - else + cost_per_page_corr = spc_random_page_cost - + (spc_random_page_cost - spc_seq_page_cost) + * (1-correlation*correlation); + + /* + * We expect sequential reads and low cost_per_page when *either* + * T is high or correlation is high. + */ + cost_per_page = Min(cost_per_page,cost_per_page_corr); + } else cost_per_page = spc_random_page_cost; run_cost += pages_fetched * cost_per_page; @@ -1035,15 +1056,18 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, /* * cost_bitmap_tree_node - * Extract cost and selectivity from a bitmap tree node (index/and/or) + * Extract cost, selectivity, and correlation from a bitmap tree node (index/and/or) */ void -cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec) +cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec, double *correlation) { if (IsA(path, IndexPath)) { *cost = ((IndexPath *) path)->indextotalcost; - *selec = ((IndexPath *) path)->indexselectivity; + if (selec) + *selec = ((IndexPath *) path)->indexselectivity; + if (correlation) + *correlation = ((IndexPath *) path)->indexCorrelation; /* * Charge a small amount per retrieved tuple to reflect the costs of @@ -1056,12 +1080,18 @@ cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec) else if (IsA(path, BitmapAndPath)) { *cost = path->total_cost; - *selec = ((BitmapAndPath *) path)->bitmapselectivity; + if (selec) + *selec = ((BitmapAndPath *) path)->bitmapselectivity; + if (correlation) + *correlation = ((BitmapAndPath *) path)->bitmapcorrelation; } else if (IsA(path, BitmapOrPath)) { *cost = path->total_cost; - *selec = ((BitmapOrPath *) path)->bitmapselectivity; + if (selec) + *selec = ((BitmapOrPath *) path)->bitmapselectivity; + if (correlation) + *correlation = ((BitmapOrPath *) path)->bitmapcorrelation; } else { @@ -1084,8 +1114,9 @@ void cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root) { Cost totalCost; - Selectivity selec; + Selectivity selec, minsubselec; ListCell *l; + double correlation; /* * We estimate AND selectivity on the assumption that the inputs are @@ -1097,22 +1128,31 @@ cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root) * definitely too simplistic? */ totalCost = 0.0; - selec = 1.0; + minsubselec = selec = 1.0; + correlation = 0; foreach(l, path->bitmapquals) { Path *subpath = (Path *) lfirst(l); Cost subCost; Selectivity subselec; + double subcorrelation; - cost_bitmap_tree_node(subpath, &subCost, &subselec); + cost_bitmap_tree_node(subpath, &subCost, &subselec, &subcorrelation); selec *= subselec; + /* For an AND node, use the correlation of its most-selective subpath */ + if (subselec <= minsubselec) { + correlation = subcorrelation; + minsubselec = subselec; + } + totalCost += subCost; if (l != list_head(path->bitmapquals)) totalCost += 100.0 * cpu_operator_cost; } path->bitmapselectivity = selec; + path->bitmapcorrelation = correlation; path->path.rows = 0; /* per above, not used */ path->path.startup_cost = totalCost; path->path.total_cost = totalCost; @@ -1128,8 +1168,9 @@ void cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root) { Cost totalCost; - Selectivity selec; + Selectivity selec, maxsubselec; ListCell *l; + double correlation; /* * We estimate OR selectivity on the assumption that the inputs are @@ -1142,23 +1183,32 @@ cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root) * optimized out when the inputs are BitmapIndexScans. */ totalCost = 0.0; - selec = 0.0; + maxsubselec = selec = 0.0; + correlation = 0; foreach(l, path->bitmapquals) { Path *subpath = (Path *) lfirst(l); Cost subCost; Selectivity subselec; + double subcorrelation; - cost_bitmap_tree_node(subpath, &subCost, &subselec); + cost_bitmap_tree_node(subpath, &subCost, &subselec, &subcorrelation); selec += subselec; + /* For an OR node, use the correlation of its least-selective subpath */ + if (subselec >= maxsubselec) { + correlation = subcorrelation; + maxsubselec = subselec; + } + totalCost += subCost; if (l != list_head(path->bitmapquals) && !IsA(subpath, IndexPath)) totalCost += 100.0 * cpu_operator_cost; } path->bitmapselectivity = Min(selec, 1.0); + path->bitmapcorrelation = correlation; path->path.rows = 0; /* per above, not used */ path->path.startup_cost = totalCost; path->path.total_cost = totalCost; @@ -5510,8 +5560,11 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, { Cost indexTotalCost; Selectivity indexSelectivity; + double indexCorrelation; double T; - double pages_fetched; + double pages_fetched, + pages_fetchedMIN, + pages_fetchedMAX; double tuples_fetched; double heap_pages; long maxentries; @@ -5520,7 +5573,7 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, * Fetch total cost of obtaining the bitmap, as well as its total * selectivity. */ - cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity); + cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity, &indexCorrelation); /* * Estimate number of main-table pages fetched. @@ -5534,7 +5587,12 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, * the same as the Mackert and Lohman formula for the case T <= b (ie, no * re-reads needed). */ - pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); + pages_fetchedMAX = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); + + /* pages_fetchedMIN is for the perfectly correlated case (csquared=1) */ + pages_fetchedMIN = ceil(indexSelectivity * (double) baserel->pages); + + pages_fetched = pages_fetchedMAX + indexCorrelation*indexCorrelation*(pages_fetchedMIN - pages_fetchedMAX); /* * Calculate the number of pages fetched from the heap. Then based on diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index f42926e..5d111fc 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -1483,11 +1483,9 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, List *paths) /* duplicate clauseids, keep the cheaper one */ Cost ncost; Cost ocost; - Selectivity nselec; - Selectivity oselec; - cost_bitmap_tree_node(pathinfo->path, &ncost, &nselec); - cost_bitmap_tree_node(pathinfoarray[i]->path, &ocost, &oselec); + cost_bitmap_tree_node(pathinfo->path, &ncost, NULL, NULL); + cost_bitmap_tree_node(pathinfoarray[i]->path, &ocost, NULL, NULL); if (ncost < ocost) pathinfoarray[i] = pathinfo; } @@ -1599,8 +1597,8 @@ path_usage_comparator(const void *a, const void *b) Selectivity aselec; Selectivity bselec; - cost_bitmap_tree_node(pa->path, &acost, &aselec); - cost_bitmap_tree_node(pb->path, &bcost, &bselec); + cost_bitmap_tree_node(pa->path, &acost, &aselec, NULL); + cost_bitmap_tree_node(pb->path, &bcost, &bselec, NULL); /* * If costs are the same, sort by selectivity. diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 3d3be19..19ddf5b 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1183,6 +1183,7 @@ typedef struct IndexPath ScanDirection indexscandir; Cost indextotalcost; Selectivity indexselectivity; + double indexCorrelation; } IndexPath; /* @@ -1263,6 +1264,7 @@ typedef struct BitmapAndPath Path path; List *bitmapquals; /* IndexPaths and BitmapOrPaths */ Selectivity bitmapselectivity; + double bitmapcorrelation; } BitmapAndPath; /* @@ -1276,6 +1278,7 @@ typedef struct BitmapOrPath Path path; List *bitmapquals; /* IndexPaths and BitmapAndPaths */ Selectivity bitmapselectivity; + double bitmapcorrelation; } BitmapOrPath; /* diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index cb012ba..7d5edb5 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -79,7 +79,7 @@ extern void cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *bas Path *bitmapqual, double loop_count); extern void cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root); extern void cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root); -extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec); +extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec, double *correlation); extern void cost_tidscan(Path *path, PlannerInfo *root, RelOptInfo *baserel, List *tidquals, ParamPathInfo *param_info); extern void cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root, diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index 6446907..5bd8aae 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -1770,24 +1770,23 @@ DROP TABLE onek_with_null; -- -- Check bitmap index path planning -- +SET cpu_operator_cost=0; EXPLAIN (COSTS OFF) SELECT * FROM tenk1 - WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42); - QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------------------ + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 42); + QUERY PLAN +------------------------------------------------------------------------------------------------- Bitmap Heap Scan on tenk1 - Recheck Cond: (((thousand = 42) AND (tenthous = 1)) OR ((thousand = 42) AND (tenthous = 3)) OR ((thousand = 42) AND (tenthous = 42))) + Recheck Cond: (((thousand = 42) AND (tenthous = 1)) OR ((thousand = 42) AND (tenthous = 42))) -> BitmapOr -> Bitmap Index Scan on tenk1_thous_tenthous Index Cond: ((thousand = 42) AND (tenthous = 1)) -> Bitmap Index Scan on tenk1_thous_tenthous - Index Cond: ((thousand = 42) AND (tenthous = 3)) - -> Bitmap Index Scan on tenk1_thous_tenthous Index Cond: ((thousand = 42) AND (tenthous = 42)) -(9 rows) +(7 rows) SELECT * FROM tenk1 - WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42); + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 42); unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 ---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+--------- 42 | 5530 | 0 | 2 | 2 | 2 | 42 | 42 | 42 | 42 | 42 | 84 | 85 | QBAAAA | SEIAAA | OOOOxx @@ -1818,6 +1817,7 @@ SELECT count(*) FROM tenk1 10 (1 row) +RESET cpu_operator_cost; -- -- Check behavior with duplicate index column contents -- diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 761376b..bc011ee 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3584,24 +3584,28 @@ select b.unique1 from join int4_tbl i1 on b.thousand = f1 right join int4_tbl i2 on i2.f1 = b.tenthous order by 1; - QUERY PLAN ------------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------- Sort Sort Key: b.unique1 - -> Nested Loop Left Join - -> Seq Scan on int4_tbl i2 + -> Hash Right Join + Hash Cond: (b.tenthous = i2.f1) -> Nested Loop Left Join Join Filter: (b.unique1 = 42) -> Nested Loop -> Nested Loop -> Seq Scan on int4_tbl i1 - -> Index Scan using tenk1_thous_tenthous on tenk1 b - Index Cond: ((thousand = i1.f1) AND (tenthous = i2.f1)) + -> Bitmap Heap Scan on tenk1 b + Recheck Cond: (thousand = i1.f1) + -> Bitmap Index Scan on tenk1_thous_tenthous + Index Cond: (thousand = i1.f1) -> Index Scan using tenk1_unique1 on tenk1 a Index Cond: (unique1 = b.unique2) -> Index Only Scan using tenk1_thous_tenthous on tenk1 c Index Cond: (thousand = a.thousand) -(15 rows) + -> Hash + -> Seq Scan on int4_tbl i2 +(19 rows) select b.unique1 from tenk1 a join tenk1 b on a.unique1 = b.unique2 diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index 3c0c1cd..6607a7b 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -666,11 +666,13 @@ DROP TABLE onek_with_null; -- Check bitmap index path planning -- +SET cpu_operator_cost=0; + EXPLAIN (COSTS OFF) SELECT * FROM tenk1 - WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42); + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 42); SELECT * FROM tenk1 - WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 3 OR tenthous = 42); + WHERE thousand = 42 AND (tenthous = 1 OR tenthous = 42); EXPLAIN (COSTS OFF) SELECT count(*) FROM tenk1 @@ -678,6 +680,8 @@ SELECT count(*) FROM tenk1 SELECT count(*) FROM tenk1 WHERE hundred = 42 AND (thousand = 42 OR thousand = 99); +RESET cpu_operator_cost; + -- -- Check behavior with duplicate index column contents -- -- 2.7.4