... sigh, this time with the patch actually attached.

			regards, tom lane
diff --git a/contrib/bloom/blcost.c b/contrib/bloom/blcost.c
index a38fcf3c579..4359b81d196 100644
--- a/contrib/bloom/blcost.c
+++ b/contrib/bloom/blcost.c
@@ -30,6 +30,9 @@ blcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 	/* We have to visit all index tuples anyway */
 	costs.numIndexTuples = index->tuples;
 
+	/* As in btcostestimate, count only the metapage as non-leaf */
+	costs.numNonLeafPages = 1;
+
 	/* Use generic estimate */
 	genericcostestimate(root, path, loop_count, &costs);
 
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index a96b1b9c0bc..3449f82c71b 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -6931,6 +6931,11 @@ index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
 	return qual_arg_cost;
 }
 
+/*
+ * Compute generic index access cost estimates.
+ *
+ * See struct GenericCosts in selfuncs.h for more info.
+ */
 void
 genericcostestimate(PlannerInfo *root,
 					IndexPath *path,
@@ -7026,16 +7031,18 @@ genericcostestimate(PlannerInfo *root,
 	 * Estimate the number of index pages that will be retrieved.
 	 *
 	 * We use the simplistic method of taking a pro-rata fraction of the total
-	 * number of index pages.  In effect, this counts only leaf pages and not
-	 * any overhead such as index metapage or upper tree levels.
+	 * number of index leaf pages.  We disregard any overhead such as index
+	 * metapages or upper tree levels.
 	 *
 	 * In practice access to upper index levels is often nearly free because
 	 * those tend to stay in cache under load; moreover, the cost involved is
 	 * highly dependent on index type.  We therefore ignore such costs here
 	 * and leave it to the caller to add a suitable charge if needed.
 	 */
-	if (index->pages > 1 && index->tuples > 1)
-		numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
+	if (index->pages > costs->numNonLeafPages && index->tuples > 1)
+		numIndexPages =
+			ceil(numIndexTuples * (index->pages - costs->numNonLeafPages)
+				 / index->tuples);
 	else
 		numIndexPages = 1.0;
 
@@ -7626,9 +7633,18 @@ btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 
 	/*
 	 * Now do generic index cost estimation.
+	 *
+	 * While we expended effort to make realistic estimates of numIndexTuples
+	 * and num_sa_scans, we are content to count only the btree metapage as
+	 * non-leaf.  btree fanout is typically high enough that upper pages are
+	 * few relative to leaf pages, so accounting for them would move the
+	 * estimates at most a percent or two.  Given the uncertainty in just how
+	 * many upper pages exist in a particular index, we'll skip trying to
+	 * handle that.
 	 */
 	costs.numIndexTuples = numIndexTuples;
 	costs.num_sa_scans = num_sa_scans;
+	costs.numNonLeafPages = 1;
 
 	genericcostestimate(root, path, loop_count, &costs);
 
@@ -7693,6 +7709,9 @@ hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 {
 	GenericCosts costs = {0};
 
+	/* As in btcostestimate, count only the metapage as non-leaf */
+	costs.numNonLeafPages = 1;
+
 	genericcostestimate(root, path, loop_count, &costs);
 
 	/*
@@ -7737,6 +7756,8 @@ gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 	GenericCosts costs = {0};
 	Cost		descentCost;
 
+	/* GiST has no metapage, so we treat all pages as leaf pages */
+
 	genericcostestimate(root, path, loop_count, &costs);
 
 	/*
@@ -7792,6 +7813,9 @@ spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 	GenericCosts costs = {0};
 	Cost		descentCost;
 
+	/* As in btcostestimate, count only the metapage as non-leaf */
+	costs.numNonLeafPages = 1;
+
 	genericcostestimate(root, path, loop_count, &costs);
 
 	/*
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 013049b3098..a34a737edf8 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -121,6 +121,12 @@ typedef struct VariableStatData
  * Similarly, they can set num_sa_scans to some value >= 1 for an index AM
  * that doesn't necessarily perform exactly one primitive index scan per
  * distinct combination of ScalarArrayOp array elements.
+ * Similarly, they can set numNonLeafPages to some value >= 1 if they know
+ * how many index pages are not leaf pages.  (It's always good to count
+ * totally non-data-bearing pages such as metapages here, since accounting
+ * for the metapage can move cost estimates for a small index significantly.
+ * But upper pages in large indexes may be few enough relative to leaf pages
+ * that it's not worth trying to count them.)
  */
 typedef struct
 {
@@ -135,6 +141,7 @@ typedef struct
 	double		numIndexTuples; /* number of leaf tuples visited */
 	double		spc_random_page_cost;	/* relevant random_page_cost value */
 	double		num_sa_scans;	/* # indexscans from ScalarArrayOpExprs */
+	BlockNumber numNonLeafPages;	/* # of index pages that are not leaves */
 } GenericCosts;
 
 /* Hooks for plugins to get control when we ask for stats */
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index fa2c7405519..b0c87b1e8e6 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -9122,12 +9122,14 @@ drop index j1_id2_idx;
 set enable_nestloop to 0;
 set enable_hashjoin to 0;
 set enable_sort to 0;
+-- we need additional data to get the partial indexes to be preferred
+insert into j1 select 2, i from generate_series(1, 100) i;
+insert into j2 select 1, i from generate_series(2, 100) i;
+analyze j1;
+analyze j2;
 -- create indexes that will be preferred over the PKs to perform the join
 create index j1_id1_idx on j1 (id1) where id1 % 1000 = 1;
 create index j2_id1_idx on j2 (id1) where id1 % 1000 = 1;
--- need an additional row in j2, if we want j2_id1_idx to be preferred
-insert into j2 values(1,2);
-analyze j2;
 explain (costs off) select * from j1
 inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
 where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1;
diff --git a/src/test/regress/expected/memoize.out b/src/test/regress/expected/memoize.out
index 38dfaf021c9..a8deabc9b84 100644
--- a/src/test/regress/expected/memoize.out
+++ b/src/test/regress/expected/memoize.out
@@ -261,6 +261,7 @@ CREATE INDEX flt_f_idx ON flt (f);
 INSERT INTO flt VALUES('-0.0'::float),('+0.0'::float);
 ANALYZE flt;
 SET enable_seqscan TO off;
+SET enable_material TO off;
 -- Ensure memoize operates in logical mode
 SELECT explain_memoize('
 SELECT * FROM flt f1 INNER JOIN flt f2 ON f1.f = f2.f;', false);
@@ -454,6 +455,7 @@ WHERE unique1 < 3
 (1 row)
 
 RESET enable_seqscan;
+RESET enable_material;
 RESET enable_mergejoin;
 RESET work_mem;
 RESET hash_mem_multiplier;
diff --git a/src/test/regress/expected/select.out b/src/test/regress/expected/select.out
index bab0cc93ff5..698d08ddd72 100644
--- a/src/test/regress/expected/select.out
+++ b/src/test/regress/expected/select.out
@@ -861,7 +861,6 @@ select unique2 from onek2 where unique2 = 11 and stringu1 < 'B';
      11
 (1 row)
 
-RESET enable_indexscan;
 -- check multi-index cases too
 explain (costs off)
 select unique1, unique2 from onek2
@@ -908,6 +907,7 @@ select unique1, unique2 from onek2
       0 |     998
 (2 rows)
 
+RESET enable_indexscan;
 --
 -- Test some corner cases that have been known to confuse the planner
 --
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index d01d1da4ef8..5bcc7a41556 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -3444,14 +3444,16 @@ set enable_nestloop to 0;
 set enable_hashjoin to 0;
 set enable_sort to 0;
 
+-- we need additional data to get the partial indexes to be preferred
+insert into j1 select 2, i from generate_series(1, 100) i;
+insert into j2 select 1, i from generate_series(2, 100) i;
+analyze j1;
+analyze j2;
+
 -- create indexes that will be preferred over the PKs to perform the join
 create index j1_id1_idx on j1 (id1) where id1 % 1000 = 1;
 create index j2_id1_idx on j2 (id1) where id1 % 1000 = 1;
 
--- need an additional row in j2, if we want j2_id1_idx to be preferred
-insert into j2 values(1,2);
-analyze j2;
-
 explain (costs off) select * from j1
 inner join j2 on j1.id1 = j2.id1 and j1.id2 = j2.id2
 where j1.id1 % 1000 = 1 and j2.id1 % 1000 = 1;
diff --git a/src/test/regress/sql/memoize.sql b/src/test/regress/sql/memoize.sql
index c0d47fa875a..179a9107b49 100644
--- a/src/test/regress/sql/memoize.sql
+++ b/src/test/regress/sql/memoize.sql
@@ -138,6 +138,7 @@ INSERT INTO flt VALUES('-0.0'::float),('+0.0'::float);
 ANALYZE flt;
 
 SET enable_seqscan TO off;
+SET enable_material TO off;
 
 -- Ensure memoize operates in logical mode
 SELECT explain_memoize('
@@ -217,6 +218,7 @@ WHERE unique1 < 3
 WHERE t0.ten = t1.twenty AND t0.two <> t2.four OFFSET 0);
 
 RESET enable_seqscan;
+RESET enable_material;
 RESET enable_mergejoin;
 RESET work_mem;
 RESET hash_mem_multiplier;
diff --git a/src/test/regress/sql/select.sql b/src/test/regress/sql/select.sql
index 1d1bf2b9310..771b9869a20 100644
--- a/src/test/regress/sql/select.sql
+++ b/src/test/regress/sql/select.sql
@@ -221,7 +221,6 @@ SET enable_indexscan TO off;
 explain (costs off)
 select unique2 from onek2 where unique2 = 11 and stringu1 < 'B';
 select unique2 from onek2 where unique2 = 11 and stringu1 < 'B';
-RESET enable_indexscan;
 -- check multi-index cases too
 explain (costs off)
 select unique1, unique2 from onek2
@@ -233,6 +232,7 @@ select unique1, unique2 from onek2
 where (unique2 = 11 and stringu1 < 'B') or unique1 = 0;
 select unique1, unique2 from onek2
 where (unique2 = 11 and stringu1 < 'B') or unique1 = 0;
+RESET enable_indexscan;
 --
 -- Test some corner cases that have been known to confuse the planner
 --
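
To illustrate the effect, here is a small standalone C sketch (the page and
tuple counts are invented for the example, and this is not code from the
patch) contrasting the old and new page-fetch computations in
genericcostestimate for a btree consisting of just a metapage and one leaf
page:

#include <math.h>
#include <stdio.h>

int
main(void)
{
	/* hypothetical small btree: a metapage plus a single leaf page */
	double		pages = 2.0;			/* index->pages */
	double		tuples = 100.0;			/* index->tuples */
	double		numIndexTuples = 100.0; /* the scan visits every tuple */
	double		numNonLeafPages = 1.0;	/* just the metapage */

	/* old formula: pro-rata share of all pages, metapage included */
	double		oldPages = ceil(numIndexTuples * pages / tuples);

	/* new formula: pro-rata share of leaf pages only */
	double		newPages =
		ceil(numIndexTuples * (pages - numNonLeafPages) / tuples);

	/* prints "old = 2, new = 1": excluding the metapage halves the
	 * estimated page fetches for a full scan of this two-page index */
	printf("old = %g, new = %g\n", oldPages, newPages);
	return 0;
}

That factor-of-two swing on a tiny index is what the new selfuncs.h comment
means by "accounting for the metapage can move cost estimates for a small
index significantly", and is presumably why the regression tests above need
extra data to keep the partial indexes preferred.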