Attached for real.
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 88780c0..1c25f36 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -548,11 +548,12 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count, /* * Save amcostestimate's results for possible use in bitmap scan planning. - * We don't bother to save indexStartupCost or indexCorrelation, because a - * bitmap scan doesn't care about either. + * We don't bother to save indexStartupCost, because a + * bitmap scan doesn't care. */ path->indextotalcost = indexTotalCost; path->indexselectivity = indexSelectivity; + path->indexCorrelation = indexCorrelation; /* all costs for touching index itself included here */ startup_cost += indexStartupCost; @@ -671,6 +672,7 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count, else min_IO_cost = 0; } +elog(DEBUG4, "index_pages_fetched2 fetched:%d", (int)pages_fetched); if (partial_path) { @@ -711,6 +713,9 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count, csquared = indexCorrelation * indexCorrelation; run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost); + Cost heapCost=max_IO_cost + csquared * (min_IO_cost - max_IO_cost); + elog(DEBUG4, "index_cost1 startup:%f heap:%f run:%f idxtotal:%f total:%f", + indexStartupCost, heapCost, run_cost, indexTotalCost, path->path.total_cost); /* * Estimate CPU costs per tuple. 
@@ -744,6 +749,9 @@ cost_index(IndexPath *path, PlannerInfo *root, double loop_count, path->path.startup_cost = startup_cost; path->path.total_cost = startup_cost + run_cost; + + elog(DEBUG4, "index_cost1 startup:%f heap:%f run:%f idxtotal:%f total:%f", + indexStartupCost, heapCost, run_cost, indexTotalCost, path->path.total_cost); } /* @@ -876,6 +884,7 @@ index_pages_fetched(double tuples_fetched, BlockNumber pages, } pages_fetched = ceil(pages_fetched); } +elog(DEBUG4, "index_pages_fetched T=pages=%d total_pages:%d ECC:%d bECC:%d fetched:%d", (int)T, (int)total_pages, (int)effective_cache_size, (int)b, (int)pages_fetched); return pages_fetched; } @@ -888,6 +897,7 @@ index_pages_fetched(double tuples_fetched, BlockNumber pages, * not completely clear, and detecting duplicates is difficult, so ignore it * for now. */ + static double get_indexpath_pages(Path *bitmapqual) { @@ -925,6 +935,66 @@ get_indexpath_pages(Path *bitmapqual) } /* + * get_indexpath_correlation + * Recursively determine correlation of a bitmap path by XXX:min/max of correlation of indices scanned. 
+ */ +static double +UNUSED_get_indexpath_correlation(PlannerInfo *root, Path *bitmapqual) +{ + double x=0, result = 0; + int loop = 0; + ListCell *l; + + if (IsA(bitmapqual, IndexPath)) + { + IndexPath *ipath = (IndexPath *) bitmapqual; + amcostestimate_function amcostestimate = ipath->indexinfo->amcostestimate; + double junk; + Cost junk2; + Selectivity junk3; + amcostestimate(root, ipath, 1/*loop_count*/, + &junk2/*&indexStartupCost*/, &junk2/*&indexTotalCost*/, + &junk3/*&indexSelectivity*/, &x, + &junk/*&index_pages*/); + // result = 0.1*x*x; + // result = fabs(x); + result = (x*x + fabs(x))/2; + elog(DEBUG4, "get_indexpath_correlation %f", result); + /* The correlation values below here will be between [0,1] and not further squared */ + } else if (IsA(bitmapqual, BitmapAndPath)) { + BitmapAndPath *apath = (BitmapAndPath *) bitmapqual; + + foreach(l, apath->bitmapquals) + { + x = UNUSED_get_indexpath_correlation(root, (Path *) lfirst(l)); + // XXX: should use the correlation of the most-selective path.. + if (loop) result = Max(result, x); + else result = x; + loop=1; + } + } + else if (IsA(bitmapqual, BitmapOrPath)) + { + BitmapOrPath *opath = (BitmapOrPath *) bitmapqual; + + foreach(l, opath->bitmapquals) + { + /* Min is probably not right: should use the correlation of the + * least-selective path */ + x = UNUSED_get_indexpath_correlation(root, (Path *) lfirst(l)); + if (loop) result = Min(result, x); + else result = x; + loop=1; + } + } + + else + elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual)); + + return result; +} + +/* * cost_bitmap_heap_scan * Determines and returns the cost of scanning a relation using a bitmap * index-then-heap plan. 
@@ -954,7 +1024,6 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, double pages_fetched; double spc_seq_page_cost, spc_random_page_cost; - double T; /* Should only be applied to base relations */ Assert(IsA(baserel, RelOptInfo)); @@ -975,7 +1044,6 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, &tuples_fetched); startup_cost += indexTotalCost; - T = (baserel->pages > 1) ? (double) baserel->pages : 1.0; /* Fetch estimated page costs for tablespace containing table. */ get_tablespace_page_costs(baserel->reltablespace, @@ -988,12 +1056,35 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, * appropriate to charge spc_seq_page_cost apiece. The effect is * nonlinear, too. For lack of a better idea, interpolate like this to * determine the cost per page. + * Note this works at PAGE granularity, so even if we read 1% of a + * table's tuples, if we have to read nearly every page, it should be + * considered sequential. */ - if (pages_fetched >= 2.0) + double T = (baserel->pages > 1) ? (double) baserel->pages : 1.0; + if (pages_fetched >= 2.0) { cost_per_page = spc_random_page_cost - (spc_random_page_cost - spc_seq_page_cost) * sqrt(pages_fetched / T); - else + elog(DEBUG4, "cost_bitmap_heap_scan old pages:%d fetched:%f fraction:%f cost_per_page:%f", baserel->pages, pages_fetched, pages_fetched/T, cost_per_page); + + // XXX: interpolate based on correlation from seq_page_cost to rand_page_cost? + // a highly correlated bitmap scan 1) likely reads fewer pages; and, + // 2) at higher "density" (more sequential). + // double correlation = get_indexpath_correlation(root, bitmapqual); + double correlation = ((IndexPath *)bitmapqual)->indexCorrelation; + double cost_per_page2 = spc_seq_page_cost + + (1-correlation*correlation)*(spc_random_page_cost - spc_seq_page_cost); // XXX: *sqrt(pages_fetched/T) ? 
+ elog(DEBUG4, "cost_bitmap_heap_scan new corr=%f cost_per_page=%f", correlation, cost_per_page2); + // There are two variables: fraction of pages(T) and correlation. + // If T is high, giving sequential reads, we want low cost_per_page regardless of correlation. + // If correlation is high, we (probably) want low cost per page. + // ...the exception is if someone does an =ANY() query on a list of non-consecutive values. + // Something like start_time=ANY('2017-01-01', '2017-02-01',...) + // which reads a small number of rows from pages all across the length of the table. + // But index scan doesn't seem to address that at all, so leave it alone for now. + cost_per_page=Min(cost_per_page, cost_per_page2); + // cost_per_page=sqrt(cost_per_page*cost_per_page + cost_per_page2*cost_per_page2); + } else cost_per_page = spc_random_page_cost; run_cost += pages_fetched * cost_per_page; @@ -1024,6 +1115,7 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, path->rows = clamp_row_est(path->rows / parallel_divisor); } + elog(DEBUG4, "cost_bitmap_heap_scan run_cost0: %f", run_cost ); run_cost += cpu_run_cost; @@ -1031,21 +1123,24 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel, startup_cost += path->pathtarget->cost.startup; run_cost += path->pathtarget->cost.per_tuple * path->rows; + elog(DEBUG4, "cost_bitmap_heap_scan startup:%f cpu_run_cost:%f run_cost2:%f", startup_cost, cpu_run_cost, run_cost); + path->startup_cost = startup_cost; path->total_cost = startup_cost + run_cost; } /* * cost_bitmap_tree_node - * Extract cost and selectivity from a bitmap tree node (index/and/or) + * Extract cost, selectivity, and correlation from a bitmap tree node (index/and/or) */ void -cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec) +cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec, double *correlation) { if (IsA(path, IndexPath)) { *cost = ((IndexPath *) path)->indextotalcost; *selec = ((IndexPath *) 
path)->indexselectivity; + if (correlation) *correlation = ((IndexPath *) path)->indexCorrelation; /* * Charge a small amount per retrieved tuple to reflect the costs of @@ -1059,11 +1154,13 @@ cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec) { *cost = path->total_cost; *selec = ((BitmapAndPath *) path)->bitmapselectivity; + if (correlation) *correlation = ((BitmapAndPath *) path)->bitmapcorrelation; } else if (IsA(path, BitmapOrPath)) { *cost = path->total_cost; *selec = ((BitmapOrPath *) path)->bitmapselectivity; + if (correlation) *correlation = ((BitmapOrPath *) path)->bitmapcorrelation; } else { @@ -1086,8 +1183,9 @@ void cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root) { Cost totalCost; - Selectivity selec; + Selectivity selec, minsubselec; ListCell *l; + double correlation; /* * We estimate AND selectivity on the assumption that the inputs are @@ -1099,22 +1197,32 @@ cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root) * definitely too simplistic? */ totalCost = 0.0; - selec = 1.0; + minsubselec = selec = 1.0; + correlation = 0; foreach(l, path->bitmapquals) { Path *subpath = (Path *) lfirst(l); Cost subCost; Selectivity subselec; + double subcorrelation; - cost_bitmap_tree_node(subpath, &subCost, &subselec); + cost_bitmap_tree_node(subpath, &subCost, &subselec, &subcorrelation); selec *= subselec; + /* For an AND node, use the correlation of its most-selective subpath */ + if (subselec<=minsubselec) { + correlation = subcorrelation; + minsubselec = subselec; + } + totalCost += subCost; if (l != list_head(path->bitmapquals)) + // ??? 
XXX && !IsA(subpath, IndexPath)) totalCost += 100.0 * cpu_operator_cost; } path->bitmapselectivity = selec; + path->bitmapcorrelation = correlation; path->path.rows = 0; /* per above, not used */ path->path.startup_cost = totalCost; path->path.total_cost = totalCost; @@ -1130,8 +1238,9 @@ void cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root) { Cost totalCost; - Selectivity selec; + Selectivity selec, maxsubselec; ListCell *l; + double correlation; /* * We estimate OR selectivity on the assumption that the inputs are @@ -1144,23 +1253,32 @@ cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root) * optimized out when the inputs are BitmapIndexScans. */ totalCost = 0.0; - selec = 0.0; + maxsubselec = selec = 0.0; + correlation = 0; foreach(l, path->bitmapquals) { Path *subpath = (Path *) lfirst(l); Cost subCost; Selectivity subselec; + double subcorrelation; - cost_bitmap_tree_node(subpath, &subCost, &subselec); + cost_bitmap_tree_node(subpath, &subCost, &subselec, &subcorrelation); selec += subselec; + /* For an OR node, use the correlation of its least-selective subpath */ + if (subselec>=maxsubselec) { + correlation = subcorrelation; + maxsubselec = subselec; + } + totalCost += subCost; if (l != list_head(path->bitmapquals) && !IsA(subpath, IndexPath)) totalCost += 100.0 * cpu_operator_cost; } path->bitmapselectivity = Min(selec, 1.0); + path->bitmapcorrelation = correlation; path->path.rows = 0; /* per above, not used */ path->path.startup_cost = totalCost; path->path.total_cost = totalCost; @@ -5390,8 +5508,11 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, { Cost indexTotalCost; Selectivity indexSelectivity; + double indexCorrelation; double T; - double pages_fetched; + double pages_fetched, + pages_fetchedMIN, + pages_fetchedMAX; double tuples_fetched; double heap_pages; long maxentries; @@ -5400,13 +5521,15 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, * Fetch total cost of 
obtaining the bitmap, as well as its total * selectivity. */ - cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity); + cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity, &indexCorrelation); /* * Estimate number of main-table pages fetched. */ tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples); + elog(DEBUG4, "compute_bitmap old tuples_fetched:%f indexSelectivity:%f baserel->tuples:%f", + tuples_fetched, indexSelectivity, baserel->tuples); T = (baserel->pages > 1) ? (double) baserel->pages : 1.0; /* @@ -5414,7 +5537,16 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, * the same as the Mackert and Lohman formula for the case T <= b (ie, no * re-reads needed). */ - pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); + pages_fetchedMAX = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched); + + /* pages_fetchedMIN is for the perfectly correlated case (csquared=1) */ + pages_fetchedMIN = ceil(indexSelectivity * (double) baserel->pages); + + pages_fetched = pages_fetchedMAX + indexCorrelation*indexCorrelation*(pages_fetchedMIN - pages_fetchedMAX); + + // pages_fetched = pages_fetchedMAX - indexCorrelation*indexCorrelation * (pages_fetchedMIN-pages_fetchedMAX); +elog(DEBUG4, "compute_bitmap pages_fetched high: %f low: %f corr=%f val: %f", pages_fetchedMAX, pages_fetchedMIN, indexCorrelation, pages_fetched); + /* * Calculate the number of pages fetched from the heap. Then based on