Hi, With extended statistics it may not be immediately obvious if they were applied and to which clauses. If you have multiple extended statistics, we may also apply them in different order, etc. And with expressions, there's also the question of matching expressions to the statistics.
So it seems useful to include this into in the explain plan - show which statistics were applied, in which order. Attached is an early PoC patch doing that in VERBOSE mode. I'll add it to the next CF. A simple example demonstrating the idea: ====================================================================== create table t (a int, b int); insert into t select mod(i,10), mod(i,10) from generate_series(1,100000) s(i); create statistics s on a, b from t; analyze t; test=# explain (verbose) select * from t where a = 1 and b = 1; QUERY PLAN --------------------------------------------------------------- Seq Scan on public.t (cost=0.00..1943.00 rows=10040 width=8) Output: a, b Filter: ((t.a = 1) AND (t.b = 1)) Statistics: public.s Clauses: ((a = 1) AND (b = 1)) (4 rows) test=# explain (verbose) select 1 from t group by a, b; QUERY PLAN ---------------------------------------------------------------------- HashAggregate (cost=1943.00..1943.10 rows=10 width=12) Output: 1, a, b Group Key: t.a, t.b -> Seq Scan on public.t (cost=0.00..1443.00 rows=100000 width=8) Output: a, b Statistics: public.s Clauses: (a AND b) (6 rows) ====================================================================== The current implementation is a bit ugly PoC, with a couple annoying issues that need to be solved: 1) The information is stashed in multiple lists added to a Plan. Maybe there's a better place, and maybe we need to invent a better way to track the info (a new node stashed in a single List). 2) The deparsing is modeled (i.e. copied) from how we deal with index quals, but it's having issues with nested OR clauses, because there are nested RestrictInfo nodes and the deparsing does not expect that. 3) It does not work for functional dependencies, because we effectively "merge" all functional dependencies and apply the entries. Not sure how to display this, but I think it should show the individual dependencies actually applied. 4) The info is collected always, but I guess we should do that only when in explain mode. Not sure how expensive it is. 5) It includes just statistics name + clauses, but maybe we should include additional info (e.g estimate for that combination of clauses). 6) The clauses in the grouping query are transformed to AND list, which is wrong. This is easy to fix, I was lazy to do that in a PoC patch. 7) It does not show statistics for individual expressions. I suppose examine_variable could add it to the rel somehow, and maybe we could do that with index expressions too? regards -- Tomas Vondra EnterpriseDB: http://www.enterprisedb.com The Enterprise PostgreSQL Company
>From 4629d1d9b1fc5f6c3bc93e0544b0c022345086c9 Mon Sep 17 00:00:00 2001 From: Tomas Vondra <tomas.von...@postgresql.org> Date: Thu, 18 Mar 2021 15:09:24 +0100 Subject: [PATCH] show stats in explain --- src/backend/commands/explain.c | 97 +++++++++++++++++++++++ src/backend/nodes/copyfuncs.c | 4 + src/backend/nodes/makefuncs.c | 11 +++ src/backend/nodes/outfuncs.c | 4 + src/backend/nodes/readfuncs.c | 4 + src/backend/optimizer/plan/createplan.c | 15 ++++ src/backend/optimizer/util/relnode.c | 12 +++ src/backend/optimizer/util/restrictinfo.c | 35 ++++++++ src/backend/statistics/extended_stats.c | 5 ++ src/backend/utils/adt/selfuncs.c | 11 +++ src/backend/utils/cache/lsyscache.c | 49 ++++++++++++ src/include/nodes/makefuncs.h | 2 + src/include/nodes/pathnodes.h | 5 ++ src/include/nodes/plannodes.h | 5 ++ src/include/optimizer/restrictinfo.h | 2 + src/include/utils/lsyscache.h | 3 + 16 files changed, 264 insertions(+) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index afc45429ba..7a4520f151 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -77,6 +77,9 @@ static void show_qual(List *qual, const char *qlabel, static void show_scan_qual(List *qual, const char *qlabel, PlanState *planstate, List *ancestors, ExplainState *es); +static void show_scan_stats(List *stats, List *clauses, List *ors, + PlanState *planstate, List *ancestors, + ExplainState *es); static void show_upper_qual(List *qual, const char *qlabel, PlanState *planstate, List *ancestors, ExplainState *es); @@ -1720,6 +1723,10 @@ ExplainNode(PlanState *planstate, List *ancestors, if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + if (es->verbose) + show_scan_stats(plan->applied_stats, plan->applied_clauses, + plan->applied_clauses_or, + planstate, ancestors, es); break; case T_IndexOnlyScan: show_scan_qual(((IndexOnlyScan *) plan)->indexqual, @@ -1736,10 +1743,18 @@ ExplainNode(PlanState *planstate, List *ancestors, if (es->analyze) ExplainPropertyFloat("Heap Fetches", NULL, planstate->instrument->ntuples2, 0, es); + if (es->verbose) + show_scan_stats(plan->applied_stats, plan->applied_clauses, + plan->applied_clauses_or, + planstate, ancestors, es); break; case T_BitmapIndexScan: show_scan_qual(((BitmapIndexScan *) plan)->indexqualorig, "Index Cond", planstate, ancestors, es); + if (es->verbose) + show_scan_stats(plan->applied_stats, plan->applied_clauses, + plan->applied_clauses_or, + planstate, ancestors, es); break; case T_BitmapHeapScan: show_scan_qual(((BitmapHeapScan *) plan)->bitmapqualorig, @@ -1769,6 +1784,10 @@ ExplainNode(PlanState *planstate, List *ancestors, if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + if (es->verbose) + show_scan_stats(plan->applied_stats, plan->applied_clauses, + plan->applied_clauses_or, + planstate, ancestors, es); break; case T_Gather: { @@ -1956,6 +1975,10 @@ ExplainNode(PlanState *planstate, List *ancestors, if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + if (es->verbose) + show_scan_stats(plan->applied_stats, plan->applied_clauses, + plan->applied_clauses_or, + planstate, ancestors, es); break; case T_Group: show_group_keys(castNode(GroupState, planstate), ancestors, es); @@ -1963,6 +1986,10 @@ ExplainNode(PlanState *planstate, List *ancestors, if (plan->qual) show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); + if (es->verbose) + show_scan_stats(plan->applied_stats, plan->applied_clauses, + plan->applied_clauses_or, + planstate, ancestors, es); break; case T_Sort: show_sort_keys(castNode(SortState, planstate), ancestors, es); @@ -2284,6 +2311,76 @@ show_scan_qual(List *qual, const char *qlabel, show_qual(qual, qlabel, planstate, ancestors, useprefix, es); } +/* + * Show a generic expression + */ +static char * +deparse_stat_expression(Node *node, + PlanState *planstate, List *ancestors, + ExplainState *es) +{ + List *context; + + /* Set up deparsing context */ + context = set_deparse_context_plan(es->deparse_cxt, + planstate->plan, + ancestors); + + /* Deparse the expression */ + return deparse_expression(node, context, false, false); +} + +/* + * Show a qualifier expression (which is a List with implicit AND semantics) + */ +static char * +show_stat_qual(List *qual, bool is_or, + PlanState *planstate, List *ancestors, + ExplainState *es) +{ + Node *node; + + /* No work if empty qual */ + if (qual == NIL) + return NULL; + + /* Convert AND list to explicit AND */ + if (is_or) + node = (Node *) make_ors_explicit(qual); + else + node = (Node *) make_ands_explicit(qual); + + /* And show it */ + return deparse_stat_expression(node, planstate, ancestors, es); +} + +/* + * Show applied statistics for scan plan node + */ +static void +show_scan_stats(List *stats, List *clauses, List *ors, + PlanState *planstate, List *ancestors, ExplainState *es) +{ + ListCell *lc1, *lc2, *lc3; + StringInfoData str; + + forthree (lc1, stats, lc2, clauses, lc3, ors) + { + StatisticExtInfo *stat = (StatisticExtInfo *) lfirst(lc1); + List *clauses = (List *) lfirst(lc2); + int is_or = lfirst_int(lc3); + + initStringInfo(&str); + + appendStringInfo(&str, "%s.%s Clauses: %s", + get_namespace_name(get_statistics_namespace(stat->statOid)), + get_statistics_name(stat->statOid), + show_stat_qual(clauses, is_or, planstate, ancestors, es)); + + ExplainPropertyText("Statistics", str.data, es); + } +} + /* * Show a qualifier expression for an upper-level plan node */ diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index d5b1ad4567..76cf385540 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -128,6 +128,10 @@ CopyPlanFields(const Plan *from, Plan *newnode) COPY_NODE_FIELD(initPlan); COPY_BITMAPSET_FIELD(extParam); COPY_BITMAPSET_FIELD(allParam); + + COPY_NODE_FIELD(applied_stats); + COPY_NODE_FIELD(applied_clauses); + COPY_NODE_FIELD(applied_clauses_or); } /* diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c index 01c110cd2f..91d20f95c9 100644 --- a/src/backend/nodes/makefuncs.c +++ b/src/backend/nodes/makefuncs.c @@ -715,6 +715,17 @@ make_ands_explicit(List *andclauses) return make_andclause(andclauses); } +Expr * +make_ors_explicit(List *orclauses) +{ + if (orclauses == NIL) + return (Expr *) makeBoolConst(true, false); + else if (list_length(orclauses) == 1) + return (Expr *) linitial(orclauses); + else + return make_orclause(orclauses); +} + List * make_ands_implicit(Expr *clause) { diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 12561c4757..d88120ed7f 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -341,6 +341,10 @@ _outPlanInfo(StringInfo str, const Plan *node) WRITE_NODE_FIELD(initPlan); WRITE_BITMAPSET_FIELD(extParam); WRITE_BITMAPSET_FIELD(allParam); + + WRITE_NODE_FIELD(applied_stats); + WRITE_NODE_FIELD(applied_clauses); + WRITE_NODE_FIELD(applied_clauses_or); } /* diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 377185f7c6..4759f11d31 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1623,6 +1623,10 @@ ReadCommonPlan(Plan *local_node) READ_NODE_FIELD(initPlan); READ_BITMAPSET_FIELD(extParam); READ_BITMAPSET_FIELD(allParam); + + READ_NODE_FIELD(applied_stats); + READ_NODE_FIELD(applied_clauses); + READ_NODE_FIELD(applied_clauses_or); } /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 906cab7053..617cfe5ecc 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -5198,12 +5198,27 @@ order_qual_clauses(PlannerInfo *root, List *clauses) static void copy_generic_path_info(Plan *dest, Path *src) { + ListCell *lc; + dest->startup_cost = src->startup_cost; dest->total_cost = src->total_cost; dest->plan_rows = src->rows; dest->plan_width = src->pathtarget->width; dest->parallel_aware = src->parallel_aware; dest->parallel_safe = src->parallel_safe; + + dest->applied_stats = src->parent->applied_stats; + dest->applied_clauses_or = src->parent->applied_clauses_or; + + dest->applied_clauses = NIL; + foreach (lc, src->parent->applied_clauses) + { + List *clauses = (List *) lfirst(lc); + + dest->applied_clauses + = lappend(dest->applied_clauses, + maybe_extract_actual_clauses(clauses, false)); + } } /* diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 345c877aeb..d546f795f6 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -259,6 +259,10 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->partexprs = NULL; rel->nullable_partexprs = NULL; + rel->applied_stats = NIL; + rel->applied_clauses = NIL; + rel->applied_clauses_or = NIL; + /* * Pass assorted information down the inheritance hierarchy. */ @@ -673,6 +677,10 @@ build_join_rel(PlannerInfo *root, joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; + joinrel->applied_stats = NIL; + joinrel->applied_clauses = NIL; + joinrel->applied_clauses_or = NIL; + /* Compute information relevant to the foreign relations. */ set_foreign_rel_properties(joinrel, outer_rel, inner_rel); @@ -851,6 +859,10 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->partexprs = NULL; joinrel->nullable_partexprs = NULL; + joinrel->applied_stats = NIL; + joinrel->applied_clauses = NIL; + joinrel->applied_clauses_or = NIL; + joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids, inner_rel->top_parent_relids); diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c index eb113d94c1..781a2ec789 100644 --- a/src/backend/optimizer/util/restrictinfo.c +++ b/src/backend/optimizer/util/restrictinfo.c @@ -446,6 +446,41 @@ extract_actual_clauses(List *restrictinfo_list, return result; } +/* + * maybe_extract_actual_clauses + * + * Just like extract_actual_clauses, but does not require the clauses to + * already be RestrictInfo. + * + * XXX Does not handle RestrictInfos nested in OR clauses. + */ +List * +maybe_extract_actual_clauses(List *restrictinfo_list, + bool pseudoconstant) +{ + List *result = NIL; + ListCell *l; + + foreach(l, restrictinfo_list) + { + RestrictInfo *rinfo; + Node *node = (Node *) lfirst(l); + + if (!IsA(node, RestrictInfo)) + { + result = lappend(result, node); + continue; + } + + rinfo = (RestrictInfo *) node; + + if (rinfo->pseudoconstant == pseudoconstant) + result = lappend(result, rinfo->clause); + } + + return result; +} + /* * extract_actual_join_clauses * diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 8c75690fce..e662fecba1 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -1793,6 +1793,11 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli listidx++; } + /* add it to the list of applied stats/clauses */ + rel->applied_stats = lappend(rel->applied_stats, stat); + rel->applied_clauses = lappend(rel->applied_clauses, stat_clauses); + rel->applied_clauses_or = lappend_int(rel->applied_clauses_or, (is_or) ? 1 : 0); + if (is_or) { bool *or_matches = NULL; diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 7e41bc5641..b1828cdb8e 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -3997,6 +3997,7 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, ListCell *lc2; Bitmapset *matched = NULL; AttrNumber attnum_offset; + List *matched_exprs = NIL; /* * How much we need to offset the attnums? If there are no @@ -4044,6 +4045,9 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, matched = bms_add_member(matched, attnum); + /* track expressions matched by this statistics */ + matched_exprs = lappend(matched_exprs, varinfo->var); + found = true; } @@ -4072,6 +4076,9 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, matched = bms_add_member(matched, attnum); + /* track expressions matched by this statistics */ + matched_exprs = lappend(matched_exprs, expr); + /* there should be just one matching expression */ break; } @@ -4080,6 +4087,10 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel, } } + rel->applied_stats = lappend(rel->applied_stats, matched_info); + rel->applied_clauses = lappend(rel->applied_clauses, matched_exprs); + rel->applied_clauses_or = lappend(rel->applied_clauses_or, 0); + /* Find the specific item that exactly matches the combination */ for (i = 0; i < stats->nitems; i++) { diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 6bba5f8ec4..abbe8232ac 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -33,6 +33,7 @@ #include "catalog/pg_proc.h" #include "catalog/pg_range.h" #include "catalog/pg_statistic.h" +#include "catalog/pg_statistic_ext.h" #include "catalog/pg_transform.h" #include "catalog/pg_type.h" #include "miscadmin.h" @@ -3568,3 +3569,51 @@ get_index_isclustered(Oid index_oid) return isclustered; } + +/* + * get_statistics_name + * Returns the name of a given extended statistics + * + * Returns a palloc'd copy of the string, or NULL if no such namespace. + */ +char * +get_statistics_name(Oid stxid) +{ + HeapTuple tp; + + tp = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(stxid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_statistic_ext stxtup = (Form_pg_statistic_ext) GETSTRUCT(tp); + char *result; + + result = pstrdup(NameStr(stxtup->stxname)); + ReleaseSysCache(tp); + return result; + } + else + return NULL; +} + +/* + * get_statistics_namespace + * Returns the namespace OID of a given extended statistics + */ +Oid +get_statistics_namespace(Oid stxid) +{ + HeapTuple tp; + + tp = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(stxid)); + if (HeapTupleIsValid(tp)) + { + Form_pg_statistic_ext stxtup = (Form_pg_statistic_ext) GETSTRUCT(tp); + Oid result; + + result = stxtup->stxnamespace; + ReleaseSysCache(tp); + return result; + } + else + return InvalidOid; +} diff --git a/src/include/nodes/makefuncs.h b/src/include/nodes/makefuncs.h index 48a7ebfe45..1c256c6b87 100644 --- a/src/include/nodes/makefuncs.h +++ b/src/include/nodes/makefuncs.h @@ -94,6 +94,8 @@ extern Node *make_and_qual(Node *qual1, Node *qual2); extern Expr *make_ands_explicit(List *andclauses); extern List *make_ands_implicit(Expr *clause); +extern Expr *make_ors_explicit(List *orclauses); + extern IndexInfo *makeIndexInfo(int numattrs, int numkeyattrs, Oid amoid, List *expressions, List *predicates, bool unique, bool isready, bool concurrent); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index e4b554f811..50ede6e028 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -759,6 +759,11 @@ typedef struct RelOptInfo Relids all_partrels; /* Relids set of all partition relids */ List **partexprs; /* Non-nullable partition key expressions */ List **nullable_partexprs; /* Nullable partition key expressions */ + + /* info about applied extended statistics */ + List *applied_stats; /* list of StatisticExtInfo */ + List *applied_clauses; /* list of lists (of clauses) */ + List *applied_clauses_or; /* are the clauses AND or OR */ } RelOptInfo; /* diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 6e62104d0b..dd2d9f783b 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -153,6 +153,11 @@ typedef struct Plan */ Bitmapset *extParam; Bitmapset *allParam; + + /* info about applied statistics */ + List *applied_stats; + List *applied_clauses; + List *applied_clauses_or; } Plan; /* ---------------- diff --git a/src/include/optimizer/restrictinfo.h b/src/include/optimizer/restrictinfo.h index 0165ffde37..e234f00aa5 100644 --- a/src/include/optimizer/restrictinfo.h +++ b/src/include/optimizer/restrictinfo.h @@ -37,6 +37,8 @@ extern bool restriction_is_securely_promotable(RestrictInfo *restrictinfo, extern List *get_actual_clauses(List *restrictinfo_list); extern List *extract_actual_clauses(List *restrictinfo_list, bool pseudoconstant); +extern List *maybe_extract_actual_clauses(List *restrictinfo_list, + bool pseudoconstant); extern void extract_actual_join_clauses(List *restrictinfo_list, Relids joinrelids, List **joinquals, diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 77871aaefc..cff60de56d 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -199,6 +199,9 @@ extern bool get_index_isreplident(Oid index_oid); extern bool get_index_isvalid(Oid index_oid); extern bool get_index_isclustered(Oid index_oid); +extern char *get_statistics_name(Oid stxid); +extern Oid get_statistics_namespace(Oid stxid); + #define type_is_array(typid) (get_element_type(typid) != InvalidOid) /* type_is_array_domain accepts both plain arrays and domains over arrays */ #define type_is_array_domain(typid) (get_base_element_type(typid) != InvalidOid) -- 2.30.2