Hi hackers,
I've encountered and addressed errors in the
"0001-pg_stat_advisor-extension.patch" when applying it to the main
branch, specifically trailing whitespace issues at lines 117 and 118:
```
0001-pg_stat_advisor-extension.patch:117: trailing whitespace.
QUERY PLAN
0001-pg_stat_advisor-extension.patch:118: trailing whitespace.
warning: 2 lines add whitespace errors.
```
An updated patch is attached for review
I welcome your insights, feedback, and evaluations regarding the
necessity of integrating this new extension into PostgreSQL.
Kind regards,
Ilia Evdokimov,
Tantor Labs LLC.
From 6316706c42996219e507bb6ded9dd1e872180e38 Mon Sep 17 00:00:00 2001
From: Ilia Evdokimov <ilya.evdoki...@tantorlabs.ru>
Date: Tue, 6 Feb 2024 18:11:04 +0300
Subject: [PATCH] pg_stat_advisor extension
---
contrib/Makefile | 1 +
contrib/meson.build | 1 +
contrib/pg_stat_advisor/.gitignore | 3 +
contrib/pg_stat_advisor/Makefile | 20 +
contrib/pg_stat_advisor/README.md | 85 ++++
.../expected/pg_stat_advisor.out | 96 ++++
contrib/pg_stat_advisor/meson.build | 30 ++
contrib/pg_stat_advisor/pg_stat_advisor.c | 477 ++++++++++++++++++
.../pg_stat_advisor/sql/pg_stat_advisor.sql | 50 ++
9 files changed, 763 insertions(+)
create mode 100644 contrib/pg_stat_advisor/.gitignore
create mode 100644 contrib/pg_stat_advisor/Makefile
create mode 100644 contrib/pg_stat_advisor/README.md
create mode 100644 contrib/pg_stat_advisor/expected/pg_stat_advisor.out
create mode 100644 contrib/pg_stat_advisor/meson.build
create mode 100644 contrib/pg_stat_advisor/pg_stat_advisor.c
create mode 100644 contrib/pg_stat_advisor/sql/pg_stat_advisor.sql
diff --git a/contrib/Makefile b/contrib/Makefile
index da4e2316a3..da9a4ceeaa 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -34,6 +34,7 @@ SUBDIRS = \
pg_buffercache \
pg_freespacemap \
pg_prewarm \
+ pg_stat_advisor \
pg_stat_statements \
pg_surgery \
pg_trgm \
diff --git a/contrib/meson.build b/contrib/meson.build
index c12dc906ca..a20d99443b 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -49,6 +49,7 @@ subdir('pgcrypto')
subdir('pg_freespacemap')
subdir('pg_prewarm')
subdir('pgrowlocks')
+subdir('pg_stat_advisor')
subdir('pg_stat_statements')
subdir('pgstattuple')
subdir('pg_surgery')
diff --git a/contrib/pg_stat_advisor/.gitignore b/contrib/pg_stat_advisor/.gitignore
new file mode 100644
index 0000000000..913175ff6e
--- /dev/null
+++ b/contrib/pg_stat_advisor/.gitignore
@@ -0,0 +1,3 @@
+/log/
+/results/
+/tmp_check/
diff --git a/contrib/pg_stat_advisor/Makefile b/contrib/pg_stat_advisor/Makefile
new file mode 100644
index 0000000000..f31b939e8a
--- /dev/null
+++ b/contrib/pg_stat_advisor/Makefile
@@ -0,0 +1,20 @@
+# contrib/pg_stat_advisor/Makefile
+
+MODULE_big = pg_stat_advisor
+OBJS = \
+ $(WIN32RES) \
+ pg_stat_advisor.o
+PGFILEDESC = "pg_stat_advisor - analyze query performance and recommend the creation of additional statistics"
+
+REGRESS = pg_stat_advisor
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_stat_advisor
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/pg_stat_advisor/README.md b/contrib/pg_stat_advisor/README.md
new file mode 100644
index 0000000000..f9610f2ed5
--- /dev/null
+++ b/contrib/pg_stat_advisor/README.md
@@ -0,0 +1,85 @@
+## pg_stat_advisor - PostgreSQL advisor to create statistics
+
+pg_stat_advisor is a PostgreSQL extension designed to analyze query performance and recommend the creation of additional statistics to improve query plan.
+
+Append pg_stat_advisor to shared_preload_libraries configuration parameter in your postgresql.conf file then restart the PostgreSQL database to apply the changes. Or you can use "LOAD 'pg_stat_advisor';"command
+```
+LOAD 'pg_stat_advisor';
+```
+
+There is the pg_stat_advisor.suggest_statistics_threshold GUC that can be used to set a suggest_statistics_threshold. It is the the ratio of total tuples produced compared to the planned rows. If parameter is set by 0, the printing switches off.
+
+For example:
+```
+SET pg_stat_advisor.suggest_statistics_threshold = 1.0;
+```
+
+Examples:
+
+
+```
+postgres=# create table t (i int, j int);
+CREATE TABLE
+postgres=# insert into t select i/10, i/100 from generate_series(1, 1000000) i;
+INSERT 0 1000000
+postgres=# analyze t;
+ANALYZE
+postgres=# explain analyze select * from t where i = 100 and j = 10;
+ QUERY PLAN
+
+----------------------------------------------------------------------------------------------
+------------------
+ Gather (cost=1000.00..11675.10 rows=1 width=8) (actual time=0.526..61.564 rows=10 loops=1)
+ Workers Planned: 2
+ Workers Launched: 2
+ -> Parallel Seq Scan on t (cost=0.00..10675.00 rows=1 width=8) (actual time=35.369..54.44
+7 rows=3 loops=3)
+ Filter: ((i = 100) AND (j = 10))
+ Rows Removed by Filter: 333330
+ Planning Time: 0.148 ms
+ Execution Time: 61.589 ms
+(8 rows)
+
+
+postgres=# load 'pg_stat_advisor';
+LOAD
+postgres=# set pg_stat_advisor.suggest_statistics_threshold = 0.1;
+SET
+postgres=# explain analyze select * from t where i = 100 and j = 10;
+NOTICE: pg_stat_advisor suggestion: CREATE STATISTICS t_i_j ON i, j FROM t
+ QUERY PLAN
+
+----------------------------------------------------------------------------------------------
+------------------
+ Gather (cost=1000.00..11675.10 rows=1 width=8) (actual time=0.400..59.292 rows=10 loops=1)
+ Workers Planned: 2
+ Workers Launched: 2
+ -> Parallel Seq Scan on t (cost=0.00..10675.00 rows=1 width=8) (actual time=35.614..54.29
+1 rows=3 loops=3)
+ Filter: ((i = 100) AND (j = 10))
+ Rows Removed by Filter: 333330
+ Planning Time: 0.081 ms
+ Execution Time: 59.413 ms
+(8 rows)
+
+
+postgres=# create statistics stat_t_i_j on i, j from t;
+CREATE STATISTICS
+postgres=# analyze t;
+ANALYZE
+postgres=# explain analyze select * from t where i = 100 and j = 10;
+ QUERY PLAN
+
+----------------------------------------------------------------------------------------------
+------------------
+ Gather (cost=1000.00..11675.10 rows=10 width=8) (actual time=0.400..59.292 rows=10 loops=1)
+ Workers Planned: 2
+ Workers Launched: 2
+ -> Parallel Seq Scan on t (cost=0.00..10675.00 rows=1 width=8) (actual time=35.614..54.29
+1 rows=3 loops=3)
+ Filter: ((i = 100) AND (j = 10))
+ Rows Removed by Filter: 333330
+ Planning Time: 0.081 ms
+ Execution Time: 59.413 ms
+(8 rows)
+```
diff --git a/contrib/pg_stat_advisor/expected/pg_stat_advisor.out b/contrib/pg_stat_advisor/expected/pg_stat_advisor.out
new file mode 100644
index 0000000000..b45e7153ae
--- /dev/null
+++ b/contrib/pg_stat_advisor/expected/pg_stat_advisor.out
@@ -0,0 +1,96 @@
+LOAD 'pg_stat_advisor';
+SET pg_stat_advisor.suggest_statistics_threshold = 0.1;
+-------------------------------Check printing suggestion-----------------------------------------------------------
+CREATE TABLE t (i INT, j INT);
+INSERT INTO t SELECT i/10, i/100 FROM GENERATE_SERIES(1,1000000) i;
+ANALYZE t;
+EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM t WHERE i = 100 AND j = 10;
+NOTICE: pg_stat_advisor suggestion: CREATE STATISTICS t_i_j ON i, j FROM t
+ QUERY PLAN
+------------------------------------------------------
+ Gather (actual rows=10 loops=1)
+ Workers Planned: 2
+ Workers Launched: 2
+ -> Parallel Seq Scan on t (actual rows=3 loops=3)
+ Filter: ((i = 100) AND (j = 10))
+ Rows Removed by Filter: 333330
+(6 rows)
+
+CREATE STATISTICS t_i_j ON i, j FROM t;
+ANALYZE t;
+EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM t WHERE i = 100 AND j = 10;
+ QUERY PLAN
+------------------------------------------------------
+ Gather (actual rows=10 loops=1)
+ Workers Planned: 2
+ Workers Launched: 2
+ -> Parallel Seq Scan on t (actual rows=3 loops=3)
+ Filter: ((i = 100) AND (j = 10))
+ Rows Removed by Filter: 333330
+(6 rows)
+
+DROP STATISTICS t_i_j;
+-------------------------------Check existing statistics except suggested name-------------------------------------
+CREATE STATISTICS statistics_t_i_j ON i, j FROM t;
+ANALYZE t;
+EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM t WHERE i = 100 AND j = 10;
+ QUERY PLAN
+------------------------------------------------------
+ Gather (actual rows=10 loops=1)
+ Workers Planned: 2
+ Workers Launched: 2
+ -> Parallel Seq Scan on t (actual rows=3 loops=3)
+ Filter: ((i = 100) AND (j = 10))
+ Rows Removed by Filter: 333330
+(6 rows)
+
+DROP STATISTICS statistics_t_i_j;
+DROP TABLE t;
+-------------------------------Check suggest_statistics_threshold parameter----------------------------------------
+SET pg_stat_advisor.suggest_statistics_threshold = 10.0;
+CREATE TABLE inner_tab(x INT, y INT);
+CREATE TABLE outer_tab(pk INT PRIMARY KEY, x INT, y INT);
+CREATE INDEX ON inner_tab(x,y);
+INSERT INTO outer_tab VALUES (GENERATE_SERIES(1,100000), GENERATE_SERIES(1,100000), GENERATE_SERIES(1,100000)*10);
+INSERT INTO inner_tab VALUES (GENERATE_SERIES(1,1000000)/10, GENERATE_SERIES(1,1000000)/10*10);
+ANALYZE inner_tab;
+ANALYZE outer_tab;
+EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM outer_tab JOIN inner_tab USING(x,y) WHERE pk=1;
+NOTICE: pg_stat_advisor suggestion: CREATE STATISTICS inner_tab_x_y ON x, y FROM inner_tab
+ QUERY PLAN
+-------------------------------------------------------------------------------------
+ Nested Loop (actual rows=10 loops=1)
+ -> Index Scan using outer_tab_pkey on outer_tab (actual rows=1 loops=1)
+ Index Cond: (pk = 1)
+ -> Index Only Scan using inner_tab_x_y_idx on inner_tab (actual rows=10 loops=1)
+ Index Cond: ((x = outer_tab.x) AND (y = outer_tab.y))
+ Heap Fetches: 10
+(6 rows)
+
+CREATE STATISTICS inner_tab_x_y ON x, y FROM inner_tab;
+ANALYZE inner_tab;
+EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM outer_tab JOIN inner_tab USING(x,y) WHERE pk=1;
+ QUERY PLAN
+-------------------------------------------------------------------------------------
+ Nested Loop (actual rows=10 loops=1)
+ -> Index Scan using outer_tab_pkey on outer_tab (actual rows=1 loops=1)
+ Index Cond: (pk = 1)
+ -> Index Only Scan using inner_tab_x_y_idx on inner_tab (actual rows=10 loops=1)
+ Index Cond: ((x = outer_tab.x) AND (y = outer_tab.y))
+ Heap Fetches: 10
+(6 rows)
+
+DROP STATISTICS inner_tab_x_y;
+SET pg_stat_advisor.suggest_statistics_threshold = 10.1;
+ANALYZE inner_tab;
+EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM outer_tab JOIN inner_tab USING(x,y) WHERE pk=1;
+ QUERY PLAN
+-------------------------------------------------------------------------------------
+ Nested Loop (actual rows=10 loops=1)
+ -> Index Scan using outer_tab_pkey on outer_tab (actual rows=1 loops=1)
+ Index Cond: (pk = 1)
+ -> Index Only Scan using inner_tab_x_y_idx on inner_tab (actual rows=10 loops=1)
+ Index Cond: ((x = outer_tab.x) AND (y = outer_tab.y))
+ Heap Fetches: 10
+(6 rows)
+
diff --git a/contrib/pg_stat_advisor/meson.build b/contrib/pg_stat_advisor/meson.build
new file mode 100644
index 0000000000..292917c025
--- /dev/null
+++ b/contrib/pg_stat_advisor/meson.build
@@ -0,0 +1,30 @@
+# Copyright (c) 2022-2024, PostgreSQL Global Development Group
+
+pg_stat_advisor_sources = files(
+ 'pg_stat_advisor.c',
+)
+
+if host_system == 'windows'
+ pg_stat_advisor_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'pg_stat_advsior',
+ '--FILEDESC', 'pg_stat_advisor - suggestion of creating statistics',])
+endif
+
+pg_stat_advisor = shared_module('pg_stat_advisor',
+ pg_stat_advisor_sources,
+ kwargs: contrib_mod_args + {
+ 'dependencies': contrib_mod_args['dependencies'],
+ },
+)
+contrib_targets += pg_stat_advisor
+
+tests += {
+ 'name': 'pg_stat_advisor',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+ 'regress': {
+ 'sql': [
+ 'pg_stat_advisor',
+ ],
+ },
+}
diff --git a/contrib/pg_stat_advisor/pg_stat_advisor.c b/contrib/pg_stat_advisor/pg_stat_advisor.c
new file mode 100644
index 0000000000..498a41d4bf
--- /dev/null
+++ b/contrib/pg_stat_advisor/pg_stat_advisor.c
@@ -0,0 +1,477 @@
+#include "postgres.h"
+
+#include "access/hash.h"
+#include "access/table.h"
+#include "catalog/pg_statistic_ext.h"
+#include "commands/explain.h"
+#include "optimizer/optimizer.h"
+#include "parser/parsetree.h"
+#include "statistics/statistics.h"
+#include "utils/guc.h"
+#include "utils/syscache.h"
+#include "utils/lsyscache.h"
+
+PG_MODULE_MAGIC;
+
+/* GUC variables */
+static double pg_stat_advisor_suggest_statistics_threshold = 0.0;
+
+/* Current nesting depth of ExecutorRun calls */
+static int nesting_level = 0;
+
+#define pg_stat_advisor_enabled() (nesting_level == 0)
+
+void _PG_init(void);
+
+/* Saved hook values in case of unload */
+static ExecutorStart_hook_type prev_ExecutorStart = NULL;
+static ExecutorRun_hook_type prev_ExecutorRun = NULL;
+static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
+static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
+
+static void explain_ExecutorStart(QueryDesc *queryDesc, int eflags);
+static void explain_ExecutorRun(QueryDesc *queryDesc,
+ ScanDirection direction,
+ uint64 count, bool execute_once);
+static void explain_ExecutorFinish(QueryDesc *queryDesc);
+static void explain_ExecutorEnd(QueryDesc *queryDesc);
+
+static void SuggestMultiColumnStatisticsForNode(PlanState *planstate, ExplainState *es);
+
+/*
+ * Module load callback
+ */
+void
+_PG_init(void)
+{
+ /* Define custom GUC variables. */
+ DefineCustomRealVariable("pg_stat_advisor.suggest_statistics_threshold",
+ "Set the threshold for actual/estimated rows",
+ "Zero disables suggestion of creating statistics",
+ &pg_stat_advisor_suggest_statistics_threshold,
+ 0.0,
+ 0.0,
+ INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ MarkGUCPrefixReserved("pg_stat_advisor");
+
+ /* Install hooks. */
+ prev_ExecutorStart = ExecutorStart_hook;
+ ExecutorStart_hook = explain_ExecutorStart;
+ prev_ExecutorRun = ExecutorRun_hook;
+ ExecutorRun_hook = explain_ExecutorRun;
+ prev_ExecutorFinish = ExecutorFinish_hook;
+ ExecutorFinish_hook = explain_ExecutorFinish;
+ prev_ExecutorEnd = ExecutorEnd_hook;
+ ExecutorEnd_hook = explain_ExecutorEnd;
+}
+
+/*
+ * ExecutorStart hook: start up logging if needed
+ */
+static void
+explain_ExecutorStart(QueryDesc *queryDesc, int eflags)
+{
+ if (prev_ExecutorStart)
+ prev_ExecutorStart(queryDesc, eflags);
+ else
+ standard_ExecutorStart(queryDesc, eflags);
+
+ if (pg_stat_advisor_enabled() && queryDesc->totaltime == NULL)
+ {
+ MemoryContext oldcxt;
+
+ oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
+ queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
+ MemoryContextSwitchTo(oldcxt);
+ }
+}
+
+/*
+ * ExecutorRun hook: all we need do is track nesting depth
+ */
+static void
+explain_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction,
+ uint64 count, bool execute_once)
+{
+ nesting_level++;
+ PG_TRY();
+ {
+ if (prev_ExecutorRun)
+ prev_ExecutorRun(queryDesc, direction, count, execute_once);
+ else
+ standard_ExecutorRun(queryDesc, direction, count, execute_once);
+ }
+ PG_FINALLY();
+ {
+ nesting_level--;
+ }
+ PG_END_TRY();
+}
+
+/*
+ * ExecutorFinish hook: all we need do is track nesting depth
+ */
+static void
+explain_ExecutorFinish(QueryDesc *queryDesc)
+{
+ nesting_level++;
+ PG_TRY();
+ {
+ if (prev_ExecutorFinish)
+ prev_ExecutorFinish(queryDesc);
+ else
+ standard_ExecutorFinish(queryDesc);
+ }
+ PG_FINALLY();
+ {
+ nesting_level--;
+ }
+ PG_END_TRY();
+}
+
+/*
+ * ExecutorEnd hook: log results if needed
+ */
+static void
+explain_ExecutorEnd(QueryDesc *queryDesc)
+{
+ if (queryDesc->totaltime && pg_stat_advisor_enabled())
+ {
+ MemoryContext oldcxt;
+ ExplainState *es;
+
+ /*
+ * Make sure we operate in the per-query context, so any cruft will be
+ * discarded later during ExecutorEnd.
+ */
+ oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
+
+ /*
+ * Make sure stats accumulation is done. (Note: it's okay if several
+ * levels of hook all do this.)
+ */
+ InstrEndLoop(queryDesc->totaltime);
+
+ es = NewExplainState();
+
+ es->analyze = queryDesc->instrument_options;
+
+ ExplainBeginOutput(es);
+ ExplainQueryText(es, queryDesc);
+
+ ExplainPrintPlan(es, queryDesc);
+ if (es->analyze)
+ ExplainPrintTriggers(es, queryDesc);
+ if (es->costs)
+ ExplainPrintJITSummary(es, queryDesc);
+ ExplainEndOutput(es);
+
+ if (pg_stat_advisor_suggest_statistics_threshold && !IsParallelWorker())
+ SuggestMultiColumnStatisticsForNode(queryDesc->planstate, es);
+
+ MemoryContextSwitchTo(oldcxt);
+ }
+
+ if (prev_ExecutorEnd)
+ prev_ExecutorEnd(queryDesc);
+ else
+ standard_ExecutorEnd(queryDesc);
+}
+
+/**
+ * Comparator used to sort Vars by name
+ */
+static int
+vars_list_comparator(const ListCell *a, const ListCell *b)
+{
+ char *va = strVal(linitial(((ColumnRef *) lfirst(a))->fields));
+ char *vb = strVal(linitial(((ColumnRef *) lfirst(b))->fields));
+
+ return strcmp(va, vb);
+}
+
+/**
+ * Suggest statistics for specified subplans.
+ */
+static void
+SuggestMultiColumnStatisticsForSubPlans(List *plans, ExplainState *es)
+{
+ ListCell *lst;
+
+ foreach(lst, plans)
+ {
+ SubPlanState *sps = (SubPlanState *) lfirst(lst);
+
+ SuggestMultiColumnStatisticsForNode(sps->planstate, es);
+ }
+}
+
+/**
+ * Suggest statistics for plan subnodes.
+ */
+static void
+SuggestMultiColumnStatisticsForMemberNodes(PlanState **planstates, int nsubnodes,
+ ExplainState *es)
+{
+ int j;
+
+ for (j = 0; j < nsubnodes; j++)
+ SuggestMultiColumnStatisticsForNode(planstates[j], es);
+}
+
+/**
+ * Find extended statistics from 'relation_oid' relation on colmap
+ */
+static bool
+FindExtendedStatisticsOnVars(Oid *relation_oid, Bitmapset *colmap)
+{
+ bool isExistExtendedStatistics = false;
+ ListCell *oid;
+
+ /* Receive all extended statistics for current relation */
+ Relation pg_stextdata = table_open(*relation_oid, RowExclusiveLock);
+ List *statistics = RelationGetStatExtList(pg_stextdata);
+
+ foreach(oid, statistics)
+ {
+ Oid statOid = lfirst_oid(oid);
+ Form_pg_statistic_ext staForm;
+ HeapTuple htup;
+ Bitmapset *keys = NULL;
+ int i;
+
+ /* Read from pg_statistic_ext */
+ htup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statOid));
+ if (!HeapTupleIsValid(htup))
+ elog(ERROR, "cache lookup failed for statistics object %u", statOid);
+ staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
+
+ for (i = 0; i < staForm->stxkeys.dim1; i++)
+ keys = bms_add_member(keys, staForm->stxkeys.values[i]);
+
+ /* If we have statistics for current columns */
+ if (!bms_compare(keys, colmap))
+ {
+ isExistExtendedStatistics = true;
+
+ ReleaseSysCache(htup);
+ bms_free(keys);
+
+ break;
+ }
+
+ ReleaseSysCache(htup);
+ bms_free(keys);
+ }
+
+ list_free(statistics);
+ table_close(pg_stextdata, RowExclusiveLock);
+
+ return isExistExtendedStatistics;
+}
+
+/**
+ * Suggest statistics for qual
+ */
+static void
+SuggestMultiColumnStatisticsForQual(List *qual, ExplainState *es)
+{
+ List *vars = NULL;
+ ListCell *lc;
+
+ /* Extract vars from all quals */
+ foreach(lc, qual)
+ {
+ Node *node = (Node *) lfirst(lc);
+
+ if (IsA(node, RestrictInfo))
+ node = (Node *) ((RestrictInfo *) node)->clause;
+ vars = list_concat(vars, pull_vars_of_level(node, 0));
+ }
+
+ /* Loop until we considered all vars */
+ while (vars != NULL)
+ {
+ ListCell *cell;
+ List *cols = NULL;
+ Index relno = 0;
+ Bitmapset *colmap = NULL;
+
+ /* Contruct list of unique vars */
+ foreach(cell, vars)
+ {
+ Node *node = (Node *) lfirst(cell);
+
+ if (IsA(node, Var))
+ {
+ Var *var = (Var *) node;
+ int varno = IS_SPECIAL_VARNO(var->varno) ? var->varnosyn : var->varno;
+
+ if (cols == NULL || varno == relno)
+ {
+ int varattno = IS_SPECIAL_VARNO(var->varno) ? var->varattnosyn : var->varattno;
+
+ relno = varno;
+ if (var->varattno > 0 &&
+ !bms_is_member(varattno, colmap) &&
+ varno >= 1 && /* not synthetic var */
+ varno <= list_length(es->rtable) &&
+ list_length(cols) < STATS_MAX_DIMENSIONS)
+ {
+ RangeTblEntry *rte = rt_fetch(varno, es->rtable);
+
+ if (rte->rtekind == RTE_RELATION)
+ {
+ ColumnRef *col = makeNode(ColumnRef);
+ char *colname = get_rte_attribute_name(rte, varattno);
+
+ col->fields = list_make1(makeString(colname));
+ cols = lappend(cols, col);
+ colmap = bms_add_member(colmap, varattno);
+ }
+ }
+ }
+ else
+ {
+ continue;
+ }
+ }
+ vars = foreach_delete_current(vars, cell);
+ }
+ /* To suggest statitics we need to have at least 2 columns */
+ if (list_length(cols) >= 2)
+ {
+ RangeTblEntry *rte = rt_fetch(relno, es->rtable);
+ char *rel_name = get_rel_name(rte->relid);
+ char *stat_name = rel_name;
+ char *create_stat_stmt = (char *) "";
+ char const *sep = "ON";
+ size_t name_len;
+
+ /* Sort variables by name */
+ list_sort(cols, vars_list_comparator);
+
+ /*
+ * Construct name for statistic by concatenating relation name
+ * with all columns
+ */
+ foreach(cell, cols)
+ {
+ char *col_name = strVal(linitial(((ColumnRef *) lfirst(cell))->fields));
+
+ stat_name = psprintf("%s_%s", stat_name, col_name);
+ create_stat_stmt = psprintf("%s%s %s", create_stat_stmt, sep, col_name);
+ sep = ",";
+ }
+
+ name_len = strlen(stat_name);
+ /* Truncate name if it doesn't fit in NameData */
+ if (name_len >= NAMEDATALEN)
+ stat_name = psprintf("%.*s_%08x", NAMEDATALEN - 10, stat_name, (unsigned) hash_any((uint8 *) stat_name, name_len));
+
+ if (!FindExtendedStatisticsOnVars(&rte->relid, colmap))
+ {
+ ereport(NOTICE, (errmsg("pg_stat_advisor suggestion: CREATE STATISTICS %s %s FROM %s",
+ stat_name, create_stat_stmt, rel_name),
+ errhidestmt(true)));
+ }
+ }
+
+ bms_free(colmap);
+ }
+}
+
+/**
+ * Suggest statistics for node
+ */
+static void
+SuggestMultiColumnStatisticsForNode(PlanState *planstate, ExplainState *es)
+{
+ Plan *plan = planstate->plan;
+
+ if (planstate->instrument && plan->plan_rows != 0 &&
+ pg_stat_advisor_suggest_statistics_threshold > 0.0 &&
+ planstate->instrument->ntuples / plan->plan_rows >= pg_stat_advisor_suggest_statistics_threshold)
+ {
+ elog(DEBUG1, "Estimated=%f, actual=%f, error=%f: plan=%s",
+ plan->plan_rows,
+ planstate->instrument->ntuples,
+ planstate->instrument->ntuples / plan->plan_rows,
+ nodeToString(plan));
+ /* quals, sort keys, etc */
+ switch (nodeTag(plan))
+ {
+ case T_IndexScan:
+ SuggestMultiColumnStatisticsForQual(((IndexScan *) plan)->indexqualorig, es);
+ break;
+ case T_IndexOnlyScan:
+ SuggestMultiColumnStatisticsForQual(((IndexOnlyScan *) plan)->indexqual, es);
+ break;
+ case T_BitmapIndexScan:
+ SuggestMultiColumnStatisticsForQual(((BitmapIndexScan *) plan)->indexqualorig, es);
+ break;
+ case T_NestLoop:
+ SuggestMultiColumnStatisticsForQual(((NestLoop *) plan)->join.joinqual, es);
+ break;
+ case T_MergeJoin:
+ SuggestMultiColumnStatisticsForQual(((MergeJoin *) plan)->mergeclauses, es);
+ SuggestMultiColumnStatisticsForQual(((MergeJoin *) plan)->join.joinqual, es);
+ break;
+ case T_HashJoin:
+ SuggestMultiColumnStatisticsForQual(((HashJoin *) plan)->hashclauses, es);
+ SuggestMultiColumnStatisticsForQual(((HashJoin *) plan)->join.joinqual, es);
+ break;
+ default:
+ break;
+ }
+ SuggestMultiColumnStatisticsForQual(plan->qual, es);
+ }
+
+ /* initPlan-s */
+ if (planstate->initPlan)
+ SuggestMultiColumnStatisticsForSubPlans(planstate->initPlan, es);
+
+ /* lefttree */
+ if (outerPlanState(planstate))
+ SuggestMultiColumnStatisticsForNode(outerPlanState(planstate), es);
+
+ /* righttree */
+ if (innerPlanState(planstate))
+ SuggestMultiColumnStatisticsForNode(innerPlanState(planstate), es);
+
+ /* special child plans */
+ switch (nodeTag(plan))
+ {
+ case T_Append:
+ SuggestMultiColumnStatisticsForMemberNodes(((AppendState *) planstate)->appendplans,
+ ((AppendState *) planstate)->as_nplans,
+ es);
+ break;
+ case T_MergeAppend:
+ SuggestMultiColumnStatisticsForMemberNodes(((MergeAppendState *) planstate)->mergeplans,
+ ((MergeAppendState *) planstate)->ms_nplans,
+ es);
+ break;
+ case T_BitmapAnd:
+ SuggestMultiColumnStatisticsForMemberNodes(((BitmapAndState *) planstate)->bitmapplans,
+ ((BitmapAndState *) planstate)->nplans,
+ es);
+ break;
+ case T_BitmapOr:
+ SuggestMultiColumnStatisticsForMemberNodes(((BitmapOrState *) planstate)->bitmapplans,
+ ((BitmapOrState *) planstate)->nplans,
+ es);
+ break;
+ case T_SubqueryScan:
+ SuggestMultiColumnStatisticsForNode(((SubqueryScanState *) planstate)->subplan, es);
+ break;
+ default:
+ break;
+ }
+}
diff --git a/contrib/pg_stat_advisor/sql/pg_stat_advisor.sql b/contrib/pg_stat_advisor/sql/pg_stat_advisor.sql
new file mode 100644
index 0000000000..3fd8b24416
--- /dev/null
+++ b/contrib/pg_stat_advisor/sql/pg_stat_advisor.sql
@@ -0,0 +1,50 @@
+LOAD 'pg_stat_advisor';
+SET pg_stat_advisor.suggest_statistics_threshold = 0.1;
+
+-------------------------------Check printing suggestion-----------------------------------------------------------
+
+CREATE TABLE t (i INT, j INT);
+INSERT INTO t SELECT i/10, i/100 FROM GENERATE_SERIES(1,1000000) i;
+ANALYZE t;
+EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM t WHERE i = 100 AND j = 10;
+
+CREATE STATISTICS t_i_j ON i, j FROM t;
+
+ANALYZE t;
+EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM t WHERE i = 100 AND j = 10;
+
+DROP STATISTICS t_i_j;
+
+-------------------------------Check existing statistics except suggested name-------------------------------------
+
+CREATE STATISTICS statistics_t_i_j ON i, j FROM t;
+
+ANALYZE t;
+EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM t WHERE i = 100 AND j = 10;
+
+DROP STATISTICS statistics_t_i_j;
+DROP TABLE t;
+
+-------------------------------Check suggest_statistics_threshold parameter----------------------------------------
+
+SET pg_stat_advisor.suggest_statistics_threshold = 10.0;
+
+CREATE TABLE inner_tab(x INT, y INT);
+CREATE TABLE outer_tab(pk INT PRIMARY KEY, x INT, y INT);
+CREATE INDEX ON inner_tab(x,y);
+INSERT INTO outer_tab VALUES (GENERATE_SERIES(1,100000), GENERATE_SERIES(1,100000), GENERATE_SERIES(1,100000)*10);
+INSERT INTO inner_tab VALUES (GENERATE_SERIES(1,1000000)/10, GENERATE_SERIES(1,1000000)/10*10);
+ANALYZE inner_tab;
+ANALYZE outer_tab;
+EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM outer_tab JOIN inner_tab USING(x,y) WHERE pk=1;
+
+CREATE STATISTICS inner_tab_x_y ON x, y FROM inner_tab;
+
+ANALYZE inner_tab;
+EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM outer_tab JOIN inner_tab USING(x,y) WHERE pk=1;
+
+DROP STATISTICS inner_tab_x_y;
+SET pg_stat_advisor.suggest_statistics_threshold = 10.1;
+
+ANALYZE inner_tab;
+EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM outer_tab JOIN inner_tab USING(x,y) WHERE pk=1;
\ No newline at end of file
--
2.34.1