Hi,

Attached is a rebased and cleaned-up version of these patches, with more
comments, refactorings etc. Justin and Zhihong, can you take a look?


0001 - Ignore extended statistics for inheritance trees

0002 - Build inherited extended stats on partitioned tables

Those are mostly just Justin's patches, with more detailed comments and
updated commit message. I've considered moving the rel->inh check to
statext_clauselist_selectivity, and then removing the check from
dependencies and MCV. But I decided no to do that, because someone might
be calling those functions directly (even if that's very unlikely).

The one thing bugging me a bit is that the regression test checks only a
GROUP BY query. It'd be nice to add queries testing MCV/dependencies
too, but that seems tricky because most queries will use per-partitions
stats.


0003 - Add stxdinherit flag to pg_statistic_ext_data

This is the patch for master, allowing to build stats for both inherits
flag values. It adds the flag to pg_stats_ext_exprs view to, reworked
how we deal with iterating both flags etc. I've adopted most of the
Justin's fixup patches, except that in plancat.c I've refactored how we
load the stats to process keys/expressions just once.

It has the same issue with regression test using just a GROUP BY query,
but if we add a test to 0001/0002, that'll fix this too.


0004 - Refactor parent ACL check

Not sure about this - I doubt saving 30 rows in an 8kB file is really
worth it. Maybe it is, but then maybe we should try cleaning up the
other ACL checks in this file too? Seems mostly orthogonal to this
thread, though.


regards

-- 
Tomas Vondra
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company
From 569099c5d14535a95cc21798cb041cae7eee0e36 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <to...@2ndquadrant.com>
Date: Sun, 12 Dec 2021 02:28:41 +0100
Subject: [PATCH 4/4] Refactor parent ACL check

selfuncs.c is 8k lines long, and this makes it 30 LOC shorter.
---
 src/backend/utils/adt/selfuncs.c | 140 ++++++++++++-------------------
 1 file changed, 52 insertions(+), 88 deletions(-)

diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 886d85b7f3..9f656316bf 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -187,6 +187,8 @@ static char *convert_string_datum(Datum value, Oid typid, Oid collid,
 								  bool *failure);
 static double convert_timevalue_to_scalar(Datum value, Oid typid,
 										  bool *failure);
+static void recheck_parent_acl(PlannerInfo *root, VariableStatData *vardata,
+								Oid relid);
 static void examine_simple_variable(PlannerInfo *root, Var *var,
 									VariableStatData *vardata);
 static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
@@ -5153,51 +5155,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 									(pg_class_aclcheck(rte->relid, userid,
 													   ACL_SELECT) == ACLCHECK_OK);
 
-								/*
-								 * If the user doesn't have permissions to
-								 * access an inheritance child relation, check
-								 * the permissions of the table actually
-								 * mentioned in the query, since most likely
-								 * the user does have that permission.  Note
-								 * that whole-table select privilege on the
-								 * parent doesn't quite guarantee that the
-								 * user could read all columns of the child.
-								 * But in practice it's unlikely that any
-								 * interesting security violation could result
-								 * from allowing access to the expression
-								 * index's stats, so we allow it anyway.  See
-								 * similar code in examine_simple_variable()
-								 * for additional comments.
-								 */
-								if (!vardata->acl_ok &&
-									root->append_rel_array != NULL)
-								{
-									AppendRelInfo *appinfo;
-									Index		varno = index->rel->relid;
-
-									appinfo = root->append_rel_array[varno];
-									while (appinfo &&
-										   planner_rt_fetch(appinfo->parent_relid,
-															root)->rtekind == RTE_RELATION)
-									{
-										varno = appinfo->parent_relid;
-										appinfo = root->append_rel_array[varno];
-									}
-									if (varno != index->rel->relid)
-									{
-										/* Repeat access check on this rel */
-										rte = planner_rt_fetch(varno, root);
-										Assert(rte->rtekind == RTE_RELATION);
-
-										userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
-
-										vardata->acl_ok =
-											rte->securityQuals == NIL &&
-											(pg_class_aclcheck(rte->relid,
-															   userid,
-															   ACL_SELECT) == ACLCHECK_OK);
-									}
-								}
+								recheck_parent_acl(root, vardata, index->rel->relid);
 							}
 							else
 							{
@@ -5302,49 +5260,7 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 						(pg_class_aclcheck(rte->relid, userid,
 										   ACL_SELECT) == ACLCHECK_OK);
 
-					/*
-					 * If the user doesn't have permissions to access an
-					 * inheritance child relation, check the permissions of
-					 * the table actually mentioned in the query, since most
-					 * likely the user does have that permission.  Note that
-					 * whole-table select privilege on the parent doesn't
-					 * quite guarantee that the user could read all columns of
-					 * the child. But in practice it's unlikely that any
-					 * interesting security violation could result from
-					 * allowing access to the expression stats, so we allow it
-					 * anyway.  See similar code in examine_simple_variable()
-					 * for additional comments.
-					 */
-					if (!vardata->acl_ok &&
-						root->append_rel_array != NULL)
-					{
-						AppendRelInfo *appinfo;
-						Index		varno = onerel->relid;
-
-						appinfo = root->append_rel_array[varno];
-						while (appinfo &&
-							   planner_rt_fetch(appinfo->parent_relid,
-												root)->rtekind == RTE_RELATION)
-						{
-							varno = appinfo->parent_relid;
-							appinfo = root->append_rel_array[varno];
-						}
-						if (varno != onerel->relid)
-						{
-							/* Repeat access check on this rel */
-							rte = planner_rt_fetch(varno, root);
-							Assert(rte->rtekind == RTE_RELATION);
-
-							userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
-
-							vardata->acl_ok =
-								rte->securityQuals == NIL &&
-								(pg_class_aclcheck(rte->relid,
-												   userid,
-												   ACL_SELECT) == ACLCHECK_OK);
-						}
-					}
-
+					recheck_parent_acl(root, vardata, onerel->relid);
 					break;
 				}
 
@@ -5354,6 +5270,54 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 	}
 }
 
+/*
+ * If the user doesn't have permissions to access an inheritance child
+ * relation, check the permissions of the table actually mentioned in the
+ * query, since most likely the user does have that permission.  Note that
+ * whole-table select privilege on the parent doesn't quite guarantee that the
+ * user could read all columns of the child.  But in practice it's unlikely
+ * that any interesting security violation could result from allowing access to
+ * the expression stats, so we allow it anyway.  See similar code in
+ * examine_simple_variable() for additional comments.
+ */
+static void
+recheck_parent_acl(PlannerInfo *root, VariableStatData *vardata, Oid relid)
+{
+	RangeTblEntry	*rte;
+	Oid		userid;
+
+	if (!vardata->acl_ok &&
+		root->append_rel_array != NULL)
+	{
+		AppendRelInfo *appinfo;
+		Index		varno = relid;
+
+		appinfo = root->append_rel_array[varno];
+		while (appinfo &&
+			   planner_rt_fetch(appinfo->parent_relid,
+								root)->rtekind == RTE_RELATION)
+		{
+			varno = appinfo->parent_relid;
+			appinfo = root->append_rel_array[varno];
+		}
+
+		if (varno != relid)
+		{
+			/* Repeat access check on this rel */
+			rte = planner_rt_fetch(varno, root);
+			Assert(rte->rtekind == RTE_RELATION);
+
+			userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+
+			vardata->acl_ok =
+				rte->securityQuals == NIL &&
+				(pg_class_aclcheck(rte->relid,
+								   userid,
+								   ACL_SELECT) == ACLCHECK_OK);
+		}
+	}
+}
+
 /*
  * examine_simple_variable
  *		Handle a simple Var for examine_variable
-- 
2.31.1

From 4c74acba061488f5c0954c4240ca2636591b9c61 Mon Sep 17 00:00:00 2001
From: Tomas Vondra <to...@2ndquadrant.com>
Date: Sun, 12 Dec 2021 00:07:27 +0100
Subject: [PATCH 3/4] Add stxdinherit flag to pg_statistic_ext_data

The support for extended statistics on inheritance trees was somewhat
problematic, because the catalog pg_statistic_ext_data did not have a
flag identifying whether the data was built with/without data for the
child relations.

For partitioned tables we've been able to work around this because the
non-leaf relations can't contain data, so there's really just one set of
statistics to store. But for regular inheritance trees we had to pick,
and there no clear winner.

This introduces the pg_statistic_ext_data.stxdinherit flag, analogous to
pg_statistic.stainherit, and modifies analyze to build statistics for
both cases.

This relaxes the relationship between the two catalogs - until now we've
created the pg_statistic_ext_data one when creating the statistics, and
then only updated it later. There was always 1:1 relationship between
rows in the two catalogs. With this change we no longer insert any data
rows while creating statistics, because we don't know which flag value
to create, and it seem wasteful to initialize both for every relation.
The there may be 0, 1 or 2 data rows for each statistics definition.

Patch by me, with extensive improvements and fixes by Justin Pryzby.

Author: Tomas Vondra, Justin Pryzby
Reviewed-by: Tomas Vondra, Justin Pryzby
Discussion: https://postgr.es/m/20210923212624.GI831%40telsasoft.com
---
 doc/src/sgml/catalogs.sgml                  |  23 +++
 src/backend/catalog/system_views.sql        |   2 +
 src/backend/commands/analyze.c              |  28 +---
 src/backend/commands/statscmds.c            |  72 +++++-----
 src/backend/optimizer/util/plancat.c        | 147 ++++++++++++--------
 src/backend/statistics/dependencies.c       |  22 ++-
 src/backend/statistics/extended_stats.c     |  75 +++++-----
 src/backend/statistics/mcv.c                |   9 +-
 src/backend/statistics/mvdistinct.c         |   5 +-
 src/backend/utils/adt/selfuncs.c            |  36 +++--
 src/backend/utils/cache/syscache.c          |   6 +-
 src/include/catalog/pg_statistic_ext_data.h |   4 +-
 src/include/commands/defrem.h               |   1 +
 src/include/nodes/pathnodes.h               |   1 +
 src/include/statistics/statistics.h         |  11 +-
 src/test/regress/expected/rules.out         |   2 +
 src/test/regress/expected/stats_ext.out     |  18 ++-
 src/test/regress/sql/stats_ext.sql          |   4 +-
 18 files changed, 251 insertions(+), 215 deletions(-)

diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 025db98763..9ab2cb52a7 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -7521,6 +7521,19 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
    created with <link linkend="sql-createstatistics"><command>CREATE STATISTICS</command></link>.
   </para>
 
+  <para>
+   Normally there is one entry, with <structfield>stxdinherit</structfield> =
+   <literal>false</literal>, for each statistics object that has been analyzed.
+   If the table has inheritance children, a second entry with
+   <structfield>stxdinherit</structfield> = <literal>true</literal> is also created.
+   This row represents the statistics object over the inheritance tree, i.e.,
+   statistics for the data you'd see with
+   <literal>SELECT * FROM <replaceable>table</replaceable>*</literal>,
+   whereas the <structfield>stxdinherit</structfield> = <literal>false</literal> row
+   represents the results of
+   <literal>SELECT * FROM ONLY <replaceable>table</replaceable></literal>.
+  </para>
+
   <para>
    Like <link linkend="catalog-pg-statistic"><structname>pg_statistic</structname></link>,
    <structname>pg_statistic_ext_data</structname> should not be
@@ -7560,6 +7573,16 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
       </para></entry>
      </row>
 
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>stxdinherit</structfield> <type>bool</type>
+      </para>
+      <para>
+       If true, the stats include inheritance child columns, not just the
+       values in the specified relation
+      </para></entry>
+     </row>
+
      <row>
       <entry role="catalog_table_entry"><para role="column_definition">
        <structfield>stxdndistinct</structfield> <type>pg_ndistinct</type>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 61b515cdb8..319653789b 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -266,6 +266,7 @@ CREATE VIEW pg_stats_ext WITH (security_barrier) AS
            ) AS attnames,
            pg_get_statisticsobjdef_expressions(s.oid) as exprs,
            s.stxkind AS kinds,
+           sd.stxdinherit AS inherited,
            sd.stxdndistinct AS n_distinct,
            sd.stxddependencies AS dependencies,
            m.most_common_vals,
@@ -298,6 +299,7 @@ CREATE VIEW pg_stats_ext_exprs WITH (security_barrier) AS
            s.stxname AS statistics_name,
            pg_get_userbyid(s.stxowner) AS statistics_owner,
            stat.expr,
+           sd.stxdinherit AS inherited,
            (stat.a).stanullfrac AS null_frac,
            (stat.a).stawidth AS avg_width,
            (stat.a).stadistinct AS n_distinct,
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 58a82e4929..3436f09d63 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -549,7 +549,6 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 	{
 		MemoryContext col_context,
 					old_context;
-		bool		build_ext_stats;
 
 		pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
 									 PROGRESS_ANALYZE_PHASE_COMPUTE_STATS);
@@ -613,30 +612,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 							thisdata->attr_cnt, thisdata->vacattrstats);
 		}
 
-		/*
-		 * Should we build extended statistics for this relation?
-		 *
-		 * The extended statistics catalog does not include an inheritance
-		 * flag, so we can't store statistics built both with and without
-		 * data from child relations. We can store just one set of statistics
-		 * per relation. For plain relations that's fine, but for inheritance
-		 * trees we have to pick whether to store statistics for just the
-		 * one relation or the whole tree. For plain inheritance we store
-		 * the (!inh) version, mostly for backwards compatibility reasons.
-		 * For partitioned tables that's pointless (the non-leaf tables are
-		 * always empty), so we store stats representing the whole tree.
-		 */
-		build_ext_stats = (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) ? inh : (!inh);
-
-		/*
-		 * Build extended statistics (if there are any).
-		 *
-		 * For now we only build extended statistics on individual relations,
-		 * not for relations representing inheritance trees.
-		 */
-		if (build_ext_stats)
-			BuildRelationExtStatistics(onerel, totalrows, numrows, rows,
-									   attr_cnt, vacattrstats);
+		/* Build extended statistics (if there are any). */
+		BuildRelationExtStatistics(onerel, inh, totalrows, numrows, rows,
+								   attr_cnt, vacattrstats);
 	}
 
 	pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
index 8f1550ec80..1fa39e6f21 100644
--- a/src/backend/commands/statscmds.c
+++ b/src/backend/commands/statscmds.c
@@ -75,13 +75,10 @@ CreateStatistics(CreateStatsStmt *stmt)
 	HeapTuple	htup;
 	Datum		values[Natts_pg_statistic_ext];
 	bool		nulls[Natts_pg_statistic_ext];
-	Datum		datavalues[Natts_pg_statistic_ext_data];
-	bool		datanulls[Natts_pg_statistic_ext_data];
 	int2vector *stxkeys;
 	List	   *stxexprs = NIL;
 	Datum		exprsDatum;
 	Relation	statrel;
-	Relation	datarel;
 	Relation	rel = NULL;
 	Oid			relid;
 	ObjectAddress parentobject,
@@ -514,28 +511,10 @@ CreateStatistics(CreateStatsStmt *stmt)
 	relation_close(statrel, RowExclusiveLock);
 
 	/*
-	 * Also build the pg_statistic_ext_data tuple, to hold the actual
-	 * statistics data.
+	 * We used to create the pg_statistic_ext_data tuple too, but it's not clear
+	 * what value should the stxdinherit flag have (it depends on whether the rel
+	 * is partitioned, contains data, etc.)
 	 */
-	datarel = table_open(StatisticExtDataRelationId, RowExclusiveLock);
-
-	memset(datavalues, 0, sizeof(datavalues));
-	memset(datanulls, false, sizeof(datanulls));
-
-	datavalues[Anum_pg_statistic_ext_data_stxoid - 1] = ObjectIdGetDatum(statoid);
-
-	/* no statistics built yet */
-	datanulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
-	datanulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
-	datanulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
-	datanulls[Anum_pg_statistic_ext_data_stxdexpr - 1] = true;
-
-	/* insert it into pg_statistic_ext_data */
-	htup = heap_form_tuple(datarel->rd_att, datavalues, datanulls);
-	CatalogTupleInsert(datarel, htup);
-	heap_freetuple(htup);
-
-	relation_close(datarel, RowExclusiveLock);
 
 	InvokeObjectPostCreateHook(StatisticExtRelationId, statoid, 0);
 
@@ -717,32 +696,49 @@ AlterStatistics(AlterStatsStmt *stmt)
 }
 
 /*
- * Guts of statistics object deletion.
+ * Delete entry in pg_statistic_ext_data catalog. We don't know if the row
+ * exists, so don't error out.
  */
 void
-RemoveStatisticsById(Oid statsOid)
+RemoveStatisticsDataById(Oid statsOid, bool inh)
 {
 	Relation	relation;
 	HeapTuple	tup;
-	Form_pg_statistic_ext statext;
-	Oid			relid;
 
-	/*
-	 * First delete the pg_statistic_ext_data tuple holding the actual
-	 * statistical data.
-	 */
 	relation = table_open(StatisticExtDataRelationId, RowExclusiveLock);
 
-	tup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statsOid));
-
-	if (!HeapTupleIsValid(tup)) /* should not happen */
-		elog(ERROR, "cache lookup failed for statistics data %u", statsOid);
+	tup = SearchSysCache2(STATEXTDATASTXOID, ObjectIdGetDatum(statsOid),
+						  BoolGetDatum(inh));
 
-	CatalogTupleDelete(relation, &tup->t_self);
+	/* We don't know if the data row for inh value exists. */
+	if (HeapTupleIsValid(tup))
+	{
+		CatalogTupleDelete(relation, &tup->t_self);
 
-	ReleaseSysCache(tup);
+		ReleaseSysCache(tup);
+	}
 
 	table_close(relation, RowExclusiveLock);
+}
+
+/*
+ * Guts of statistics object deletion.
+ */
+void
+RemoveStatisticsById(Oid statsOid)
+{
+	Relation	relation;
+	HeapTuple	tup;
+	Form_pg_statistic_ext statext;
+	Oid			relid;
+
+	/*
+	 * First delete the pg_statistic_ext_data tuples holding the actual
+	 * statistical data. There might be data with/without inheritance, so
+	 * attempt deleting both.
+	 */
+	RemoveStatisticsDataById(statsOid, true);
+	RemoveStatisticsDataById(statsOid, false);
 
 	/*
 	 * Delete the pg_statistic_ext tuple.  Also send out a cache inval on the
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 564a38a13e..8ba3a3c59d 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -30,6 +30,7 @@
 #include "catalog/pg_am.h"
 #include "catalog/pg_proc.h"
 #include "catalog/pg_statistic_ext.h"
+#include "catalog/pg_statistic_ext_data.h"
 #include "foreign/fdwapi.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -1276,6 +1277,87 @@ get_relation_constraints(PlannerInfo *root,
 	return result;
 }
 
+/*
+ * Try loading data for the statistics object.
+ *
+ * We don't know if the data (specified by statOid and inh value) exist.
+ * The result is stored in stainfos list.
+ */
+static void
+get_relation_statistics_worker(List **stainfos, RelOptInfo *rel,
+							   Oid statOid, bool inh,
+							   Bitmapset *keys, List *exprs)
+{
+	Form_pg_statistic_ext_data dataForm;
+	HeapTuple	dtup;
+
+	dtup = SearchSysCache2(STATEXTDATASTXOID,
+						   ObjectIdGetDatum(statOid), BoolGetDatum(inh));
+	if (!HeapTupleIsValid(dtup))
+		return;
+
+	dataForm = (Form_pg_statistic_ext_data) GETSTRUCT(dtup);
+
+	/* add one StatisticExtInfo for each kind built */
+	if (statext_is_kind_built(dtup, STATS_EXT_NDISTINCT))
+	{
+		StatisticExtInfo *info = makeNode(StatisticExtInfo);
+
+		info->statOid = statOid;
+		info->inherit = dataForm->stxdinherit;
+		info->rel = rel;
+		info->kind = STATS_EXT_NDISTINCT;
+		info->keys = bms_copy(keys);
+		info->exprs = exprs;
+
+		*stainfos = lappend(*stainfos, info);
+	}
+
+	if (statext_is_kind_built(dtup, STATS_EXT_DEPENDENCIES))
+	{
+		StatisticExtInfo *info = makeNode(StatisticExtInfo);
+
+		info->statOid = statOid;
+		info->inherit = dataForm->stxdinherit;
+		info->rel = rel;
+		info->kind = STATS_EXT_DEPENDENCIES;
+		info->keys = bms_copy(keys);
+		info->exprs = exprs;
+
+		*stainfos = lappend(*stainfos, info);
+	}
+
+	if (statext_is_kind_built(dtup, STATS_EXT_MCV))
+	{
+		StatisticExtInfo *info = makeNode(StatisticExtInfo);
+
+		info->statOid = statOid;
+		info->inherit = dataForm->stxdinherit;
+		info->rel = rel;
+		info->kind = STATS_EXT_MCV;
+		info->keys = bms_copy(keys);
+		info->exprs = exprs;
+
+		*stainfos = lappend(*stainfos, info);
+	}
+
+	if (statext_is_kind_built(dtup, STATS_EXT_EXPRESSIONS))
+	{
+		StatisticExtInfo *info = makeNode(StatisticExtInfo);
+
+		info->statOid = statOid;
+		info->inherit = dataForm->stxdinherit;
+		info->rel = rel;
+		info->kind = STATS_EXT_EXPRESSIONS;
+		info->keys = bms_copy(keys);
+		info->exprs = exprs;
+
+		*stainfos = lappend(*stainfos, info);
+	}
+
+	ReleaseSysCache(dtup);
+}
+
 /*
  * get_relation_statistics
  *		Retrieve extended statistics defined on the table.
@@ -1299,7 +1381,6 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 		Oid			statOid = lfirst_oid(l);
 		Form_pg_statistic_ext staForm;
 		HeapTuple	htup;
-		HeapTuple	dtup;
 		Bitmapset  *keys = NULL;
 		List	   *exprs = NIL;
 		int			i;
@@ -1309,10 +1390,6 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 			elog(ERROR, "cache lookup failed for statistics object %u", statOid);
 		staForm = (Form_pg_statistic_ext) GETSTRUCT(htup);
 
-		dtup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statOid));
-		if (!HeapTupleIsValid(dtup))
-			elog(ERROR, "cache lookup failed for statistics object %u", statOid);
-
 		/*
 		 * First, build the array of columns covered.  This is ultimately
 		 * wasted if no stats within the object have actually been built, but
@@ -1324,6 +1401,11 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 		/*
 		 * Preprocess expressions (if any). We read the expressions, run them
 		 * through eval_const_expressions, and fix the varnos.
+		 *
+		 * XXX We don't know yet if there are any data for this stats object,
+		 * with either stxdinherit value. But it's reasonable to assume there
+		 * is at least one of those, possibly both. So it's better to process
+		 * keys and expressions here.
 		 */
 		{
 			bool		isnull;
@@ -1364,61 +1446,14 @@ get_relation_statistics(RelOptInfo *rel, Relation relation)
 			}
 		}
 
-		/* add one StatisticExtInfo for each kind built */
-		if (statext_is_kind_built(dtup, STATS_EXT_NDISTINCT))
-		{
-			StatisticExtInfo *info = makeNode(StatisticExtInfo);
-
-			info->statOid = statOid;
-			info->rel = rel;
-			info->kind = STATS_EXT_NDISTINCT;
-			info->keys = bms_copy(keys);
-			info->exprs = exprs;
-
-			stainfos = lappend(stainfos, info);
-		}
-
-		if (statext_is_kind_built(dtup, STATS_EXT_DEPENDENCIES))
-		{
-			StatisticExtInfo *info = makeNode(StatisticExtInfo);
-
-			info->statOid = statOid;
-			info->rel = rel;
-			info->kind = STATS_EXT_DEPENDENCIES;
-			info->keys = bms_copy(keys);
-			info->exprs = exprs;
-
-			stainfos = lappend(stainfos, info);
-		}
-
-		if (statext_is_kind_built(dtup, STATS_EXT_MCV))
-		{
-			StatisticExtInfo *info = makeNode(StatisticExtInfo);
-
-			info->statOid = statOid;
-			info->rel = rel;
-			info->kind = STATS_EXT_MCV;
-			info->keys = bms_copy(keys);
-			info->exprs = exprs;
-
-			stainfos = lappend(stainfos, info);
-		}
-
-		if (statext_is_kind_built(dtup, STATS_EXT_EXPRESSIONS))
-		{
-			StatisticExtInfo *info = makeNode(StatisticExtInfo);
+		/* extract statistics for possible values of stxdinherit flag */
 
-			info->statOid = statOid;
-			info->rel = rel;
-			info->kind = STATS_EXT_EXPRESSIONS;
-			info->keys = bms_copy(keys);
-			info->exprs = exprs;
+		get_relation_statistics_worker(&stainfos, rel, statOid, true, keys, exprs);
 
-			stainfos = lappend(stainfos, info);
-		}
+		get_relation_statistics_worker(&stainfos, rel, statOid, false, keys, exprs);
 
 		ReleaseSysCache(htup);
-		ReleaseSysCache(dtup);
+
 		bms_free(keys);
 	}
 
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index a22ca73807..672091b517 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -619,14 +619,16 @@ dependency_is_fully_matched(MVDependency *dependency, Bitmapset *attnums)
  *		Load the functional dependencies for the indicated pg_statistic_ext tuple
  */
 MVDependencies *
-statext_dependencies_load(Oid mvoid)
+statext_dependencies_load(Oid mvoid, bool inh)
 {
 	MVDependencies *result;
 	bool		isnull;
 	Datum		deps;
 	HeapTuple	htup;
 
-	htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(mvoid));
+	htup = SearchSysCache2(STATEXTDATASTXOID,
+						   ObjectIdGetDatum(mvoid),
+						   BoolGetDatum(inh));
 	if (!HeapTupleIsValid(htup))
 		elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
 
@@ -1417,16 +1419,6 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	Node	  **unique_exprs;
 	int			unique_exprs_cnt;
 
-	/*
-	 * When dealing with regular inheritence trees, ignore extended stats
-	 * (which were built without data from child rels, and thus do not
-	 * represent them). For partitioned tables data from partitions are
-	 * in the stats (and there's no data in the non-leaf relations), so
-	 * in this case we do consider extended stats.
-	 */
-	if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
-		return 1.0;
-
 	/* check if there's any stats that might be useful for us. */
 	if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
 		return 1.0;
@@ -1610,6 +1602,10 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 		if (stat->kind != STATS_EXT_DEPENDENCIES)
 			continue;
 
+		/* skip statistics with mismatching stxdinherit value */
+		if (stat->inherit != rte->inh)
+			continue;
+
 		/*
 		 * Count matching attributes - we have to undo the attnum offsets. The
 		 * input attribute numbers are not offset (expressions are not
@@ -1656,7 +1652,7 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 		if (nmatched + nexprs < 2)
 			continue;
 
-		deps = statext_dependencies_load(stat->statOid);
+		deps = statext_dependencies_load(stat->statOid, rte->inh);
 
 		/*
 		 * The expressions may be represented by different attnums in the
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 68be000f3d..3423603dd7 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -25,6 +25,7 @@
 #include "catalog/pg_statistic_ext.h"
 #include "catalog/pg_statistic_ext_data.h"
 #include "executor/executor.h"
+#include "commands/defrem.h"
 #include "commands/progress.h"
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
@@ -78,7 +79,7 @@ typedef struct StatExtEntry
 static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
 static VacAttrStats **lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs,
 											int nvacatts, VacAttrStats **vacatts);
-static void statext_store(Oid statOid,
+static void statext_store(Oid statOid, bool inh,
 						  MVNDistinct *ndistinct, MVDependencies *dependencies,
 						  MCVList *mcv, Datum exprs, VacAttrStats **stats);
 static int	statext_compute_stattarget(int stattarget,
@@ -111,7 +112,7 @@ static StatsBuildData *make_build_data(Relation onerel, StatExtEntry *stat,
  * requested stats, and serializes them back into the catalog.
  */
 void
-BuildRelationExtStatistics(Relation onerel, double totalrows,
+BuildRelationExtStatistics(Relation onerel, bool inh, double totalrows,
 						   int numrows, HeapTuple *rows,
 						   int natts, VacAttrStats **vacattrstats)
 {
@@ -231,7 +232,8 @@ BuildRelationExtStatistics(Relation onerel, double totalrows,
 		}
 
 		/* store the statistics in the catalog */
-		statext_store(stat->statOid, ndistinct, dependencies, mcv, exprstats, stats);
+		statext_store(stat->statOid, inh,
+					  ndistinct, dependencies, mcv, exprstats, stats);
 
 		/* for reporting progress */
 		pgstat_progress_update_param(PROGRESS_ANALYZE_EXT_STATS_COMPUTED,
@@ -782,23 +784,27 @@ lookup_var_attr_stats(Relation rel, Bitmapset *attrs, List *exprs,
  *	tuple.
  */
 static void
-statext_store(Oid statOid,
+statext_store(Oid statOid, bool inh,
 			  MVNDistinct *ndistinct, MVDependencies *dependencies,
 			  MCVList *mcv, Datum exprs, VacAttrStats **stats)
 {
 	Relation	pg_stextdata;
-	HeapTuple	stup,
-				oldtup;
+	HeapTuple	stup;
 	Datum		values[Natts_pg_statistic_ext_data];
 	bool		nulls[Natts_pg_statistic_ext_data];
-	bool		replaces[Natts_pg_statistic_ext_data];
 
 	pg_stextdata = table_open(StatisticExtDataRelationId, RowExclusiveLock);
 
 	memset(nulls, true, sizeof(nulls));
-	memset(replaces, false, sizeof(replaces));
 	memset(values, 0, sizeof(values));
 
+	/* basic info */
+	values[Anum_pg_statistic_ext_data_stxoid - 1] = ObjectIdGetDatum(statOid);
+	nulls[Anum_pg_statistic_ext_data_stxoid - 1] = false;
+
+	values[Anum_pg_statistic_ext_data_stxdinherit - 1] = BoolGetDatum(inh);
+	nulls[Anum_pg_statistic_ext_data_stxdinherit - 1] = false;
+
 	/*
 	 * Construct a new pg_statistic_ext_data tuple, replacing the calculated
 	 * stats.
@@ -831,25 +837,15 @@ statext_store(Oid statOid,
 		values[Anum_pg_statistic_ext_data_stxdexpr - 1] = exprs;
 	}
 
-	/* always replace the value (either by bytea or NULL) */
-	replaces[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
-	replaces[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
-	replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
-	replaces[Anum_pg_statistic_ext_data_stxdexpr - 1] = true;
-
-	/* there should already be a pg_statistic_ext_data tuple */
-	oldtup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(statOid));
-	if (!HeapTupleIsValid(oldtup))
-		elog(ERROR, "cache lookup failed for statistics object %u", statOid);
-
-	/* replace it */
-	stup = heap_modify_tuple(oldtup,
-							 RelationGetDescr(pg_stextdata),
-							 values,
-							 nulls,
-							 replaces);
-	ReleaseSysCache(oldtup);
-	CatalogTupleUpdate(pg_stextdata, &stup->t_self, stup);
+	/*
+	 * Delete the old tuple if it exists, and insert a new one. It's easier
+	 * than trying to update or insert, based on various conditions.
+	 */
+	RemoveStatisticsDataById(statOid, inh);
+
+	/* form and insert a new tuple */
+	stup = heap_form_tuple(RelationGetDescr(pg_stextdata), values, nulls);
+	CatalogTupleInsert(pg_stextdata, stup);
 
 	heap_freetuple(stup);
 
@@ -1235,7 +1231,7 @@ stat_covers_expressions(StatisticExtInfo *stat, List *exprs,
  * further tiebreakers are needed.
  */
 StatisticExtInfo *
-choose_best_statistics(List *stats, char requiredkind,
+choose_best_statistics(List *stats, char requiredkind, bool inh,
 					   Bitmapset **clause_attnums, List **clause_exprs,
 					   int nclauses)
 {
@@ -1257,6 +1253,10 @@ choose_best_statistics(List *stats, char requiredkind,
 		if (info->kind != requiredkind)
 			continue;
 
+		/* skip statistics with mismatching inheritance flag */
+		if (info->inherit != inh)
+			continue;
+
 		/*
 		 * Collect attributes and expressions in remaining (unestimated)
 		 * clauses fully covered by this statistic object.
@@ -1697,16 +1697,6 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 	Selectivity sel = (is_or) ? 0.0 : 1.0;
 	RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
 
-	/*
-	 * When dealing with regular inheritence trees, ignore extended stats
-	 * (which were built without data from child rels, and thus do not
-	 * represent them). For partitioned tables data from partitions are
-	 * in the stats (and there's no data in the non-leaf relations), so
-	 * in this case we do consider extended stats.
-	 */
-	if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
-		return sel;
-
 	/* check if there's any stats that might be useful for us. */
 	if (!has_stats_of_kind(rel->statlist, STATS_EXT_MCV))
 		return sel;
@@ -1758,7 +1748,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 		Bitmapset  *simple_clauses;
 
 		/* find the best suited statistics object for these attnums */
-		stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV,
+		stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV, rte->inh,
 									  list_attnums, list_exprs,
 									  list_length(clauses));
 
@@ -1847,7 +1837,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 			MCVList    *mcv_list;
 
 			/* Load the MCV list stored in the statistics object */
-			mcv_list = statext_mcv_load(stat->statOid);
+			mcv_list = statext_mcv_load(stat->statOid, rte->inh);
 
 			/*
 			 * Compute the selectivity of the ORed list of clauses covered by
@@ -2408,7 +2398,7 @@ serialize_expr_stats(AnlExprData *exprdata, int nexprs)
  * identified by the supplied index.
  */
 HeapTuple
-statext_expressions_load(Oid stxoid, int idx)
+statext_expressions_load(Oid stxoid, bool inh, int idx)
 {
 	bool		isnull;
 	Datum		value;
@@ -2418,7 +2408,8 @@ statext_expressions_load(Oid stxoid, int idx)
 	HeapTupleData tmptup;
 	HeapTuple	tup;
 
-	htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(stxoid));
+	htup = SearchSysCache2(STATEXTDATASTXOID,
+						   ObjectIdGetDatum(stxoid), BoolGetDatum(inh));
 	if (!HeapTupleIsValid(htup))
 		elog(ERROR, "cache lookup failed for statistics object %u", stxoid);
 
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index b350fc5f7b..75d7d35caa 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -559,12 +559,13 @@ build_column_frequencies(SortItem *groups, int ngroups,
  *		Load the MCV list for the indicated pg_statistic_ext tuple.
  */
 MCVList *
-statext_mcv_load(Oid mvoid)
+statext_mcv_load(Oid mvoid, bool inh)
 {
 	MCVList    *result;
 	bool		isnull;
 	Datum		mcvlist;
-	HeapTuple	htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(mvoid));
+	HeapTuple	htup = SearchSysCache2(STATEXTDATASTXOID,
+									   ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
 
 	if (!HeapTupleIsValid(htup))
 		elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
@@ -2039,11 +2040,13 @@ mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
 	MCVList    *mcv;
 	Selectivity s = 0.0;
 
+	RangeTblEntry *rte = root->simple_rte_array[rel->relid];
+
 	/* match/mismatch bitmap for each MCV item */
 	bool	   *matches = NULL;
 
 	/* load the MCV list stored in the statistics object */
-	mcv = statext_mcv_load(stat->statOid);
+	mcv = statext_mcv_load(stat->statOid, rte->inh);
 
 	/* build a match bitmap for the clauses */
 	matches = mcv_get_match_bitmap(root, clauses, stat->keys, stat->exprs,
diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c
index 4481312d61..ab1f10d6c0 100644
--- a/src/backend/statistics/mvdistinct.c
+++ b/src/backend/statistics/mvdistinct.c
@@ -146,14 +146,15 @@ statext_ndistinct_build(double totalrows, StatsBuildData *data)
  *		Load the ndistinct value for the indicated pg_statistic_ext tuple
  */
 MVNDistinct *
-statext_ndistinct_load(Oid mvoid)
+statext_ndistinct_load(Oid mvoid, bool inh)
 {
 	MVNDistinct *result;
 	bool		isnull;
 	Datum		ndist;
 	HeapTuple	htup;
 
-	htup = SearchSysCache1(STATEXTDATASTXOID, ObjectIdGetDatum(mvoid));
+	htup = SearchSysCache2(STATEXTDATASTXOID,
+						   ObjectIdGetDatum(mvoid), BoolGetDatum(inh));
 	if (!HeapTupleIsValid(htup))
 		elog(ERROR, "cache lookup failed for statistics object %u", mvoid);
 
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index b25337370b..886d85b7f3 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3919,17 +3919,6 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 	if (!rel->statlist)
 		return false;
 
-	/*
-	 * When dealing with regular inheritence trees, ignore extended stats
-	 * (which were built without data from child rels, and thus do not
-	 * represent them). For partitioned tables data from partitions are
-	 * in the stats (and there's no data in the non-leaf relations), so
-	 * in this case we do consider extended stats.
-	 */
-	rte = planner_rt_fetch(rel->relid, root);
-	if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
-		return false;
-
 	/* look for the ndistinct statistics object matching the most vars */
 	nmatches_vars = 0;			/* we require at least two matches */
 	nmatches_exprs = 0;
@@ -4015,7 +4004,8 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 
 	Assert(nmatches_vars + nmatches_exprs > 1);
 
-	stats = statext_ndistinct_load(statOid);
+	rte = planner_rt_fetch(rel->relid, root);
+	stats = statext_ndistinct_load(statOid, rte->inh);
 
 	/*
 	 * If we have a match, search it for the specific item that matches (there
@@ -5270,22 +5260,28 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 				/* found a match, see if we can extract pg_statistic row */
 				if (equal(node, expr))
 				{
-					HeapTuple	t = statext_expressions_load(info->statOid, pos);
-
-					/* Get statistics object's table for permission check */
-					RangeTblEntry *rte;
+					RangeTblEntry *rte = planner_rt_fetch(onerel->relid, root);
 					Oid			userid;
+					bool		inh;
 
-					vardata->statsTuple = t;
+					Assert(rte->rtekind == RTE_RELATION);
+
+					/*
+					 * Determine if we'redealing with inheritance tree.
+					 *
+					 * XXX Can't we simply look at rte->inh?
+					 */
+					inh = root->append_rel_array == NULL ? false :
+						root->append_rel_array[onerel->relid]->parent_relid != 0;
 
 					/*
 					 * XXX Not sure if we should cache the tuple somewhere.
 					 * Now we just create a new copy every time.
 					 */
-					vardata->freefunc = ReleaseDummy;
+					vardata->statsTuple =
+						statext_expressions_load(info->statOid, inh, pos);
 
-					rte = planner_rt_fetch(onerel->relid, root);
-					Assert(rte->rtekind == RTE_RELATION);
+					vardata->freefunc = ReleaseDummy;
 
 					/*
 					 * Use checkAsUser if it's set, in case we're accessing
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index 56870b46e4..6194751e99 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -763,11 +763,11 @@ static const struct cachedesc cacheinfo[] = {
 		32
 	},
 	{StatisticExtDataRelationId,	/* STATEXTDATASTXOID */
-		StatisticExtDataStxoidIndexId,
-		1,
+		StatisticExtDataStxoidInhIndexId,
+		2,
 		{
 			Anum_pg_statistic_ext_data_stxoid,
-			0,
+			Anum_pg_statistic_ext_data_stxdinherit,
 			0,
 			0
 		},
diff --git a/src/include/catalog/pg_statistic_ext_data.h b/src/include/catalog/pg_statistic_ext_data.h
index 7b73b790d2..8ffd8b68cd 100644
--- a/src/include/catalog/pg_statistic_ext_data.h
+++ b/src/include/catalog/pg_statistic_ext_data.h
@@ -32,6 +32,7 @@ CATALOG(pg_statistic_ext_data,3429,StatisticExtDataRelationId)
 {
 	Oid			stxoid BKI_LOOKUP(pg_statistic_ext);	/* statistics object
 														 * this data is for */
+	bool		stxdinherit;	/* true if inheritance children are included */
 
 #ifdef CATALOG_VARLEN			/* variable-length fields start here */
 
@@ -53,6 +54,7 @@ typedef FormData_pg_statistic_ext_data * Form_pg_statistic_ext_data;
 
 DECLARE_TOAST(pg_statistic_ext_data, 3430, 3431);
 
-DECLARE_UNIQUE_INDEX_PKEY(pg_statistic_ext_data_stxoid_index, 3433, StatisticExtDataStxoidIndexId, on pg_statistic_ext_data using btree(stxoid oid_ops));
+DECLARE_UNIQUE_INDEX_PKEY(pg_statistic_ext_data_stxoid_inh_index, 3433, StatisticExtDataStxoidInhIndexId, on pg_statistic_ext_data using btree(stxoid oid_ops, stxdinherit bool_ops));
+
 
 #endif							/* PG_STATISTIC_EXT_DATA_H */
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index f84d09959c..ab3c59fcd5 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -83,6 +83,7 @@ extern ObjectAddress AlterOperator(AlterOperatorStmt *stmt);
 extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt);
 extern ObjectAddress AlterStatistics(AlterStatsStmt *stmt);
 extern void RemoveStatisticsById(Oid statsOid);
+extern void RemoveStatisticsDataById(Oid statsOid, bool inh);
 extern Oid	StatisticsGetRelation(Oid statId, bool missing_ok);
 
 /* commands/aggregatecmds.c */
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 324d92880b..02aff9847e 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -934,6 +934,7 @@ typedef struct StatisticExtInfo
 	NodeTag		type;
 
 	Oid			statOid;		/* OID of the statistics row */
+	bool		inherit;		/* includes child relations */
 	RelOptInfo *rel;			/* back-link to statistic's table */
 	char		kind;			/* statistics kind of this entry */
 	Bitmapset  *keys;			/* attnums of the columns covered */
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
index 326cf26fea..3868e43f8a 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -94,11 +94,11 @@ typedef struct MCVList
 	MCVItem		items[FLEXIBLE_ARRAY_MEMBER];	/* array of MCV items */
 } MCVList;
 
-extern MVNDistinct *statext_ndistinct_load(Oid mvoid);
-extern MVDependencies *statext_dependencies_load(Oid mvoid);
-extern MCVList *statext_mcv_load(Oid mvoid);
+extern MVNDistinct *statext_ndistinct_load(Oid mvoid, bool inh);
+extern MVDependencies *statext_dependencies_load(Oid mvoid, bool inh);
+extern MCVList *statext_mcv_load(Oid mvoid, bool inh);
 
-extern void BuildRelationExtStatistics(Relation onerel, double totalrows,
+extern void BuildRelationExtStatistics(Relation onerel, bool inh, double totalrows,
 									   int numrows, HeapTuple *rows,
 									   int natts, VacAttrStats **vacattrstats);
 extern int	ComputeExtStatisticsRows(Relation onerel,
@@ -121,9 +121,10 @@ extern Selectivity statext_clauselist_selectivity(PlannerInfo *root,
 												  bool is_or);
 extern bool has_stats_of_kind(List *stats, char requiredkind);
 extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind,
+												bool inh,
 												Bitmapset **clause_attnums,
 												List **clause_exprs,
 												int nclauses);
-extern HeapTuple statext_expressions_load(Oid stxoid, int idx);
+extern HeapTuple statext_expressions_load(Oid stxoid, bool inh, int idx);
 
 #endif							/* STATISTICS_H */
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index b58b062b10..d652f7b5fb 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2443,6 +2443,7 @@ pg_stats_ext| SELECT cn.nspname AS schemaname,
              JOIN pg_attribute a ON (((a.attrelid = s.stxrelid) AND (a.attnum = k.k))))) AS attnames,
     pg_get_statisticsobjdef_expressions(s.oid) AS exprs,
     s.stxkind AS kinds,
+    sd.stxdinherit AS inherited,
     sd.stxdndistinct AS n_distinct,
     sd.stxddependencies AS dependencies,
     m.most_common_vals,
@@ -2469,6 +2470,7 @@ pg_stats_ext_exprs| SELECT cn.nspname AS schemaname,
     s.stxname AS statistics_name,
     pg_get_userbyid(s.stxowner) AS statistics_owner,
     stat.expr,
+    sd.stxdinherit AS inherited,
     (stat.a).stanullfrac AS null_frac,
     (stat.a).stawidth AS avg_width,
     (stat.a).stadistinct AS n_distinct,
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index a11fff655a..427884cda8 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -144,10 +144,9 @@ SELECT stxname, stxdndistinct, stxddependencies, stxdmcv
   FROM pg_statistic_ext s, pg_statistic_ext_data d
  WHERE s.stxname = 'ab1_a_b_stats'
    AND d.stxoid = s.oid;
-    stxname    | stxdndistinct | stxddependencies | stxdmcv 
----------------+---------------+------------------+---------
- ab1_a_b_stats |               |                  | 
-(1 row)
+ stxname | stxdndistinct | stxddependencies | stxdmcv 
+---------+---------------+------------------+---------
+(0 rows)
 
 ALTER STATISTICS ab1_a_b_stats SET STATISTICS -1;
 \d+ ab1
@@ -192,11 +191,18 @@ SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
 
 CREATE STATISTICS stxdinh ON i,j FROM stxdinh;
 VACUUM ANALYZE stxdinh, stxdinh1;
--- Since the stats object does not include inherited stats, it should not affect the estimates
+-- See if the extended stats affect the estimates
 SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
  estimated | actual 
 -----------+--------
-      1000 |   1008
+      1008 |   1008
+(1 row)
+
+-- Ensure correct (non-inherited) stats are applied to inherited query
+SELECT * FROM check_estimated_rows('SELECT * FROM ONLY stxdinh GROUP BY 1,2');
+ estimated | actual 
+-----------+--------
+         9 |      9
 (1 row)
 
 DROP TABLE stxdinh, stxdinh1;
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 01383e8f5f..8deb010b3c 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -123,8 +123,10 @@ VACUUM ANALYZE stxdinh, stxdinh1;
 SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
 CREATE STATISTICS stxdinh ON i,j FROM stxdinh;
 VACUUM ANALYZE stxdinh, stxdinh1;
--- Since the stats object does not include inherited stats, it should not affect the estimates
+-- See if the extended stats affect the estimates
 SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
+-- Ensure correct (non-inherited) stats are applied to inherited query
+SELECT * FROM check_estimated_rows('SELECT * FROM ONLY stxdinh GROUP BY 1,2');
 DROP TABLE stxdinh, stxdinh1;
 
 -- Ensure inherited stats ARE applied to inherited query in partitioned table
-- 
2.31.1

From f2918393ff311ec1646b0082f1b0a0f35a88e77c Mon Sep 17 00:00:00 2001
From: Tomas Vondra <to...@2ndquadrant.com>
Date: Sat, 11 Dec 2021 22:38:08 +0100
Subject: [PATCH 2/4] Build inherited extended stats on partitioned tables

Commit 859b3003de disabled building of extended stats for inheritance
trees, to prevent updating the same catalog row twice. That resolved the
issue, but it also means there are no extended stats for partitioned
tables, because the (!inh) case has empty sample. This is a regression,
affecting queries that don't estimate the leaf relations directly.

But because partitioned tables are empty, we can invert the condition
and build statistics only for the case with inheritance, without losing
anything. And we can consider them when calculating estimates.

Report and patch by Justin Pryzby, minor fixes and cleanup by me.
Backpatch all the way back to PostgreSQL 10, where extended statistics
were introduced (same as 859b3003de).

Author: Justin Pryzby
Reported-by: Justin Pryzby
Backpatch-through: 10
Discussion: https://postgr.es/m/20210923212624.GI831%40telsasoft.com
---
 src/backend/commands/analyze.c          | 18 +++++++++++++++++-
 src/backend/statistics/dependencies.c   |  9 ++++++---
 src/backend/statistics/extended_stats.c |  9 ++++++---
 src/backend/utils/adt/selfuncs.c        | 20 ++++++++++++--------
 src/test/regress/expected/stats_ext.out | 19 +++++++++++++++++++
 src/test/regress/sql/stats_ext.sql      | 10 ++++++++++
 6 files changed, 70 insertions(+), 15 deletions(-)

diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index cd77907fc7..58a82e4929 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -549,6 +549,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 	{
 		MemoryContext col_context,
 					old_context;
+		bool		build_ext_stats;
 
 		pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
 									 PROGRESS_ANALYZE_PHASE_COMPUTE_STATS);
@@ -612,13 +613,28 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 							thisdata->attr_cnt, thisdata->vacattrstats);
 		}
 
+		/*
+		 * Should we build extended statistics for this relation?
+		 *
+		 * The extended statistics catalog does not include an inheritance
+		 * flag, so we can't store statistics built both with and without
+		 * data from child relations. We can store just one set of statistics
+		 * per relation. For plain relations that's fine, but for inheritance
+		 * trees we have to pick whether to store statistics for just the
+		 * one relation or the whole tree. For plain inheritance we store
+		 * the (!inh) version, mostly for backwards compatibility reasons.
+		 * For partitioned tables that's pointless (the non-leaf tables are
+		 * always empty), so we store stats representing the whole tree.
+		 */
+		build_ext_stats = (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) ? inh : (!inh);
+
 		/*
 		 * Build extended statistics (if there are any).
 		 *
 		 * For now we only build extended statistics on individual relations,
 		 * not for relations representing inheritance trees.
 		 */
-		if (!inh)
+		if (build_ext_stats)
 			BuildRelationExtStatistics(onerel, totalrows, numrows, rows,
 									   attr_cnt, vacattrstats);
 	}
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index 88479eee8b..a22ca73807 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -1418,10 +1418,13 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	int			unique_exprs_cnt;
 
 	/*
-	 * When dealing with inheritence trees, ignore extended stats (which were
-	 * built without data from child rels, and thus do not represent them).
+	 * When dealing with regular inheritence trees, ignore extended stats
+	 * (which were built without data from child rels, and thus do not
+	 * represent them). For partitioned tables data from partitions are
+	 * in the stats (and there's no data in the non-leaf relations), so
+	 * in this case we do consider extended stats.
 	 */
-	if (rte->inh)
+	if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
 		return 1.0;
 
 	/* check if there's any stats that might be useful for us. */
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 607b02ab8e..68be000f3d 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1698,10 +1698,13 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 	RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
 
 	/*
-	 * When dealing with inheritence trees, ignore extended stats (which were
-	 * built without data from child rels, and thus do not represent them).
+	 * When dealing with regular inheritence trees, ignore extended stats
+	 * (which were built without data from child rels, and thus do not
+	 * represent them). For partitioned tables data from partitions are
+	 * in the stats (and there's no data in the non-leaf relations), so
+	 * in this case we do consider extended stats.
 	 */
-	if (rte->inh)
+	if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
 		return sel;
 
 	/* check if there's any stats that might be useful for us. */
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 0b12f5288e..b25337370b 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3913,19 +3913,23 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 	Oid			statOid = InvalidOid;
 	MVNDistinct *stats;
 	StatisticExtInfo *matched_info = NULL;
-	RangeTblEntry		*rte = planner_rt_fetch(rel->relid, root);
-
-	/*
-	 * When dealing with inheritence trees, ignore extended stats (which were
-	 * built without data from child rels, and thus do not represent them).
-	 */
-	if (rte->inh)
-		return false;
+	RangeTblEntry		*rte;
 
 	/* bail out immediately if the table has no extended statistics */
 	if (!rel->statlist)
 		return false;
 
+	/*
+	 * When dealing with regular inheritence trees, ignore extended stats
+	 * (which were built without data from child rels, and thus do not
+	 * represent them). For partitioned tables data from partitions are
+	 * in the stats (and there's no data in the non-leaf relations), so
+	 * in this case we do consider extended stats.
+	 */
+	rte = planner_rt_fetch(rel->relid, root);
+	if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
+		return false;
+
 	/* look for the ndistinct statistics object matching the most vars */
 	nmatches_vars = 0;			/* we require at least two matches */
 	nmatches_exprs = 0;
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 5410f58f7f..a11fff655a 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -200,6 +200,25 @@ SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
 (1 row)
 
 DROP TABLE stxdinh, stxdinh1;
+-- Ensure inherited stats ARE applied to inherited query in partitioned table
+CREATE TABLE stxdinp(i int, a int, b int) PARTITION BY RANGE (i);
+CREATE TABLE stxdinp1 PARTITION OF stxdinp FOR VALUES FROM (1)TO(100);
+INSERT INTO stxdinp SELECT 1, a/100, a/100 FROM generate_series(1,999)a;
+CREATE STATISTICS stxdinp ON (a),(b) FROM stxdinp;
+VACUUM ANALYZE stxdinp; -- partitions are processed recursively
+SELECT 1 FROM pg_statistic_ext WHERE stxrelid='stxdinp'::regclass;
+ ?column? 
+----------
+        1
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinp GROUP BY 1,2');
+ estimated | actual 
+-----------+--------
+        10 |     10
+(1 row)
+
+DROP TABLE stxdinp;
 -- basic test for statistics on expressions
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
 -- expression stats may be built on a single expression column
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index a196d5e2f8..01383e8f5f 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -127,6 +127,16 @@ VACUUM ANALYZE stxdinh, stxdinh1;
 SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
 DROP TABLE stxdinh, stxdinh1;
 
+-- Ensure inherited stats ARE applied to inherited query in partitioned table
+CREATE TABLE stxdinp(i int, a int, b int) PARTITION BY RANGE (i);
+CREATE TABLE stxdinp1 PARTITION OF stxdinp FOR VALUES FROM (1)TO(100);
+INSERT INTO stxdinp SELECT 1, a/100, a/100 FROM generate_series(1,999)a;
+CREATE STATISTICS stxdinp ON (a),(b) FROM stxdinp;
+VACUUM ANALYZE stxdinp; -- partitions are processed recursively
+SELECT 1 FROM pg_statistic_ext WHERE stxrelid='stxdinp'::regclass;
+SELECT * FROM check_estimated_rows('SELECT a, b FROM stxdinp GROUP BY 1,2');
+DROP TABLE stxdinp;
+
 -- basic test for statistics on expressions
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
 
-- 
2.31.1

From 322b52b92649c2625fbbba369b767029ceff8051 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sat, 25 Sep 2021 19:42:41 -0500
Subject: [PATCH 1/4] Ignore extended statistics for inheritance trees

Since commit 859b3003de we only build extended statistics for relations
without the child relations, so that we update the catalog row only
once. However, the non-inherited statistics were still used for planning
of queries on inheritance trees, which may produce bogus estimates.

This is roughly the same issue 427c6b5b9 addressed ~15 years ago, and we
fix it the same way - by ignoring extended statistics when calculating
estimates for the inheritance tree as a whole. We still consider
extended statistics when calculating estimates for individual child
relations, of course.

Report and patch by Justin Pryzby, minor fixes and cleanup by me.
Backpatch all the way back to PostgreSQL 10, where extended statistics
were introduced (same as 859b3003de).

Author: Justin Pryzby
Reported-by: Justin Pryzby
Backpatch-through: 10
Discussion: https://postgr.es/m/20210923212624.GI831%40telsasoft.com
---
 src/backend/statistics/dependencies.c   |  9 +++++++++
 src/backend/statistics/extended_stats.c |  9 +++++++++
 src/backend/utils/adt/selfuncs.c        | 16 ++++++++++++++++
 src/test/regress/expected/stats_ext.out | 24 ++++++++++++++++++++++++
 src/test/regress/sql/stats_ext.sql      | 15 +++++++++++++++
 5 files changed, 73 insertions(+)

diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index 8bf80db8e4..88479eee8b 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -24,6 +24,7 @@
 #include "nodes/pathnodes.h"
 #include "optimizer/clauses.h"
 #include "optimizer/optimizer.h"
+#include "parser/parsetree.h"
 #include "statistics/extended_stats_internal.h"
 #include "statistics/statistics.h"
 #include "utils/bytea.h"
@@ -1410,11 +1411,19 @@ dependencies_clauselist_selectivity(PlannerInfo *root,
 	int			ndependencies;
 	int			i;
 	AttrNumber	attnum_offset;
+	RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
 
 	/* unique expressions */
 	Node	  **unique_exprs;
 	int			unique_exprs_cnt;
 
+	/*
+	 * When dealing with inheritence trees, ignore extended stats (which were
+	 * built without data from child rels, and thus do not represent them).
+	 */
+	if (rte->inh)
+		return 1.0;
+
 	/* check if there's any stats that might be useful for us. */
 	if (!has_stats_of_kind(rel->statlist, STATS_EXT_DEPENDENCIES))
 		return 1.0;
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 69ca52094f..607b02ab8e 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -30,6 +30,7 @@
 #include "nodes/nodeFuncs.h"
 #include "optimizer/clauses.h"
 #include "optimizer/optimizer.h"
+#include "parser/parsetree.h"
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
 #include "statistics/extended_stats_internal.h"
@@ -1694,6 +1695,14 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 	List	  **list_exprs;		/* expressions matched to any statistic */
 	int			listidx;
 	Selectivity sel = (is_or) ? 0.0 : 1.0;
+	RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
+
+	/*
+	 * When dealing with inheritence trees, ignore extended stats (which were
+	 * built without data from child rels, and thus do not represent them).
+	 */
+	if (rte->inh)
+		return sel;
 
 	/* check if there's any stats that might be useful for us. */
 	if (!has_stats_of_kind(rel->statlist, STATS_EXT_MCV))
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 10895fb287..0b12f5288e 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3913,6 +3913,14 @@ estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
 	Oid			statOid = InvalidOid;
 	MVNDistinct *stats;
 	StatisticExtInfo *matched_info = NULL;
+	RangeTblEntry		*rte = planner_rt_fetch(rel->relid, root);
+
+	/*
+	 * When dealing with inheritence trees, ignore extended stats (which were
+	 * built without data from child rels, and thus do not represent them).
+	 */
+	if (rte->inh)
+		return false;
 
 	/* bail out immediately if the table has no extended statistics */
 	if (!rel->statlist)
@@ -5232,6 +5240,14 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 			if (vardata->statsTuple)
 				break;
 
+			/*
+			 * When dealing with inheritence trees, ignore extended stats (which
+			 * were built without data from child rels, and so do not represent
+			 * them).
+			 */
+			if (planner_rt_fetch(onerel->relid, root)->inh)
+				break;
+
 			/* skip stats without per-expression stats */
 			if (info->kind != STATS_EXT_EXPRESSIONS)
 				continue;
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index c60ba45aba..5410f58f7f 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -176,6 +176,30 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
 ANALYZE ab1;
 DROP TABLE ab1 CASCADE;
 NOTICE:  drop cascades to table ab1c
+-- Tests for stats with inheritance
+CREATE TABLE stxdinh(i int, j int);
+CREATE TABLE stxdinh1() INHERITS(stxdinh);
+INSERT INTO stxdinh SELECT a, a/10 FROM generate_series(1,9)a;
+INSERT INTO stxdinh1 SELECT a, a FROM generate_series(1,999)a;
+VACUUM ANALYZE stxdinh, stxdinh1;
+-- Ensure non-inherited stats are not applied to inherited query
+-- Without stats object, it looks like this
+SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
+ estimated | actual 
+-----------+--------
+      1000 |   1008
+(1 row)
+
+CREATE STATISTICS stxdinh ON i,j FROM stxdinh;
+VACUUM ANALYZE stxdinh, stxdinh1;
+-- Since the stats object does not include inherited stats, it should not affect the estimates
+SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
+ estimated | actual 
+-----------+--------
+      1000 |   1008
+(1 row)
+
+DROP TABLE stxdinh, stxdinh1;
 -- basic test for statistics on expressions
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
 -- expression stats may be built on a single expression column
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 6fb37962a7..a196d5e2f8 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -112,6 +112,21 @@ CREATE STATISTICS ab1_a_b_stats ON a, b FROM ab1;
 ANALYZE ab1;
 DROP TABLE ab1 CASCADE;
 
+-- Tests for stats with inheritance
+CREATE TABLE stxdinh(i int, j int);
+CREATE TABLE stxdinh1() INHERITS(stxdinh);
+INSERT INTO stxdinh SELECT a, a/10 FROM generate_series(1,9)a;
+INSERT INTO stxdinh1 SELECT a, a FROM generate_series(1,999)a;
+VACUUM ANALYZE stxdinh, stxdinh1;
+-- Ensure non-inherited stats are not applied to inherited query
+-- Without stats object, it looks like this
+SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
+CREATE STATISTICS stxdinh ON i,j FROM stxdinh;
+VACUUM ANALYZE stxdinh, stxdinh1;
+-- Since the stats object does not include inherited stats, it should not affect the estimates
+SELECT * FROM check_estimated_rows('SELECT * FROM stxdinh* GROUP BY 1,2');
+DROP TABLE stxdinh, stxdinh1;
+
 -- basic test for statistics on expressions
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c TIMESTAMP, d TIMESTAMPTZ);
 
-- 
2.31.1

Reply via email to