On Tue, Aug 8, 2023 at 4:08 PM Richard Guo <guofengli...@gmail.com> wrote:
>
>
> I haven't looked into the details but with 0002 patch I came across a
> crash while planning the query below.
>
> regression=# set enable_partitionwise_join to on;
> SET
> regression=# EXPLAIN (COSTS OFF)
> SELECT * FROM prt1 t1, prt2 t2
> WHERE t1.a = t2.b AND t1.a < 450 AND t2.b > 250 AND t1.b = 0;
> server closed the connection unexpectedly

Thanks for the report. PFA the patches with this issue fixed. There is
another issue seen when running partition_join.sql "ERROR:  index key
does not match expected index column". I will investigate that issue.

Right now I am looking at the 4th point in my first email in this
thread. [1]. I am trying to figure whether that approach would work.
If that approach doesn't work what's the best to track the
translations and also figure out answers to 1, 2, 3 there. Please let
me know your opinions if any.

-- 
Best Wishes,
Ashutosh Bapat

[1] 
https://www.postgresql.org/message-id/CAExHW5s=bclmmq8n_bn6iu+pjau0ds3z_6dn6ile69esmsp...@mail.gmail.com
From 4abdfd676a31d885ccdbe1803c60c1df1d0c1df2 Mon Sep 17 00:00:00 2001
From: Ashutosh Bapat <ashutosh.ba...@enterprisedb.com>
Date: Wed, 12 Jul 2023 14:34:14 +0530
Subject: [PATCH 1/2] Report memory used for planning a query in EXPLAIN
 ANALYZE

The memory used in the CurrentMemoryContext and its children is sampled
before and after calling pg_plan_query() from ExplainOneQuery(). The
difference in the two samples is reported as the memory consumed while
planning the query. This may not account for the memory allocated in
memory contexts which are not children of CurrentMemoryContext. These
contexts are usually other long lived contexts, e.g.
CacheMemoryContext, which are shared by all the queries run in a
session. The consumption in those can not be attributed only to a given
query and hence should not be reported any way.

The memory consumption is reported as "Planning Memory" property in
EXPLAIN ANALYZE output.

Ashutosh Bapat
---
 src/backend/commands/explain.c        | 12 ++++++++++--
 src/backend/commands/prepare.c        |  7 ++++++-
 src/backend/utils/mmgr/mcxt.c         | 19 +++++++++++++++++++
 src/include/commands/explain.h        |  3 ++-
 src/include/utils/memutils.h          |  1 +
 src/test/regress/expected/explain.out | 15 +++++++++++----
 6 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 8570b14f62..9f859949f0 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -397,16 +397,20 @@ ExplainOneQuery(Query *query, int cursorOptions,
 					planduration;
 		BufferUsage bufusage_start,
 					bufusage;
+		Size		mem_consumed;
 
 		if (es->buffers)
 			bufusage_start = pgBufferUsage;
 		INSTR_TIME_SET_CURRENT(planstart);
+		mem_consumed = MemoryContextMemUsed(CurrentMemoryContext);
 
 		/* plan the query */
 		plan = pg_plan_query(query, queryString, cursorOptions, params);
 
 		INSTR_TIME_SET_CURRENT(planduration);
 		INSTR_TIME_SUBTRACT(planduration, planstart);
+		mem_consumed = MemoryContextMemUsed(CurrentMemoryContext)
+						- mem_consumed;
 
 		/* calc differences of buffer counters. */
 		if (es->buffers)
@@ -417,7 +421,7 @@ ExplainOneQuery(Query *query, int cursorOptions,
 
 		/* run it (if needed) and produce output */
 		ExplainOnePlan(plan, into, es, queryString, params, queryEnv,
-					   &planduration, (es->buffers ? &bufusage : NULL));
+					   &planduration, (es->buffers ? &bufusage : NULL), &mem_consumed);
 	}
 }
 
@@ -527,7 +531,7 @@ void
 ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 			   const char *queryString, ParamListInfo params,
 			   QueryEnvironment *queryEnv, const instr_time *planduration,
-			   const BufferUsage *bufusage)
+			   const BufferUsage *bufusage, const Size *mem_consumed)
 {
 	DestReceiver *dest;
 	QueryDesc  *queryDesc;
@@ -628,6 +632,10 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 		double		plantime = INSTR_TIME_GET_DOUBLE(*planduration);
 
 		ExplainPropertyFloat("Planning Time", "ms", 1000.0 * plantime, 3, es);
+
+		if (mem_consumed)
+			ExplainPropertyUInteger("Planning Memory", "bytes",
+										(uint64) *mem_consumed, es);
 	}
 
 	/* Print info about runtime of triggers */
diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c
index 18f70319fc..84544ce481 100644
--- a/src/backend/commands/prepare.c
+++ b/src/backend/commands/prepare.c
@@ -583,10 +583,12 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es,
 	instr_time	planduration;
 	BufferUsage bufusage_start,
 				bufusage;
+	Size		mem_consumed;
 
 	if (es->buffers)
 		bufusage_start = pgBufferUsage;
 	INSTR_TIME_SET_CURRENT(planstart);
+	mem_consumed = MemoryContextMemUsed(CurrentMemoryContext);
 
 	/* Look it up in the hash table */
 	entry = FetchPreparedStatement(execstmt->name, true);
@@ -623,6 +625,8 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es,
 
 	INSTR_TIME_SET_CURRENT(planduration);
 	INSTR_TIME_SUBTRACT(planduration, planstart);
+	mem_consumed = MemoryContextMemUsed(CurrentMemoryContext)
+					- mem_consumed;
 
 	/* calc differences of buffer counters. */
 	if (es->buffers)
@@ -640,7 +644,8 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es,
 
 		if (pstmt->commandType != CMD_UTILITY)
 			ExplainOnePlan(pstmt, into, es, query_string, paramLI, queryEnv,
-						   &planduration, (es->buffers ? &bufusage : NULL));
+						   &planduration, (es->buffers ? &bufusage : NULL),
+						   &mem_consumed);
 		else
 			ExplainOneUtility(pstmt->utilityStmt, into, es, query_string,
 							  paramLI, queryEnv);
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 9fc83f11f6..43af271f33 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -747,6 +747,25 @@ MemoryContextStatsDetail(MemoryContext context, int max_children,
 								 grand_totals.totalspace - grand_totals.freespace)));
 }
 
+/*
+ * Compute the memory used in the given context and its children.
+ *
+ * XXX: Instead of this separate function we could modify
+ * MemoryContextStatsDetail() to report used memory and disable printing the
+ * detailed stats.
+ */
+extern Size
+MemoryContextMemUsed(MemoryContext context)
+{
+	MemoryContextCounters grand_totals;
+
+	memset(&grand_totals, 0, sizeof(grand_totals));
+
+	MemoryContextStatsInternal(context, 0, false, 100, &grand_totals, false);
+
+	return grand_totals.totalspace - grand_totals.freespace;
+}
+
 /*
  * MemoryContextStatsInternal
  *		One recursion level for MemoryContextStats
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h
index 3d3e632a0c..21e3d2f309 100644
--- a/src/include/commands/explain.h
+++ b/src/include/commands/explain.h
@@ -92,7 +92,8 @@ extern void ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into,
 						   ExplainState *es, const char *queryString,
 						   ParamListInfo params, QueryEnvironment *queryEnv,
 						   const instr_time *planduration,
-						   const BufferUsage *bufusage);
+						   const BufferUsage *bufusage,
+						   const Size *mem_consumed);
 
 extern void ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc);
 extern void ExplainPrintTriggers(ExplainState *es, QueryDesc *queryDesc);
diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index 21640d62a6..d7c477f229 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -92,6 +92,7 @@ extern void MemoryContextStatsDetail(MemoryContext context, int max_children,
 									 bool print_to_stderr);
 extern void MemoryContextAllowInCriticalSection(MemoryContext context,
 												bool allow);
+extern Size MemoryContextMemUsed(MemoryContext context);
 
 #ifdef MEMORY_CONTEXT_CHECKING
 extern void MemoryContextCheck(MemoryContext context);
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index 1aca77491b..123ab22f5d 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -65,8 +65,9 @@ select explain_filter('explain (analyze) select * from int8_tbl i8');
 -----------------------------------------------------------------------------------------------
  Seq Scan on int8_tbl i8  (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N)
  Planning Time: N.N ms
+ Planning Memory: N bytes
  Execution Time: N.N ms
-(3 rows)
+(4 rows)
 
 select explain_filter('explain (analyze, verbose) select * from int8_tbl i8');
                                             explain_filter                                            
@@ -74,16 +75,18 @@ select explain_filter('explain (analyze, verbose) select * from int8_tbl i8');
  Seq Scan on public.int8_tbl i8  (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N)
    Output: q1, q2
  Planning Time: N.N ms
+ Planning Memory: N bytes
  Execution Time: N.N ms
-(4 rows)
+(5 rows)
 
 select explain_filter('explain (analyze, buffers, format text) select * from int8_tbl i8');
                                         explain_filter                                         
 -----------------------------------------------------------------------------------------------
  Seq Scan on int8_tbl i8  (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N)
  Planning Time: N.N ms
+ Planning Memory: N bytes
  Execution Time: N.N ms
-(3 rows)
+(4 rows)
 
 select explain_filter('explain (analyze, buffers, format xml) select * from int8_tbl i8');
                      explain_filter                     
@@ -128,6 +131,7 @@ select explain_filter('explain (analyze, buffers, format xml) select * from int8
        <Temp-Written-Blocks>N</Temp-Written-Blocks>    +
      </Planning>                                       +
      <Planning-Time>N.N</Planning-Time>                +
+     <Planning-Memory>N</Planning-Memory>              +
      <Triggers>                                        +
      </Triggers>                                       +
      <Execution-Time>N.N</Execution-Time>              +
@@ -174,6 +178,7 @@ select explain_filter('explain (analyze, buffers, format yaml) select * from int
      Temp Read Blocks: N      +
      Temp Written Blocks: N   +
    Planning Time: N.N         +
+   Planning Memory: N         +
    Triggers:                  +
    Execution Time: N.N
 (1 row)
@@ -280,6 +285,7 @@ select explain_filter('explain (analyze, buffers, format json) select * from int
        "Temp I/O Write Time": N.N  +
      },                            +
      "Planning Time": N.N,         +
+     "Planning Memory": N,         +
      "Triggers": [                 +
      ],                            +
      "Execution Time": N.N         +
@@ -531,7 +537,8 @@ select jsonb_pretty(
          "Triggers": [                                      +
          ],                                                 +
          "Planning Time": 0.0,                              +
-         "Execution Time": 0.0                              +
+         "Execution Time": 0.0,                             +
+         "Planning Memory": 0                               +
      }                                                      +
  ]
 (1 row)
-- 
2.25.1

From 2616e2c0623d212d6a957f24b620d9ca6855023d Mon Sep 17 00:00:00 2001
From: Ashutosh Bapat <ashutosh.ba...@enterprisedb.com>
Date: Thu, 22 Jun 2023 14:27:14 +0530
Subject: [PATCH 2/2] Avoid translating RestrictInfo repeatedly

RestrictInfo to be applied to the child rels (including the child join rels)
are obtained by translating RestrictInfo applicable to the parent rel. Since
these translations are not tracked, the same RestrictInfo may get translated
multiple times for the same parent and child pair. When using partitionwise
join this can happen as many times as the number of possible join orders
between the partitioned tables.

Every translated RestrictInfo is stored in the top parent's RestrictInfo in a
list. Every translated RestrictInfo contains a pointer to the top parent's
RestrictInfo to track multilevel translations. RestrictInfo::required_relids is
used as a key to search a given translation. adjust_appendrel_attrs_mutator()
first looks up an existing translations when translating a RestrictInfo and
creates a new one only when one doesn't exist.

Ashutosh Bapat
---
 src/backend/optimizer/util/appendinfo.c   | 43 ++++++++++++++++++++++-
 src/backend/optimizer/util/restrictinfo.c |  2 ++
 src/include/nodes/pathnodes.h             |  7 ++++
 3 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/src/backend/optimizer/util/appendinfo.c b/src/backend/optimizer/util/appendinfo.c
index f456b3b0a4..8931cf91a4 100644
--- a/src/backend/optimizer/util/appendinfo.c
+++ b/src/backend/optimizer/util/appendinfo.c
@@ -217,6 +217,7 @@ adjust_appendrel_attrs_mutator(Node *node,
 {
 	AppendRelInfo **appinfos = context->appinfos;
 	int			nappinfos = context->nappinfos;
+	PlannerInfo	   *root = context->root;
 	int			cnt;
 
 	if (node == NULL)
@@ -445,7 +446,42 @@ adjust_appendrel_attrs_mutator(Node *node,
 	if (IsA(node, RestrictInfo))
 	{
 		RestrictInfo *oldinfo = (RestrictInfo *) node;
-		RestrictInfo *newinfo = makeNode(RestrictInfo);
+		Relids child_required_relids = adjust_child_relids(oldinfo->required_relids,
+															nappinfos, appinfos);
+		RestrictInfo   *parent_rinfo;
+		ListCell   *lc;
+		RestrictInfo *newinfo;
+		MemoryContext	old_context;
+
+		/*
+		 * If the adjusted RestrictInfo already exists, use it otherwise create
+		 * a new one. This avoids translating the same RestrictInfo again and
+		 * again wasting memory especially in partitionwise joins.
+		 */
+
+		if (bms_equal(child_required_relids, oldinfo->required_relids))
+			return (Node *) oldinfo;
+
+		/*
+		 * Check if we already have the RestrictInfo for the given child in the
+		 * topmost parent's RestrictInfo.
+		 */
+		parent_rinfo = oldinfo->parent_rinfo ? oldinfo->parent_rinfo : oldinfo;
+		foreach (lc, parent_rinfo->child_rinfos)
+		{
+			newinfo = lfirst(lc);
+
+			if (bms_equal(newinfo->required_relids, child_required_relids))
+			{
+				bms_free(child_required_relids);
+				return (Node *) newinfo;
+			}
+		}
+		bms_free(child_required_relids);
+
+		/* Translate in a long lasting memory context. */
+		old_context = MemoryContextSwitchTo(root->planner_cxt);
+		newinfo = makeNode(RestrictInfo);
 
 		/* Copy all flat-copiable fields, notably including rinfo_serial */
 		memcpy(newinfo, oldinfo, sizeof(RestrictInfo));
@@ -491,6 +527,11 @@ adjust_appendrel_attrs_mutator(Node *node,
 		newinfo->right_bucketsize = -1;
 		newinfo->left_mcvfreq = -1;
 		newinfo->right_mcvfreq = -1;
+		newinfo->parent_rinfo = parent_rinfo;
+		parent_rinfo->child_rinfos = lappend(parent_rinfo->child_rinfos,
+											 newinfo);
+
+		MemoryContextSwitchTo(old_context);
 
 		return (Node *) newinfo;
 	}
diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c
index c1fbbb6bfe..fec8eeef58 100644
--- a/src/backend/optimizer/util/restrictinfo.c
+++ b/src/backend/optimizer/util/restrictinfo.c
@@ -246,6 +246,8 @@ make_restrictinfo_internal(PlannerInfo *root,
 
 	restrictinfo->left_hasheqoperator = InvalidOid;
 	restrictinfo->right_hasheqoperator = InvalidOid;
+	restrictinfo->child_rinfos = NIL;
+	restrictinfo->parent_rinfo = NULL;
 
 	return restrictinfo;
 }
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index a1dc1d07e1..a9b2809f2c 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2658,6 +2658,13 @@ typedef struct RestrictInfo
 	/* hash equality operators used for memoize nodes, else InvalidOid */
 	Oid			left_hasheqoperator pg_node_attr(equal_ignore);
 	Oid			right_hasheqoperator pg_node_attr(equal_ignore);
+
+	/* Only one of these two can be set. */
+	List	   *child_rinfos pg_node_attr(equal_ignore, copy_as(NIL)); /* RestrictInfos derived for children. */
+	struct RestrictInfo *parent_rinfo pg_node_attr(equal_ignore, copy_as(NULL));		/* Parent restrictinfo this
+											 * RestrictInf is derived from.
+											 */
+
 } RestrictInfo;
 
 /*
-- 
2.25.1

Reply via email to