On Tue, Feb 4, 2025 at 4:07 PM Ashutosh Bapat
<ashutosh.bapat....@gmail.com> wrote:
>
> If we are not interested in saving memory, there is a simpler way to
> improve planning time by adding a hash table per equivalence class to
> store the derived clauses, instead of a linked list, when the number
> of derived clauses is higher than a threshold (say 32 same as the
> threshold for join_rel_list. Maybe that approach will yield stable
> planning time.
>

PFA patchset with conflict, with latest HEAD, resolved. I have dropped
0005 from the previous patchset since it didn't show any significant
performance difference. Other than these two things, the patchset is
same as the previous one.


-- 
Best Wishes,
Ashutosh Bapat
From 8704d0ca210e763547d4eea61b00d5a13cac8a9e Mon Sep 17 00:00:00 2001
From: Ashutosh Bapat <ashutosh.bapat....@gmail.com>
Date: Thu, 26 Sep 2024 12:06:25 +0530
Subject: [PATCH 1/6] RestrictInfo hash table interface

Introduces the RestrictInfo hash table interface and adds a pointer to
RestrictInfo hash table in PlannerInfo. RestrictInfo::rinfo_serial and
RestrictInfo::required_relids are used as key into the hash table.

This commit does not add any code which uses the interface itself.
Adding a new member to PlannerInfo increases it size and may have slight
impact due to cacheline faults when accessing this huge structure.
Similarly the interface code increases object file size which again may
have some impact on performance. This is a separate commit to assess any
performance or memory impact this change has.

Ashutosh Bapat
---
 src/backend/optimizer/util/restrictinfo.c | 143 +++++++++++++++++++++-
 src/include/nodes/pathnodes.h             |   3 +
 src/include/optimizer/restrictinfo.h      |   3 +
 src/tools/pgindent/typedefs.list          |   2 +
 4 files changed, 150 insertions(+), 1 deletion(-)

diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c
index a80083d2323..9279062a877 100644
--- a/src/backend/optimizer/util/restrictinfo.c
+++ b/src/backend/optimizer/util/restrictinfo.c
@@ -19,7 +19,7 @@
 #include "optimizer/clauses.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/restrictinfo.h"
-
+#include "common/hashfn.h"
 
 static Expr *make_sub_restrictinfos(PlannerInfo *root,
 									Expr *clause,
@@ -676,3 +676,144 @@ join_clause_is_movable_into(RestrictInfo *rinfo,
 
 	return true;
 }
+
+/* ----------------------------------------------------------------------------
+ * RestrictInfo hash table interface.
+ *
+ * For storing and retrieving child RestrictInfos. Though the interface is used
+ * for child RestrictInfos, it can be used for parent RestrictInfos as well.
+ * Hence the names of functions and structures do not mention child in it unless
+ * necessary.
+ *
+ * RestrictInfo::rinfo_serial is unique for a set of RestrictInfos, all of
+ * which, are versions of same condition.  RestrictInfo::required_relids
+ * identifies the relations to which the RestrictInfo is applicable.  Together
+ * they are used as a key into the hash table since they uniquely identify a
+ * RestictInfo.
+ * ----------------------------------------------------------------------------
+ */
+
+/*
+ * Hash key for RestrictInfo hash table.
+ */
+typedef struct
+{
+	int			rinfo_serial;	/* Same as RestrictInfo::rinfo_serial */
+	Relids		required_relids;	/* Same as RestrictInfo::required_relids */
+} rinfo_tab_key;
+
+/* Hash table entry for RestrictInfo hash table. */
+typedef struct rinfo_tab_entry
+{
+	rinfo_tab_key key;			/* Key must be first. */
+	RestrictInfo *rinfo;
+} rinfo_tab_entry;
+
+/*
+ * rinfo_tab_key_hash
+ *		Computes hash of RestrictInfo hash table key.
+ */
+static uint32
+rinfo_tab_key_hash(const void *key, Size size)
+{
+	rinfo_tab_key *rtabkey = (rinfo_tab_key *) key;
+	uint32		result;
+
+	Assert(sizeof(rinfo_tab_key) == size);
+
+	/* Combine hashes of all components of the key. */
+	result = hash_bytes_uint32(rtabkey->rinfo_serial);
+	result = hash_combine(result, bms_hash_value(rtabkey->required_relids));
+
+	return result;
+}
+
+/*
+ * rinfo_tab_key_match
+ *		Match function for RestrictInfo hash table.
+ */
+static int
+rinfo_tab_key_match(const void *key1, const void *key2, Size size)
+{
+	rinfo_tab_key *rtabkey1 = (rinfo_tab_key *) key1;
+	rinfo_tab_key *rtabkey2 = (rinfo_tab_key *) key2;
+	int			result;
+
+	Assert(sizeof(rinfo_tab_key) == size);
+
+	result = rtabkey1->rinfo_serial - rtabkey2->rinfo_serial;
+	if (result)
+		return result;
+
+	return !bms_equal(rtabkey1->required_relids, rtabkey2->required_relids);
+}
+
+/*
+ * get_child_rinfo_hash
+ *		Returns the child RestrictInfo hash table from PlannerInfo, creating it if
+ *		necessary.
+ */
+static HTAB *
+get_child_rinfo_hash(PlannerInfo *root)
+{
+	if (!root->child_rinfo_hash)
+	{
+		HASHCTL		hash_ctl = {0};
+
+		hash_ctl.keysize = sizeof(rinfo_tab_key);
+		hash_ctl.entrysize = sizeof(rinfo_tab_entry);
+		hash_ctl.hcxt = root->planner_cxt;
+		hash_ctl.hash = rinfo_tab_key_hash;
+		hash_ctl.match = rinfo_tab_key_match;
+
+		root->child_rinfo_hash = hash_create("restrictinfo hash table",
+											 1000,
+											 &hash_ctl,
+											 HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION | HASH_COMPARE);
+	}
+
+	return root->child_rinfo_hash;
+}
+
+/*
+ * add_restrictinfo
+ *		Add the given RestrictInfo to the RestrictInfo hash table.
+ */
+extern void
+add_restrictinfo(PlannerInfo *root, RestrictInfo *rinfo)
+{
+	HTAB	   *rinfo_hash = get_child_rinfo_hash(root);
+	rinfo_tab_key key;
+	rinfo_tab_entry *rinfo_entry;
+	bool		found;
+
+	key.rinfo_serial = rinfo->rinfo_serial;
+	key.required_relids = rinfo->required_relids;
+	rinfo_entry = hash_search(rinfo_hash, &key, HASH_ENTER, &found);
+
+	Assert(!found);
+	rinfo_entry->rinfo = rinfo;
+}
+
+/*
+ * find_restrictinfo
+ *		Return the RestrictInfo with given rinfo_serial and
+ *		required_relids from RestrictInfo hash table.
+ *
+ * The function returns NULL if it does not find the required RestrictInfo in
+ * the hash table.
+ */
+RestrictInfo *
+find_restrictinfo(PlannerInfo *root, int rinfo_serial, Relids required_relids)
+{
+	HTAB	   *rinfo_hash = get_child_rinfo_hash(root);
+	rinfo_tab_entry *rinfo_entry;
+	rinfo_tab_key key;
+
+	key.rinfo_serial = rinfo_serial;
+	key.required_relids = required_relids;
+	rinfo_entry = hash_search(rinfo_hash, &key,
+							  HASH_FIND,
+							  NULL);
+	return (rinfo_entry ? rinfo_entry->rinfo : NULL);
+}
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index fbf05322c75..83dc2ea3781 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -363,6 +363,9 @@ struct PlannerInfo
 	/* counter for assigning RestrictInfo serial numbers */
 	int			last_rinfo_serial;
 
+	/* Hash table to store and retrieve child RestrictInfos. */
+	struct HTAB *child_rinfo_hash pg_node_attr(read_write_ignore);
+
 	/*
 	 * all_result_relids is empty for SELECT, otherwise it contains at least
 	 * parse->resultRelation.  For UPDATE/DELETE/MERGE across an inheritance
diff --git a/src/include/optimizer/restrictinfo.h b/src/include/optimizer/restrictinfo.h
index ec91fc9c583..57d2210ce95 100644
--- a/src/include/optimizer/restrictinfo.h
+++ b/src/include/optimizer/restrictinfo.h
@@ -58,6 +58,9 @@ extern bool join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel);
 extern bool join_clause_is_movable_into(RestrictInfo *rinfo,
 										Relids currentrelids,
 										Relids current_and_outer);
+extern RestrictInfo *find_restrictinfo(PlannerInfo *root, int rinfo_serial,
+									   Relids child_required_relids);
+extern void add_restrictinfo(PlannerInfo *root, RestrictInfo *child_rinfo);
 
 /*
  * clause_sides_match_join
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 98ab45adfa3..2a1acb50a95 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3915,6 +3915,8 @@ rewind_source
 rewrite_event
 rf_context
 rfile
+rinfo_tab_entry
+rinfo_tab_key
 rm_detail_t
 role_auth_extra
 rolename_hash

base-commit: 1fd1bd871012732e3c6c482667d2f2c56f1a9395
-- 
2.34.1

From fcdb6009215b3c2ff03749e40412e5f8d9659a6e Mon Sep 17 00:00:00 2001
From: Ashutosh Bapat <ashutosh.bapat....@gmail.com>
Date: Thu, 26 Sep 2024 12:30:13 +0530
Subject: [PATCH 4/6] Avoid translating RestrictInfo repeatedly

RestrictInfo for the child relations (including the child join
relations) are obtained by translating RestrictInfo applicable to the
parent rel. Since these translations are not tracked, the same
RestrictInfo may get translated multiple times for the same parent and
child pair. When using partitionwise join this can happen as many times
as the number of possible join orders between the partitioned tables and
as many times a parent path is reparameterized for a child if a clause
is used in such a path.

Repeated translations are avoided by saving them in RestrictInfo hash
table and reusing as needed.

Ashutosh Bapat
---
 src/backend/optimizer/path/joinrels.c     |   7 +-
 src/backend/optimizer/util/pathnode.c     | 113 +++++++++++++++++--
 src/backend/optimizer/util/relnode.c      |   7 +-
 src/backend/optimizer/util/restrictinfo.c | 125 +++++++++++++++++++++-
 src/include/optimizer/restrictinfo.h      |   7 ++
 5 files changed, 242 insertions(+), 17 deletions(-)

diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 60d65762b5d..78d2f40472f 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -19,6 +19,7 @@
 #include "optimizer/joininfo.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
+#include "optimizer/restrictinfo.h"
 #include "partitioning/partbounds.h"
 #include "utils/memutils.h"
 
@@ -1652,10 +1653,8 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 		 * Construct restrictions applicable to the child join from those
 		 * applicable to the parent join.
 		 */
-		child_restrictlist =
-			(List *) adjust_appendrel_attrs(root,
-											(Node *) parent_restrictlist,
-											nappinfos, appinfos);
+		child_restrictlist = get_child_restrictinfos(root, parent_restrictlist,
+													 nappinfos, appinfos);
 
 		/* Find or construct the child join's RelOptInfo */
 		child_joinrel = joinrel->part_rels[cnt_parts];
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 93e73cb44db..3e301ceeab1 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -26,6 +26,7 @@
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/planmain.h"
+#include "optimizer/restrictinfo.h"
 #include "optimizer/tlist.h"
 #include "parser/parsetree.h"
 #include "utils/memutils.h"
@@ -4241,6 +4242,78 @@ reparameterize_path(PlannerInfo *root, Path *path,
 	return NULL;
 }
 
+/*
+ * build_child_iclauses_multilevel
+ *		Translate IndexClause list applicable to the given top parent relation
+ *		to that applicable to the given child relation where the child relation
+ *		may be several levels below the top parent in the partition hierarchy.
+ *
+ * This is similar to adjust_appendrel_attrs_multilevel() for IndexClause
+ * except that it uses translated RestrictInfos if already available.
+ */
+static List *
+build_child_iclauses_multilevel(PlannerInfo *root,
+								List *parent_iclauselist,
+								RelOptInfo *childrel,
+								RelOptInfo *top_parent)
+{
+	List	   *child_iclauses = NIL;
+
+	foreach_node(IndexClause, iclause, parent_iclauselist)
+	{
+		List	   *rinfo_list = NIL;
+		List	   *child_rinfo_list;
+		IndexClause *child_iclause = makeNode(IndexClause);
+		ListCell   *lcp;
+		ListCell   *lcc;
+		List	   *indexquals;
+
+		memcpy(child_iclause, iclause, sizeof(IndexClause));
+
+		/*
+		 * Collect RestrictInfos to be translated. That's all there's to
+		 * translate in an IndexClause.
+		 */
+		rinfo_list = lappend(rinfo_list, iclause->rinfo);
+		rinfo_list = list_concat(rinfo_list, iclause->indexquals);
+
+		child_rinfo_list = get_child_restrictinfos_multilevel(root, rinfo_list,
+															  childrel, top_parent);
+		child_iclause->rinfo = linitial(child_rinfo_list);
+		child_iclause->indexquals = NIL;
+		indexquals = list_delete_first(child_rinfo_list);
+
+		/*
+		 * indexquals of parent indexclause may have commuted RestrictInfos.
+		 * Commute the child indexquals accordingly.
+		 */
+		forboth(lcc, indexquals, lcp, iclause->indexquals)
+		{
+			RestrictInfo *child_rinfo = lfirst_node(RestrictInfo, lcc);
+			RestrictInfo *rinfo = lfirst_node(RestrictInfo, lcp);
+			Relids		child_left_relids;
+
+			child_left_relids = adjust_child_relids_multilevel(root, rinfo->left_relids,
+															   childrel, top_parent);
+			if (!bms_equal(child_left_relids, child_rinfo->left_relids))
+			{
+				OpExpr	   *clause = castNode(OpExpr, rinfo->clause);
+
+				Assert(bms_equal(child_left_relids, child_rinfo->right_relids));
+
+				child_rinfo = commute_restrictinfo(child_rinfo, clause->opno);
+			}
+
+			child_iclause->indexquals = lappend(child_iclause->indexquals, child_rinfo);
+		}
+
+		list_free(rinfo_list);
+		child_iclauses = lappend(child_iclauses, child_iclause);
+	}
+
+	return child_iclauses;
+}
+
 /*
  * reparameterize_path_by_child
  * 		Given a path parameterized by the parent of the given child relation,
@@ -4343,7 +4416,10 @@ do { \
 				IndexPath  *ipath = (IndexPath *) path;
 
 				ADJUST_CHILD_ATTRS(ipath->indexinfo->indrestrictinfo);
-				ADJUST_CHILD_ATTRS(ipath->indexclauses);
+				ipath->indexclauses =
+					build_child_iclauses_multilevel(root,
+													ipath->indexclauses,
+													child_rel, child_rel->top_parent);
 				new_path = (Path *) ipath;
 			}
 			break;
@@ -4422,7 +4498,11 @@ do { \
 
 				REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
 				REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
-				ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
+				jpath->joinrestrictinfo =
+					get_child_restrictinfos_multilevel(root,
+													   jpath->joinrestrictinfo,
+													   child_rel,
+													   child_rel->top_parent);
 				new_path = (Path *) npath;
 			}
 			break;
@@ -4434,8 +4514,16 @@ do { \
 
 				REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
 				REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
-				ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
-				ADJUST_CHILD_ATTRS(mpath->path_mergeclauses);
+				jpath->joinrestrictinfo =
+					get_child_restrictinfos_multilevel(root,
+													   jpath->joinrestrictinfo,
+													   child_rel,
+													   child_rel->top_parent);
+				mpath->path_mergeclauses =
+					get_child_restrictinfos_multilevel(root,
+													   mpath->path_mergeclauses,
+													   child_rel,
+													   child_rel->top_parent);
 				new_path = (Path *) mpath;
 			}
 			break;
@@ -4447,8 +4535,16 @@ do { \
 
 				REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
 				REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
-				ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
-				ADJUST_CHILD_ATTRS(hpath->path_hashclauses);
+				jpath->joinrestrictinfo =
+					get_child_restrictinfos_multilevel(root,
+													   jpath->joinrestrictinfo,
+													   child_rel,
+													   child_rel->top_parent);
+				hpath->path_hashclauses =
+					get_child_restrictinfos_multilevel(root,
+													   hpath->path_hashclauses,
+													   child_rel,
+													   child_rel->top_parent);
 				new_path = (Path *) hpath;
 			}
 			break;
@@ -4525,7 +4621,10 @@ do { \
 		new_ppi->ppi_req_outer = bms_copy(required_outer);
 		new_ppi->ppi_rows = old_ppi->ppi_rows;
 		new_ppi->ppi_clauses = old_ppi->ppi_clauses;
-		ADJUST_CHILD_ATTRS(new_ppi->ppi_clauses);
+		new_ppi->ppi_clauses =
+			get_child_restrictinfos_multilevel(root,
+											   new_ppi->ppi_clauses, child_rel,
+											   child_rel->top_parent);
 		new_ppi->ppi_serials = bms_copy(old_ppi->ppi_serials);
 		rel->ppilist = lappend(rel->ppilist, new_ppi);
 
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index ff507331a06..740b6e33631 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -961,10 +961,9 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
 							   nappinfos, appinfos);
 
 	/* Construct joininfo list. */
-	joinrel->joininfo = (List *) adjust_appendrel_attrs(root,
-														(Node *) parent_joinrel->joininfo,
-														nappinfos,
-														appinfos);
+	joinrel->joininfo = get_child_restrictinfos(root,
+												parent_joinrel->joininfo,
+												nappinfos, appinfos);
 
 	/*
 	 * Lateral relids referred in child join will be same as that referred in
diff --git a/src/backend/optimizer/util/restrictinfo.c b/src/backend/optimizer/util/restrictinfo.c
index 9279062a877..11584cdb4ec 100644
--- a/src/backend/optimizer/util/restrictinfo.c
+++ b/src/backend/optimizer/util/restrictinfo.c
@@ -14,12 +14,13 @@
  */
 #include "postgres.h"
 
+#include "common/hashfn.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
+#include "optimizer/appendinfo.h"
 #include "optimizer/clauses.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/restrictinfo.h"
-#include "common/hashfn.h"
 
 static Expr *make_sub_restrictinfos(PlannerInfo *root,
 									Expr *clause,
@@ -779,7 +780,7 @@ get_child_rinfo_hash(PlannerInfo *root)
  * add_restrictinfo
  *		Add the given RestrictInfo to the RestrictInfo hash table.
  */
-extern void
+void
 add_restrictinfo(PlannerInfo *root, RestrictInfo *rinfo)
 {
 	HTAB	   *rinfo_hash = get_child_rinfo_hash(root);
@@ -791,6 +792,13 @@ add_restrictinfo(PlannerInfo *root, RestrictInfo *rinfo)
 	key.required_relids = rinfo->required_relids;
 	rinfo_entry = hash_search(rinfo_hash, &key, HASH_ENTER, &found);
 
+	/*
+	 * If the given RestrictInfo is already present in the hash table,
+	 * multiple instances of the same RestrictInfo may have been created. This
+	 * function is a good place to flag that. While multiple instances of same
+	 * RestrictInfo being created is not a correctness issue, they consume
+	 * memory unnecessarily.
+	 */
 	Assert(!found);
 	rinfo_entry->rinfo = rinfo;
 }
@@ -817,3 +825,116 @@ find_restrictinfo(PlannerInfo *root, int rinfo_serial, Relids required_relids)
 							  NULL);
 	return (rinfo_entry ? rinfo_entry->rinfo : NULL);
 }
+
+/*
+ * get_child_restrictinfos
+ * 		Returns list of child RestrictInfos obtained by translating the given
+ *		parent RestrictInfos according to the given AppendRelInfos.
+ *
+ * RestrictInfos applicable to a child relation are obtained by translating the
+ * corresponding RestrictInfos applicable to the parent relation. The same
+ * parent RestictInfo appears in restrictlist or joininfo list of many parent
+ * join relations and thus may get translated multiple times producing multiple
+ * instances of the same child RestrictInfo. In order to avoid that we store the
+ * translated child RestrictInfos in a hash table and reused.
+ *
+ * If a required translated RestrictInfo is available in the RestrictInfo hash
+ * table, the function includes the available child RestrictInfo to the result
+ * list.  Otherwise, it translates the corresponding parent RestrictInfo and
+ * adds it to the RestrictInfo hash table and the result list.
+ */
+List *
+get_child_restrictinfos(PlannerInfo *root, List *parent_restrictinfos,
+						int nappinfos, AppendRelInfo **appinfos)
+{
+	List	   *child_clauselist = NIL;
+
+	foreach_node(RestrictInfo, parent_rinfo, parent_restrictinfos)
+	{
+		Relids		child_req_relids;
+		RestrictInfo *child_rinfo = NULL;
+
+		child_req_relids = adjust_child_relids(parent_rinfo->required_relids,
+											   nappinfos, appinfos);
+
+		if (bms_equal(child_req_relids, parent_rinfo->required_relids))
+		{
+			/*
+			 * If no relid was translated, child's RestrictInfo is same as
+			 * that of parent.
+			 */
+			child_rinfo = parent_rinfo;
+		}
+		else
+		{
+			child_rinfo = find_restrictinfo(root, parent_rinfo->rinfo_serial, child_req_relids);
+
+			/*
+			 * This function may be called thousands of times when there are
+			 * thousands of partitions involved. We won't require the
+			 * translated child relids further. Hence free those to avoid
+			 * accumulating huge amounts of memory.
+			 */
+			bms_free(child_req_relids);
+		}
+
+		if (!child_rinfo)
+		{
+			child_rinfo = castNode(RestrictInfo,
+								   adjust_appendrel_attrs(root, (Node *) parent_rinfo,
+														  nappinfos, appinfos));
+			add_restrictinfo(root, child_rinfo);
+		}
+
+		child_clauselist = lappend(child_clauselist, child_rinfo);
+	}
+
+	return child_clauselist;
+}
+
+/*
+ * get_child_restrictinfos_multilevel
+ *		Similar to get_child_restrictinfos() but for translations through multiple
+ *		levels of partitioning hierarchy.
+ *
+ * This function is similar to adjust_appendrel_attrs_multilevel() except that
+ * this function makes use of RestrictInfo hash table.
+ */
+List *
+get_child_restrictinfos_multilevel(PlannerInfo *root, List *parent_clauselist,
+								   RelOptInfo *child_rel, RelOptInfo *top_parent)
+{
+	AppendRelInfo **appinfos;
+	int			nappinfos;
+	List	   *tmp_clauselist = parent_clauselist;
+	List	   *child_clauselist;
+
+	/* Recursively traverse up the partition hierarchy. */
+	if (child_rel->parent != top_parent)
+	{
+		if (child_rel->parent)
+		{
+			tmp_clauselist = get_child_restrictinfos_multilevel(root,
+																parent_clauselist,
+																child_rel->parent,
+																top_parent);
+		}
+		else
+			elog(ERROR, "child_rel is not a child of top_parent");
+	}
+
+	appinfos = find_appinfos_by_relids(root, child_rel->relids, &nappinfos);
+	child_clauselist = get_child_restrictinfos(root, tmp_clauselist,
+											   nappinfos, appinfos);
+
+	/*
+	 * This function will be called thousands of times, if there are thousands
+	 * of partitions involved. Free temporary objects created in this function
+	 * to avoid accumulating huge memory.
+	 */
+	pfree(appinfos);
+	if (tmp_clauselist != parent_clauselist)
+		list_free(tmp_clauselist);
+
+	return child_clauselist;
+}
diff --git a/src/include/optimizer/restrictinfo.h b/src/include/optimizer/restrictinfo.h
index 57d2210ce95..5e1f0136495 100644
--- a/src/include/optimizer/restrictinfo.h
+++ b/src/include/optimizer/restrictinfo.h
@@ -61,6 +61,13 @@ extern bool join_clause_is_movable_into(RestrictInfo *rinfo,
 extern RestrictInfo *find_restrictinfo(PlannerInfo *root, int rinfo_serial,
 									   Relids child_required_relids);
 extern void add_restrictinfo(PlannerInfo *root, RestrictInfo *child_rinfo);
+extern List *get_child_restrictinfos(PlannerInfo *root,
+									 List *parent_restrictinfos,
+									 int nappinfos, AppendRelInfo **appinfos);
+extern List *get_child_restrictinfos_multilevel(PlannerInfo *root,
+												List *parent_clauselist,
+												RelOptInfo *child_rel,
+												RelOptInfo *top_parent);
 
 /*
  * clause_sides_match_join
-- 
2.34.1

From 3781947e082e971510e459058d8047bfd5218402 Mon Sep 17 00:00:00 2001
From: Ashutosh Bapat <ashutosh.bapat....@gmail.com>
Date: Thu, 26 Sep 2024 12:24:15 +0530
Subject: [PATCH 3/6] Use RestrictInfo hash table for storing EC derived child
 RestrictInfos

RestrictInfos derived from an EquivalenceClass, including the child
RestrictInfos, are stored in its EquivalenceClass::ec_derives which
is a linked list. The linked list is scanned every time
create_join_clause() is called, which in turn is called multiple times
for a given partition. The loop thus contributes O(N^2) at run time
where N is number of partitions. When there are thousands of
partition, the loops contributes significantly to the planning time.

In order to reduce the time complexity of searching a clause, store the
child clauses in a hash table, so that contribution of this loop in
overall planning time reduces to O(N). Please note that the parent
derived clauses are still stored in EquivalenceClass::ec_derives.

The hash table approach differs from the linked list based approach in
the following aspects:

1. The linked lists are per EquivalenceClass whereas we use a single
   hash table for all the child clauses. A hash table consumes more
   memory compared to the linked list, thus having one hash table per
   EquivalenceClass causes more memory to be consumed. But hash tables
   can efficiently handle thousands of entries hence one hash table to
   store all the child derived clauses from all the EquivalenceClasses
   suffices.

2. EquivalenceClass::ec_derives is searched by
   EquivalenceClass::left_em/right_em pointers whereas the hash table is
   key'ed by RestrictInfo::rinfo_serial and
   RestrictInfo::required_relids. In the future (next commits) the hash
   table will be used for storing the child RestrictInfos produced by
   translating the corresponding parent RestrictInfos which may not be
   part of EquivalenceClasses. Such RestrictInfos do not have
   EquivalenceMembers associated with them and hence can not be used as
   a key. OTOH, RestrictInfo::rinfo_serial and
   RestrictInfo::required_relids is set in all RestrictInfos.

   To cope with this, create_join_clause() searches for the parent
   clause in EquivalenceClass::ec_sources or
   EquivalenceClass::ec_derives before searching for the child clause in
   the hash table. That's some extra work compared to linked list based
   approach where a child clause could be searched directly in
   EquivalenceClass::ec_derives. But with this change, the linked lists
   should be short and thus searching for a parent clause should not
   take much time.

Please note that EquivalenceClass:ec_derives is used in functions other
than create_join_clause(). But all those usages happen before any child
EquivalenceMembers are added. This commit adds Asserts in those places
or leverages existing Asserts to validate this fact. Also the Asserts
will help in noticing any change in case the assumption is broken in the
future.  Hence moving child derived clauses out of EquivalenceClass does
not need any changes to those functions.

Ashutosh Bapat
---
 src/backend/optimizer/path/equivclass.c   | 121 +++++++++++++++++-----
 src/backend/optimizer/plan/analyzejoins.c |   9 ++
 src/include/nodes/pathnodes.h             |  10 +-
 3 files changed, 115 insertions(+), 25 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index 0f9ecf5ee8b..e2a2296387d 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -27,6 +27,7 @@
 #include "optimizer/optimizer.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
+#include "optimizer/cost.h"
 #include "optimizer/planmain.h"
 #include "optimizer/restrictinfo.h"
 #include "rewrite/rewriteManip.h"
@@ -268,7 +269,12 @@ process_equivalence(PlannerInfo *root,
 		{
 			EquivalenceMember *cur_em = (EquivalenceMember *) lfirst(lc2);
 
-			Assert(!cur_em->em_is_child);	/* no children yet */
+			/*
+			 * No children yet and hence no child derived clauses to process.
+			 * Child derived clauses are stored outside an EquivalenceClass,
+			 * but we don't need to worry about them here.
+			 */
+			Assert(!cur_em->em_is_child);
 
 			/*
 			 * Match constants only within the same JoinDomain (see
@@ -1162,7 +1168,12 @@ generate_base_implied_equalities_const(PlannerInfo *root,
 		Oid			eq_op;
 		RestrictInfo *rinfo;
 
-		Assert(!cur_em->em_is_child);	/* no children yet */
+		/*
+		 * No children yet, and hence no child derived clauses. Child derived
+		 * clauses are stored outside EquivalenceClass but we don't need to
+		 * worry about those in this function.
+		 */
+		Assert(!cur_em->em_is_child);
 		if (cur_em == const_em)
 			continue;
 		eq_op = select_equality_operator(ec,
@@ -1825,6 +1836,53 @@ create_join_clause(PlannerInfo *root,
 	RestrictInfo *parent_rinfo = NULL;
 	ListCell   *lc;
 	MemoryContext oldcontext;
+	Relids		qualscope;
+
+	/*
+	 * The Relids computed here may be set in the RestrictInfo created in this
+	 * function. Hence allocate the memory for them in planner context where
+	 * the memory for RestrictInfo also gets allocated.
+	 */
+	oldcontext = MemoryContextSwitchTo(root->planner_cxt);
+	qualscope = bms_union(leftem->em_relids, rightem->em_relids);
+	MemoryContextSwitchTo(oldcontext);
+
+	/*
+	 * If either EM is a child, recursively create the corresponding
+	 * parent-to-parent clause, so that we can duplicate its rinfo_serial and
+	 * fetch the child clause if it already exists.
+	 */
+	if (leftem->em_is_child || rightem->em_is_child)
+	{
+		EquivalenceMember *leftp = leftem->em_parent ? leftem->em_parent : leftem;
+		EquivalenceMember *rightp = rightem->em_parent ? rightem->em_parent : rightem;
+
+		parent_rinfo = create_join_clause(root, ec, opno,
+										  leftp, rightp,
+										  parent_ec);
+
+		rinfo = find_restrictinfo(root, parent_rinfo->rinfo_serial, qualscope);
+		if (rinfo)
+		{
+			/*
+			 * Make sure that we got the child clause made from given
+			 * equivalence members which are part of the given equivalence
+			 * class.
+			 */
+			Assert((rinfo->left_em == leftem && rinfo->right_em == rightem) ||
+				   (rinfo->left_em == rightem && rinfo->right_em == leftem));
+			Assert(rinfo->parent_ec == parent_ec);
+			Assert(rinfo->left_ec == ec && rinfo->right_ec == ec);
+
+			/*
+			 * qualscope was just used for lookup and is not required anymore.
+			 * Free it to avoid accumulating memory in case the query involves
+			 * thousands of partitions.
+			 */
+			bms_free(qualscope);
+			return rinfo;
+		}
+	}
 
 	/*
 	 * Search to see if we already built a RestrictInfo for this pair of
@@ -1867,27 +1925,12 @@ create_join_clause(PlannerInfo *root,
 	 */
 	oldcontext = MemoryContextSwitchTo(root->planner_cxt);
 
-	/*
-	 * If either EM is a child, recursively create the corresponding
-	 * parent-to-parent clause, so that we can duplicate its rinfo_serial.
-	 */
-	if (leftem->em_is_child || rightem->em_is_child)
-	{
-		EquivalenceMember *leftp = leftem->em_parent ? leftem->em_parent : leftem;
-		EquivalenceMember *rightp = rightem->em_parent ? rightem->em_parent : rightem;
-
-		parent_rinfo = create_join_clause(root, ec, opno,
-										  leftp, rightp,
-										  parent_ec);
-	}
-
 	rinfo = build_implied_join_equality(root,
 										opno,
 										ec->ec_collation,
 										leftem->em_expr,
 										rightem->em_expr,
-										bms_union(leftem->em_relids,
-												  rightem->em_relids),
+										qualscope,
 										ec->ec_min_security);
 
 	/*
@@ -1905,10 +1948,6 @@ create_join_clause(PlannerInfo *root,
 		rinfo->clause_relids = bms_add_members(rinfo->clause_relids,
 											   rightem->em_relids);
 
-	/* If it's a child clause, copy the parent's rinfo_serial */
-	if (parent_rinfo)
-		rinfo->rinfo_serial = parent_rinfo->rinfo_serial;
-
 	/* Mark the clause as redundant, or not */
 	rinfo->parent_ec = parent_ec;
 
@@ -1922,11 +1961,35 @@ create_join_clause(PlannerInfo *root,
 	/* Mark it as usable with these EMs */
 	rinfo->left_em = leftem;
 	rinfo->right_em = rightem;
-	/* and save it for possible re-use */
-	ec->ec_derives = lappend(ec->ec_derives, rinfo);
+
+	/*
+	 * and save it for possible re-use. Parent clauses go into ec_derives but
+	 * child clauses are stored in a separate hash table separately for
+	 * performance reasons as explained in the EquivaleneClass prologue.
+	 */
+	if (!parent_rinfo)
+		ec->ec_derives = lappend(ec->ec_derives, rinfo);
 
 	MemoryContextSwitchTo(oldcontext);
 
+	if (parent_rinfo)
+	{
+		/* Copy parent's rinfo_serial to child RestrictInfo. */
+		rinfo->rinfo_serial = parent_rinfo->rinfo_serial;
+
+		/*
+		 * When merging EquivalencClasses we also merge ec_derives from. But
+		 * by the time we derive child clauses, EC merging should be complete
+		 * and hence it should be safe not to save child clauses in derived
+		 * list. If that's not the case we might miss merging child derived
+		 * clauses. Following Assert will trigger if things change.
+		 */
+		Assert(root->ec_merging_done);
+
+		/* Save child RestrictInfo for further re-use. */
+		add_restrictinfo(root, rinfo);
+	}
+
 	return rinfo;
 }
 
@@ -2655,6 +2718,16 @@ find_derived_clause_for_ec_member(EquivalenceClass *ec,
 
 	Assert(ec->ec_has_const);
 	Assert(!em->em_is_const);
+
+	/*
+	 * The child derived clauses are stored outside an EquivalenceClass. If
+	 * this function is passed a child EquivalenceMember, we have to find the
+	 * corresponding parent clause using the parent EquivalenceMember and then
+	 * find the child clause. But as of now this function does not receive
+	 * child EquivalenceMember's, so we don't worry about all that.  Following
+	 * Assert will let us know if things change.
+	 */
+	Assert(!em->em_is_child);
 	foreach(lc, ec->ec_derives)
 	{
 		RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 3aa04d0d4e1..c83ddda585a 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -701,6 +701,15 @@ remove_rel_from_eclass(EquivalenceClass *ec, int relid, int ojrelid, int subst)
 			(ojrelid != -1 && bms_is_member(ojrelid, cur_em->em_relids)))
 		{
 			Assert(!cur_em->em_is_const);
+
+			/*
+			 * The relation removal happens before any children are expanded.
+			 * Hence we do not see child EquivalenceMembers in this function.
+			 * Hence we bon't need to worry about removing child derived
+			 * clauses which are stored outside EquivalenceClass. Assert below
+			 * would trigger if things change.
+			 */
+			Assert(!cur_em->em_is_child);
 			cur_em->em_relids = adjust_relid_set(cur_em->em_relids, relid, subst);
 			cur_em->em_relids = adjust_relid_set(cur_em->em_relids, ojrelid, subst);
 			if (bms_is_empty(cur_em->em_relids))
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 25689e5c5a6..cf8a633a275 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1406,6 +1406,14 @@ typedef struct JoinDomain
  * NB: if ec_merged isn't NULL, this class has been merged into another, and
  * should be ignored in favor of using the pointed-to class.
  *
+ * When there are hundreds of partitions or inheritance children involved, there
+ * can be thousands of derived child clauses all linked in ec_derives which is
+ * scanned linearly before creating a new clause in create_join_clause(). For
+ * faster lookup the child derived clauses are stored in a hash table located
+ * outside the equivalence class in PlannerInfo. We do not need one hash table
+ * per EquivalenceClass since, unlike a linked list, a hash tables can
+ * efficiently handle thousands of entries.
+ *
  * NB: EquivalenceClasses are never copied after creation.  Therefore,
  * copyObject() copies pointers to them as pointers, and equal() compares
  * pointers to EquivalenceClasses via pointer equality.  This is implemented
@@ -1422,7 +1430,7 @@ typedef struct EquivalenceClass
 	Oid			ec_collation;	/* collation, if datatypes are collatable */
 	List	   *ec_members;		/* list of EquivalenceMembers */
 	List	   *ec_sources;		/* list of generating RestrictInfos */
-	List	   *ec_derives;		/* list of derived RestrictInfos */
+	List	   *ec_derives;		/* list of derived parent RestrictInfos */
 	Relids		ec_relids;		/* all relids appearing in ec_members, except
 								 * for child members (see below) */
 	bool		ec_has_const;	/* any pseudoconstants in ec_members? */
-- 
2.34.1

From 5b14bed22abd3c2e2ff4fb03f94eedb6b1dbd345 Mon Sep 17 00:00:00 2001
From: Ashutosh Bapat <ashutosh.bapat....@gmail.com>
Date: Mon, 7 Oct 2024 19:48:17 +0530
Subject: [PATCH 2/6] Compact PlannerInfo to restore its previous size

Previous commit added a new member to PlannerInfo increasing its size
from 696 to 704 bytes in size on my laptop. This change coincides with a
potential planning time regression for query involving partitioned
tables with lower number of partitios (around 100 or less) and lower
number of joins between partitioned tables. This commit rearranges the
members of PlannerInfo so that the size is back to 696. We should merge
this commit into previous commit in case this fixes the regression,
otherwise discard it.

Ashutosh Bapat
---
 src/include/nodes/pathnodes.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 83dc2ea3781..25689e5c5a6 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -337,6 +337,12 @@ struct PlannerInfo
 	/* set true once ECs are canonical */
 	bool		ec_merging_done;
 
+	/* counter for assigning RestrictInfo serial numbers */
+	int			last_rinfo_serial;
+
+	/* Hash table to store and retrieve child RestrictInfos. */
+	struct HTAB *child_rinfo_hash pg_node_attr(read_write_ignore);
+
 	/* list of "canonical" PathKeys */
 	List	   *canon_pathkeys;
 
@@ -360,12 +366,6 @@ struct PlannerInfo
 	/* list of SpecialJoinInfos */
 	List	   *join_info_list;
 
-	/* counter for assigning RestrictInfo serial numbers */
-	int			last_rinfo_serial;
-
-	/* Hash table to store and retrieve child RestrictInfos. */
-	struct HTAB *child_rinfo_hash pg_node_attr(read_write_ignore);
-
 	/*
 	 * all_result_relids is empty for SELECT, otherwise it contains at least
 	 * parse->resultRelation.  For UPDATE/DELETE/MERGE across an inheritance
-- 
2.34.1

Reply via email to