From ffcb6ade688d1956c365df4d8368da5c8a721a65 Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Wed, 2 Aug 2023 16:09:55 +0800
Subject: [PATCH v3] Postpone reparameterization of paths until plan
 creation

---
 src/backend/optimizer/path/joinpath.c        |  55 +++-------
 src/backend/optimizer/plan/createplan.c      |  16 +++
 src/backend/optimizer/util/pathnode.c        | 106 ++++++++++++++++---
 src/include/nodes/pathnodes.h                |  13 +++
 src/include/optimizer/pathnode.h             |   1 +
 src/test/regress/expected/partition_join.out |  60 +++++++++++
 src/test/regress/sql/partition_join.sql      |  12 +++
 7 files changed, 205 insertions(+), 58 deletions(-)

diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 059e605e04..b10c2cd815 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -30,19 +30,6 @@
 /* Hook for plugins to get control in add_paths_to_joinrel() */
 set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
 
-/*
- * Paths parameterized by the parent can be considered to be parameterized by
- * any of its child.
- */
-#define PATH_PARAM_BY_PARENT(path, rel)	\
-	((path)->param_info && bms_overlap(PATH_REQ_OUTER(path),	\
-									   (rel)->top_parent_relids))
-#define PATH_PARAM_BY_REL_SELF(path, rel)  \
-	((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
-
-#define PATH_PARAM_BY_REL(path, rel)	\
-	(PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
-
 static void try_partial_mergejoin_path(PlannerInfo *root,
 									   RelOptInfo *joinrel,
 									   Path *outer_path,
@@ -794,24 +781,17 @@ try_nestloop_path(PlannerInfo *root,
 						  pathkeys, required_outer))
 	{
 		/*
-		 * If the inner path is parameterized, it is parameterized by the
-		 * topmost parent of the outer rel, not the outer rel itself.  Fix
-		 * that.
+		 * If the inner path is parameterized by the topmost parent of the
+		 * outer rel rather than the outer rel itself, it must be translated
+		 * to the outer rel.  The translation itself is deferred to
+		 * createplan.c; here we only check whether the inner path can be
+		 * translated, and if not, give up on this nestloop path.
 		 */
-		if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
+		if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
+			!path_is_reparameterizable_by_child(inner_path))
 		{
-			inner_path = reparameterize_path_by_child(root, inner_path,
-													  outer_path->parent);
-
-			/*
-			 * If we could not translate the path, we can't create nest loop
-			 * path.
-			 */
-			if (!inner_path)
-			{
-				bms_free(required_outer);
-				return;
-			}
+			bms_free(required_outer);
+			return;
 		}
 
 		add_path(joinrel, (Path *)
@@ -886,20 +866,11 @@ try_partial_nestloop_path(PlannerInfo *root,
 		return;
 
 	/*
-	 * If the inner path is parameterized, it is parameterized by the topmost
-	 * parent of the outer rel, not the outer rel itself.  Fix that.
+	 * See the comments in try_nestloop_path.
 	 */
-	if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
-	{
-		inner_path = reparameterize_path_by_child(root, inner_path,
-												  outer_path->parent);
-
-		/*
-		 * If we could not translate the path, we can't create nest loop path.
-		 */
-		if (!inner_path)
-			return;
-	}
+	if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent) &&
+		!path_is_reparameterizable_by_child(inner_path))
+		return;
 
 	/* Might be good enough to be worth trying, so let's try it. */
 	add_partial_path(joinrel, (Path *)
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index af48109058..a107bde294 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -29,6 +29,7 @@
 #include "optimizer/cost.h"
 #include "optimizer/optimizer.h"
 #include "optimizer/paramassign.h"
+#include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/placeholder.h"
 #include "optimizer/plancat.h"
@@ -4327,6 +4328,21 @@ create_nestloop_plan(PlannerInfo *root,
 	List	   *nestParams;
 	Relids		saveOuterRels = root->curOuterRels;
 
+	/*
+	 * If the inner path is parameterized by the topmost parent of the
+	 * outer rel rather than the outer rel itself, fix that.
+	 */
+	if (PATH_PARAM_BY_PARENT(best_path->jpath.innerjoinpath,
+							 best_path->jpath.outerjoinpath->parent))
+	{
+		best_path->jpath.innerjoinpath =
+			reparameterize_path_by_child(root,
+										 best_path->jpath.innerjoinpath,
+										 best_path->jpath.outerjoinpath->parent);
+
+		Assert(best_path->jpath.innerjoinpath != NULL);
+	}
+
 	/* NestLoop can project, so no need to be picky about child tlists */
 	outer_plan = create_plan_recurse(root, best_path->jpath.outerjoinpath, 0);
 
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 754f0b9f34..a93ac22660 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -2430,6 +2430,16 @@ create_nestloop_path(PlannerInfo *root,
 {
 	NestPath   *pathnode = makeNode(NestPath);
 	Relids		inner_req_outer = PATH_REQ_OUTER(inner_path);
+	Relids		outerrelids;
+
+	/*
+	 * Paths are parameterized by top-level parents, so run parameterization
+	 * tests on the parent relids.
+	 */
+	if (outer_path->parent->top_parent_relids)
+		outerrelids = outer_path->parent->top_parent_relids;
+	else
+		outerrelids = outer_path->parent->relids;
 
 	/*
 	 * If the inner path is parameterized by the outer, we must drop any
@@ -2439,7 +2449,7 @@ create_nestloop_path(PlannerInfo *root,
 	 * estimates for this path.  We detect such clauses by checking for serial
 	 * number match to clauses already enforced in the inner path.
 	 */
-	if (bms_overlap(inner_req_outer, outer_path->parent->relids))
+	if (bms_overlap(inner_req_outer, outerrelids))
 	{
 		Bitmapset  *enforced_serials = get_param_path_clause_serials(inner_path);
 		List	   *jclauses = NIL;
@@ -4018,6 +4028,44 @@ reparameterize_path(PlannerInfo *root, Path *path,
 	return NULL;
 }
 
+/*
+ * path_is_reparameterizable_by_child
+ * 		Given a path parameterized by the parent of a child relation, check
+ * 		whether reparameterize_path_by_child() supports its node type.
+ *
+ * Only the path's own node tag is tested; subpaths are not examined.  The tags
+ * accepted here must be kept in sync with reparameterize_path_by_child().
+ * NOTE(review): verify a subpath of an unsupported type cannot slip through.
+ */
+bool
+path_is_reparameterizable_by_child(Path *path)
+{
+	switch (nodeTag(path))
+	{
+		case T_Path:
+		case T_IndexPath:
+		case T_BitmapHeapPath:
+		case T_BitmapAndPath:
+		case T_BitmapOrPath:
+		case T_ForeignPath:
+		case T_CustomPath:
+		case T_NestPath:
+		case T_MergePath:
+		case T_HashPath:
+		case T_AppendPath:
+		case T_MaterialPath:
+		case T_MemoizePath:
+		case T_GatherPath:
+			return true;
+		default:
+
+			/* We don't know how to reparameterize this path. */
+			return false;
+	}
+
+	return false;				/* not reached; every switch arm returns */
+}
+
 /*
  * reparameterize_path_by_child
  * 		Given a path parameterized by the parent of the given child relation,
@@ -4035,7 +4083,12 @@ reparameterize_path(PlannerInfo *root, Path *path,
  * members are copied as they are.
  *
  * Currently, only a few path types are supported here, though more could be
- * added at need.  We return NULL if we can't reparameterize the given path.
+ * added at need.  Any addition or reduction in supported path types needs to
+ * be reflected in path_is_reparameterizable_by_child().  We return NULL if we
+ * can't reparameterize the given path.
+ *
+ * Note that this function can only be called at createplan time, because it
+ * may modify RTEs on the fly.
  */
 Path *
 reparameterize_path_by_child(PlannerInfo *root, Path *path,
@@ -4046,11 +4099,11 @@ reparameterize_path_by_child(PlannerInfo *root, Path *path,
 	( (newnode) = makeNode(nodetype), \
 	  memcpy((newnode), (node), sizeof(nodetype)) )
 
-#define ADJUST_CHILD_ATTRS(node) \
+#define ADJUST_CHILD_ATTRS(node, fieldtype) \
 	((node) = \
-	 (List *) adjust_appendrel_attrs_multilevel(root, (Node *) (node), \
-												child_rel, \
-												child_rel->top_parent))
+	 (fieldtype) adjust_appendrel_attrs_multilevel(root, (Node *) (node), \
+												   child_rel, \
+												   child_rel->top_parent))
 
 #define REPARAMETERIZE_CHILD_PATH(path) \
 do { \
@@ -4083,6 +4136,11 @@ do { \
 		!bms_overlap(PATH_REQ_OUTER(path), child_rel->top_parent_relids))
 		return path;
 
+	/*
+	 * The path should be reparameterizable, otherwise it would not come here.
+	 */
+	Assert(path_is_reparameterizable_by_child(path));
+
 	/*
 	 * If possible, reparameterize the given path, making a copy.
 	 *
@@ -4098,7 +4156,23 @@ do { \
 	switch (nodeTag(path))
 	{
 		case T_Path:
-			FLAT_COPY_PATH(new_path, path, Path);
+			{
+				FLAT_COPY_PATH(new_path, path, Path);
+
+				if (path->pathtype == T_SampleScan)
+				{
+					Index		scan_relid = path->parent->relid;
+					RangeTblEntry *rte;
+
+					/* it should be a base rel with a tablesample clause... */
+					Assert(scan_relid > 0);
+					rte = planner_rt_fetch(scan_relid, root);
+					Assert(rte->rtekind == RTE_RELATION);
+					Assert(rte->tablesample != NULL);
+
+					ADJUST_CHILD_ATTRS(rte->tablesample, TableSampleClause *);
+				}
+			}
 			break;
 
 		case T_IndexPath:
@@ -4106,7 +4180,7 @@ do { \
 				IndexPath  *ipath;
 
 				FLAT_COPY_PATH(ipath, path, IndexPath);
-				ADJUST_CHILD_ATTRS(ipath->indexclauses);
+				ADJUST_CHILD_ATTRS(ipath->indexclauses, List *);
 				new_path = (Path *) ipath;
 			}
 			break;
@@ -4186,7 +4260,7 @@ do { \
 				jpath = (JoinPath *) npath;
 				REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
 				REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
-				ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
+				ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo, List *);
 				new_path = (Path *) npath;
 			}
 			break;
@@ -4201,8 +4275,8 @@ do { \
 				jpath = (JoinPath *) mpath;
 				REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
 				REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
-				ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
-				ADJUST_CHILD_ATTRS(mpath->path_mergeclauses);
+				ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo, List *);
+				ADJUST_CHILD_ATTRS(mpath->path_mergeclauses, List *);
 				new_path = (Path *) mpath;
 			}
 			break;
@@ -4217,8 +4291,8 @@ do { \
 				jpath = (JoinPath *) hpath;
 				REPARAMETERIZE_CHILD_PATH(jpath->outerjoinpath);
 				REPARAMETERIZE_CHILD_PATH(jpath->innerjoinpath);
-				ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo);
-				ADJUST_CHILD_ATTRS(hpath->path_hashclauses);
+				ADJUST_CHILD_ATTRS(jpath->joinrestrictinfo, List *);
+				ADJUST_CHILD_ATTRS(hpath->path_hashclauses, List *);
 				new_path = (Path *) hpath;
 			}
 			break;
@@ -4249,7 +4323,7 @@ do { \
 
 				FLAT_COPY_PATH(mpath, path, MemoizePath);
 				REPARAMETERIZE_CHILD_PATH(mpath->subpath);
-				ADJUST_CHILD_ATTRS(mpath->param_exprs);
+				ADJUST_CHILD_ATTRS(mpath->param_exprs, List *);
 				new_path = (Path *) mpath;
 			}
 			break;
@@ -4300,7 +4374,7 @@ do { \
 		new_ppi->ppi_req_outer = bms_copy(required_outer);
 		new_ppi->ppi_rows = old_ppi->ppi_rows;
 		new_ppi->ppi_clauses = old_ppi->ppi_clauses;
-		ADJUST_CHILD_ATTRS(new_ppi->ppi_clauses);
+		ADJUST_CHILD_ATTRS(new_ppi->ppi_clauses, List *);
 		new_ppi->ppi_serials = bms_copy(old_ppi->ppi_serials);
 		rel->ppilist = lappend(rel->ppilist, new_ppi);
 
@@ -4319,7 +4393,7 @@ do { \
 					child_rel->top_parent_relids))
 	{
 		new_path->pathtarget = copy_pathtarget(new_path->pathtarget);
-		ADJUST_CHILD_ATTRS(new_path->pathtarget->exprs);
+		ADJUST_CHILD_ATTRS(new_path->pathtarget->exprs, List *);
 	}
 
 	return new_path;
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index a1dc1d07e1..1656628623 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -1637,6 +1637,19 @@ typedef struct Path
 #define PATH_REQ_OUTER(path)  \
 	((path)->param_info ? (path)->param_info->ppi_req_outer : (Relids) NULL)
 
+/*
+ * Paths parameterized by the parent can be considered to be parameterized by
+ * any of its children.
+ */
+#define PATH_PARAM_BY_PARENT(path, rel)	\
+	((path)->param_info && bms_overlap(PATH_REQ_OUTER(path),	\
+									   (rel)->top_parent_relids))
+#define PATH_PARAM_BY_REL_SELF(path, rel)  \
+	((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
+
+#define PATH_PARAM_BY_REL(path, rel)	\
+	(PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
+
 /*----------
  * IndexPath represents an index scan over a single index.
  *
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 001e75b5b7..85f5562033 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -295,6 +295,7 @@ extern Path *reparameterize_path(PlannerInfo *root, Path *path,
 								 double loop_count);
 extern Path *reparameterize_path_by_child(PlannerInfo *root, Path *path,
 										  RelOptInfo *child_rel);
+extern bool path_is_reparameterizable_by_child(Path *path);
 
 /*
  * prototypes for relnode.c
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 6560fe2416..a11f738411 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -505,6 +505,31 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
  550 |     | 
 (12 rows)
 
+-- lateral reference in sample scan
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1 t1 JOIN LATERAL
+			  (SELECT * FROM prt1 t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s
+			  ON t1.a = s.a;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Append
+   ->  Nested Loop
+         ->  Seq Scan on prt1_p1 t1_1
+         ->  Sample Scan on prt1_p1 t2_1
+               Sampling: system (t1_1.a) REPEATABLE (t1_1.b)
+               Filter: (t1_1.a = a)
+   ->  Nested Loop
+         ->  Seq Scan on prt1_p2 t1_2
+         ->  Sample Scan on prt1_p2 t2_2
+               Sampling: system (t1_2.a) REPEATABLE (t1_2.b)
+               Filter: (t1_2.a = a)
+   ->  Nested Loop
+         ->  Seq Scan on prt1_p3 t1_3
+         ->  Sample Scan on prt1_p3 t2_3
+               Sampling: system (t1_3.a) REPEATABLE (t1_3.b)
+               Filter: (t1_3.a = a)
+(16 rows)
+
 -- bug with inadequate sort key representation
 SET enable_partitionwise_aggregate TO true;
 SET enable_hashjoin TO false;
@@ -1944,6 +1969,41 @@ SELECT * FROM prt1_l t1 LEFT JOIN LATERAL
  550 | 0 | 0002 |     |      |     |     |      
 (12 rows)
 
+-- partitionwise join with lateral reference in sample scan
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_l t1 JOIN LATERAL
+			  (SELECT * FROM prt1_l t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s ON
+			  t1.a = s.a AND t1.b = s.b AND t1.c = s.c;
+                                       QUERY PLAN                                       
+----------------------------------------------------------------------------------------
+ Append
+   ->  Nested Loop
+         ->  Seq Scan on prt1_l_p1 t1_1
+         ->  Sample Scan on prt1_l_p1 t2_1
+               Sampling: system (t1_1.a) REPEATABLE (t1_1.b)
+               Filter: ((t1_1.a = a) AND (t1_1.b = b) AND ((t1_1.c)::text = (c)::text))
+   ->  Nested Loop
+         ->  Seq Scan on prt1_l_p2_p1 t1_2
+         ->  Sample Scan on prt1_l_p2_p1 t2_2
+               Sampling: system (t1_2.a) REPEATABLE (t1_2.b)
+               Filter: ((t1_2.a = a) AND (t1_2.b = b) AND ((t1_2.c)::text = (c)::text))
+   ->  Nested Loop
+         ->  Seq Scan on prt1_l_p2_p2 t1_3
+         ->  Sample Scan on prt1_l_p2_p2 t2_3
+               Sampling: system (t1_3.a) REPEATABLE (t1_3.b)
+               Filter: ((t1_3.a = a) AND (t1_3.b = b) AND ((t1_3.c)::text = (c)::text))
+   ->  Nested Loop
+         ->  Seq Scan on prt1_l_p3_p1 t1_4
+         ->  Sample Scan on prt1_l_p3_p1 t2_4
+               Sampling: system (t1_4.a) REPEATABLE (t1_4.b)
+               Filter: ((t1_4.a = a) AND (t1_4.b = b) AND ((t1_4.c)::text = (c)::text))
+   ->  Nested Loop
+         ->  Seq Scan on prt1_l_p3_p2 t1_5
+         ->  Sample Scan on prt1_l_p3_p2 t2_5
+               Sampling: system (t1_5.a) REPEATABLE (t1_5.b)
+               Filter: ((t1_5.a = a) AND (t1_5.b = b) AND ((t1_5.c)::text = (c)::text))
+(26 rows)
+
 -- join with one side empty
 EXPLAIN (COSTS OFF)
 SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c;
diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql
index 48daf3aee3..e2daab03fb 100644
--- a/src/test/regress/sql/partition_join.sql
+++ b/src/test/regress/sql/partition_join.sql
@@ -100,6 +100,12 @@ SELECT t1.a, ss.t2a, ss.t2c FROM prt1 t1 LEFT JOIN LATERAL
 			  (SELECT t2.a AS t2a, t3.a AS t3a, t2.b t2b, t2.c t2c, least(t1.a,t2.a,t3.a) FROM prt1 t2 JOIN prt2 t3 ON (t2.a = t3.b)) ss
 			  ON t1.c = ss.t2c WHERE (t1.b + coalesce(ss.t2b, 0)) = 0 ORDER BY t1.a;
 
+-- lateral reference in sample scan
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1 t1 JOIN LATERAL
+			  (SELECT * FROM prt1 t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s
+			  ON t1.a = s.a;
+
 -- bug with inadequate sort key representation
 SET enable_partitionwise_aggregate TO true;
 SET enable_hashjoin TO false;
@@ -387,6 +393,12 @@ SELECT * FROM prt1_l t1 LEFT JOIN LATERAL
 			  (SELECT t2.a AS t2a, t2.c AS t2c, t2.b AS t2b, t3.b AS t3b, least(t1.a,t2.a,t3.b) FROM prt1_l t2 JOIN prt2_l t3 ON (t2.a = t3.b AND t2.c = t3.c)) ss
 			  ON t1.a = ss.t2a AND t1.c = ss.t2c WHERE t1.b = 0 ORDER BY t1.a;
 
+-- partitionwise join with lateral reference in sample scan
+EXPLAIN (COSTS OFF)
+SELECT * FROM prt1_l t1 JOIN LATERAL
+			  (SELECT * FROM prt1_l t2 TABLESAMPLE SYSTEM (t1.a) REPEATABLE(t1.b)) s ON
+			  t1.a = s.a AND t1.b = s.b AND t1.c = s.c;
+
 -- join with one side empty
 EXPLAIN (COSTS OFF)
 SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1_l WHERE a = 1 AND a = 2) t1 RIGHT JOIN prt2_l t2 ON t1.a = t2.b AND t1.b = t2.a AND t1.c = t2.c;
-- 
2.31.0

