Hi.

I was investigating why foreign join is not happening and found the following issue. When we search for foreign join path we stop on the first try. For example, in case A JOIN B JOIN C where all of them are foreign tables on the same server firstly we estimate A JOIN B, A JOIN C and B JOIN C costs. Then we select one of them (let's say A JOIN B) and estimate (A JOIN B) JOIN C cost. On this stage we fill in joinrel->fdw_private in joinrel (A, B, C). Now we could look at A JOIN (B JOIN C), but we don't, as in contrib/postgres_fdw/postgres_fdw.c:5962 (in postgresGetForeignJoinPaths()) we have:

        /*
         * Skip if this join combination has been considered already.
         */
        if (joinrel->fdw_private)
                return;

This can lead to situation when A JOIN B JOIN C cost is not correctly estimated (as we don't look at another join orders) and foreign join is not pushed down when it would be efficient.

Attached patch tries to fix this. Now we collect different join paths for joinrels, if join is possible at all. However, as we don't know what path is really selected, we can't precisely estimate fpinfo costs.

The following example shows the case, when we fail to push down foreign join due to mentioned issue.

create extension postgres_fdw;

DO $d$
    BEGIN
EXECUTE $$CREATE SERVER loopback FOREIGN DATA WRAPPER postgres_fdw
            OPTIONS (dbname '$$||current_database()||$$',
                     port '$$||current_setting('port')||$$'
            )$$;
END
$d$;

create user MAPPING FOR PUBLIC SERVER loopback ;

CREATE SCHEMA test;

CREATE TABLE test.district (
    d_id smallint NOT NULL PRIMARY KEY,
    d_next_o_id integer NOT NULL,
    d_name varchar(100) NOT NULL
);

CREATE TABLE test.stock (
    s_i_id integer NOT NULL PRIMARY KEY,
    s_quantity smallint NOT NULL,
    s_data varchar(100) NOT NULL
);

CREATE TABLE test.order_line (
    ol_o_id integer NOT NULL PRIMARY KEY,
    ol_i_id integer NOT NULL,
    ol_d_id smallint NOT NULL
);

import foreign schema test from server loopback into public ;

INSERT INTO district SELECT i, 20000 + 20*i , 'test' FROM generate_series(1,10) i; INSERT INTO stock SELECT i, round(random()*100) + 10, 'test data' FROM generate_series(1,10000) i; INSERT INTO order_line SELECT i, i%10000, i%10 FROM generate_series(1,30000) i;

analyze test.order_line, test.district ,test.stock;
analyze order_line, district ,stock;


Without patch:

explain analyze verbose SELECT * FROM order_line, stock, district WHERE ol_d_id = 1 AND d_id = 1 AND (ol_o_id < d_next_o_id) AND ol_o_id >= (d_next_o_id - 20) AND s_i_id = ol_i_id AND s_quantity < 11; QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Hash Join (cost=382.31..966.86 rows=2 width=37) (actual time=5.459..5.466 rows=0 loops=1) Output: order_line.ol_o_id, order_line.ol_i_id, order_line.ol_d_id, stock.s_i_id, stock.s_quantity, stock.s_data, district.d_id, district.d_next_o_id, district.d_name
   Hash Cond: (order_line.ol_i_id = stock.s_i_id)
-> Foreign Scan (cost=100.00..683.29 rows=333 width=21) (actual time=2.773..2.775 rows=2 loops=1) Output: order_line.ol_o_id, order_line.ol_i_id, order_line.ol_d_id, district.d_id, district.d_next_o_id, district.d_name
         Relations: (public.order_line) INNER JOIN (public.district)
Remote SQL: SELECT r1.ol_o_id, r1.ol_i_id, r1.ol_d_id, r3.d_id, r3.d_next_o_id, r3.d_name FROM (test.order_line r1 INNER JOIN test.district r3 ON (((r1.ol_o_id < r3.d_next_o_id)) AND ((r1.ol_o_id >= (r3.d_next_o_id - 20))) AND ((r3.d_id = 1)) AND ((r1.ol_d_id = 1)))) -> Hash (cost=281.42..281.42 rows=71 width=16) (actual time=2.665..2.665 rows=48 loops=1)
         Output: stock.s_i_id, stock.s_quantity, stock.s_data
         Buckets: 1024  Batches: 1  Memory Usage: 11kB
-> Foreign Scan on public.stock (cost=100.00..281.42 rows=71 width=16) (actual time=2.625..2.631 rows=48 loops=1)
               Output: stock.s_i_id, stock.s_quantity, stock.s_data
Remote SQL: SELECT s_i_id, s_quantity, s_data FROM test.stock WHERE ((s_quantity < 11))
 Planning Time: 1.812 ms
 Execution Time: 8.534 ms
(15 rows)

With patch:

explain analyze verbose SELECT * FROM order_line, stock, district WHERE ol_d_id = 1 AND d_id = 1 AND (ol_o_id < d_next_o_id) AND ol_o_id >= (d_next_o_id - 20) AND s_i_id = ol_i_id AND s_quantity < 11; QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Foreign Scan (cost=100.00..974.88 rows=2 width=37) (actual time=3.399..3.400 rows=0 loops=1) Output: order_line.ol_o_id, order_line.ol_i_id, order_line.ol_d_id, stock.s_i_id, stock.s_quantity, stock.s_data, district.d_id, district.d_next_o_id, district.d_name Relations: ((public.order_line) INNER JOIN (public.district)) INNER JOIN (public.stock) Remote SQL: SELECT r1.ol_o_id, r1.ol_i_id, r1.ol_d_id, r2.s_i_id, r2.s_quantity, r2.s_data, r3.d_id, r3.d_next_o_id, r3.d_name FROM ((test.order_line r1 INNER JOIN test.district r3 ON (((r1.ol_o_id < r3.d_next_o_id)) AND ((r1.ol_o_id >= (r3.d_next_o_id - 20))) AND ((r3.d_id = 1)) AND ((r1.ol_d_id = 1)))) INNER JOIN test.stock r2 ON (((r1.ol_i_id = r2.s_i_id)) AND ((r2.s_quantity < 11))))
 Planning Time: 0.928 ms
 Execution Time: 4.511 ms



--
Best regards,
Alexander Pyhalov,
Postgres Professional
From e0c21e620804adbdeb6bc7d11ad606d263e63249 Mon Sep 17 00:00:00 2001
From: Alexander Pyhalov <a.pyha...@postgrespro.ru>
Date: Mon, 24 Jan 2022 18:28:12 +0300
Subject: [PATCH] Look through all possible foreign join orders

---
 .../postgres_fdw/expected/postgres_fdw.out    | 10 ++--
 contrib/postgres_fdw/postgres_fdw.c           | 47 ++++++++++++++++---
 2 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out
index 7d6f7d9e3df..1547049108c 100644
--- a/contrib/postgres_fdw/expected/postgres_fdw.out
+++ b/contrib/postgres_fdw/expected/postgres_fdw.out
@@ -1203,8 +1203,8 @@ SELECT t1.c1, t2.c2, t3.c3 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) JOIN ft4 t
 ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Foreign Scan
    Output: t1.c1, t2.c2, t3.c3, t1.c3
-   Relations: ((public.ft1 t1) INNER JOIN (public.ft2 t2)) INNER JOIN (public.ft4 t3)
-   Remote SQL: SELECT r1."C 1", r2.c2, r4.c3, r1.c3 FROM (("S 1"."T 1" r1 INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) INNER JOIN "S 1"."T 3" r4 ON (((r1."C 1" = r4.c1)))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 10::bigint
+   Relations: ((public.ft1 t1) INNER JOIN (public.ft4 t3)) INNER JOIN (public.ft2 t2)
+   Remote SQL: SELECT r1."C 1", r2.c2, r4.c3, r1.c3 FROM (("S 1"."T 1" r1 INNER JOIN "S 1"."T 3" r4 ON (((r1."C 1" = r4.c1)))) INNER JOIN "S 1"."T 1" r2 ON (((r1."C 1" = r2."C 1")))) ORDER BY r1.c3 ASC NULLS LAST, r1."C 1" ASC NULLS LAST LIMIT 10::bigint OFFSET 10::bigint
 (4 rows)
 
 SELECT t1.c1, t2.c2, t3.c3 FROM ft1 t1 JOIN ft2 t2 ON (t1.c1 = t2.c1) JOIN ft4 t3 ON (t3.c1 = t1.c1) ORDER BY t1.c3, t1.c1 OFFSET 10 LIMIT 10;
@@ -5585,12 +5585,12 @@ UPDATE ft2 SET c3 = 'foo'
   FROM ft4 INNER JOIN ft5 ON (ft4.c1 = ft5.c1)
   WHERE ft2.c1 > 1200 AND ft2.c2 = ft4.c1
   RETURNING ft2, ft2.*, ft4, ft4.*;       -- can be pushed down
-                                                                                                                                                                          QUERY PLAN                                                                                                                                                                           
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+                                                                                                                                                                      QUERY PLAN                                                                                                                                                                      
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  Update on public.ft2
    Output: ft2.*, ft2.c1, ft2.c2, ft2.c3, ft2.c4, ft2.c5, ft2.c6, ft2.c7, ft2.c8, ft4.*, ft4.c1, ft4.c2, ft4.c3
    ->  Foreign Update
-         Remote SQL: UPDATE "S 1"."T 1" r1 SET c3 = 'foo'::text FROM ("S 1"."T 3" r2 INNER JOIN "S 1"."T 4" r3 ON (TRUE)) WHERE ((r2.c1 = r3.c1)) AND ((r1.c2 = r2.c1)) AND ((r1."C 1" > 1200)) RETURNING r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2.c1, r2.c2, r2.c3) END, r2.c1, r2.c2, r2.c3
+         Remote SQL: UPDATE "S 1"."T 1" r1 SET c3 = 'foo'::text FROM ("S 1"."T 3" r2 INNER JOIN "S 1"."T 4" r3 ON (((r2.c1 = r3.c1)))) WHERE ((r2.c1 = r1.c2)) AND ((r1."C 1" > 1200)) RETURNING r1."C 1", r1.c2, r1.c3, r1.c4, r1.c5, r1.c6, r1.c7, r1.c8, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2.c1, r2.c2, r2.c3) END, r2.c1, r2.c2, r2.c3
 (4 rows)
 
 UPDATE ft2 SET c3 = 'foo'
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index bf3f3d9e26e..613d3c8a10d 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -5950,7 +5950,7 @@ postgresGetForeignJoinPaths(PlannerInfo *root,
 							JoinType jointype,
 							JoinPathExtraData *extra)
 {
-	PgFdwRelationInfo *fpinfo;
+	PgFdwRelationInfo *fpinfo, *oldfpinfo;
 	ForeignPath *joinpath;
 	double		rows;
 	int			width;
@@ -5960,9 +5960,10 @@ postgresGetForeignJoinPaths(PlannerInfo *root,
 								 * EvalPlanQual gets triggered. */
 
 	/*
-	 * Skip if this join combination has been considered already.
+	 * Skip if this join combination has been considered already and rejected.
 	 */
-	if (joinrel->fdw_private)
+	oldfpinfo = (PgFdwRelationInfo *) joinrel->fdw_private;
+	if (oldfpinfo && !oldfpinfo->pushdown_safe)
 		return;
 
 	/*
@@ -6002,6 +6003,12 @@ postgresGetForeignJoinPaths(PlannerInfo *root,
 		epq_path = GetExistingLocalJoinPath(joinrel);
 		if (!epq_path)
 		{
+			if (oldfpinfo)
+			{
+				joinrel->fdw_private = oldfpinfo;
+				pfree(fpinfo);
+			}
+
 			elog(DEBUG3, "could not push down foreign join because a local path suitable for EPQ checks was not found");
 			return;
 		}
@@ -6011,6 +6018,12 @@ postgresGetForeignJoinPaths(PlannerInfo *root,
 
 	if (!foreign_join_ok(root, joinrel, jointype, outerrel, innerrel, extra))
 	{
+		if (oldfpinfo)
+		{
+			joinrel->fdw_private = oldfpinfo;
+			pfree(fpinfo);
+		}
+
 		/* Free path required for EPQ if we copied one; we don't need it now */
 		if (epq_path)
 			pfree(epq_path);
@@ -6044,14 +6057,36 @@ postgresGetForeignJoinPaths(PlannerInfo *root,
 	/* Estimate costs for bare join relation */
 	estimate_path_cost_size(root, joinrel, NIL, NIL, NULL,
 							&rows, &width, &startup_cost, &total_cost);
-	/* Now update this information in the joinrel */
-	joinrel->rows = rows;
-	joinrel->reltarget->width = width;
+
 	fpinfo->rows = rows;
 	fpinfo->width = width;
 	fpinfo->startup_cost = startup_cost;
 	fpinfo->total_cost = total_cost;
 
+	if (oldfpinfo)
+	{
+		/* If this path is not cheaper, ignore it */
+		if (startup_cost >= oldfpinfo->startup_cost && total_cost >= oldfpinfo->total_cost)
+		{
+			joinrel->fdw_private = oldfpinfo;
+			pfree(fpinfo);
+			return;
+		}
+		else
+		{
+			/*
+			 * We don't know which path is selected - old or new one, but let's be optimistic.
+			 */
+			fpinfo->startup_cost = Min(oldfpinfo->startup_cost, startup_cost);
+			fpinfo->total_cost = Min(oldfpinfo->total_cost, total_cost);
+			pfree(oldfpinfo);
+		}
+	}
+
+	/* Now update this information in the joinrel */
+	joinrel->rows = rows;
+	joinrel->reltarget->width = width;
+
 	/*
 	 * Create a new join path and add it to the joinrel which represents a
 	 * join between foreign tables.
-- 
2.25.1

Reply via email to