Hi.

There's issue with join pushdown after

commit 86dc90056dfdbd9d1b891718d2e5614e3e432f35
Author: Tom Lane <t...@sss.pgh.pa.us>
Date:   Wed Mar 31 11:52:34 2021 -0400

    Rework planning and execution of UPDATE and DELETE

To make sure that join pushdown path selected, one can patch
contrib/postgres_fdw/postgres_fdw.c in the following way:

diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index c48a421e88b..c2bf6833050 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -5959,6 +5959,8 @@ postgresGetForeignJoinPaths(PlannerInfo *root,
        /* Estimate costs for bare join relation */
        estimate_path_cost_size(root, joinrel, NIL, NIL, NULL,
&rows, &width, &startup_cost, &total_cost);
+
+       startup_cost = total_cost = 0;
        /* Now update this information in the joinrel */
        joinrel->rows = rows;
        joinrel->reltarget->width = width;

Now, this simple test shows the issue:

create extension postgres_fdw;

DO $d$
    BEGIN
EXECUTE $$CREATE SERVER loopback FOREIGN DATA WRAPPER postgres_fdw
            OPTIONS (dbname '$$||current_database()||$$',
                     port '$$||current_setting('port')||$$')$$;
   END;
$d$;

CREATE USER MAPPING FOR CURRENT_USER SERVER loopback;

CREATE TABLE base_tbl (a int, b int);
CREATE FOREIGN TABLE remote_tbl (a int, b int)
  SERVER loopback OPTIONS (table_name 'base_tbl');

insert into remote_tbl select generate_series(1,100), generate_series(1,100);

explain verbose update remote_tbl d set a= case when current_timestamp> '2012-02-02'::timestamp then 5 else 6 end FROM remote_tbl AS t (a, b) WHERE d.a = (t.a); QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Update on public.remote_tbl d  (cost=0.00..42.35 rows=0 width=0)
   Remote SQL: UPDATE public.base_tbl SET a = $2 WHERE ctid = $1
   ->  Foreign Scan  (cost=0.00..42.35 rows=8470 width=74)
Output: CASE WHEN (CURRENT_TIMESTAMP > '2012-02-02 00:00:00'::timestamp without time zone) THEN 5 ELSE 6 END, d.ctid, d.*, t.* Relations: (public.remote_tbl d) INNER JOIN (public.remote_tbl t) Remote SQL: SELECT r1.ctid, CASE WHEN (r1.*)::text IS NOT NULL THEN ROW(r1.a, r1.b) END, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2.a, r2.b) END FROM (public.base_tbl r1 INNER JOIN public.base_tbl r2 ON (((r1.a = r2.a)))) FOR UPDATE OF r1
         ->  Merge Join  (cost=433.03..566.29 rows=8470 width=70)
               Output: d.ctid, d.*, t.*
               Merge Cond: (d.a = t.a)
               ->  Sort  (cost=211.00..214.10 rows=1241 width=42)
                     Output: d.ctid, d.*, d.a
                     Sort Key: d.a
-> Foreign Scan on public.remote_tbl d (cost=100.00..147.23 rows=1241 width=42)
                           Output: d.ctid, d.*, d.a
Remote SQL: SELECT a, b, ctid FROM public.base_tbl FOR UPDATE
               ->  Sort  (cost=222.03..225.44 rows=1365 width=36)
                     Output: t.*, t.a
                     Sort Key: t.a
-> Foreign Scan on public.remote_tbl t (cost=100.00..150.95 rows=1365 width=36)
                           Output: t.*, t.a
                           Remote SQL: SELECT a, b FROM public.base_tbl
update remote_tbl d set a= case when current_timestamp> '2012-02-02'::timestamp then 5 else 6 end FROM remote_tbl AS t (a, b) WHERE d.a = (t.a);

You'll get
ERROR:  input of anonymous composite types is not implemented
CONTEXT:  whole-row reference to foreign table "remote_tbl"

make_tuple_from_result_row() (called by fetch_more_data()), will try to call InputFunctionCall() for ROW(r1.a, r1.b) and will get error in record_in().

Here ROW(r2.a, r2.b) would have attribute type id, corresponding to remote_tbl, but ROW(r1.a, r1.b) would have atttypid 2249 (RECORD).

Before 86dc90056dfdbd9d1b891718d2e5614e3e432f35 the plan would be different and looked like

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Update on public.remote_tbl d  (cost=0.00..73.54 rows=14708 width=46)
   Remote SQL: UPDATE public.base_tbl SET a = $2 WHERE ctid = $1
   ->  Foreign Scan  (cost=0.00..73.54 rows=14708 width=46)
Output: CASE WHEN (CURRENT_TIMESTAMP > '2012-02-02 00:00:00'::timestamp without time zone) THEN d.a ELSE 6 END, d.b, d.ctid, t.* Relations: (public.remote_tbl d) INNER JOIN (public.remote_tbl t) Remote SQL: SELECT r1.a, r1.b, r1.ctid, CASE WHEN (r2.*)::text IS NOT NULL THEN ROW(r2.a, r2.b) END FROM (public.base_tbl r1 INNER JOIN public.base_tbl r2 ON (((r1.a = r2.a)))) FOR UPDATE OF r1
         ->  Merge Join  (cost=516.00..747.39 rows=14708 width=46)
               Output: d.a, d.b, d.ctid, t.*
               Merge Cond: (d.a = t.a)
               ->  Sort  (cost=293.97..299.35 rows=2155 width=14)
                     Output: d.a, d.b, d.ctid
                     Sort Key: d.a
-> Foreign Scan on public.remote_tbl d (cost=100.00..174.65 rows=2155 width=14)
                           Output: d.a, d.b, d.ctid
Remote SQL: SELECT a, b, ctid FROM public.base_tbl FOR UPDATE
               ->  Sort  (cost=222.03..225.44 rows=1365 width=36)
                     Output: t.*, t.a
                     Sort Key: t.a
-> Foreign Scan on public.remote_tbl t (cost=100.00..150.95 rows=1365 width=36)
                           Output: t.*, t.a
                           Remote SQL: SELECT a, b FROM public.base_tbl

Here ROW(r2.a, r2.b) would have attribute type id, corresponding to remote_tbl.

--
Best regards,
Alexander Pyhalov,
Postgres Professional


Reply via email to