On Thu, Sep 26, 2019 at 1:56 PM Amit Langote <amitlangot...@gmail.com> wrote:
> On Wed, Sep 4, 2019 at 10:45 AM Amit Langote <amitlangot...@gmail.com> wrote:
> > On Fri, Aug 9, 2019 at 10:51 AM Amit Langote <amitlangot...@gmail.com> 
> > wrote:
> > To avoid losing track of this, I've added this to November CF.
> >
> > https://commitfest.postgresql.org/25/2277/
> >
> > Struggled a bit to give a title to the entry though.
>
> Noticed that one of the patches needed a rebase.
>
> Attached updated patches.  Note that v8-0001 is v7-0001 unchanged that
> Fujita-san posted on Aug 8.

Rebased again.

Thanks,
Amit
From a0f6939e5b2ee8f78c813545f45d59a346d22e5f Mon Sep 17 00:00:00 2001
From: Etsuro Fujita <efuj...@postgresql.org>
Date: Thu, 8 Aug 2019 21:41:12 +0900
Subject: [PATCH v9 1/4] Remove dependency on estate->es_result_relation_info
 from FDW APIs.

FDW APIs for executing a foreign table direct modification assumed that
the FDW would obtain the target foreign table's ResultRelInfo from
estate->es_result_relation_info of the passed-in ForeignScanState node,
but the upcoming patch(es) to refactor partitioning-related code in
nodeModifyTable.c will remove the es_result_relation_info variable.
Revise BeginDirectModify()'s API to pass the ResultRelInfo explicitly, to
remove the dependency on that variable from the FDW APIs.  For
ExecInitForeignScan() to efficiently get the ResultRelInfo to pass to
BeginDirectModify(), add a field to ForeignScan that gives the index of
the target foreign table in the list of the query result relations.

Patch by Amit Langote, following a proposal by Andres Freund, reviewed by
Andres Freund and me

Discussion: 
https://postgr.es/m/20190718010911.l6xcdv6birtxi...@alap3.anarazel.de
---
 contrib/postgres_fdw/postgres_fdw.c     | 25 +++++++++++++++++++------
 doc/src/sgml/fdwhandler.sgml            |  8 ++++++--
 src/backend/executor/nodeForeignscan.c  | 14 ++++++++++----
 src/backend/nodes/copyfuncs.c           |  1 +
 src/backend/nodes/outfuncs.c            |  1 +
 src/backend/nodes/readfuncs.c           |  1 +
 src/backend/optimizer/plan/createplan.c |  2 ++
 src/backend/optimizer/plan/setrefs.c    | 15 +++++++++++++++
 src/include/foreign/fdwapi.h            |  1 +
 src/include/nodes/plannodes.h           |  3 +++
 10 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/contrib/postgres_fdw/postgres_fdw.c 
b/contrib/postgres_fdw/postgres_fdw.c
index bdc21b36d1..642deaf7cb 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -218,6 +218,7 @@ typedef struct PgFdwDirectModifyState
        int                     num_tuples;             /* # of result tuples */
        int                     next_tuple;             /* index of next one to 
return */
        Relation        resultRel;              /* relcache entry for the 
target relation */
+       ResultRelInfo *resultRelInfo;   /* ResultRelInfo for the target 
relation */
        AttrNumber *attnoMap;           /* array of attnums of input user 
columns */
        AttrNumber      ctidAttno;              /* attnum of input ctid column 
*/
        AttrNumber      oidAttno;               /* attnum of input oid column */
@@ -361,7 +362,9 @@ static bool postgresPlanDirectModify(PlannerInfo *root,
                                                                         
ModifyTable *plan,
                                                                         Index 
resultRelation,
                                                                         int 
subplan_index);
-static void postgresBeginDirectModify(ForeignScanState *node, int eflags);
+static void postgresBeginDirectModify(ForeignScanState *node,
+                                                 ResultRelInfo *rinfo,
+                                                 int eflags);
 static TupleTableSlot *postgresIterateDirectModify(ForeignScanState *node);
 static void postgresEndDirectModify(ForeignScanState *node);
 static void postgresExplainForeignScan(ForeignScanState *node,
@@ -2319,6 +2322,11 @@ postgresPlanDirectModify(PlannerInfo *root,
                        rebuild_fdw_scan_tlist(fscan, returningList);
        }
 
+       /*
+        * Set the index of the subplan result rel.
+        */
+       fscan->resultRelIndex = subplan_index;
+
        table_close(rel, NoLock);
        return true;
 }
@@ -2328,7 +2336,9 @@ postgresPlanDirectModify(PlannerInfo *root,
  *             Prepare a direct foreign table modification
  */
 static void
-postgresBeginDirectModify(ForeignScanState *node, int eflags)
+postgresBeginDirectModify(ForeignScanState *node,
+                                                 ResultRelInfo *rinfo,
+                                                 int eflags)
 {
        ForeignScan *fsplan = (ForeignScan *) node->ss.ps.plan;
        EState     *estate = node->ss.ps.state;
@@ -2356,7 +2366,7 @@ postgresBeginDirectModify(ForeignScanState *node, int 
eflags)
         * Identify which user to do the remote access as.  This should match 
what
         * ExecCheckRTEPerms() does.
         */
-       rtindex = estate->es_result_relation_info->ri_RangeTableIndex;
+       rtindex = rinfo->ri_RangeTableIndex;
        rte = exec_rt_fetch(rtindex, estate);
        userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
 
@@ -2389,6 +2399,9 @@ postgresBeginDirectModify(ForeignScanState *node, int 
eflags)
                dmstate->rel = NULL;
        }
 
+       /* Save the ResultRelInfo for the target relation. */
+       dmstate->resultRelInfo = rinfo;
+
        /* Initialize state variable */
        dmstate->num_tuples = -1;       /* -1 means not set yet */
 
@@ -2451,7 +2464,7 @@ postgresIterateDirectModify(ForeignScanState *node)
 {
        PgFdwDirectModifyState *dmstate = (PgFdwDirectModifyState *) 
node->fdw_state;
        EState     *estate = node->ss.ps.state;
-       ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
+       ResultRelInfo *resultRelInfo = dmstate->resultRelInfo;
 
        /*
         * If this is the first call after Begin, execute the statement.
@@ -4087,7 +4100,7 @@ get_returning_data(ForeignScanState *node)
 {
        PgFdwDirectModifyState *dmstate = (PgFdwDirectModifyState *) 
node->fdw_state;
        EState     *estate = node->ss.ps.state;
-       ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
+       ResultRelInfo *resultRelInfo = dmstate->resultRelInfo;
        TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
        TupleTableSlot *resultSlot;
 
@@ -4234,7 +4247,7 @@ apply_returning_filter(PgFdwDirectModifyState *dmstate,
                                           TupleTableSlot *slot,
                                           EState *estate)
 {
-       ResultRelInfo *relInfo = estate->es_result_relation_info;
+       ResultRelInfo *relInfo = dmstate->resultRelInfo;
        TupleDesc       resultTupType = RelationGetDescr(dmstate->resultRel);
        TupleTableSlot *resultSlot;
        Datum      *values;
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 6587678af2..0aff958415 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -873,6 +873,7 @@ PlanDirectModify(PlannerInfo *root,
 <programlisting>
 void
 BeginDirectModify(ForeignScanState *node,
+                  ResultRelInfo *rinfo,
                   int eflags);
 </programlisting>
 
@@ -886,6 +887,8 @@ BeginDirectModify(ForeignScanState *node,
      <structname>ForeignScanState</structname> node (in particular, from the 
underlying
      <structname>ForeignScan</structname> plan node, which contains any 
FDW-private
      information provided by <function>PlanDirectModify</function>).
+     In addition, the <structname>ResultRelInfo</structname> struct also
+     contains information about the target foreign table.
      <literal>eflags</literal> contains flag bits describing the executor's
      operating mode for this plan node.
     </para>
@@ -917,8 +920,9 @@ IterateDirectModify(ForeignScanState *node);
      tuple table slot (the node's <structfield>ScanTupleSlot</structfield> 
should be
      used for this purpose).  The data that was actually inserted, updated
      or deleted must be stored in the
-     
<literal>es_result_relation_info-&gt;ri_projectReturning-&gt;pi_exprContext-&gt;ecxt_scantuple</literal>
-     of the node's <structname>EState</structname>.
+     
<literal>ri_projectReturning-&gt;pi_exprContext-&gt;ecxt_scantuple</literal>
+     of the target foreign table's <structname>ResultRelInfo</structname>
+     passed to <function>BeginDirectModify</function>.
      Return NULL if no more rows are available.
      Note that this is called in a short-lived memory context that will be
      reset between invocations.  Create a memory context in
diff --git a/src/backend/executor/nodeForeignscan.c 
b/src/backend/executor/nodeForeignscan.c
index 52af1dac5c..84ef31ceef 100644
--- a/src/backend/executor/nodeForeignscan.c
+++ b/src/backend/executor/nodeForeignscan.c
@@ -221,12 +221,18 @@ ExecInitForeignScan(ForeignScan *node, EState *estate, 
int eflags)
                        ExecInitNode(outerPlan(node), estate, eflags);
 
        /*
-        * Tell the FDW to initialize the scan.
+        * Tell the FDW to initialize the scan or the direct modification.
         */
-       if (node->operation != CMD_SELECT)
-               fdwroutine->BeginDirectModify(scanstate, eflags);
-       else
+       if (node->operation == CMD_SELECT)
                fdwroutine->BeginForeignScan(scanstate, eflags);
+       else
+       {
+               ResultRelInfo *resultRelInfo;
+
+               Assert(node->resultRelIndex >= 0);
+               resultRelInfo = 
&estate->es_result_relations[node->resultRelIndex];
+               fdwroutine->BeginDirectModify(scanstate, resultRelInfo, eflags);
+       }
 
        return scanstate;
 }
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index a9b8b84b8f..a1094defb7 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -761,6 +761,7 @@ _copyForeignScan(const ForeignScan *from)
        COPY_NODE_FIELD(fdw_recheck_quals);
        COPY_BITMAPSET_FIELD(fs_relids);
        COPY_SCALAR_FIELD(fsSystemCol);
+       COPY_SCALAR_FIELD(resultRelIndex);
 
        return newnode;
 }
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index ac02e5ec8d..91af796a00 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -698,6 +698,7 @@ _outForeignScan(StringInfo str, const ForeignScan *node)
        WRITE_NODE_FIELD(fdw_recheck_quals);
        WRITE_BITMAPSET_FIELD(fs_relids);
        WRITE_BOOL_FIELD(fsSystemCol);
+       WRITE_INT_FIELD(resultRelIndex);
 }
 
 static void
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 3f9ebc9044..d5514398f4 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -2013,6 +2013,7 @@ _readForeignScan(void)
        READ_NODE_FIELD(fdw_recheck_quals);
        READ_BITMAPSET_FIELD(fs_relids);
        READ_BOOL_FIELD(fsSystemCol);
+       READ_INT_FIELD(resultRelIndex);
 
        READ_DONE();
 }
diff --git a/src/backend/optimizer/plan/createplan.c 
b/src/backend/optimizer/plan/createplan.c
index 8c8b4f8ed6..455740663b 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -5460,6 +5460,8 @@ make_foreignscan(List *qptlist,
        node->fs_relids = NULL;
        /* fsSystemCol will be filled in by create_foreignscan_plan */
        node->fsSystemCol = false;
+       /* resultRelIndex will be set by PlanDirectModify/setrefs.c, if needed 
*/
+       node->resultRelIndex = -1;
 
        return node;
 }
diff --git a/src/backend/optimizer/plan/setrefs.c 
b/src/backend/optimizer/plan/setrefs.c
index f581c5f532..c16282b082 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -901,6 +901,13 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
                                        rc->rti += rtoffset;
                                        rc->prti += rtoffset;
                                }
+                               /*
+                                * Caution: Do not change the relative ordering 
of this loop
+                                * and the statement below that adds the result 
relations to
+                                * root->glob->resultRelations, because we need 
to use the
+                                * current value of 
list_length(root->glob->resultRelations)
+                                * in some plans.
+                                */
                                foreach(l, splan->plans)
                                {
                                        lfirst(l) = set_plan_refs(root,
@@ -1240,6 +1247,14 @@ set_foreignscan_references(PlannerInfo *root,
        }
 
        fscan->fs_relids = offset_relid_set(fscan->fs_relids, rtoffset);
+
+       /*
+        * Adjust resultRelIndex if it's valid (note that we are called before
+        * adding the RT indexes of ModifyTable result relations to the global
+        * list)
+        */
+       if (fscan->resultRelIndex >= 0)
+               fscan->resultRelIndex += 
list_length(root->glob->resultRelations);
 }
 
 /*
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 822686033e..adf39bc618 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -112,6 +112,7 @@ typedef bool (*PlanDirectModify_function) (PlannerInfo 
*root,
                                                                                
   int subplan_index);
 
 typedef void (*BeginDirectModify_function) (ForeignScanState *node,
+                                                                               
        ResultRelInfo *rinfo,
                                                                                
        int eflags);
 
 typedef TupleTableSlot *(*IterateDirectModify_function) (ForeignScanState 
*node);
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 477b4da192..f3f2699e90 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -620,6 +620,9 @@ typedef struct ForeignScan
        List       *fdw_recheck_quals;  /* original quals not in scan.plan.qual 
*/
        Bitmapset  *fs_relids;          /* RTIs generated by this scan */
        bool            fsSystemCol;    /* true if any "system column" is 
needed */
+       int                     resultRelIndex; /* index of foreign table in 
the list of query
+                                                                * result 
relations for INSERT/UPDATE/DELETE;
+                                                                * -1 for 
SELECT */
 } ForeignScan;
 
 /* ----------------
-- 
2.16.5

From 82a386601239c90da2a4bba69762df28351053cb Mon Sep 17 00:00:00 2001
From: amit <amitlangot...@gmail.com>
Date: Tue, 30 Jul 2019 10:51:35 +0900
Subject: [PATCH v9 4/4] Refactor transition tuple capture code a bit

In the case of inherited update and partitioned table inserts,
a child tuple needs to be converted back into the root table format.
The tuple conversion map needed to do that was previously stored in
ModifyTableState and adjusted every time the child relation changed,
an arrangement which is a bit cumbersome to maintain.  Instead save
the map in the child result relation's ResultRelInfo.

This allows to get rid of a bunch of code that was needed to
manipulate tcs_map.
---
 src/backend/commands/copy.c            |  31 ++---
 src/backend/commands/trigger.c         |  19 ++-
 src/backend/executor/execPartition.c   |  19 ++-
 src/backend/executor/nodeModifyTable.c | 207 +++++++--------------------------
 src/include/commands/trigger.h         |  10 +-
 src/include/nodes/execnodes.h          |   9 +-
 6 files changed, 85 insertions(+), 210 deletions(-)

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 38cffde583..61d9a1d3cf 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -3110,32 +3110,15 @@ CopyFrom(CopyState cstate)
                        }
 
                        /*
-                        * If we're capturing transition tuples, we might need 
to convert
-                        * from the partition rowtype to root rowtype.
+                        * If we're capturing transition tuples and there are 
no BEFORE
+                        * triggers on the partition, we can just use the 
original
+                        * unconverted tuple instead of converting the tuple in 
partition
+                        * format back to root format.  We must do the 
conversion if such
+                        * triggers exist because they may change the tuple.
                         */
                        if (cstate->transition_capture != NULL)
-                       {
-                               if (has_before_insert_row_trig)
-                               {
-                                       /*
-                                        * If there are any BEFORE triggers on 
the partition,
-                                        * we'll have to be ready to convert 
their result back to
-                                        * tuplestore format.
-                                        */
-                                       
cstate->transition_capture->tcs_original_insert_tuple = NULL;
-                                       cstate->transition_capture->tcs_map =
-                                               
resultRelInfo->ri_PartitionInfo->pi_PartitionToRootMap;
-                               }
-                               else
-                               {
-                                       /*
-                                        * Otherwise, just remember the 
original unconverted
-                                        * tuple, to avoid a needless round 
trip conversion.
-                                        */
-                                       
cstate->transition_capture->tcs_original_insert_tuple = myslot;
-                                       cstate->transition_capture->tcs_map = 
NULL;
-                               }
-                       }
+                               
cstate->transition_capture->tcs_original_insert_tuple =
+                                       !has_before_insert_row_trig ? myslot : 
NULL;
 
                        /*
                         * We might need to convert from the root rowtype to 
the partition
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index faeea16d21..191f1418fe 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -35,6 +35,7 @@
 #include "commands/defrem.h"
 #include "commands/trigger.h"
 #include "executor/executor.h"
+#include "executor/execPartition.h"
 #include "miscadmin.h"
 #include "nodes/bitmapset.h"
 #include "nodes/makefuncs.h"
@@ -4652,9 +4653,7 @@ GetAfterTriggersTableData(Oid relid, CmdType cmdType)
  * If there are no triggers in 'trigdesc' that request relevant transition
  * tables, then return NULL.
  *
- * The resulting object can be passed to the ExecAR* functions.  The caller
- * should set tcs_map or tcs_original_insert_tuple as appropriate when dealing
- * with child tables.
+ * The resulting object can be passed to the ExecAR* functions.
  *
  * Note that we copy the flags from a parent table into this struct (rather
  * than subsequently using the relation's TriggerDesc directly) so that we can
@@ -5748,13 +5747,23 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo 
*relinfo,
         */
        if (row_trigger && transition_capture != NULL)
        {
-               TupleTableSlot *original_insert_tuple = 
transition_capture->tcs_original_insert_tuple;
-               TupleConversionMap *map = transition_capture->tcs_map;
+               TupleTableSlot *original_insert_tuple;
+               PartitionRoutingInfo *pinfo = relinfo->ri_PartitionInfo;
+               TupleConversionMap *map = pinfo ?
+                                                               
pinfo->pi_PartitionToRootMap :
+                                                               
relinfo->ri_ChildToRootMap;
                bool            delete_old_table = 
transition_capture->tcs_delete_old_table;
                bool            update_old_table = 
transition_capture->tcs_update_old_table;
                bool            update_new_table = 
transition_capture->tcs_update_new_table;
                bool            insert_new_table = 
transition_capture->tcs_insert_new_table;
 
+               /*
+                * Get the originally inserted tuple from 
TransitionCaptureState and
+                * set the variable to NULL so that the same tuple is not read 
again.
+                */
+               original_insert_tuple = 
transition_capture->tcs_original_insert_tuple;
+               transition_capture->tcs_original_insert_tuple = NULL;
+
                /*
                 * For INSERT events NEW should be non-NULL, for DELETE events 
OLD
                 * should be non-NULL, whereas for UPDATE events normally both 
OLD and
diff --git a/src/backend/executor/execPartition.c 
b/src/backend/executor/execPartition.c
index d23f292cb0..d7ed3fcdbe 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -931,9 +931,22 @@ ExecInitRoutingInfo(ModifyTableState *mtstate,
        if (mtstate &&
                (mtstate->mt_transition_capture || 
mtstate->mt_oc_transition_capture))
        {
-               partrouteinfo->pi_PartitionToRootMap =
-                       
convert_tuples_by_name(RelationGetDescr(partRelInfo->ri_RelationDesc),
-                                                                  
RelationGetDescr(partRelInfo->ri_PartitionRoot));
+               ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
+
+               /*
+                * If the partition appears to be an UPDATE result relation, 
the map
+                * has already been initialized by ExecInitModifyTable(); use 
that one
+                * instead of building one from scratch.  To distinguish UPDATE 
result
+                * relations from tuple-routing result relations, we rely on 
the fact
+                * that each of the former has a distinct RT index.
+                */
+               if (node && node->rootRelation != 
partRelInfo->ri_RangeTableIndex)
+                       partrouteinfo->pi_PartitionToRootMap =
+                               partRelInfo->ri_ChildToRootMap;
+               else
+                       partrouteinfo->pi_PartitionToRootMap =
+                               
convert_tuples_by_name(RelationGetDescr(partRelInfo->ri_RelationDesc),
+                                                                          
RelationGetDescr(partRelInfo->ri_PartitionRoot));
        }
        else
                partrouteinfo->pi_PartitionToRootMap = NULL;
diff --git a/src/backend/executor/nodeModifyTable.c 
b/src/backend/executor/nodeModifyTable.c
index 1362b2f2d1..9de5a5371c 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -73,9 +73,6 @@ static TupleTableSlot 
*ExecPrepareTupleRouting(ModifyTableState *mtstate,
                                                                                
           TupleTableSlot *slot,
                                                                                
           ResultRelInfo **partRelInfo);
 static ResultRelInfo *getTargetResultRelInfo(ModifyTableState *node);
-static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate);
-static TupleConversionMap *tupconv_map_for_subplan(ModifyTableState *node,
-                                                                               
                   int whichplan);
 
 /*
  * Verify that the tuples to be produced by INSERT or UPDATE match the
@@ -339,10 +336,6 @@ ExecComputeStoredGenerated(ResultRelInfo *resultRelInfo,
  *             relations.
  *
  *             Returns RETURNING result if any, otherwise NULL.
- *
- *             This may change the currently active tuple conversion map in
- *             mtstate->mt_transition_capture, so the callers must take care to
- *             save the previous value to avoid losing track of it.
  * ----------------------------------------------------------------
  */
 static TupleTableSlot *
@@ -1051,9 +1044,7 @@ ExecCrossPartitionUpdate(ModifyTableState *mtstate,
 {
        EState     *estate = mtstate->ps.state;
        PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
-       int                     map_index;
-       TupleConversionMap *tupconv_map;
-       TupleConversionMap *saved_tcs_map = NULL;
+       TupleConversionMap *tupconv_map = resultRelInfo->ri_ChildToRootMap;
        bool            tuple_deleted;
        TupleTableSlot *epqslot = NULL;
 
@@ -1129,41 +1120,16 @@ ExecCrossPartitionUpdate(ModifyTableState *mtstate,
                }
        }
 
-       /*
-        * resultRelInfo is one of the per-subplan resultRelInfos.  So we
-        * should convert the tuple into root's tuple descriptor, since
-        * ExecInsert() starts the search from root.  The tuple conversion
-        * map list is in the order of mtstate->resultRelInfo[], so to
-        * retrieve the one for this resultRel, we need to know the
-        * position of the resultRel in mtstate->resultRelInfo[].
-        */
-       map_index = resultRelInfo - mtstate->resultRelInfo;
-       Assert(map_index >= 0 && map_index < mtstate->mt_nplans);
-       tupconv_map = tupconv_map_for_subplan(mtstate, map_index);
        if (tupconv_map != NULL)
                slot = execute_attr_map_slot(tupconv_map->attrMap,
                                                                         slot,
                                                                         
mtstate->mt_root_tuple_slot);
 
-       /*
-        * ExecInsert() may scribble on mtstate->mt_transition_capture,
-        * so save the currently active map.
-        */
-       if (mtstate->mt_transition_capture)
-               saved_tcs_map = mtstate->mt_transition_capture->tcs_map;
-
        /* Tuple routing starts from the root table. */
        Assert(mtstate->rootResultRelInfo != NULL);
        *inserted_tuple = ExecInsert(mtstate, mtstate->rootResultRelInfo, slot,
                                                                 planSlot, 
estate, canSetTag);
 
-       /* Clear the INSERT's tuple and restore the saved map. */
-       if (mtstate->mt_transition_capture)
-       {
-               mtstate->mt_transition_capture->tcs_original_insert_tuple = 
NULL;
-               mtstate->mt_transition_capture->tcs_map = saved_tcs_map;
-       }
-
        /* We're done moving. */
        return true;
 }
@@ -1868,28 +1834,6 @@ ExecSetupTransitionCaptureState(ModifyTableState 
*mtstate, EState *estate)
                        MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
                                                                           
RelationGetRelid(targetRelInfo->ri_RelationDesc),
                                                                           
CMD_UPDATE);
-
-       /*
-        * If we found that we need to collect transition tuples then we may 
also
-        * need tuple conversion maps for any children that have TupleDescs that
-        * aren't compatible with the tuplestores.  (We can share these maps
-        * between the regular and ON CONFLICT cases.)
-        */
-       if (mtstate->mt_transition_capture != NULL ||
-               mtstate->mt_oc_transition_capture != NULL)
-       {
-               ExecSetupChildParentMapForSubplan(mtstate);
-
-               /*
-                * Install the conversion map for the first plan for UPDATE and 
DELETE
-                * operations.  It will be advanced each time we switch to the 
next
-                * plan.  (INSERT operations set it every time, so we need not 
update
-                * mtstate->mt_oc_transition_capture here.)
-                */
-               if (mtstate->mt_transition_capture && mtstate->operation != 
CMD_INSERT)
-                       mtstate->mt_transition_capture->tcs_map =
-                               tupconv_map_for_subplan(mtstate, 0);
-       }
 }
 
 /*
@@ -1913,6 +1857,7 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
        ResultRelInfo *partrel;
        PartitionRoutingInfo *partrouteinfo;
        TupleConversionMap *map;
+       bool            has_before_insert_row_trig;
 
        /*
         * Look up the target partition's ResultRelInfo.  If ExecFindPartition
@@ -1927,37 +1872,17 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
        Assert(partrouteinfo != NULL);
 
        /*
-        * If we're capturing transition tuples, we might need to convert from 
the
-        * partition rowtype to root partitioned table's rowtype.
+        * If we're capturing transition tuples and there are no BEFORE
+        * triggers on the partition, we can just use the original
+        * unconverted tuple instead of converting the tuple in partition
+        * format back to root format.  We must do the conversion if such
+        * triggers exist because they may change the tuple.
         */
+       has_before_insert_row_trig = (partrel->ri_TrigDesc &&
+                                                                 
partrel->ri_TrigDesc->trig_insert_before_row);
        if (mtstate->mt_transition_capture != NULL)
-       {
-               if (partrel->ri_TrigDesc &&
-                       partrel->ri_TrigDesc->trig_insert_before_row)
-               {
-                       /*
-                        * If there are any BEFORE triggers on the partition, 
we'll have
-                        * to be ready to convert their result back to 
tuplestore format.
-                        */
-                       
mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
-                       mtstate->mt_transition_capture->tcs_map =
-                               partrouteinfo->pi_PartitionToRootMap;
-               }
-               else
-               {
-                       /*
-                        * Otherwise, just remember the original unconverted 
tuple, to
-                        * avoid a needless round trip conversion.
-                        */
-                       
mtstate->mt_transition_capture->tcs_original_insert_tuple = slot;
-                       mtstate->mt_transition_capture->tcs_map = NULL;
-               }
-       }
-       if (mtstate->mt_oc_transition_capture != NULL)
-       {
-               mtstate->mt_oc_transition_capture->tcs_map =
-                       partrouteinfo->pi_PartitionToRootMap;
-       }
+               mtstate->mt_transition_capture->tcs_original_insert_tuple =
+                       !has_before_insert_row_trig ? slot : NULL;
 
        /*
         * Convert the tuple, if necessary.
@@ -1973,58 +1898,6 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
        return slot;
 }
 
-/*
- * Initialize the child-to-root tuple conversion map array for UPDATE subplans.
- *
- * This map array is required to convert the tuple from the subplan result rel
- * to the target table descriptor. This requirement arises for two independent
- * scenarios:
- * 1. For update-tuple-routing.
- * 2. For capturing tuples in transition tables.
- */
-static void
-ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate)
-{
-       ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate);
-       ResultRelInfo *resultRelInfos = mtstate->resultRelInfo;
-       TupleDesc       outdesc;
-       int                     numResultRelInfos = mtstate->mt_nplans;
-       int                     i;
-
-       /*
-        * Build array of conversion maps from each child's TupleDesc to the one
-        * used in the target relation.  The map pointers may be NULL when no
-        * conversion is necessary, which is hopefully a common case.
-        */
-
-       /* Get tuple descriptor of the target rel. */
-       outdesc = RelationGetDescr(targetRelInfo->ri_RelationDesc);
-
-       mtstate->mt_per_subplan_tupconv_maps = (TupleConversionMap **)
-               palloc(sizeof(TupleConversionMap *) * numResultRelInfos);
-
-       for (i = 0; i < numResultRelInfos; ++i)
-       {
-               mtstate->mt_per_subplan_tupconv_maps[i] =
-                       
convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc),
-                                                                  outdesc);
-       }
-}
-
-/*
- * For a given subplan index, get the tuple conversion map.
- */
-static TupleConversionMap *
-tupconv_map_for_subplan(ModifyTableState *mtstate, int whichplan)
-{
-       /* If nobody else set the per-subplan array of maps, do so ourselves. */
-       if (mtstate->mt_per_subplan_tupconv_maps == NULL)
-               ExecSetupChildParentMapForSubplan(mtstate);
-
-       Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans);
-       return mtstate->mt_per_subplan_tupconv_maps[whichplan];
-}
-
 /* ----------------------------------------------------------------
  *        ExecModifyTable
  *
@@ -2120,17 +1993,6 @@ ExecModifyTable(PlanState *pstate)
                                junkfilter = resultRelInfo->ri_junkFilter;
                                EvalPlanQualSetPlan(&node->mt_epqstate, 
subplanstate->plan,
                                                                        
node->mt_arowmarks[node->mt_whichplan]);
-                               /* Prepare to convert transition tuples from 
this child. */
-                               if (node->mt_transition_capture != NULL)
-                               {
-                                       node->mt_transition_capture->tcs_map =
-                                               tupconv_map_for_subplan(node, 
node->mt_whichplan);
-                               }
-                               if (node->mt_oc_transition_capture != NULL)
-                               {
-                                       node->mt_oc_transition_capture->tcs_map 
=
-                                               tupconv_map_for_subplan(node, 
node->mt_whichplan);
-                               }
                                continue;
                        }
                        else
@@ -2299,6 +2161,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, 
int eflags)
        int                     i;
        Relation        rel;
        bool            update_tuple_routing_needed = node->partColsUpdated;
+       ResultRelInfo *rootResultRel;
 
        /* check for unsupported flags */
        Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -2321,8 +2184,13 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, 
int eflags)
 
        /* If modifying a partitioned table, initialize the root table info */
        if (node->rootResultRelIndex >= 0)
+       {
                mtstate->rootResultRelInfo = estate->es_root_result_relations +
                        node->rootResultRelIndex;
+               rootResultRel = mtstate->rootResultRelInfo;
+       }
+       else
+               rootResultRel = mtstate->resultRelInfo;
 
        mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans);
        mtstate->mt_nplans = nplans;
@@ -2331,6 +2199,13 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, 
int eflags)
        EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, 
node->epqParam);
        mtstate->fireBSTriggers = true;
 
+       /*
+        * Build state for collecting transition tuples.  This requires having a
+        * valid trigger query context, so skip it in explain-only mode.
+        */
+       if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
+               ExecSetupTransitionCaptureState(mtstate, estate);
+
        /*
         * call ExecInitNode on each of the plans to be executed and save the
         * results into the array "mt_plans".  This is also a convenient place 
to
@@ -2397,6 +2272,20 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, 
int eflags)
                                                                                
                                         eflags);
                }
 
+               /*
+                * If needed, initialize a map to convert tuples in the child 
format
+                * to the format of the table mentioned in the query (root 
relation).
+                * It's needed for update tuple routing, because the routing 
starts
+                * from the root relation.  It's also needed for capturing 
transition
+                * tuples, because the transition tuple store can only store 
tuples
+                * in the root table format.
+                */
+               if (update_tuple_routing_needed ||
+                       (mtstate->mt_transition_capture &&
+                        mtstate->operation != CMD_INSERT))
+                       resultRelInfo->ri_ChildToRootMap =
+                               
convert_tuples_by_name(RelationGetDescr(resultRelInfo->ri_RelationDesc),
+                                                                          
RelationGetDescr(rootResultRel->ri_RelationDesc));
                resultRelInfo++;
                i++;
        }
@@ -2421,26 +2310,12 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, 
int eflags)
                        ExecSetupPartitionTupleRouting(estate, mtstate, rel);
 
        /*
-        * Build state for collecting transition tuples.  This requires having a
-        * valid trigger query context, so skip it in explain-only mode.
-        */
-       if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
-               ExecSetupTransitionCaptureState(mtstate, estate);
-
-       /*
-        * Construct mapping from each of the per-subplan partition attnos to 
the
-        * root attno.  This is required when during update row movement the 
tuple
-        * descriptor of a source partition does not match the root partitioned
-        * table descriptor.  In such a case we need to convert tuples to the 
root
-        * tuple descriptor, because the search for destination partition starts
-        * from the root.  We'll also need a slot to store these converted 
tuples.
-        * We can skip this setup if it's not a partition key update.
+        * For update row movement we'll need a dedicated slot to store the
+        * tuples that have been converted from partition format to the root
+        * table format.
         */
        if (update_tuple_routing_needed)
-       {
-               ExecSetupChildParentMapForSubplan(mtstate);
                mtstate->mt_root_tuple_slot = table_slot_create(rel, NULL);
-       }
 
        /*
         * Initialize any WITH CHECK OPTION constraints if needed.
diff --git a/src/include/commands/trigger.h b/src/include/commands/trigger.h
index a46feeedb0..bb080980c0 100644
--- a/src/include/commands/trigger.h
+++ b/src/include/commands/trigger.h
@@ -45,7 +45,7 @@ typedef struct TriggerData
  * The state for capturing old and new tuples into transition tables for a
  * single ModifyTable node (or other operation source, e.g. copy.c).
  *
- * This is per-caller to avoid conflicts in setting tcs_map or
+ * This is per-caller to avoid conflicts in setting
  * tcs_original_insert_tuple.  Note, however, that the pointed-to
  * private data may be shared across multiple callers.
  */
@@ -64,14 +64,6 @@ typedef struct TransitionCaptureState
        bool            tcs_update_new_table;
        bool            tcs_insert_new_table;
 
-       /*
-        * For UPDATE and DELETE, AfterTriggerSaveEvent may need to convert the
-        * new and old tuples from a child table's format to the format of the
-        * relation named in a query so that it is compatible with the 
transition
-        * tuplestores.  The caller must store the conversion map here if so.
-        */
-       TupleConversionMap *tcs_map;
-
        /*
         * For INSERT and COPY, it would be wasteful to convert tuples from 
child
         * format to parent format after they have already been converted in the
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 62e20fdfdc..62eed595ca 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -485,6 +485,12 @@ typedef struct ResultRelInfo
 
        /* For use by copy.c when performing multi-inserts */
        struct CopyMultiInsertBuffer *ri_CopyMultiInsertBuffer;
+
+       /*
+        * Map to convert child sublan tuples to root parent format, set only if
+        * either update row movement or transition tuple capture is active.
+        */
+       TupleConversionMap *ri_ChildToRootMap;
 } ResultRelInfo;
 
 /* ----------------
@@ -1185,9 +1191,6 @@ typedef struct ModifyTableState
 
        /* controls transition table population for INSERT...ON CONFLICT UPDATE 
*/
        struct TransitionCaptureState *mt_oc_transition_capture;
-
-       /* Per plan map for tuple conversion from child to root */
-       TupleConversionMap **mt_per_subplan_tupconv_maps;
 } ModifyTableState;
 
 /* ----------------
-- 
2.16.5

From d3a17640835035e95c56859b4eed652444ec5b86 Mon Sep 17 00:00:00 2001
From: amit <amitlangot...@gmail.com>
Date: Fri, 19 Jul 2019 14:53:20 +0900
Subject: [PATCH v9 2/4] Remove es_result_relation_info

This changes many places that access the currently active result
relation via es_result_relation_info to instead receive it directly
via function parameters.  Maintaining that state in
es_result_relation_info has become cumbersome, especially with
partitioning where each partition gets its own result relation info.
Having to set and reset it across arbitrary operations has caused
bugs in the past.
---
 src/backend/commands/copy.c              |  17 +--
 src/backend/commands/tablecmds.c         |   2 -
 src/backend/executor/execIndexing.c      |  12 +-
 src/backend/executor/execMain.c          |   5 -
 src/backend/executor/execReplication.c   |  22 ++--
 src/backend/executor/execUtils.c         |   2 -
 src/backend/executor/nodeModifyTable.c   | 188 +++++++++++++------------------
 src/backend/replication/logical/worker.c |  26 +++--
 src/include/executor/executor.h          |  19 +++-
 src/include/executor/nodeModifyTable.h   |   3 +-
 src/include/nodes/execnodes.h            |   1 -
 src/test/regress/expected/insert.out     |   4 +-
 src/test/regress/sql/insert.sql          |   4 +-
 13 files changed, 130 insertions(+), 175 deletions(-)

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 42a147b67d..38cffde583 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2441,9 +2441,6 @@ CopyMultiInsertBufferFlush(CopyMultiInsertInfo *miinfo,
        ResultRelInfo *resultRelInfo = buffer->resultRelInfo;
        TupleTableSlot **slots = buffer->slots;
 
-       /* Set es_result_relation_info to the ResultRelInfo we're flushing. */
-       estate->es_result_relation_info = resultRelInfo;
-
        /*
         * Print error context information correctly, if one of the operations
         * below fail.
@@ -2476,7 +2473,8 @@ CopyMultiInsertBufferFlush(CopyMultiInsertInfo *miinfo,
 
                        cstate->cur_lineno = buffer->linenos[i];
                        recheckIndexes =
-                               ExecInsertIndexTuples(buffer->slots[i], estate, 
false, NULL,
+                               ExecInsertIndexTuples(resultRelInfo,
+                                                                         
buffer->slots[i], estate, false, NULL,
                                                                          NIL);
                        ExecARInsertTriggers(estate, resultRelInfo,
                                                                 slots[i], 
recheckIndexes,
@@ -2841,7 +2839,6 @@ CopyFrom(CopyState cstate)
 
        estate->es_result_relations = resultRelInfo;
        estate->es_num_result_relations = 1;
-       estate->es_result_relation_info = resultRelInfo;
 
        ExecInitRangeTable(estate, cstate->range_table);
 
@@ -3112,11 +3109,6 @@ CopyFrom(CopyState cstate)
                                prevResultRelInfo = resultRelInfo;
                        }
 
-                       /*
-                        * For ExecInsertIndexTuples() to work on the 
partition's indexes
-                        */
-                       estate->es_result_relation_info = resultRelInfo;
-
                        /*
                         * If we're capturing transition tuples, we might need 
to convert
                         * from the partition rowtype to root rowtype.
@@ -3221,7 +3213,7 @@ CopyFrom(CopyState cstate)
                                /* Compute stored generated columns */
                                if 
(resultRelInfo->ri_RelationDesc->rd_att->constr &&
                                        
resultRelInfo->ri_RelationDesc->rd_att->constr->has_generated_stored)
-                                       ExecComputeStoredGenerated(estate, 
myslot);
+                                       
ExecComputeStoredGenerated(resultRelInfo, estate, myslot);
 
                                /*
                                 * If the target is a plain table, check the 
constraints of
@@ -3292,7 +3284,8 @@ CopyFrom(CopyState cstate)
                                                                                
   myslot, mycid, ti_options, bistate);
 
                                                if 
(resultRelInfo->ri_NumIndices > 0)
-                                                       recheckIndexes = 
ExecInsertIndexTuples(myslot,
+                                                       recheckIndexes = 
ExecInsertIndexTuples(resultRelInfo,
+                                                                               
                                                   myslot,
                                                                                
                                                   estate,
                                                                                
                                                   false,
                                                                                
                                                   NULL,
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index daa80ec4aa..1495d2c60e 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -1749,7 +1749,6 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, 
List *relids_logged,
        resultRelInfo = resultRelInfos;
        foreach(cell, rels)
        {
-               estate->es_result_relation_info = resultRelInfo;
                ExecBSTruncateTriggers(estate, resultRelInfo);
                resultRelInfo++;
        }
@@ -1879,7 +1878,6 @@ ExecuteTruncateGuts(List *explicit_rels, List *relids, 
List *relids_logged,
        resultRelInfo = resultRelInfos;
        foreach(cell, rels)
        {
-               estate->es_result_relation_info = resultRelInfo;
                ExecASTruncateTriggers(estate, resultRelInfo);
                resultRelInfo++;
        }
diff --git a/src/backend/executor/execIndexing.c 
b/src/backend/executor/execIndexing.c
index 40bd8049f0..357bf17e31 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -270,7 +270,8 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
  * ----------------------------------------------------------------
  */
 List *
-ExecInsertIndexTuples(TupleTableSlot *slot,
+ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
+                                         TupleTableSlot *slot,
                                          EState *estate,
                                          bool noDupErr,
                                          bool *specConflict,
@@ -278,7 +279,6 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
 {
        ItemPointer tupleid = &slot->tts_tid;
        List       *result = NIL;
-       ResultRelInfo *resultRelInfo;
        int                     i;
        int                     numIndices;
        RelationPtr relationDescs;
@@ -293,7 +293,6 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
        /*
         * Get information from the result relation info structure.
         */
-       resultRelInfo = estate->es_result_relation_info;
        numIndices = resultRelInfo->ri_NumIndices;
        relationDescs = resultRelInfo->ri_IndexRelationDescs;
        indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
@@ -479,11 +478,10 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
  * ----------------------------------------------------------------
  */
 bool
-ExecCheckIndexConstraints(TupleTableSlot *slot,
+ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
                                                  EState *estate, ItemPointer 
conflictTid,
                                                  List *arbiterIndexes)
 {
-       ResultRelInfo *resultRelInfo;
        int                     i;
        int                     numIndices;
        RelationPtr relationDescs;
@@ -498,10 +496,6 @@ ExecCheckIndexConstraints(TupleTableSlot *slot,
        ItemPointerSetInvalid(conflictTid);
        ItemPointerSetInvalid(&invalidItemPtr);
 
-       /*
-        * Get information from the result relation info structure.
-        */
-       resultRelInfo = estate->es_result_relation_info;
        numIndices = resultRelInfo->ri_NumIndices;
        relationDescs = resultRelInfo->ri_IndexRelationDescs;
        indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index c46eb8d646..879ac8fbcd 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -857,9 +857,6 @@ InitPlan(QueryDesc *queryDesc, int eflags)
                estate->es_result_relations = resultRelInfos;
                estate->es_num_result_relations = numResultRelations;
 
-               /* es_result_relation_info is NULL except when within 
ModifyTable */
-               estate->es_result_relation_info = NULL;
-
                /*
                 * In the partitioned result relation case, also build 
ResultRelInfos
                 * for all the partitioned table roots, because we will need 
them to
@@ -903,7 +900,6 @@ InitPlan(QueryDesc *queryDesc, int eflags)
                 */
                estate->es_result_relations = NULL;
                estate->es_num_result_relations = 0;
-               estate->es_result_relation_info = NULL;
                estate->es_root_result_relations = NULL;
                estate->es_num_root_result_relations = 0;
        }
@@ -2814,7 +2810,6 @@ EvalPlanQualStart(EPQState *epqstate, Plan *planTree)
                        rcestate->es_num_root_result_relations = 
numRootResultRels;
                }
        }
-       /* es_result_relation_info must NOT be copied */
        /* es_trig_target_relations must NOT be copied */
        rcestate->es_top_eflags = parentestate->es_top_eflags;
        rcestate->es_instrument = parentestate->es_instrument;
diff --git a/src/backend/executor/execReplication.c 
b/src/backend/executor/execReplication.c
index 95e027c970..14d11e75c3 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -390,10 +390,10 @@ retry:
  * Caller is responsible for opening the indexes.
  */
 void
-ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot)
+ExecSimpleRelationInsert(ResultRelInfo *resultRelInfo,
+                                                EState *estate, TupleTableSlot 
*slot)
 {
        bool            skip_tuple = false;
-       ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
        Relation        rel = resultRelInfo->ri_RelationDesc;
 
        /* For now we support only tables. */
@@ -416,7 +416,7 @@ ExecSimpleRelationInsert(EState *estate, TupleTableSlot 
*slot)
                /* Compute stored generated columns */
                if (rel->rd_att->constr &&
                        rel->rd_att->constr->has_generated_stored)
-                       ExecComputeStoredGenerated(estate, slot);
+                       ExecComputeStoredGenerated(resultRelInfo, estate, slot);
 
                /* Check the constraints of the tuple */
                if (rel->rd_att->constr)
@@ -428,7 +428,8 @@ ExecSimpleRelationInsert(EState *estate, TupleTableSlot 
*slot)
                simple_table_tuple_insert(resultRelInfo->ri_RelationDesc, slot);
 
                if (resultRelInfo->ri_NumIndices > 0)
-                       recheckIndexes = ExecInsertIndexTuples(slot, estate, 
false, NULL,
+                       recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
+                                                                               
                   slot, estate, false, NULL,
                                                                                
                   NIL);
 
                /* AFTER ROW INSERT Triggers */
@@ -452,11 +453,11 @@ ExecSimpleRelationInsert(EState *estate, TupleTableSlot 
*slot)
  * Caller is responsible for opening the indexes.
  */
 void
-ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate,
+ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
+                                                EState *estate, EPQState 
*epqstate,
                                                 TupleTableSlot *searchslot, 
TupleTableSlot *slot)
 {
        bool            skip_tuple = false;
-       ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
        Relation        rel = resultRelInfo->ri_RelationDesc;
        ItemPointer tid = &(searchslot->tts_tid);
 
@@ -482,7 +483,7 @@ ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate,
                /* Compute stored generated columns */
                if (rel->rd_att->constr &&
                        rel->rd_att->constr->has_generated_stored)
-                       ExecComputeStoredGenerated(estate, slot);
+                       ExecComputeStoredGenerated(resultRelInfo, estate, slot);
 
                /* Check the constraints of the tuple */
                if (rel->rd_att->constr)
@@ -494,7 +495,8 @@ ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate,
                                                                  
&update_indexes);
 
                if (resultRelInfo->ri_NumIndices > 0 && update_indexes)
-                       recheckIndexes = ExecInsertIndexTuples(slot, estate, 
false, NULL,
+                       recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
+                                                                               
                   slot, estate, false, NULL,
                                                                                
                   NIL);
 
                /* AFTER ROW UPDATE Triggers */
@@ -513,11 +515,11 @@ ExecSimpleRelationUpdate(EState *estate, EPQState 
*epqstate,
  * Caller is responsible for opening the indexes.
  */
 void
-ExecSimpleRelationDelete(EState *estate, EPQState *epqstate,
+ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo,
+                                                EState *estate, EPQState 
*epqstate,
                                                 TupleTableSlot *searchslot)
 {
        bool            skip_tuple = false;
-       ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
        Relation        rel = resultRelInfo->ri_RelationDesc;
        ItemPointer tid = &searchslot->tts_tid;
 
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index ee0239b146..06bd715096 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -124,8 +124,6 @@ CreateExecutorState(void)
 
        estate->es_result_relations = NULL;
        estate->es_num_result_relations = 0;
-       estate->es_result_relation_info = NULL;
-
        estate->es_root_result_relations = NULL;
        estate->es_num_root_result_relations = 0;
 
diff --git a/src/backend/executor/nodeModifyTable.c 
b/src/backend/executor/nodeModifyTable.c
index 9ba1d78344..b01601578a 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -70,7 +70,8 @@ static TupleTableSlot 
*ExecPrepareTupleRouting(ModifyTableState *mtstate,
                                                                                
           EState *estate,
                                                                                
           PartitionTupleRouting *proute,
                                                                                
           ResultRelInfo *targetRelInfo,
-                                                                               
           TupleTableSlot *slot);
+                                                                               
           TupleTableSlot *slot,
+                                                                               
           ResultRelInfo **partRelInfo);
 static ResultRelInfo *getTargetResultRelInfo(ModifyTableState *node);
 static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate);
 static TupleConversionMap *tupconv_map_for_subplan(ModifyTableState *node,
@@ -246,9 +247,9 @@ ExecCheckTIDVisible(EState *estate,
  * Compute stored generated columns for a tuple
  */
 void
-ExecComputeStoredGenerated(EState *estate, TupleTableSlot *slot)
+ExecComputeStoredGenerated(ResultRelInfo *resultRelInfo,
+                                                  EState *estate, 
TupleTableSlot *slot)
 {
-       ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
        Relation        rel = resultRelInfo->ri_RelationDesc;
        TupleDesc       tupdesc = RelationGetDescr(rel);
        int                     natts = tupdesc->natts;
@@ -334,32 +335,48 @@ ExecComputeStoredGenerated(EState *estate, TupleTableSlot 
*slot)
  *             ExecInsert
  *
  *             For INSERT, we have to insert the tuple into the target relation
- *             and insert appropriate tuples into the index relations.
+ *             (or partition thereof) and insert appropriate tuples into the 
index
+ *             relations.
  *
  *             Returns RETURNING result if any, otherwise NULL.
+ *
+ *             This may change the currently active tuple conversion map in
+ *             mtstate->mt_transition_capture, so the callers must take care to
+ *             save the previous value to avoid losing track of it.
  * ----------------------------------------------------------------
  */
 static TupleTableSlot *
 ExecInsert(ModifyTableState *mtstate,
+                  ResultRelInfo *resultRelInfo,
                   TupleTableSlot *slot,
                   TupleTableSlot *planSlot,
                   EState *estate,
                   bool canSetTag)
 {
-       ResultRelInfo *resultRelInfo;
        Relation        resultRelationDesc;
        List       *recheckIndexes = NIL;
        TupleTableSlot *result = NULL;
        TransitionCaptureState *ar_insert_trig_tcs;
        ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
        OnConflictAction onconflict = node->onConflictAction;
-
-       ExecMaterializeSlot(slot);
+       PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
 
        /*
-        * get information on the (current) result relation
+        * If the input result relation is a partitioned table, find the leaf
+        * partition to insert the tuple into.
         */
-       resultRelInfo = estate->es_result_relation_info;
+       if (proute)
+       {
+               ResultRelInfo *partRelInfo;
+
+               slot = ExecPrepareTupleRouting(mtstate, estate, proute,
+                                                                          
resultRelInfo, slot,
+                                                                          
&partRelInfo);
+               resultRelInfo = partRelInfo;
+       }
+
+       ExecMaterializeSlot(slot);
+
        resultRelationDesc = resultRelInfo->ri_RelationDesc;
 
        /*
@@ -392,7 +409,7 @@ ExecInsert(ModifyTableState *mtstate,
                 */
                if (resultRelationDesc->rd_att->constr &&
                        
resultRelationDesc->rd_att->constr->has_generated_stored)
-                       ExecComputeStoredGenerated(estate, slot);
+                       ExecComputeStoredGenerated(resultRelInfo, estate, slot);
 
                /*
                 * insert into foreign table: let the FDW do it
@@ -427,7 +444,7 @@ ExecInsert(ModifyTableState *mtstate,
                 */
                if (resultRelationDesc->rd_att->constr &&
                        
resultRelationDesc->rd_att->constr->has_generated_stored)
-                       ExecComputeStoredGenerated(estate, slot);
+                       ExecComputeStoredGenerated(resultRelInfo, estate, slot);
 
                /*
                 * Check any RLS WITH CHECK policies.
@@ -489,8 +506,8 @@ ExecInsert(ModifyTableState *mtstate,
                         */
        vlock:
                        specConflict = false;
-                       if (!ExecCheckIndexConstraints(slot, estate, 
&conflictTid,
-                                                                               
   arbiterIndexes))
+                       if (!ExecCheckIndexConstraints(resultRelInfo, slot, 
estate,
+                                                                               
   &conflictTid, arbiterIndexes))
                        {
                                /* committed conflict tuple found */
                                if (onconflict == ONCONFLICT_UPDATE)
@@ -550,7 +567,8 @@ ExecInsert(ModifyTableState *mtstate,
                                                                                
   specToken);
 
                        /* insert index entries for tuple */
-                       recheckIndexes = ExecInsertIndexTuples(slot, estate, 
true,
+                       recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
+                                                                               
                   slot, estate, true,
                                                                                
                   &specConflict,
                                                                                
                   arbiterIndexes);
 
@@ -589,7 +607,8 @@ ExecInsert(ModifyTableState *mtstate,
 
                        /* insert index entries for tuple */
                        if (resultRelInfo->ri_NumIndices > 0)
-                               recheckIndexes = ExecInsertIndexTuples(slot, 
estate, false, NULL,
+                               recheckIndexes = 
ExecInsertIndexTuples(resultRelInfo,
+                                                                               
                           slot, estate, false, NULL,
                                                                                
                           NIL);
                }
        }
@@ -675,6 +694,7 @@ ExecInsert(ModifyTableState *mtstate,
  */
 static TupleTableSlot *
 ExecDelete(ModifyTableState *mtstate,
+                  ResultRelInfo *resultRelInfo,
                   ItemPointer tupleid,
                   HeapTuple oldtuple,
                   TupleTableSlot *planSlot,
@@ -686,7 +706,6 @@ ExecDelete(ModifyTableState *mtstate,
                   bool *tupleDeleted,
                   TupleTableSlot **epqreturnslot)
 {
-       ResultRelInfo *resultRelInfo;
        Relation        resultRelationDesc;
        TM_Result       result;
        TM_FailureData tmfd;
@@ -696,10 +715,6 @@ ExecDelete(ModifyTableState *mtstate,
        if (tupleDeleted)
                *tupleDeleted = false;
 
-       /*
-        * get information on the (current) result relation
-        */
-       resultRelInfo = estate->es_result_relation_info;
        resultRelationDesc = resultRelInfo->ri_RelationDesc;
 
        /* BEFORE ROW DELETE Triggers */
@@ -1035,6 +1050,7 @@ ldelete:;
  */
 static TupleTableSlot *
 ExecUpdate(ModifyTableState *mtstate,
+                  ResultRelInfo *resultRelInfo,
                   ItemPointer tupleid,
                   HeapTuple oldtuple,
                   TupleTableSlot *slot,
@@ -1043,12 +1059,10 @@ ExecUpdate(ModifyTableState *mtstate,
                   EState *estate,
                   bool canSetTag)
 {
-       ResultRelInfo *resultRelInfo;
        Relation        resultRelationDesc;
        TM_Result       result;
        TM_FailureData tmfd;
        List       *recheckIndexes = NIL;
-       TupleConversionMap *saved_tcs_map = NULL;
 
        /*
         * abort the operation if not running transactions
@@ -1058,10 +1072,6 @@ ExecUpdate(ModifyTableState *mtstate,
 
        ExecMaterializeSlot(slot);
 
-       /*
-        * get information on the (current) result relation
-        */
-       resultRelInfo = estate->es_result_relation_info;
        resultRelationDesc = resultRelInfo->ri_RelationDesc;
 
        /* BEFORE ROW UPDATE Triggers */
@@ -1088,7 +1098,7 @@ ExecUpdate(ModifyTableState *mtstate,
                 */
                if (resultRelationDesc->rd_att->constr &&
                        
resultRelationDesc->rd_att->constr->has_generated_stored)
-                       ExecComputeStoredGenerated(estate, slot);
+                       ExecComputeStoredGenerated(resultRelInfo, estate, slot);
 
                /*
                 * update in foreign table: let the FDW do it
@@ -1125,7 +1135,7 @@ ExecUpdate(ModifyTableState *mtstate,
                 */
                if (resultRelationDesc->rd_att->constr &&
                        
resultRelationDesc->rd_att->constr->has_generated_stored)
-                       ExecComputeStoredGenerated(estate, slot);
+                       ExecComputeStoredGenerated(resultRelInfo, estate, slot);
 
                /*
                 * Check any RLS UPDATE WITH CHECK policies
@@ -1175,6 +1185,7 @@ lreplace:;
                        PartitionTupleRouting *proute = 
mtstate->mt_partition_tuple_routing;
                        int                     map_index;
                        TupleConversionMap *tupconv_map;
+                       TupleConversionMap *saved_tcs_map = NULL;
 
                        /*
                         * Disallow an INSERT ON CONFLICT DO UPDATE that causes 
the
@@ -1200,9 +1211,12 @@ lreplace:;
                         * Row movement, part 1.  Delete the tuple, but skip 
RETURNING
                         * processing. We want to return rows from INSERT.
                         */
-                       ExecDelete(mtstate, tupleid, oldtuple, planSlot, 
epqstate,
-                                          estate, false, false /* canSetTag */ 
,
-                                          true /* changingPart */ , 
&tuple_deleted, &epqslot);
+                       ExecDelete(mtstate, resultRelInfo, tupleid, oldtuple, 
planSlot,
+                                          epqstate, estate,
+                                          false,       /* processReturning */
+                                          false,       /* canSetTag */
+                                          true,        /* changingPart */
+                                          &tuple_deleted, &epqslot);
 
                        /*
                         * For some reason if DELETE didn't happen (e.g. 
trigger prevented
@@ -1242,16 +1256,6 @@ lreplace:;
                                }
                        }
 
-                       /*
-                        * Updates set the transition capture map only when a 
new subplan
-                        * is chosen.  But for inserts, it is set for each row. 
So after
-                        * INSERT, we need to revert back to the map created 
for UPDATE;
-                        * otherwise the next UPDATE will incorrectly use the 
one created
-                        * for INSERT.  So first save the one created for 
UPDATE.
-                        */
-                       if (mtstate->mt_transition_capture)
-                               saved_tcs_map = 
mtstate->mt_transition_capture->tcs_map;
-
                        /*
                         * resultRelInfo is one of the per-subplan 
resultRelInfos.  So we
                         * should convert the tuple into root's tuple 
descriptor, since
@@ -1269,18 +1273,18 @@ lreplace:;
                                                                                
         mtstate->mt_root_tuple_slot);
 
                        /*
-                        * Prepare for tuple routing, making it look like we're 
inserting
-                        * into the root.
+                        * ExecInsert() may scribble on 
mtstate->mt_transition_capture,
+                        * so save the currently active map.
                         */
-                       Assert(mtstate->rootResultRelInfo != NULL);
-                       slot = ExecPrepareTupleRouting(mtstate, estate, proute,
-                                                                               
   mtstate->rootResultRelInfo, slot);
+                       if (mtstate->mt_transition_capture)
+                               saved_tcs_map = 
mtstate->mt_transition_capture->tcs_map;
 
-                       ret_slot = ExecInsert(mtstate, slot, planSlot,
-                                                                 estate, 
canSetTag);
+                       /* Tuple routing starts from the root table. */
+                       Assert(mtstate->rootResultRelInfo != NULL);
+                       ret_slot = ExecInsert(mtstate, 
mtstate->rootResultRelInfo, slot,
+                                                                 planSlot, 
estate, canSetTag);
 
-                       /* Revert ExecPrepareTupleRouting's node change. */
-                       estate->es_result_relation_info = resultRelInfo;
+                       /* Clear the INSERT's tuple and restore the saved map. 
*/
                        if (mtstate->mt_transition_capture)
                        {
                                
mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
@@ -1444,7 +1448,8 @@ lreplace:;
 
                /* insert index entries for tuple if necessary */
                if (resultRelInfo->ri_NumIndices > 0 && update_indexes)
-                       recheckIndexes = ExecInsertIndexTuples(slot, estate, 
false, NULL, NIL);
+                       recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
+                                                                               
                   slot, estate, false, NULL, NIL);
        }
 
        if (canSetTag)
@@ -1683,7 +1688,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
         */
 
        /* Execute UPDATE with projection */
-       *returning = ExecUpdate(mtstate, conflictTid, NULL,
+       *returning = ExecUpdate(mtstate, resultRelInfo, conflictTid, NULL,
                                                        
resultRelInfo->ri_onConflict->oc_ProjSlot,
                                                        planSlot,
                                                        &mtstate->mt_epqstate, 
mtstate->ps.state,
@@ -1840,40 +1845,36 @@ ExecSetupTransitionCaptureState(ModifyTableState 
*mtstate, EState *estate)
  * ExecPrepareTupleRouting --- prepare for routing one tuple
  *
  * Determine the partition in which the tuple in slot is to be inserted,
- * and modify mtstate and estate to prepare for it.
- *
- * Caller must revert the estate changes after executing the insertion!
- * In mtstate, transition capture changes may also need to be reverted.
+ * and return its ResultRelInfo in *partRelInfo.  The returned value is
+ * a slot holding the tuple of the partition rowtype.
  *
- * Returns a slot holding the tuple of the partition rowtype.
+ * This also sets the transition table information in mtstate based on the
+ * selected partition.
  */
 static TupleTableSlot *
 ExecPrepareTupleRouting(ModifyTableState *mtstate,
                                                EState *estate,
                                                PartitionTupleRouting *proute,
                                                ResultRelInfo *targetRelInfo,
-                                               TupleTableSlot *slot)
+                                               TupleTableSlot *slot,
+                                               ResultRelInfo **partRelInfo)
 {
        ResultRelInfo *partrel;
        PartitionRoutingInfo *partrouteinfo;
        TupleConversionMap *map;
 
        /*
-        * Lookup the target partition's ResultRelInfo.  If ExecFindPartition 
does
-        * not find a valid partition for the tuple in 'slot' then an error is
+        * Look up the target partition's ResultRelInfo.  If ExecFindPartition
+        * doesn't find a valid partition for the tuple in 'slot' then an error 
is
         * raised.  An error may also be raised if the found partition is not a
         * valid target for INSERTs.  This is required since a partitioned table
         * UPDATE to another partition becomes a DELETE+INSERT.
         */
        partrel = ExecFindPartition(mtstate, targetRelInfo, proute, slot, 
estate);
+       *partRelInfo = partrel;
        partrouteinfo = partrel->ri_PartitionInfo;
        Assert(partrouteinfo != NULL);
 
-       /*
-        * Make it look like we are inserting into the partition.
-        */
-       estate->es_result_relation_info = partrel;
-
        /*
         * If we're capturing transition tuples, we might need to convert from 
the
         * partition rowtype to root partitioned table's rowtype.
@@ -1984,10 +1985,8 @@ static TupleTableSlot *
 ExecModifyTable(PlanState *pstate)
 {
        ModifyTableState *node = castNode(ModifyTableState, pstate);
-       PartitionTupleRouting *proute = node->mt_partition_tuple_routing;
        EState     *estate = node->ps.state;
        CmdType         operation = node->operation;
-       ResultRelInfo *saved_resultRelInfo;
        ResultRelInfo *resultRelInfo;
        PlanState  *subplanstate;
        JunkFilter *junkfilter;
@@ -2035,17 +2034,6 @@ ExecModifyTable(PlanState *pstate)
        subplanstate = node->mt_plans[node->mt_whichplan];
        junkfilter = resultRelInfo->ri_junkFilter;
 
-       /*
-        * es_result_relation_info must point to the currently active result
-        * relation while we are within this ModifyTable node.  Even though
-        * ModifyTable nodes can't be nested statically, they can be nested
-        * dynamically (since our subplan could include a reference to a 
modifying
-        * CTE).  So we have to save and restore the caller's value.
-        */
-       saved_resultRelInfo = estate->es_result_relation_info;
-
-       estate->es_result_relation_info = resultRelInfo;
-
        /*
         * Fetch rows from subplan(s), and execute the required table 
modification
         * for each row.
@@ -2079,7 +2067,6 @@ ExecModifyTable(PlanState *pstate)
                                resultRelInfo++;
                                subplanstate = 
node->mt_plans[node->mt_whichplan];
                                junkfilter = resultRelInfo->ri_junkFilter;
-                               estate->es_result_relation_info = resultRelInfo;
                                EvalPlanQualSetPlan(&node->mt_epqstate, 
subplanstate->plan,
                                                                        
node->mt_arowmarks[node->mt_whichplan]);
                                /* Prepare to convert transition tuples from 
this child. */
@@ -2124,7 +2111,6 @@ ExecModifyTable(PlanState *pstate)
                         */
                        slot = ExecProcessReturning(resultRelInfo, NULL, 
planSlot);
 
-                       estate->es_result_relation_info = saved_resultRelInfo;
                        return slot;
                }
 
@@ -2207,25 +2193,21 @@ ExecModifyTable(PlanState *pstate)
                switch (operation)
                {
                        case CMD_INSERT:
-                               /* Prepare for tuple routing if needed. */
-                               if (proute)
-                                       slot = ExecPrepareTupleRouting(node, 
estate, proute,
-                                                                               
                   resultRelInfo, slot);
-                               slot = ExecInsert(node, slot, planSlot,
+                               slot = ExecInsert(node, resultRelInfo, slot, 
planSlot,
                                                                  estate, 
node->canSetTag);
-                               /* Revert ExecPrepareTupleRouting's state 
change. */
-                               if (proute)
-                                       estate->es_result_relation_info = 
resultRelInfo;
                                break;
                        case CMD_UPDATE:
-                               slot = ExecUpdate(node, tupleid, oldtuple, 
slot, planSlot,
-                                                                 
&node->mt_epqstate, estate, node->canSetTag);
+                               slot = ExecUpdate(node, resultRelInfo, tupleid, 
oldtuple, slot,
+                                                                 planSlot, 
&node->mt_epqstate, estate,
+                                                                 
node->canSetTag);
                                break;
                        case CMD_DELETE:
-                               slot = ExecDelete(node, tupleid, oldtuple, 
planSlot,
-                                                                 
&node->mt_epqstate, estate,
-                                                                 true, 
node->canSetTag,
-                                                                 false /* 
changingPart */ , NULL, NULL);
+                               slot = ExecDelete(node, resultRelInfo, tupleid, 
oldtuple,
+                                                                 planSlot, 
&node->mt_epqstate, estate,
+                                                                 true,         
/* processReturning */
+                                                                 
node->canSetTag,
+                                                                 false,        
/* changingPart */
+                                                                 NULL, NULL);
                                break;
                        default:
                                elog(ERROR, "unknown operation");
@@ -2237,15 +2219,9 @@ ExecModifyTable(PlanState *pstate)
                 * the work on next call.
                 */
                if (slot)
-               {
-                       estate->es_result_relation_info = saved_resultRelInfo;
                        return slot;
-               }
        }
 
-       /* Restore es_result_relation_info before exiting */
-       estate->es_result_relation_info = saved_resultRelInfo;
-
        /*
         * We're done, but fire AFTER STATEMENT triggers before exiting.
         */
@@ -2266,7 +2242,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, 
int eflags)
        ModifyTableState *mtstate;
        CmdType         operation = node->operation;
        int                     nplans = list_length(node->plans);
-       ResultRelInfo *saved_resultRelInfo;
        ResultRelInfo *resultRelInfo;
        Plan       *subplan;
        ListCell   *l;
@@ -2309,14 +2284,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, 
int eflags)
         * call ExecInitNode on each of the plans to be executed and save the
         * results into the array "mt_plans".  This is also a convenient place 
to
         * verify that the proposed target relations are valid and open their
-        * indexes for insertion of new index entries.  Note we *must* set
-        * estate->es_result_relation_info correctly while we initialize each
-        * sub-plan; external modules such as FDWs may depend on that (see
-        * contrib/postgres_fdw/postgres_fdw.c: postgresBeginDirectModify() as 
one
-        * example).
+        * indexes for insertion of new index entries.
         */
-       saved_resultRelInfo = estate->es_result_relation_info;
-
        resultRelInfo = mtstate->resultRelInfo;
        i = 0;
        foreach(l, node->plans)
@@ -2358,7 +2327,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, 
int eflags)
                        update_tuple_routing_needed = true;
 
                /* Now init the plan for this result rel */
-               estate->es_result_relation_info = resultRelInfo;
                mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags);
                mtstate->mt_scans[i] =
                        ExecInitExtraTupleSlot(mtstate->ps.state, 
ExecGetResultType(mtstate->mt_plans[i]),
@@ -2382,8 +2350,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, 
int eflags)
                i++;
        }
 
-       estate->es_result_relation_info = saved_resultRelInfo;
-
        /* Get the target relation */
        rel = (getTargetResultRelInfo(mtstate))->ri_RelationDesc;
 
diff --git a/src/backend/replication/logical/worker.c 
b/src/backend/replication/logical/worker.c
index 4bf6f5e427..c3a6fc30d0 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -191,7 +191,6 @@ create_estate_for_relation(LogicalRepRelMapEntry *rel)
 
        estate->es_result_relations = resultRelInfo;
        estate->es_num_result_relations = 1;
-       estate->es_result_relation_info = resultRelInfo;
 
        estate->es_output_cid = GetCurrentCommandId(true);
 
@@ -581,6 +580,7 @@ GetRelationIdentityOrPK(Relation rel)
 static void
 apply_handle_insert(StringInfo s)
 {
+       ResultRelInfo *resultRelInfo;
        LogicalRepRelMapEntry *rel;
        LogicalRepTupleData newtup;
        LogicalRepRelId relid;
@@ -604,6 +604,7 @@ apply_handle_insert(StringInfo s)
 
        /* Initialize the executor state. */
        estate = create_estate_for_relation(rel);
+       resultRelInfo = &estate->es_result_relations[0];
        remoteslot = ExecInitExtraTupleSlot(estate,
                                                                                
RelationGetDescr(rel->localrel),
                                                                                
&TTSOpsVirtual);
@@ -617,13 +618,13 @@ apply_handle_insert(StringInfo s)
        slot_fill_defaults(rel, estate, remoteslot);
        MemoryContextSwitchTo(oldctx);
 
-       ExecOpenIndices(estate->es_result_relation_info, false);
+       ExecOpenIndices(resultRelInfo, false);
 
        /* Do the insert. */
-       ExecSimpleRelationInsert(estate, remoteslot);
+       ExecSimpleRelationInsert(resultRelInfo, estate, remoteslot);
 
        /* Cleanup. */
-       ExecCloseIndices(estate->es_result_relation_info);
+       ExecCloseIndices(resultRelInfo);
        PopActiveSnapshot();
 
        /* Handle queued AFTER triggers. */
@@ -678,6 +679,7 @@ check_relation_updatable(LogicalRepRelMapEntry *rel)
 static void
 apply_handle_update(StringInfo s)
 {
+       ResultRelInfo *resultRelInfo;
        LogicalRepRelMapEntry *rel;
        LogicalRepRelId relid;
        Oid                     idxoid;
@@ -711,6 +713,7 @@ apply_handle_update(StringInfo s)
 
        /* Initialize the executor state. */
        estate = create_estate_for_relation(rel);
+       resultRelInfo = &estate->es_result_relations[0];
        remoteslot = ExecInitExtraTupleSlot(estate,
                                                                                
RelationGetDescr(rel->localrel),
                                                                                
&TTSOpsVirtual);
@@ -719,7 +722,7 @@ apply_handle_update(StringInfo s)
        EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
 
        PushActiveSnapshot(GetTransactionSnapshot());
-       ExecOpenIndices(estate->es_result_relation_info, false);
+       ExecOpenIndices(resultRelInfo, false);
 
        /* Build the search tuple. */
        oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
@@ -761,7 +764,8 @@ apply_handle_update(StringInfo s)
                EvalPlanQualSetSlot(&epqstate, remoteslot);
 
                /* Do the actual update. */
-               ExecSimpleRelationUpdate(estate, &epqstate, localslot, 
remoteslot);
+               ExecSimpleRelationUpdate(resultRelInfo, estate, &epqstate, 
localslot,
+                                                                remoteslot);
        }
        else
        {
@@ -777,7 +781,7 @@ apply_handle_update(StringInfo s)
        }
 
        /* Cleanup. */
-       ExecCloseIndices(estate->es_result_relation_info);
+       ExecCloseIndices(resultRelInfo);
        PopActiveSnapshot();
 
        /* Handle queued AFTER triggers. */
@@ -800,6 +804,7 @@ apply_handle_update(StringInfo s)
 static void
 apply_handle_delete(StringInfo s)
 {
+       ResultRelInfo *resultRelInfo;
        LogicalRepRelMapEntry *rel;
        LogicalRepTupleData oldtup;
        LogicalRepRelId relid;
@@ -830,6 +835,7 @@ apply_handle_delete(StringInfo s)
 
        /* Initialize the executor state. */
        estate = create_estate_for_relation(rel);
+       resultRelInfo = &estate->es_result_relations[0];
        remoteslot = ExecInitExtraTupleSlot(estate,
                                                                                
RelationGetDescr(rel->localrel),
                                                                                
&TTSOpsVirtual);
@@ -838,7 +844,7 @@ apply_handle_delete(StringInfo s)
        EvalPlanQualInit(&epqstate, estate, NULL, NIL, -1);
 
        PushActiveSnapshot(GetTransactionSnapshot());
-       ExecOpenIndices(estate->es_result_relation_info, false);
+       ExecOpenIndices(resultRelInfo, false);
 
        /* Find the tuple using the replica identity index. */
        oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
@@ -866,7 +872,7 @@ apply_handle_delete(StringInfo s)
                EvalPlanQualSetSlot(&epqstate, localslot);
 
                /* Do the actual delete. */
-               ExecSimpleRelationDelete(estate, &epqstate, localslot);
+               ExecSimpleRelationDelete(resultRelInfo, estate, &epqstate, 
localslot);
        }
        else
        {
@@ -878,7 +884,7 @@ apply_handle_delete(StringInfo s)
        }
 
        /* Cleanup. */
-       ExecCloseIndices(estate->es_result_relation_info);
+       ExecCloseIndices(resultRelInfo);
        PopActiveSnapshot();
 
        /* Handle queued AFTER triggers. */
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 6298c7c8ca..a12d02cf5e 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -567,10 +567,14 @@ extern TupleTableSlot *ExecGetReturningSlot(EState 
*estate, ResultRelInfo *relIn
  */
 extern void ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative);
 extern void ExecCloseIndices(ResultRelInfo *resultRelInfo);
-extern List *ExecInsertIndexTuples(TupleTableSlot *slot, EState *estate, bool 
noDupErr,
+extern List *ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
+                                                                  
TupleTableSlot *slot, EState *estate,
+                                                                  bool 
noDupErr,
                                                                   bool 
*specConflict, List *arbiterIndexes);
-extern bool ExecCheckIndexConstraints(TupleTableSlot *slot, EState *estate,
-                                                                         
ItemPointer conflictTid, List *arbiterIndexes);
+extern bool ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo,
+                                                 TupleTableSlot *slot,
+                                                 EState *estate, ItemPointer 
conflictTid,
+                                                 List *arbiterIndexes);
 extern void check_exclusion_constraint(Relation heap, Relation index,
                                                                           
IndexInfo *indexInfo,
                                                                           
ItemPointer tupleid,
@@ -587,10 +591,13 @@ extern bool RelationFindReplTupleByIndex(Relation rel, 
Oid idxoid,
 extern bool RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode,
                                                                         
TupleTableSlot *searchslot, TupleTableSlot *outslot);
 
-extern void ExecSimpleRelationInsert(EState *estate, TupleTableSlot *slot);
-extern void ExecSimpleRelationUpdate(EState *estate, EPQState *epqstate,
+extern void ExecSimpleRelationInsert(ResultRelInfo *resultRelInfo,
+                                                                        EState 
*estate, TupleTableSlot *slot);
+extern void ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
+                                                                        EState 
*estate, EPQState *epqstate,
                                                                         
TupleTableSlot *searchslot, TupleTableSlot *slot);
-extern void ExecSimpleRelationDelete(EState *estate, EPQState *epqstate,
+extern void ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo,
+                                                                        EState 
*estate, EPQState *epqstate,
                                                                         
TupleTableSlot *searchslot);
 extern void CheckCmdReplicaIdentity(Relation rel, CmdType cmd);
 
diff --git a/src/include/executor/nodeModifyTable.h 
b/src/include/executor/nodeModifyTable.h
index 891b119608..103d4cd6c3 100644
--- a/src/include/executor/nodeModifyTable.h
+++ b/src/include/executor/nodeModifyTable.h
@@ -15,7 +15,8 @@
 
 #include "nodes/execnodes.h"
 
-extern void ExecComputeStoredGenerated(EState *estate, TupleTableSlot *slot);
+extern void ExecComputeStoredGenerated(ResultRelInfo *resultRelInfo,
+                                                  EState *estate, 
TupleTableSlot *slot);
 
 extern ModifyTableState *ExecInitModifyTable(ModifyTable *node, EState 
*estate, int eflags);
 extern void ExecEndModifyTable(ModifyTableState *node);
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 0c2a77aaf8..62e20fdfdc 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -518,7 +518,6 @@ typedef struct EState
        /* Info about target table(s) for insert/update/delete queries: */
        ResultRelInfo *es_result_relations; /* array of ResultRelInfos */
        int                     es_num_result_relations;        /* length of 
array */
-       ResultRelInfo *es_result_relation_info; /* currently active array elt */
 
        /*
         * Info about the partition root table(s) for insert/update/delete 
queries
diff --git a/src/test/regress/expected/insert.out 
b/src/test/regress/expected/insert.out
index 75e25cdf48..b73e26fc69 100644
--- a/src/test/regress/expected/insert.out
+++ b/src/test/regress/expected/insert.out
@@ -818,9 +818,7 @@ drop role regress_coldesc_role;
 drop table inserttest3;
 drop table brtrigpartcon;
 drop function brtrigpartcon1trigf();
--- check that "do nothing" BR triggers work with tuple-routing (this checks
--- that estate->es_result_relation_info is appropriately set/reset for each
--- routed tuple)
+-- check that "do nothing" BR triggers work with tuple-routing
 create table donothingbrtrig_test (a int, b text) partition by list (a);
 create table donothingbrtrig_test1 (b text, a int);
 create table donothingbrtrig_test2 (c text, b text, a int);
diff --git a/src/test/regress/sql/insert.sql b/src/test/regress/sql/insert.sql
index 23885f638c..e5a0a05d13 100644
--- a/src/test/regress/sql/insert.sql
+++ b/src/test/regress/sql/insert.sql
@@ -542,9 +542,7 @@ drop table inserttest3;
 drop table brtrigpartcon;
 drop function brtrigpartcon1trigf();
 
--- check that "do nothing" BR triggers work with tuple-routing (this checks
--- that estate->es_result_relation_info is appropriately set/reset for each
--- routed tuple)
+-- check that "do nothing" BR triggers work with tuple-routing
 create table donothingbrtrig_test (a int, b text) partition by list (a);
 create table donothingbrtrig_test1 (b text, a int);
 create table donothingbrtrig_test2 (c text, b text, a int);
-- 
2.16.5

From 1bf84886550b8d31e8f1f8f4a46c43b14db3dd6c Mon Sep 17 00:00:00 2001
From: amit <amitlangot...@gmail.com>
Date: Fri, 19 Jul 2019 16:24:38 +0900
Subject: [PATCH v9 3/4] Rearrange partition update row movement code a bit

The block of code that does the actual moving (DELETE+INSERT) has
been moved to a function named ExecCrossPartitionUpdate() which must
be retried until it says the movement has been done or can't be done.

This also rearrange the code in ExecDelete() and ExecInsert() around
executing AFTER ROW DELETE and AFTER ROW INSERT triggers, resp.  In
the case of an update row movement, such triggers should not see the
affected tuple in their OLD/NEW transition table.
---
 src/backend/executor/nodeModifyTable.c | 347 +++++++++++++++++++--------------
 1 file changed, 199 insertions(+), 148 deletions(-)

diff --git a/src/backend/executor/nodeModifyTable.c 
b/src/backend/executor/nodeModifyTable.c
index b01601578a..1362b2f2d1 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -356,7 +356,6 @@ ExecInsert(ModifyTableState *mtstate,
        Relation        resultRelationDesc;
        List       *recheckIndexes = NIL;
        TupleTableSlot *result = NULL;
-       TransitionCaptureState *ar_insert_trig_tcs;
        ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
        OnConflictAction onconflict = node->onConflictAction;
        PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
@@ -620,31 +619,30 @@ ExecInsert(ModifyTableState *mtstate,
        }
 
        /*
-        * If this insert is the result of a partition key update that moved the
-        * tuple to a new partition, put this row into the transition NEW TABLE,
-        * if there is one. We need to do this separately for DELETE and INSERT
-        * because they happen on different tables.
+        * If the insert is a part of update row movement, put this row into the
+        * UPDATE trigger's NEW TABLE (transition table) instead of that of an
+        * INSERT trigger.
         */
-       ar_insert_trig_tcs = mtstate->mt_transition_capture;
-       if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
-               && mtstate->mt_transition_capture->tcs_update_new_table)
+       if (mtstate->operation == CMD_UPDATE &&
+               mtstate->mt_transition_capture &&
+               mtstate->mt_transition_capture->tcs_update_new_table)
        {
-               ExecARUpdateTriggers(estate, resultRelInfo, NULL,
-                                                        NULL,
-                                                        slot,
-                                                        NULL,
-                                                        
mtstate->mt_transition_capture);
+               ExecARUpdateTriggers(estate, resultRelInfo, NULL, NULL, slot,
+                                                        NIL, 
mtstate->mt_transition_capture);
 
                /*
-                * We've already captured the NEW TABLE row, so make sure any AR
-                * INSERT trigger fired below doesn't capture it again.
+                * Execute AFTER ROW INSERT Triggers, but such that the row is 
not
+                * captured again in the transition table if any.
                 */
-               ar_insert_trig_tcs = NULL;
+               ExecARInsertTriggers(estate, resultRelInfo, slot, 
recheckIndexes,
+                                                        NULL);
+       }
+       else
+       {
+               /* AFTER ROW INSERT Triggers */
+               ExecARInsertTriggers(estate, resultRelInfo, slot, 
recheckIndexes,
+                                                        
mtstate->mt_transition_capture);
        }
-
-       /* AFTER ROW INSERT Triggers */
-       ExecARInsertTriggers(estate, resultRelInfo, slot, recheckIndexes,
-                                                ar_insert_trig_tcs);
 
        list_free(recheckIndexes);
 
@@ -710,7 +708,6 @@ ExecDelete(ModifyTableState *mtstate,
        TM_Result       result;
        TM_FailureData tmfd;
        TupleTableSlot *slot = NULL;
-       TransitionCaptureState *ar_delete_trig_tcs;
 
        if (tupleDeleted)
                *tupleDeleted = false;
@@ -954,32 +951,30 @@ ldelete:;
                *tupleDeleted = true;
 
        /*
-        * If this delete is the result of a partition key update that moved the
-        * tuple to a new partition, put this row into the transition OLD TABLE,
-        * if there is one. We need to do this separately for DELETE and INSERT
-        * because they happen on different tables.
+        * If the delete is a part of update row movement, put this row into the
+        * UPDATE trigger's OLD TABLE (transition table) instead of that of an
+        * DELETE trigger.
         */
-       ar_delete_trig_tcs = mtstate->mt_transition_capture;
-       if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
-               && mtstate->mt_transition_capture->tcs_update_old_table)
+       if (mtstate->operation == CMD_UPDATE &&
+               mtstate->mt_transition_capture &&
+               mtstate->mt_transition_capture->tcs_update_old_table)
        {
-               ExecARUpdateTriggers(estate, resultRelInfo,
-                                                        tupleid,
-                                                        oldtuple,
-                                                        NULL,
-                                                        NULL,
-                                                        
mtstate->mt_transition_capture);
+               ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple,
+                                                        NULL, NIL, 
mtstate->mt_transition_capture);
 
                /*
-                * We've already captured the NEW TABLE row, so make sure any AR
-                * DELETE trigger fired below doesn't capture it again.
+                * Execute AFTER ROW DELETE Triggers, but such that the row is 
not
+                * captured again in the transition table if any.
                 */
-               ar_delete_trig_tcs = NULL;
+               ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
+                                                        NULL);
+       }
+       else
+       {
+               /* AFTER ROW DELETE Triggers */
+               ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
+                                                        
mtstate->mt_transition_capture);
        }
-
-       /* AFTER ROW DELETE Triggers */
-       ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
-                                                ar_delete_trig_tcs);
 
        /* Process RETURNING if present and if requested */
        if (processReturning && resultRelInfo->ri_projectReturning)
@@ -1026,6 +1021,153 @@ ldelete:;
        return NULL;
 }
 
+/*
+ *     ExecCrossPartitionUpdate
+ *             Move an updated tuple from a given partition to the correct 
partition
+ *             of its root parent table
+ *
+ *     This works by first deleting the tuple from the current partition,
+ *     followed by inserting it into the root parent table, that is,
+ *     mtstate->rootResultRelInfo, from where it's re-routed to the correct
+ *     partition.
+ *
+ *     Returns true if the tuple has been successfully moved or if it's found
+ *     that the tuple was concurrently deleted so there's nothing more to do
+ *     for the caller.
+ *
+ *     False is returned if the tuple we're trying to move is found to have 
been
+ *     concurrently updated.  Caller should check if the updated tuple that's
+ *     returned in *retry_slot still needs to be re-routed and call this 
function
+ *     again if needed.
+ */
+static bool
+ExecCrossPartitionUpdate(ModifyTableState *mtstate,
+                                                ResultRelInfo *resultRelInfo,
+                                                ItemPointer tupleid, HeapTuple 
oldtuple,
+                                                TupleTableSlot *slot, 
TupleTableSlot *planSlot,
+                                                EPQState *epqstate, bool 
canSetTag,
+                                                TupleTableSlot **retry_slot,
+                                                TupleTableSlot 
**inserted_tuple)
+{
+       EState     *estate = mtstate->ps.state;
+       PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
+       int                     map_index;
+       TupleConversionMap *tupconv_map;
+       TupleConversionMap *saved_tcs_map = NULL;
+       bool            tuple_deleted;
+       TupleTableSlot *epqslot = NULL;
+
+       *inserted_tuple = NULL;
+       *retry_slot = NULL;
+
+       /*
+        * Disallow an INSERT ON CONFLICT DO UPDATE that causes the
+        * original row to migrate to a different partition.  Maybe this
+        * can be implemented some day, but it seems a fringe feature with
+        * little redeeming value.
+        */
+       if (((ModifyTable *) mtstate->ps.plan)->onConflictAction == 
ONCONFLICT_UPDATE)
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("invalid ON UPDATE specification"),
+                                errdetail("The result tuple would appear in a 
different partition than the original tuple.")));
+
+       /*
+        * When an UPDATE is run on a leaf partition, we will not have
+        * partition tuple routing set up. In that case, fail with
+        * partition constraint violation error.
+        */
+       if (proute == NULL)
+               ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
+
+       /*
+        * Row movement, part 1.  Delete the tuple, but skip RETURNING
+        * processing. We want to return rows from INSERT.
+        */
+       ExecDelete(mtstate, resultRelInfo, tupleid, oldtuple, planSlot,
+                          epqstate, estate,
+                          false,       /* processReturning */
+                          false,       /* canSetTag */
+                          true,        /* changingPart */
+                          &tuple_deleted, &epqslot);
+
+       /*
+        * For some reason if DELETE didn't happen (e.g. trigger prevented
+        * it, or it was already deleted by self, or it was concurrently
+        * deleted by another transaction), then we should skip the insert
+        * as well; otherwise, an UPDATE could cause an increase in the
+        * total number of rows across all partitions, which is clearly
+        * wrong.
+        *
+        * For a normal UPDATE, the case where the tuple has been the
+        * subject of a concurrent UPDATE or DELETE would be handled by
+        * the EvalPlanQual machinery, but for an UPDATE that we've
+        * translated into a DELETE from this partition and an INSERT into
+        * some other partition, that's not available, because CTID chains
+        * can't span relation boundaries.  We mimic the semantics to a
+        * limited extent by skipping the INSERT if the DELETE fails to
+        * find a tuple. This ensures that two concurrent attempts to
+        * UPDATE the same tuple at the same time can't turn one tuple
+        * into two, and that an UPDATE of a just-deleted tuple can't
+        * resurrect it.
+        */
+       if (!tuple_deleted)
+       {
+               /*
+                * epqslot will be typically NULL.  But when ExecDelete()
+                * finds that another transaction has concurrently updated the
+                * same row, it re-fetches the row, skips the delete, and
+                * epqslot is set to the re-fetched tuple slot. In that case,
+                * we need to do all the checks again.
+                */
+               if (TupIsNull(epqslot))
+                       return true;
+               else
+               {
+                       *retry_slot = 
ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
+                       return false;
+               }
+       }
+
+       /*
+        * resultRelInfo is one of the per-subplan resultRelInfos.  So we
+        * should convert the tuple into root's tuple descriptor, since
+        * ExecInsert() starts the search from root.  The tuple conversion
+        * map list is in the order of mtstate->resultRelInfo[], so to
+        * retrieve the one for this resultRel, we need to know the
+        * position of the resultRel in mtstate->resultRelInfo[].
+        */
+       map_index = resultRelInfo - mtstate->resultRelInfo;
+       Assert(map_index >= 0 && map_index < mtstate->mt_nplans);
+       tupconv_map = tupconv_map_for_subplan(mtstate, map_index);
+       if (tupconv_map != NULL)
+               slot = execute_attr_map_slot(tupconv_map->attrMap,
+                                                                        slot,
+                                                                        
mtstate->mt_root_tuple_slot);
+
+       /*
+        * ExecInsert() may scribble on mtstate->mt_transition_capture,
+        * so save the currently active map.
+        */
+       if (mtstate->mt_transition_capture)
+               saved_tcs_map = mtstate->mt_transition_capture->tcs_map;
+
+       /* Tuple routing starts from the root table. */
+       Assert(mtstate->rootResultRelInfo != NULL);
+       *inserted_tuple = ExecInsert(mtstate, mtstate->rootResultRelInfo, slot,
+                                                                planSlot, 
estate, canSetTag);
+
+       /* Clear the INSERT's tuple and restore the saved map. */
+       if (mtstate->mt_transition_capture)
+       {
+               mtstate->mt_transition_capture->tcs_original_insert_tuple = 
NULL;
+               mtstate->mt_transition_capture->tcs_map = saved_tcs_map;
+       }
+
+       /* We're done moving. */
+       return true;
+}
+
 /* ----------------------------------------------------------------
  *             ExecUpdate
  *
@@ -1179,119 +1321,28 @@ lreplace:;
                 */
                if (partition_constraint_failed)
                {
-                       bool            tuple_deleted;
-                       TupleTableSlot *ret_slot;
-                       TupleTableSlot *epqslot = NULL;
-                       PartitionTupleRouting *proute = 
mtstate->mt_partition_tuple_routing;
-                       int                     map_index;
-                       TupleConversionMap *tupconv_map;
-                       TupleConversionMap *saved_tcs_map = NULL;
-
-                       /*
-                        * Disallow an INSERT ON CONFLICT DO UPDATE that causes 
the
-                        * original row to migrate to a different partition.  
Maybe this
-                        * can be implemented some day, but it seems a fringe 
feature with
-                        * little redeeming value.
-                        */
-                       if (((ModifyTable *) 
mtstate->ps.plan)->onConflictAction == ONCONFLICT_UPDATE)
-                               ereport(ERROR,
-                                               
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                                errmsg("invalid ON UPDATE 
specification"),
-                                                errdetail("The result tuple 
would appear in a different partition than the original tuple.")));
-
-                       /*
-                        * When an UPDATE is run on a leaf partition, we will 
not have
-                        * partition tuple routing set up. In that case, fail 
with
-                        * partition constraint violation error.
-                        */
-                       if (proute == NULL)
-                               ExecPartitionCheckEmitError(resultRelInfo, 
slot, estate);
-
-                       /*
-                        * Row movement, part 1.  Delete the tuple, but skip 
RETURNING
-                        * processing. We want to return rows from INSERT.
-                        */
-                       ExecDelete(mtstate, resultRelInfo, tupleid, oldtuple, 
planSlot,
-                                          epqstate, estate,
-                                          false,       /* processReturning */
-                                          false,       /* canSetTag */
-                                          true,        /* changingPart */
-                                          &tuple_deleted, &epqslot);
+                       TupleTableSlot *inserted_tuple,
+                                                  *retry_slot;
+                       bool                    retry;
 
                        /*
-                        * For some reason if DELETE didn't happen (e.g. 
trigger prevented
-                        * it, or it was already deleted by self, or it was 
concurrently
-                        * deleted by another transaction), then we should skip 
the insert
-                        * as well; otherwise, an UPDATE could cause an 
increase in the
-                        * total number of rows across all partitions, which is 
clearly
-                        * wrong.
-                        *
-                        * For a normal UPDATE, the case where the tuple has 
been the
-                        * subject of a concurrent UPDATE or DELETE would be 
handled by
-                        * the EvalPlanQual machinery, but for an UPDATE that 
we've
-                        * translated into a DELETE from this partition and an 
INSERT into
-                        * some other partition, that's not available, because 
CTID chains
-                        * can't span relation boundaries.  We mimic the 
semantics to a
-                        * limited extent by skipping the INSERT if the DELETE 
fails to
-                        * find a tuple. This ensures that two concurrent 
attempts to
-                        * UPDATE the same tuple at the same time can't turn 
one tuple
-                        * into two, and that an UPDATE of a just-deleted tuple 
can't
-                        * resurrect it.
+                        * ExecCrossPartitionUpdate will first DELETE the row 
from the
+                        * partition it's currently in and then insert it back 
into the
+                        * root table, which will re-route it to the correct 
partition.
+                        * The first part may have to be repeated if it is 
detected that
+                        * the tuple we're trying to move has been concurrently 
updated.
                         */
-                       if (!tuple_deleted)
-                       {
-                               /*
-                                * epqslot will be typically NULL.  But when 
ExecDelete()
-                                * finds that another transaction has 
concurrently updated the
-                                * same row, it re-fetches the row, skips the 
delete, and
-                                * epqslot is set to the re-fetched tuple slot. 
In that case,
-                                * we need to do all the checks again.
-                                */
-                               if (TupIsNull(epqslot))
-                                       return NULL;
-                               else
-                               {
-                                       slot = 
ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
-                                       goto lreplace;
-                               }
-                       }
-
-                       /*
-                        * resultRelInfo is one of the per-subplan 
resultRelInfos.  So we
-                        * should convert the tuple into root's tuple 
descriptor, since
-                        * ExecInsert() starts the search from root.  The tuple 
conversion
-                        * map list is in the order of 
mtstate->resultRelInfo[], so to
-                        * retrieve the one for this resultRel, we need to know 
the
-                        * position of the resultRel in 
mtstate->resultRelInfo[].
-                        */
-                       map_index = resultRelInfo - mtstate->resultRelInfo;
-                       Assert(map_index >= 0 && map_index < 
mtstate->mt_nplans);
-                       tupconv_map = tupconv_map_for_subplan(mtstate, 
map_index);
-                       if (tupconv_map != NULL)
-                               slot = 
execute_attr_map_slot(tupconv_map->attrMap,
-                                                                               
         slot,
-                                                                               
         mtstate->mt_root_tuple_slot);
-
-                       /*
-                        * ExecInsert() may scribble on 
mtstate->mt_transition_capture,
-                        * so save the currently active map.
-                        */
-                       if (mtstate->mt_transition_capture)
-                               saved_tcs_map = 
mtstate->mt_transition_capture->tcs_map;
-
-                       /* Tuple routing starts from the root table. */
-                       Assert(mtstate->rootResultRelInfo != NULL);
-                       ret_slot = ExecInsert(mtstate, 
mtstate->rootResultRelInfo, slot,
-                                                                 planSlot, 
estate, canSetTag);
-
-                       /* Clear the INSERT's tuple and restore the saved map. 
*/
-                       if (mtstate->mt_transition_capture)
+                       retry = !ExecCrossPartitionUpdate(mtstate, 
resultRelInfo, tupleid,
+                                                                               
          oldtuple, slot, planSlot,
+                                                                               
          epqstate, canSetTag,
+                                                                               
          &retry_slot, &inserted_tuple);
+                       if (retry)
                        {
-                               
mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
-                               mtstate->mt_transition_capture->tcs_map = 
saved_tcs_map;
+                               slot = retry_slot;
+                               goto lreplace;
                        }
 
-                       return ret_slot;
+                       return inserted_tuple;
                }
 
                /*
-- 
2.16.5

Reply via email to