On 2017/12/19 19:06, Amit Langote wrote:
> Hi.
> 
> I have a patch that rearranges the code around partition tuple-routing,
> such that allocation of per-partition objects (ResultRelInfo,
> TupleConversionMap, etc.) is delayed until a given partition is actually
> inserted into (i.e., a tuple is routed to it).  I can see a good win for
> non-bulk inserts with the patch, and the patch is implemented such that it
> doesn't affect the bulk-insert case much.
> 
> Performance numbers:
> 
> * Uses following hash-partitioned table:
> 
> create table t1 (a int, b int) partition by hash (a);
> create table t1_x partition of t1 for values with (modulus M, remainder R)
> ...
> 
> 
> * Non-bulk insert uses the following code (insert 100,000 rows one-by-one):
> 
> do $$
> begin
>   for i in 1..100000 loop
>     insert into t1 values (i, i+1);
>   end loop;
> end; $$;
> 
> * Times in milliseconds:
> 
> #parts           HEAD        Patched
> 
>      8       6216.300       4977.670
>     16       9061.388       6360.093
>     32      14081.656       8752.405
>     64      24887.110      13919.384
>    128      45926.251      24582.411
>    256      88088.084      45490.894
> 
> As you can see, non-bulk inserts can be up to about 2x faster with the patch,
> although the time taken still increases as the number of partitions increases,
> because we still lock *all* partitions at the beginning.
> 
> * Bulk-inserting 100,000 rows using COPY:
> 
> copy t1 from '/tmp/t1.csv' csv;
> 
> * Times in milliseconds:
> 
> #parts           HEAD        Patched
> 
>      8        458.301        450.875
>     16        409.271        510.723
>     32        500.960        612.003
>     64        430.687        795.046
>    128        449.314        565.786
>    256        493.171        490.187
> 
> Not much harm here, although the numbers are a bit noisy.
> 
> The patch is divided into 4 parts, the first 3 of which are refactoring patches.
> 
> I know this patch will conflict severely with [1] and [2], so it's fine if
> we consider applying these later.  I will add this to the next CF.

I rebased the patches, since they started conflicting with a recently
committed patch [1].

Thanks,
Amit

[1]
https://git.postgresql.org/gitweb/?p=postgresql.git;a=commit;h=cc6337d2fed5
From 187683f6ea153e8be1a5c067b3546e70b15ccd47 Mon Sep 17 00:00:00 2001
From: amit <amitlangot...@gmail.com>
Date: Tue, 19 Dec 2017 10:43:45 +0900
Subject: [PATCH v2 1/4] Teach CopyFrom to use ModifyTableState for
 tuple-routing

This removes all fields of CopyStateData that were meant for
tuple routing and instead uses ModifyTableState that has all those
fields, including transition_tupconv_maps.  In COPY's case,
transition_tupconv_maps is only required if tuple routing is being
used, so it's safe.
---
 src/backend/commands/copy.c | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 6bfca2a4af..242dc56d87 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -166,11 +166,7 @@ typedef struct CopyStateData
        bool            volatile_defexprs;      /* is any of defexprs volatile? 
*/
        List       *range_table;
 
-       /* Tuple-routing support info */
-       PartitionTupleRouting *partition_tuple_routing;
-
        TransitionCaptureState *transition_capture;
-       TupleConversionMap **transition_tupconv_maps;
 
        /*
         * These variables are used to reduce overhead in textual COPY FROM.
@@ -2286,6 +2282,7 @@ CopyFrom(CopyState cstate)
        ResultRelInfo *resultRelInfo;
        ResultRelInfo *saved_resultRelInfo = NULL;
        EState     *estate = CreateExecutorState(); /* for ExecConstraints() */
+       ModifyTableState *mtstate = makeNode(ModifyTableState);
        ExprContext *econtext;
        TupleTableSlot *myslot;
        MemoryContext oldcontext = CurrentMemoryContext;
@@ -2304,6 +2301,8 @@ CopyFrom(CopyState cstate)
        Size            bufferedTuplesSize = 0;
        int                     firstBufferedLineNo = 0;
 
+       PartitionTupleRouting *proute = NULL;
+
        Assert(cstate->rel);
 
        /*
@@ -2469,10 +2468,15 @@ CopyFrom(CopyState cstate)
         */
        if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
        {
-               PartitionTupleRouting *proute;
+               ModifyTable *node = makeNode(ModifyTable);
 
-               proute = cstate->partition_tuple_routing =
-                       ExecSetupPartitionTupleRouting(NULL, cstate->rel, 1, 
estate);
+               /* Just need to make this field appear valid. */
+               node->nominalRelation = 1;
+               mtstate->ps.plan = (Plan *) node;
+               mtstate->ps.state = estate;
+               mtstate->resultRelInfo = resultRelInfo;
+               proute = mtstate->mt_partition_tuple_routing =
+                       ExecSetupPartitionTupleRouting(mtstate, cstate->rel, 1, 
estate);
 
                /*
                 * If we are capturing transition tuples, they may need to be
@@ -2484,11 +2488,11 @@ CopyFrom(CopyState cstate)
                {
                        int                     i;
 
-                       cstate->transition_tupconv_maps = (TupleConversionMap 
**)
+                       mtstate->mt_transition_tupconv_maps = 
(TupleConversionMap **)
                                palloc0(sizeof(TupleConversionMap *) * 
proute->num_partitions);
                        for (i = 0; i < proute->num_partitions; ++i)
                        {
-                               cstate->transition_tupconv_maps[i] =
+                               mtstate->mt_transition_tupconv_maps[i] =
                                        
convert_tuples_by_name(RelationGetDescr(proute->partitions[i]->ri_RelationDesc),
                                                                                
   RelationGetDescr(cstate->rel),
                                                                                
   gettext_noop("could not convert row type"));
@@ -2509,7 +2513,7 @@ CopyFrom(CopyState cstate)
        if ((resultRelInfo->ri_TrigDesc != NULL &&
                 (resultRelInfo->ri_TrigDesc->trig_insert_before_row ||
                  resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) ||
-               cstate->partition_tuple_routing != NULL ||
+               mtstate->mt_partition_tuple_routing != NULL ||
                cstate->volatile_defexprs)
        {
                useHeapMultiInsert = false;
@@ -2584,11 +2588,10 @@ CopyFrom(CopyState cstate)
                ExecStoreTuple(tuple, slot, InvalidBuffer, false);
 
                /* Determine the partition to heap_insert the tuple into */
-               if (cstate->partition_tuple_routing)
+               if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
                {
                        int                     leaf_part_index;
                        TupleConversionMap *map;
-                       PartitionTupleRouting *proute = 
cstate->partition_tuple_routing;
 
                        /*
                         * Away we go ... If we end up not finding a partition 
after all,
@@ -2651,7 +2654,7 @@ CopyFrom(CopyState cstate)
                                         */
                                        
cstate->transition_capture->tcs_original_insert_tuple = NULL;
                                        cstate->transition_capture->tcs_map =
-                                               
cstate->transition_tupconv_maps[leaf_part_index];
+                                               
mtstate->mt_transition_tupconv_maps[leaf_part_index];
                                }
                                else
                                {
@@ -2832,8 +2835,8 @@ CopyFrom(CopyState cstate)
        ExecCloseIndices(resultRelInfo);
 
        /* Close all the partitioned tables, leaf partitions, and their indices 
*/
-       if (cstate->partition_tuple_routing)
-               ExecCleanupTupleRouting(cstate->partition_tuple_routing);
+       if (proute)
+               ExecCleanupTupleRouting(proute);
 
        /* Close any trigger target relations */
        ExecCleanUpTriggerState(estate);
-- 
2.11.0

From c05bbc58f7e994d21df210a16985911bc4954f50 Mon Sep 17 00:00:00 2001
From: amit <amitlangot...@gmail.com>
Date: Tue, 19 Dec 2017 13:56:25 +0900
Subject: [PATCH v2 2/4] ExecFindPartition refactoring

---
 src/backend/commands/copy.c            |  5 +----
 src/backend/executor/execPartition.c   | 15 +++++++--------
 src/backend/executor/nodeModifyTable.c |  5 +----
 src/include/executor/execPartition.h   |  5 +----
 4 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 242dc56d87..8c724b8695 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2601,10 +2601,7 @@ CopyFrom(CopyState cstate)
                         * will get us the ResultRelInfo and TupleConversionMap 
for the
                         * partition, respectively.
                         */
-                       leaf_part_index = ExecFindPartition(resultRelInfo,
-                                                                               
                proute->partition_dispatch_info,
-                                                                               
                slot,
-                                                                               
                estate);
+                       leaf_part_index = ExecFindPartition(mtstate, slot);
                        Assert(leaf_part_index >= 0 &&
                                   leaf_part_index < proute->num_partitions);
 
diff --git a/src/backend/executor/execPartition.c 
b/src/backend/executor/execPartition.c
index 8c0d2df63c..21d230881a 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -140,11 +140,7 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate,
 }
 
 /*
- * ExecFindPartition -- Find a leaf partition in the partition tree rooted
- * at parent, for the heap tuple contained in *slot
- *
- * estate must be non-NULL; we'll need it to compute any expressions in the
- * partition key(s)
+ * ExecFindPartition -- Find a leaf partition for tuple contained in slot
  *
  * If no leaf partition is found, this routine errors out with the appropriate
  * error message, else it returns the leaf partition sequence number
@@ -152,14 +148,17 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate,
  * the partition tree.
  */
 int
-ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
-                                 TupleTableSlot *slot, EState *estate)
+ExecFindPartition(ModifyTableState *mtstate, TupleTableSlot *slot)
 {
+       EState     *estate = mtstate->ps.state;
        int                     result;
        Datum           values[PARTITION_MAX_KEYS];
        bool            isnull[PARTITION_MAX_KEYS];
        Relation        rel;
-       PartitionDispatch parent;
+       PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
+       PartitionDispatch  *pd = proute->partition_dispatch_info,
+                                               parent;
+       ResultRelInfo *resultRelInfo = mtstate->resultRelInfo;
        ExprContext *ecxt = GetPerTupleExprContext(estate);
        TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple;
 
diff --git a/src/backend/executor/nodeModifyTable.c 
b/src/backend/executor/nodeModifyTable.c
index c5eca1bb74..9a8f667d72 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -292,10 +292,7 @@ ExecInsert(ModifyTableState *mtstate,
                 * get us the ResultRelInfo and TupleConversionMap for the 
partition,
                 * respectively.
                 */
-               leaf_part_index = ExecFindPartition(resultRelInfo,
-                                                                               
        proute->partition_dispatch_info,
-                                                                               
        slot,
-                                                                               
        estate);
+               leaf_part_index = ExecFindPartition(mtstate, slot);
                Assert(leaf_part_index >= 0 &&
                           leaf_part_index < proute->num_partitions);
 
diff --git a/src/include/executor/execPartition.h 
b/src/include/executor/execPartition.h
index b5df357acd..79dad58828 100644
--- a/src/include/executor/execPartition.h
+++ b/src/include/executor/execPartition.h
@@ -86,10 +86,7 @@ typedef struct PartitionTupleRouting
 extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState 
*mtstate,
                                                           Relation rel, Index 
resultRTindex,
                                                           EState *estate);
-extern int ExecFindPartition(ResultRelInfo *resultRelInfo,
-                                 PartitionDispatch *pd,
-                                 TupleTableSlot *slot,
-                                 EState *estate);
 extern void ExecCleanupTupleRouting(PartitionTupleRouting *proute);
+extern int ExecFindPartition(ModifyTableState *mtstate, TupleTableSlot *slot);
 
 #endif                                                 /* EXECPARTITION_H */
-- 
2.11.0

From cb5aeacf77cd11dc460fe5d2e2a4398359188e43 Mon Sep 17 00:00:00 2001
From: amit <amitlangot...@gmail.com>
Date: Tue, 19 Dec 2017 16:20:09 +0900
Subject: [PATCH v2 3/4] ExecSetupPartitionTupleRouting refactoring

---
 src/backend/commands/copy.c            |  2 +-
 src/backend/executor/execPartition.c   | 14 +++++++++++---
 src/backend/executor/nodeModifyTable.c |  4 +---
 src/include/executor/execPartition.h   |  3 +--
 4 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 8c724b8695..13f1b5b3e1 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2476,7 +2476,7 @@ CopyFrom(CopyState cstate)
                mtstate->ps.state = estate;
                mtstate->resultRelInfo = resultRelInfo;
                proute = mtstate->mt_partition_tuple_routing =
-                       ExecSetupPartitionTupleRouting(mtstate, cstate->rel, 1, 
estate);
+                       ExecSetupPartitionTupleRouting(mtstate, cstate->rel);
 
                /*
                 * If we are capturing transition tuples, they may need to be
diff --git a/src/backend/executor/execPartition.c 
b/src/backend/executor/execPartition.c
index 21d230881a..be15189f1d 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -46,14 +46,15 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation 
rel,
  * RowExclusiveLock mode upon return from this function.
  */
 PartitionTupleRouting *
-ExecSetupPartitionTupleRouting(ModifyTableState *mtstate,
-                                                          Relation rel, Index 
resultRTindex,
-                                                          EState *estate)
+ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel)
 {
        TupleDesc       tupDesc = RelationGetDescr(rel);
        List       *leaf_parts;
        ListCell   *cell;
        int                     i;
+       EState     *estate = mtstate->ps.state;
+       ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
+       Index           resultRTindex = node->nominalRelation;
        ResultRelInfo *leaf_part_rri;
        PartitionTupleRouting *proute;
 
@@ -67,8 +68,15 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate,
                RelationGetPartitionDispatchInfo(rel, &proute->num_dispatch,
                                                                                
 &leaf_parts);
        proute->num_partitions = list_length(leaf_parts);
+
+       /*
+        * Allocate an array of ResultRelInfo pointers, but actual
+        * ResultRelInfo's will be allocated if and when needed.  See
+        * ExecFindPartition where it's done.
+        */
        proute->partitions = (ResultRelInfo **) palloc(proute->num_partitions *
                                                                                
                   sizeof(ResultRelInfo *));
+       /* Here too, actual TupleConversionMap's will be allocated later. */
        proute->partition_tupconv_maps =
                (TupleConversionMap **) palloc0(proute->num_partitions *
                                                                                
sizeof(TupleConversionMap *));
diff --git a/src/backend/executor/nodeModifyTable.c 
b/src/backend/executor/nodeModifyTable.c
index 9a8f667d72..0bd47ef3ab 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -1934,9 +1934,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, 
int eflags)
                rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
        {
                proute = mtstate->mt_partition_tuple_routing =
-                       ExecSetupPartitionTupleRouting(mtstate,
-                                                                               
   rel, node->nominalRelation,
-                                                                               
   estate);
+                       ExecSetupPartitionTupleRouting(mtstate, rel);
                num_partitions = proute->num_partitions;
        }
 
diff --git a/src/include/executor/execPartition.h 
b/src/include/executor/execPartition.h
index 79dad58828..b3517e2ee0 100644
--- a/src/include/executor/execPartition.h
+++ b/src/include/executor/execPartition.h
@@ -84,8 +84,7 @@ typedef struct PartitionTupleRouting
 } PartitionTupleRouting;
 
 extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState 
*mtstate,
-                                                          Relation rel, Index 
resultRTindex,
-                                                          EState *estate);
+                                                          Relation rel);
 extern void ExecCleanupTupleRouting(PartitionTupleRouting *proute);
 extern int ExecFindPartition(ModifyTableState *mtstate, TupleTableSlot *slot);
 
-- 
2.11.0

From ce6ae497836a6e46b8b769e7da31fc6b0bca0c3d Mon Sep 17 00:00:00 2001
From: amit <amitlangot...@gmail.com>
Date: Wed, 1 Nov 2017 10:31:21 +0900
Subject: [PATCH v2 4/4] During tuple-routing, initialize per-partition objects
 lazily

Those objects include ResultRelInfo, tuple conversion map,
WITH CHECK OPTION quals and RETURNING projections.

This means we don't allocate these objects for partitions that are
never inserted into.
---
 src/backend/commands/copy.c            |  16 +--
 src/backend/executor/execPartition.c   | 239 ++++++++++++++++++++++++---------
 src/backend/executor/nodeModifyTable.c | 111 ++-------------
 src/include/executor/execPartition.h   |   1 +
 4 files changed, 188 insertions(+), 179 deletions(-)

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 13f1b5b3e1..a6a5409770 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2482,22 +2482,13 @@ CopyFrom(CopyState cstate)
                 * If we are capturing transition tuples, they may need to be
                 * converted from partition format back to partitioned table 
format
                 * (this is only ever necessary if a BEFORE trigger modifies the
-                * tuple).
+                * tuple).  Note that we don't allocate the actual maps here; 
they'll
+                * be allocated by ExecInitPartitionResultRelInfo() if and when
+                * needed.
                 */
                if (cstate->transition_capture != NULL)
-               {
-                       int                     i;
-
                        mtstate->mt_transition_tupconv_maps = 
(TupleConversionMap **)
                                palloc0(sizeof(TupleConversionMap *) * 
proute->num_partitions);
-                       for (i = 0; i < proute->num_partitions; ++i)
-                       {
-                               mtstate->mt_transition_tupconv_maps[i] =
-                                       
convert_tuples_by_name(RelationGetDescr(proute->partitions[i]->ri_RelationDesc),
-                                                                               
   RelationGetDescr(cstate->rel),
-                                                                               
   gettext_noop("could not convert row type"));
-                       }
-               }
        }
 
        /*
@@ -2622,6 +2613,7 @@ CopyFrom(CopyState cstate)
                         */
                        saved_resultRelInfo = resultRelInfo;
                        resultRelInfo = proute->partitions[leaf_part_index];
+                       Assert(resultRelInfo != NULL);
 
                        /* We do not yet have a way to insert into a foreign 
partition */
                        if (resultRelInfo->ri_FdwRoutine)
diff --git a/src/backend/executor/execPartition.c 
b/src/backend/executor/execPartition.c
index be15189f1d..0654490e9c 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -23,6 +23,8 @@
 #include "utils/rls.h"
 #include "utils/ruleutils.h"
 
+static void ExecInitPartitionResultRelInfo(ModifyTableState *mtstate,
+                                       int partidx);
 static PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel,
                                                                 int 
*num_parted, List **leaf_part_oids);
 static void get_partition_dispatch_recurse(Relation rel, Relation parent,
@@ -48,15 +50,10 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation 
rel,
 PartitionTupleRouting *
 ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel)
 {
-       TupleDesc       tupDesc = RelationGetDescr(rel);
        List       *leaf_parts;
        ListCell   *cell;
        int                     i;
-       EState     *estate = mtstate->ps.state;
-       ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
-       Index           resultRTindex = node->nominalRelation;
-       ResultRelInfo *leaf_part_rri;
-       PartitionTupleRouting *proute;
+       PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
 
        /*
         * Get the information about the partition tree after locking all the
@@ -68,19 +65,23 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, 
Relation rel)
                RelationGetPartitionDispatchInfo(rel, &proute->num_dispatch,
                                                                                
 &leaf_parts);
        proute->num_partitions = list_length(leaf_parts);
+       proute->partition_oids = (Oid *) palloc0(proute->num_partitions *
+                                                                               
         sizeof(Oid));
+       i = 0;
+       foreach (cell, leaf_parts)
+               proute->partition_oids[i++] = lfirst_oid(cell);
 
        /*
         * Allocate an array of ResultRelInfo pointers, but actual
         * ResultRelInfo's will be allocated if and when needed.  See
         * ExecFindPartition where it's done.
         */
-       proute->partitions = (ResultRelInfo **) palloc(proute->num_partitions *
-                                                                               
                   sizeof(ResultRelInfo *));
+       proute->partitions = (ResultRelInfo **) palloc0(proute->num_partitions *
+                                                                               
                        sizeof(ResultRelInfo *));
        /* Here too, actual TupleConversionMap's will be allocated later. */
        proute->partition_tupconv_maps =
                (TupleConversionMap **) palloc0(proute->num_partitions *
                                                                                
sizeof(TupleConversionMap *));
-
        /*
         * Initialize an empty slot that will be used to manipulate tuples of 
any
         * given partition's rowtype.  It is attached to the caller-specified 
node
@@ -89,61 +90,6 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, 
Relation rel)
         */
        proute->partition_tuple_slot = MakeTupleTableSlot();
 
-       leaf_part_rri = (ResultRelInfo *) palloc0(proute->num_partitions *
-                                                                               
          sizeof(ResultRelInfo));
-       i = 0;
-       foreach(cell, leaf_parts)
-       {
-               Relation        partrel;
-               TupleDesc       part_tupdesc;
-
-               /*
-                * We locked all the partitions above including the leaf 
partitions.
-                * Note that each of the relations in proute->partitions are
-                * eventually closed by the caller.
-                */
-               partrel = heap_open(lfirst_oid(cell), NoLock);
-               part_tupdesc = RelationGetDescr(partrel);
-
-               /*
-                * Save a tuple conversion map to convert a tuple routed to this
-                * partition from the parent's type to the partition's.
-                */
-               proute->partition_tupconv_maps[i] =
-                       convert_tuples_by_name(tupDesc, part_tupdesc,
-                                                                  
gettext_noop("could not convert row type"));
-
-               InitResultRelInfo(leaf_part_rri,
-                                                 partrel,
-                                                 resultRTindex,
-                                                 rel,
-                                                 estate->es_instrument);
-
-               /*
-                * Verify result relation is a valid target for INSERT.
-                */
-               CheckValidResultRel(leaf_part_rri, CMD_INSERT);
-
-               /*
-                * Open partition indices.  The user may have asked to check for
-                * conflicts within this leaf partition and do "nothing" 
instead of
-                * throwing an error.  Be prepared in that case by initializing 
the
-                * index information needed by ExecInsert() to perform 
speculative
-                * insertions.
-                */
-               if (leaf_part_rri->ri_RelationDesc->rd_rel->relhasindex &&
-                       leaf_part_rri->ri_IndexRelationDescs == NULL)
-                       ExecOpenIndices(leaf_part_rri,
-                                                       mtstate != NULL &&
-                                                       mtstate->mt_onconflict 
!= ONCONFLICT_NONE);
-
-               estate->es_leaf_result_relations =
-                       lappend(estate->es_leaf_result_relations, 
leaf_part_rri);
-
-               proute->partitions[i] = leaf_part_rri++;
-               i++;
-       }
-
        return proute;
 }
 
@@ -261,6 +207,8 @@ ExecFindPartition(ModifyTableState *mtstate, TupleTableSlot 
*slot)
                                 val_desc ? errdetail("Partition key of the 
failing row contains %s.", val_desc) : 0));
        }
 
+       /* Initialize the partition result rel, if not done already. */
+       ExecInitPartitionResultRelInfo(mtstate, result);
        ecxt->ecxt_scantuple = ecxt_scantuple_old;
        return result;
 }
@@ -295,8 +243,11 @@ ExecCleanupTupleRouting(PartitionTupleRouting * proute)
        {
                ResultRelInfo *resultRelInfo = proute->partitions[i];
 
-               ExecCloseIndices(resultRelInfo);
-               heap_close(resultRelInfo->ri_RelationDesc, NoLock);
+               if (resultRelInfo)
+               {
+                       ExecCloseIndices(resultRelInfo);
+                       heap_close(resultRelInfo->ri_RelationDesc, NoLock);
+               }
        }
 
        /* Release the standalone partition tuple descriptor, if any */
@@ -305,6 +256,162 @@ ExecCleanupTupleRouting(PartitionTupleRouting * proute)
 }
 
 /*
+ * ExecInitPartitionResultRelInfo
+ *             Initialize ResultRelInfo for a partition if not done already
+ */
+static void
+ExecInitPartitionResultRelInfo(ModifyTableState *mtstate, int partidx)
+{
+       EState     *estate = mtstate->ps.state;
+       Relation        rootrel = mtstate->resultRelInfo->ri_RelationDesc;
+       Index           resultRTindex = 
mtstate->resultRelInfo->ri_RangeTableIndex;
+       ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
+       PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
+       Relation        partrel;
+       TupleDesc       tupDesc = RelationGetDescr(rootrel),
+                               part_tupdesc;
+
+       /* Nothing to do if already set. */
+       if (proute->partitions[partidx])
+               return;
+
+       proute->partitions[partidx] = (ResultRelInfo *)
+                                                                               
        palloc0(sizeof(ResultRelInfo));
+
+       /*
+        * We locked all the partitions in ExecSetupPartitionTupleRouting
+        * including the leaf partitions.
+        */
+       partrel = heap_open(proute->partition_oids[partidx], NoLock);
+       part_tupdesc = RelationGetDescr(partrel);
+       InitResultRelInfo(proute->partitions[partidx],
+                                         partrel,
+                                         resultRTindex,
+                                         rootrel,
+                                         estate->es_instrument);
+
+       /*
+        * Verify result relation is a valid target for INSERT.
+        */
+       CheckValidResultRel(proute->partitions[partidx], CMD_INSERT);
+
+       /*
+        * Open partition indices.  The user may have asked to check for
+        * conflicts within this leaf partition and do "nothing" instead of
+        * throwing an error.  Be prepared in that case by initializing the
+        * index information needed by ExecInsert() to perform speculative
+        * insertions.
+        */
+       if (partrel->rd_rel->relhasindex &&
+               proute->partitions[partidx]->ri_IndexRelationDescs == NULL)
+               ExecOpenIndices(proute->partitions[partidx],
+                                               mtstate->mt_onconflict != 
ONCONFLICT_NONE);
+
+       /*
+        * Save a tuple conversion map to convert a tuple routed to this
+        * partition from the parent's type to the partition's.
+        */
+       proute->partition_tupconv_maps[partidx] =
+                                                       
convert_tuples_by_name(tupDesc, part_tupdesc,
+                                                                               
           gettext_noop("could not convert row type"));
+
+       /*
+        * Also, if needed, the map to convert from partition's rowtype to the
+        * parent's that is needed to store the partition's tuples into the
+        * transition tuplestore which only accepts tuples of parent's rowtype.
+        */
+       if (mtstate->mt_transition_tupconv_maps)
+               mtstate->mt_transition_tupconv_maps[partidx] =
+                                                       
convert_tuples_by_name(part_tupdesc, tupDesc,
+                                                                               
           gettext_noop("could not convert row type"));
+
+       /*
+        * Build WITH CHECK OPTION constraints for this leaf partition rel. Note
+        * that we didn't build the withCheckOptionList for each partition 
within
+        * the planner, but simple translation of the varattnos for each 
partition
+        * will suffice.  This only occurs for the INSERT case; UPDATE/DELETE
+        * cases are handled in ExecInitModifyTable().
+        */
+       if (node && node->withCheckOptionLists != NIL)
+       {
+               List       *wcoList;
+               List       *mapped_wcoList;
+               List       *wcoExprs = NIL;
+               ListCell   *ll;
+
+               /*
+                * In case of INSERT on partitioned tables, there is only one 
plan.
+                * Likewise, there is only one WITH CHECK OPTIONS list, not one 
per
+                * partition.  We make a copy of the WCO qual for each 
partition; note
+                * that, if there are SubPlans in there, they all end up 
attached to
+                * the one parent Plan node.
+                */
+               Assert(mtstate->operation == CMD_INSERT &&
+                          list_length(node->withCheckOptionLists) == 1 &&
+                          mtstate->mt_nplans == 1);
+               wcoList = linitial(node->withCheckOptionLists);
+               mapped_wcoList = map_partition_varattnos(wcoList,
+                                                                               
                 resultRTindex,
+                                                                               
                 partrel, rootrel, NULL);
+               foreach(ll, mapped_wcoList)
+               {
+                       WithCheckOption *wco = castNode(WithCheckOption, 
lfirst(ll));
+                       ExprState  *wcoExpr = ExecInitQual(castNode(List, 
wco->qual),
+                                                                               
           mtstate->mt_plans[0]);
+                       wcoExprs = lappend(wcoExprs, wcoExpr);
+               }
+
+               proute->partitions[partidx]->ri_WithCheckOptions = 
mapped_wcoList;
+               proute->partitions[partidx]->ri_WithCheckOptionExprs = wcoExprs;
+       }
+
+       /*
+        * Build a projection for this leaf partition rel.  Note that we
+        * didn't build the returningList for each partition within the
+        * planner, but simple translation of the varattnos for each partition
+        * will suffice.  This only occurs for the INSERT case; UPDATE/DELETE
+        * are handled in ExecInitModifyTable().
+        */
+       if (node && node->returningLists != NIL)
+       {
+               TupleTableSlot *slot;
+               ExprContext *econtext;
+               List       *returningList;
+               List       *rlist;
+
+               returningList = linitial(node->returningLists);
+
+               /*
+                * Initialize result tuple slot and assign its rowtype using 
the first
+                * RETURNING list.  We assume the rest will look the same.
+                */
+               tupDesc = ExecTypeFromTL(returningList, false);
+
+               /* Set up a slot for the output of the RETURNING projection(s) 
*/
+               ExecInitResultTupleSlot(estate, &mtstate->ps);
+               ExecAssignResultType(&mtstate->ps, tupDesc);
+               slot = mtstate->ps.ps_ResultTupleSlot;
+
+               /* Need an econtext too */
+               if (mtstate->ps.ps_ExprContext == NULL)
+                       ExecAssignExprContext(estate, &mtstate->ps);
+               econtext = mtstate->ps.ps_ExprContext;
+
+               rlist = map_partition_varattnos(returningList,
+                                                                               
resultRTindex,
+                                                                               
partrel, rootrel, NULL);
+               proute->partitions[partidx]->ri_projectReturning =
+                               ExecBuildProjectionInfo(rlist, econtext, slot, 
&mtstate->ps,
+                                                                               
part_tupdesc);
+       }
+
+       /* Note that the entries in this list appear in no predetermined order. 
*/
+       estate->es_leaf_result_relations =
+                                                               
lappend(estate->es_leaf_result_relations,
+                                                                               
proute->partitions[partidx]);
+}
+
+/*
  * RelationGetPartitionDispatchInfo
  *             Returns information necessary to route tuples down a partition 
tree
  *
diff --git a/src/backend/executor/nodeModifyTable.c 
b/src/backend/executor/nodeModifyTable.c
index 0bd47ef3ab..fd1390a508 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -302,6 +302,7 @@ ExecInsert(ModifyTableState *mtstate,
                 */
                saved_resultRelInfo = resultRelInfo;
                resultRelInfo = proute->partitions[leaf_part_index];
+               Assert(resultRelInfo != NULL);
 
                /* We do not yet have a way to insert into a foreign partition 
*/
                if (resultRelInfo->ri_FdwRoutine)
@@ -1512,23 +1513,14 @@ ExecSetupTransitionCaptureState(ModifyTableState 
*mtstate, EState *estate)
                mtstate->mt_transition_tupconv_maps = (TupleConversionMap **)
                        palloc0(sizeof(TupleConversionMap *) * 
numResultRelInfos);
 
-               /* Choose the right set of partitions */
+               /*
+                * If partition tuple-routing is active, we can't have partition
+                * ResultRelInfo's just yet, so return in that case.  Instead,
+                * the conversion map will be initialized in
+                * ExecInitPartitionResultRelInfo() if and when needed.
+                */
                if (proute != NULL)
-               {
-                       /*
-                        * For tuple routing among partitions, we need 
TupleDescs based on
-                        * the partition routing table.
-                        */
-                       ResultRelInfo **resultRelInfos = proute->partitions;
-
-                       for (i = 0; i < numResultRelInfos; ++i)
-                       {
-                               mtstate->mt_transition_tupconv_maps[i] =
-                                       
convert_tuples_by_name(RelationGetDescr(resultRelInfos[i]->ri_RelationDesc),
-                                                                               
   RelationGetDescr(targetRelInfo->ri_RelationDesc),
-                                                                               
   gettext_noop("could not convert row type"));
-                       }
-               }
+                       return;
                else
                {
                        /* Otherwise we need the ResultRelInfo for each 
subplan. */
@@ -1830,8 +1822,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, 
int eflags)
        ListCell   *l;
        int                     i;
        Relation        rel;
-       PartitionTupleRouting *proute = NULL;
-       int                     num_partitions = 0;
 
        /* check for unsupported flags */
        Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -1932,11 +1922,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, 
int eflags)
        rel = mtstate->resultRelInfo->ri_RelationDesc;
        if (operation == CMD_INSERT &&
                rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
-       {
-               proute = mtstate->mt_partition_tuple_routing =
-                       ExecSetupPartitionTupleRouting(mtstate, rel);
-               num_partitions = proute->num_partitions;
-       }
+               mtstate->mt_partition_tuple_routing =
+                                               
ExecSetupPartitionTupleRouting(mtstate, rel);
 
        /*
         * Build state for collecting transition tuples.  This requires having a
@@ -1972,65 +1959,12 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, 
int eflags)
        }
 
        /*
-        * Build WITH CHECK OPTION constraints for each leaf partition rel. Note
-        * that we didn't build the withCheckOptionList for each partition 
within
-        * the planner, but simple translation of the varattnos for each 
partition
-        * will suffice.  This only occurs for the INSERT case; UPDATE/DELETE
-        * cases are handled above.
-        */
-       if (node->withCheckOptionLists != NIL && num_partitions > 0)
-       {
-               List       *wcoList;
-               PlanState  *plan;
-
-               /*
-                * In case of INSERT on partitioned tables, there is only one 
plan.
-                * Likewise, there is only one WITH CHECK OPTIONS list, not one 
per
-                * partition.  We make a copy of the WCO qual for each 
partition; note
-                * that, if there are SubPlans in there, they all end up 
attached to
-                * the one parent Plan node.
-                */
-               Assert(operation == CMD_INSERT &&
-                          list_length(node->withCheckOptionLists) == 1 &&
-                          mtstate->mt_nplans == 1);
-               wcoList = linitial(node->withCheckOptionLists);
-               plan = mtstate->mt_plans[0];
-               for (i = 0; i < num_partitions; i++)
-               {
-                       Relation        partrel;
-                       List       *mapped_wcoList;
-                       List       *wcoExprs = NIL;
-                       ListCell   *ll;
-
-                       resultRelInfo = proute->partitions[i];
-                       partrel = resultRelInfo->ri_RelationDesc;
-
-                       /* varno = node->nominalRelation */
-                       mapped_wcoList = map_partition_varattnos(wcoList,
-                                                                               
                         node->nominalRelation,
-                                                                               
                         partrel, rel, NULL);
-                       foreach(ll, mapped_wcoList)
-                       {
-                               WithCheckOption *wco = 
castNode(WithCheckOption, lfirst(ll));
-                               ExprState  *wcoExpr = 
ExecInitQual(castNode(List, wco->qual),
-                                                                               
                   plan);
-
-                               wcoExprs = lappend(wcoExprs, wcoExpr);
-                       }
-
-                       resultRelInfo->ri_WithCheckOptions = mapped_wcoList;
-                       resultRelInfo->ri_WithCheckOptionExprs = wcoExprs;
-               }
-       }
-
-       /*
         * Initialize RETURNING projections if needed.
         */
        if (node->returningLists)
        {
                TupleTableSlot *slot;
                ExprContext *econtext;
-               List       *returningList;
 
                /*
                 * Initialize result tuple slot and assign its rowtype using 
the first
@@ -2061,31 +1995,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, 
int eflags)
                                                                                
resultRelInfo->ri_RelationDesc->rd_att);
                        resultRelInfo++;
                }
-
-               /*
-                * Build a projection for each leaf partition rel.  Note that we
-                * didn't build the returningList for each partition within the
-                * planner, but simple translation of the varattnos for each 
partition
-                * will suffice.  This only occurs for the INSERT case; 
UPDATE/DELETE
-                * are handled above.
-                */
-               returningList = linitial(node->returningLists);
-               for (i = 0; i < num_partitions; i++)
-               {
-                       Relation        partrel;
-                       List       *rlist;
-
-                       resultRelInfo = proute->partitions[i];
-                       partrel = resultRelInfo->ri_RelationDesc;
-
-                       /* varno = node->nominalRelation */
-                       rlist = map_partition_varattnos(returningList,
-                                                                               
        node->nominalRelation,
-                                                                               
        partrel, rel, NULL);
-                       resultRelInfo->ri_projectReturning =
-                               ExecBuildProjectionInfo(rlist, econtext, slot, 
&mtstate->ps,
-                                                                               
resultRelInfo->ri_RelationDesc->rd_att);
-               }
        }
        else
        {
diff --git a/src/include/executor/execPartition.h 
b/src/include/executor/execPartition.h
index b3517e2ee0..cd256551bb 100644
--- a/src/include/executor/execPartition.h
+++ b/src/include/executor/execPartition.h
@@ -79,6 +79,7 @@ typedef struct PartitionTupleRouting
        int                     num_dispatch;
        ResultRelInfo **partitions;
        int                     num_partitions;
+       Oid                *partition_oids;
        TupleConversionMap **partition_tupconv_maps;
        TupleTableSlot *partition_tuple_slot;
 } PartitionTupleRouting;
-- 
2.11.0

Reply via email to