On 2018/01/24 17:25, Amit Langote wrote:
> On 2018/01/20 7:07, Robert Haas wrote:
>> On Fri, Jan 19, 2018 at 3:56 AM, Amit Langote wrote:
>>> I rebased the patches, since they started conflicting with a recently
>>> committed patch [1].
>>
>> I think that my latest commit has managed to break this pretty thoroughly.
>
> I rebased it. Here are the performance numbers again.
>
> * Uses the following hash-partitioned table:
>
> create table t1 (a int, b int) partition by hash (a);
> create table t1_x partition of t1 for values with (modulus M, remainder R)
> ...
>
>
> * Non-bulk insert uses the following code (insert 100,000 rows one-by-one):
>
> do $$
> begin
>   for i in 1..100000 loop
>     insert into t1 values (i, i+1);
>   end loop;
> end; $$;
>
> Times in milliseconds:
>
> #parts         HEAD      Patched
>      8     6148.313     4938.775
>     16     8882.420     6203.911
>     32    14251.072     8595.068
>     64    24465.691    13718.161
>    128    45099.435    23898.026
>    256    87307.332    44428.126
>
> * Bulk-inserting 100,000 rows using COPY:
>
> copy t1 from '/tmp/t1.csv' csv;
>
> Times in milliseconds:
>
> #parts         HEAD      Patched
>
>      8      466.170      446.865
>     16      445.341      444.990
>     32      443.544      487.713
>     64      460.579      435.412
>    128      469.953      422.403
>    256      463.592      431.118
Rebased again. Thanks, Amit
From 250fe77e304a0f0841f2d7978405c2a32fdeb7a2 Mon Sep 17 00:00:00 2001 From: amit <amitlangot...@gmail.com> Date: Tue, 19 Dec 2017 10:43:45 +0900 Subject: [PATCH v4 1/3] Teach CopyFrom to use ModifyTableState for tuple-routing This removes all fields of CopyStateData that were meant for tuple routing and instead uses ModifyTableState that has all those fields, including transition_tupconv_maps. In COPY's case, transition_tupconv_maps is only required if tuple routing is being used, so it's safe. --- src/backend/commands/copy.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 04a24c6082..251676b321 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -166,9 +166,6 @@ typedef struct CopyStateData bool volatile_defexprs; /* is any of defexprs volatile? */ List *range_table; - /* Tuple-routing support info */ - PartitionTupleRouting *partition_tuple_routing; - TransitionCaptureState *transition_capture; /* @@ -2285,6 +2282,7 @@ CopyFrom(CopyState cstate) ResultRelInfo *resultRelInfo; ResultRelInfo *saved_resultRelInfo = NULL; EState *estate = CreateExecutorState(); /* for ExecConstraints() */ + ModifyTableState *mtstate = makeNode(ModifyTableState); ExprContext *econtext; TupleTableSlot *myslot; MemoryContext oldcontext = CurrentMemoryContext; @@ -2303,6 +2301,8 @@ CopyFrom(CopyState cstate) Size bufferedTuplesSize = 0; int firstBufferedLineNo = 0; + PartitionTupleRouting *proute = NULL; + Assert(cstate->rel); /* @@ -2468,10 +2468,15 @@ CopyFrom(CopyState cstate) */ if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { - PartitionTupleRouting *proute; + ModifyTable *node = makeNode(ModifyTable); - proute = cstate->partition_tuple_routing = - ExecSetupPartitionTupleRouting(NULL, cstate->rel, 1, estate); + /* Just need make this field appear valid. 
*/ + node->nominalRelation = 1; + mtstate->ps.plan = (Plan *) node; + mtstate->ps.state = estate; + mtstate->resultRelInfo = resultRelInfo; + proute = mtstate->mt_partition_tuple_routing = + ExecSetupPartitionTupleRouting(mtstate, cstate->rel, 1, estate); /* * If we are capturing transition tuples, they may need to be @@ -2496,7 +2501,7 @@ CopyFrom(CopyState cstate) if ((resultRelInfo->ri_TrigDesc != NULL && (resultRelInfo->ri_TrigDesc->trig_insert_before_row || resultRelInfo->ri_TrigDesc->trig_insert_instead_row)) || - cstate->partition_tuple_routing != NULL || + mtstate->mt_partition_tuple_routing != NULL || cstate->volatile_defexprs) { useHeapMultiInsert = false; @@ -2571,10 +2576,9 @@ CopyFrom(CopyState cstate) ExecStoreTuple(tuple, slot, InvalidBuffer, false); /* Determine the partition to heap_insert the tuple into */ - if (cstate->partition_tuple_routing) + if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { int leaf_part_index; - PartitionTupleRouting *proute = cstate->partition_tuple_routing; /* * Away we go ... If we end up not finding a partition after all, @@ -2806,8 +2810,8 @@ CopyFrom(CopyState cstate) ExecCloseIndices(resultRelInfo); /* Close all the partitioned tables, leaf partitions, and their indices */ - if (cstate->partition_tuple_routing) - ExecCleanupTupleRouting(cstate->partition_tuple_routing); + if (proute) + ExecCleanupTupleRouting(proute); /* Close any trigger target relations */ ExecCleanUpTriggerState(estate); -- 2.11.0
From 96f0b1fed760e8b921f2d1e5be72ad864f9dd3f6 Mon Sep 17 00:00:00 2001 From: amit <amitlangot...@gmail.com> Date: Tue, 19 Dec 2017 13:56:25 +0900 Subject: [PATCH v4 2/3] ExecFindPartition refactoring --- src/backend/commands/copy.c | 5 +---- src/backend/executor/execPartition.c | 17 +++++++++-------- src/backend/executor/nodeModifyTable.c | 5 +---- src/include/executor/execPartition.h | 5 +---- 4 files changed, 12 insertions(+), 20 deletions(-) diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 251676b321..2096a52cea 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -2588,10 +2588,7 @@ CopyFrom(CopyState cstate) * will get us the ResultRelInfo and TupleConversionMap for the * partition, respectively. */ - leaf_part_index = ExecFindPartition(resultRelInfo, - proute->partition_dispatch_info, - slot, - estate); + leaf_part_index = ExecFindPartition(mtstate, slot); Assert(leaf_part_index >= 0 && leaf_part_index < proute->num_partitions); diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 106a96d910..947adf3032 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -227,11 +227,7 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, } /* - * ExecFindPartition -- Find a leaf partition in the partition tree rooted - * at parent, for the heap tuple contained in *slot - * - * estate must be non-NULL; we'll need it to compute any expressions in the - * partition key(s) + * ExecFindPartition -- Find a leaf partition for tuple contained in slot * * If no leaf partition is found, this routine errors out with the appropriate * error message, else it returns the leaf partition sequence number @@ -239,14 +235,19 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, * the partition tree. 
*/ int -ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd, - TupleTableSlot *slot, EState *estate) +ExecFindPartition(ModifyTableState *mtstate, TupleTableSlot *slot) { + EState *estate = mtstate->ps.state; int result; Datum values[PARTITION_MAX_KEYS]; bool isnull[PARTITION_MAX_KEYS]; Relation rel; - PartitionDispatch parent; + PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing; + PartitionDispatch *pd = proute->partition_dispatch_info, + parent; + ResultRelInfo *resultRelInfo = (mtstate->rootResultRelInfo != NULL) + ? mtstate->rootResultRelInfo + : mtstate->resultRelInfo; ExprContext *ecxt = GetPerTupleExprContext(estate); TupleTableSlot *ecxt_scantuple_old = ecxt->ecxt_scantuple; diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 828e1b0015..d2df9e94cf 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -297,10 +297,7 @@ ExecInsert(ModifyTableState *mtstate, * get us the ResultRelInfo and TupleConversionMap for the partition, * respectively. 
*/ - leaf_part_index = ExecFindPartition(resultRelInfo, - proute->partition_dispatch_info, - slot, - estate); + leaf_part_index = ExecFindPartition(mtstate, slot); Assert(leaf_part_index >= 0 && leaf_part_index < proute->num_partitions); diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 3df9c498bb..7373f60ffb 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -105,10 +105,6 @@ typedef struct PartitionTupleRouting extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel, Index resultRTindex, EState *estate); -extern int ExecFindPartition(ResultRelInfo *resultRelInfo, - PartitionDispatch *pd, - TupleTableSlot *slot, - EState *estate); extern void ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute); extern TupleConversionMap *TupConvMapForLeaf(PartitionTupleRouting *proute, ResultRelInfo *rootRelInfo, int leaf_index); @@ -117,5 +113,6 @@ extern HeapTuple ConvertPartitionTupleSlot(TupleConversionMap *map, TupleTableSlot *new_slot, TupleTableSlot **p_my_slot); extern void ExecCleanupTupleRouting(PartitionTupleRouting *proute); +extern int ExecFindPartition(ModifyTableState *mtstate, TupleTableSlot *slot); #endif /* EXECPARTITION_H */ -- 2.11.0
From c821f1e30e3c33424fec52a7b1e1a7c28cbacba9 Mon Sep 17 00:00:00 2001 From: amit <amitlangot...@gmail.com> Date: Tue, 19 Dec 2017 16:20:09 +0900 Subject: [PATCH v4 3/3] During tuple-routing, initialize per-partition objects lazily Those objects include ResultRelInfo, tuple conversion map, WITH CHECK OPTION quals and RETURNING projections. This means we don't allocate these objects for partitions that are never inserted into. --- src/backend/commands/copy.c | 6 +- src/backend/executor/execPartition.c | 506 +++++++++++++++++++++++---------- src/backend/executor/nodeModifyTable.c | 153 +--------- src/include/executor/execPartition.h | 10 +- 4 files changed, 377 insertions(+), 298 deletions(-) diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 2096a52cea..1000bb4461 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -2476,7 +2476,7 @@ CopyFrom(CopyState cstate) mtstate->ps.state = estate; mtstate->resultRelInfo = resultRelInfo; proute = mtstate->mt_partition_tuple_routing = - ExecSetupPartitionTupleRouting(mtstate, cstate->rel, 1, estate); + ExecSetupPartitionTupleRouting(mtstate, cstate->rel); /* * If we are capturing transition tuples, they may need to be @@ -2609,6 +2609,7 @@ CopyFrom(CopyState cstate) */ saved_resultRelInfo = resultRelInfo; resultRelInfo = proute->partitions[leaf_part_index]; + Assert(resultRelInfo != NULL); /* We do not yet have a way to insert into a foreign partition */ if (resultRelInfo->ri_FdwRoutine) @@ -2638,8 +2639,7 @@ CopyFrom(CopyState cstate) */ cstate->transition_capture->tcs_original_insert_tuple = NULL; cstate->transition_capture->tcs_map = - TupConvMapForLeaf(proute, saved_resultRelInfo, - leaf_part_index); + TupConvMapForLeaf(mtstate, leaf_part_index); } else { diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 947adf3032..088f1fafd9 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -23,6 
+23,8 @@ #include "utils/rls.h" #include "utils/ruleutils.h" +static void ExecInitPartitionResultRelInfo(ModifyTableState *mtstate, + int partidx); static PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel, int *num_parted, List **leaf_part_oids); static void get_partition_dispatch_recurse(Relation rel, Relation parent, @@ -44,22 +46,23 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation rel, * * Note that all the relations in the partition tree are locked using the * RowExclusiveLock mode upon return from this function. + * + * While we allocate the arrays of pointers of various objects for all + * partitions here, the objects themselves are lazily allocated and + * initialized for a given partition if a tuple is actually routed to it; + * see ExecInitPartitionResultRelInfo. */ PartitionTupleRouting * -ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, - Relation rel, Index resultRTindex, - EState *estate) +ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) { - TupleDesc tupDesc = RelationGetDescr(rel); + PartitionTupleRouting *proute; List *leaf_parts; ListCell *cell; - int i; - ResultRelInfo *leaf_part_arr = NULL, - *update_rri = NULL; - int num_update_rri = 0, - update_rri_index = 0; - bool is_update = false; - PartitionTupleRouting *proute; + int leaf_index, + update_rri_index, + num_update_rri; + bool is_update; + ResultRelInfo *update_rri; /* * Get the information about the partition tree after locking all the @@ -71,20 +74,24 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, RelationGetPartitionDispatchInfo(rel, &proute->num_dispatch, &leaf_parts); proute->num_partitions = list_length(leaf_parts); - proute->partitions = (ResultRelInfo **) palloc(proute->num_partitions * - sizeof(ResultRelInfo *)); + /* + * Actual ResultRelInfo's and TupleConversionMap's are allocated in + * ExecInitResultRelInfo(). 
+ */ + proute->partitions = (ResultRelInfo **) palloc0(proute->num_partitions * + sizeof(ResultRelInfo *)); proute->parent_child_tupconv_maps = (TupleConversionMap **) palloc0(proute->num_partitions * sizeof(TupleConversionMap *)); + proute->partition_oids = (Oid *) palloc0(proute->num_partitions * + sizeof(Oid)); /* Set up details specific to the type of tuple routing we are doing. */ if (mtstate && mtstate->operation == CMD_UPDATE) { - ModifyTable *node = (ModifyTable *) mtstate->ps.plan; - is_update = true; update_rri = mtstate->resultRelInfo; - num_update_rri = list_length(node->plans); + num_update_rri = mtstate->mt_nplans; proute->subplan_partition_offsets = palloc(num_update_rri * sizeof(int)); proute->num_subplan_partition_offsets = num_update_rri; @@ -95,15 +102,29 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, */ proute->root_tuple_slot = MakeTupleTableSlot(); } - else + + leaf_index = update_rri_index = 0; + foreach (cell, leaf_parts) { + Oid leaf_oid = lfirst_oid(cell); + + proute->partition_oids[leaf_index] = leaf_oid; + /* - * Since we are inserting tuples, we need to create all new result - * rels. Avoid repeated pallocs by allocating memory for all the - * result rels in bulk. + * The per-subplan resultrels and the resultrels of the leaf + * partitions are both in the same canonical order. So while going + * through the leaf partition oids, we need to keep track of the + * next per-subplan result rel to be looked for in the leaf + * partition resultrels. 
*/ - leaf_part_arr = (ResultRelInfo *) palloc0(proute->num_partitions * - sizeof(ResultRelInfo)); + if (is_update && update_rri_index < num_update_rri && + RelationGetRelid(update_rri[update_rri_index].ri_RelationDesc) == leaf_oid) + { + proute->subplan_partition_offsets[update_rri_index] = leaf_index; + update_rri_index++; + } + + leaf_index++; } /* @@ -114,109 +135,6 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, */ proute->partition_tuple_slot = MakeTupleTableSlot(); - i = 0; - foreach(cell, leaf_parts) - { - ResultRelInfo *leaf_part_rri; - Relation partrel = NULL; - TupleDesc part_tupdesc; - Oid leaf_oid = lfirst_oid(cell); - - if (is_update) - { - /* - * If the leaf partition is already present in the per-subplan - * result rels, we re-use that rather than initialize a new result - * rel. The per-subplan resultrels and the resultrels of the leaf - * partitions are both in the same canonical order. So while going - * through the leaf partition oids, we need to keep track of the - * next per-subplan result rel to be looked for in the leaf - * partition resultrels. - */ - if (update_rri_index < num_update_rri && - RelationGetRelid(update_rri[update_rri_index].ri_RelationDesc) == leaf_oid) - { - leaf_part_rri = &update_rri[update_rri_index]; - partrel = leaf_part_rri->ri_RelationDesc; - - /* - * This is required in order to we convert the partition's - * tuple to be compatible with the root partitioned table's - * tuple descriptor. When generating the per-subplan result - * rels, this was not set. 
- */ - leaf_part_rri->ri_PartitionRoot = rel; - - /* Remember the subplan offset for this ResultRelInfo */ - proute->subplan_partition_offsets[update_rri_index] = i; - - update_rri_index++; - } - else - leaf_part_rri = (ResultRelInfo *) palloc0(sizeof(ResultRelInfo)); - } - else - { - /* For INSERTs, we already have an array of result rels allocated */ - leaf_part_rri = &leaf_part_arr[i]; - } - - /* - * If we didn't open the partition rel, it means we haven't - * initialized the result rel either. - */ - if (!partrel) - { - /* - * We locked all the partitions above including the leaf - * partitions. Note that each of the newly opened relations in - * proute->partitions are eventually closed by the caller. - */ - partrel = heap_open(leaf_oid, NoLock); - InitResultRelInfo(leaf_part_rri, - partrel, - resultRTindex, - rel, - estate->es_instrument); - } - - part_tupdesc = RelationGetDescr(partrel); - - /* - * Save a tuple conversion map to convert a tuple routed to this - * partition from the parent's type to the partition's. - */ - proute->parent_child_tupconv_maps[i] = - convert_tuples_by_name(tupDesc, part_tupdesc, - gettext_noop("could not convert row type")); - - /* - * Verify result relation is a valid target for an INSERT. An UPDATE - * of a partition-key becomes a DELETE+INSERT operation, so this check - * is still required when the operation is CMD_UPDATE. - */ - CheckValidResultRel(leaf_part_rri, CMD_INSERT); - - /* - * Open partition indices. The user may have asked to check for - * conflicts within this leaf partition and do "nothing" instead of - * throwing an error. Be prepared in that case by initializing the - * index information needed by ExecInsert() to perform speculative - * insertions. 
- */ - if (leaf_part_rri->ri_RelationDesc->rd_rel->relhasindex && - leaf_part_rri->ri_IndexRelationDescs == NULL) - ExecOpenIndices(leaf_part_rri, - mtstate != NULL && - mtstate->mt_onconflict != ONCONFLICT_NONE); - - estate->es_leaf_result_relations = - lappend(estate->es_leaf_result_relations, leaf_part_rri); - - proute->partitions[i] = leaf_part_rri; - i++; - } - /* * For UPDATE, we should have found all the per-subplan resultrels in the * leaf partitions. @@ -342,10 +260,251 @@ ExecFindPartition(ModifyTableState *mtstate, TupleTableSlot *slot) val_desc ? errdetail("Partition key of the failing row contains %s.", val_desc) : 0)); } + /* Initialize the partition result rel, if not done already. */ + ExecInitPartitionResultRelInfo(mtstate, result); ecxt->ecxt_scantuple = ecxt_scantuple_old; return result; } +static int +leafpart_index_cmp(const void *arg1, const void *arg2) +{ + int leafidx1 = *(const int *) arg1; + int leafidx2 = *(const int *) arg2; + + if (leafidx1 > leafidx2) + return 1; + else if (leafidx1 < leafidx2) + return -1; + return 0; +} + +/* + * ExecInitPartitionResultRelInfo + * Initialize ResultRelInfo for a partition if not done already + */ +static void +ExecInitPartitionResultRelInfo(ModifyTableState *mtstate, int partidx) +{ + PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing; + Relation partrel, + rootrel; + ResultRelInfo *leaf_part_rri; + int firstVarno; + Relation firstResultRel; + + /* Nothing to do if already set.*/ + if (proute->partitions[partidx]) + return; + + leaf_part_rri = NULL; + rootrel = (mtstate->rootResultRelInfo != NULL) + ? mtstate->rootResultRelInfo->ri_RelationDesc + : mtstate->resultRelInfo->ri_RelationDesc; + + /* + * If we are doing tuple routing for update, try to reuse the + * per-subplan resultrel for this partition that ExecInitModifyTable() + * might already have created. 
+ */ + if (mtstate && mtstate->operation == CMD_UPDATE) + { + ResultRelInfo *update_rri; + int *partidx_entry; + + update_rri = mtstate->resultRelInfo; + + /* + * If the partition got a subplan, we'd be able to find its index + * in proute->subplan_partition_offsets. + */ + partidx_entry = (int *) bsearch(&partidx, + proute->subplan_partition_offsets, + mtstate->mt_nplans, sizeof(int), + leafpart_index_cmp); + if (partidx_entry) + { + int update_rri_index = + partidx_entry - proute->subplan_partition_offsets; + + Assert (update_rri_index < mtstate->mt_nplans); + leaf_part_rri = &update_rri[update_rri_index]; + partrel = leaf_part_rri->ri_RelationDesc; + + /* + * This is required in order to we convert the partition's + * tuple to be compatible with the root partitioned table's + * tuple descriptor. When generating the per-subplan result + * rels, this was not set. + */ + leaf_part_rri->ri_PartitionRoot = rootrel; + } + } + + /* + * Create new result rel, either if we are *inserting* the new tuple, or + * if we didn't find the result rel above for the update tuple routing + * case. + */ + if (leaf_part_rri == NULL) + { + EState *estate = mtstate->ps.state; + ModifyTable *node = (ModifyTable *) mtstate->ps.plan; + Index resultRTindex = node->nominalRelation; + + leaf_part_rri = (ResultRelInfo *) palloc0(sizeof(ResultRelInfo)); + + /* + * We locked all the partitions in ExecSetupPartitionTupleRouting + * including the leaf partitions. + */ + partrel = heap_open(proute->partition_oids[partidx], NoLock); + InitResultRelInfo(leaf_part_rri, + partrel, + resultRTindex, + rootrel, + estate->es_instrument); + + /* + * These are required as reference objects for mapping partition + * attno's in expressions in WithCheckOptions and RETURNING. + */ + firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; + firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; + + /* + * Build WITH CHECK OPTION constraints for this partition rel. 
Note + * that we didn't build the withCheckOptionList for each partition + * within the planner, but simple translation of the varattnos will + * suffice. This only occurs for the INSERT case or in the case of + * UPDATE for which we didn't find a result rel above to reuse. + */ + if (node && node->withCheckOptionLists != NIL) + { + List *wcoList; + List *mapped_wcoList; + List *wcoExprs = NIL; + ListCell *ll; + + /* + * In the case of INSERT on partitioned tables, there is only one + * plan. Likewise, there is only one WCO list, not one per + * partition. For UPDATE, there would be as many WCO lists as + * there are plans, but we use the first one as reference. Note + * that if there are SubPlans in there, they all end up attached + * to the one parent Plan node. + */ + Assert((mtstate->operation == CMD_INSERT && + list_length(node->withCheckOptionLists) == 1 && + mtstate->mt_nplans == 1) || + (mtstate->operation == CMD_UPDATE && + list_length(node->withCheckOptionLists) == + mtstate->mt_nplans)); + wcoList = linitial(node->withCheckOptionLists); + + mapped_wcoList = map_partition_varattnos(wcoList, + firstVarno, + partrel, firstResultRel, + NULL); + foreach(ll, mapped_wcoList) + { + WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll)); + ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual), + mtstate->mt_plans[0]); + wcoExprs = lappend(wcoExprs, wcoExpr); + } + + leaf_part_rri->ri_WithCheckOptions = mapped_wcoList; + leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs; + } + + /* + * Build the RETURNING projection if any for the partition. Note that + * we didn't build the returningList for each partition within the + * planner, but simple translation of the varattnos will suffice. + * This only occurs for the INSERT case; in the UPDATE/DELETE cases, + * ExecInitModifyTable() would've initialized this. 
+ */ + if (node && node->returningLists != NIL) + { + TupleTableSlot *slot; + ExprContext *econtext; + List *returningList; + List *rlist; + TupleDesc tupDesc; + + /* See the comment written above for WCO lists. */ + Assert((mtstate->operation == CMD_INSERT && + list_length(node->returningLists) == 1 && + mtstate->mt_nplans == 1) || + (mtstate->operation == CMD_UPDATE && + list_length(node->returningLists) == + mtstate->mt_nplans)); + returningList = linitial(node->returningLists); + + /* + * Initialize result tuple slot and assign its rowtype using the first + * RETURNING list. We assume the rest will look the same. + */ + tupDesc = ExecTypeFromTL(returningList, false); + + /* Set up a slot for the output of the RETURNING projection(s) */ + ExecInitResultTupleSlot(estate, &mtstate->ps); + ExecAssignResultType(&mtstate->ps, tupDesc); + slot = mtstate->ps.ps_ResultTupleSlot; + + /* Need an econtext too */ + if (mtstate->ps.ps_ExprContext == NULL) + ExecAssignExprContext(estate, &mtstate->ps); + econtext = mtstate->ps.ps_ExprContext; + + rlist = map_partition_varattnos(returningList, + firstVarno, + partrel, firstResultRel, NULL); + leaf_part_rri->ri_projectReturning = + ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps, + RelationGetDescr(partrel)); + } + + /* + * Note that the entries in this list appear in no predetermined + * order as result of initializing partition result rels as and when + * they're needed. + */ + estate->es_leaf_result_relations = + lappend(estate->es_leaf_result_relations, + leaf_part_rri); + + /* + * Open partition indices. The user may have asked to check for + * conflicts within this leaf partition and do "nothing" instead of + * throwing an error. Be prepared in that case by initializing the + * index information needed by ExecInsert() to perform speculative + * insertions. 
+ */ + if (partrel->rd_rel->relhasindex && + leaf_part_rri->ri_IndexRelationDescs == NULL) + ExecOpenIndices(leaf_part_rri, + mtstate->mt_onconflict != ONCONFLICT_NONE); + } + + /* + * Verify result relation is a valid target for INSERT. + */ + CheckValidResultRel(leaf_part_rri, CMD_INSERT); + + proute->partitions[partidx] = leaf_part_rri; + + /* + * Save a tuple conversion map to convert a tuple routed to this + * partition from the parent's type to the partition's. + */ + proute->parent_child_tupconv_maps[partidx] = + convert_tuples_by_name(RelationGetDescr(rootrel), + RelationGetDescr(partrel), + gettext_noop("could not convert row type")); +} + /* * ExecSetupChildParentMapForLeaf -- Initialize the per-leaf-partition * child-to-root tuple conversion map array. @@ -374,40 +533,92 @@ ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute) } /* - * TupConvMapForLeaf -- Get the tuple conversion map for a given leaf partition - * index. + * ChildParentTupConvMap -- Return tuple conversion map to convert tuples of + * 'partrel' into those of 'rootrel' + * + * If the function was previously called for this partition, we would've + * either already created the map and stored the same at + * proute->child_parent_tupconv_maps[index] or found out that such a map + * is not needed and thus set proute->child_parent_map_not_required[index]. */ -TupleConversionMap * -TupConvMapForLeaf(PartitionTupleRouting *proute, - ResultRelInfo *rootRelInfo, int leaf_index) +static TupleConversionMap * +ChildParentTupConvMap(Relation partrel, Relation rootrel, + PartitionTupleRouting *proute, int index) { - ResultRelInfo **resultRelInfos = proute->partitions; - TupleConversionMap **map; - TupleDesc tupdesc; + TupleConversionMap *map; /* Don't call this if we're not supposed to be using this type of map. */ Assert(proute->child_parent_tupconv_maps != NULL); /* If it's already known that we don't need a map, return NULL. 
*/ - if (proute->child_parent_map_not_required[leaf_index]) + if (proute->child_parent_map_not_required[index]) return NULL; /* If we've already got a map, return it. */ - map = &proute->child_parent_tupconv_maps[leaf_index]; - if (*map != NULL) - return *map; + map = proute->child_parent_tupconv_maps[index]; + if (map != NULL) + return map; /* No map yet; try to create one. */ - tupdesc = RelationGetDescr(resultRelInfos[leaf_index]->ri_RelationDesc); - *map = - convert_tuples_by_name(tupdesc, - RelationGetDescr(rootRelInfo->ri_RelationDesc), - gettext_noop("could not convert row type")); + map = convert_tuples_by_name(RelationGetDescr(partrel), + RelationGetDescr(rootrel), + gettext_noop("could not convert row type")); /* If it turns out no map is needed, remember for next time. */ - proute->child_parent_map_not_required[leaf_index] = (*map == NULL); + proute->child_parent_map_not_required[index] = (map == NULL); + + return map; +} + +/* + * TupConvMapForLeaf -- Get the tuple conversion map for a given leaf partition + * index. + * + * Call this only if it's known that the partition at leaf_index has been + * initialized with ExecInitPartitionResultRelInfo(). + */ +TupleConversionMap * +TupConvMapForLeaf(ModifyTableState *mtstate, int leaf_index) +{ + PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing; + ResultRelInfo **resultrels = proute->partitions, + *rootRelInfo = (mtstate->rootResultRelInfo != NULL) + ? mtstate->rootResultRelInfo + : mtstate->resultRelInfo; + + Assert(resultrels[leaf_index] != NULL); - return *map; + return ChildParentTupConvMap(resultrels[leaf_index]->ri_RelationDesc, + rootRelInfo->ri_RelationDesc, + proute, leaf_index); +} + +/* + * TupConvMapForSubplan -- Get the tuple conversion map for a partition given + * its subplan index. + * + * Call this if it's unclear whether the partition's ResultRelInfo has been + * initialized in mtstate->mt_partition_tuple_routing. 
+ */ +TupleConversionMap * +TupConvMapForSubplan(ModifyTableState *mtstate, int subplan_index) +{ + int leaf_index; + PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing; + ResultRelInfo *resultrels = mtstate->resultRelInfo, + *rootRelInfo = (mtstate->rootResultRelInfo != NULL) + ? mtstate->rootResultRelInfo + : mtstate->resultRelInfo; + + Assert(proute != NULL && + proute->subplan_partition_offsets != NULL && + subplan_index < proute->num_subplan_partition_offsets); + leaf_index = proute->subplan_partition_offsets[subplan_index]; + + Assert(subplan_index >= 0 && subplan_index < mtstate->mt_nplans); + return ChildParentTupConvMap(resultrels[subplan_index].ri_RelationDesc, + rootRelInfo->ri_RelationDesc, + proute, leaf_index); } /* @@ -490,8 +701,11 @@ ExecCleanupTupleRouting(PartitionTupleRouting *proute) continue; } - ExecCloseIndices(resultRelInfo); - heap_close(resultRelInfo->ri_RelationDesc, NoLock); + if (resultRelInfo) + { + ExecCloseIndices(resultRelInfo); + heap_close(resultRelInfo->ri_RelationDesc, NoLock); + } } /* Release the standalone partition tuple descriptors, if any */ diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index d2df9e94cf..afd9fbc853 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -307,6 +307,7 @@ ExecInsert(ModifyTableState *mtstate, */ saved_resultRelInfo = resultRelInfo; resultRelInfo = proute->partitions[leaf_part_index]; + Assert(resultRelInfo != NULL); /* We do not yet have a way to insert into a foreign partition */ if (resultRelInfo->ri_FdwRoutine) @@ -335,8 +336,7 @@ ExecInsert(ModifyTableState *mtstate, mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL; mtstate->mt_transition_capture->tcs_map = - TupConvMapForLeaf(proute, saved_resultRelInfo, - leaf_part_index); + TupConvMapForLeaf(mtstate, leaf_part_index); } else { @@ -351,8 +351,7 @@ ExecInsert(ModifyTableState *mtstate, if 
(mtstate->mt_oc_transition_capture != NULL) { mtstate->mt_oc_transition_capture->tcs_map = - TupConvMapForLeaf(proute, saved_resultRelInfo, - leaf_part_index); + TupConvMapForLeaf(mtstate, leaf_part_index); } /* @@ -1801,27 +1800,10 @@ tupconv_map_for_subplan(ModifyTableState *mtstate, int whichplan) * array *only* if partition-indexed array is not required. */ if (mtstate->mt_per_subplan_tupconv_maps == NULL) - { - int leaf_index; - PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing; + return TupConvMapForSubplan(mtstate, whichplan); - /* - * If subplan-indexed array is NULL, things should have been arranged - * to convert the subplan index to partition index. - */ - Assert(proute && proute->subplan_partition_offsets != NULL && - whichplan < proute->num_subplan_partition_offsets); - - leaf_index = proute->subplan_partition_offsets[whichplan]; - - return TupConvMapForLeaf(proute, getTargetResultRelInfo(mtstate), - leaf_index); - } - else - { - Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans); - return mtstate->mt_per_subplan_tupconv_maps[whichplan]; - } + Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans); + return mtstate->mt_per_subplan_tupconv_maps[whichplan]; } /* ---------------------------------------------------------------- @@ -2095,14 +2077,10 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) ResultRelInfo *saved_resultRelInfo; ResultRelInfo *resultRelInfo; Plan *subplan; - int firstVarno = 0; - Relation firstResultRel = NULL; ListCell *l; int i; Relation rel; bool update_tuple_routing_needed = node->partColsUpdated; - PartitionTupleRouting *proute = NULL; - int num_partitions = 0; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); @@ -2225,20 +2203,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) */ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && (operation == CMD_INSERT || update_tuple_routing_needed)) - { - proute = 
mtstate->mt_partition_tuple_routing = - ExecSetupPartitionTupleRouting(mtstate, - rel, node->nominalRelation, - estate); - num_partitions = proute->num_partitions; - - /* - * Below are required as reference objects for mapping partition - * attno's in expressions such as WithCheckOptions and RETURNING. - */ - firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; - firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; - } + mtstate->mt_partition_tuple_routing = + ExecSetupPartitionTupleRouting(mtstate, rel); /* * Build state for collecting transition tuples. This requires having a @@ -2285,77 +2251,12 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) } /* - * Build WITH CHECK OPTION constraints for each leaf partition rel. Note - * that we didn't build the withCheckOptionList for each partition within - * the planner, but simple translation of the varattnos for each partition - * will suffice. This only occurs for the INSERT case or for UPDATE row - * movement. DELETEs and local UPDATEs are handled above. - */ - if (node->withCheckOptionLists != NIL && num_partitions > 0) - { - List *first_wcoList; - - /* - * In case of INSERT on partitioned tables, there is only one plan. - * Likewise, there is only one WITH CHECK OPTIONS list, not one per - * partition. Whereas for UPDATE, there are as many WCOs as there are - * plans. So in either case, use the WCO expression of the first - * resultRelInfo as a reference to calculate attno's for the WCO - * expression of each of the partitions. We make a copy of the WCO - * qual for each partition. Note that, if there are SubPlans in there, - * they all end up attached to the one parent Plan node. 
- */ - Assert(update_tuple_routing_needed || - (operation == CMD_INSERT && - list_length(node->withCheckOptionLists) == 1 && - mtstate->mt_nplans == 1)); - - first_wcoList = linitial(node->withCheckOptionLists); - for (i = 0; i < num_partitions; i++) - { - Relation partrel; - List *mapped_wcoList; - List *wcoExprs = NIL; - ListCell *ll; - - resultRelInfo = proute->partitions[i]; - - /* - * If we are referring to a resultRelInfo from one of the update - * result rels, that result rel would already have - * WithCheckOptions initialized. - */ - if (resultRelInfo->ri_WithCheckOptions) - continue; - - partrel = resultRelInfo->ri_RelationDesc; - - mapped_wcoList = map_partition_varattnos(first_wcoList, - firstVarno, - partrel, firstResultRel, - NULL); - foreach(ll, mapped_wcoList) - { - WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll)); - ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual), - &mtstate->ps); - - wcoExprs = lappend(wcoExprs, wcoExpr); - } - - resultRelInfo->ri_WithCheckOptions = mapped_wcoList; - resultRelInfo->ri_WithCheckOptionExprs = wcoExprs; - } - } - - /* * Initialize RETURNING projections if needed. */ if (node->returningLists) { TupleTableSlot *slot; ExprContext *econtext; - List *firstReturningList; /* * Initialize result tuple slot and assign its rowtype using the first @@ -2386,44 +2287,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) resultRelInfo->ri_RelationDesc->rd_att); resultRelInfo++; } - - /* - * Build a projection for each leaf partition rel. Note that we - * didn't build the returningList for each partition within the - * planner, but simple translation of the varattnos for each partition - * will suffice. This only occurs for the INSERT case or for UPDATE - * row movement. DELETEs and local UPDATEs are handled above. 
- */ - firstReturningList = linitial(node->returningLists); - for (i = 0; i < num_partitions; i++) - { - Relation partrel; - List *rlist; - - resultRelInfo = proute->partitions[i]; - - /* - * If we are referring to a resultRelInfo from one of the update - * result rels, that result rel would already have a returningList - * built. - */ - if (resultRelInfo->ri_projectReturning) - continue; - - partrel = resultRelInfo->ri_RelationDesc; - - /* - * Use the returning expression of the first resultRelInfo as a - * reference to calculate attno's for the returning expression of - * each of the partitions. - */ - rlist = map_partition_varattnos(firstReturningList, - firstVarno, - partrel, firstResultRel, NULL); - resultRelInfo->ri_projectReturning = - ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps, - resultRelInfo->ri_RelationDesc->rd_att); - } } else { diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 7373f60ffb..b1c08f6581 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -91,6 +91,7 @@ typedef struct PartitionTupleRouting { PartitionDispatch *partition_dispatch_info; int num_dispatch; + Oid *partition_oids; ResultRelInfo **partitions; int num_partitions; TupleConversionMap **parent_child_tupconv_maps; @@ -103,11 +104,12 @@ typedef struct PartitionTupleRouting } PartitionTupleRouting; extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, - Relation rel, Index resultRTindex, - EState *estate); + Relation rel); extern void ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute); -extern TupleConversionMap *TupConvMapForLeaf(PartitionTupleRouting *proute, - ResultRelInfo *rootRelInfo, int leaf_index); +extern TupleConversionMap *TupConvMapForLeaf(ModifyTableState *mtstate, + int leaf_index); +extern TupleConversionMap *TupConvMapForSubplan(ModifyTableState *mtstate, + int subplan_index); extern HeapTuple 
ConvertPartitionTupleSlot(TupleConversionMap *map, HeapTuple tuple, TupleTableSlot *new_slot, -- 2.11.0