Fujita-san,

On 2018/02/16 19:50, Etsuro Fujita wrote:
> (2018/02/16 18:23), Amit Langote wrote:
>> Good idea. Done.
> Thanks. I fixed a typo (s/Converti/Convert/) and adjusted these comments
> a bit further to match the preceding code/comments. Attached is the
> updated version.

Thank you for updating the patch.

>> Updated patch attached.
>
> Thanks, I think the patch is in good shape, so I'll mark this as ready for
> committer.

Thanks a lot for reviewing!

Attached rebased patch.

Regards,
Amit
From 20852fe6cc2f1b8bd57d8bae528083c9f8092dab Mon Sep 17 00:00:00 2001 From: amit <amitlangot...@gmail.com> Date: Mon, 19 Feb 2018 13:15:44 +0900 Subject: [PATCH v11] During tuple-routing, initialize per-partition objects lazily Those objects include ResultRelInfo, tuple conversion map, WITH CHECK OPTION quals and RETURNING projections. This means we don't allocate these objects for partitions that are never inserted into. --- src/backend/commands/copy.c | 10 +- src/backend/executor/execPartition.c | 309 ++++++++++++++++++++++++--------- src/backend/executor/nodeModifyTable.c | 131 ++------------ src/include/executor/execPartition.h | 9 +- 4 files changed, 252 insertions(+), 207 deletions(-) diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index d5883c98d1..4562a5121d 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -2469,7 +2469,7 @@ CopyFrom(CopyState cstate) PartitionTupleRouting *proute; proute = cstate->partition_tuple_routing = - ExecSetupPartitionTupleRouting(NULL, cstate->rel, 1, estate); + ExecSetupPartitionTupleRouting(NULL, cstate->rel); /* * If we are capturing transition tuples, they may need to be @@ -2606,6 +2606,14 @@ CopyFrom(CopyState cstate) */ saved_resultRelInfo = resultRelInfo; resultRelInfo = proute->partitions[leaf_part_index]; + if (resultRelInfo == NULL) + { + resultRelInfo = ExecInitPartitionInfo(NULL, + saved_resultRelInfo, + proute, estate, + leaf_part_index); + Assert(resultRelInfo != NULL); + } /* We do not yet have a way to insert into a foreign partition */ if (resultRelInfo->ri_FdwRoutine) diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 00523ce250..04f76b123a 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -44,18 +44,23 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation rel, * * Note that all the relations in the partition tree are locked using the * RowExclusiveLock mode upon 
return from this function. + * + * While we allocate the arrays of pointers of ResultRelInfo and + * TupleConversionMap for all partitions here, actual objects themselves are + * lazily allocated for a given partition if a tuple is actually routed to it; + * see ExecInitPartitionInfo. However, if the function is invoked for update + * tuple routing, caller would already have initialized ResultRelInfo's for + * some of the partitions, which are reused and assigned to their respective + * slot in the aforementioned array. */ PartitionTupleRouting * -ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, - Relation rel, Index resultRTindex, - EState *estate) +ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel) { TupleDesc tupDesc = RelationGetDescr(rel); List *leaf_parts; ListCell *cell; int i; - ResultRelInfo *leaf_part_arr = NULL, - *update_rri = NULL; + ResultRelInfo *update_rri = NULL; int num_update_rri = 0, update_rri_index = 0; bool is_update = false; @@ -76,6 +81,8 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, proute->parent_child_tupconv_maps = (TupleConversionMap **) palloc0(proute->num_partitions * sizeof(TupleConversionMap *)); + proute->partition_oids = (Oid *) palloc(proute->num_partitions * + sizeof(Oid)); /* Set up details specific to the type of tuple routing we are doing. */ if (mtstate && mtstate->operation == CMD_UPDATE) @@ -95,16 +102,6 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, */ proute->root_tuple_slot = MakeTupleTableSlot(NULL); } - else - { - /* - * Since we are inserting tuples, we need to create all new result - * rels. Avoid repeated pallocs by allocating memory for all the - * result rels in bulk. 
- */ - leaf_part_arr = (ResultRelInfo *) palloc0(proute->num_partitions * - sizeof(ResultRelInfo)); - } /* * Initialize an empty slot that will be used to manipulate tuples of any @@ -117,11 +114,10 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, i = 0; foreach(cell, leaf_parts) { - ResultRelInfo *leaf_part_rri; - Relation partrel = NULL; - TupleDesc part_tupdesc; + ResultRelInfo *leaf_part_rri = NULL; Oid leaf_oid = lfirst_oid(cell); + proute->partition_oids[i] = leaf_oid; if (is_update) { /* @@ -136,6 +132,9 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, if (update_rri_index < num_update_rri && RelationGetRelid(update_rri[update_rri_index].ri_RelationDesc) == leaf_oid) { + Relation partrel; + TupleDesc part_tupdesc; + leaf_part_rri = &update_rri[update_rri_index]; partrel = leaf_part_rri->ri_RelationDesc; @@ -151,73 +150,26 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, proute->subplan_partition_offsets[update_rri_index] = i; update_rri_index++; + + part_tupdesc = RelationGetDescr(partrel); + + /* + * Save a tuple conversion map to convert a tuple routed to + * this partition from the parent's type to the partition's. + */ + proute->parent_child_tupconv_maps[i] = + convert_tuples_by_name(tupDesc, part_tupdesc, + gettext_noop("could not convert row type")); + + /* + * Verify result relation is a valid target for an INSERT. An + * UPDATE of a partition-key becomes a DELETE+INSERT operation, + * so this check is required even when the operation is + * CMD_UPDATE. + */ + CheckValidResultRel(leaf_part_rri, CMD_INSERT); } - else - leaf_part_rri = (ResultRelInfo *) palloc0(sizeof(ResultRelInfo)); } - else - { - /* For INSERTs, we already have an array of result rels allocated */ - leaf_part_rri = &leaf_part_arr[i]; - } - - /* - * If we didn't open the partition rel, it means we haven't - * initialized the result rel either. - */ - if (!partrel) - { - /* - * We locked all the partitions above including the leaf - * partitions. 
Note that each of the newly opened relations in - * proute->partitions are eventually closed by the caller. - */ - partrel = heap_open(leaf_oid, NoLock); - InitResultRelInfo(leaf_part_rri, - partrel, - resultRTindex, - rel, - estate->es_instrument); - - /* - * Since we've just initialized this ResultRelInfo, it's not in - * any list attached to the estate as yet. Add it, so that it can - * be found later. - */ - estate->es_tuple_routing_result_relations = - lappend(estate->es_tuple_routing_result_relations, - leaf_part_rri); - } - - part_tupdesc = RelationGetDescr(partrel); - - /* - * Save a tuple conversion map to convert a tuple routed to this - * partition from the parent's type to the partition's. - */ - proute->parent_child_tupconv_maps[i] = - convert_tuples_by_name(tupDesc, part_tupdesc, - gettext_noop("could not convert row type")); - - /* - * Verify result relation is a valid target for an INSERT. An UPDATE - * of a partition-key becomes a DELETE+INSERT operation, so this check - * is still required when the operation is CMD_UPDATE. - */ - CheckValidResultRel(leaf_part_rri, CMD_INSERT); - - /* - * Open partition indices. The user may have asked to check for - * conflicts within this leaf partition and do "nothing" instead of - * throwing an error. Be prepared in that case by initializing the - * index information needed by ExecInsert() to perform speculative - * insertions. 
- */ - if (leaf_part_rri->ri_RelationDesc->rd_rel->relhasindex && - leaf_part_rri->ri_IndexRelationDescs == NULL) - ExecOpenIndices(leaf_part_rri, - mtstate != NULL && - mtstate->mt_onconflict != ONCONFLICT_NONE); proute->partitions[i] = leaf_part_rri; i++; @@ -352,6 +304,193 @@ ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd, } /* + * ExecInitPartitionInfo + * Initialize ResultRelInfo and other information for a partition if not + * already done + * + * Returns the ResultRelInfo + */ +ResultRelInfo * +ExecInitPartitionInfo(ModifyTableState *mtstate, + ResultRelInfo *resultRelInfo, + PartitionTupleRouting *proute, + EState *estate, int partidx) +{ + Relation rootrel = resultRelInfo->ri_RelationDesc, + partrel; + ResultRelInfo *leaf_part_rri; + ModifyTable *node = mtstate ? (ModifyTable *) mtstate->ps.plan : NULL; + + /* + * We locked all the partitions in ExecSetupPartitionTupleRouting + * including the leaf partitions. + */ + partrel = heap_open(proute->partition_oids[partidx], NoLock); + leaf_part_rri = (ResultRelInfo *) palloc0(sizeof(ResultRelInfo)); + InitResultRelInfo(leaf_part_rri, + partrel, + node ? node->nominalRelation : 1, + rootrel, + estate->es_instrument); + + /* + * Verify result relation is a valid target for an INSERT. An UPDATE + * of a partition-key becomes a DELETE+INSERT operation, so this check + * is still required when the operation is CMD_UPDATE. + */ + CheckValidResultRel(leaf_part_rri, CMD_INSERT); + + /* + * Since we've just initialized this ResultRelInfo, it's not in + * any list attached to the estate as yet. Add it, so that it can + * be found later. + * + * Note that the entries in this list appear in no predetermined + * order, because partition result rels are initialized as and when + * they're needed. + */ + estate->es_tuple_routing_result_relations = + lappend(estate->es_tuple_routing_result_relations, + leaf_part_rri); + + /* + * Open partition indices. 
The user may have asked to check for + * conflicts within this leaf partition and do "nothing" instead of + * throwing an error. Be prepared in that case by initializing the + * index information needed by ExecInsert() to perform speculative + * insertions. + */ + if (partrel->rd_rel->relhasindex && + leaf_part_rri->ri_IndexRelationDescs == NULL) + ExecOpenIndices(leaf_part_rri, + (mtstate != NULL && + mtstate->mt_onconflict != ONCONFLICT_NONE)); + + /* + * Build WITH CHECK OPTION constraints for the partition. Note that we + * didn't build the withCheckOptionList for partitions within the planner, + * but simple translation of varattnos will suffice. This only occurs for + * the INSERT case or in the case of UPDATE tuple routing where we didn't + * find a result rel to reuse in ExecSetupPartitionTupleRouting(). + */ + if (node && node->withCheckOptionLists != NIL) + { + List *wcoList; + List *wcoExprs = NIL; + ListCell *ll; + int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; + Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; + + /* + * In the case of INSERT on partitioned tables, there is only one + * plan. Likewise, there is only one WCO list, not one per + * partition. For UPDATE, there would be as many WCO lists as + * there are plans. + */ + Assert((node->operation == CMD_INSERT && + list_length(node->withCheckOptionLists) == 1 && + list_length(node->plans) == 1) || + (node->operation == CMD_UPDATE && + list_length(node->withCheckOptionLists) == + list_length(node->plans))); + + /* + * Use the WCO list of the first plan as a reference to calculate + * attno's for the WCO list of this partition. In the INSERT case, + * that refers to the root partitioned table, whereas in the UPDATE + * tuple routing case, that refers to the first partition in the + * mtstate->resultRelInfo array. In any case, both that relation and + * this partition should have the same columns, so we should be able + * to map attributes successfully. 
+ */ + wcoList = linitial(node->withCheckOptionLists); + + /* + * Convert Vars in it to contain this partition's attribute numbers. + */ + wcoList = map_partition_varattnos(wcoList, firstVarno, + partrel, firstResultRel, NULL); + foreach(ll, wcoList) + { + WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll)); + ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual), + mtstate->mt_plans[0]); + wcoExprs = lappend(wcoExprs, wcoExpr); + } + + leaf_part_rri->ri_WithCheckOptions = wcoList; + leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs; + } + + /* + * Build the RETURNING projection for the partition. Note that we didn't + * build the returningList for partitions within the planner, but simple + * translation of varattnos will suffice. This only occurs for the INSERT + * case or in the case of UPDATE tuple routing where we didn't find a + * result rel to reuse in ExecSetupPartitionTupleRouting(). + */ + if (node && node->returningLists != NIL) + { + TupleTableSlot *slot; + ExprContext *econtext; + List *returningList; + int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; + Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; + + /* See the comment above for WCO lists. */ + Assert((node->operation == CMD_INSERT && + list_length(node->returningLists) == 1 && + list_length(node->plans) == 1) || + (node->operation == CMD_UPDATE && + list_length(node->returningLists) == + list_length(node->plans))); + + /* + * Use the RETURNING list of the first plan as a reference to + * calculate attno's for the RETURNING list of this partition. See + * the comment above for WCO lists for more details on why this is + * okay. + */ + returningList = linitial(node->returningLists); + + /* + * Convert Vars in it to contain this partition's attribute numbers. + */ + returningList = map_partition_varattnos(returningList, firstVarno, + partrel, firstResultRel, + NULL); + + /* + * Initialize the projection itself. 
+ * + * Use the slot and the expression context that would have been set up + * in ExecInitModifyTable() for projection's output. + */ + Assert(mtstate->ps.ps_ResultTupleSlot != NULL); + slot = mtstate->ps.ps_ResultTupleSlot; + Assert(mtstate->ps.ps_ExprContext != NULL); + econtext = mtstate->ps.ps_ExprContext; + leaf_part_rri->ri_projectReturning = + ExecBuildProjectionInfo(returningList, econtext, slot, + &mtstate->ps, RelationGetDescr(partrel)); + } + + Assert (proute->partitions[partidx] == NULL); + proute->partitions[partidx] = leaf_part_rri; + + /* + * Save a tuple conversion map to convert a tuple routed to this + * partition from the parent's type to the partition's. + */ + proute->parent_child_tupconv_maps[partidx] = + convert_tuples_by_name(RelationGetDescr(rootrel), + RelationGetDescr(partrel), + gettext_noop("could not convert row type")); + + return leaf_part_rri; +} + +/* * ExecSetupChildParentMapForLeaf -- Initialize the per-leaf-partition * child-to-root tuple conversion map array. * @@ -477,6 +616,10 @@ ExecCleanupTupleRouting(PartitionTupleRouting *proute) { ResultRelInfo *resultRelInfo = proute->partitions[i]; + /* skip further processing for uninitialized partitions */ + if (resultRelInfo == NULL) + continue; + /* * If this result rel is one of the UPDATE subplan result rels, let * ExecEndPlan() close it. 
For INSERT or COPY, diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 93c03cfb07..87a4a92072 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -310,6 +310,14 @@ ExecInsert(ModifyTableState *mtstate, */ saved_resultRelInfo = resultRelInfo; resultRelInfo = proute->partitions[leaf_part_index]; + if (resultRelInfo == NULL) + { + resultRelInfo = ExecInitPartitionInfo(mtstate, + saved_resultRelInfo, + proute, estate, + leaf_part_index); + Assert(resultRelInfo != NULL); + } /* We do not yet have a way to insert into a foreign partition */ if (resultRelInfo->ri_FdwRoutine) @@ -2098,14 +2106,10 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) ResultRelInfo *saved_resultRelInfo; ResultRelInfo *resultRelInfo; Plan *subplan; - int firstVarno = 0; - Relation firstResultRel = NULL; ListCell *l; int i; Relation rel; bool update_tuple_routing_needed = node->partColsUpdated; - PartitionTupleRouting *proute = NULL; - int num_partitions = 0; /* check for unsupported flags */ Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); @@ -2228,20 +2232,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) */ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE && (operation == CMD_INSERT || update_tuple_routing_needed)) - { - proute = mtstate->mt_partition_tuple_routing = - ExecSetupPartitionTupleRouting(mtstate, - rel, node->nominalRelation, - estate); - num_partitions = proute->num_partitions; - - /* - * Below are required as reference objects for mapping partition - * attno's in expressions such as WithCheckOptions and RETURNING. - */ - firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; - firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; - } + mtstate->mt_partition_tuple_routing = + ExecSetupPartitionTupleRouting(mtstate, rel); /* * Build state for collecting transition tuples. 
This requires having a @@ -2288,77 +2280,12 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) } /* - * Build WITH CHECK OPTION constraints for each leaf partition rel. Note - * that we didn't build the withCheckOptionList for each partition within - * the planner, but simple translation of the varattnos for each partition - * will suffice. This only occurs for the INSERT case or for UPDATE row - * movement. DELETEs and local UPDATEs are handled above. - */ - if (node->withCheckOptionLists != NIL && num_partitions > 0) - { - List *first_wcoList; - - /* - * In case of INSERT on partitioned tables, there is only one plan. - * Likewise, there is only one WITH CHECK OPTIONS list, not one per - * partition. Whereas for UPDATE, there are as many WCOs as there are - * plans. So in either case, use the WCO expression of the first - * resultRelInfo as a reference to calculate attno's for the WCO - * expression of each of the partitions. We make a copy of the WCO - * qual for each partition. Note that, if there are SubPlans in there, - * they all end up attached to the one parent Plan node. - */ - Assert(update_tuple_routing_needed || - (operation == CMD_INSERT && - list_length(node->withCheckOptionLists) == 1 && - mtstate->mt_nplans == 1)); - - first_wcoList = linitial(node->withCheckOptionLists); - for (i = 0; i < num_partitions; i++) - { - Relation partrel; - List *mapped_wcoList; - List *wcoExprs = NIL; - ListCell *ll; - - resultRelInfo = proute->partitions[i]; - - /* - * If we are referring to a resultRelInfo from one of the update - * result rels, that result rel would already have - * WithCheckOptions initialized. 
- */ - if (resultRelInfo->ri_WithCheckOptions) - continue; - - partrel = resultRelInfo->ri_RelationDesc; - - mapped_wcoList = map_partition_varattnos(first_wcoList, - firstVarno, - partrel, firstResultRel, - NULL); - foreach(ll, mapped_wcoList) - { - WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll)); - ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual), - &mtstate->ps); - - wcoExprs = lappend(wcoExprs, wcoExpr); - } - - resultRelInfo->ri_WithCheckOptions = mapped_wcoList; - resultRelInfo->ri_WithCheckOptionExprs = wcoExprs; - } - } - - /* * Initialize RETURNING projections if needed. */ if (node->returningLists) { TupleTableSlot *slot; ExprContext *econtext; - List *firstReturningList; /* * Initialize result tuple slot and assign its rowtype using the first @@ -2388,44 +2315,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) resultRelInfo->ri_RelationDesc->rd_att); resultRelInfo++; } - - /* - * Build a projection for each leaf partition rel. Note that we - * didn't build the returningList for each partition within the - * planner, but simple translation of the varattnos for each partition - * will suffice. This only occurs for the INSERT case or for UPDATE - * row movement. DELETEs and local UPDATEs are handled above. - */ - firstReturningList = linitial(node->returningLists); - for (i = 0; i < num_partitions; i++) - { - Relation partrel; - List *rlist; - - resultRelInfo = proute->partitions[i]; - - /* - * If we are referring to a resultRelInfo from one of the update - * result rels, that result rel would already have a returningList - * built. - */ - if (resultRelInfo->ri_projectReturning) - continue; - - partrel = resultRelInfo->ri_RelationDesc; - - /* - * Use the returning expression of the first resultRelInfo as a - * reference to calculate attno's for the returning expression of - * each of the partitions. 
- */ - rlist = map_partition_varattnos(firstReturningList, - firstVarno, - partrel, firstResultRel, NULL); - resultRelInfo->ri_projectReturning = - ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps, - resultRelInfo->ri_RelationDesc->rd_att); - } } else { diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 3df9c498bb..e94718608f 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -58,6 +58,7 @@ typedef struct PartitionDispatchData *PartitionDispatch; * partition tree. * num_dispatch number of partitioned tables in the partition * tree (= length of partition_dispatch_info[]) + * partition_oids Array of leaf partitions OIDs * partitions Array of ResultRelInfo* objects with one entry * for every leaf partition in the partition tree. * num_partitions Number of leaf partitions in the partition tree @@ -91,6 +92,7 @@ typedef struct PartitionTupleRouting { PartitionDispatch *partition_dispatch_info; int num_dispatch; + Oid *partition_oids; ResultRelInfo **partitions; int num_partitions; TupleConversionMap **parent_child_tupconv_maps; @@ -103,12 +105,15 @@ typedef struct PartitionTupleRouting } PartitionTupleRouting; extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, - Relation rel, Index resultRTindex, - EState *estate); + Relation rel); extern int ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd, TupleTableSlot *slot, EState *estate); +extern ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate, + ResultRelInfo *resultRelInfo, + PartitionTupleRouting *proute, + EState *estate, int partidx); extern void ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute); extern TupleConversionMap *TupConvMapForLeaf(PartitionTupleRouting *proute, ResultRelInfo *rootRelInfo, int leaf_index); -- 2.11.0