(2018/02/19 13:19), Amit Langote wrote:
> Attached rebased patch.
Thanks for the rebased patch!
One thing I noticed while updating the
tuple-routing-for-foreign-partitions patch on top of this is: we should
switch into the per-query memory context in ExecInitPartitionInfo.
Attached is an updated version for that.
Best regards,
Etsuro Fujita
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index d5883c9..4562a51 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -2469,7 +2469,7 @@ CopyFrom(CopyState cstate)
PartitionTupleRouting *proute;
proute = cstate->partition_tuple_routing =
- ExecSetupPartitionTupleRouting(NULL, cstate->rel, 1, estate);
+ ExecSetupPartitionTupleRouting(NULL, cstate->rel);
/*
* If we are capturing transition tuples, they may need to be
@@ -2606,6 +2606,14 @@ CopyFrom(CopyState cstate)
*/
saved_resultRelInfo = resultRelInfo;
resultRelInfo = proute->partitions[leaf_part_index];
+ if (resultRelInfo == NULL)
+ {
+ resultRelInfo = ExecInitPartitionInfo(NULL,
+ saved_resultRelInfo,
+ proute, estate,
+ leaf_part_index);
+ Assert(resultRelInfo != NULL);
+ }
/* We do not yet have a way to insert into a foreign partition */
if (resultRelInfo->ri_FdwRoutine)
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index 00523ce..d35dac1 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -44,18 +44,23 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
*
* Note that all the relations in the partition tree are locked using the
* RowExclusiveLock mode upon return from this function.
+ *
+ * While we allocate the arrays of pointers to ResultRelInfo and
+ * TupleConversionMap for all partitions here, the actual objects themselves
+ * are lazily allocated for a given partition if a tuple is actually routed to
+ * it; see ExecInitPartitionInfo. However, if the function is invoked for
+ * update tuple routing, the caller will already have initialized
+ * ResultRelInfos for some of the partitions, which are reused and assigned to
+ * their respective slots in the aforementioned array.
*/
PartitionTupleRouting *
-ExecSetupPartitionTupleRouting(ModifyTableState *mtstate,
- Relation rel, Index resultRTindex,
- EState *estate)
+ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel)
{
TupleDesc tupDesc = RelationGetDescr(rel);
List *leaf_parts;
ListCell *cell;
int i;
- ResultRelInfo *leaf_part_arr = NULL,
- *update_rri = NULL;
+ ResultRelInfo *update_rri = NULL;
int num_update_rri = 0,
update_rri_index = 0;
bool is_update = false;
@@ -76,6 +81,8 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate,
proute->parent_child_tupconv_maps =
(TupleConversionMap **) palloc0(proute->num_partitions *
sizeof(TupleConversionMap *));
+ proute->partition_oids = (Oid *) palloc(proute->num_partitions *
+ sizeof(Oid));
/* Set up details specific to the type of tuple routing we are doing. */
if (mtstate && mtstate->operation == CMD_UPDATE)
@@ -95,16 +102,6 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate,
*/
proute->root_tuple_slot = MakeTupleTableSlot(NULL);
}
- else
- {
- /*
- * Since we are inserting tuples, we need to create all new result
- * rels. Avoid repeated pallocs by allocating memory for all the
- * result rels in bulk.
- */
- leaf_part_arr = (ResultRelInfo *) palloc0(proute->num_partitions *
- sizeof(ResultRelInfo));
- }
/*
* Initialize an empty slot that will be used to manipulate tuples of any
@@ -117,11 +114,10 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate,
i = 0;
foreach(cell, leaf_parts)
{
- ResultRelInfo *leaf_part_rri;
- Relation partrel = NULL;
- TupleDesc part_tupdesc;
+ ResultRelInfo *leaf_part_rri = NULL;
Oid leaf_oid = lfirst_oid(cell);
+ proute->partition_oids[i] = leaf_oid;
if (is_update)
{
/*
@@ -136,6 +132,9 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate,
if (update_rri_index < num_update_rri &&
RelationGetRelid(update_rri[update_rri_index].ri_RelationDesc) == leaf_oid)
{
+ Relation partrel;
+ TupleDesc part_tupdesc;
+
leaf_part_rri = &update_rri[update_rri_index];
partrel = leaf_part_rri->ri_RelationDesc;
@@ -151,73 +150,26 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate,
proute->subplan_partition_offsets[update_rri_index] = i;
update_rri_index++;
- }
- else
- leaf_part_rri = (ResultRelInfo *) palloc0(sizeof(ResultRelInfo));
- }
- else
- {
- /* For INSERTs, we already have an array of result rels allocated */
- leaf_part_rri = &leaf_part_arr[i];
- }
- /*
- * If we didn't open the partition rel, it means we haven't
- * initialized the result rel either.
- */
- if (!partrel)
- {
- /*
- * We locked all the partitions above including the leaf
- * partitions. Note that each of the newly opened relations in
- * proute->partitions are eventually closed by the caller.
- */
- partrel = heap_open(leaf_oid, NoLock);
- InitResultRelInfo(leaf_part_rri,
- partrel,
- resultRTindex,
- rel,
- estate->es_instrument);
-
- /*
- * Since we've just initialized this ResultRelInfo, it's not in
- * any list attached to the estate as yet. Add it, so that it can
- * be found later.
- */
- estate->es_tuple_routing_result_relations =
- lappend(estate->es_tuple_routing_result_relations,
- leaf_part_rri);
- }
-
- part_tupdesc = RelationGetDescr(partrel);
-
- /*
- * Save a tuple conversion map to convert a tuple routed to this
- * partition from the parent's type to the partition's.
- */
- proute->parent_child_tupconv_maps[i] =
- convert_tuples_by_name(tupDesc, part_tupdesc,
- gettext_noop("could not convert row type"));
+ part_tupdesc = RelationGetDescr(partrel);
- /*
- * Verify result relation is a valid target for an INSERT. An UPDATE
- * of a partition-key becomes a DELETE+INSERT operation, so this check
- * is still required when the operation is CMD_UPDATE.
- */
- CheckValidResultRel(leaf_part_rri, CMD_INSERT);
+ /*
+ * Save a tuple conversion map to convert a tuple routed to
+ * this partition from the parent's type to the partition's.
+ */
+ proute->parent_child_tupconv_maps[i] =
+ convert_tuples_by_name(tupDesc, part_tupdesc,
+ gettext_noop("could not convert row type"));
- /*
- * Open partition indices. The user may have asked to check for
- * conflicts within this leaf partition and do "nothing" instead of
- * throwing an error. Be prepared in that case by initializing the
- * index information needed by ExecInsert() to perform speculative
- * insertions.
- */
- if (leaf_part_rri->ri_RelationDesc->rd_rel->relhasindex &&
- leaf_part_rri->ri_IndexRelationDescs == NULL)
- ExecOpenIndices(leaf_part_rri,
- mtstate != NULL &&
- mtstate->mt_onconflict != ONCONFLICT_NONE);
+ /*
+ * Verify result relation is a valid target for an INSERT. An
+ * UPDATE of a partition-key becomes a DELETE+INSERT operation,
+ * so this check is required even when the operation is
+ * CMD_UPDATE.
+ */
+ CheckValidResultRel(leaf_part_rri, CMD_INSERT);
+ }
+ }
proute->partitions[i] = leaf_part_rri;
i++;
@@ -352,6 +304,204 @@ ExecFindPartition(ResultRelInfo *resultRelInfo, PartitionDispatch *pd,
}
/*
+ * ExecInitPartitionInfo
+ * Initialize ResultRelInfo and other information for a partition if not
+ * already done
+ *
+ * Returns the ResultRelInfo
+ */
+ResultRelInfo *
+ExecInitPartitionInfo(ModifyTableState *mtstate,
+ ResultRelInfo *resultRelInfo,
+ PartitionTupleRouting *proute,
+ EState *estate, int partidx)
+{
+ Relation rootrel = resultRelInfo->ri_RelationDesc,
+ partrel;
+ ResultRelInfo *leaf_part_rri;
+ ModifyTable *node = mtstate ? (ModifyTable *) mtstate->ps.plan : NULL;
+ MemoryContext oldContext;
+
+ /*
+ * We locked all the partitions in ExecSetupPartitionTupleRouting
+ * including the leaf partitions.
+ */
+ partrel = heap_open(proute->partition_oids[partidx], NoLock);
+
+ /*
+ * Keep ResultRelInfo and other information for this partition in the
+ * per-query memory context so they'll survive throughout the query.
+ */
+ oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
+
+ leaf_part_rri = (ResultRelInfo *) palloc0(sizeof(ResultRelInfo));
+ InitResultRelInfo(leaf_part_rri,
+ partrel,
+ node ? node->nominalRelation : 1,
+ rootrel,
+ estate->es_instrument);
+
+ /*
+ * Verify result relation is a valid target for an INSERT. An UPDATE
+ * of a partition-key becomes a DELETE+INSERT operation, so this check
+ * is still required when the operation is CMD_UPDATE.
+ */
+ CheckValidResultRel(leaf_part_rri, CMD_INSERT);
+
+ /*
+ * Since we've just initialized this ResultRelInfo, it's not in
+ * any list attached to the estate as yet. Add it, so that it can
+ * be found later.
+ *
+ * Note that the entries in this list appear in no predetermined
+ * order, because partition result rels are initialized as and when
+ * they're needed.
+ */
+ estate->es_tuple_routing_result_relations =
+ lappend(estate->es_tuple_routing_result_relations,
+ leaf_part_rri);
+
+ /*
+ * Open partition indices. The user may have asked to check for
+ * conflicts within this leaf partition and do "nothing" instead of
+ * throwing an error. Be prepared in that case by initializing the
+ * index information needed by ExecInsert() to perform speculative
+ * insertions.
+ */
+ if (partrel->rd_rel->relhasindex &&
+ leaf_part_rri->ri_IndexRelationDescs == NULL)
+ ExecOpenIndices(leaf_part_rri,
+ (mtstate != NULL &&
+ mtstate->mt_onconflict != ONCONFLICT_NONE));
+
+ /*
+ * Build WITH CHECK OPTION constraints for the partition. Note that we
+ * didn't build the withCheckOptionList for partitions within the planner,
+ * but simple translation of varattnos will suffice. This only occurs for
+ * the INSERT case or in the case of UPDATE tuple routing where we didn't
+ * find a result rel to reuse in ExecSetupPartitionTupleRouting().
+ */
+ if (node && node->withCheckOptionLists != NIL)
+ {
+ List *wcoList;
+ List *wcoExprs = NIL;
+ ListCell *ll;
+ int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
+ Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
+
+ /*
+ * In the case of INSERT on partitioned tables, there is only one
+ * plan. Likewise, there is only one WCO list, not one per
+ * partition. For UPDATE, there would be as many WCO lists as
+ * there are plans.
+ */
+ Assert((node->operation == CMD_INSERT &&
+ list_length(node->withCheckOptionLists) == 1 &&
+ list_length(node->plans) == 1) ||
+ (node->operation == CMD_UPDATE &&
+ list_length(node->withCheckOptionLists) ==
+ list_length(node->plans)));
+
+ /*
+ * Use the WCO list of the first plan as a reference to calculate
+ * attno's for the WCO list of this partition. In the INSERT case,
+ * that refers to the root partitioned table, whereas in the UPDATE
+ * tuple routing case, that refers to the first partition in the
+ * mtstate->resultRelInfo array. In any case, both that relation and
+ * this partition should have the same columns, so we should be able
+ * to map attributes successfully.
+ */
+ wcoList = linitial(node->withCheckOptionLists);
+
+ /*
+ * Convert Vars in it to contain this partition's attribute numbers.
+ */
+ wcoList = map_partition_varattnos(wcoList, firstVarno,
+ partrel, firstResultRel, NULL);
+ foreach(ll, wcoList)
+ {
+ WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll));
+ ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual),
+ mtstate->mt_plans[0]);
+
+ wcoExprs = lappend(wcoExprs, wcoExpr);
+ }
+
+ leaf_part_rri->ri_WithCheckOptions = wcoList;
+ leaf_part_rri->ri_WithCheckOptionExprs = wcoExprs;
+ }
+
+ /*
+ * Build the RETURNING projection for the partition. Note that we didn't
+ * build the returningList for partitions within the planner, but simple
+ * translation of varattnos will suffice. This only occurs for the INSERT
+ * case or in the case of UPDATE tuple routing where we didn't find a
+ * result rel to reuse in ExecSetupPartitionTupleRouting().
+ */
+ if (node && node->returningLists != NIL)
+ {
+ TupleTableSlot *slot;
+ ExprContext *econtext;
+ List *returningList;
+ int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
+ Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
+
+ /* See the comment above for WCO lists. */
+ Assert((node->operation == CMD_INSERT &&
+ list_length(node->returningLists) == 1 &&
+ list_length(node->plans) == 1) ||
+ (node->operation == CMD_UPDATE &&
+ list_length(node->returningLists) ==
+ list_length(node->plans)));
+
+ /*
+ * Use the RETURNING list of the first plan as a reference to
+ * calculate attno's for the RETURNING list of this partition. See
+ * the comment above for WCO lists for more details on why this is
+ * okay.
+ */
+ returningList = linitial(node->returningLists);
+
+ /*
+ * Convert Vars in it to contain this partition's attribute numbers.
+ */
+ returningList = map_partition_varattnos(returningList, firstVarno,
+ partrel, firstResultRel,
+ NULL);
+
+ /*
+ * Initialize the projection itself.
+ *
+ * Use the slot and the expression context that would have been set up
+ * in ExecInitModifyTable() for projection's output.
+ */
+ Assert(mtstate->ps.ps_ResultTupleSlot != NULL);
+ slot = mtstate->ps.ps_ResultTupleSlot;
+ Assert(mtstate->ps.ps_ExprContext != NULL);
+ econtext = mtstate->ps.ps_ExprContext;
+ leaf_part_rri->ri_projectReturning =
+ ExecBuildProjectionInfo(returningList, econtext, slot,
+ &mtstate->ps, RelationGetDescr(partrel));
+ }
+
+ Assert (proute->partitions[partidx] == NULL);
+ proute->partitions[partidx] = leaf_part_rri;
+
+ /*
+ * Save a tuple conversion map to convert a tuple routed to this
+ * partition from the parent's type to the partition's.
+ */
+ proute->parent_child_tupconv_maps[partidx] =
+ convert_tuples_by_name(RelationGetDescr(rootrel),
+ RelationGetDescr(partrel),
+ gettext_noop("could not convert row type"));
+
+ MemoryContextSwitchTo(oldContext);
+
+ return leaf_part_rri;
+}
+
+/*
* ExecSetupChildParentMapForLeaf -- Initialize the per-leaf-partition
* child-to-root tuple conversion map array.
*
@@ -477,6 +627,10 @@ ExecCleanupTupleRouting(PartitionTupleRouting *proute)
{
ResultRelInfo *resultRelInfo = proute->partitions[i];
+ /* skip further processing for uninitialized partitions */
+ if (resultRelInfo == NULL)
+ continue;
+
/*
* If this result rel is one of the UPDATE subplan result rels, let
* ExecEndPlan() close it. For INSERT or COPY,
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 93c03cf..87a4a92 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -310,6 +310,14 @@ ExecInsert(ModifyTableState *mtstate,
*/
saved_resultRelInfo = resultRelInfo;
resultRelInfo = proute->partitions[leaf_part_index];
+ if (resultRelInfo == NULL)
+ {
+ resultRelInfo = ExecInitPartitionInfo(mtstate,
+ saved_resultRelInfo,
+ proute, estate,
+ leaf_part_index);
+ Assert(resultRelInfo != NULL);
+ }
/* We do not yet have a way to insert into a foreign partition */
if (resultRelInfo->ri_FdwRoutine)
@@ -2098,14 +2106,10 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
ResultRelInfo *saved_resultRelInfo;
ResultRelInfo *resultRelInfo;
Plan *subplan;
- int firstVarno = 0;
- Relation firstResultRel = NULL;
ListCell *l;
int i;
Relation rel;
bool update_tuple_routing_needed = node->partColsUpdated;
- PartitionTupleRouting *proute = NULL;
- int num_partitions = 0;
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
@@ -2228,20 +2232,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
*/
if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
(operation == CMD_INSERT || update_tuple_routing_needed))
- {
- proute = mtstate->mt_partition_tuple_routing =
- ExecSetupPartitionTupleRouting(mtstate,
- rel, node->nominalRelation,
- estate);
- num_partitions = proute->num_partitions;
-
- /*
- * Below are required as reference objects for mapping partition
- * attno's in expressions such as WithCheckOptions and RETURNING.
- */
- firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
- firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
- }
+ mtstate->mt_partition_tuple_routing =
+ ExecSetupPartitionTupleRouting(mtstate, rel);
/*
* Build state for collecting transition tuples. This requires having a
@@ -2288,77 +2280,12 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
}
/*
- * Build WITH CHECK OPTION constraints for each leaf partition rel. Note
- * that we didn't build the withCheckOptionList for each partition within
- * the planner, but simple translation of the varattnos for each partition
- * will suffice. This only occurs for the INSERT case or for UPDATE row
- * movement. DELETEs and local UPDATEs are handled above.
- */
- if (node->withCheckOptionLists != NIL && num_partitions > 0)
- {
- List *first_wcoList;
-
- /*
- * In case of INSERT on partitioned tables, there is only one plan.
- * Likewise, there is only one WITH CHECK OPTIONS list, not one per
- * partition. Whereas for UPDATE, there are as many WCOs as there are
- * plans. So in either case, use the WCO expression of the first
- * resultRelInfo as a reference to calculate attno's for the WCO
- * expression of each of the partitions. We make a copy of the WCO
- * qual for each partition. Note that, if there are SubPlans in there,
- * they all end up attached to the one parent Plan node.
- */
- Assert(update_tuple_routing_needed ||
- (operation == CMD_INSERT &&
- list_length(node->withCheckOptionLists) == 1 &&
- mtstate->mt_nplans == 1));
-
- first_wcoList = linitial(node->withCheckOptionLists);
- for (i = 0; i < num_partitions; i++)
- {
- Relation partrel;
- List *mapped_wcoList;
- List *wcoExprs = NIL;
- ListCell *ll;
-
- resultRelInfo = proute->partitions[i];
-
- /*
- * If we are referring to a resultRelInfo from one of the update
- * result rels, that result rel would already have
- * WithCheckOptions initialized.
- */
- if (resultRelInfo->ri_WithCheckOptions)
- continue;
-
- partrel = resultRelInfo->ri_RelationDesc;
-
- mapped_wcoList = map_partition_varattnos(first_wcoList,
- firstVarno,
- partrel, firstResultRel,
- NULL);
- foreach(ll, mapped_wcoList)
- {
- WithCheckOption *wco = castNode(WithCheckOption, lfirst(ll));
- ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual),
- &mtstate->ps);
-
- wcoExprs = lappend(wcoExprs, wcoExpr);
- }
-
- resultRelInfo->ri_WithCheckOptions = mapped_wcoList;
- resultRelInfo->ri_WithCheckOptionExprs = wcoExprs;
- }
- }
-
- /*
* Initialize RETURNING projections if needed.
*/
if (node->returningLists)
{
TupleTableSlot *slot;
ExprContext *econtext;
- List *firstReturningList;
/*
* Initialize result tuple slot and assign its rowtype using the first
@@ -2388,44 +2315,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
resultRelInfo->ri_RelationDesc->rd_att);
resultRelInfo++;
}
-
- /*
- * Build a projection for each leaf partition rel. Note that we
- * didn't build the returningList for each partition within the
- * planner, but simple translation of the varattnos for each partition
- * will suffice. This only occurs for the INSERT case or for UPDATE
- * row movement. DELETEs and local UPDATEs are handled above.
- */
- firstReturningList = linitial(node->returningLists);
- for (i = 0; i < num_partitions; i++)
- {
- Relation partrel;
- List *rlist;
-
- resultRelInfo = proute->partitions[i];
-
- /*
- * If we are referring to a resultRelInfo from one of the update
- * result rels, that result rel would already have a returningList
- * built.
- */
- if (resultRelInfo->ri_projectReturning)
- continue;
-
- partrel = resultRelInfo->ri_RelationDesc;
-
- /*
- * Use the returning expression of the first resultRelInfo as a
- * reference to calculate attno's for the returning expression of
- * each of the partitions.
- */
- rlist = map_partition_varattnos(firstReturningList,
- firstVarno,
- partrel, firstResultRel, NULL);
- resultRelInfo->ri_projectReturning =
- ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps,
- resultRelInfo->ri_RelationDesc->rd_att);
- }
}
else
{
diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h
index 3df9c49..e947186 100644
--- a/src/include/executor/execPartition.h
+++ b/src/include/executor/execPartition.h
@@ -58,6 +58,7 @@ typedef struct PartitionDispatchData *PartitionDispatch;
* partition tree.
* num_dispatch number of partitioned tables in the partition
* tree (= length of partition_dispatch_info[])
+ * partition_oids Array of leaf partition OIDs
* partitions Array of ResultRelInfo* objects with one entry
* for every leaf partition in the partition tree.
* num_partitions Number of leaf partitions in the partition tree
@@ -91,6 +92,7 @@ typedef struct PartitionTupleRouting
{
PartitionDispatch *partition_dispatch_info;
int num_dispatch;
+ Oid *partition_oids;
ResultRelInfo **partitions;
int num_partitions;
TupleConversionMap **parent_child_tupconv_maps;
@@ -103,12 +105,15 @@ typedef struct PartitionTupleRouting
} PartitionTupleRouting;
extern PartitionTupleRouting *ExecSetupPartitionTupleRouting(ModifyTableState *mtstate,
- Relation rel, Index resultRTindex,
- EState *estate);
+ Relation rel);
extern int ExecFindPartition(ResultRelInfo *resultRelInfo,
PartitionDispatch *pd,
TupleTableSlot *slot,
EState *estate);
+extern ResultRelInfo *ExecInitPartitionInfo(ModifyTableState *mtstate,
+ ResultRelInfo *resultRelInfo,
+ PartitionTupleRouting *proute,
+ EState *estate, int partidx);
extern void ExecSetupChildParentMapForLeaf(PartitionTupleRouting *proute);
extern TupleConversionMap *TupConvMapForLeaf(PartitionTupleRouting *proute,
ResultRelInfo *rootRelInfo, int leaf_index);