Here's a rebased version of this patch (it had a trivial conflict). No further changes.
-- Álvaro Herrera https://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 82d8140ba2..677b9cdd58 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -1951,7 +1951,7 @@ postgresBeginForeignInsert(ModifyTableState *mtstate, if (plan && plan->operation == CMD_UPDATE && (resultRelInfo->ri_usesFdwDirectModify || resultRelInfo->ri_FdwState) && - resultRelInfo > mtstate->resultRelInfo + mtstate->mt_whichplan) + resultRelInfo > mtstate->resultRelInfos[mtstate->mt_whichplan]) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot route tuples into foreign table to be updated \"%s\"", @@ -2005,7 +2005,7 @@ postgresBeginForeignInsert(ModifyTableState *mtstate, */ if (plan && plan->operation == CMD_UPDATE && resultRelation == plan->rootRelation) - resultRelation = mtstate->resultRelInfo[0].ri_RangeTableIndex; + resultRelation = mtstate->resultRelInfos[0]->ri_RangeTableIndex; } /* Construct the SQL command string. */ diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 3aeef30b28..2e9954ddda 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -2856,7 +2856,7 @@ CopyFrom(CopyState cstate) mtstate->ps.plan = NULL; mtstate->ps.state = estate; mtstate->operation = CMD_INSERT; - mtstate->resultRelInfo = estate->es_result_relations; + mtstate->resultRelInfos = &estate->es_result_relations; if (resultRelInfo->ri_FdwRoutine != NULL && resultRelInfo->ri_FdwRoutine->BeginForeignInsert != NULL) diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 62fb3434a3..682bf615db 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -3190,14 +3190,14 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors, /* Should we explicitly label target relations? 
*/ labeltargets = (mtstate->mt_nplans > 1 || (mtstate->mt_nplans == 1 && - mtstate->resultRelInfo->ri_RangeTableIndex != node->nominalRelation)); + mtstate->resultRelInfos[0]->ri_RangeTableIndex != node->nominalRelation)); if (labeltargets) ExplainOpenGroup("Target Tables", "Target Tables", false, es); for (j = 0; j < mtstate->mt_nplans; j++) { - ResultRelInfo *resultRelInfo = mtstate->resultRelInfo + j; + ResultRelInfo *resultRelInfo = mtstate->resultRelInfos[j]; FdwRoutine *fdwroutine = resultRelInfo->ri_FdwRoutine; if (labeltargets) diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index d23f292cb0..8d1d961809 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -471,7 +471,7 @@ ExecHashSubPlanResultRelsByOid(ModifyTableState *mtstate, /* Hash all subplans by their Oid */ for (i = 0; i < mtstate->mt_nplans; i++) { - ResultRelInfo *rri = &mtstate->resultRelInfo[i]; + ResultRelInfo *rri = mtstate->resultRelInfos[i]; bool found; Oid partoid = RelationGetRelid(rri->ri_RelationDesc); SubplanResultRelHashElem *elem; @@ -508,7 +508,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, ModifyTable *node = (ModifyTable *) mtstate->ps.plan; Relation rootrel = rootResultRelInfo->ri_RelationDesc, partrel; - Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; + Relation firstResultRel = mtstate->resultRelInfos[0]->ri_RelationDesc; ResultRelInfo *leaf_part_rri; MemoryContext oldcxt; AttrNumber *part_attnos = NULL; @@ -556,7 +556,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, List *wcoList; List *wcoExprs = NIL; ListCell *ll; - int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; + int firstVarno = mtstate->resultRelInfos[0]->ri_RangeTableIndex; /* * In the case of INSERT on a partitioned table, there is only one @@ -621,7 +621,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, TupleTableSlot *slot; ExprContext 
*econtext; List *returningList; - int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; + int firstVarno = mtstate->resultRelInfos[0]->ri_RangeTableIndex; /* See the comment above for WCO lists. */ Assert((node->operation == CMD_INSERT && @@ -681,7 +681,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, */ if (node && node->onConflictAction != ONCONFLICT_NONE) { - int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; + int firstVarno = mtstate->resultRelInfos[0]->ri_RangeTableIndex; TupleDesc partrelDesc = RelationGetDescr(partrel); ExprContext *econtext = mtstate->ps.ps_ExprContext; ListCell *lc; diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index c9d024ead5..f35291cb12 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -57,6 +57,9 @@ #include "utils/memutils.h" #include "utils/rel.h" + /* Special values for mt_whichplan */ +#define WHICHPLAN_CHOOSE_PARTITIONS -1 +#define WHICHPLAN_NO_MATCHING_PARTITIONS -2 static bool ExecOnConflictUpdate(ModifyTableState *mtstate, ResultRelInfo *resultRelInfo, @@ -1261,7 +1264,7 @@ lreplace:; * retrieve the one for this resultRel, we need to know the * position of the resultRel in mtstate->resultRelInfo[]. */ - map_index = resultRelInfo - mtstate->resultRelInfo; + map_index = mtstate->mt_whichplan; Assert(map_index >= 0 && map_index < mtstate->mt_nplans); tupconv_map = tupconv_map_for_subplan(mtstate, map_index); if (tupconv_map != NULL) @@ -1707,12 +1710,12 @@ static void fireBSTriggers(ModifyTableState *node) { ModifyTable *plan = (ModifyTable *) node->ps.plan; - ResultRelInfo *resultRelInfo = node->resultRelInfo; + ResultRelInfo *resultRelInfo = node->resultRelInfos[0]; /* * If the node modifies a partitioned table, we must fire its triggers. - * Note that in that case, node->resultRelInfo points to the first leaf - * partition, not the root table. 
+ * Note that in that case, node->resultRelInfos[0] points to the first + * leaf partition, not the root table. */ if (node->rootResultRelInfo != NULL) resultRelInfo = node->rootResultRelInfo; @@ -1750,13 +1753,14 @@ static ResultRelInfo * getTargetResultRelInfo(ModifyTableState *node) { /* - * Note that if the node modifies a partitioned table, node->resultRelInfo - * points to the first leaf partition, not the root table. + * Note that if the node modifies a partitioned table, + * node->resultRelInfos[0] points to the first leaf partition, not the + * root table. */ if (node->rootResultRelInfo != NULL) return node->rootResultRelInfo; else - return node->resultRelInfo; + return node->resultRelInfos[0]; } /* @@ -1935,7 +1939,7 @@ static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate) { ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate); - ResultRelInfo *resultRelInfos = mtstate->resultRelInfo; + ResultRelInfo **resultRelInfos = mtstate->resultRelInfos; TupleDesc outdesc; int numResultRelInfos = mtstate->mt_nplans; int i; @@ -1955,7 +1959,7 @@ ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate) for (i = 0; i < numResultRelInfos; ++i) { mtstate->mt_per_subplan_tupconv_maps[i] = - convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc), + convert_tuples_by_name(RelationGetDescr(resultRelInfos[i]->ri_RelationDesc), outdesc); } } @@ -2031,8 +2035,47 @@ ExecModifyTable(PlanState *pstate) node->fireBSTriggers = false; } + if (node->mt_whichplan < 0) + { + /* Handle choosing the valid partitions */ + if (node->mt_whichplan == WHICHPLAN_CHOOSE_PARTITIONS) + { + PartitionPruneState *prunestate = node->mt_prune_state; + + /* There should always be at least one */ + Assert(node->mt_nplans > 0); + + /* + * When partition pruning is enabled and exec params match the + * partition key then determine the minimum set of matching + * subnodes. Otherwise we match to all subnodes. 
+ */ + if (prunestate != NULL && prunestate->do_exec_prune) + { + node->mt_valid_subplans = ExecFindMatchingSubPlans(prunestate); + node->mt_whichplan = bms_next_member(node->mt_valid_subplans, -1); + + /* If no subplan matches these params then we're done */ + if (node->mt_whichplan < 0) + goto done; + } + else + { + node->mt_valid_subplans = bms_add_range(NULL, 0, + node->mt_nplans - 1); + node->mt_whichplan = 0; + } + } + + /* partition pruning determined that no partitions match */ + else if (node->mt_whichplan == WHICHPLAN_NO_MATCHING_PARTITIONS) + goto done; + else + elog(ERROR, "invalid subplan index: %d", node->mt_whichplan); + } + /* Preload local variables */ - resultRelInfo = node->resultRelInfo + node->mt_whichplan; + resultRelInfo = node->resultRelInfos[node->mt_whichplan]; subplanstate = node->mt_plans[node->mt_whichplan]; junkfilter = resultRelInfo->ri_junkFilter; @@ -2074,10 +2117,12 @@ ExecModifyTable(PlanState *pstate) if (TupIsNull(planSlot)) { /* advance to next subplan if any */ - node->mt_whichplan++; - if (node->mt_whichplan < node->mt_nplans) + node->mt_whichplan = bms_next_member(node->mt_valid_subplans, + node->mt_whichplan); + + if (node->mt_whichplan >= 0) { - resultRelInfo++; + resultRelInfo = node->resultRelInfos[node->mt_whichplan]; subplanstate = node->mt_plans[node->mt_whichplan]; junkfilter = resultRelInfo->ri_junkFilter; estate->es_result_relation_info = resultRelInfo; @@ -2247,6 +2292,8 @@ ExecModifyTable(PlanState *pstate) /* Restore es_result_relation_info before exiting */ estate->es_result_relation_info = saved_resultRelInfo; +done: + /* * We're done, but fire AFTER STATEMENT triggers before exiting. 
*/ @@ -2269,9 +2316,11 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) int nplans = list_length(node->plans); ResultRelInfo *saved_resultRelInfo; ResultRelInfo *resultRelInfo; + Bitmapset *validsubplans; Plan *subplan; ListCell *l; - int i; + int i, + j; Relation rel; bool update_tuple_routing_needed = node->partColsUpdated; @@ -2289,9 +2338,75 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) mtstate->operation = operation; mtstate->canSetTag = node->canSetTag; mtstate->mt_done = false; + mtstate->mt_whichplan = WHICHPLAN_CHOOSE_PARTITIONS; + + /* If run-time partition pruning is enabled, then set that up now */ + if (node->part_prune_info != NULL) + { + PartitionPruneState *prunestate; + + ExecAssignExprContext(estate, &mtstate->ps); + + prunestate = ExecCreatePartitionPruneState(&mtstate->ps, + node->part_prune_info); + mtstate->mt_prune_state = prunestate; + + /* Perform an initial partition prune, if required. */ + if (prunestate->do_initial_prune) + { + /* Determine which subplans match the external params */ + validsubplans = ExecFindInitialMatchingSubPlans(prunestate, + list_length(node->plans)); + + /* + * The case where no subplans survive pruning must be handled + * specially. The problem here is that code in explain.c requires + * an Append to have at least one subplan in order for it to + * properly determine the Vars in that subplan's targetlist. We + * sidestep this issue by just initializing the first subplan and + * setting mt_whichplan to WHICHPLAN_NO_MATCHING_PARTITIONS to indicate that + * we don't really need to scan any subnodes. 
+ */ + if (bms_is_empty(validsubplans)) + { + mtstate->mt_whichplan = WHICHPLAN_NO_MATCHING_PARTITIONS; + + /* Mark the first as valid so that it's initialized below */ + validsubplans = bms_make_singleton(0); + } + + nplans = bms_num_members(validsubplans); + } + else + { + /* We'll need to initialize all subplans */ + nplans = list_length(node->plans); + validsubplans = bms_add_range(NULL, 0, nplans - 1); + } + + /* + * If no runtime pruning is required, we can fill mt_valid_subplans + * immediately, preventing later calls to ExecFindMatchingSubPlans. + */ + if (!prunestate->do_exec_prune) + mtstate->mt_valid_subplans = bms_add_range(NULL, 0, nplans - 1); + } + else + { + nplans = list_length(node->plans); + + /* + * When run-time partition pruning is not enabled we can just mark all + * plans as valid, they must also all be initialized. + */ + validsubplans = bms_add_range(NULL, 0, nplans - 1); + mtstate->mt_prune_state = NULL; + } + mtstate->mt_plans = (PlanState **) palloc0(sizeof(PlanState *) * nplans); - mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex; + mtstate->resultRelInfos = (ResultRelInfo **) + palloc(sizeof(ResultRelInfo *) * nplans); mtstate->mt_scans = (TupleTableSlot **) palloc0(sizeof(TupleTableSlot *) * nplans); /* If modifying a partitioned table, initialize the root table info */ @@ -2318,11 +2433,18 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) */ saved_resultRelInfo = estate->es_result_relation_info; - resultRelInfo = mtstate->resultRelInfo; - i = 0; + j = i = 0; foreach(l, node->plans) { + if (!bms_is_member(i, validsubplans)) + { + i++; + continue; + } + subplan = (Plan *) lfirst(l); + resultRelInfo = estate->es_result_relations + node->resultRelIndex + i; + mtstate->resultRelInfos[j] = resultRelInfo; /* Initialize the usesFdwDirectModify flag */ resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i, @@ -2360,9 +2482,9 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) 
/* Now init the plan for this result rel */ estate->es_result_relation_info = resultRelInfo; - mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags); - mtstate->mt_scans[i] = - ExecInitExtraTupleSlot(mtstate->ps.state, ExecGetResultType(mtstate->mt_plans[i]), + mtstate->mt_plans[j] = ExecInitNode(subplan, estate, eflags); + mtstate->mt_scans[j] = + ExecInitExtraTupleSlot(mtstate->ps.state, ExecGetResultType(mtstate->mt_plans[j]), table_slot_callbacks(resultRelInfo->ri_RelationDesc)); /* Also let FDWs init themselves for foreign-table result rels */ @@ -2378,9 +2500,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) i, eflags); } - - resultRelInfo++; i++; + j++; } estate->es_result_relation_info = saved_resultRelInfo; @@ -2429,14 +2550,21 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* * Initialize any WITH CHECK OPTION constraints if needed. */ - resultRelInfo = mtstate->resultRelInfo; - i = 0; + j = i = 0; foreach(l, node->withCheckOptionLists) { - List *wcoList = (List *) lfirst(l); + List *wcoList; List *wcoExprs = NIL; ListCell *ll; + if (!bms_is_member(i, validsubplans)) + { + i++; + continue; + } + + wcoList = (List *) lfirst(l); + foreach(ll, wcoList) { WithCheckOption *wco = (WithCheckOption *) lfirst(ll); @@ -2446,9 +2574,10 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) wcoExprs = lappend(wcoExprs, wcoExpr); } + resultRelInfo = mtstate->resultRelInfos[j]; resultRelInfo->ri_WithCheckOptions = wcoList; resultRelInfo->ri_WithCheckOptionExprs = wcoExprs; - resultRelInfo++; + j++; i++; } @@ -2478,16 +2607,25 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* * Build a projection for each result rel. 
*/ - resultRelInfo = mtstate->resultRelInfo; + j = i = 0; foreach(l, node->returningLists) { - List *rlist = (List *) lfirst(l); + List *rlist; + if (!bms_is_member(i, validsubplans)) + { + i++; + continue; + } + + rlist = (List *) lfirst(l); + + resultRelInfo = mtstate->resultRelInfos[j]; resultRelInfo->ri_returningList = rlist; resultRelInfo->ri_projectReturning = ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps, resultRelInfo->ri_RelationDesc->rd_att); - resultRelInfo++; + j++; } } else @@ -2498,12 +2636,10 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) */ mtstate->ps.plan->targetlist = NIL; ExecInitResultTypeTL(&mtstate->ps); - - mtstate->ps.ps_ExprContext = NULL; } /* Set the list of arbiter indexes if needed for ON CONFLICT */ - resultRelInfo = mtstate->resultRelInfo; + resultRelInfo = mtstate->resultRelInfos[0]; if (node->onConflictAction != ONCONFLICT_NONE) resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes; @@ -2597,7 +2733,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) } /* select first subplan */ - mtstate->mt_whichplan = 0; subplan = (Plan *) linitial(node->plans); EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan, mtstate->mt_arowmarks[0]); @@ -2646,12 +2781,12 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) if (junk_filter_needed) { - resultRelInfo = mtstate->resultRelInfo; for (i = 0; i < nplans; i++) { JunkFilter *j; TupleTableSlot *junkresslot; + resultRelInfo = mtstate->resultRelInfos[i]; subplan = mtstate->mt_plans[i]->plan; if (operation == CMD_INSERT || operation == CMD_UPDATE) ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc, @@ -2694,13 +2829,12 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) } resultRelInfo->ri_junkFilter = j; - resultRelInfo++; } } else { if (operation == CMD_INSERT) - ExecCheckPlanOutput(mtstate->resultRelInfo->ri_RelationDesc, + ExecCheckPlanOutput(mtstate->resultRelInfos[0]->ri_RelationDesc, 
subplan->targetlist); } } @@ -2739,7 +2873,7 @@ ExecEndModifyTable(ModifyTableState *node) */ for (i = 0; i < node->mt_nplans; i++) { - ResultRelInfo *resultRelInfo = node->resultRelInfo + i; + ResultRelInfo *resultRelInfo = node->resultRelInfos[i]; if (!resultRelInfo->ri_usesFdwDirectModify && resultRelInfo->ri_FdwRoutine != NULL && diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 3432bb921d..d3a2fed402 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -215,6 +215,7 @@ _copyModifyTable(const ModifyTable *from) COPY_BITMAPSET_FIELD(fdwDirectModifyPlans); COPY_NODE_FIELD(rowMarks); COPY_SCALAR_FIELD(epqParam); + COPY_NODE_FIELD(part_prune_info); COPY_SCALAR_FIELD(onConflictAction); COPY_NODE_FIELD(arbiterIndexes); COPY_NODE_FIELD(onConflictSet); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index b0dcd02ff6..612d41c4fb 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -416,6 +416,7 @@ _outModifyTable(StringInfo str, const ModifyTable *node) WRITE_BITMAPSET_FIELD(fdwDirectModifyPlans); WRITE_NODE_FIELD(rowMarks); WRITE_INT_FIELD(epqParam); + WRITE_NODE_FIELD(part_prune_info); WRITE_ENUM_FIELD(onConflictAction, OnConflictAction); WRITE_NODE_FIELD(arbiterIndexes); WRITE_NODE_FIELD(onConflictSet); @@ -2089,6 +2090,7 @@ _outModifyTablePath(StringInfo str, const ModifyTablePath *node) WRITE_UINT_FIELD(rootRelation); WRITE_BOOL_FIELD(partColsUpdated); WRITE_NODE_FIELD(resultRelations); + WRITE_NODE_FIELD(partitioned_rels); WRITE_NODE_FIELD(subpaths); WRITE_NODE_FIELD(subroots); WRITE_NODE_FIELD(withCheckOptionLists); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 764e3bb90c..571587c114 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1616,6 +1616,7 @@ _readModifyTable(void) READ_BITMAPSET_FIELD(fdwDirectModifyPlans); READ_NODE_FIELD(rowMarks); READ_INT_FIELD(epqParam); + 
READ_NODE_FIELD(part_prune_info); READ_ENUM_FIELD(onConflictAction, OnConflictAction); READ_NODE_FIELD(arbiterIndexes); READ_NODE_FIELD(onConflictSet); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 0c036209f0..9262f39673 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -289,7 +289,8 @@ static ModifyTable *make_modifytable(PlannerInfo *root, bool partColsUpdated, List *resultRelations, List *subplans, List *subroots, List *withCheckOptionLists, List *returningLists, - List *rowMarks, OnConflictExpr *onconflict, int epqParam); + List *rowMarks, OnConflictExpr *onconflict, int epqParam, + PartitionPruneInfo *partpruneinfos); static GatherMerge *create_gather_merge_plan(PlannerInfo *root, GatherMergePath *best_path); @@ -1237,6 +1238,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) make_partition_pruneinfo(root, rel, best_path->subpaths, best_path->partitioned_rels, + NIL, prunequal); } @@ -1402,6 +1404,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, partpruneinfo = make_partition_pruneinfo(root, rel, best_path->subpaths, best_path->partitioned_rels, + NIL, prunequal); } @@ -2581,6 +2584,7 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) List *subplans = NIL; ListCell *subpaths, *subroots; + PartitionPruneInfo *partpruneinfos = NULL; /* Build the plan for each input path */ forboth(subpaths, best_path->subpaths, @@ -2609,6 +2613,30 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) subplans = lappend(subplans, subplan); } + if (enable_partition_pruning && + best_path->partitioned_rels != NIL && + !IS_DUMMY_MODIFYTABLE(best_path)) + { + RelOptInfo *rel = best_path->path.parent; + List *prunequal = NIL; + + prunequal = extract_actual_clauses(rel->baserestrictinfo, false); + + /* + * If any quals exist, then these may be useful to allow us to perform + * further 
partition pruning during execution. We'll generate a + * PartitionPruneInfo for each partitioned rel to store these quals + * and allow translation of partition indexes into subpath indexes. + */ + if (prunequal != NIL) + partpruneinfos = make_partition_pruneinfo(root, + best_path->path.parent, + best_path->subpaths, + best_path->partitioned_rels, + best_path->resultRelations, + prunequal); + } + plan = make_modifytable(root, best_path->operation, best_path->canSetTag, @@ -2622,7 +2650,8 @@ create_modifytable_plan(PlannerInfo *root, ModifyTablePath *best_path) best_path->returningLists, best_path->rowMarks, best_path->onconflict, - best_path->epqParam); + best_path->epqParam, + partpruneinfos); copy_generic_path_info(&plan->plan, &best_path->path); @@ -6616,7 +6645,8 @@ make_modifytable(PlannerInfo *root, bool partColsUpdated, List *resultRelations, List *subplans, List *subroots, List *withCheckOptionLists, List *returningLists, - List *rowMarks, OnConflictExpr *onconflict, int epqParam) + List *rowMarks, OnConflictExpr *onconflict, int epqParam, + PartitionPruneInfo *partpruneinfos) { ModifyTable *node = makeNode(ModifyTable); List *fdw_private_list; @@ -6677,6 +6707,7 @@ make_modifytable(PlannerInfo *root, node->returningLists = returningLists; node->rowMarks = rowMarks; node->epqParam = epqParam; + node->part_prune_info = partpruneinfos; /* * For each result relation that is a foreign table, allow the FDW to diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 17c5f086fb..bab7175d48 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1236,6 +1236,9 @@ inheritance_planner(PlannerInfo *root) RangeTblEntry *parent_rte; Bitmapset *parent_relids; Query **parent_parses; + PlannerInfo *partition_root = NULL; + List *partitioned_rels = NIL; + bool dummy_update = false; /* Should only get here for UPDATE or DELETE */ Assert(parse->commandType == CMD_UPDATE || @@ -1347,6 +1350,13 @@ 
inheritance_planner(PlannerInfo *root) * expand_partitioned_rtentry for the UPDATE target.) */ root->partColsUpdated = subroot->partColsUpdated; + + /* + * Save this for later so that we can enable run-time pruning on + * the partitioned table(s). + */ + if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE) + partition_root = subroot; } /*---------- @@ -1741,6 +1751,9 @@ inheritance_planner(PlannerInfo *root) returningLists = list_make1(parse->returningList); /* Disable tuple routing, too, just to be safe */ root->partColsUpdated = false; + + /* Mark that we're performing a dummy update */ + dummy_update = true; } else { @@ -1779,6 +1792,69 @@ inheritance_planner(PlannerInfo *root) else rowMarks = root->rowMarks; + + /* + * When performing UPDATE/DELETE on a partitioned table, if the query has + * a WHERE clause which supports it, we may be able to perform run-time + * partition pruning. The following code sets things up to allow this to + * be possible. + */ + if (partition_root && !dummy_update) + { + RelOptInfo *parent_rel; + int i; + + /* + * Fetch the target partitioned table from the SELECT version of + * the query which we performed above. This may have the base quals + * which could allow the run-time pruning to work. + */ + parent_rel = partition_root->simple_rel_array[top_parentRTindex]; + + final_rel->baserestrictinfo = parent_rel->baserestrictinfo; + + /* build a list of partitioned rels */ + i = -1; + while ((i = bms_next_member(parent_relids, i)) > 0) + partitioned_rels = lappend_int(partitioned_rels, i); + + + /* + * In order to build the run-time pruning data we'll need append rels + * for any sub-partitioned tables. If there are some of those and the + * append_rel_array is not already allocated, then do that now. 
+ */ + if (list_length(partitioned_rels) > 1 && + root->append_rel_array == NULL) + root->append_rel_array = palloc0(sizeof(AppendRelInfo *) * + root->simple_rel_array_size); + + /* + * There can only be a single partition hierarchy, so it's fine to + * just make a single element list of the partitioned_rels. + */ + partitioned_rels = list_make1(partitioned_rels); + + i = -1; + while ((i = bms_next_member(parent_relids, i)) >= 0) + { + Assert(root->simple_rel_array[i] == NULL); + + root->simple_rel_array[i] = partition_root->simple_rel_array[i]; + + /* + * The root partition won't have an append rel entry, so we can + * skip that. We'll need to take the partition_root's version for + * any sub-partitioned table's + */ + if (i != top_parentRTindex) + { + Assert(root->append_rel_array[i] == NULL); + root->append_rel_array[i] = partition_root->append_rel_array[i]; + } + } + } + /* Create Path representing a ModifyTable to do the UPDATE/DELETE work */ add_path(final_rel, (Path *) create_modifytable_path(root, final_rel, @@ -1788,6 +1864,7 @@ inheritance_planner(PlannerInfo *root) rootRelation, root->partColsUpdated, resultRelations, + partitioned_rels, subpaths, subroots, withCheckOptionLists, @@ -2371,6 +2448,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, rootRelation, false, list_make1_int(parse->resultRelation), + NIL, list_make1(path), list_make1(root), withCheckOptionLists, diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 34acb732ee..c1ed3abe9a 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -3432,6 +3432,9 @@ create_lockrows_path(PlannerInfo *root, RelOptInfo *rel, * 'partColsUpdated' is true if any partitioning columns are being updated, * either from the target relation or a descendent partitioned table. 
* 'resultRelations' is an integer list of actual RT indexes of target rel(s) + * 'partitioned_rels' is an integer list of RT indexes of non-leaf tables in + * the partition tree, if this is an UPDATE/DELETE to a partitioned table. + * Otherwise NIL. * 'subpaths' is a list of Path(s) producing source data (one per rel) * 'subroots' is a list of PlannerInfo structs (one per rel) * 'withCheckOptionLists' is a list of WCO lists (one per rel) @@ -3445,8 +3448,8 @@ create_modifytable_path(PlannerInfo *root, RelOptInfo *rel, CmdType operation, bool canSetTag, Index nominalRelation, Index rootRelation, bool partColsUpdated, - List *resultRelations, List *subpaths, - List *subroots, + List *resultRelations, List *partitioned_rels, + List *subpaths, List *subroots, List *withCheckOptionLists, List *returningLists, List *rowMarks, OnConflictExpr *onconflict, int epqParam) @@ -3514,6 +3517,7 @@ create_modifytable_path(PlannerInfo *root, RelOptInfo *rel, pathnode->rootRelation = rootRelation; pathnode->partColsUpdated = partColsUpdated; pathnode->resultRelations = resultRelations; + pathnode->partitioned_rels = list_copy(partitioned_rels); pathnode->subpaths = subpaths; pathnode->subroots = subroots; pathnode->withCheckOptionLists = withCheckOptionLists; diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c index 29f1c11b24..97f3145d16 100644 --- a/src/backend/partitioning/partprune.c +++ b/src/backend/partitioning/partprune.c @@ -214,7 +214,10 @@ static void partkey_datum_from_expr(PartitionPruneContext *context, * 'parentrel' is the RelOptInfo for an appendrel, and 'subpaths' is the list * of scan paths for its child rels. * - * 'partitioned_rels' is a List containing Lists of relids of partitioned + * If 'resultRelations' is non-NIL, then this List of relids is used to build + * the mapping structures. Otherwise the 'subpaths' List is used. 
+ * + * 'partitioned_rels' is a List containing Lists of relids of partitioned * tables (a/k/a non-leaf partitions) that are parents of some of the child * rels. Here we attempt to populate the PartitionPruneInfo by adding a * 'prune_infos' item for each sublist in the 'partitioned_rels' list. @@ -229,6 +232,7 @@ static void partkey_datum_from_expr(PartitionPruneContext *context, PartitionPruneInfo * make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, List *subpaths, List *partitioned_rels, + List *resultRelations, List *prunequal) { PartitionPruneInfo *pruneinfo; @@ -246,23 +250,36 @@ make_partition_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, relid_subplan_map = palloc0(sizeof(int) * root->simple_rel_array_size); /* - * relid_subplan_map maps relid of a leaf partition to the index in - * 'subpaths' of the scan plan for that partition. + * If 'resultRelations' are present then map these, otherwise we map the + * 'subpaths' List. */ - i = 1; - foreach(lc, subpaths) + if (resultRelations != NIL) { - Path *path = (Path *) lfirst(lc); - RelOptInfo *pathrel = path->parent; + i = 1; + foreach(lc, resultRelations) + { + int resultrel = lfirst_int(lc); - Assert(IS_SIMPLE_REL(pathrel)); - Assert(pathrel->relid < root->simple_rel_array_size); - /* No duplicates please */ - Assert(relid_subplan_map[pathrel->relid] == 0); - - relid_subplan_map[pathrel->relid] = i++; + Assert(resultrel < root->simple_rel_array_size); + relid_subplan_map[resultrel] = i++; + } } + else + { + i = 1; + foreach(lc, subpaths) + { + Path *path = (Path *)lfirst(lc); + RelOptInfo *pathrel = path->parent; + Assert(IS_SIMPLE_REL(pathrel)); + Assert(pathrel->relid < root->simple_rel_array_size); + /* No duplicates please */ + Assert(relid_subplan_map[pathrel->relid] == 0); + + relid_subplan_map[pathrel->relid] = i++; + } + } /* We now build a PartitionedRelPruneInfo for each partitioned rel. 
*/ prunerelinfos = NIL; foreach(lc, partitioned_rels) @@ -404,9 +421,12 @@ make_partitionedrel_pruneinfo(PlannerInfo *root, RelOptInfo *parentrel, * an adjust_appendrel_attrs step. But it might not be, and then * we have to translate. We update the prunequal parameter here, * because in later iterations of the loop for child partitions, - * we want to translate from parent to child variables. + * we want to translate from parent to child variables. We don't + * need to do this when planning a non-SELECT as we're only + * working with a single partition hierarchy in that case. */ - if (!bms_equal(parentrel->relids, subpart->relids)) + if (root->parse->commandType == CMD_SELECT && + !bms_equal(parentrel->relids, subpart->relids)) { int nappinfos; AppendRelInfo **appinfos = find_appinfos_by_relids(root, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index b593d22c48..d058716ff8 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1166,7 +1166,7 @@ typedef struct ModifyTableState int mt_whichplan; /* which one is being executed (0..n-1) */ TupleTableSlot **mt_scans; /* input tuple corresponding to underlying * plans */ - ResultRelInfo *resultRelInfo; /* per-subplan target relations */ + ResultRelInfo **resultRelInfos; /* per-subplan target relations */ ResultRelInfo *rootResultRelInfo; /* root target relation (partitioned * table root) */ List **mt_arowmarks; /* per-subplan ExecAuxRowMark lists */ @@ -1191,6 +1191,14 @@ typedef struct ModifyTableState /* Per plan map for tuple conversion from child to root */ TupleConversionMap **mt_per_subplan_tupconv_maps; + + /* + * Details required to allow partitions to be eliminated from the scan, or + * NULL if not possible. + */ + struct PartitionPruneState *mt_prune_state; + Bitmapset *mt_valid_subplans; /* for runtime pruning, valid mt_plans + * indexes to scan. 
*/ } ModifyTableState; /* ---------------- diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 23a06d718e..37e32166de 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -1380,6 +1380,10 @@ typedef struct AppendPath #define IS_DUMMY_APPEND(p) \ (IsA((p), AppendPath) && ((AppendPath *) (p))->subpaths == NIL) +#define IS_DUMMY_MODIFYTABLE(p) \ + (list_length((p)->subpaths) == 1 && \ + IS_DUMMY_APPEND(linitial((p)->subpaths))) + /* * A relation that's been proven empty will have one path that is dummy * (but might have projection paths on top). For historical reasons, @@ -1775,6 +1779,8 @@ typedef struct ModifyTablePath Index rootRelation; /* Root RT index, if target is partitioned */ bool partColsUpdated; /* some part key in hierarchy updated */ List *resultRelations; /* integer list of RT indexes */ + /* RT indexes of non-leaf tables in a partition tree */ + List *partitioned_rels; List *subpaths; /* Path(s) producing source data */ List *subroots; /* per-target-table PlannerInfos */ List *withCheckOptionLists; /* per-target-table WCO lists */ diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 8e6594e355..444ed14e7c 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -231,6 +231,8 @@ typedef struct ModifyTable Bitmapset *fdwDirectModifyPlans; /* indices of FDW DM plans */ List *rowMarks; /* PlanRowMarks (non-locking only) */ int epqParam; /* ID of Param for EvalPlanQual re-eval */ + /* Mapping details for run-time subplan pruning, one per partitioned_rels */ + struct PartitionPruneInfo *part_prune_info; OnConflictAction onConflictAction; /* ON CONFLICT action */ List *arbiterIndexes; /* List of ON CONFLICT arbiter index OIDs */ List *onConflictSet; /* SET for INSERT ON CONFLICT DO UPDATE */ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index a12af54971..c8f5a165d6 100644 --- a/src/include/optimizer/pathnode.h +++ 
b/src/include/optimizer/pathnode.h @@ -252,10 +252,12 @@ extern LockRowsPath *create_lockrows_path(PlannerInfo *root, RelOptInfo *rel, extern ModifyTablePath *create_modifytable_path(PlannerInfo *root, RelOptInfo *rel, CmdType operation, bool canSetTag, - Index nominalRelation, Index rootRelation, + Index nominalRelation, + Index rootRelation, bool partColsUpdated, - List *resultRelations, List *subpaths, - List *subroots, + List *resultRelations, + List *partitioned_rels, + List *subpaths, List *subroots, List *withCheckOptionLists, List *returningLists, List *rowMarks, OnConflictExpr *onconflict, int epqParam); diff --git a/src/include/partitioning/partprune.h b/src/include/partitioning/partprune.h index b3e926865a..a98566f004 100644 --- a/src/include/partitioning/partprune.h +++ b/src/include/partitioning/partprune.h @@ -72,6 +72,7 @@ extern PartitionPruneInfo *make_partition_pruneinfo(struct PlannerInfo *root, struct RelOptInfo *parentrel, List *subpaths, List *partitioned_rels, + List *resultRelations, List *prunequal); extern Bitmapset *prune_append_rel_partitions(struct RelOptInfo *rel); extern Bitmapset *get_matching_partitions(PartitionPruneContext *context, diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 82b68e793d..ccb72d915a 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -3459,6 +3459,111 @@ explain (analyze, costs off, summary off, timing off) select * from ma_test wher reset enable_seqscan; reset enable_sort; +-- +-- Test run-time pruning of ModifyTable subnodes +-- +-- Ensure only ma_test_p3 is scanned. 
+explain (analyze, costs off, summary off, timing off) delete from ma_test where a = (select 29); + QUERY PLAN +------------------------------------------------------ + Delete on ma_test (actual rows=0 loops=1) + Delete on ma_test_p1 + Delete on ma_test_p2 + Delete on ma_test_p3 + InitPlan 1 (returns $0) + -> Result (actual rows=1 loops=1) + -> Seq Scan on ma_test_p1 (never executed) + Filter: (a = $0) + -> Seq Scan on ma_test_p2 (never executed) + Filter: (a = $0) + -> Seq Scan on ma_test_p3 (actual rows=1 loops=1) + Filter: (a = $0) + Rows Removed by Filter: 9 +(13 rows) + +-- Ensure no partitions are scanned. +explain (analyze, costs off, summary off, timing off) delete from ma_test where a = (select 30); + QUERY PLAN +----------------------------------------------- + Delete on ma_test (actual rows=0 loops=1) + Delete on ma_test_p1 + Delete on ma_test_p2 + Delete on ma_test_p3 + InitPlan 1 (returns $0) + -> Result (actual rows=1 loops=1) + -> Seq Scan on ma_test_p1 (never executed) + Filter: (a = $0) + -> Seq Scan on ma_test_p2 (never executed) + Filter: (a = $0) + -> Seq Scan on ma_test_p3 (never executed) + Filter: (a = $0) +(12 rows) + +-- Ensure partition pruning works with an update of the partition key. 
+explain (analyze, costs off, summary off, timing off) update ma_test set a = 29 where a = (select 1); + QUERY PLAN +------------------------------------------------------ + Update on ma_test (actual rows=0 loops=1) + Update on ma_test_p1 + Update on ma_test_p2 + Update on ma_test_p3 + InitPlan 1 (returns $0) + -> Result (actual rows=1 loops=1) + -> Seq Scan on ma_test_p1 (actual rows=1 loops=1) + Filter: (a = $0) + Rows Removed by Filter: 9 + -> Seq Scan on ma_test_p2 (never executed) + Filter: (a = $0) + -> Seq Scan on ma_test_p3 (never executed) + Filter: (a = $0) +(13 rows) + +-- Verify the above command +select tableoid::regclass,a from ma_test where a = 29; + tableoid | a +------------+---- + ma_test_p3 | 29 +(1 row) + +truncate ma_test; +prepare mt_q1 (int) as +delete from ma_test where a > $1; +set plan_cache_mode = force_generic_plan; +explain (analyze, costs off, summary off, timing off) execute mt_q1(15); + QUERY PLAN +------------------------------------------------------ + Delete on ma_test (actual rows=0 loops=1) + Delete on ma_test_p2 + Delete on ma_test_p3 + Subplans Removed: 1 + -> Seq Scan on ma_test_p2 (actual rows=0 loops=1) + Filter: (a > $1) + -> Seq Scan on ma_test_p3 (actual rows=0 loops=1) + Filter: (a > $1) +(8 rows) + +explain (analyze, costs off, summary off, timing off) execute mt_q1(25); + QUERY PLAN +------------------------------------------------------ + Delete on ma_test (actual rows=0 loops=1) + Delete on ma_test_p3 + Subplans Removed: 2 + -> Seq Scan on ma_test_p3 (actual rows=0 loops=1) + Filter: (a > $1) +(5 rows) + +-- Ensure ModifyTable behaves correctly when no subplans match exec params +explain (analyze, costs off, summary off, timing off) execute mt_q1(35); + QUERY PLAN +----------------------------------------------- + Delete on ma_test (actual rows=0 loops=1) + Delete on ma_test_p1 + Subplans Removed: 2 + -> Seq Scan on ma_test_p1 (never executed) + Filter: (a > $1) +(5 rows) + +reset plan_cache_mode; drop table 
ma_test; reset enable_indexonlyscan; -- diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 446af3b2c1..2c2a08e6f6 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -908,6 +908,36 @@ explain (analyze, costs off, summary off, timing off) select * from ma_test wher reset enable_seqscan; reset enable_sort; +-- +-- Test run-time pruning of ModifyTable subnodes +-- + +-- Ensure only ma_test_p3 is scanned. +explain (analyze, costs off, summary off, timing off) delete from ma_test where a = (select 29); + +-- Ensure no partitions are scanned. +explain (analyze, costs off, summary off, timing off) delete from ma_test where a = (select 30); + +-- Ensure partition pruning works with an update of the partition key. +explain (analyze, costs off, summary off, timing off) update ma_test set a = 29 where a = (select 1); + +-- Verify the above command +select tableoid::regclass,a from ma_test where a = 29; + +truncate ma_test; + +prepare mt_q1 (int) as +delete from ma_test where a > $1; + +set plan_cache_mode = force_generic_plan; + +explain (analyze, costs off, summary off, timing off) execute mt_q1(15); +explain (analyze, costs off, summary off, timing off) execute mt_q1(25); +-- Ensure ModifyTable behaves correctly when no subplans match exec params +explain (analyze, costs off, summary off, timing off) execute mt_q1(35); + +reset plan_cache_mode; + drop table ma_test; reset enable_indexonlyscan;