On Mon, 2021-03-01 at 23:13 +0000, Jacob Champion wrote: > On Mon, 2021-03-01 at 16:59 -0600, Justin Pryzby wrote: > > Why is this removed ? > > Mm, both of those analyze.c changes seem suspect. Possibly a mismerge > from the zedstore branch; let me double-check. > > > its (possessive) not it's ("it is") > > Thanks, I'll fix this at the same time.
Both fixed in v3; thanks for the catch! --Jacob
From 484cdaf5050f4010127ae2de5624f39d7f12982d Mon Sep 17 00:00:00 2001 From: Jacob Champion <pchamp...@vmware.com> Date: Tue, 2 Mar 2021 08:59:45 -0800 Subject: [PATCH v3] tableam: accept column projection list Co-authored-by: Soumyadeep Chakraborty <soumyadeep2...@gmail.com> Co-authored-by: Melanie Plageman <melanieplage...@gmail.com> Co-authored-by: Ashwin Agrawal <aagra...@pivotal.io> Co-authored-by: Jacob Champion <pchamp...@vmware.com> --- src/backend/access/heap/heapam_handler.c | 5 +- src/backend/access/nbtree/nbtsort.c | 3 +- src/backend/access/table/tableam.c | 5 +- src/backend/commands/trigger.c | 19 +++- src/backend/executor/execMain.c | 3 +- src/backend/executor/execPartition.c | 2 + src/backend/executor/execReplication.c | 6 +- src/backend/executor/execScan.c | 108 +++++++++++++++++++++ src/backend/executor/nodeIndexscan.c | 10 ++ src/backend/executor/nodeLockRows.c | 7 +- src/backend/executor/nodeModifyTable.c | 38 ++++++-- src/backend/executor/nodeSeqscan.c | 43 ++++++++- src/backend/executor/nodeTidscan.c | 11 ++- src/backend/nodes/copyfuncs.c | 2 + src/backend/nodes/equalfuncs.c | 2 + src/backend/nodes/outfuncs.c | 2 + src/backend/nodes/readfuncs.c | 2 + src/backend/optimizer/path/allpaths.c | 85 +++++++++++++++- src/backend/optimizer/plan/planner.c | 19 ++++ src/backend/optimizer/prep/preptlist.c | 13 ++- src/backend/optimizer/util/inherit.c | 37 ++++++- src/backend/parser/analyze.c | 40 ++++++-- src/backend/parser/parse_relation.c | 18 ++++ src/backend/partitioning/partbounds.c | 14 ++- src/backend/rewrite/rewriteHandler.c | 8 ++ src/include/access/tableam.h | 118 +++++++++++++++++++++-- src/include/executor/executor.h | 6 ++ src/include/nodes/execnodes.h | 1 + src/include/nodes/parsenodes.h | 14 +++ src/include/utils/rel.h | 14 +++ 30 files changed, 611 insertions(+), 44 deletions(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index bd5faf0c1f..5d7bc5f6bd 100644 --- 
a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -180,7 +180,8 @@ static bool heapam_fetch_row_version(Relation relation, ItemPointer tid, Snapshot snapshot, - TupleTableSlot *slot) + TupleTableSlot *slot, + Bitmapset *project_cols) { BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; Buffer buffer; @@ -348,7 +349,7 @@ static TM_Result heapam_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, - TM_FailureData *tmfd) + TM_FailureData *tmfd, Bitmapset *project_cols) { BufferHeapTupleTableSlot *bslot = (BufferHeapTupleTableSlot *) slot; TM_Result result; diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 2c4d7f6e25..57462eb9c7 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -1966,7 +1966,8 @@ _bt_parallel_scan_and_sort(BTSpool *btspool, BTSpool *btspool2, indexInfo = BuildIndexInfo(btspool->index); indexInfo->ii_Concurrent = btshared->isconcurrent; scan = table_beginscan_parallel(btspool->heap, - ParallelTableScanFromBTShared(btshared)); + ParallelTableScanFromBTShared(btshared), + NULL); reltuples = table_index_build_scan(btspool->heap, btspool->index, indexInfo, true, progress, _bt_build_callback, (void *) &buildstate, scan); diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 5ea5bdd810..9bda57247a 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -172,7 +172,7 @@ table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan, } TableScanDesc -table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan) +table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan, Bitmapset *proj) { Snapshot snapshot; uint32 flags = SO_TYPE_SEQSCAN | @@ -194,6 +194,9 @@ 
table_beginscan_parallel(Relation relation, ParallelTableScanDesc parallel_scan) snapshot = SnapshotAny; } + if (proj) + return relation->rd_tableam->scan_begin_with_column_projection(relation, snapshot, 0, NULL, + parallel_scan, flags, proj); return relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL, parallel_scan, flags); } diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 4e4e05844c..e2d83d6927 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -2532,6 +2532,10 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate, if (newtuple != trigtuple) heap_freetuple(newtuple); } + + /* Make sure the new slot is not dependent on the original tuple */ + ExecMaterializeSlot(slot); + if (should_free) heap_freetuple(trigtuple); @@ -2813,6 +2817,10 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate, newtuple = NULL; } } + + /* Make sure the new slot is not dependent on the original tuple */ + ExecMaterializeSlot(newslot); + if (should_free_trig) heap_freetuple(trigtuple); @@ -3019,7 +3027,8 @@ GetTupleForTrigger(EState *estate, estate->es_output_cid, lockmode, LockWaitBlock, lockflags, - &tmfd); + &tmfd, + bms_make_singleton(0)); switch (test) { @@ -3094,7 +3103,7 @@ GetTupleForTrigger(EState *estate, * suffices. 
*/ if (!table_tuple_fetch_row_version(relation, tid, SnapshotAny, - oldslot)) + oldslot, bms_make_singleton(0))) elog(ERROR, "failed to fetch tuple for trigger"); } @@ -3958,7 +3967,8 @@ AfterTriggerExecute(EState *estate, if (!table_tuple_fetch_row_version(rel, &(event->ate_ctid1), SnapshotAny, - LocTriggerData.tg_trigslot)) + LocTriggerData.tg_trigslot, + bms_make_singleton(0))) elog(ERROR, "failed to fetch tuple1 for AFTER trigger"); LocTriggerData.tg_trigtuple = ExecFetchSlotHeapTuple(LocTriggerData.tg_trigslot, false, &should_free_trig); @@ -3977,7 +3987,8 @@ AfterTriggerExecute(EState *estate, if (!table_tuple_fetch_row_version(rel, &(event->ate_ctid2), SnapshotAny, - LocTriggerData.tg_newslot)) + LocTriggerData.tg_newslot, + bms_make_singleton(0))) elog(ERROR, "failed to fetch tuple2 for AFTER trigger"); LocTriggerData.tg_newtuple = ExecFetchSlotHeapTuple(LocTriggerData.tg_newslot, false, &should_free_new); diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index c74ce36ffb..0a3454d04e 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -2542,7 +2542,8 @@ EvalPlanQualFetchRowMark(EPQState *epqstate, Index rti, TupleTableSlot *slot) /* ordinary table, fetch the tuple */ if (!table_tuple_fetch_row_version(erm->relation, (ItemPointer) DatumGetPointer(datum), - SnapshotAny, slot)) + SnapshotAny, slot, + bms_make_singleton(0))) elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck"); return true; } diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index b8da4c5967..ab95b85b78 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -906,6 +906,8 @@ ExecInitPartitionInfo(ModifyTableState *mtstate, EState *estate, ExecInitQual((List *) clause, &mtstate->ps); } } + + PopulateNeededColumnsForOnConflictUpdate(leaf_part_rri); } } diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c 
index 1e285e0349..3b4a65fa11 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -180,7 +180,8 @@ retry: lockmode, LockWaitBlock, 0 /* don't follow updates */ , - &tmfd); + &tmfd, + bms_make_singleton(0)); PopActiveSnapshot(); @@ -357,7 +358,8 @@ retry: lockmode, LockWaitBlock, 0 /* don't follow updates */ , - &tmfd); + &tmfd, + bms_make_singleton(0)); PopActiveSnapshot(); diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c index 69ab34573e..dcc7dd688d 100644 --- a/src/backend/executor/execScan.c +++ b/src/backend/executor/execScan.c @@ -20,7 +20,9 @@ #include "executor/executor.h" #include "miscadmin.h" +#include "nodes/nodeFuncs.h" #include "utils/memutils.h" +#include "utils/rel.h" @@ -340,3 +342,109 @@ ExecScanReScan(ScanState *node) } } } + +typedef struct neededColumnContext +{ + Bitmapset **mask; + int n; +} neededColumnContext; + +static bool +neededColumnContextWalker(Node *node, neededColumnContext *c) +{ + if (node == NULL || contains_whole_row_col(*c->mask)) + return false; + + if (IsA(node, Var)) + { + Var *var = (Var *)node; + + if (var->varattno > 0) + { + Assert(var->varattno <= c->n); + *(c->mask) = bms_add_member(*(c->mask), var->varattno); + } + else if(var->varattno == 0) { + bms_free(*(c->mask)); + *(c->mask) = bms_make_singleton(0); + } + + return false; + } + return expression_tree_walker(node, neededColumnContextWalker, (void * )c); +} + +/* + * n specifies the number of allowed entries in mask: we use + * it for bounds-checking in the walker above. 
+ */ +void +PopulateNeededColumnsForNode(Node *expr, int n, Bitmapset **scanCols) +{ + neededColumnContext c; + + c.mask = scanCols; + c.n = n; + + neededColumnContextWalker(expr, &c); +} + +Bitmapset * +PopulateNeededColumnsForScan(ScanState *scanstate, int ncol) +{ + Bitmapset *result = NULL; + Plan *plan = scanstate->ps.plan; + + PopulateNeededColumnsForNode((Node *) plan->targetlist, ncol, &result); + PopulateNeededColumnsForNode((Node *) plan->qual, ncol, &result); + + if (IsA(plan, IndexScan)) + { + PopulateNeededColumnsForNode((Node *) ((IndexScan *) plan)->indexqualorig, ncol, &result); + PopulateNeededColumnsForNode((Node *) ((IndexScan *) plan)->indexorderbyorig, ncol, &result); + } + else if (IsA(plan, BitmapHeapScan)) + PopulateNeededColumnsForNode((Node *) ((BitmapHeapScan *) plan)->bitmapqualorig, ncol, &result); + + return result; +} + +Bitmapset * +PopulateNeededColumnsForEPQ(EPQState *epqstate, int ncol) +{ + Bitmapset *epqCols = NULL; + Assert(epqstate && epqstate->plan); + PopulateNeededColumnsForNode((Node *) epqstate->plan->qual, + ncol, + &epqCols); + return epqCols; +} + +void +PopulateNeededColumnsForOnConflictUpdate(ResultRelInfo *resultRelInfo) +{ + ExprState *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause; + ProjectionInfo *oc_ProjInfo = resultRelInfo->ri_onConflict->oc_ProjInfo; + Relation relation = resultRelInfo->ri_RelationDesc; + Bitmapset *proj_cols = NULL; + ListCell *lc; + + if (onConflictSetWhere && onConflictSetWhere->expr) + PopulateNeededColumnsForNode((Node *) onConflictSetWhere->expr, + RelationGetDescr(relation)->natts, + &proj_cols); + + if (oc_ProjInfo) + PopulateNeededColumnsForNode((Node *) oc_ProjInfo->pi_state.expr, + RelationGetDescr(relation)->natts, + &proj_cols); + + foreach(lc, resultRelInfo->ri_WithCheckOptionExprs) + { + ExprState *wcoExpr = (ExprState *) lfirst(lc); + PopulateNeededColumnsForNode((Node *) wcoExpr->expr, + RelationGetDescr(relation)->natts, + &proj_cols); + } + 
resultRelInfo->ri_onConflict->proj_cols = proj_cols; +} diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 2fffb1b437..4923bb258c 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -115,6 +115,16 @@ IndexNext(IndexScanState *node) node->iss_NumScanKeys, node->iss_NumOrderByKeys); + if (table_scans_leverage_column_projection(node->ss.ss_currentRelation)) + { + Bitmapset *proj = NULL; + Scan *planNode = (Scan *)node->ss.ps.plan; + int rti = planNode->scanrelid; + RangeTblEntry *rte = list_nth(estate->es_plannedstmt->rtable, rti - 1); + proj = rte->scanCols; + table_index_fetch_set_column_projection(scandesc->xs_heapfetch, proj); + } + node->iss_ScanDesc = scandesc; /* diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c index b2e5c30079..bbff96a85f 100644 --- a/src/backend/executor/nodeLockRows.c +++ b/src/backend/executor/nodeLockRows.c @@ -80,6 +80,7 @@ lnext: int lockflags = 0; TM_Result test; TupleTableSlot *markSlot; + Bitmapset *epqCols = NULL; /* clear any leftover test tuple for this rel */ markSlot = EvalPlanQualSlot(&node->lr_epqstate, erm->relation, erm->rti); @@ -179,11 +180,15 @@ lnext: if (!IsolationUsesXactSnapshot()) lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION; + epqCols = PopulateNeededColumnsForEPQ(&node->lr_epqstate, + RelationGetDescr(erm->relation)->natts); + test = table_tuple_lock(erm->relation, &tid, estate->es_snapshot, markSlot, estate->es_output_cid, lockmode, erm->waitPolicy, lockflags, - &tmfd); + &tmfd, + epqCols); switch (test) { diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 2993ba43e3..f2383ad245 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -240,7 +240,7 @@ ExecCheckTIDVisible(EState *estate, if (!IsolationUsesXactSnapshot()) return; - if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot)) + 
if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot, NULL)) elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT"); ExecCheckTupleVisible(estate, rel, tempSlot); ExecClearTuple(tempSlot); @@ -979,6 +979,7 @@ ldelete:; { TupleTableSlot *inputslot; TupleTableSlot *epqslot; + Bitmapset *epqCols = NULL; if (IsolationUsesXactSnapshot()) ereport(ERROR, @@ -993,12 +994,15 @@ ldelete:; inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc, resultRelInfo->ri_RangeTableIndex); + epqCols = PopulateNeededColumnsForEPQ(epqstate, + RelationGetDescr(resultRelationDesc)->natts); + result = table_tuple_lock(resultRelationDesc, tupleid, estate->es_snapshot, inputslot, estate->es_output_cid, LockTupleExclusive, LockWaitBlock, TUPLE_LOCK_FLAG_FIND_LAST_VERSION, - &tmfd); + &tmfd, epqCols); switch (result) { @@ -1151,8 +1155,23 @@ ldelete:; } else { + RangeTblEntry *resultrte = exec_rt_fetch(resultRelInfo->ri_RangeTableIndex, estate); + Bitmapset *project_cols = resultrte->returningCols; + /* + * XXX returningCols should never be empty if we have a RETURNING + * clause. Right now, if we have a view, we fail to populate the + * returningCols of its base table's RTE. + * If we encounter such a situation now, for correctness, ensure + * that we fetch all the columns. 
+ */ + if(bms_is_empty(resultrte->returningCols)) + { + bms_free(resultrte->returningCols); + project_cols = bms_make_singleton(0); + } if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid, - SnapshotAny, slot)) + SnapshotAny, slot, + project_cols)) elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING"); } } @@ -1545,6 +1564,7 @@ lreplace:; { TupleTableSlot *inputslot; TupleTableSlot *epqslot; + Bitmapset *epqCols = NULL; if (IsolationUsesXactSnapshot()) ereport(ERROR, @@ -1558,12 +1578,14 @@ lreplace:; inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc, resultRelInfo->ri_RangeTableIndex); + epqCols = PopulateNeededColumnsForEPQ(epqstate, + RelationGetDescr(resultRelationDesc)->natts); result = table_tuple_lock(resultRelationDesc, tupleid, estate->es_snapshot, inputslot, estate->es_output_cid, lockmode, LockWaitBlock, TUPLE_LOCK_FLAG_FIND_LAST_VERSION, - &tmfd); + &tmfd, epqCols); switch (result) { @@ -1692,6 +1714,8 @@ ExecOnConflictUpdate(ModifyTableState *mtstate, Relation relation = resultRelInfo->ri_RelationDesc; ExprState *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause; TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing; + ProjectionInfo *oc_ProjInfo = resultRelInfo->ri_onConflict->oc_ProjInfo; + Bitmapset *proj_cols = resultRelInfo->ri_onConflict->proj_cols; TM_FailureData tmfd; LockTupleMode lockmode; TM_Result test; @@ -1712,7 +1736,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate, estate->es_snapshot, existing, estate->es_output_cid, lockmode, LockWaitBlock, 0, - &tmfd); + &tmfd, proj_cols); switch (test) { case TM_Ok: @@ -1860,7 +1884,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate, } /* Project the new tuple version */ - ExecProject(resultRelInfo->ri_onConflict->oc_ProjInfo); + ExecProject(oc_ProjInfo); /* * Note that it is possible that the target tuple has been modified in @@ -2641,6 +2665,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) &mtstate->ps); 
resultRelInfo->ri_onConflict->oc_WhereClause = qualexpr; } + + PopulateNeededColumnsForOnConflictUpdate(resultRelInfo); } /* diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c index 066f9ae37e..2535d1c7f4 100644 --- a/src/backend/executor/nodeSeqscan.c +++ b/src/backend/executor/nodeSeqscan.c @@ -31,6 +31,7 @@ #include "access/tableam.h" #include "executor/execdebug.h" #include "executor/nodeSeqscan.h" +#include "nodes/nodeFuncs.h" #include "utils/rel.h" static TupleTableSlot *SeqNext(SeqScanState *node); @@ -68,9 +69,22 @@ SeqNext(SeqScanState *node) * We reach here if the scan is not parallel, or if we're serially * executing a scan that was planned to be parallel. */ - scandesc = table_beginscan(node->ss.ss_currentRelation, - estate->es_snapshot, - 0, NULL); + if (table_scans_leverage_column_projection(node->ss.ss_currentRelation)) + { + Scan *planNode = (Scan *)node->ss.ps.plan; + int rti = planNode->scanrelid; + RangeTblEntry *rte = list_nth(estate->es_plannedstmt->rtable, rti - 1); + scandesc = table_beginscan_with_column_projection(node->ss.ss_currentRelation, + estate->es_snapshot, + 0, NULL, + rte->scanCols); + } + else + { + scandesc = table_beginscan(node->ss.ss_currentRelation, + estate->es_snapshot, + 0, NULL); + } node->ss.ss_currentScanDesc = scandesc; } @@ -270,14 +284,22 @@ ExecSeqScanInitializeDSM(SeqScanState *node, { EState *estate = node->ss.ps.state; ParallelTableScanDesc pscan; + Bitmapset *proj = NULL; pscan = shm_toc_allocate(pcxt->toc, node->pscan_len); + + if (table_scans_leverage_column_projection(node->ss.ss_currentRelation)) + { + proj = PopulateNeededColumnsForScan(&node->ss, + node->ss.ss_currentRelation->rd_att->natts); + } + table_parallelscan_initialize(node->ss.ss_currentRelation, pscan, estate->es_snapshot); shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan); node->ss.ss_currentScanDesc = - table_beginscan_parallel(node->ss.ss_currentRelation, pscan); + 
table_beginscan_parallel(node->ss.ss_currentRelation, pscan, proj); } /* ---------------------------------------------------------------- @@ -307,8 +329,19 @@ ExecSeqScanInitializeWorker(SeqScanState *node, ParallelWorkerContext *pwcxt) { ParallelTableScanDesc pscan; + Bitmapset *proj = NULL; + + /* + * FIXME: this duplicates work done in ExecSeqScanInitializeDSM. In future the + * plan will have the projection list, and then this overhead will not exist. + */ + if (table_scans_leverage_column_projection(node->ss.ss_currentRelation)) + { + proj = PopulateNeededColumnsForScan(&node->ss, + node->ss.ss_currentRelation->rd_att->natts); + } pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); node->ss.ss_currentScanDesc = - table_beginscan_parallel(node->ss.ss_currentRelation, pscan); + table_beginscan_parallel(node->ss.ss_currentRelation, pscan, proj); } diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c index 48c3737da2..ffb00c5f03 100644 --- a/src/backend/executor/nodeTidscan.c +++ b/src/backend/executor/nodeTidscan.c @@ -366,6 +366,7 @@ TidNext(TidScanState *node) while (node->tss_TidPtr >= 0 && node->tss_TidPtr < numTids) { ItemPointerData tid = tidList[node->tss_TidPtr]; + Bitmapset *project_cols = NULL; /* * For WHERE CURRENT OF, the tuple retrieved from the cursor might @@ -375,7 +376,15 @@ TidNext(TidScanState *node) if (node->tss_isCurrentOf) table_tuple_get_latest_tid(scan, &tid); - if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot)) + /* + * TODO: Remove this hack!! This should be done once at the start of the tid scan. + * Ideally we should probably set the list of projection cols in the + * generic scan desc, perhaps in TableScanDesc. 
+ */ + project_cols = PopulateNeededColumnsForScan((ScanState *) node, + RelationGetDescr(heapRelation)->natts); + + if (table_tuple_fetch_row_version(heapRelation, &tid, snapshot, slot, project_cols)) return slot; /* Bad TID or failed snapshot qual; try next */ diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index aaba1ec2c4..0bde541c60 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -2460,8 +2460,10 @@ _copyRangeTblEntry(const RangeTblEntry *from) COPY_SCALAR_FIELD(checkAsUser); COPY_BITMAPSET_FIELD(selectedCols); COPY_BITMAPSET_FIELD(insertedCols); + COPY_BITMAPSET_FIELD(returningCols); COPY_BITMAPSET_FIELD(updatedCols); COPY_BITMAPSET_FIELD(extraUpdatedCols); + COPY_BITMAPSET_FIELD(scanCols); COPY_NODE_FIELD(securityQuals); return newnode; diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index c2d73626fc..fceb395bce 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -2711,8 +2711,10 @@ _equalRangeTblEntry(const RangeTblEntry *a, const RangeTblEntry *b) COMPARE_SCALAR_FIELD(checkAsUser); COMPARE_BITMAPSET_FIELD(selectedCols); COMPARE_BITMAPSET_FIELD(insertedCols); + COMPARE_BITMAPSET_FIELD(returningCols); COMPARE_BITMAPSET_FIELD(updatedCols); COMPARE_BITMAPSET_FIELD(extraUpdatedCols); + COMPARE_BITMAPSET_FIELD(scanCols); COMPARE_NODE_FIELD(securityQuals); return true; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 8fc432bfe1..712f150bde 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -3222,8 +3222,10 @@ _outRangeTblEntry(StringInfo str, const RangeTblEntry *node) WRITE_OID_FIELD(checkAsUser); WRITE_BITMAPSET_FIELD(selectedCols); WRITE_BITMAPSET_FIELD(insertedCols); + WRITE_BITMAPSET_FIELD(returningCols); WRITE_BITMAPSET_FIELD(updatedCols); WRITE_BITMAPSET_FIELD(extraUpdatedCols); + WRITE_BITMAPSET_FIELD(scanCols); WRITE_NODE_FIELD(securityQuals); } diff --git 
a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 718fb58e86..1ca2199b65 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -1503,8 +1503,10 @@ _readRangeTblEntry(void) READ_OID_FIELD(checkAsUser); READ_BITMAPSET_FIELD(selectedCols); READ_BITMAPSET_FIELD(insertedCols); + READ_BITMAPSET_FIELD(returningCols); READ_BITMAPSET_FIELD(updatedCols); READ_BITMAPSET_FIELD(extraUpdatedCols); + READ_BITMAPSET_FIELD(scanCols); READ_NODE_FIELD(securityQuals); READ_DONE(); diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index d73ac562eb..241443b9ac 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -27,6 +27,7 @@ #include "miscadmin.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "utils/rel.h" #ifdef OPTIMIZER_DEBUG #include "nodes/print.h" #endif @@ -141,7 +142,7 @@ static void subquery_push_qual(Query *subquery, static void recurse_push_qual(Node *setOp, Query *topquery, RangeTblEntry *rte, Index rti, Node *qual); static void remove_unused_subquery_outputs(Query *subquery, RelOptInfo *rel); - +static void extract_scan_columns(PlannerInfo *root); /* * make_one_rel @@ -184,6 +185,8 @@ make_one_rel(PlannerInfo *root, List *joinlist) */ set_base_rel_sizes(root); + extract_scan_columns(root); + /* * We should now have size estimates for every actual table involved in * the query, and we also know which if any have been deleted from the @@ -234,6 +237,86 @@ make_one_rel(PlannerInfo *root, List *joinlist) return rel; } +static void +extract_scan_columns(PlannerInfo *root) +{ + for (int i = 1; i < root->simple_rel_array_size; i++) + { + ListCell *lc; + RangeTblEntry *rte = root->simple_rte_array[i]; + RelOptInfo *rel = root->simple_rel_array[i]; + if (rte == NULL) + continue; + if (rel == NULL) + continue; + if (IS_DUMMY_REL(rel)) + continue; + rte->scanCols = NULL; + foreach(lc, rel->reltarget->exprs) + { + Node *node; + 
List *vars; + ListCell *lc1; + node = lfirst(lc); + /* + * TODO: suggest a default for vars_only to make maintenance less burdensome + */ + vars = pull_var_clause(node, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_RECURSE_PLACEHOLDERS); + foreach(lc1, vars) + { + Var *var = lfirst(lc1); + if (var->varno == i) + { + if (var->varattno > 0) + rte->scanCols = bms_add_member(rte->scanCols, var->varattno); + else if (var->varattno == 0) + { + /* + * If there is a whole-row var, we have to fetch the whole row. + */ + bms_free(rte->scanCols); + rte->scanCols = bms_make_singleton(0); + goto outer; + } + } + } + } + foreach(lc, rel->baserestrictinfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + List *vars = pull_var_clause((Node *)rinfo->clause, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_RECURSE_PLACEHOLDERS); + ListCell *lc1; + if (contains_whole_row_col(rte->scanCols)) + break; + foreach(lc1, vars) + { + Var *var = lfirst(lc1); + if (var->varno == i) + { + if (var->varattno > 0) + rte->scanCols = bms_add_member(rte->scanCols, var->varattno); + else if (var->varattno == 0) + { + /* + * If there is a whole-row var, we have to fetch the whole row. + */ + bms_free(rte->scanCols); + rte->scanCols = bms_make_singleton(0); + break; + } + } + } + } + outer:; + } +} + /* * set_base_rel_consider_startup * Set the consider_[param_]startup flags for each base-relation entry. 
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 545b56bcaf..12f6b36a7b 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1479,6 +1479,9 @@ inheritance_planner(PlannerInfo *root) RelOptInfo *sub_final_rel; Path *subpath; + ListCell *listCell; + int rti; + /* * expand_inherited_rtentry() always processes a parent before any of * that parent's children, so the parent query for this relation @@ -1683,6 +1686,22 @@ inheritance_planner(PlannerInfo *root) /* Build list of modified subroots, too */ subroots = lappend(subroots, subroot); + rti = 0; + foreach(listCell, subroot->parse->rtable) + { + RangeTblEntry *subroot_rte = lfirst(listCell); + RangeTblEntry *finalroot_rte = list_nth(final_rtable, rti); + if (finalroot_rte != subroot_rte) + { + finalroot_rte->scanCols = bms_union(finalroot_rte->scanCols, subroot_rte->scanCols); + if(contains_whole_row_col(finalroot_rte->scanCols)) + { + bms_free(finalroot_rte->scanCols); + finalroot_rte->scanCols = bms_make_singleton(0); + } + } + rti++; + } /* Build list of target-relation RT indexes */ resultRelations = lappend_int(resultRelations, appinfo->child_relid); diff --git a/src/backend/optimizer/prep/preptlist.c b/src/backend/optimizer/prep/preptlist.c index 23f9f861f4..471f9d27a5 100644 --- a/src/backend/optimizer/prep/preptlist.c +++ b/src/backend/optimizer/prep/preptlist.c @@ -190,8 +190,19 @@ preprocess_targetlist(PlannerInfo *root) * to make these Vars available for the RETURNING calculation. Vars that * belong to the result rel don't need to be added, because they will be * made to refer to the actual heap tuple. + * + * XXX: Avoid adding cols from the returningList to avoid overestimation + * of scanCols from RelOptInfo->reltarget exprs. 
This is done to avoid + * additional cols from the RETURNING clause making its way into scanCols + * for queries such as: + * delete from base_tbl using other_tbl t where base_tbl.col1 = t.col1 returning *; + * where base_tbl is the root table of an inheritance hierarchy + * TODO: Delete the result_relation guard below if and when + * inheritance_planner() is refactored to not fake a round of planning + * pretending we have a SELECT query (which causes result_relation to be 0 + * in the first place) */ - if (parse->returningList && list_length(parse->rtable) > 1) + if (result_relation && parse->returningList && list_length(parse->rtable) > 1) { List *vars; ListCell *l; diff --git a/src/backend/optimizer/util/inherit.c b/src/backend/optimizer/util/inherit.c index be1c9ddd96..b58b5abf4f 100644 --- a/src/backend/optimizer/util/inherit.c +++ b/src/backend/optimizer/util/inherit.c @@ -50,7 +50,9 @@ static Bitmapset *translate_col_privs(const Bitmapset *parent_privs, static void expand_appendrel_subquery(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte, Index rti); - +static Bitmapset * +translate_parent_cols(Bitmapset *parent_cols, List *translated_vars, + Relation parent_rel); /* * expand_inherited_rtentry * Expand a rangetable entry that has the "inh" bit set. @@ -518,6 +520,13 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, childrte->alias = childrte->eref = makeAlias(parentrte->eref->aliasname, child_colnames); + if (childOID != parentOID) + childrte->returningCols = + translate_parent_cols(parentrte->returningCols, + appinfo->translated_vars, parentrel); + else + childrte->returningCols = bms_copy(parentrte->returningCols); + /* * Translate the column permissions bitmaps to the child's attnums (we * have to build the translated_vars list before we can do this). 
But if @@ -587,6 +596,32 @@ expand_single_inheritance_child(PlannerInfo *root, RangeTblEntry *parentrte, } } +/* + * We need to translate the list of ordinal attnos from a parent table's + * RangeTblEntry to the ordinal attribute numbers for the child's entry. + */ + +static Bitmapset * +translate_parent_cols(Bitmapset *parent_cols, List *translated_vars, + Relation parent_rel) +{ + int col = -1; + Bitmapset *result = NULL; + /* + * Enumerate the set of parent columns for translation if there is a whole + * row var + */ + if(contains_whole_row_col(parent_cols)) + parent_cols = get_ordinal_attnos(parent_rel); + while ((col = bms_next_member(parent_cols, col)) >= 0) + { + Var *var = (Var *) list_nth(translated_vars, col - 1); + if (var) + result = bms_add_member(result, var->varattno); + } + return result; +} + /* * translate_col_privs * Translate a bitmapset representing per-column privileges from the diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index 0f3a70c49a..1644e590d3 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -69,7 +69,7 @@ static Node *transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, static void determineRecursiveColTypes(ParseState *pstate, Node *larg, List *nrtargetlist); static Query *transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt); -static List *transformReturningList(ParseState *pstate, List *returningList); +static void transformReturningList(ParseState *pstate, Query *qry, List *returningList); static List *transformUpdateTargetList(ParseState *pstate, List *targetList); static Query *transformPLAssignStmt(ParseState *pstate, @@ -449,7 +449,7 @@ transformDeleteStmt(ParseState *pstate, DeleteStmt *stmt) qual = transformWhereClause(pstate, stmt->whereClause, EXPR_KIND_WHERE, "WHERE"); - qry->returningList = transformReturningList(pstate, stmt->returningList); + transformReturningList(pstate, qry, stmt->returningList); /* done building the range table and jointree 
*/ qry->rtable = pstate->p_rtable; @@ -865,8 +865,7 @@ transformInsertStmt(ParseState *pstate, InsertStmt *stmt) pstate->p_namespace = NIL; addNSItemToQuery(pstate, pstate->p_target_nsitem, false, true, true); - qry->returningList = transformReturningList(pstate, - stmt->returningList); + transformReturningList(pstate, qry, stmt->returningList); } /* done building the range table and jointree */ @@ -2272,7 +2271,7 @@ transformUpdateStmt(ParseState *pstate, UpdateStmt *stmt) qual = transformWhereClause(pstate, stmt->whereClause, EXPR_KIND_WHERE, "WHERE"); - qry->returningList = transformReturningList(pstate, stmt->returningList); + transformReturningList(pstate, qry, stmt->returningList); /* * Now we are done with SELECT-like processing, and can get on with @@ -2367,14 +2366,16 @@ transformUpdateTargetList(ParseState *pstate, List *origTlist) * transformReturningList - * handle a RETURNING clause in INSERT/UPDATE/DELETE */ -static List * -transformReturningList(ParseState *pstate, List *returningList) +static void +transformReturningList(ParseState *pstate, Query *qry, List *returningList) { List *rlist; int save_next_resno; + List *vars; + ListCell *l; if (returningList == NIL) - return NIL; /* nothing to do */ + return; /* * We need to assign resnos starting at one in the RETURNING list. 
Save @@ -2387,6 +2388,27 @@ transformReturningList(ParseState *pstate, List *returningList) /* transform RETURNING identically to a SELECT targetlist */ rlist = transformTargetList(pstate, returningList, EXPR_KIND_RETURNING); + vars = pull_var_clause((Node *) rlist, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | + PVC_INCLUDE_PLACEHOLDERS); + foreach (l, vars) + { + Var *var = (Var *) lfirst(l); + RangeTblEntry *rte = (RangeTblEntry *) list_nth(pstate->p_rtable, var->varno - 1); + if (var->varattno > 0) + rte->returningCols = bms_add_member(rte->returningCols, var->varattno); + else if (var->varattno == 0) + { + /* + * If there is a whole-row var, we have to fetch the whole row. + */ + bms_free(rte->returningCols); + rte->returningCols = bms_make_singleton(0); + break; + } + } + /* * Complain if the nonempty tlist expanded to nothing (which is possible * if it contains only a star-expansion of a zero-column table). If we @@ -2410,7 +2432,7 @@ transformReturningList(ParseState *pstate, List *returningList) /* restore state */ pstate->p_next_resno = save_next_resno; - return rlist; + qry->returningList = rlist; } diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c index ca02982e0b..b871673c90 100644 --- a/src/backend/parser/parse_relation.c +++ b/src/backend/parser/parse_relation.c @@ -1446,7 +1446,9 @@ addRangeTableEntry(ParseState *pstate, rte->selectedCols = NULL; rte->insertedCols = NULL; rte->updatedCols = NULL; + rte->returningCols = NULL; rte->extraUpdatedCols = NULL; + rte->scanCols = NULL; /* * Add completed RTE to pstate's range table list, so that we know its @@ -1533,7 +1535,9 @@ addRangeTableEntryForRelation(ParseState *pstate, rte->checkAsUser = InvalidOid; /* not set-uid by default, either */ rte->selectedCols = NULL; rte->insertedCols = NULL; + rte->returningCols = NULL; rte->updatedCols = NULL; + rte->scanCols = NULL; rte->extraUpdatedCols = NULL; /* @@ -1630,8 +1634,10 @@ 
addRangeTableEntryForSubquery(ParseState *pstate, rte->checkAsUser = InvalidOid; rte->selectedCols = NULL; rte->insertedCols = NULL; + rte->returningCols = NULL; rte->updatedCols = NULL; rte->extraUpdatedCols = NULL; + rte->scanCols = NULL; /* * Add completed RTE to pstate's range table list, so that we know its @@ -1936,8 +1942,10 @@ addRangeTableEntryForFunction(ParseState *pstate, rte->checkAsUser = InvalidOid; rte->selectedCols = NULL; rte->insertedCols = NULL; + rte->returningCols = NULL; rte->updatedCols = NULL; rte->extraUpdatedCols = NULL; + rte->scanCols = NULL; /* * Add completed RTE to pstate's range table list, so that we know its @@ -2007,8 +2015,10 @@ addRangeTableEntryForTableFunc(ParseState *pstate, rte->checkAsUser = InvalidOid; rte->selectedCols = NULL; rte->insertedCols = NULL; + rte->returningCols = NULL; rte->updatedCols = NULL; rte->extraUpdatedCols = NULL; + rte->scanCols = NULL; /* * Add completed RTE to pstate's range table list, so that we know its @@ -2094,8 +2104,10 @@ addRangeTableEntryForValues(ParseState *pstate, rte->checkAsUser = InvalidOid; rte->selectedCols = NULL; rte->insertedCols = NULL; + rte->returningCols = NULL; rte->updatedCols = NULL; rte->extraUpdatedCols = NULL; + rte->scanCols = NULL; /* * Add completed RTE to pstate's range table list, so that we know its @@ -2183,8 +2195,10 @@ addRangeTableEntryForJoin(ParseState *pstate, rte->checkAsUser = InvalidOid; rte->selectedCols = NULL; rte->insertedCols = NULL; + rte->returningCols = NULL; rte->updatedCols = NULL; rte->extraUpdatedCols = NULL; + rte->scanCols = NULL; /* * Add completed RTE to pstate's range table list, so that we know its @@ -2332,8 +2346,10 @@ addRangeTableEntryForCTE(ParseState *pstate, rte->checkAsUser = InvalidOid; rte->selectedCols = NULL; rte->insertedCols = NULL; + rte->returningCols = NULL; rte->updatedCols = NULL; rte->extraUpdatedCols = NULL; + rte->scanCols = NULL; /* * Add completed RTE to pstate's range table list, so that we know its @@ -2457,6 
+2473,8 @@ addRangeTableEntryForENR(ParseState *pstate, rte->requiredPerms = 0; rte->checkAsUser = InvalidOid; rte->selectedCols = NULL; + rte->returningCols = NULL; + rte->scanCols = NULL; /* * Add completed RTE to pstate's range table list, so that we know its diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index e5f3482d52..122f2eb482 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -3197,6 +3197,7 @@ check_default_partition_contents(Relation parent, Relation default_rel, TableScanDesc scan; MemoryContext oldCxt; TupleTableSlot *tupslot; + Bitmapset *proj = NULL; /* Lock already taken above. */ if (part_relid != RelationGetRelid(default_rel)) @@ -3261,7 +3262,15 @@ check_default_partition_contents(Relation parent, Relation default_rel, econtext = GetPerTupleExprContext(estate); snapshot = RegisterSnapshot(GetLatestSnapshot()); tupslot = table_slot_create(part_rel, &estate->es_tupleTable); - scan = table_beginscan(part_rel, snapshot, 0, NULL); + if (table_scans_leverage_column_projection(part_rel)) + { + PopulateNeededColumnsForNode((Node*)partqualstate->expr, tupslot->tts_tupleDescriptor->natts, &proj); + scan = table_beginscan_with_column_projection(part_rel, snapshot, 0, NULL, proj); + } + else + { + scan = table_beginscan(part_rel, snapshot, 0, NULL); + } /* * Switch to per-tuple memory context and reset it for each tuple @@ -3292,6 +3301,9 @@ check_default_partition_contents(Relation parent, Relation default_rel, if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel)) table_close(part_rel, NoLock); /* keep the lock until commit */ + + if (proj) + pfree(proj); } } diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 0672f497c6..b22d341c91 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -1857,8 +1857,10 @@ ApplyRetrieveRule(Query *parsetree, rte->checkAsUser = InvalidOid; 
rte->selectedCols = NULL; rte->insertedCols = NULL; + rte->returningCols = NULL; rte->updatedCols = NULL; rte->extraUpdatedCols = NULL; + rte->scanCols = NULL; /* * For the most part, Vars referencing the view should remain as @@ -1958,15 +1960,19 @@ ApplyRetrieveRule(Query *parsetree, subrte->checkAsUser = rte->checkAsUser; subrte->selectedCols = rte->selectedCols; subrte->insertedCols = rte->insertedCols; + subrte->returningCols = rte->returningCols; subrte->updatedCols = rte->updatedCols; subrte->extraUpdatedCols = rte->extraUpdatedCols; + subrte->scanCols = rte->scanCols; rte->requiredPerms = 0; /* no permission check on subquery itself */ rte->checkAsUser = InvalidOid; rte->selectedCols = NULL; rte->insertedCols = NULL; + rte->returningCols = NULL; rte->updatedCols = NULL; rte->extraUpdatedCols = NULL; + rte->scanCols = NULL; return parsetree; } @@ -3317,6 +3323,7 @@ rewriteTargetView(Query *parsetree, Relation view) * base_rte instead of copying it. */ new_rte = base_rte; + new_rte->returningCols = bms_copy(view_rte->returningCols); new_rte->rellockmode = RowExclusiveLock; parsetree->rtable = lappend(parsetree->rtable, new_rte); @@ -3669,6 +3676,7 @@ rewriteTargetView(Query *parsetree, Relation view) } } + new_rte->returningCols = bms_copy(view_rte->returningCols); table_close(base_rel, NoLock); return parsetree; diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 414b6b4d57..049e734aca 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -265,6 +265,7 @@ typedef struct TableAmRoutine { /* this must be set to T_TableAmRoutine */ NodeTag type; + bool scans_leverage_column_projection; /* ------------------------------------------------------------------------ @@ -305,6 +306,30 @@ typedef struct TableAmRoutine ParallelTableScanDesc pscan, uint32 flags); + /* + * Variant of scan_begin() with a column projection bitmap that lists the + * ordinal attribute numbers to be fetched during the scan. 
+ * + * If project_columns is an empty bitmap, none of the data columns are to be + * fetched. + * + * If project_columns is a singleton bitmap with a whole-row reference (0), + * all of the data columns are to be fetched. + * + * Please note: project_columns only deals with non-system columns (attnum >= 0) + * + * Please note: Due to the limitations of the slot_get***() APIs, the + * scan_getnextslot() tableAM call must return a TupleTableSlot that is densely + * populated (missing cols indicated with isnull = true up to the largest + * attno in the projection list) + */ + TableScanDesc (*scan_begin_with_column_projection)(Relation relation, + Snapshot snapshot, + int nkeys, struct ScanKeyData *key, + ParallelTableScanDesc parallel_scan, + uint32 flags, + Bitmapset *project_columns); + /* * Release resources and deallocate scan. If TableScanDesc.temp_snap, * TableScanDesc.rs_snapshot needs to be unregistered. @@ -408,6 +433,26 @@ typedef struct TableAmRoutine */ void (*index_fetch_end) (struct IndexFetchTableData *data); + /* + * Set up a column projection list that can be used by index_fetch_tuple() + * to fetch a subset of columns for a tuple. + * + * If project_columns is an empty bitmap, none of the data columns are to be + * fetched. + * + * If project_columns is a singleton bitmap with a whole-row reference (0), + * all of the data columns are to be fetched. + * + * Please note: project_columns only deals with non-system columns (attnum >= 0) + * + * Please note: Due to the limitations of the slot_get***() APIs, + * index_fetch_tuple() must return a TupleTableSlot that is densely + * populated (missing cols indicated with isnull = true up to the largest + * attno in the projection list) + */ + void (*index_fetch_set_column_projection) (struct IndexFetchTableData *data, + Bitmapset *project_columns); + /* * Fetch tuple at `tid` into `slot`, after doing a visibility test * according to `snapshot`.
If a tuple was found and passed the visibility @@ -444,11 +489,27 @@ typedef struct TableAmRoutine * Fetch tuple at `tid` into `slot`, after doing a visibility test * according to `snapshot`. If a tuple was found and passed the visibility * test, returns true, false otherwise. + * + * project_cols is a set of columns to be fetched for the given row. + * + * If project_cols is an empty bitmap, none of the data columns are to be + * fetched. + * + * If project_cols is a singleton bitmap with a whole-row reference (0), + * all of the data columns are to be fetched. + * + * Please note: project_cols only deals with non-system columns (attnum >= 0) + * + * Please note: Due to the limitations of the slot_get***() APIs, + * tuple_fetch_row_version() must return a TupleTableSlot that is densely + * populated (missing cols indicated with isnull = true up to the largest + * attno in the projection list) */ bool (*tuple_fetch_row_version) (Relation rel, ItemPointer tid, Snapshot snapshot, - TupleTableSlot *slot); + TupleTableSlot *slot, + Bitmapset *project_cols); /* * Is tid valid for a scan of this relation. @@ -535,7 +596,8 @@ typedef struct TableAmRoutine LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, - TM_FailureData *tmfd); + TM_FailureData *tmfd, + Bitmapset *project_cols); /* * Perform operations necessary to complete insertions made via @@ -889,6 +951,12 @@ table_beginscan(Relation rel, Snapshot snapshot, return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); } +static inline bool +table_scans_leverage_column_projection(Relation relation) +{ + return relation->rd_tableam->scans_leverage_column_projection; +} + /* * Like table_beginscan(), but for scanning catalog. It'll automatically use a * snapshot appropriate for scanning catalog relations.
@@ -918,6 +986,19 @@ table_beginscan_strat(Relation rel, Snapshot snapshot, return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags); } +static inline TableScanDesc +table_beginscan_with_column_projection(Relation relation, Snapshot snapshot, + int nkeys, struct ScanKeyData *key, + Bitmapset *project_column) +{ + uint32 flags = SO_TYPE_SEQSCAN | + SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE; + + Assert(relation->rd_tableam->scans_leverage_column_projection); + return relation->rd_tableam->scan_begin_with_column_projection( + relation, snapshot, nkeys, key, NULL, flags, project_column); +} + /* * table_beginscan_bm is an alternative entry point for setting up a * TableScanDesc for a bitmap heap scan. Although that scan technology is @@ -1132,7 +1213,8 @@ extern void table_parallelscan_initialize(Relation rel, * Caller must hold a suitable lock on the relation. */ extern TableScanDesc table_beginscan_parallel(Relation rel, - ParallelTableScanDesc pscan); + ParallelTableScanDesc pscan, + Bitmapset *proj); /* * Restart a parallel scan. Call this in the leader process. Caller is @@ -1182,6 +1264,13 @@ table_index_fetch_end(struct IndexFetchTableData *scan) scan->rel->rd_tableam->index_fetch_end(scan); } +static inline void +table_index_fetch_set_column_projection(struct IndexFetchTableData *scan, + Bitmapset *project_column) +{ + scan->rel->rd_tableam->index_fetch_set_column_projection(scan, project_column); +} + /* * Fetches, as part of an index scan, tuple at `tid` into `slot`, after doing * a visibility test according to `snapshot`. 
If a tuple was found and passed @@ -1257,7 +1346,8 @@ static inline bool table_tuple_fetch_row_version(Relation rel, ItemPointer tid, Snapshot snapshot, - TupleTableSlot *slot) + TupleTableSlot *slot, + Bitmapset *project_cols) { /* * We don't expect direct calls to table_tuple_fetch_row_version with @@ -1267,7 +1357,7 @@ table_tuple_fetch_row_version(Relation rel, if (unlikely(TransactionIdIsValid(CheckXidAlive) && !bsysscan)) elog(ERROR, "unexpected table_tuple_fetch_row_version call during logical decoding"); - return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot); + return rel->rd_tableam->tuple_fetch_row_version(rel, tid, snapshot, slot, project_cols); } /* @@ -1528,6 +1618,20 @@ table_tuple_update(Relation rel, ItemPointer otid, TupleTableSlot *slot, * also lock descendant tuples if lock modes don't conflict. * If TUPLE_LOCK_FLAG_FIND_LAST_VERSION, follow the update chain and lock * latest version. + * project_cols: It is a set of columns to be fetched for the tuple being locked. + * + * If project_cols is an empty bitmap, none of the data columns are to be + * fetched. + * + * If project_cols is a singleton bitmap with a whole-row reference (0), + * all of the data columns are to be fetched. 
+ * + * Please note: project_cols only deals with non-system columns (attnum >= 0) + * + * Please note: Due to the limitations of the slot_get***() APIs, + * tuple_lock() must return a TupleTableSlot that is densely + * populated (missing cols indicated with isnull = true up to the largest + * attno in the projection list) * * Output parameters: * *slot: contains the target tuple @@ -1549,11 +1653,11 @@ static inline TM_Result table_tuple_lock(Relation rel, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, LockWaitPolicy wait_policy, uint8 flags, - TM_FailureData *tmfd) + TM_FailureData *tmfd, Bitmapset *project_cols) { return rel->rd_tableam->tuple_lock(rel, tid, snapshot, slot, cid, mode, wait_policy, - flags, tmfd); + flags, tmfd, project_cols); } /* diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 071e363d54..7949bc16eb 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -621,5 +621,11 @@ extern void CheckCmdReplicaIdentity(Relation rel, CmdType cmd); extern void CheckSubscriptionRelkind(char relkind, const char *nspname, const char *relname); +extern void +PopulateNeededColumnsForNode(Node *expr, int n, Bitmapset **scanCols); +extern Bitmapset * +PopulateNeededColumnsForScan(ScanState *scanstate, int ncol); +extern Bitmapset *PopulateNeededColumnsForEPQ(EPQState *epqstate, int ncol); +extern void PopulateNeededColumnsForOnConflictUpdate(ResultRelInfo *resultRelInfo); #endif /* EXECUTOR_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index e31ad6204e..687fedba71 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -385,6 +385,7 @@ typedef struct OnConflictSetState TupleTableSlot *oc_ProjSlot; /* CONFLICT ... SET ...
projection target */ ProjectionInfo *oc_ProjInfo; /* for ON CONFLICT DO UPDATE SET */ ExprState *oc_WhereClause; /* state for the WHERE clause */ + Bitmapset *proj_cols; /* cols to be scanned during the operation */ } OnConflictSetState; /* diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 236832a2ca..b68173302c 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -1128,6 +1128,20 @@ typedef struct RangeTblEntry Bitmapset *updatedCols; /* columns needing UPDATE permission */ Bitmapset *extraUpdatedCols; /* generated columns being updated */ List *securityQuals; /* security barrier quals to apply, if any */ + + /* + * scanCols: Columns to be retrieved during a physical scan. + * returningCols: Columns to be retrieved to satisfy the RETURNING clause. + * + * Please note: These bitmaps only deal with non-system columns (attnum >= 0) + * + * These bitmaps have some special values: + * - A singleton bitmap with the element 0 indicates that all non-system + * columns must be fetched. + * - An empty bitmap indicates that no non-system column must be fetched. 
+ */ + Bitmapset *scanCols; /* columns to be fetched during a physical scan */ + Bitmapset *returningCols; /* columns in the RETURNING clause */ } RangeTblEntry; /* diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 10b63982c0..2a6351336e 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -638,6 +638,20 @@ typedef struct ViewOptions RelationNeedsWAL(relation) && \ !IsCatalogRelation(relation)) +static inline bool +contains_whole_row_col(Bitmapset *cols) +{ + return bms_is_member(0, cols); +} + +static inline Bitmapset * +get_ordinal_attnos(Relation rel) +{ + Bitmapset *attnos = NULL; + attnos = bms_add_range(attnos, 1, RelationGetDescr(rel)->natts); + return attnos; +} + /* routines in utils/cache/relcache.c */ extern void RelationIncrementReferenceCount(Relation rel); extern void RelationDecrementReferenceCount(Relation rel); -- 2.25.1