On Fri, 14 Feb 2025 at 10:59, Peter Eisentraut <pe...@eisentraut.org> wrote: > > On 13.02.25 14:06, jian he wrote: > > I didn't solve the out join semantic issue. > > i am wondering, can we do the virtual generated column expansion in > > the rewrite stage as is, > > and wrap the expressions in PHVs if the virtual generated > > columns come from the nullable side of an outer join. > > PlaceHolderVar looks like a fitting mechanism for this. But it's so far > a planner node, so it might take some additional consideration if we > want to expand where it's used.
It seems pretty baked into how PHVs work that they should only be added by the planner, so I think that I agree with Richard -- virtual generated columns probably have to be expanded in the planner rather than the rewriter. > Maybe a short-term fix would be to error out if we find ourselves about > to expand a Var with varnullingrels != NULL. That would mean you > couldn't use a virtual generated column on the nullable output side of > an outer join, which is annoying but not fatal, and we could fix it > incrementally later. I think that would be rather a sad limitation to have. It would be nice to have this fully working for the next release. Attached is a rough patch that moves the expansion of virtual generated columns to the planner. It needs a lot more testing (and some regression tests), but it does seem to fix all the issues mentioned in this thread. Regards, Dean
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c new file mode 100644 index 7b1a8a0..041c152 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -712,6 +712,13 @@ subquery_planner(PlannerGlobal *glob, Qu transform_MERGE_to_join(parse); /* + * Expand any virtual generated columns. Note that this step does not + * descend into subqueries; if we pull up any subqueries below, their + * virtual generated columns are expanded just before pulling them up. + */ + parse = root->parse = expand_virtual_generated_columns(root, parse); + + /* * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so * that we don't need so many special cases to deal with that situation. */ diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c new file mode 100644 index 5d9225e..95ef3f4 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -4,6 +4,8 @@ * Planner preprocessing for subqueries and join tree manipulation. * * NOTE: the intended sequence for invoking these operations is + * transform_MERGE_to_join + * expand_virtual_generated_columns * replace_empty_jointree * pull_up_sublinks * preprocess_function_rtes @@ -25,6 +27,7 @@ */ #include "postgres.h" +#include "access/table.h" #include "catalog/pg_type.h" #include "funcapi.h" #include "miscadmin.h" @@ -39,8 +42,25 @@ #include "optimizer/tlist.h" #include "parser/parse_relation.h" #include "parser/parsetree.h" +#include "rewrite/rewriteHandler.h" #include "rewrite/rewriteManip.h" +#include "utils/rel.h" + + +typedef struct expand_generated_table_context +{ + int num_attrs; /* number of table columns */ + List *tlist; /* virtual generated column replacements */ + Node **eg_cache; /* cache for results with PHVs */ +} expand_generated_table_context; +typedef struct expand_generated_context +{ + PlannerInfo *root; + Query *parse; /* query being rewritten */ + int sublevels_up; /* (current) nesting depth */ + expand_generated_table_context *tbl_contexts; /* per RTE of query */ +} expand_generated_context; typedef struct nullingrel_info { @@ -87,6 +107,8 @@ typedef struct reduce_outer_joins_partia Relids unreduced_side; /* relids in its still-nullable side */ } reduce_outer_joins_partial_state; +static Node *expand_virtual_generated_columns_callback(Node *node, + expand_generated_context *context); static Node *pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode, Relids *relids); static Node *pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node, @@ -378,6 +400,227 @@ transform_MERGE_to_join(Query *parse) } /* + * Expand all virtual generated column references in a query. + * + * Note that this only processes virtual generated columns in relation RTEs + * in the query's rtable; it does not descend into subqueries. + */ +Query * +expand_virtual_generated_columns(PlannerInfo *root, Query *parse) +{ + expand_generated_context egcontext; + int rt_index; + + egcontext.root = root; + egcontext.parse = parse; + egcontext.sublevels_up = 0; + egcontext.tbl_contexts = NULL; + + /* Scan the range table for relations with virtual generated columns */ + rt_index = 0; + foreach_node(RangeTblEntry, rte, parse->rtable) + { + Relation rel; + int num_attrs; + List *tlist; + + ++rt_index; + + /* Only normal relations can have virtual generated columns */ + if (rte->rtekind != RTE_RELATION) + continue; + + /* + * Look for virtual generated columns. We assume that previous code + * already acquired a lock on the table, so we need no lock here. + */ + rel = table_open(rte->relid, NoLock); + num_attrs = RelationGetDescr(rel)->natts; + tlist = get_virtual_generated_columns(rel, rt_index); + table_close(rel, NoLock); + + if (tlist) + { + expand_generated_table_context *tctx; + + /* + * Record details of this table's virtual generated columns. The + * first time through, build the per-table context array. + */ + if (egcontext.tbl_contexts == NULL) + egcontext.tbl_contexts = palloc0(list_length(parse->rtable) * + sizeof(expand_generated_table_context)); + + tctx = &egcontext.tbl_contexts[rt_index - 1]; + tctx->num_attrs = num_attrs; + tctx->tlist = tlist; + /* PHV cache for each attr, plus wholerow (attrnum = 0) */ + tctx->eg_cache = palloc0((num_attrs + 1) * sizeof(Node *)); + } + } + + /* + * If any relations had virtual generated columns, perform the necessary + * replacements. + */ + if (egcontext.tbl_contexts != NULL) + parse = query_tree_mutator(parse, + expand_virtual_generated_columns_callback, + &egcontext, + 0); + + return parse; +} + +static Node * +expand_virtual_generated_columns_callback(Node *node, + expand_generated_context *context) +{ + if (node == NULL) + return NULL; + else if (IsA(node, Var)) + { + Query *parse = context->parse; + Var *var = (Var *) node; + + /* + * If the Var is from a relation with virtual generated columns, then + * replace it with the appropriate expression, if necessary. + */ + if (var->varlevelsup == context->sublevels_up && + var->varno > 0 && + var->varno <= list_length(parse->rtable) && + context->tbl_contexts[var->varno - 1].tlist) + { + expand_generated_table_context *tctx; + Node *newnode; + + tctx = &context->tbl_contexts[var->varno - 1]; + + /* + * If the Var has nonempty varnullingrels, the replacement + * expression (if any) is wrapped in a PlaceHolderVar, unless it + * is simply another Var. These are cached to avoid generating + * identical PHVs with different IDs, which would lead to + * duplicate evaluations at runtime. See similar code in + * pullup_replace_vars_callback(). + * + * The cached items have phlevelsup = 0 and phnullingrels = NULL, + * so we need to copy and adjust them. + */ + if (var->varnullingrels != NULL && + var->varattno >= InvalidAttrNumber && + var->varattno <= tctx->num_attrs && + tctx->eg_cache[var->varattno] != NULL) + { + /* Copy the cached item and adjust its varlevelsup */ + newnode = copyObject(tctx->eg_cache[var->varattno]); + if (var->varlevelsup > 0) + IncrementVarSublevelsUp(newnode, var->varlevelsup, 0); + } + else + { + /* + * Generate the replacement expression. This takes care of + * expanding wholerow references, as well as adjusting + * varlevelsup and varreturningtype. + */ + newnode = ReplaceVarFromTargetList(var, + rt_fetch(var->varno, + parse->rtable), + tctx->tlist, + parse->resultRelation, + REPLACEVARS_CHANGE_VARNO, + var->varno); + + /* Insert PlaceHolderVar if needed */ + if (var->varnullingrels != NULL && !IsA(newnode, Var)) + { + /* XXX: Consider not wrapping every expression */ + newnode = (Node *) + make_placeholder_expr(context->root, + (Expr *) newnode, + bms_make_singleton(var->varno)); + + /* + * Adjust the PlaceHolderVar's phlevelsup (the wrapped + * expression itself is already correct at this point). + */ + ((PlaceHolderVar *) newnode)->phlevelsup = var->varlevelsup; + + /* + * Cache it if possible (ie, if the attno is in range, + * which it probably always should be), ensuring that the + * cached item has phlevelsup = 0. + */ + if (var->varattno >= InvalidAttrNumber && + var->varattno <= tctx->num_attrs) + { + Node *cachenode = copyObject(newnode); + + if (var->varlevelsup > 0) + IncrementVarSublevelsUp(cachenode, + -((int) var->varlevelsup), + 0); + + tctx->eg_cache[var->varattno] = cachenode; + } + } + } + + /* Propagate any varnullingrels into the replacement expression */ + if (var->varnullingrels != NULL) + { + if (IsA(newnode, Var)) + { + Var *newvar = (Var *) newnode; + + Assert(newvar->varlevelsup == var->varlevelsup); + newvar->varnullingrels = bms_add_members(newvar->varnullingrels, + var->varnullingrels); + } + else if (IsA(newnode, PlaceHolderVar)) + { + PlaceHolderVar *newphv = (PlaceHolderVar *) newnode; + + Assert(newphv->phlevelsup == var->varlevelsup); + newphv->phnullingrels = bms_add_members(newphv->phnullingrels, + var->varnullingrels); + } + else + { + /* + * Currently, the code above wraps all non-Var expressions + * in PlaceHolderVars, so we should never see anything + * else here. If that changes, this will need code + * similar to that in pullup_replace_vars_callback(). + */ + elog(ERROR, "unexpected expression for virtual generated column"); + } + } + + return newnode; + } + } + else if (IsA(node, Query)) + { + Query *newnode; + + context->sublevels_up++; + newnode = query_tree_mutator((Query *) node, + expand_virtual_generated_columns_callback, + context, + 0); + context->sublevels_up--; + + return (Node *) newnode; + } + return expression_tree_mutator(node, + expand_virtual_generated_columns_callback, + context); +} + +/* * replace_empty_jointree * If the Query's jointree is empty, replace it with a dummy RTE_RESULT * relation. @@ -1179,6 +1422,13 @@ pull_up_simple_subquery(PlannerInfo *roo Assert(subquery->cteList == NIL); /* + * Expand any virtual generated columns in the subquery, before we pull it + * up (this has already been done for the parent query). + */ + subquery = subroot->parse = expand_virtual_generated_columns(subroot, + subquery); + + /* * If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so * that we don't need so many special cases to deal with that situation. */ diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c new file mode 100644 index e996bdc..ecc695f --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -96,8 +96,6 @@ static List *matchLocks(CmdType event, R int varno, Query *parsetree, bool *hasUpdate); static Query *fireRIRrules(Query *parsetree, List *activeRIRs); static Bitmapset *adjust_view_column_set(Bitmapset *cols, List *targetlist); -static Node *expand_generated_columns_internal(Node *node, Relation rel, int rt_index, - RangeTblEntry *rte, int result_relation); /* @@ -2190,10 +2188,6 @@ fireRIRrules(Query *parsetree, List *act * requires special recursion detection if the new quals have sublink * subqueries, and if we did it in the loop above query_tree_walker would * then recurse into those quals a second time. - * - * Finally, we expand any virtual generated columns. We do this after - * each table's RLS policies are applied because the RLS policies might - * also refer to the table's virtual generated columns. */ rt_index = 0; foreach(lc, parsetree->rtable) @@ -2207,11 +2201,10 @@ fireRIRrules(Query *parsetree, List *act ++rt_index; - /* - * Only normal relations can have RLS policies or virtual generated - * columns. - */ - if (rte->rtekind != RTE_RELATION) + /* Only normal relations can have RLS policies */ + if (rte->rtekind != RTE_RELATION || + (rte->relkind != RELKIND_RELATION && + rte->relkind != RELKIND_PARTITIONED_TABLE)) continue; rel = table_open(rte->relid, NoLock); @@ -2300,16 +2293,6 @@ fireRIRrules(Query *parsetree, List *act if (hasSubLinks) parsetree->hasSubLinks = true; - /* - * Expand any references to virtual generated columns of this table. - * Note that subqueries in virtual generated column expressions are - * not currently supported, so this cannot add any more sublinks. - */ - parsetree = (Query *) - expand_generated_columns_internal((Node *) parsetree, - rel, rt_index, rte, - parsetree->resultRelation); - table_close(rel, NoLock); } @@ -4425,31 +4408,20 @@ RewriteQuery(Query *parsetree, List *rew /* - * Expand virtual generated columns - * - * If the table contains virtual generated columns, build a target list - * containing the expanded expressions and use ReplaceVarsFromTargetList() to - * do the replacements. - * - * Vars matching rt_index at the current query level are replaced by the - * virtual generated column expressions from rel, if there are any. + * Get the virtual generated columns of a relation. * - * The caller must also provide rte, the RTE describing the target relation, - * in order to handle any whole-row Vars referencing the target, and - * result_relation, the index of the result relation, if this is part of an - * INSERT/UPDATE/DELETE/MERGE query. + * The returned list is in the form of a targetlist (of TargetEntry nodes) + * suitable for use by ReplaceVarFromTargetList/ReplaceVarsFromTargetList to + * expand references to virtual generated columns. */ -static Node * -expand_generated_columns_internal(Node *node, Relation rel, int rt_index, - RangeTblEntry *rte, int result_relation) +List * +get_virtual_generated_columns(Relation rel, int rt_index) { - TupleDesc tupdesc; + TupleDesc tupdesc = RelationGetDescr(rel); + List *tlist = NIL; - tupdesc = RelationGetDescr(rel); if (tupdesc->constr && tupdesc->constr->has_generated_virtual) { - List *tlist = NIL; - for (int i = 0; i < tupdesc->natts; i++) { Form_pg_attribute attr = TupleDescAttr(tupdesc, i); @@ -4491,14 +4463,8 @@ expand_generated_columns_internal(Node * } Assert(list_length(tlist) > 0); - - node = ReplaceVarsFromTargetList(node, rt_index, 0, rte, tlist, - result_relation, - REPLACEVARS_CHANGE_VARNO, rt_index, - NULL); } - - return node; + return tlist; } /* @@ -4515,6 +4481,7 @@ expand_generated_columns_in_expr(Node *n if (tupdesc->constr && tupdesc->constr->has_generated_virtual) { RangeTblEntry *rte; + List *tlist; rte = makeNode(RangeTblEntry); /* eref needs to be set, but the actual name doesn't matter */ @@ -4522,7 +4489,11 @@ expand_generated_columns_in_expr(Node *n rte->rtekind = RTE_RELATION; rte->relid = RelationGetRelid(rel); - node = expand_generated_columns_internal(node, rel, rt_index, rte, 0); + tlist = get_virtual_generated_columns(rel, rt_index); + + node = ReplaceVarsFromTargetList(node, rt_index, 0, rte, tlist, 0, + REPLACEVARS_CHANGE_VARNO, rt_index, + NULL); } return node; diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c new file mode 100644 index a115b21..cc268a5 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -1736,6 +1736,23 @@ ReplaceVarsFromTargetList_callback(Var * replace_rte_variables_context *context) { ReplaceVarsFromTargetList_context *rcon = (ReplaceVarsFromTargetList_context *) context->callback_arg; + + return ReplaceVarFromTargetList(var, + rcon->target_rte, + rcon->targetlist, + rcon->result_relation, + rcon->nomatch_option, + rcon->nomatch_varno); +} + +Node * +ReplaceVarFromTargetList(Var *var, + RangeTblEntry *target_rte, + List *targetlist, + int result_relation, + ReplaceVarsNoMatchOption nomatch_option, + int nomatch_varno) +{ TargetEntry *tle; if (var->varattno == InvalidAttrNumber) @@ -1744,6 +1761,7 @@ ReplaceVarsFromTargetList_callback(Var * RowExpr *rowexpr; List *colnames; List *fields; + ListCell *lc; /* * If generating an expansion for a var of a named rowtype (ie, this @@ -1755,15 +1773,27 @@ ReplaceVarsFromTargetList_callback(Var * * The varreturningtype is copied onto each individual field Var, so * that it is handled correctly when we recurse. */ - expandRTE(rcon->target_rte, + expandRTE(target_rte, var->varno, var->varlevelsup, var->varreturningtype, var->location, (var->vartype != RECORDOID), &colnames, &fields); /* Adjust the generated per-field Vars... */ - fields = (List *) replace_rte_variables_mutator((Node *) fields, - context); rowexpr = makeNode(RowExpr); - rowexpr->args = fields; + rowexpr->args = NIL; + foreach(lc, fields) + { + Node *field = lfirst(lc); + + if (field != NULL) + field = ReplaceVarFromTargetList((Var *) field, + target_rte, + targetlist, + result_relation, + nomatch_option, + nomatch_varno); + + rowexpr->args = lappend(rowexpr->args, field); + } rowexpr->row_typeid = var->vartype; rowexpr->row_format = COERCE_IMPLICIT_CAST; rowexpr->colnames = (var->vartype == RECORDOID) ? colnames : NIL; @@ -1785,12 +1815,12 @@ ReplaceVarsFromTargetList_callback(Var * } /* Normal case referencing one targetlist element */ - tle = get_tle_by_resno(rcon->targetlist, var->varattno); + tle = get_tle_by_resno(targetlist, var->varattno); if (tle == NULL || tle->resjunk) { /* Failed to find column in targetlist */ - switch (rcon->nomatch_option) + switch (nomatch_option) { case REPLACEVARS_REPORT_ERROR: /* fall through, throw error below */ @@ -1798,7 +1828,7 @@ ReplaceVarsFromTargetList_callback(Var * case REPLACEVARS_CHANGE_VARNO: var = copyObject(var); - var->varno = rcon->nomatch_varno; + var->varno = nomatch_varno; /* we leave the syntactic referent alone */ return (Node *) var; @@ -1854,15 +1884,15 @@ ReplaceVarsFromTargetList_callback(Var * * Copy varreturningtype onto any Vars in the tlist item that * refer to result_relation (which had better be non-zero). */ - if (rcon->result_relation == 0) + if (result_relation == 0) elog(ERROR, "variable returning old/new found outside RETURNING list"); - SetVarReturningType((Node *) newnode, rcon->result_relation, + SetVarReturningType((Node *) newnode, result_relation, var->varlevelsup, var->varreturningtype); /* Wrap it in a ReturningExpr, if needed, per comments above */ if (!IsA(newnode, Var) || - ((Var *) newnode)->varno != rcon->result_relation || + ((Var *) newnode)->varno != result_relation || ((Var *) newnode)->varlevelsup != var->varlevelsup) { ReturningExpr *rexpr = makeNode(ReturningExpr); diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h new file mode 100644 index 0ae57ec..9c93643 --- a/src/include/optimizer/prep.h +++ b/src/include/optimizer/prep.h @@ -22,6 +22,7 @@ * prototypes for prepjointree.c */ extern void transform_MERGE_to_join(Query *parse); +extern Query *expand_virtual_generated_columns(PlannerInfo *root, Query *parse); extern void replace_empty_jointree(Query *parse); extern void pull_up_sublinks(PlannerInfo *root); extern void preprocess_function_rtes(PlannerInfo *root); diff --git a/src/include/rewrite/rewriteHandler.h b/src/include/rewrite/rewriteHandler.h new file mode 100644 index 88fe13c..21876cf --- a/src/include/rewrite/rewriteHandler.h +++ b/src/include/rewrite/rewriteHandler.h @@ -38,6 +38,7 @@ extern void error_view_not_updatable(Rel List *mergeActionList, const char *detail); +extern List *get_virtual_generated_columns(Relation rel, int rt_index); extern Node *expand_generated_columns_in_expr(Node *node, Relation rel, int rt_index); #endif /* REWRITEHANDLER_H */ diff --git a/src/include/rewrite/rewriteManip.h b/src/include/rewrite/rewriteManip.h new file mode 100644 index 5128230..afce743 --- a/src/include/rewrite/rewriteManip.h +++ b/src/include/rewrite/rewriteManip.h @@ -85,6 +85,13 @@ extern Node *map_variable_attnos(Node *n const struct AttrMap *attno_map, Oid to_rowtype, bool *found_whole_row); +extern Node *ReplaceVarFromTargetList(Var *var, + RangeTblEntry *target_rte, + List *targetlist, + int result_relation, + ReplaceVarsNoMatchOption nomatch_option, + int nomatch_varno); + extern Node *ReplaceVarsFromTargetList(Node *node, int target_varno, int sublevels_up, RangeTblEntry *target_rte, diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list new file mode 100644 index b6c170a..4d0fcab --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3484,6 +3484,8 @@ eval_const_expressions_context exec_thread_arg execution_state exit_function +expand_generated_context +expand_generated_table_context explain_get_index_name_hook_type f_smgr fasthash_state