On Fri, 14 Feb 2025 at 10:59, Peter Eisentraut <[email protected]> wrote:
>
> On 13.02.25 14:06, jian he wrote:
> > I didn't solve the out join semantic issue.
> > i am wondering, can we do the virtual generated column expansion in
> > the rewrite stage as is,
> > and wrap the expressions in PHVs if the virtual generated
> > columns come from the nullable side of an outer join.
>
> PlaceHolderVar looks like a fitting mechanism for this. But it's so far
> a planner node, so it might take some additional consideration if we
> want to expand where it's used.
It seems pretty baked into how PHVs work that they should only be
added by the planner, so I think that I agree with Richard -- virtual
generated columns probably have to be expanded in the planner rather
than the rewriter.
> Maybe a short-term fix would be to error out if we find ourselves about
> to expand a Var with varnullingrels != NULL. That would mean you
> couldn't use a virtual generated column on the nullable output side of
> an outer join, which is annoying but not fatal, and we could fix it
> incrementally later.
I think that would be rather a sad limitation to have. It would be
nice to have this fully working for the next release.
Attached is a rough patch that moves the expansion of virtual
generated columns to the planner. It needs a lot more testing (and
some regression tests), but it does seem to fix all the issues
mentioned in this thread.
Regards,
Dean
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
new file mode 100644
index 7b1a8a0..041c152
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -712,6 +712,13 @@ subquery_planner(PlannerGlobal *glob, Qu
transform_MERGE_to_join(parse);
/*
+ * Expand any virtual generated columns. Note that this step does not
+ * descend into subqueries; if we pull up any subqueries below, their
+ * virtual generated columns are expanded just before pulling them up.
+ */
+ parse = root->parse = expand_virtual_generated_columns(root, parse);
+
+ /*
* If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
* that we don't need so many special cases to deal with that situation.
*/
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
new file mode 100644
index 5d9225e..95ef3f4
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -4,6 +4,8 @@
* Planner preprocessing for subqueries and join tree manipulation.
*
* NOTE: the intended sequence for invoking these operations is
+ * transform_MERGE_to_join
+ * expand_virtual_generated_columns
* replace_empty_jointree
* pull_up_sublinks
* preprocess_function_rtes
@@ -25,6 +27,7 @@
*/
#include "postgres.h"
+#include "access/table.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "miscadmin.h"
@@ -39,8 +42,25 @@
#include "optimizer/tlist.h"
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
+#include "rewrite/rewriteHandler.h"
#include "rewrite/rewriteManip.h"
+#include "utils/rel.h"
+
+
+typedef struct expand_generated_table_context
+{
+ int num_attrs; /* number of table columns */
+ List *tlist; /* virtual generated column replacements */
+ Node **eg_cache; /* cache for results with PHVs */
+} expand_generated_table_context;
+typedef struct expand_generated_context
+{
+ PlannerInfo *root;
+ Query *parse; /* query being rewritten */
+ int sublevels_up; /* (current) nesting depth */
+ expand_generated_table_context *tbl_contexts; /* per RTE of query */
+} expand_generated_context;
typedef struct nullingrel_info
{
@@ -87,6 +107,8 @@ typedef struct reduce_outer_joins_partia
Relids unreduced_side; /* relids in its still-nullable side */
} reduce_outer_joins_partial_state;
+static Node *expand_virtual_generated_columns_callback(Node *node,
+ expand_generated_context *context);
static Node *pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
Relids *relids);
static Node *pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
@@ -378,6 +400,227 @@ transform_MERGE_to_join(Query *parse)
}
/*
+ * Expand all virtual generated column references in a query.
+ *
+ * Note that this only processes virtual generated columns in relation RTEs
+ * in the query's rtable; it does not descend into subqueries.
+ */
+Query *
+expand_virtual_generated_columns(PlannerInfo *root, Query *parse)
+{
+ expand_generated_context egcontext;
+ int rt_index;
+
+ egcontext.root = root;
+ egcontext.parse = parse;
+ egcontext.sublevels_up = 0;
+ egcontext.tbl_contexts = NULL;
+
+ /* Scan the range table for relations with virtual generated columns */
+ rt_index = 0;
+ foreach_node(RangeTblEntry, rte, parse->rtable)
+ {
+ Relation rel;
+ int num_attrs;
+ List *tlist;
+
+ ++rt_index;
+
+ /* Only normal relations can have virtual generated columns */
+ if (rte->rtekind != RTE_RELATION)
+ continue;
+
+ /*
+ * Look for virtual generated columns. We assume that previous code
+ * already acquired a lock on the table, so we need no lock here.
+ */
+ rel = table_open(rte->relid, NoLock);
+ num_attrs = RelationGetDescr(rel)->natts;
+ tlist = get_virtual_generated_columns(rel, rt_index);
+ table_close(rel, NoLock);
+
+ if (tlist)
+ {
+ expand_generated_table_context *tctx;
+
+ /*
+ * Record details of this table's virtual generated columns. The
+ * first time through, build the per-table context array.
+ */
+ if (egcontext.tbl_contexts == NULL)
+ egcontext.tbl_contexts = palloc0(list_length(parse->rtable) *
+ sizeof(expand_generated_table_context));
+
+ tctx = &egcontext.tbl_contexts[rt_index - 1];
+ tctx->num_attrs = num_attrs;
+ tctx->tlist = tlist;
+ /* PHV cache for each attr, plus wholerow (attrnum = 0) */
+ tctx->eg_cache = palloc0((num_attrs + 1) * sizeof(Node *));
+ }
+ }
+
+ /*
+ * If any relations had virtual generated columns, perform the necessary
+ * replacements.
+ */
+ if (egcontext.tbl_contexts != NULL)
+ parse = query_tree_mutator(parse,
+ expand_virtual_generated_columns_callback,
+ &egcontext,
+ 0);
+
+ return parse;
+}
+
+static Node *
+expand_virtual_generated_columns_callback(Node *node,
+ expand_generated_context *context)
+{
+ if (node == NULL)
+ return NULL;
+ else if (IsA(node, Var))
+ {
+ Query *parse = context->parse;
+ Var *var = (Var *) node;
+
+ /*
+ * If the Var is from a relation with virtual generated columns, then
+ * replace it with the appropriate expression, if necessary.
+ */
+ if (var->varlevelsup == context->sublevels_up &&
+ var->varno > 0 &&
+ var->varno <= list_length(parse->rtable) &&
+ context->tbl_contexts[var->varno - 1].tlist)
+ {
+ expand_generated_table_context *tctx;
+ Node *newnode;
+
+ tctx = &context->tbl_contexts[var->varno - 1];
+
+ /*
+ * If the Var has nonempty varnullingrels, the replacement
+ * expression (if any) is wrapped in a PlaceHolderVar, unless it
+ * is simply another Var. These are cached to avoid generating
+ * identical PHVs with different IDs, which would lead to
+ * duplicate evaluations at runtime. See similar code in
+ * pullup_replace_vars_callback().
+ *
+ * The cached items have phlevelsup = 0 and phnullingrels = NULL,
+ * so we need to copy and adjust them.
+ */
+ if (var->varnullingrels != NULL &&
+ var->varattno >= InvalidAttrNumber &&
+ var->varattno <= tctx->num_attrs &&
+ tctx->eg_cache[var->varattno] != NULL)
+ {
+ /* Copy the cached item and adjust its varlevelsup */
+ newnode = copyObject(tctx->eg_cache[var->varattno]);
+ if (var->varlevelsup > 0)
+ IncrementVarSublevelsUp(newnode, var->varlevelsup, 0);
+ }
+ else
+ {
+ /*
+ * Generate the replacement expression. This takes care of
+ * expanding wholerow references, as well as adjusting
+ * varlevelsup and varreturningtype.
+ */
+ newnode = ReplaceVarFromTargetList(var,
+ rt_fetch(var->varno,
+ parse->rtable),
+ tctx->tlist,
+ parse->resultRelation,
+ REPLACEVARS_CHANGE_VARNO,
+ var->varno);
+
+ /* Insert PlaceHolderVar if needed */
+ if (var->varnullingrels != NULL && !IsA(newnode, Var))
+ {
+ /* XXX: Consider not wrapping every expression */
+ newnode = (Node *)
+ make_placeholder_expr(context->root,
+ (Expr *) newnode,
+ bms_make_singleton(var->varno));
+
+ /*
+ * Adjust the PlaceHolderVar's phlevelsup (the wrapped
+ * expression itself is already correct at this point).
+ */
+ ((PlaceHolderVar *) newnode)->phlevelsup = var->varlevelsup;
+
+ /*
+ * Cache it if possible (ie, if the attno is in range,
+ * which it probably always should be), ensuring that the
+ * cached item has phlevelsup = 0.
+ */
+ if (var->varattno >= InvalidAttrNumber &&
+ var->varattno <= tctx->num_attrs)
+ {
+ Node *cachenode = copyObject(newnode);
+
+ if (var->varlevelsup > 0)
+ IncrementVarSublevelsUp(cachenode,
+ -((int) var->varlevelsup),
+ 0);
+
+ tctx->eg_cache[var->varattno] = cachenode;
+ }
+ }
+ }
+
+ /* Propagate any varnullingrels into the replacement expression */
+ if (var->varnullingrels != NULL)
+ {
+ if (IsA(newnode, Var))
+ {
+ Var *newvar = (Var *) newnode;
+
+ Assert(newvar->varlevelsup == var->varlevelsup);
+ newvar->varnullingrels = bms_add_members(newvar->varnullingrels,
+ var->varnullingrels);
+ }
+ else if (IsA(newnode, PlaceHolderVar))
+ {
+ PlaceHolderVar *newphv = (PlaceHolderVar *) newnode;
+
+ Assert(newphv->phlevelsup == var->varlevelsup);
+ newphv->phnullingrels = bms_add_members(newphv->phnullingrels,
+ var->varnullingrels);
+ }
+ else
+ {
+ /*
+ * Currently, the code above wraps all non-Var expressions
+ * in PlaceHolderVars, so we should never see anything
+ * else here. If that changes, this will need code
+ * similar to that in pullup_replace_vars_callback().
+ */
+ elog(ERROR, "unexpected expression for virtual generated column");
+ }
+ }
+
+ return newnode;
+ }
+ }
+ else if (IsA(node, Query))
+ {
+ Query *newnode;
+
+ context->sublevels_up++;
+ newnode = query_tree_mutator((Query *) node,
+ expand_virtual_generated_columns_callback,
+ context,
+ 0);
+ context->sublevels_up--;
+
+ return (Node *) newnode;
+ }
+ return expression_tree_mutator(node,
+ expand_virtual_generated_columns_callback,
+ context);
+}
+
+/*
* replace_empty_jointree
* If the Query's jointree is empty, replace it with a dummy RTE_RESULT
* relation.
@@ -1179,6 +1422,13 @@ pull_up_simple_subquery(PlannerInfo *roo
Assert(subquery->cteList == NIL);
/*
+ * Expand any virtual generated columns in the subquery, before we pull it
+ * up (this has already been done for the parent query).
+ */
+ subquery = subroot->parse = expand_virtual_generated_columns(subroot,
+ subquery);
+
+ /*
* If the FROM clause is empty, replace it with a dummy RTE_RESULT RTE, so
* that we don't need so many special cases to deal with that situation.
*/
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
new file mode 100644
index e996bdc..ecc695f
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -96,8 +96,6 @@ static List *matchLocks(CmdType event, R
int varno, Query *parsetree, bool *hasUpdate);
static Query *fireRIRrules(Query *parsetree, List *activeRIRs);
static Bitmapset *adjust_view_column_set(Bitmapset *cols, List *targetlist);
-static Node *expand_generated_columns_internal(Node *node, Relation rel, int rt_index,
- RangeTblEntry *rte, int result_relation);
/*
@@ -2190,10 +2188,6 @@ fireRIRrules(Query *parsetree, List *act
* requires special recursion detection if the new quals have sublink
* subqueries, and if we did it in the loop above query_tree_walker would
* then recurse into those quals a second time.
- *
- * Finally, we expand any virtual generated columns. We do this after
- * each table's RLS policies are applied because the RLS policies might
- * also refer to the table's virtual generated columns.
*/
rt_index = 0;
foreach(lc, parsetree->rtable)
@@ -2207,11 +2201,10 @@ fireRIRrules(Query *parsetree, List *act
++rt_index;
- /*
- * Only normal relations can have RLS policies or virtual generated
- * columns.
- */
- if (rte->rtekind != RTE_RELATION)
+ /* Only normal relations can have RLS policies */
+ if (rte->rtekind != RTE_RELATION ||
+ (rte->relkind != RELKIND_RELATION &&
+ rte->relkind != RELKIND_PARTITIONED_TABLE))
continue;
rel = table_open(rte->relid, NoLock);
@@ -2300,16 +2293,6 @@ fireRIRrules(Query *parsetree, List *act
if (hasSubLinks)
parsetree->hasSubLinks = true;
- /*
- * Expand any references to virtual generated columns of this table.
- * Note that subqueries in virtual generated column expressions are
- * not currently supported, so this cannot add any more sublinks.
- */
- parsetree = (Query *)
- expand_generated_columns_internal((Node *) parsetree,
- rel, rt_index, rte,
- parsetree->resultRelation);
-
table_close(rel, NoLock);
}
@@ -4425,31 +4408,20 @@ RewriteQuery(Query *parsetree, List *rew
/*
- * Expand virtual generated columns
- *
- * If the table contains virtual generated columns, build a target list
- * containing the expanded expressions and use ReplaceVarsFromTargetList() to
- * do the replacements.
- *
- * Vars matching rt_index at the current query level are replaced by the
- * virtual generated column expressions from rel, if there are any.
+ * Get the virtual generated columns of a relation.
*
- * The caller must also provide rte, the RTE describing the target relation,
- * in order to handle any whole-row Vars referencing the target, and
- * result_relation, the index of the result relation, if this is part of an
- * INSERT/UPDATE/DELETE/MERGE query.
+ * The returned list is in the form of a targetlist (of TargetEntry nodes)
+ * suitable for use by ReplaceVarFromTargetList/ReplaceVarsFromTargetList to
+ * expand references to virtual generated columns.
*/
-static Node *
-expand_generated_columns_internal(Node *node, Relation rel, int rt_index,
- RangeTblEntry *rte, int result_relation)
+List *
+get_virtual_generated_columns(Relation rel, int rt_index)
{
- TupleDesc tupdesc;
+ TupleDesc tupdesc = RelationGetDescr(rel);
+ List *tlist = NIL;
- tupdesc = RelationGetDescr(rel);
if (tupdesc->constr && tupdesc->constr->has_generated_virtual)
{
- List *tlist = NIL;
-
for (int i = 0; i < tupdesc->natts; i++)
{
Form_pg_attribute attr = TupleDescAttr(tupdesc, i);
@@ -4491,14 +4463,8 @@ expand_generated_columns_internal(Node *
}
Assert(list_length(tlist) > 0);
-
- node = ReplaceVarsFromTargetList(node, rt_index, 0, rte, tlist,
- result_relation,
- REPLACEVARS_CHANGE_VARNO, rt_index,
- NULL);
}
-
- return node;
+ return tlist;
}
/*
@@ -4515,6 +4481,7 @@ expand_generated_columns_in_expr(Node *n
if (tupdesc->constr && tupdesc->constr->has_generated_virtual)
{
RangeTblEntry *rte;
+ List *tlist;
rte = makeNode(RangeTblEntry);
/* eref needs to be set, but the actual name doesn't matter */
@@ -4522,7 +4489,11 @@ expand_generated_columns_in_expr(Node *n
rte->rtekind = RTE_RELATION;
rte->relid = RelationGetRelid(rel);
- node = expand_generated_columns_internal(node, rel, rt_index, rte, 0);
+ tlist = get_virtual_generated_columns(rel, rt_index);
+
+ node = ReplaceVarsFromTargetList(node, rt_index, 0, rte, tlist, 0,
+ REPLACEVARS_CHANGE_VARNO, rt_index,
+ NULL);
}
return node;
diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c
new file mode 100644
index a115b21..cc268a5
--- a/src/backend/rewrite/rewriteManip.c
+++ b/src/backend/rewrite/rewriteManip.c
@@ -1736,6 +1736,23 @@ ReplaceVarsFromTargetList_callback(Var *
replace_rte_variables_context *context)
{
ReplaceVarsFromTargetList_context *rcon = (ReplaceVarsFromTargetList_context *) context->callback_arg;
+
+ return ReplaceVarFromTargetList(var,
+ rcon->target_rte,
+ rcon->targetlist,
+ rcon->result_relation,
+ rcon->nomatch_option,
+ rcon->nomatch_varno);
+}
+
+Node *
+ReplaceVarFromTargetList(Var *var,
+ RangeTblEntry *target_rte,
+ List *targetlist,
+ int result_relation,
+ ReplaceVarsNoMatchOption nomatch_option,
+ int nomatch_varno)
+{
TargetEntry *tle;
if (var->varattno == InvalidAttrNumber)
@@ -1744,6 +1761,7 @@ ReplaceVarsFromTargetList_callback(Var *
RowExpr *rowexpr;
List *colnames;
List *fields;
+ ListCell *lc;
/*
* If generating an expansion for a var of a named rowtype (ie, this
@@ -1755,15 +1773,27 @@ ReplaceVarsFromTargetList_callback(Var *
* The varreturningtype is copied onto each individual field Var, so
* that it is handled correctly when we recurse.
*/
- expandRTE(rcon->target_rte,
+ expandRTE(target_rte,
var->varno, var->varlevelsup, var->varreturningtype,
var->location, (var->vartype != RECORDOID),
&colnames, &fields);
/* Adjust the generated per-field Vars... */
- fields = (List *) replace_rte_variables_mutator((Node *) fields,
- context);
rowexpr = makeNode(RowExpr);
- rowexpr->args = fields;
+ rowexpr->args = NIL;
+ foreach(lc, fields)
+ {
+ Node *field = lfirst(lc);
+
+ if (field != NULL)
+ field = ReplaceVarFromTargetList((Var *) field,
+ target_rte,
+ targetlist,
+ result_relation,
+ nomatch_option,
+ nomatch_varno);
+
+ rowexpr->args = lappend(rowexpr->args, field);
+ }
rowexpr->row_typeid = var->vartype;
rowexpr->row_format = COERCE_IMPLICIT_CAST;
rowexpr->colnames = (var->vartype == RECORDOID) ? colnames : NIL;
@@ -1785,12 +1815,12 @@ ReplaceVarsFromTargetList_callback(Var *
}
/* Normal case referencing one targetlist element */
- tle = get_tle_by_resno(rcon->targetlist, var->varattno);
+ tle = get_tle_by_resno(targetlist, var->varattno);
if (tle == NULL || tle->resjunk)
{
/* Failed to find column in targetlist */
- switch (rcon->nomatch_option)
+ switch (nomatch_option)
{
case REPLACEVARS_REPORT_ERROR:
/* fall through, throw error below */
@@ -1798,7 +1828,7 @@ ReplaceVarsFromTargetList_callback(Var *
case REPLACEVARS_CHANGE_VARNO:
var = copyObject(var);
- var->varno = rcon->nomatch_varno;
+ var->varno = nomatch_varno;
/* we leave the syntactic referent alone */
return (Node *) var;
@@ -1854,15 +1884,15 @@ ReplaceVarsFromTargetList_callback(Var *
* Copy varreturningtype onto any Vars in the tlist item that
* refer to result_relation (which had better be non-zero).
*/
- if (rcon->result_relation == 0)
+ if (result_relation == 0)
elog(ERROR, "variable returning old/new found outside RETURNING list");
- SetVarReturningType((Node *) newnode, rcon->result_relation,
+ SetVarReturningType((Node *) newnode, result_relation,
var->varlevelsup, var->varreturningtype);
/* Wrap it in a ReturningExpr, if needed, per comments above */
if (!IsA(newnode, Var) ||
- ((Var *) newnode)->varno != rcon->result_relation ||
+ ((Var *) newnode)->varno != result_relation ||
((Var *) newnode)->varlevelsup != var->varlevelsup)
{
ReturningExpr *rexpr = makeNode(ReturningExpr);
diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h
new file mode 100644
index 0ae57ec..9c93643
--- a/src/include/optimizer/prep.h
+++ b/src/include/optimizer/prep.h
@@ -22,6 +22,7 @@
* prototypes for prepjointree.c
*/
extern void transform_MERGE_to_join(Query *parse);
+extern Query *expand_virtual_generated_columns(PlannerInfo *root, Query *parse);
extern void replace_empty_jointree(Query *parse);
extern void pull_up_sublinks(PlannerInfo *root);
extern void preprocess_function_rtes(PlannerInfo *root);
diff --git a/src/include/rewrite/rewriteHandler.h b/src/include/rewrite/rewriteHandler.h
new file mode 100644
index 88fe13c..21876cf
--- a/src/include/rewrite/rewriteHandler.h
+++ b/src/include/rewrite/rewriteHandler.h
@@ -38,6 +38,7 @@ extern void error_view_not_updatable(Rel
List *mergeActionList,
const char *detail);
+extern List *get_virtual_generated_columns(Relation rel, int rt_index);
extern Node *expand_generated_columns_in_expr(Node *node, Relation rel, int rt_index);
#endif /* REWRITEHANDLER_H */
diff --git a/src/include/rewrite/rewriteManip.h b/src/include/rewrite/rewriteManip.h
new file mode 100644
index 5128230..afce743
--- a/src/include/rewrite/rewriteManip.h
+++ b/src/include/rewrite/rewriteManip.h
@@ -85,6 +85,13 @@ extern Node *map_variable_attnos(Node *n
const struct AttrMap *attno_map,
Oid to_rowtype, bool *found_whole_row);
+extern Node *ReplaceVarFromTargetList(Var *var,
+ RangeTblEntry *target_rte,
+ List *targetlist,
+ int result_relation,
+ ReplaceVarsNoMatchOption nomatch_option,
+ int nomatch_varno);
+
extern Node *ReplaceVarsFromTargetList(Node *node,
int target_varno, int sublevels_up,
RangeTblEntry *target_rte,
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
new file mode 100644
index b6c170a..4d0fcab
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3484,6 +3484,8 @@ eval_const_expressions_context
exec_thread_arg
execution_state
exit_function
+expand_generated_context
+expand_generated_table_context
explain_get_index_name_hook_type
f_smgr
fasthash_state