On Sat, Feb 15, 2025 at 8:37 PM Dean Rasheed <dean.a.rash...@gmail.com> wrote: > > On Fri, 14 Feb 2025 at 10:59, Peter Eisentraut <pe...@eisentraut.org> wrote: > > > > On 13.02.25 14:06, jian he wrote: > > > I didn't solve the out join semantic issue. > > > i am wondering, can we do the virtual generated column expansion in > > > the rewrite stage as is, > > > and wrap the expressions in PHVs if the virtual generated > > > columns come from the nullable side of an outer join. > > > > PlaceHolderVar looks like a fitting mechanism for this. But it's so far > > a planner node, so it might take some additional consideration if we > > want to expand where it's used. > > It seems pretty baked into how PHVs work that they should only be > added by the planner, so I think that I agree with Richard -- virtual > generated columns probably have to be expanded in the planner rather > than the rewriter. > > > Maybe a short-term fix would be to error out if we find ourselves about > > to expand a Var with varnullingrels != NULL. That would mean you > > couldn't use a virtual generated column on the nullable output side of > > an outer join, which is annoying but not fatal, and we could fix it > > incrementally later. > > I think that would be rather a sad limitation to have. It would be > nice to have this fully working for the next release. > error out seems not that hard, IMHO. i think, we still have time to figure out how to make it fully working in v18.
> Attached is a rough patch that moves the expansion of virtual > generated columns to the planner. It needs a lot more testing (and > some regression tests), but it does seem to fix all the issues > mentioned in this thread. > I will review it later. In the meantime, I also came up with my patch (including tests) that solves all the issues.
From 69d4749613127323bd01fcad41c8154f6744a52f Mon Sep 17 00:00:00 2001 From: jian he <jian.universality@gmail.com> Date: Mon, 17 Feb 2025 14:37:17 +0800 Subject: [PATCH] fix expand virtual generated column Var node varnullingrels field To expand the virtual generated column Var node, we need to copy the fields of the original virtual generated column Var node, including varnullingrels, varlevelsup, varreturningtype, and varattno, to the newly expanded Var node. ReplaceVarsFromTargetList_callback didn't taken care of varnullingrels, this patch fix this issue. discussion: 75eb1a6f-d59f-42e6-8a78-124ee808cda7@gmail.com">https://postgr.es/m/75eb1a6f-d59f-42e6-8a78-124ee808cda7@gmail.com --- src/backend/optimizer/plan/planner.c | 5 + src/backend/optimizer/prep/prepjointree.c | 122 ++++++++++++++++++ src/backend/rewrite/rewriteHandler.c | 39 ++++-- src/backend/rewrite/rewriteManip.c | 120 +++++++++++++++++ src/include/optimizer/prep.h | 1 + src/include/rewrite/rewriteHandler.h | 1 + src/include/rewrite/rewriteManip.h | 4 + .../regress/expected/generated_virtual.out | 59 +++++++++ src/test/regress/sql/generated_virtual.sql | 17 +++ src/tools/pgindent/typedefs.list | 1 + 10 files changed, 359 insertions(+), 10 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 7b1a8a0a9f1..8f0be3dfadc 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -57,6 +57,7 @@ #include "parser/parse_relation.h" #include "parser/parsetree.h" #include "partitioning/partdesc.h" +#include "rewrite/rewriteHandler.h" #include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" #include "utils/rel.h" @@ -706,6 +707,10 @@ subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root, if (parse->cteList) SS_process_ctes(root); + parse = (Query *) preprocess_nullable_generated_cols(root, (Node *) root->parse); + parse = (Query *) expand_generated_columns((Node *) parse); + root->parse = parse; + /* * If it's a MERGE command, transform the joinlist as appropriate. */ diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 5d9225e9909..8224a53da47 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -25,6 +25,7 @@ */ #include "postgres.h" +#include "access/table.h" #include "catalog/pg_type.h" #include "funcapi.h" #include "miscadmin.h" @@ -40,6 +41,7 @@ #include "parser/parse_relation.h" #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" +#include "utils/rel.h" typedef struct nullingrel_info @@ -68,6 +70,14 @@ typedef struct pullup_replace_vars_context Node **rv_cache; /* cache for results with PHVs */ } pullup_replace_vars_context; +typedef struct replace_varsnulling_context +{ + PlannerInfo *root; + RangeTblEntry *target_rte; /* RTE of subquery */ + Bitmapset *gen_varattno; + int target_varno; /* RTE index to search for */ + int sublevels_up; /* (current) nesting depth */ +} replace_varsnulling_context; typedef struct reduce_outer_joins_pass1_state { Relids relids; /* base relids within this subtree */ @@ -160,6 +170,118 @@ static void get_nullingrels_recurse(Node *jtnode, Relids upper_nullingrels, nullingrel_info *info); +static Node * +replace_varnulling_variables_mutator(Node *node, + replace_varsnulling_context *context) +{ + if (node == NULL) + return NULL; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (var->varno == context->target_varno && + var->varlevelsup == context->sublevels_up && + bms_is_member(var->varattno, context->gen_varattno) && + var->varnullingrels != NULL) + { + Node *newnode; + PlaceHolderVar *newphv; + newnode = (Node *) copyObject(var); + + newnode = (Node *) + make_placeholder_expr(context->root, + (Expr *) newnode, + bms_make_singleton(context->target_varno)); + + newphv = (PlaceHolderVar *) newnode; + newphv->phnullingrels = bms_add_members(newphv->phnullingrels, + var->varnullingrels); + newphv->phlevelsup = var->varlevelsup; + return newnode; + } + } + else if (IsA(node, Query)) + { + Query *newnode; + context->sublevels_up++; + newnode = query_tree_mutator((Query *) node, + replace_varnulling_variables_mutator, + context, + 0); + context->sublevels_up--; + return (Node *) newnode; + } + return expression_tree_mutator(node, replace_varnulling_variables_mutator, context); +} + +static Node * +replace_varsnulling(Node *node, replace_varsnulling_context *context) +{ + Node *result; + + result = query_or_expression_tree_mutator(node, + replace_varnulling_variables_mutator, + context, + 0); + return result; +} + +Node * +preprocess_nullable_generated_cols(PlannerInfo *root, Node *node) +{ + int rt_index = 0; + Query *parse = (Query *) node; + + foreach_node(RangeTblEntry, rte, parse->rtable) + { + ++rt_index; + + if (rte->rtekind == RTE_RELATION) + { + Relation rel; + TupleDesc tupdesc; + + rel = table_open(rte->relid, NoLock); + tupdesc = RelationGetDescr(rel); + + if (tupdesc->constr && tupdesc->constr->has_generated_virtual) + { + int i; + Bitmapset *generated_cols = NULL; + + for (i = 1; i <= tupdesc->natts; i++) + { + Form_pg_attribute attr = TupleDescAttr(tupdesc, i - 1); + + /* Ignore dropped attributes. */ + if (attr->attisdropped) + continue; + + if (attr->attgenerated != ATTRIBUTE_GENERATED_VIRTUAL) + continue; + + if (attno_contain_varnullingrel((Node *) parse->targetList, rt_index, attr->attnum)) + generated_cols = bms_add_member(generated_cols, attr->attnum); + } + if (generated_cols != NULL) + { + replace_varsnulling_context rvcontext; + rvcontext.root = root; + rvcontext.target_rte = rte; + rvcontext.gen_varattno = generated_cols; + rvcontext.target_varno = rt_index; + rvcontext.sublevels_up = 0; + + parse->targetList = (List *) replace_varsnulling((Node *) parse->targetList, &rvcontext); + } + } + table_close(rel, NoLock); + } + } + return (Node *)parse; +} + /* * transform_MERGE_to_join * Replace a MERGE's jointree to also include the target relation. diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index e996bdc0d21..2ab9566e1a0 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -2300,16 +2300,6 @@ fireRIRrules(Query *parsetree, List *activeRIRs) if (hasSubLinks) parsetree->hasSubLinks = true; - /* - * Expand any references to virtual generated columns of this table. - * Note that subqueries in virtual generated column expressions are - * not currently supported, so this cannot add any more sublinks. - */ - parsetree = (Query *) - expand_generated_columns_internal((Node *) parsetree, - rel, rt_index, rte, - parsetree->resultRelation); - table_close(rel, NoLock); } @@ -4501,6 +4491,35 @@ expand_generated_columns_internal(Node *node, Relation rel, int rt_index, return node; } +Node * +expand_generated_columns(Node *node) +{ + int rt_index = 0; + Query *parse = (Query *) node; + foreach_node(RangeTblEntry, rte, parse->rtable) + { + Relation rel; + + ++rt_index; + if (rte->rtekind == RTE_SUBQUERY) + { + rte->subquery = (Query *) + expand_generated_columns((Node *) rte->subquery); + } + + if (rte->rtekind != RTE_RELATION) + continue; + + rel = table_open(rte->relid, NoLock); + parse = (Query *) + expand_generated_columns_internal((Node *) parse, + rel, rt_index, rte, + parse->resultRelation); + table_close(rel, NoLock); + } + return (Node *) parse; +} + /* * Expand virtual generated columns in an expression * diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index a115b217c91..d1f2e00b3a3 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -30,6 +30,13 @@ typedef struct int sublevels_up; } contain_aggs_of_level_context; +typedef struct +{ + int varno; + int sublevels_up; + int varattno; +} attno_varnullingrel_context; + typedef struct { int agg_location; @@ -884,6 +891,64 @@ IncrementVarSublevelsUp_rtable(List *rtable, int delta_sublevels_up, QTW_EXAMINE_RTES_BEFORE); } +/* + * SetVarNullingrels - adjust Var nodes for a specified varnullingrels. + * + * Find all Var nodes referring to the specified varno in the given + * expression and set their varnullingrels to the specified value. + */ +typedef struct +{ + int sublevels_up; + int varno; + Bitmapset *varnullingrels; +} SetVarNullingrels_context; + +static bool +SetVarNullingrels_walker(Node *node, + SetVarNullingrels_context *context) +{ + if (node == NULL) + return false; + + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (var->varlevelsup == context->sublevels_up && + var->varno == context->varno) + var->varnullingrels = bms_union(context->varnullingrels, + var->varnullingrels); + + return false; + } + + if (IsA(node, Query)) + { + /* Recurse into subselects */ + bool result; + + context->sublevels_up++; + result = query_tree_walker((Query *) node, SetVarNullingrels_walker, + context, 0); + context->sublevels_up--; + return result; + } + return expression_tree_walker(node, SetVarNullingrels_walker, context); +} +void +SetVarNullingrels(Node *node, int sublevels_up, int varno, Bitmapset *varnullingrels) +{ + SetVarNullingrels_context context; + + context.sublevels_up = sublevels_up; + context.varno = varno; + context.varnullingrels = varnullingrels; + + /* Expect to start with an expression */ + SetVarNullingrels_walker(node, &context); +} + /* * SetVarReturningType - adjust Var nodes for a specified varreturningtype. * @@ -1199,6 +1264,57 @@ AddInvertedQual(Query *parsetree, Node *qual) } +static bool +attno_contain_varnullingrel_walker(Node *node, + attno_varnullingrel_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Var)) + { + Var *var = (Var *) node; + + if (var->varno == context->varno + && var->varattno == context->varattno + && var->varlevelsup == context->sublevels_up) + { + if (var->varnullingrels != NULL) + return true; + } + return false; + } + if (IsA(node, Query)) + { + /* Recurse into subselects */ + bool result; + + context->sublevels_up++; + result = query_tree_walker((Query *) node, + attno_contain_varnullingrel_walker, + context, 0); + context->sublevels_up--; + + return result; + } + return expression_tree_walker(node, attno_contain_varnullingrel_walker, + context); +} + +bool +attno_contain_varnullingrel(Node *node, int varno, int varattno) +{ + attno_varnullingrel_context context; + + context.varno = varno; + context.sublevels_up = 0; + context.varattno = varattno; + + return query_or_expression_tree_walker(node, + attno_contain_varnullingrel_walker, + &context, + 0); +} + /* * add_nulling_relids() finds Vars and PlaceHolderVars that belong to any * of the target_relids, and adds added_relids to their varnullingrels @@ -1832,6 +1948,10 @@ ReplaceVarsFromTargetList_callback(Var *var, if (var->varlevelsup > 0) IncrementVarSublevelsUp((Node *) newnode, var->varlevelsup, 0); + if (var->varnullingrels != NULL) + SetVarNullingrels((Node *) newnode, var->varlevelsup, + var->varno, + var->varnullingrels); /* * Check to see if the tlist item contains a PARAM_MULTIEXPR Param, * and throw error if so. This case could only happen when expanding diff --git a/src/include/optimizer/prep.h b/src/include/optimizer/prep.h index 0ae57ec24a4..e63af1f2a85 100644 --- a/src/include/optimizer/prep.h +++ b/src/include/optimizer/prep.h @@ -33,6 +33,7 @@ extern Relids get_relids_in_jointree(Node *jtnode, bool include_outer_joins, bool include_inner_joins); extern Relids get_relids_for_join(Query *query, int joinrelid); +extern Node *preprocess_nullable_generated_cols(PlannerInfo *root, Node *node); /* * prototypes for preptlist.c */ diff --git a/src/include/rewrite/rewriteHandler.h b/src/include/rewrite/rewriteHandler.h index 88fe13c5f4f..dff5746c0d6 100644 --- a/src/include/rewrite/rewriteHandler.h +++ b/src/include/rewrite/rewriteHandler.h @@ -39,5 +39,6 @@ extern void error_view_not_updatable(Relation view, const char *detail); extern Node *expand_generated_columns_in_expr(Node *node, Relation rel, int rt_index); +extern Node *expand_generated_columns(Node *node); #endif /* REWRITEHANDLER_H */ diff --git a/src/include/rewrite/rewriteManip.h b/src/include/rewrite/rewriteManip.h index 512823033b9..c16d18345ab 100644 --- a/src/include/rewrite/rewriteManip.h +++ b/src/include/rewrite/rewriteManip.h @@ -48,6 +48,9 @@ extern void ChangeVarNodes(Node *node, int rt_index, int new_index, int sublevels_up); extern void IncrementVarSublevelsUp(Node *node, int delta_sublevels_up, int min_sublevels_up); +extern void SetVarNullingrels(Node *node, int sublevels_up, + int varno, + Bitmapset *varnullingrels); extern void IncrementVarSublevelsUp_rtable(List *rtable, int delta_sublevels_up, int min_sublevels_up); @@ -60,6 +63,7 @@ extern void AddQual(Query *parsetree, Node *qual); extern void AddInvertedQual(Query *parsetree, Node *qual); extern bool contain_aggs_of_level(Node *node, int levelsup); +extern bool attno_contain_varnullingrel(Node *node, int varno, int varattno); extern int locate_agg_of_level(Node *node, int levelsup); extern bool contain_windowfuncs(Node *node); extern int locate_windowfunc(Node *node); diff --git a/src/test/regress/expected/generated_virtual.out b/src/test/regress/expected/generated_virtual.out index 35638812be9..babf16db8ba 100644 --- a/src/test/regress/expected/generated_virtual.out +++ b/src/test/regress/expected/generated_virtual.out @@ -19,6 +19,14 @@ SELECT table_name, column_name, dependent_column FROM information_schema.column_ gtest1 | a | b (1 row) +--left join generated expression expandsion varnulling field is sane +insert into gtest0 values(1); +select t2.b is null as true from gtest0 t1 left join gtest0 t2 on false; + true +------ + t +(1 row) + \d gtest1 Table "generated_virtual_tests.gtest1" Column | Type | Collation | Nullable | Default @@ -166,6 +174,12 @@ SELECT a, b FROM gtest1 WHERE b = 4 ORDER BY a; 2 | 4 (1 row) +SELECT t2.b is null as true FROM gtest1 t1 LEFT JOIN gtest1 t2 ON false WHERE t1.b = 4; + true +------ + t +(1 row) + -- test that overflow error happens on read INSERT INTO gtest1 VALUES (2000000000); SELECT * FROM gtest1; @@ -181,6 +195,51 @@ SELECT * FROM gtestx, gtest1 WHERE gtestx.y = gtest1.a; 22 | 2 | 2 | 4 (2 rows) +--bug: 75eb1a6f-d59f-42e6-8a78-124ee808cda7@gmail.com">https://postgr.es/m/75eb1a6f-d59f-42e6-8a78-124ee808cda7@gmail.com +SELECT SUM(t.b) OVER (PARTITION BY t.a) FROM gtestx AS t1 LEFT JOIN gtest1 T ON true; + sum +----- + 6 + 6 + 6 + 12 + 12 + 12 +(6 rows) + +--this query result should be same as as the above +SELECT SUM((select t.b from ((select t.b from gtest1 as t order by t.b limit 1)))) OVER (PARTITION BY t.a) +FROM gtestx AS t1 left JOIN gtest1 T ON true; + sum +----- + 6 + 6 + 6 + 12 + 12 + 12 +(6 rows) + +SELECT SUM((select t.b from gtest1 as t order by t.b limit 1)) OVER (PARTITION BY t.a) +FROM gtestx AS t1 left JOIN gtest1 T ON true; + sum +----- + 6 + 6 + 6 + 6 + 6 + 6 +(6 rows) + +SELECT t.x FROM gtestx t LEFT JOIN gtest1 vt on t.x = vt.a WHERE coalesce(vt.b, 1) = 1 OR t.x IS NULL; + x +---- + 11 + 22 + 33 +(3 rows) + DROP TABLE gtestx; -- test UPDATE/DELETE quals SELECT * FROM gtest1 ORDER BY a; diff --git a/src/test/regress/sql/generated_virtual.sql b/src/test/regress/sql/generated_virtual.sql index 34870813910..aaf5661ea3d 100644 --- a/src/test/regress/sql/generated_virtual.sql +++ b/src/test/regress/sql/generated_virtual.sql @@ -12,6 +12,10 @@ SELECT table_name, column_name, column_default, is_nullable, is_generated, gener SELECT table_name, column_name, dependent_column FROM information_schema.column_column_usage WHERE table_schema = 'generated_virtual_tests' ORDER BY 1, 2, 3; +--left join generated expression expandsion varnulling field is sane +insert into gtest0 values(1); +select t2.b is null as true from gtest0 t1 left join gtest0 t2 on false; + \d gtest1 -- duplicate generated @@ -71,6 +75,8 @@ SELECT * FROM gtest1 ORDER BY a; SELECT a, b, b * 2 AS b2 FROM gtest1 ORDER BY a; SELECT a, b FROM gtest1 WHERE b = 4 ORDER BY a; +SELECT t2.b is null as true FROM gtest1 t1 LEFT JOIN gtest1 t2 ON false WHERE t1.b = 4; + -- test that overflow error happens on read INSERT INTO gtest1 VALUES (2000000000); SELECT * FROM gtest1; @@ -80,6 +86,17 @@ DELETE FROM gtest1 WHERE a = 2000000000; CREATE TABLE gtestx (x int, y int); INSERT INTO gtestx VALUES (11, 1), (22, 2), (33, 3); SELECT * FROM gtestx, gtest1 WHERE gtestx.y = gtest1.a; +--bug: 75eb1a6f-d59f-42e6-8a78-124ee808cda7@gmail.com">https://postgr.es/m/75eb1a6f-d59f-42e6-8a78-124ee808cda7@gmail.com +SELECT SUM(t.b) OVER (PARTITION BY t.a) FROM gtestx AS t1 LEFT JOIN gtest1 T ON true; +--this query result should be same as as the above +SELECT SUM((select t.b from ((select t.b from gtest1 as t order by t.b limit 1)))) OVER (PARTITION BY t.a) +FROM gtestx AS t1 left JOIN gtest1 T ON true; + +SELECT SUM((select t.b from gtest1 as t order by t.b limit 1)) OVER (PARTITION BY t.a) +FROM gtestx AS t1 left JOIN gtest1 T ON true; + +SELECT t.x FROM gtestx t LEFT JOIN gtest1 vt on t.x = vt.a WHERE coalesce(vt.b, 1) = 1 OR t.x IS NULL; + DROP TABLE gtestx; -- test UPDATE/DELETE quals diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index b6c170ac249..6cb39ac76c5 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2633,6 +2633,7 @@ SetOperation SetOperationStmt SetQuantifier SetToDefault +SetVarNullingrels_context SetVarReturningType_context SetupWorkerPtrType ShDependObjectInfo -- 2.34.1