Hi!
On 09.02.2025 18:38, Alexander Korotkov wrote:
Also, aren't we too restrictive while requiring is_simple_values_sequence()?
For instance, I believe cases like this (containing Var) could be transformed
too.
select * from t t1, lateral (select * from t t2 where t2.i in (values (t1.i),
(1)));
I'm still working on it.
Also, I think there is quite a code duplication about construction of
SAOP between match_orclause_to_indexcol() and convert_VALUES_to_ANY()
functions. I would like to see a refactoring as a separate first
patch, which extracts the common part into a function.
Done.
I have attached a patch. In addition to the transfer, I added the
process of searching for a suitable operator and type for the left
expression for input expressions: const and left expression, since they
may differ from the declared types. Additionally, we convert the left
expr to a type suitable for the found operator.
--
Regards,
Alena Rybakina
Postgres Professional
From 252e6a0b9a0b187e21a0721931f185d20061e66f Mon Sep 17 00:00:00 2001
From: Alena Rybakina <a.rybak...@postgrespro.ru>
Date: Thu, 20 Feb 2025 23:30:01 +0300
Subject: [PATCH 2/2] Add an implementation of the x IN VALUES to 'x = ANY
[...]', a special case of the ScalarArrayOpExpr operation.
It will simplify the query tree, eliminating the appearance of
an unnecessary join.
Since VALUES describes a relational table, and the value of such
a list is a table row, the optimizer will likely face an underestimation
problem due to the inability to estimate cardinality through
MCV statistics. The cardinality evaluation mechanism
can work with the array inclusion check operation.
If the array is small enough (< 100 elements), it will perform
a statistical evaluation element by element.
We perform the transformation in the convert_ANY_sublink_to_join
if VALUES RTE is proper and the transformation is convertible.
The conversion is only possible for operations on scalar values.
Authors: Andrei Lepikhov <a.lepik...@postgrespro.ru>,
Alena Rybakina <lena.riback...@yandex.ru>
---
src/backend/optimizer/plan/subselect.c | 106 ++++++
src/backend/optimizer/prep/prepjointree.c | 7 +
src/include/optimizer/subselect.h | 2 +
src/test/regress/expected/subselect.out | 443 ++++++++++++++++++++++
src/test/regress/sql/subselect.sql | 155 ++++++++
5 files changed, 713 insertions(+)
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 8230cbea3c3..08c4648f936 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -1214,6 +1214,112 @@ inline_cte_walker(Node *node, inline_cte_walker_context *context)
return expression_tree_walker(node, inline_cte_walker, context);
}
+/*
+ * The function traverses the tree looking for elements of type var.
+ * If it finds it, it returns true.
+ */
+static bool
+values_simplicity_check_walker(Node *node, void *ctx)
+{
+ if (node == NULL)
+ {
+ return false;
+ }
+ else if(IsA(node, Var))
+ return true;
+ else if(IsA(node, Query))
+ return query_tree_walker((Query *) node,
+ values_simplicity_check_walker,
+ (void*) ctx,
+ QTW_EXAMINE_RTES_BEFORE);
+
+ return expression_tree_walker(node, values_simplicity_check_walker,
+ (void *) ctx);
+}
+
+/*
+ * Designed in analogy with is_simple_values
+ */
+static bool
+is_simple_values_sequence(Query *query)
+{
+ RangeTblEntry *rte;
+
+ /* In theory removing (altering) part of restrictions */
+ if (list_length(query->targetList) > 1 ||
+ query->limitCount != NULL || query->limitOffset != NULL ||
+ query->sortClause != NIL ||
+ list_length(query->rtable) != 1)
+ return false;
+
+ rte = linitial_node(RangeTblEntry,query->rtable);
+
+ /* Permanent restrictions */
+ if (rte->rtekind != RTE_VALUES ||
+ list_length(rte->values_lists) <= 1 ||
+ contain_volatile_functions((Node *) query))
+ return false;
+
+ /*
+ * Go to the query tree to be sure that expression doesn't
+ * have any Var type elements.
+ */
+ return !expression_tree_walker((Node *) (rte->values_lists),
+ values_simplicity_check_walker,
+ NULL);
+}
+
+/*
+ * Transform appropriate testexpr and const VALUES expression to SaOpExpr.
+ *
+ * Return NULL, if transformation isn't allowed.
+ */
+ScalarArrayOpExpr *
+convert_VALUES_to_ANY(Query *query, Node *testexpr)
+{
+ RangeTblEntry *rte;
+ Node *leftop;
+ Oid matchOpno;
+ ListCell *lc;
+ Oid inputcollid;
+ bool have_param = false;
+ List *consts = NIL;
+
+ /* Extract left side of SAOP from test epression */
+
+ if (!IsA(testexpr, OpExpr) ||
+ list_length(((OpExpr *) testexpr)->args) != 2 ||
+ !is_simple_values_sequence(query))
+ return NULL;
+
+ rte = linitial_node(RangeTblEntry,query->rtable);
+ leftop = linitial(((OpExpr *) testexpr)->args);
+ matchOpno = ((OpExpr *) testexpr)->opno;
+ inputcollid = linitial_oid(rte->colcollations);
+
+ foreach (lc, rte->values_lists)
+ {
+ List *elem = lfirst(lc);
+ Node *value = linitial(elem);
+
+ value = eval_const_expressions(NULL, value);
+
+ if (!IsA(value, Const))
+ have_param = true;
+ else if (((Const *)value)->constisnull)
+
+ /*
+ * Constant expression isn't converted because it is a NULL. NULLS
+ * just not supported by the construct_array routine.
+ */
+ return NULL;
+
+ consts = lappend(consts, value);
+ }
+
+ return makeSAOPArrayExpr(matchOpno, leftop, linitial_oid(rte->coltypes),
+ inputcollid, consts, have_param);
+}
/*
* convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index 5d9225e9909..d25b220d818 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -649,6 +649,13 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
/* Is it a convertible ANY or EXISTS clause? */
if (sublink->subLinkType == ANY_SUBLINK)
{
+ ScalarArrayOpExpr *saop;
+
+ if ((saop = convert_VALUES_to_ANY((Query *) sublink->subselect,
+ sublink->testexpr)) != NULL)
+ /* VALUES sequence was simplified. Nothing more to do here, */
+ return (Node *) saop;
+
if ((j = convert_ANY_sublink_to_join(root, sublink,
available_rels1)) != NULL)
{
diff --git a/src/include/optimizer/subselect.h b/src/include/optimizer/subselect.h
index 8b9ab6e5792..d6a07d352c4 100644
--- a/src/include/optimizer/subselect.h
+++ b/src/include/optimizer/subselect.h
@@ -17,6 +17,8 @@
#include "nodes/plannodes.h"
extern void SS_process_ctes(PlannerInfo *root);
+extern ScalarArrayOpExpr *convert_VALUES_to_ANY(Query *query,
+ Node *testexpr);
extern JoinExpr *convert_ANY_sublink_to_join(PlannerInfo *root,
SubLink *sublink,
Relids available_rels);
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index ebc545e2461..5138a00349c 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -2652,3 +2652,446 @@ ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);
Filter: (odd = b.odd)
(16 rows)
+--
+-- Test VALUES to ARRAY (VtA) transformation
+--
+EXPLAIN (COSTS OFF)
+SELECT * FROM onek, (VALUES(147, 'RFAAAA'), (931, 'VJAAAA')) AS v (i, j)
+ WHERE onek.unique1 = v.i AND onek.stringu1 = v.j;
+ QUERY PLAN
+-------------------------------------------------------------
+ Nested Loop
+ -> Values Scan on "*VALUES*"
+ -> Index Scan using onek_stringu1 on onek
+ Index Cond: (stringu1 = ("*VALUES*".column2)::text)
+ Filter: ("*VALUES*".column1 = unique1)
+(5 rows)
+
+SELECT * FROM onek,
+ (VALUES ((SELECT i FROM
+ (VALUES(10000), (2), (389), (1000), (2000), ((SELECT 10029))) AS foo(i)
+ ORDER BY i ASC LIMIT 1))) bar (i)
+ WHERE onek.unique1 = bar.i;
+ unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 | i
+---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------+---
+ 2 | 326 | 0 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 4 | 5 | CAAAAA | OMAAAA | OOOOxx | 2
+(1 row)
+
+-- Forbid VTA transformation for a composite argument
+EXPLAIN (COSTS OFF)
+SELECT * FROM onek
+ WHERE (unique1,ten) IN (VALUES (1,1), (20,0), (99,9), (17,99))
+ ORDER BY unique1;
+ QUERY PLAN
+-----------------------------------------------------------------
+ Sort
+ Sort Key: onek.unique1
+ -> Nested Loop
+ -> HashAggregate
+ Group Key: "*VALUES*".column1, "*VALUES*".column2
+ -> Values Scan on "*VALUES*"
+ -> Index Scan using onek_unique1 on onek
+ Index Cond: (unique1 = "*VALUES*".column1)
+ Filter: ("*VALUES*".column2 = ten)
+(9 rows)
+
+-- Values to Array transformation
+EXPLAIN (COSTS OFF)
+SELECT * FROM onek
+ WHERE unique1 IN (VALUES(10000), (2), (389), (1000), (2000), (10029))
+ ORDER BY unique1;
+ QUERY PLAN
+----------------------------------------------------------------------------------------
+ Sort
+ Sort Key: unique1
+ -> Bitmap Heap Scan on onek
+ Recheck Cond: (unique1 = ANY ('{10000,2,389,1000,2000,10029}'::integer[]))
+ -> Bitmap Index Scan on onek_unique1
+ Index Cond: (unique1 = ANY ('{10000,2,389,1000,2000,10029}'::integer[]))
+(6 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM onek
+ WHERE unique1 IN (VALUES(1200), (1));
+ QUERY PLAN
+-------------------------------------------------------------
+ Bitmap Heap Scan on onek
+ Recheck Cond: (unique1 = ANY ('{1200,1}'::integer[]))
+ -> Bitmap Index Scan on onek_unique1
+ Index Cond: (unique1 = ANY ('{1200,1}'::integer[]))
+(4 rows)
+
+-- TODO: Recursively evaluate constant queries.
+EXPLAIN (COSTS OFF)
+SELECT * FROM onek
+ WHERE unique1 IN (SELECT x*x FROM (VALUES(1200), (1)) AS x(x));
+ QUERY PLAN
+---------------------------------------------------------------------------
+ Nested Loop
+ -> Unique
+ -> Sort
+ Sort Key: (("*VALUES*".column1 * "*VALUES*".column1))
+ -> Values Scan on "*VALUES*"
+ -> Index Scan using onek_unique1 on onek
+ Index Cond: (unique1 = ("*VALUES*".column1 * "*VALUES*".column1))
+(7 rows)
+
+-- transformation doesn't apply because values RTE exists in subquery
+EXPLAIN (COSTS OFF)
+select * FROM onek,
+ (VALUES ((select i FROM
+ (VALUES(10000), (2), (389), (1000), (2000), (10029)) as foo(i)
+ ORDER BY i ASC LIMIT 1))) bar (i)
+ WHERE onek.unique1 = bar.i;
+ QUERY PLAN
+-------------------------------------------------
+ Index Scan using onek_unique1 on onek
+ Index Cond: (unique1 = (InitPlan 2).col1)
+ InitPlan 1
+ -> Limit
+ -> Sort
+ Sort Key: "*VALUES*".column1
+ -> Values Scan on "*VALUES*"
+ InitPlan 2
+ -> Limit
+ -> Sort
+ Sort Key: "*VALUES*_1".column1
+ -> Values Scan on "*VALUES*_1"
+(12 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT unique1, stringu1 FROM onek WHERE stringu1::name IN (VALUES('RFAAAA'), ('VJAAAA'));
+ QUERY PLAN
+------------------------------------------------------------------
+ Bitmap Heap Scan on onek
+ Recheck Cond: (stringu1 = ANY ('{RFAAAA,VJAAAA}'::text[]))
+ -> Bitmap Index Scan on onek_stringu1
+ Index Cond: (stringu1 = ANY ('{RFAAAA,VJAAAA}'::text[]))
+(4 rows)
+
+-- transformation doesn't apply because of type differences
+EXPLAIN (COSTS OFF)
+SELECT unique1, stringu1 FROM onek WHERE stringu1::text IN (VALUES('RFAAAA'), ('VJAAAA'));
+ QUERY PLAN
+----------------------------------------------------------------
+ Seq Scan on onek
+ Filter: ((stringu1)::text = ANY ('{RFAAAA,VJAAAA}'::text[]))
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT * from onek WHERE unique1 in (VALUES(1200::bigint), (1));
+ QUERY PLAN
+------------------------------------------------------------
+ Bitmap Heap Scan on onek
+ Recheck Cond: (unique1 = ANY ('{1200,1}'::bigint[]))
+ -> Bitmap Index Scan on onek_unique1
+ Index Cond: (unique1 = ANY ('{1200,1}'::bigint[]))
+(4 rows)
+
+-- Recursive VTA transformation
+EXPLAIN (COSTS OFF)
+SELECT * FROM (
+ SELECT ten FROM onek
+ WHERE unique1 IN (VALUES(10000), (2), (389), (1000), (2000), (10029))
+ OFFSET 0
+);
+ QUERY PLAN
+----------------------------------------------------------------------------------
+ Bitmap Heap Scan on onek
+ Recheck Cond: (unique1 = ANY ('{10000,2,389,1000,2000,10029}'::integer[]))
+ -> Bitmap Index Scan on onek_unique1
+ Index Cond: (unique1 = ANY ('{10000,2,389,1000,2000,10029}'::integer[]))
+(4 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT (SELECT avg(ten::integer) FROM onek
+WHERE unique1 IN (VALUES(10000), (2), (389), (1000), (2000), (10029)));
+ QUERY PLAN
+------------------------------------------------------------------------------------------------
+ Result
+ InitPlan 1
+ -> Aggregate
+ -> Bitmap Heap Scan on onek
+ Recheck Cond: (unique1 = ANY ('{10000,2,389,1000,2000,10029}'::integer[]))
+ -> Bitmap Index Scan on onek_unique1
+ Index Cond: (unique1 = ANY ('{10000,2,389,1000,2000,10029}'::integer[]))
+(7 rows)
+
+-- VTA shouldn't depend on the side of the join probing with the VALUES expression.
+EXPLAIN (COSTS OFF)
+SELECT c.oid,c.relname FROM pg_class c JOIN pg_am a USING (oid)
+WHERE a.oid IN (VALUES (1), (2));
+ QUERY PLAN
+---------------------------------------------------------
+ Nested Loop
+ -> Seq Scan on pg_am a
+ Filter: (oid = ANY ('{1,2}'::oid[]))
+ -> Index Scan using pg_class_oid_index on pg_class c
+ Index Cond: (oid = a.oid)
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT c.oid,c.relname FROM pg_class c JOIN pg_am a USING (oid)
+WHERE c.oid IN (VALUES (1), (2));
+ QUERY PLAN
+---------------------------------------------------------------
+ Hash Join
+ Hash Cond: (a.oid = c.oid)
+ -> Seq Scan on pg_am a
+ -> Hash
+ -> Index Scan using pg_class_oid_index on pg_class c
+ Index Cond: (oid = ANY ('{1,2}'::oid[]))
+(6 rows)
+
+-- Complexity of test expression doesn't matter
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek WHERE sin(two)+four IN (VALUES (sin(0.5)), (2));
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------------------------
+ Seq Scan on onek
+ Filter: ((sin((two)::double precision) + (four)::double precision) = ANY ('{0.479425538604203,2}'::double precision[]))
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+-- VTA Doesn't allow NULLs in the list - may be until a better solution.
+SELECT ten FROM onek WHERE sin(two)+four IN (VALUES (sin(0.5)), (NULL), (2));
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------
+ Hash Semi Join
+ Hash Cond: ((sin((onek.two)::double precision) + (onek.four)::double precision) = "*VALUES*".column1)
+ -> Seq Scan on onek
+ -> Hash
+ -> Values Scan on "*VALUES*"
+(5 rows)
+
+PREPARE test (int,numeric, text) AS
+ SELECT ten FROM onek WHERE sin(two)*four/($3::real) IN (VALUES (sin($2)), (2), ($1));
+EXPLAIN (COSTS OFF) EXECUTE test(42, 3.14, '-1.5');
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------------------------------------------------
+ Seq Scan on onek
+ Filter: (((sin((two)::double precision) * (four)::double precision) / '-1.5'::real) = ANY ('{0.0015926529164868282,2,42}'::double precision[]))
+(2 rows)
+
+EXPLAIN (COSTS OFF) EXECUTE test(NULL, 3.14, NULL);
+ QUERY PLAN
+--------------------------
+ Result
+ One-Time Filter: false
+(2 rows)
+
+EXPLAIN (COSTS OFF) EXECUTE test(NULL, 3.14, '-1.5');
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------------------------------------------
+ Seq Scan on onek
+ Filter: (((sin((two)::double precision) * (four)::double precision) / '-1.5'::real) = ANY ('{0.0015926529164868282,2,NULL}'::double precision[]))
+(2 rows)
+
+PREPARE test1 (int,numeric, text) AS
+ SELECT ten FROM onek
+ WHERE sin(two)*four/($3::real) IN (VALUES (sin($2)), (2), ($2), ($2), ($1));
+-- VTA forbidden because sin($1) can't be evaluated to Const.
+EXPLAIN (COSTS OFF) EXECUTE test1(NULL, 2, '2');
+ QUERY PLAN
+---------------------------------------------------------------------------------------------------------------------------------------------------
+ Seq Scan on onek
+ Filter: (((sin((two)::double precision) * (four)::double precision) / '2'::real) = ANY ('{0.9092974268256817,2,2,2,NULL}'::double precision[]))
+(2 rows)
+
+PREPARE test2 (int,numeric, text) AS
+ SELECT ten FROM onek
+ WHERE sin(two)*four/($3::real) IN (VALUES (2), ($2), ($2), ($1));
+-- VTA forbidden because of unresolved casting of numeric parameter to common type
+EXPLAIN (COSTS OFF) EXECUTE test2(2, 2, '2');
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------------------------------------
+ Hash Semi Join
+ Hash Cond: (((sin((onek.two)::double precision) * (onek.four)::double precision) / '2'::real) = ("*VALUES*".column1)::double precision)
+ -> Seq Scan on onek
+ -> Hash
+ -> Values Scan on "*VALUES*"
+(5 rows)
+
+PREPARE test3 (int,int, text) AS
+ SELECT ten FROM onek
+ WHERE sin(two)*four/($3::real) IN (VALUES (2), ($2), ($2), ($1));
+EXPLAIN (COSTS OFF) EXECUTE test3(2, 2, '2');
+ QUERY PLAN
+-------------------------------------------------------------------------------------------------------------------------------------------
+ Hash Semi Join
+ Hash Cond: (((sin((onek.two)::double precision) * (onek.four)::double precision) / '2'::real) = ("*VALUES*".column1)::double precision)
+ -> Seq Scan on onek
+ -> Hash
+ -> Values Scan on "*VALUES*"
+(5 rows)
+
+-- Be careful in case of sort_specification clauses like LIMIT, OFFSET etc.
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) OFFSET 1);
+ QUERY PLAN
+----------------------------------------------------
+ Nested Loop
+ -> HashAggregate
+ Group Key: "*VALUES*".column1
+ -> Limit
+ -> Values Scan on "*VALUES*"
+ -> Index Scan using onek_unique1 on onek
+ Index Cond: (unique1 = "*VALUES*".column1)
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) ORDER BY 1);
+ QUERY PLAN
+----------------------------------------------------
+ Nested Loop
+ -> Unique
+ -> Sort
+ Sort Key: "*VALUES*".column1
+ -> Sort
+ Sort Key: "*VALUES*".column1
+ -> Values Scan on "*VALUES*"
+ -> Index Scan using onek_unique1 on onek
+ Index Cond: (unique1 = "*VALUES*".column1)
+(9 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) LIMIT 1);
+ QUERY PLAN
+----------------------------------------------------
+ Nested Loop
+ -> HashAggregate
+ Group Key: "*VALUES*".column1
+ -> Limit
+ -> Values Scan on "*VALUES*"
+ -> Index Scan using onek_unique1 on onek
+ Index Cond: (unique1 = "*VALUES*".column1)
+(7 rows)
+
+SELECT oid,relname FROM pg_class WHERE oid IN (VALUES (sin(0.5)), (2)); -- ERROR
+ERROR: operator does not exist: oid = double precision
+LINE 1: SELECT oid,relname FROM pg_class WHERE oid IN (VALUES (sin(0...
+ ^
+HINT: No operator matches the given name and argument types. You might need to add explicit type casts.
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek WHERE unique1 IN (VALUES (sin(0.5)), (2));
+ QUERY PLAN
+---------------------------------------------------------------------------------------------
+ Seq Scan on onek
+ Filter: ((unique1)::double precision = ANY ('{0.479425538604203,2}'::double precision[]))
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek t
+WHERE unique1 IN (VALUES (0), ((2 IN (SELECT unique2 FROM onek c
+ WHERE c.unique2 = t.unique1))::integer));
+ QUERY PLAN
+------------------------------------------------------------
+ Nested Loop Semi Join
+ -> Seq Scan on onek t
+ -> Values Scan on "*VALUES*"
+ Filter: (t.unique1 = column1)
+ SubPlan 1
+ -> Index Only Scan using onek_unique2 on onek c
+ Index Cond: (unique2 = t.unique1)
+(7 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek t
+WHERE unique1 IN (VALUES (0), ((2 IN (SELECT unique2 FROM onek c
+ WHERE c.unique2 IN (VALUES (sin(0.5)), (2))))::integer));
+ QUERY PLAN
+-----------------------------------------------------------------------------------------------------------------------
+ Nested Loop
+ -> Unique
+ -> Sort
+ Sort Key: "*VALUES*".column1
+ -> Values Scan on "*VALUES*"
+ SubPlan 1
+ -> Index Only Scan using onek_unique2 on onek c
+ Filter: ((unique2)::double precision = ANY ('{0.479425538604203,2}'::double precision[]))
+ -> Index Scan using onek_unique1 on onek t
+ Index Cond: (unique1 = "*VALUES*".column1)
+(10 rows)
+
+-- Doesn't allow the VtA dispite of possibility. XXX: should we improve it later?
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek t WHERE unique1 IN (VALUES (0), ((2 IN (
+ (VALUES (1), (3))))::integer)
+);
+ QUERY PLAN
+-------------------------------------------------------
+ Nested Loop
+ -> Unique
+ -> Sort
+ Sort Key: "*VALUES*".column1
+ -> Values Scan on "*VALUES*"
+ SubPlan 1
+ -> Values Scan on "*VALUES*_1"
+ -> Index Scan using onek_unique1 on onek t
+ Index Cond: (unique1 = "*VALUES*".column1)
+(9 rows)
+
+-- The VtA works. Do we need to constify the 'SELECT 3' subquery?
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek t WHERE unique1 IN (VALUES (0), ((2 IN
+ (SELECT (3)))::integer)
+);
+ QUERY PLAN
+------------------------------------------------------------------------------------------------
+ Bitmap Heap Scan on onek t
+ Recheck Cond: (unique1 = ANY (ARRAY[0, ((ANY (2 = (hashed SubPlan 1).col1)))::integer]))
+ -> Bitmap Index Scan on onek_unique1
+ Index Cond: (unique1 = ANY (ARRAY[0, ((ANY (2 = (hashed SubPlan 1).col1)))::integer]))
+ SubPlan 1
+ -> Result
+(6 rows)
+
+-- Alow to transformation and hold conversion between types of colemns and
+-- declared type of column pointed in RTE
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek t WHERE unique1 IN (0, 1.0 IN (0, 3)::integer);
+ QUERY PLAN
+----------------------------------------------------------
+ Bitmap Heap Scan on onek t
+ Recheck Cond: (unique1 = ANY ('{0,0}'::integer[]))
+ -> Bitmap Index Scan on onek_unique1
+ Index Cond: (unique1 = ANY ('{0,0}'::integer[]))
+(4 rows)
+
+EXPLAIN
+SELECT ten FROM onek t WHERE 1 IN ((VALUES (1), (3)));
+ QUERY PLAN
+----------------------------------------------------------
+ Seq Scan on onek t (cost=0.00..45.00 rows=1000 width=4)
+(1 row)
+
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek t WHERE 1.0::integer IN ((VALUES (1), (3)));
+ QUERY PLAN
+--------------------
+ Seq Scan on onek t
+(1 row)
+
+-- Don't allow transformation because of imcompatibility of types
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek t WHERE 1.0 IN ((VALUES (1), (3))::integer);
+ QUERY PLAN
+---------------------------------------------------------
+ Result
+ One-Time Filter: (1.0 = ((InitPlan 1).col1)::numeric)
+ InitPlan 1
+ -> Values Scan on "*VALUES*"
+ -> Seq Scan on onek t
+(5 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek t WHERE 1.0 IN (VALUES (1), (3));
+ QUERY PLAN
+---------------------------------------------------------------------
+ Result
+ One-Time Filter: (ANY (1.0 = ((hashed SubPlan 1).col1)::numeric))
+ -> Seq Scan on onek t
+ SubPlan 1
+ -> Values Scan on "*VALUES*"
+(5 rows)
+
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 6ed3636a9e4..480f8d7b852 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -1202,3 +1202,158 @@ WHERE a.thousand < 750;
explain (costs off)
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);
+
+--
+-- Test VALUES to ARRAY (VtA) transformation
+--
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM onek, (VALUES(147, 'RFAAAA'), (931, 'VJAAAA')) AS v (i, j)
+ WHERE onek.unique1 = v.i AND onek.stringu1 = v.j;
+
+SELECT * FROM onek,
+ (VALUES ((SELECT i FROM
+ (VALUES(10000), (2), (389), (1000), (2000), ((SELECT 10029))) AS foo(i)
+ ORDER BY i ASC LIMIT 1))) bar (i)
+ WHERE onek.unique1 = bar.i;
+
+-- Forbid VTA transformation for a composite argument
+EXPLAIN (COSTS OFF)
+SELECT * FROM onek
+ WHERE (unique1,ten) IN (VALUES (1,1), (20,0), (99,9), (17,99))
+ ORDER BY unique1;
+
+-- Values to Array transformation
+EXPLAIN (COSTS OFF)
+SELECT * FROM onek
+ WHERE unique1 IN (VALUES(10000), (2), (389), (1000), (2000), (10029))
+ ORDER BY unique1;
+
+EXPLAIN (COSTS OFF)
+SELECT * FROM onek
+ WHERE unique1 IN (VALUES(1200), (1));
+
+-- TODO: Recursively evaluate constant queries.
+EXPLAIN (COSTS OFF)
+SELECT * FROM onek
+ WHERE unique1 IN (SELECT x*x FROM (VALUES(1200), (1)) AS x(x));
+
+-- transformation doesn't apply because values RTE exists in subquery
+EXPLAIN (COSTS OFF)
+select * FROM onek,
+ (VALUES ((select i FROM
+ (VALUES(10000), (2), (389), (1000), (2000), (10029)) as foo(i)
+ ORDER BY i ASC LIMIT 1))) bar (i)
+ WHERE onek.unique1 = bar.i;
+
+EXPLAIN (COSTS OFF)
+SELECT unique1, stringu1 FROM onek WHERE stringu1::name IN (VALUES('RFAAAA'), ('VJAAAA'));
+
+-- transformation doesn't apply because of type differences
+EXPLAIN (COSTS OFF)
+SELECT unique1, stringu1 FROM onek WHERE stringu1::text IN (VALUES('RFAAAA'), ('VJAAAA'));
+
+EXPLAIN (COSTS OFF)
+SELECT * from onek WHERE unique1 in (VALUES(1200::bigint), (1));
+
+-- Recursive VTA transformation
+EXPLAIN (COSTS OFF)
+SELECT * FROM (
+ SELECT ten FROM onek
+ WHERE unique1 IN (VALUES(10000), (2), (389), (1000), (2000), (10029))
+ OFFSET 0
+);
+
+EXPLAIN (COSTS OFF)
+SELECT (SELECT avg(ten::integer) FROM onek
+WHERE unique1 IN (VALUES(10000), (2), (389), (1000), (2000), (10029)));
+
+-- VTA shouldn't depend on the side of the join probing with the VALUES expression.
+EXPLAIN (COSTS OFF)
+SELECT c.oid,c.relname FROM pg_class c JOIN pg_am a USING (oid)
+WHERE a.oid IN (VALUES (1), (2));
+EXPLAIN (COSTS OFF)
+SELECT c.oid,c.relname FROM pg_class c JOIN pg_am a USING (oid)
+WHERE c.oid IN (VALUES (1), (2));
+
+-- Complexity of test expression doesn't matter
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek WHERE sin(two)+four IN (VALUES (sin(0.5)), (2));
+EXPLAIN (COSTS OFF)
+
+-- VTA Doesn't allow NULLs in the list - may be until a better solution.
+SELECT ten FROM onek WHERE sin(two)+four IN (VALUES (sin(0.5)), (NULL), (2));
+
+PREPARE test (int,numeric, text) AS
+ SELECT ten FROM onek WHERE sin(two)*four/($3::real) IN (VALUES (sin($2)), (2), ($1));
+EXPLAIN (COSTS OFF) EXECUTE test(42, 3.14, '-1.5');
+EXPLAIN (COSTS OFF) EXECUTE test(NULL, 3.14, NULL);
+EXPLAIN (COSTS OFF) EXECUTE test(NULL, 3.14, '-1.5');
+PREPARE test1 (int,numeric, text) AS
+ SELECT ten FROM onek
+ WHERE sin(two)*four/($3::real) IN (VALUES (sin($2)), (2), ($2), ($2), ($1));
+-- VTA forbidden because sin($1) can't be evaluated to Const.
+EXPLAIN (COSTS OFF) EXECUTE test1(NULL, 2, '2');
+
+PREPARE test2 (int,numeric, text) AS
+ SELECT ten FROM onek
+ WHERE sin(two)*four/($3::real) IN (VALUES (2), ($2), ($2), ($1));
+-- VTA forbidden because of unresolved casting of numeric parameter to common type
+EXPLAIN (COSTS OFF) EXECUTE test2(2, 2, '2');
+PREPARE test3 (int,int, text) AS
+ SELECT ten FROM onek
+ WHERE sin(two)*four/($3::real) IN (VALUES (2), ($2), ($2), ($1));
+
+EXPLAIN (COSTS OFF) EXECUTE test3(2, 2, '2');
+
+-- Be careful in case of sort_specification clauses like LIMIT, OFFSET etc.
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) OFFSET 1);
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) ORDER BY 1);
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek WHERE unique1 IN (VALUES (1), (2) LIMIT 1);
+
+SELECT oid,relname FROM pg_class WHERE oid IN (VALUES (sin(0.5)), (2)); -- ERROR
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek WHERE unique1 IN (VALUES (sin(0.5)), (2));
+
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek t
+WHERE unique1 IN (VALUES (0), ((2 IN (SELECT unique2 FROM onek c
+ WHERE c.unique2 = t.unique1))::integer));
+
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek t
+WHERE unique1 IN (VALUES (0), ((2 IN (SELECT unique2 FROM onek c
+ WHERE c.unique2 IN (VALUES (sin(0.5)), (2))))::integer));
+
+-- Doesn't allow the VtA dispite of possibility. XXX: should we improve it later?
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek t WHERE unique1 IN (VALUES (0), ((2 IN (
+ (VALUES (1), (3))))::integer)
+);
+
+-- The VtA works. Do we need to constify the 'SELECT 3' subquery?
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek t WHERE unique1 IN (VALUES (0), ((2 IN
+ (SELECT (3)))::integer)
+);
+
+-- Alow to transformation and hold conversion between types of colemns and
+-- declared type of column pointed in RTE
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek t WHERE unique1 IN (0, 1.0 IN (0, 3)::integer);
+
+EXPLAIN
+SELECT ten FROM onek t WHERE 1 IN ((VALUES (1), (3)));
+
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek t WHERE 1.0::integer IN ((VALUES (1), (3)));
+
+-- Don't allow transformation because of imcompatibility of types
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek t WHERE 1.0 IN ((VALUES (1), (3))::integer);
+
+EXPLAIN (COSTS OFF)
+SELECT ten FROM onek t WHERE 1.0 IN (VALUES (1), (3));
--
2.34.1
From 762f8a0ec276a64edda3d6f57d67be1ef7419b8f Mon Sep 17 00:00:00 2001
From: Alena Rybakina <a.rybak...@postgrespro.ru>
Date: Thu, 20 Feb 2025 23:12:04 +0300
Subject: [PATCH 1/2] Move the function for generating ArrayExpr to another
place so that it can be used by other optimizations.
Since const and expr types may be different from the column type,
we must take this into account and therefore must form a suitable operator
for the input types and convert left expr to the target output type.
We do this to avoid a situation where the incoming operator for Array
expression cannot be applied to the given types.
clauses.c file was chosen due to the fact that only two additional libraries
needed to be added compared to other places.
---
src/backend/optimizer/path/indxpath.c | 63 +--------
src/backend/optimizer/util/clauses.c | 176 ++++++++++++++++++++++++++
src/include/optimizer/optimizer.h | 2 +
3 files changed, 181 insertions(+), 60 deletions(-)
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index a43ca16d683..e8910c6caf4 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -33,10 +33,8 @@
#include "optimizer/paths.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
-#include "utils/array.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
-#include "utils/syscache.h"
/* XXX see PartCollMatchesExprColl */
@@ -3247,7 +3245,6 @@ match_orclause_to_indexcol(PlannerInfo *root,
BoolExpr *orclause = (BoolExpr *) rinfo->orclause;
Node *indexExpr = NULL;
List *consts = NIL;
- Node *arrayNode = NULL;
ScalarArrayOpExpr *saopexpr = NULL;
Oid matchOpno = InvalidOid;
IndexClause *iclause;
@@ -3418,63 +3415,9 @@ match_orclause_to_indexcol(PlannerInfo *root,
return NULL;
}
- /*
- * Assemble an array from the list of constants. It seems more profitable
- * to build a const array. But in the presence of other nodes, we don't
- * have a specific value here and must employ an ArrayExpr instead.
- */
- if (haveNonConst)
- {
- ArrayExpr *arrayExpr = makeNode(ArrayExpr);
-
- /* array_collid will be set by parse_collate.c */
- arrayExpr->element_typeid = consttype;
- arrayExpr->array_typeid = arraytype;
- arrayExpr->multidims = false;
- arrayExpr->elements = consts;
- arrayExpr->location = -1;
-
- arrayNode = (Node *) arrayExpr;
- }
- else
- {
- int16 typlen;
- bool typbyval;
- char typalign;
- Datum *elems;
- int i = 0;
- ArrayType *arrayConst;
-
- get_typlenbyvalalign(consttype, &typlen, &typbyval, &typalign);
-
- elems = (Datum *) palloc(sizeof(Datum) * list_length(consts));
- foreach_node(Const, value, consts)
- {
- Assert(!value->constisnull);
-
- elems[i++] = value->constvalue;
- }
-
- arrayConst = construct_array(elems, i, consttype,
- typlen, typbyval, typalign);
- arrayNode = (Node *) makeConst(arraytype, -1, inputcollid,
- -1, PointerGetDatum(arrayConst),
- false, false);
-
- pfree(elems);
- list_free(consts);
- }
-
- /* Build the SAOP expression node */
- saopexpr = makeNode(ScalarArrayOpExpr);
- saopexpr->opno = matchOpno;
- saopexpr->opfuncid = get_opcode(matchOpno);
- saopexpr->hashfuncid = InvalidOid;
- saopexpr->negfuncid = InvalidOid;
- saopexpr->useOr = true;
- saopexpr->inputcollid = inputcollid;
- saopexpr->args = list_make2(indexExpr, arrayNode);
- saopexpr->location = -1;
+ saopexpr = makeSAOPArrayExpr(matchOpno, indexExpr,
+ index->opcintype[indexcol],
+ inputcollid, consts, haveNonConst);
/*
* Finally, build an IndexClause based on the SAOP node. Use
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 43dfecfb47f..1fb60fa1d18 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -40,7 +40,9 @@
#include "optimizer/planmain.h"
#include "parser/analyze.h"
#include "parser/parse_coerce.h"
+#include "parser/parse_collate.h"
#include "parser/parse_func.h"
+#include "parser/parse_oper.h"
#include "rewrite/rewriteHandler.h"
#include "rewrite/rewriteManip.h"
#include "tcop/tcopprot.h"
@@ -5439,3 +5441,177 @@ pull_paramids_walker(Node *node, Bitmapset **context)
}
return expression_tree_walker(node, pull_paramids_walker, context);
}
+
+/* The function returns compatible binary operator foe two input types
+ * otherwice returns InvalidOid.
+ * In addition, the function finds two target types for
+ * both sides and it stores them in target_arg1 and target_arg2.
+ * Besides it returns opfuncid.
+ */
+static int
+get_compatible_oper_ids(List *op,
+ Oid arg1, Oid arg2,
+ Oid *target_arg1, Oid *target_arg2,
+ Oid *opfuncid)
+{
+ Operator tup;
+ Form_pg_operator opform;
+ Oid tup_id;
+
+ /* We need to form compatible union operator for both sides
+ * and only when we work with Const type.
+ */
+ tup = compatible_oper(NULL, op, arg1, arg2, true, -1);
+
+ if (!HeapTupleIsValid(tup))
+ return InvalidOid;
+ opform = (Form_pg_operator)GETSTRUCT(tup);
+
+ /* If it's not a shell, we will give up */
+ if (!RegProcedureIsValid(opform->oprcode) ||
+ !IsBinaryCoercible(arg1, opform->oprleft) ||
+ !IsBinaryCoercible(arg2, opform->oprright))
+ tup_id = InvalidOid;
+ else
+ tup_id = oprid(tup);
+
+ *target_arg1 = opform->oprleft;
+ *target_arg2 = opform->oprright;
+ *opfuncid = opform->oprcode;
+
+ ReleaseSysCache(tup);
+ return tup_id;
+}
+
+ScalarArrayOpExpr *
+makeSAOPArrayExpr(Oid oper, Node *leftexpr, Oid coltype,
+ Oid inputcollid, List *consts, bool haveNonConst)
+{
+ Oid arraytype1;
+ HeapTuple opertup;
+ char *oprname;
+ ScalarArrayOpExpr *saopexpr;
+ Oid input_typeids[2];
+ Oid target_typeids[2];
+ Oid arraytype;
+ Form_pg_operator operform;
+ Node *arrayNode;
+
+ input_typeids[0] = exprType(leftexpr);
+ input_typeids[1] = select_common_type(NULL, consts, "VALUES", NULL);
+
+ /* Lookup for operator to fetch necessary information for the SAOP node */
+ opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(oper));
+
+ if (!HeapTupleIsValid(opertup))
+ elog(ERROR, "cache lookup failed for operator %u", oper);
+
+ operform = (Form_pg_operator) GETSTRUCT(opertup);
+ oprname = NameStr(operform->oprname);
+
+ /* Build the SAOP expression node */
+ saopexpr = makeNode(ScalarArrayOpExpr);
+
+ /*
+ * Get compatible operator id for ScalarArrayOp type and
+ * types for both sides
+ */
+ saopexpr->opno = get_compatible_oper_ids(list_make1(makeString(oprname)),
+ input_typeids[0], input_typeids[1],
+ &(target_typeids[0]), &(target_typeids[1]),
+ &(saopexpr->opfuncid));
+
+ if (saopexpr->opno == InvalidOid)
+ goto end;
+
+ arraytype = get_array_type(input_typeids[1]);
+ if (!OidIsValid(arraytype))
+ goto end;
+
+ /*
+ * Assemble an array from the list of constants. It seems more profitable
+ * to build a const array. But in the presence of other nodes, we don't
+ * have a specific value here and must employ an ArrayExpr instead.
+ */
+ if (haveNonConst)
+ {
+ ArrayExpr *arrayExpr = makeNode(ArrayExpr);
+
+ /* array_collid will be set by parse_collate.c */
+ arrayExpr->element_typeid = input_typeids[1];
+ arrayExpr->array_typeid = arraytype;
+ arrayExpr->multidims = false;
+ arrayExpr->elements = consts;
+ arrayExpr->location = -1;
+
+ arrayNode = (Node *) arrayExpr;
+ }
+ else
+ {
+ int16 typlen;
+ bool typbyval;
+ char typalign;
+ Datum *elems;
+ int i = 0;
+ ArrayType *arrayConst;
+
+ get_typlenbyvalalign(input_typeids[1], &typlen, &typbyval, &typalign);
+
+ elems = (Datum *) palloc(sizeof(Datum) * list_length(consts));
+ foreach_node(Const, value, consts)
+ {
+ Assert(!value->constisnull);
+
+ elems[i++] = value->constvalue;
+ }
+
+ arrayConst = construct_array(elems, i, input_typeids[1],
+ typlen, typbyval, typalign);
+ arrayNode = (Node *) makeConst(arraytype, -1, inputcollid,
+ -1, PointerGetDatum(arrayConst),
+ false, false);
+
+ pfree(elems);
+ list_free(consts);
+ }
+
+ /* Unlike parameters constants can be coerced to target type by the user.
+ * After finding a compatible operator to implement operation accounting
+ * coercion, we must prepare the expression's left side.
+ */
+ leftexpr = coerce_to_target_type(NULL,
+ leftexpr,
+ input_typeids[0],
+ target_typeids[0], -1,
+ COERCION_IMPLICIT,
+ COERCE_IMPLICIT_CAST,
+ -1);
+
+ arraytype1 = get_array_type(target_typeids[1]);
+ arrayNode = coerce_to_target_type(NULL,
+ arrayNode,
+ arraytype,
+ arraytype1,
+ -1,
+ COERCION_IMPLICIT,
+ COERCE_IMPLICIT_CAST,
+ -1);
+
+ inputcollid = select_common_collation(NULL,
+ list_make2(leftexpr, arrayNode), false);
+
+ saopexpr->negfuncid = InvalidOid;
+ saopexpr->hashfuncid = InvalidOid;
+ saopexpr->useOr = true;
+ saopexpr->inputcollid = inputcollid;
+ saopexpr->args = list_make2(leftexpr, arrayNode);
+ saopexpr->location = -1;
+
+end:
+ ReleaseSysCache(opertup);
+
+ if (saopexpr->args != NIL)
+ return saopexpr;
+ else
+ return NULL;
+}
\ No newline at end of file
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index 78e05d88c8e..8acb8846c72 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -206,5 +206,7 @@ extern int locate_var_of_level(Node *node, int levelsup);
extern List *pull_var_clause(Node *node, int flags);
extern Node *flatten_join_alias_vars(PlannerInfo *root, Query *query, Node *node);
extern Node *flatten_group_exprs(PlannerInfo *root, Query *query, Node *node);
+extern ScalarArrayOpExpr * makeSAOPArrayExpr(Oid oper, Node *leftexpr, Oid coltype,
+ Oid inputcollid, List *consts, bool haveNonConst);
#endif /* OPTIMIZER_H */
--
2.34.1