On Tue, 20 Feb 2024 at 14:49, Dean Rasheed <dean.a.rash...@gmail.com> wrote: > > On the face of it, the simplest fix is to tweak is_simple_union_all() > to prevent UNION ALL subquery pullup for MERGE, forcing a > subquery-scan plan. A quick test shows that that fixes the reported > issue. > > However, that leaves the question of whether we should do the same for > UPDATE and DELETE. >
Attached is a patch that prevents UNION ALL subquery pullup in MERGE only. I've re-used and extended the isolation test cases added by 1d5caec221, since it's clear that replacing the plain source relation in those tests with a UNION ALL subquery that returns the same results should produce the same end result. (Without this patch, the UNION ALL subquery is pulled up, EPQ rechecking fails to re-find the match, and a WHEN NOT MATCHED THEN INSERT action is executed instead, resulting in a primary key violation.) It's still not quite clear whether preventing UNION ALL subquery pullup should also apply to UPDATE and DELETE, but I wasn't able to find any live bug there, so I think they're best left alone. This fixes the reported issue, though it's worth noting that concurrent WHEN NOT MATCHED THEN INSERT actions will still lead to duplicate rows being inserted, which is a limitation that is already documented [1]. [1] https://www.postgresql.org/docs/current/transaction-iso.html Regards, Dean
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c new file mode 100644 index aa83dd3..50ebdd2 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -102,7 +102,7 @@ static bool is_simple_values(PlannerInfo static Node *pull_up_constant_function(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, AppendRelInfo *containing_appendrel); -static bool is_simple_union_all(Query *subquery); +static bool is_simple_union_all(PlannerInfo *root, Query *subquery); static bool is_simple_union_all_recurse(Node *setOp, Query *setOpQuery, List *colTypes); static bool is_safe_append_member(Query *subquery); @@ -849,7 +849,7 @@ pull_up_subqueries_recurse(PlannerInfo * * in set_append_rel_pathlist, not here.) */ if (rte->rtekind == RTE_SUBQUERY && - is_simple_union_all(rte->subquery)) + is_simple_union_all(root, rte->subquery)) return pull_up_simple_union_all(root, jtnode, rte); /* @@ -1888,8 +1888,9 @@ pull_up_constant_function(PlannerInfo *r * same datatypes. */ static bool -is_simple_union_all(Query *subquery) +is_simple_union_all(PlannerInfo *root, Query *subquery) { + Query *parse = root->parse; SetOperationStmt *topop; /* Let's just make sure it's a valid subselect ... */ @@ -1910,6 +1911,21 @@ is_simple_union_all(Query *subquery) subquery->cteList) return false; + /* + * UPDATE/DELETE/MERGE is also a problem, because preprocess_rowmarks() + * will add a rowmark to the UNION ALL subquery, to ensure that it returns + * the same row if rescanned by EvalPlanQual. Allowing the subquery to be + * pulled up would render that rowmark ineffective. + * + * In practice, however, this seems to only be a problem for MERGE, which + * allows the subquery to appear under an outer join with the target + * relation --- such an outer join might output multiple not-matched rows + * in addition to the required matched row, breaking EvalPlanQual's + * expectation that rerunning the query returns just one row. + */ + if (parse->commandType == CMD_MERGE) + return false; + /* Recursively check the tree of set operations */ return is_simple_union_all_recurse((Node *) topop, subquery, topop->colTypes); diff --git a/src/test/isolation/expected/merge-join.out b/src/test/isolation/expected/merge-join.out new file mode 100644 index 57f048c..6cf045a --- a/src/test/isolation/expected/merge-join.out +++ b/src/test/isolation/expected/merge-join.out @@ -146,3 +146,151 @@ id|val 3| 30 (3 rows) + +starting permutation: b1 b2 m1 hj exu m2u c1 c2 s1 +step b1: BEGIN ISOLATION LEVEL READ COMMITTED; +step b2: BEGIN ISOLATION LEVEL READ COMMITTED; +step m1: MERGE INTO tgt USING src ON tgt.id = src.id + WHEN MATCHED THEN UPDATE SET val = src.val + WHEN NOT MATCHED THEN INSERT VALUES (src.id, src.val); +step hj: SET LOCAL enable_mergejoin = off; SET LOCAL enable_nestloop = off; +step exu: EXPLAIN (verbose, costs off) + MERGE INTO tgt USING (SELECT * FROM src + UNION ALL + SELECT * FROM src2) src ON tgt.id = src.id + WHEN MATCHED THEN UPDATE SET val = src.val + WHEN NOT MATCHED THEN INSERT VALUES (src.id, src.val); +QUERY PLAN +----------------------------------------------------- +Merge on public.tgt + -> Hash Left Join + Output: tgt.ctid, src.val, src.id, src.* + Inner Unique: true + Hash Cond: (src.id = tgt.id) + -> Subquery Scan on src + Output: src.val, src.id, src.* + -> Append + -> Seq Scan on public.src src_1 + Output: src_1.id, src_1.val + -> Seq Scan on public.src2 + Output: src2.id, src2.val + -> Hash + Output: tgt.ctid, tgt.id + -> Seq Scan on public.tgt + Output: tgt.ctid, tgt.id +(16 rows) + +step m2u: MERGE INTO tgt USING (SELECT * FROM src + UNION ALL + SELECT * FROM src2) src ON tgt.id = src.id + WHEN MATCHED THEN UPDATE SET val = src.val + WHEN NOT MATCHED THEN INSERT VALUES (src.id, src.val); <waiting ...> +step c1: COMMIT; +step m2u: <... completed> +step c2: COMMIT; +step s1: SELECT * FROM tgt; +id|val +--+--- + 1| 10 + 2| 20 + 3| 30 +(3 rows) + + +starting permutation: b1 b2 m1 mj exu m2u c1 c2 s1 +step b1: BEGIN ISOLATION LEVEL READ COMMITTED; +step b2: BEGIN ISOLATION LEVEL READ COMMITTED; +step m1: MERGE INTO tgt USING src ON tgt.id = src.id + WHEN MATCHED THEN UPDATE SET val = src.val + WHEN NOT MATCHED THEN INSERT VALUES (src.id, src.val); +step mj: SET LOCAL enable_hashjoin = off; SET LOCAL enable_nestloop = off; +step exu: EXPLAIN (verbose, costs off) + MERGE INTO tgt USING (SELECT * FROM src + UNION ALL + SELECT * FROM src2) src ON tgt.id = src.id + WHEN MATCHED THEN UPDATE SET val = src.val + WHEN NOT MATCHED THEN INSERT VALUES (src.id, src.val); +QUERY PLAN +----------------------------------------------------------- +Merge on public.tgt + -> Merge Right Join + Output: tgt.ctid, src.val, src.id, src.* + Merge Cond: (tgt.id = src.id) + -> Index Scan using tgt_pkey on public.tgt + Output: tgt.ctid, tgt.id + -> Sort + Output: src.val, src.id, src.* + Sort Key: src.id + -> Subquery Scan on src + Output: src.val, src.id, src.* + -> Append + -> Seq Scan on public.src src_1 + Output: src_1.id, src_1.val + -> Seq Scan on public.src2 + Output: src2.id, src2.val +(16 rows) + +step m2u: MERGE INTO tgt USING (SELECT * FROM src + UNION ALL + SELECT * FROM src2) src ON tgt.id = src.id + WHEN MATCHED THEN UPDATE SET val = src.val + WHEN NOT MATCHED THEN INSERT VALUES (src.id, src.val); <waiting ...> +step c1: COMMIT; +step m2u: <... completed> +step c2: COMMIT; +step s1: SELECT * FROM tgt; +id|val +--+--- + 1| 10 + 2| 20 + 3| 30 +(3 rows) + + +starting permutation: b1 b2 m1 nl exu m2u c1 c2 s1 +step b1: BEGIN ISOLATION LEVEL READ COMMITTED; +step b2: BEGIN ISOLATION LEVEL READ COMMITTED; +step m1: MERGE INTO tgt USING src ON tgt.id = src.id + WHEN MATCHED THEN UPDATE SET val = src.val + WHEN NOT MATCHED THEN INSERT VALUES (src.id, src.val); +step nl: SET LOCAL enable_hashjoin = off; SET LOCAL enable_mergejoin = off; +step exu: EXPLAIN (verbose, costs off) + MERGE INTO tgt USING (SELECT * FROM src + UNION ALL + SELECT * FROM src2) src ON tgt.id = src.id + WHEN MATCHED THEN UPDATE SET val = src.val + WHEN NOT MATCHED THEN INSERT VALUES (src.id, src.val); +QUERY PLAN +----------------------------------------------------- +Merge on public.tgt + -> Nested Loop Left Join + Output: tgt.ctid, src.val, src.id, src.* + Inner Unique: true + -> Subquery Scan on src + Output: src.val, src.id, src.* + -> Append + -> Seq Scan on public.src src_1 + Output: src_1.id, src_1.val + -> Seq Scan on public.src2 + Output: src2.id, src2.val + -> Index Scan using tgt_pkey on public.tgt + Output: tgt.ctid, tgt.id + Index Cond: (tgt.id = src.id) +(14 rows) + +step m2u: MERGE INTO tgt USING (SELECT * FROM src + UNION ALL + SELECT * FROM src2) src ON tgt.id = src.id + WHEN MATCHED THEN UPDATE SET val = src.val + WHEN NOT MATCHED THEN INSERT VALUES (src.id, src.val); <waiting ...> +step c1: COMMIT; +step m2u: <... completed> +step c2: COMMIT; +step s1: SELECT * FROM tgt; +id|val +--+--- + 1| 10 + 2| 20 + 3| 30 +(3 rows) + diff --git a/src/test/isolation/specs/merge-join.spec b/src/test/isolation/specs/merge-join.spec new file mode 100644 index e33a02c..564702b --- a/src/test/isolation/specs/merge-join.spec +++ b/src/test/isolation/specs/merge-join.spec @@ -6,6 +6,7 @@ setup { CREATE TABLE src (id int PRIMARY KEY, val int); + CREATE TABLE src2 (id int PRIMARY KEY, val int); CREATE TABLE tgt (id int PRIMARY KEY, val int); INSERT INTO src SELECT x, x*10 FROM generate_series(1,3) g(x); INSERT INTO tgt SELECT x, x FROM generate_series(1,3) g(x); @@ -13,7 +14,7 @@ setup teardown { - DROP TABLE src, tgt; + DROP TABLE src, src2, tgt; } session s1 @@ -36,6 +37,17 @@ step ex { EXPLAIN (verbose, costs off) step m2 { MERGE INTO tgt USING src ON tgt.id = src.id WHEN MATCHED THEN UPDATE SET val = src.val WHEN NOT MATCHED THEN INSERT VALUES (src.id, src.val); } +step exu { EXPLAIN (verbose, costs off) + MERGE INTO tgt USING (SELECT * FROM src + UNION ALL + SELECT * FROM src2) src ON tgt.id = src.id + WHEN MATCHED THEN UPDATE SET val = src.val + WHEN NOT MATCHED THEN INSERT VALUES (src.id, src.val); } +step m2u { MERGE INTO tgt USING (SELECT * FROM src + UNION ALL + SELECT * FROM src2) src ON tgt.id = src.id + WHEN MATCHED THEN UPDATE SET val = src.val + WHEN NOT MATCHED THEN INSERT VALUES (src.id, src.val); } step s2 { SELECT * FROM tgt; } step c2 { COMMIT; } @@ -43,3 +55,6 @@ permutation b1 m1 s1 c1 b2 m2 s2 c2 permutation b1 b2 m1 hj ex m2 c1 c2 s1 permutation b1 b2 m1 mj ex m2 c1 c2 s1 permutation b1 b2 m1 nl ex m2 c1 c2 s1 +permutation b1 b2 m1 hj exu m2u c1 c2 s1 +permutation b1 b2 m1 mj exu m2u c1 c2 s1 +permutation b1 b2 m1 nl exu m2u c1 c2 s1