wenhui qiu <qiuwenhu...@gmail.com> 于2025年2月24日周一 09:48写道:

> Hi Richard Guo
>    I found this path https://commitfest.postgresql.org/patch/3235/
> already supports anti join , But I've found that in many cases it doesn't
> work.It always uses SubPlan Here's my testing process.
>
> ###########
> create table join1 (id integer,name varchar(300),k1 integer);
> create table join2 (id integer,name varchar(300),score integer);
> insert into join1 values (
> generate_series(1,20000),'aaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAaaAAAAAAAAAAAAAAAAAAAAaaaaaaaAAAAAAAAAAAAAAAAAAAAA',10);
> insert into join1 values (
> generate_series(1,20000),'aaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAaaAAAAAAAAAAAAAAAAAAAAaaaaaaaAAAAAAAAAAAAAAAAAAAAA',10);
> insert into join1 values (
> generate_series(1,20000),'aaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAaaAAAAAAAAAAAAAAAAAAAAaaaaaaaAAAAAAAAAAAAAAAAAAAAA',10);
> insert into join1 values (
> generate_series(50201,50300),'aaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAASSSSSAAAAAAAAAAAAAAAAaaAAAAAAAAAAAAAAAAAAAAaaaaaaaAAAAAAAAAAAAAAAAAAAAA',10);
> insert into join1 values (
> generate_series(50201,50300),'aaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAASSSSSAAAAAAAAAAAAAAAAaaAAAAAAAAAAAAAAAAAAAAaaaaaaaAAAAAAAAAAAAAAAAAAAAA',10);
> insert into join1 values (
> generate_series(150201,1350300),'aaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAASSSSSAAAAAAAAAAAAAAAAaaAAAAAAAAAAAAAAAAAAAAaaaaaaaAAAAAAAAAAAAAAAAAAAAA',10);
> insert into join2 values (
> generate_series(1,40000),'aaaaaaaaaaaaaaaaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAaaAAAAAAAAAAAAAAAAAAAAaaaaaaaAAAAAAAAAAAAAAAAAAAAA',1);
> insert into join2 values (
> generate_series(1,40000),'aaaaaaaaaaaaaaaaAAAAAAABBAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAaaAAAAAAAAAAAAAAAAAAAAaaaaaaaAAAAAAAAAAAAAAAAAAAAA',2);
> insert into join2 values (
> generate_series(20001,22000),'aaaaaaaaaaaaaaaaAACCCCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAaaAAAAAAAAAAAAAAAAAAAAaaaaaaaAAAAAAAAAAAAAAAAAAAAA',3);
> insert into join2 values (
> generate_series(150201,950300),'aaaaaaaaaaaaaaaaAACCCCAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAaaAAAAAAAAAAAAAAAAAAAAaaaaaaaAAAAAAAAAAAAAAAAAAAAA',3);
> create index idx_j1 on join1(id);
> create index idx_j2 on join2(id);
> VACUUM ANALYZE JOIN1;
> VACUUM ANALYZE JOIN2;
>
> test=#  explain  SELECT T1.id,T1.K1 FROM join1 t1 WHERE T1.id NOT IN
> (SELECT T2.id FROM join2 t2 WHERE T2.ID>10000)
> test-# ;
>                                                QUERY PLAN
>
> ---------------------------------------------------------------------------------------------------------
>  Gather  (cost=1000.42..9016319078.86 rows=630150 width=8)
>    Workers Planned: 2
>    ->  Parallel Seq Scan on join1 t1  (cost=0.42..9016255063.86
> rows=262562 width=8)
>          Filter: (NOT (ANY (id = (SubPlan 1).col1)))
>          SubPlan 1
>            ->  Materialize  (cost=0.42..32181.54 rows=863294 width=4)
>                  ->  Index Only Scan using idx_j2 on join2 t2
>  (cost=0.42..24492.07 rows=863294 width=4)
>                        Index Cond: (id > 10000)
> (8 rows)
>
> test=#  explain  SELECT T1.id,T1.K1 FROM join1 t1 WHERE T1.id NOT IN
> (SELECT T2.id FROM join2 t2 );
>                                                QUERY PLAN
>
> ---------------------------------------------------------------------------------------------------------
>  Gather  (cost=1000.42..8633476697.61 rows=630150 width=8)
>    Workers Planned: 2
>    ->  Parallel Seq Scan on join1 t1  (cost=0.42..8633412682.61
> rows=262562 width=8)
>          Filter: (NOT (ANY (id = (SubPlan 1).col1)))
>          SubPlan 1
>            ->  Materialize  (cost=0.42..30676.42 rows=882100 width=4)
>                  ->  Index Only Scan using idx_j2 on join2 t2
>  (cost=0.42..22819.92 rows=882100 width=4)
> (7 rows)
>
> test=#  explain  SELECT T1.id,T1.K1 FROM join1 t1 WHERE T1.id NOT IN
> (SELECT T2.id FROM join2 t2 where T2.ID < 1000);
>                                        QUERY PLAN
>
> ----------------------------------------------------------------------------------------
>  Seq Scan on join1 t1  (cost=61.73..38730.47 rows=630150 width=8)
>    Filter: (NOT (ANY (id = (hashed SubPlan 1).col1)))
>    SubPlan 1
>      ->  Index Only Scan using idx_j2 on join2 t2  (cost=0.42..57.06
> rows=1865 width=4)
>            Index Cond: (id < 1000)
> (5 rows)
>
> test=#  explain  SELECT T1.id,T1.K1 FROM join1 t1 WHERE T1.id NOT IN
> (SELECT T2.id FROM join2 t2 where T2.ID = 1000);
>                                      QUERY PLAN
>
> ------------------------------------------------------------------------------------
>  Seq Scan on join1 t1  (cost=4.45..38673.19 rows=630150 width=8)
>    Filter: (NOT (ANY (id = (hashed SubPlan 1).col1)))
>    SubPlan 1
>      ->  Index Only Scan using idx_j2 on join2 t2  (cost=0.42..4.44 rows=1
> width=4)
>            Index Cond: (id = 1000)
> (5 rows)
>
>
Planner now doesn't support pulling up the "NOT IN" sublink. The "NOT IN"
sublink will be transformed into SubPlan.


-- 
Thanks,
Tender Wang

Reply via email to