suibianwanwan created CALCITE-7006:
--------------------------------------

             Summary: Incorrect left join results with IS NOT DISTINCT FROM 
under specific plan
                 Key: CALCITE-7006
                 URL: https://issues.apache.org/jira/browse/CALCITE-7006
             Project: Calcite
          Issue Type: Bug
            Reporter: suibianwanwan


 I forcibly applied the EnumerableNestedLoopJoin rule to make the execution 
plans identical for {{=}} and {{{}is not distinct from{}}}. Here are test in 
Quidem:
{code:java}
SELECT *
FROM (
  SELECT EMPNO, COMM, COMM + 1000 AS e1
  FROM EMP
) e_outer
LEFT JOIN (
  SELECT e0, TRUE AS EXISTS_FLAG
  FROM (
    SELECT t1.EMPNO, t1.e1, t2.e0
    FROM (
      SELECT EMPNO, COMM + 100 AS e1, COMM
      FROM EMP
    ) t1
    JOIN (
      SELECT COMM + 1000 AS e0
      FROM EMP
      GROUP BY COMM + 1000
    ) t2
    ON t1.COMM IS NULL OR t1.e1 > t2.e0
  ) j
  GROUP BY e0
) subq
ON e_outer.e1 = subq.e0;

 EMPNO | COMM    | E1      | E0      | EXISTS_FLAG
-------+---------+---------+---------+-------------
  7369 |         |         |         | null
  7499 |  300.00 | 1300.00 | 1300.00 | true
  7521 |  500.00 | 1500.00 | 1500.00 | true
  7566 |         |         |         | null
  7654 | 1400.00 | 2400.00 | 2400.00 | true
  7698 |         |         |         | null
  7782 |         |         |         | null
  7788 |         |         |         | null
  7839 |         |         |         | null
  7844 |    0.00 | 1000.00 | 1000.00 | true
  7876 |         |         |         | null
  7900 |         |         |         | null
  7902 |         |         |         | null
  7934 |         |         |         | null
(14 rows)

EnumerableNestedLoopJoin(condition=[=($2, $3)], joinType=[left])
  EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6, $t8)], 
EMPNO=[$t0], COMM=[$t6], E1=[$t9])
    EnumerableTableScan(table=[[scott, EMP]])
  EnumerableCalc(expr#0=[{inputs}], expr#1=[true], proj#0..1=[{exprs}])
    EnumerableAggregate(group=[{3}])
      EnumerableNestedLoopJoin(condition=[OR(IS NULL($2), >($1, $3))], 
joinType=[inner])
        EnumerableCalc(expr#0..7=[{inputs}], expr#8=[100], expr#9=[+($t6, 
$t8)], EMPNO=[$t0], E1=[$t9], COMM=[$t6])
          EnumerableTableScan(table=[[scott, EMP]])
        EnumerableAggregate(group=[{0}])
          EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6, 
$t8)], E0=[$t9])
            EnumerableTableScan(table=[[scott, EMP]]) {code}
Replace = with is not distinct from:
{code:java}
SELECT *
FROM (
  SELECT EMPNO, COMM, COMM + 1000 AS e1
  FROM EMP
) e_outer
LEFT JOIN (
  SELECT e0, TRUE AS EXISTS_FLAG
  FROM (
    SELECT t1.EMPNO, t1.e1, t2.e0
    FROM (
      SELECT EMPNO, COMM + 100 AS e1, COMM
      FROM EMP
    ) t1
    JOIN (
      SELECT COMM + 1000 AS e0
      FROM EMP
      GROUP BY COMM + 1000
    ) t2
    ON t1.COMM IS NULL OR t1.e1 > t2.e0
  ) j
  GROUP BY e0
) subq
ON e_outer.e1 is not distinct from subq.e0;

 EMPNO | COMM    | E1      | E0 | EXISTS_FLAG
-------+---------+---------+----+-------------
  7369 |         |         |    | true
  7499 |  300.00 | 1300.00 |    | null
  7521 |  500.00 | 1500.00 |    | null
  7566 |         |         |    | true
  7654 | 1400.00 | 2400.00 |    | null
  7698 |         |         |    | true
  7782 |         |         |    | true
  7788 |         |         |    | true
  7839 |         |         |    | true
  7844 |    0.00 | 1000.00 |    | null
  7876 |         |         |    | true
  7900 |         |         |    | true
  7902 |         |         |    | true
  7934 |         |         |    | true
(14 rows)

EnumerableNestedLoopJoin(condition=[IS NOT DISTINCT FROM($2, $3)], 
joinType=[left])
  EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6, $t8)], 
EMPNO=[$t0], COMM=[$t6], E1=[$t9])
    EnumerableTableScan(table=[[scott, EMP]])
  EnumerableCalc(expr#0=[{inputs}], expr#1=[true], proj#0..1=[{exprs}])
    EnumerableAggregate(group=[{3}])
      EnumerableNestedLoopJoin(condition=[OR(IS NULL($2), >($1, $3))], 
joinType=[inner])
        EnumerableCalc(expr#0..7=[{inputs}], expr#8=[100], expr#9=[+($t6, 
$t8)], EMPNO=[$t0], E1=[$t9], COMM=[$t6])
          EnumerableTableScan(table=[[scott, EMP]])
        EnumerableAggregate(group=[{0}])
          EnumerableCalc(expr#0..7=[{inputs}], expr#8=[1000], expr#9=[+($t6, 
$t8)], E0=[$t9])
            EnumerableTableScan(table=[[scott, EMP]]) {code}
>From the plan, aside from IS NOT DISTINCT FROM, the plans are identical. 
>However, the IS NOT DISTINCT FROM plan failed to join non-NULL data 
>successfully. I tried simplifying the SQL but couldn't reproduce the issue.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to