Jefffrey commented on code in PR #22104:
URL: https://github.com/apache/datafusion/pull/22104#discussion_r3270963780


##########
datafusion/proto/src/logical_plan/mod.rs:
##########
@@ -1492,6 +1493,7 @@ impl AsLogicalPlan for LogicalPlanNode {
                 join_type,
                 join_constraint,
                 null_equality,
+                null_aware,
                 ..

Review Comment:
   Should we get rid of this `..` to avoid similar issues in the future?



##########
datafusion/proto/src/logical_plan/mod.rs:
##########
@@ -850,6 +849,13 @@ impl AsLogicalPlan for LogicalPlanNode {
                     from_proto::parse_exprs(&join.left_join_key, ctx, extension_codec)?;
                 let right_keys: Vec<Expr> =
                     from_proto::parse_exprs(&join.right_join_key, ctx, extension_codec)?;
+                if left_keys.len() != right_keys.len() {
+                    return internal_err!(

Review Comment:
   I think this should also be a proto error rather than an internal error?



##########
datafusion/proto/tests/cases/roundtrip_logical_plan.rs:
##########
@@ -3173,3 +3173,111 @@ async fn roundtrip_empty_table_scan_with_projection() -> Result<()> {
     );
     Ok(())
 }
+
+// Regression test for https://github.com/apache/datafusion/issues/22065:
+// the decoder must preserve `null_aware = true` (NOT IN semantics)
+// across a to_proto -> from_proto round trip. `null_equality` is at
+// its default (`NullEqualsNothing`).
+#[tokio::test]
+async fn roundtrip_join_null_aware() -> Result<()> {
+    use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
+    use datafusion_expr::JoinType;
+
+    let ctx = SessionContext::new();
+    let sql = "
+        SELECT id
+        FROM (VALUES (1), (2), (3)) AS t1(id)
+        WHERE id NOT IN (
+            SELECT bad_id
+            FROM (VALUES (CAST(1 AS INT)), (CAST(NULL AS INT))) AS excludes(bad_id)
+        )
+    ";
+
+    let df = ctx.sql(sql).await?;
+    let plan = ctx.state().optimize(df.logical_plan())?;
+
+    let mut found_null_aware = false;
+    plan.apply(|n| {
+        if let LogicalPlan::Join(j) = n
+            && j.join_type == JoinType::LeftAnti
+            && j.null_aware
+        {
+            found_null_aware = true;
+        }
+        Ok(TreeNodeRecursion::Continue)
+    })?;
+    assert!(found_null_aware);
+
+    let bytes = logical_plan_to_bytes(&plan)?;
+    let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx.task_ctx())?;
+    assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}"));
+
+    let direct: usize = ctx
+        .execute_logical_plan(plan)
+        .await?
+        .collect()
+        .await?
+        .iter()
+        .map(|b| b.num_rows())
+        .sum();
+    let rt: usize = ctx
+        .execute_logical_plan(logical_round_trip)
+        .await?
+        .collect()
+        .await?
+        .iter()
+        .map(|b| b.num_rows())
+        .sum();
+    assert_eq!(direct, 0);
+    assert_eq!(rt, direct);
+
+    Ok(())
+}
+
+// Regression test for `null_equality` round-trip (related to #22065):
+// the decoder must preserve a non-default `null_equality`
+// (`NullEqualsNull`) across a to_proto -> from_proto round trip.
+// `null_aware` is at its default (`false`).
+#[tokio::test]
+async fn roundtrip_join_null_equality() -> Result<()> {
+    use datafusion_common::NullEquality;
+    use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion};
+    use datafusion_expr::logical_plan::Join;
+
+    let ctx = SessionContext::new();
+    let sql = "
+        SELECT t1.a FROM (VALUES (1)) AS t1(a)
+        INNER JOIN (VALUES (1)) AS t2(b) ON t1.a = t2.b
+    ";
+    let plan = ctx
+        .sql(sql)
+        .await?
+        .into_optimized_plan()?
+        .transform_up(|node| {
+            Ok(match node {
+                LogicalPlan::Join(j) => Transformed::yes(LogicalPlan::Join(Join {
+                    null_equality: NullEquality::NullEqualsNull,

Review Comment:
   Would these cases be easier to read if we constructed the join manually 
instead of verifying/modifying it after SQL parsing?



##########
datafusion/proto/tests/cases/roundtrip_logical_plan.rs:
##########
@@ -3173,3 +3173,111 @@ async fn roundtrip_empty_table_scan_with_projection() -> Result<()> {
     );
     Ok(())
 }
+
+// Regression test for https://github.com/apache/datafusion/issues/22065:
+// the decoder must preserve `null_aware = true` (NOT IN semantics)
+// across a to_proto -> from_proto round trip. `null_equality` is at
+// its default (`NullEqualsNothing`).
+#[tokio::test]
+async fn roundtrip_join_null_aware() -> Result<()> {
+    use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
+    use datafusion_expr::JoinType;
+
+    let ctx = SessionContext::new();
+    let sql = "
+        SELECT id
+        FROM (VALUES (1), (2), (3)) AS t1(id)
+        WHERE id NOT IN (
+            SELECT bad_id
+            FROM (VALUES (CAST(1 AS INT)), (CAST(NULL AS INT))) AS excludes(bad_id)
+        )
+    ";
+
+    let df = ctx.sql(sql).await?;
+    let plan = ctx.state().optimize(df.logical_plan())?;
+
+    let mut found_null_aware = false;
+    plan.apply(|n| {
+        if let LogicalPlan::Join(j) = n
+            && j.join_type == JoinType::LeftAnti
+            && j.null_aware
+        {
+            found_null_aware = true;
+        }
+        Ok(TreeNodeRecursion::Continue)
+    })?;
+    assert!(found_null_aware);
+
+    let bytes = logical_plan_to_bytes(&plan)?;
+    let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx.task_ctx())?;
+    assert_eq!(format!("{plan:?}"), format!("{logical_round_trip:?}"));
+
+    let direct: usize = ctx

Review Comment:
   Do we need this execution test? I feel that comparing the logical plan 
structure itself should be sufficient.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to