ShashidharM0118 commented on code in PR #19323:
URL: https://github.com/apache/datafusion/pull/19323#discussion_r2622732219
##########
datafusion/spark/src/function/hash/sha2.rs:
##########
@@ -225,3 +264,94 @@ fn compute_sha2(
}
.map(|hashed| spark_sha2_hex(&[hashed]).unwrap())
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use arrow::datatypes::Field;
+
+ #[test]
+ fn test_sha2_nullability() -> Result<()> {
+ let func = SparkSha2::new();
+
+ let non_nullable_expr: FieldRef =
+ Arc::new(Field::new("expr", DataType::Binary, false));
+ let non_nullable_bit_length: FieldRef =
+ Arc::new(Field::new("bit_length", DataType::Int32, false));
+
+ let out = func.return_field_from_args(ReturnFieldArgs {
+ arg_fields: &[
+ Arc::clone(&non_nullable_expr),
+ Arc::clone(&non_nullable_bit_length),
+ ],
+ scalar_arguments: &[None, None],
+ })?;
+ assert!(!out.is_nullable());
+ assert_eq!(out.data_type(), &DataType::Utf8);
+
+ let nullable_expr: FieldRef =
+ Arc::new(Field::new("expr", DataType::Binary, true));
+ let out = func.return_field_from_args(ReturnFieldArgs {
+ arg_fields: &[
+ Arc::clone(&nullable_expr),
+ Arc::clone(&non_nullable_bit_length),
+ ],
+ scalar_arguments: &[None, None],
+ })?;
+ assert!(out.is_nullable());
+ assert_eq!(out.data_type(), &DataType::Utf8);
+
+ let nullable_bit_length: FieldRef =
+ Arc::new(Field::new("bit_length", DataType::Int32, true));
+ let out = func.return_field_from_args(ReturnFieldArgs {
+ arg_fields: &[
+ Arc::clone(&non_nullable_expr),
+ Arc::clone(&nullable_bit_length),
+ ],
+ scalar_arguments: &[None, None],
+ })?;
+ assert!(out.is_nullable());
+ assert_eq!(out.data_type(), &DataType::Utf8);
+
+ let null_expr: FieldRef = Arc::new(Field::new("expr", DataType::Null,
true));
+ let out = func.return_field_from_args(ReturnFieldArgs {
+ arg_fields: &[null_expr, Arc::clone(&non_nullable_bit_length)],
+ scalar_arguments: &[None, None],
+ })?;
+ assert!(out.is_nullable());
+ assert_eq!(out.data_type(), &DataType::Null);
Review Comment:
> Please add comments above each case. I don't see what is the difference
between nullable_expr
(https://github.com/apache/datafusion/pull/19323/changes#diff-288c6716efda738c6ce1f71da2e679e600cfa79b78aeadd7871ce063d701d334R292-R302)
and null_expr
(https://github.com/apache/datafusion/pull/19323/changes#diff-288c6716efda738c6ce1f71da2e679e600cfa79b78aeadd7871ce063d701d334R316-R322)
Hi @martin-g, thanks for the review.
The goal was to cover both branches of this part of `return_field_from_args`:
```
match arg_types[0] {
DataType::Utf8View
| DataType::LargeUtf8
| DataType::Utf8
| DataType::Binary
| DataType::BinaryView
| DataType::LargeBinary => DataType::Utf8,
DataType::Null => DataType::Null,
}
```
For the `nullable_expr` case I picked one representative type from the
“string / binary” group above, `DataType::Binary`:
- `expr = Field::new("expr", DataType::Binary, /* nullable = */ true)`
- `bit_length = Int32 (non-null)`
- This hits the first arm and checks we return **`Utf8` and nullable**
(nullable Binary → nullable Utf8).
For the `null_expr` case I picked the only type from the second arm:
- `expr = Field::new("expr", DataType::Null, /* nullable = */ true)`
- `bit_length = Int32 (non-null)`
- This hits the `DataType::Null => DataType::Null` arm and checks we return
**`Null` and nullable** (Null expr type → Null result type).
So the two tests are intentionally chosen representatives: one from the
“string/binary → Utf8” branch and one from the “Null → Null” branch, to make
sure both type paths are covered.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]