Jefffrey commented on code in PR #17808:
URL: https://github.com/apache/datafusion/pull/17808#discussion_r2386609740


##########
datafusion/expr-common/src/type_coercion/binary.rs:
##########
@@ -955,28 +963,106 @@ pub fn decimal_coercion(lhs_type: &DataType, rhs_type: 
&DataType) -> Option<Data
 
     match (lhs_type, rhs_type) {
         // Prefer decimal data type over floating point for comparison 
operation
+        (Decimal32(_, _), Decimal32(_, _)) => get_wider_decimal_type(lhs_type, 
rhs_type),
+        (Decimal32(_, _), Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, 
_)) => {
+            get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
+        }
+        (Decimal32(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
+        (Decimal64(_, _), Decimal64(_, _)) => get_wider_decimal_type(lhs_type, 
rhs_type),
+        (Decimal64(_, _), Decimal32(_, _) | Decimal128(_, _) | Decimal256(_, 
_)) => {
+            get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
+        }
+        (Decimal64(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
         (Decimal128(_, _), Decimal128(_, _)) => {
             get_wider_decimal_type(lhs_type, rhs_type)
         }
+        (Decimal128(_, _), Decimal32(_, _) | Decimal64(_, _) | Decimal256(_, 
_)) => {
+            get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
+        }
         (Decimal128(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
-        (_, Decimal128(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
         (Decimal256(_, _), Decimal256(_, _)) => {
             get_wider_decimal_type(lhs_type, rhs_type)
         }
+        (Decimal256(_, _), Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, 
_)) => {
+            get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
+        }
         (Decimal256(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
+        (_, Decimal32(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
+        (_, Decimal64(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
+        (_, Decimal128(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
         (_, Decimal256(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
         (_, _) => None,
     }
 }
 
+/// Handle cross-variant decimal widening by choosing the larger variant
+fn get_wider_decimal_type_cross_variant(
+    lhs_type: &DataType,
+    rhs_type: &DataType,
+) -> Option<DataType> {
+    use arrow::datatypes::DataType::*;
+
+    let (p1, s1) = match lhs_type {
+        Decimal32(p, s) => (*p, *s),
+        Decimal64(p, s) => (*p, *s),
+        Decimal128(p, s) => (*p, *s),
+        Decimal256(p, s) => (*p, *s),
+        _ => return None,
+    };
+
+    let (p2, s2) = match rhs_type {
+        Decimal32(p, s) => (*p, *s),
+        Decimal64(p, s) => (*p, *s),
+        Decimal128(p, s) => (*p, *s),
+        Decimal256(p, s) => (*p, *s),
+        _ => return None,
+    };
+
+    // max(s1, s2) + max(p1-s1, p2-s2), max(s1, s2)
+    let s = s1.max(s2);
+    let range = (p1 as i8 - s1).max(p2 as i8 - s2);
+    let required_precision = (range + s) as u8;

Review Comment:
   What happens if we have:
   
   Decimal256 with precision 76 (max) and scale 0, and Decimal128 with 
precision 38 (max) with scale 1;
   
   So `s = 1`, `range = 76`, and `required_precision = 76 + 1 = 77`, which 
exceeds the maximum Decimal256 precision of 76 -> overflow?
   
   Is this a valid case?



##########
datafusion/spark/src/function/math/width_bucket.rs:
##########
@@ -96,7 +96,14 @@ impl ScalarUDFImpl for SparkWidthBucket {
         let is_num = |t: &DataType| {
             matches!(
                 t,
-                Int8 | Int16 | Int32 | Int64 | Float32 | Float64 | 
Decimal128(_, _)
+                Int8 | Int16
+                    | Int32
+                    | Int64
+                    | Float32
+                    | Float64
+                    | Decimal32(_, _)
+                    | Decimal64(_, _)
+                    | Decimal128(_, _)

Review Comment:
   We could potentially use 
[`is_signed_numeric()`](https://github.com/apache/datafusion/blob/2d947b31d0355710dc179d1d72ca5366b7721b2a/datafusion/expr/src/type_coercion/mod.rs#L43-L59)
 here (though that would also bring in `Float16`).



##########
datafusion/physical-plan/src/joins/sort_merge_join/stream.rs:
##########
@@ -1994,6 +1996,8 @@ fn is_join_arrays_equal(
             DataType::BinaryView => compare_value!(BinaryViewArray),
             DataType::FixedSizeBinary(_) => 
compare_value!(FixedSizeBinaryArray),
             DataType::LargeBinary => compare_value!(LargeBinaryArray),
+            DataType::Decimal32(..) => compare_value!(Decimal32Array),
+            DataType::Decimal64(..) => compare_value!(Decimal64Array),

Review Comment:
   Yeah, I am curious why `Decimal256` is omitted. Perhaps we can add it if 
doing so causes no compiler error?



##########
datafusion/expr-common/src/type_coercion/binary.rs:
##########
@@ -357,6 +357,14 @@ fn math_decimal_coercion(
         | (Decimal256(_, _), Decimal256(_, _)) => {
             Some((lhs_type.clone(), rhs_type.clone()))
         }
+        // Cross-variant decimal coercion - choose larger variant with 
appropriate precision/scale
+        (Decimal32(_, _), Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, 
_))
+        | (Decimal64(_, _), Decimal32(_, _) | Decimal128(_, _) | Decimal256(_, 
_))
+        | (Decimal128(_, _), Decimal32(_, _) | Decimal64(_, _) | Decimal256(_, 
_))
+        | (Decimal256(_, _), Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, 
_)) => {
+            let coerced_type = get_wider_decimal_type_cross_variant(lhs_type, 
rhs_type)?;
+            Some((coerced_type.clone(), coerced_type))
+        }

Review Comment:
   ```suggestion
           // Cross-variant decimal coercion - choose larger variant with 
appropriate precision/scale
           (lhs, rhs) if is_decimal(lhs) && is_decimal(rhs) && 
std::mem::discriminant(lhs) != std::mem::discriminant(rhs) => {
               let coerced_type = 
get_wider_decimal_type_cross_variant(lhs_type, rhs_type)?;
               Some((coerced_type.clone(), coerced_type))
           }
   ```
   
   Using 
[`is_decimal`](https://github.com/apache/datafusion/blob/2d947b31d0355710dc179d1d72ca5366b7721b2a/datafusion/expr/src/type_coercion/mod.rs#L92-L101)
 and 
[`std::mem::discriminant`](https://doc.rust-lang.org/std/mem/fn.discriminant.html)
   
   I'm not sure whether it's better to list out each combination (more 
explicit, but it is easier to overlook a missing combination) or to do it this 
way, which might not be as clear but could be more robust 🤔 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to