Re: [PR] More decimal 32/64 support - type coercion and misc gaps [datafusion]

via GitHub Thu, 02 Oct 2025 12:08:28 -0700


Jefffrey commented on code in PR #17808:
URL: https://github.com/apache/datafusion/pull/17808#discussion_r2387718774



##########
datafusion/expr-common/src/type_coercion/binary.rs:
##########
@@ -955,28 +974,111 @@ pub fn decimal_coercion(lhs_type: &DataType, rhs_type: 
&DataType) -> Option<Data
 
     match (lhs_type, rhs_type) {
         // Prefer decimal data type over floating point for comparison 
operation
+        (Decimal32(_, _), Decimal32(_, _)) => get_wider_decimal_type(lhs_type, 
rhs_type),
+        (Decimal32(_, _), Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, 
_)) => {
+            get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
+        }
+        (Decimal32(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
+        (Decimal64(_, _), Decimal64(_, _)) => get_wider_decimal_type(lhs_type, 
rhs_type),
+        (Decimal64(_, _), Decimal32(_, _) | Decimal128(_, _) | Decimal256(_, 
_)) => {
+            get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
+        }
+        (Decimal64(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
         (Decimal128(_, _), Decimal128(_, _)) => {
             get_wider_decimal_type(lhs_type, rhs_type)
         }
+        (Decimal128(_, _), Decimal32(_, _) | Decimal64(_, _) | Decimal256(_, 
_)) => {
+            get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
+        }
         (Decimal128(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
-        (_, Decimal128(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
         (Decimal256(_, _), Decimal256(_, _)) => {
             get_wider_decimal_type(lhs_type, rhs_type)
         }
+        (Decimal256(_, _), Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, 
_)) => {
+            get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
+        }
         (Decimal256(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
+        (_, Decimal32(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
+        (_, Decimal64(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
+        (_, Decimal128(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
         (_, Decimal256(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
         (_, _) => None,
     }
 }
 
+/// Handle cross-variant decimal widening by choosing the larger variant
+fn get_wider_decimal_type_cross_variant(
+    lhs_type: &DataType,
+    rhs_type: &DataType,
+) -> Option<DataType> {
+    use arrow::datatypes::DataType::*;
+
+    let (p1, s1) = match lhs_type {
+        Decimal32(p, s) => (*p, *s),
+        Decimal64(p, s) => (*p, *s),
+        Decimal128(p, s) => (*p, *s),
+        Decimal256(p, s) => (*p, *s),
+        _ => return None,
+    };
+
+    let (p2, s2) = match rhs_type {
+        Decimal32(p, s) => (*p, *s),
+        Decimal64(p, s) => (*p, *s),
+        Decimal128(p, s) => (*p, *s),
+        Decimal256(p, s) => (*p, *s),
+        _ => return None,
+    };
+
+    // max(s1, s2) + max(p1-s1, p2-s2), max(s1, s2)
+    let s = s1.max(s2);
+    let range = (p1 as i8 - s1).max(p2 as i8 - s2);
+    let required_precision = (range + s) as u8;
+
+    // We currently don't handle cases where the required precision overflows
+    if required_precision > DECIMAL256_MAX_PRECISION {
+        return None;
+    }
+
+    // Choose the larger variant between the two input types
+    match (lhs_type, rhs_type) {
+        (Decimal32(_, _), Decimal64(_, _)) | (Decimal64(_, _), Decimal32(_, 
_)) => {
+            Some(Decimal64(required_precision, s))
+        }
+        (Decimal32(_, _), Decimal128(_, _)) | (Decimal128(_, _), Decimal32(_, 
_)) => {
+            Some(Decimal128(required_precision, s))
+        }
+        (Decimal32(_, _), Decimal256(_, _)) | (Decimal256(_, _), Decimal32(_, 
_)) => {
+            Some(Decimal256(required_precision, s))
+        }
+        (Decimal64(_, _), Decimal128(_, _)) | (Decimal128(_, _), Decimal64(_, 
_)) => {
+            Some(Decimal128(required_precision, s))
+        }
+        (Decimal64(_, _), Decimal256(_, _)) | (Decimal256(_, _), Decimal64(_, 
_)) => {
+            Some(Decimal256(required_precision, s))
+        }
+        (Decimal128(_, _), Decimal256(_, _)) | (Decimal256(_, _), 
Decimal128(_, _)) => {
+            Some(Decimal256(required_precision, s))
+        }

Review Comment:
   ```suggestion
       // Choose the larger variant between the two input types
       match (lhs_type, rhs_type) {
           (Decimal32(_, _), Decimal64(_, _)) | (Decimal64(_, _), Decimal32(_, 
_)) if required_precision <= DECIMAL64_MAX_PRECISION => {
               Some(Decimal64(required_precision, s))
           }
           (Decimal32(_, _), Decimal128(_, _)) | (Decimal128(_, _), 
Decimal32(_, _)) if required_precision <= DECIMAL128_MAX_PRECISION => {
               Some(Decimal128(required_precision, s))
           }
           (Decimal32(_, _), Decimal256(_, _)) | (Decimal256(_, _), 
Decimal32(_, _)) if required_precision <= DECIMAL256_MAX_PRECISION => {
               Some(Decimal256(required_precision, s))
           }
           (Decimal64(_, _), Decimal128(_, _)) | (Decimal128(_, _), 
Decimal64(_, _)) if required_precision <= DECIMAL128_MAX_PRECISION => {
               Some(Decimal128(required_precision, s))
           }
           (Decimal64(_, _), Decimal256(_, _)) | (Decimal256(_, _), 
Decimal64(_, _)) if required_precision <= DECIMAL256_MAX_PRECISION => {
               Some(Decimal256(required_precision, s))
           }
           (Decimal128(_, _), Decimal256(_, _)) | (Decimal256(_, _), 
Decimal128(_, _)) if required_precision <= DECIMAL256_MAX_PRECISION => {
               Some(Decimal256(required_precision, s))
           }
   ```
   
   Do we need to check the required precision against the maximum precision of 
each target decimal type (not only `DECIMAL256_MAX_PRECISION`), like so?



##########
datafusion/expr-common/src/type_coercion/binary.rs:
##########
@@ -955,28 +963,106 @@ pub fn decimal_coercion(lhs_type: &DataType, rhs_type: 
&DataType) -> Option<Data
 
     match (lhs_type, rhs_type) {
         // Prefer decimal data type over floating point for comparison 
operation
+        (Decimal32(_, _), Decimal32(_, _)) => get_wider_decimal_type(lhs_type, 
rhs_type),
+        (Decimal32(_, _), Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, 
_)) => {
+            get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
+        }
+        (Decimal32(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
+        (Decimal64(_, _), Decimal64(_, _)) => get_wider_decimal_type(lhs_type, 
rhs_type),
+        (Decimal64(_, _), Decimal32(_, _) | Decimal128(_, _) | Decimal256(_, 
_)) => {
+            get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
+        }
+        (Decimal64(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
         (Decimal128(_, _), Decimal128(_, _)) => {
             get_wider_decimal_type(lhs_type, rhs_type)
         }
+        (Decimal128(_, _), Decimal32(_, _) | Decimal64(_, _) | Decimal256(_, 
_)) => {
+            get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
+        }
         (Decimal128(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
-        (_, Decimal128(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
         (Decimal256(_, _), Decimal256(_, _)) => {
             get_wider_decimal_type(lhs_type, rhs_type)
         }
+        (Decimal256(_, _), Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, 
_)) => {
+            get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
+        }
         (Decimal256(_, _), _) => get_common_decimal_type(lhs_type, rhs_type),
+        (_, Decimal32(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
+        (_, Decimal64(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
+        (_, Decimal128(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
         (_, Decimal256(_, _)) => get_common_decimal_type(rhs_type, lhs_type),
         (_, _) => None,
     }
 }
 
+/// Handle cross-variant decimal widening by choosing the larger variant
+fn get_wider_decimal_type_cross_variant(
+    lhs_type: &DataType,
+    rhs_type: &DataType,
+) -> Option<DataType> {
+    use arrow::datatypes::DataType::*;
+
+    let (p1, s1) = match lhs_type {
+        Decimal32(p, s) => (*p, *s),
+        Decimal64(p, s) => (*p, *s),
+        Decimal128(p, s) => (*p, *s),
+        Decimal256(p, s) => (*p, *s),
+        _ => return None,
+    };
+
+    let (p2, s2) = match rhs_type {
+        Decimal32(p, s) => (*p, *s),
+        Decimal64(p, s) => (*p, *s),
+        Decimal128(p, s) => (*p, *s),
+        Decimal256(p, s) => (*p, *s),
+        _ => return None,
+    };
+
+    // max(s1, s2) + max(p1-s1, p2-s2), max(s1, s2)
+    let s = s1.max(s2);
+    let range = (p1 as i8 - s1).max(p2 as i8 - s2);
+    let required_precision = (range + s) as u8;

Review Comment:
   Cheers; I left another minor comment related to the check below. It would 
also be nice to have a test for this edge case.



##########
datafusion/expr-common/src/type_coercion/binary.rs:
##########
@@ -955,28 +974,111 @@ pub fn decimal_coercion(lhs_type: &DataType, rhs_type: 
&DataType) -> Option<Data
 
     match (lhs_type, rhs_type) {

Review Comment:
   This might be cleaner like so:
   
   ```rust
   /// Decimal coercion rules.
   ///
   /// Picks the helper to call based on which sides are decimals:
   /// - both decimals, same variant: `get_wider_decimal_type`
   /// - both decimals, different variants: `get_wider_decimal_type_cross_variant`
   /// - exactly one decimal: `get_common_decimal_type`, with the decimal type
   ///   always passed as the first argument
   ///
   /// Returns `None` when neither side is a decimal; otherwise returns whatever
   /// the selected helper returns.
   pub fn decimal_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
       use arrow::datatypes::DataType::*;

       // Prefer decimal data type over floating point for comparison operation
       match (lhs_type, rhs_type) {
           // Both sides are decimals. Matching on the variants explicitly keeps
           // identical non-decimal pairs (which also share a discriminant) out
           // of this arm so they still reach the final `None`.
           (
               Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _),
               Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _),
           ) => {
               if std::mem::discriminant(lhs_type) == std::mem::discriminant(rhs_type) {
                   // Same decimal variant
                   get_wider_decimal_type(lhs_type, rhs_type)
               } else {
                   // Mismatched decimal variants
                   get_wider_decimal_type_cross_variant(lhs_type, rhs_type)
               }
           }
           // Decimal on the left, non-decimal on the right
           (Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _), _) => {
               get_common_decimal_type(lhs_type, rhs_type)
           }
           // Non-decimal on the left, decimal on the right: swap the arguments
           // so the decimal type is still passed first, matching the
           // per-variant `(_, DecimalN)` arms this match replaces.
           (_, Decimal32(_, _) | Decimal64(_, _) | Decimal128(_, _) | Decimal256(_, _)) => {
               get_common_decimal_type(rhs_type, lhs_type)
           }
           (_, _) => None,
       }
   }
   ```
   
   Following what was done above



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] More decimal 32/64 support - type coercion and misc gaps [datafusion]

Reply via email to