adriangb commented on code in PR #20426:
URL: https://github.com/apache/datafusion/pull/20426#discussion_r2906316372
##########
datafusion/core/src/physical_planner.rs:
##########
@@ -3436,16 +3436,16 @@ mod tests {
#[tokio::test]
async fn in_list_types() -> Result<()> {
- // expression: "a in ('a', 1)"
- let list = vec![lit("a"), lit(1i64)];
Review Comment:
I guess the point of this change is that `a in ('a', 1)` now is an error? It
would be great to move that to another test that explicitly checks the error,
if it is plan time or execution time, etc.
##########
datafusion/expr-common/src/type_coercion/binary.rs:
##########
@@ -843,102 +842,100 @@ pub fn try_type_union_resolution_with_struct(
Ok(final_struct_types)
}
-/// Coerce `lhs_type` and `rhs_type` to a common type for the purposes of a
-/// comparison operation
+/// Coerce `lhs_type` and `rhs_type` to a common type for type unification
+/// contexts — where two values must be brought to a common type but are not
+/// being compared. Examples: UNION, CASE THEN/ELSE branches, NVL2.
///
-/// Example comparison operations are `lhs = rhs` and `lhs > rhs`
+/// Prefers strings over numeric types (e.g., `SELECT 1 UNION SELECT '2'`
+/// coerces both sides to `Utf8`).
///
-/// Binary comparison kernels require the two arguments to be the (exact) same
-/// data type. However, users can write queries where the two arguments are
-/// different data types. In such cases, the data types are automatically cast
-/// (coerced) to a single data type to pass to the kernels.
-///
-/// # Numeric comparisons
-///
-/// When comparing numeric values, the lower precision type is coerced to the
-/// higher precision type to avoid losing data. For example when comparing
-/// `Int32` to `Int64` the coerced type is `Int64` so the `Int32` argument will
-/// be cast.
-///
-/// # Numeric / String comparisons
-///
-/// When comparing numeric values and strings, both values will be coerced to
-/// strings. For example when comparing `'2' > 1`, the arguments will be
-/// coerced to `Utf8` for comparison
-pub fn comparison_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
+/// For comparisons (`=`, `<`, `>`), use [`comparison_coercion`] instead.
+pub fn type_union_coercion(lhs_type: &DataType, rhs_type: &DataType) ->
Option<DataType> {
if lhs_type.equals_datatype(rhs_type) {
- // same type => equality is possible
return Some(lhs_type.clone());
}
binary_numeric_coercion(lhs_type, rhs_type)
- .or_else(|| dictionary_comparison_coercion(lhs_type, rhs_type, true))
- .or_else(|| ree_comparison_coercion(lhs_type, rhs_type, true))
+ .or_else(|| dictionary_coercion(lhs_type, rhs_type, true,
type_union_coercion))
+ .or_else(|| ree_coercion(lhs_type, rhs_type, true,
type_union_coercion))
.or_else(|| temporal_coercion_nonstrict_timezone(lhs_type, rhs_type))
.or_else(|| string_coercion(lhs_type, rhs_type))
.or_else(|| list_coercion(lhs_type, rhs_type))
.or_else(|| null_coercion(lhs_type, rhs_type))
- .or_else(|| string_numeric_coercion(lhs_type, rhs_type))
+ .or_else(|| string_numeric_union_coercion(lhs_type, rhs_type))
.or_else(|| string_temporal_coercion(lhs_type, rhs_type))
.or_else(|| binary_coercion(lhs_type, rhs_type))
- .or_else(|| struct_coercion(lhs_type, rhs_type))
- .or_else(|| map_coercion(lhs_type, rhs_type))
+ .or_else(|| struct_coercion(lhs_type, rhs_type, type_union_coercion))
+ .or_else(|| map_coercion(lhs_type, rhs_type, type_union_coercion))
}
-/// Similar to [`comparison_coercion`] but prefers numeric if compares with
-/// numeric and string
+/// Coerce `lhs_type` and `rhs_type` to a common type for comparison
+/// contexts — any context where two values are compared rather than
+/// unified. This includes binary comparison operators, IN lists,
+/// CASE/WHEN conditions, and BETWEEN.
+///
+/// When the two types differ, this function determines the common type
+/// to cast to.
///
/// # Numeric comparisons
///
-/// When comparing numeric values and strings, the values will be coerced to
the
-/// numeric type. For example, `'2' > 1` if `1` is an `Int32`, the arguments
-/// will be coerced to `Int32`.
-pub fn comparison_coercion_numeric(
- lhs_type: &DataType,
- rhs_type: &DataType,
-) -> Option<DataType> {
- if lhs_type == rhs_type {
+/// The lower precision type is widened to the higher precision type
+/// (e.g., `Int32` vs `Int64` → `Int64`).
+///
+/// # Numeric / String comparisons
+///
+/// Prefers the numeric type (e.g., `'2' > 1` where `1` is `Int32` coerces
+/// `'2'` to `Int32`).
+///
+/// For type unification contexts (UNION, CASE THEN/ELSE), use
+/// [`type_union_coercion`] instead.
Review Comment:
Worth including a bit more here, e.g. rationale, why we have different
behavior (matches Postgres, etc?)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]