This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 5981d62c28 chore: remove datatype check functions in favour of
upstream versions (#20104)
5981d62c28 is described below
commit 5981d62c2860268790986246b705a1963e25f4e7
Author: Jeffrey Vo <[email protected]>
AuthorDate: Wed Feb 4 06:26:14 2026 +0900
chore: remove datatype check functions in favour of upstream versions
(#20104)
We now have
[`DataType::is_string`](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html#method.is_string)
and
[`DataType::is_decimal`](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html#method.is_decimal)
so make the most of them
---
datafusion/expr-common/src/type_coercion/binary.rs | 22 ++++++--------------
datafusion/expr/src/type_coercion/mod.rs | 24 ----------------------
datafusion/optimizer/src/analyzer/type_coercion.rs | 23 +++++++++++----------
.../physical-expr/src/expressions/negative.rs | 4 ++--
datafusion/pruning/src/pruning_predicate.rs | 11 ++--------
5 files changed, 22 insertions(+), 62 deletions(-)
diff --git a/datafusion/expr-common/src/type_coercion/binary.rs
b/datafusion/expr-common/src/type_coercion/binary.rs
index 427ebc5980..9051f412bd 100644
--- a/datafusion/expr-common/src/type_coercion/binary.rs
+++ b/datafusion/expr-common/src/type_coercion/binary.rs
@@ -351,16 +351,6 @@ impl<'a> BinaryTypeCoercer<'a> {
// TODO Move the rest inside of BinaryTypeCoercer
-fn is_decimal(data_type: &DataType) -> bool {
- matches!(
- data_type,
- DataType::Decimal32(..)
- | DataType::Decimal64(..)
- | DataType::Decimal128(..)
- | DataType::Decimal256(..)
- )
-}
-
/// Returns true if both operands are Date types (Date32 or Date64)
/// Used to detect Date - Date operations which should return Int64 (days
difference)
fn is_date_minus_date(lhs: &DataType, rhs: &DataType) -> bool {
@@ -402,8 +392,8 @@ fn math_decimal_coercion(
}
// Cross-variant decimal coercion - choose larger variant with
appropriate precision/scale
(lhs, rhs)
- if is_decimal(lhs)
- && is_decimal(rhs)
+ if lhs.is_decimal()
+ && rhs.is_decimal()
&& std::mem::discriminant(lhs) != std::mem::discriminant(rhs)
=>
{
let coerced_type = get_wider_decimal_type_cross_variant(lhs_type,
rhs_type)?;
@@ -1018,8 +1008,8 @@ pub fn decimal_coercion(lhs_type: &DataType, rhs_type:
&DataType) -> Option<Data
match (lhs_type, rhs_type) {
// Same decimal types
(lhs_type, rhs_type)
- if is_decimal(lhs_type)
- && is_decimal(rhs_type)
+ if lhs_type.is_decimal()
+ && rhs_type.is_decimal()
&& std::mem::discriminant(lhs_type)
== std::mem::discriminant(rhs_type) =>
{
@@ -1027,8 +1017,8 @@ pub fn decimal_coercion(lhs_type: &DataType, rhs_type:
&DataType) -> Option<Data
}
// Mismatched decimal types
(lhs_type, rhs_type)
- if is_decimal(lhs_type)
- && is_decimal(rhs_type)
+ if lhs_type.is_decimal()
+ && rhs_type.is_decimal()
&& std::mem::discriminant(lhs_type)
!= std::mem::discriminant(rhs_type) =>
{
diff --git a/datafusion/expr/src/type_coercion/mod.rs
b/datafusion/expr/src/type_coercion/mod.rs
index bd1acd3f3a..c92d434e34 100644
--- a/datafusion/expr/src/type_coercion/mod.rs
+++ b/datafusion/expr/src/type_coercion/mod.rs
@@ -58,11 +58,6 @@ pub fn is_signed_numeric(dt: &DataType) -> bool {
)
}
-/// Determine whether the given data type `dt` is `Null`.
-pub fn is_null(dt: &DataType) -> bool {
- *dt == DataType::Null
-}
-
/// Determine whether the given data type `dt` is a `Timestamp`.
pub fn is_timestamp(dt: &DataType) -> bool {
matches!(dt, DataType::Timestamp(_, _))
@@ -80,22 +75,3 @@ pub fn is_datetime(dt: &DataType) -> bool {
DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _)
)
}
-
-/// Determine whether the given data type `dt` is a `Utf8` or `Utf8View` or
`LargeUtf8`.
-pub fn is_utf8_or_utf8view_or_large_utf8(dt: &DataType) -> bool {
- matches!(
- dt,
- DataType::Utf8 | DataType::Utf8View | DataType::LargeUtf8
- )
-}
-
-/// Determine whether the given data type `dt` is a `Decimal`.
-pub fn is_decimal(dt: &DataType) -> bool {
- matches!(
- dt,
- DataType::Decimal32(_, _)
- | DataType::Decimal64(_, _)
- | DataType::Decimal128(_, _)
- | DataType::Decimal256(_, _)
- )
-}
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs
b/datafusion/optimizer/src/analyzer/type_coercion.rs
index 8b57cc798d..1e59ceb972 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -43,10 +43,10 @@ use datafusion_expr::expr_schema::cast_subquery;
use datafusion_expr::logical_plan::Subquery;
use datafusion_expr::type_coercion::binary::{comparison_coercion,
like_coercion};
use datafusion_expr::type_coercion::functions::{UDFCoercionExt,
fields_with_udf};
+use datafusion_expr::type_coercion::is_datetime;
use datafusion_expr::type_coercion::other::{
get_coerce_type_for_case_expression, get_coerce_type_for_list,
};
-use datafusion_expr::type_coercion::{is_datetime,
is_utf8_or_utf8view_or_large_utf8};
use datafusion_expr::utils::merge_schema;
use datafusion_expr::{
Cast, Expr, ExprSchemable, Join, Limit, LogicalPlan, Operator, Projection,
Union,
@@ -513,10 +513,8 @@ impl TreeNodeRewriter for TypeCoercionRewriter<'_> {
.data;
let expr_type = expr.get_type(self.schema)?;
let subquery_type = new_plan.schema().field(0).data_type();
- if (expr_type.is_numeric()
- && is_utf8_or_utf8view_or_large_utf8(subquery_type))
- || (subquery_type.is_numeric()
- && is_utf8_or_utf8view_or_large_utf8(&expr_type))
+ if (expr_type.is_numeric() && subquery_type.is_string())
+ || (subquery_type.is_numeric() && expr_type.is_string())
{
return plan_err!(
"expr type {expr_type} can't cast to {subquery_type}
in SetComparison"
@@ -890,12 +888,15 @@ fn coerce_frame_bound(
fn extract_window_frame_target_type(col_type: &DataType) -> Result<DataType> {
if col_type.is_numeric()
- || is_utf8_or_utf8view_or_large_utf8(col_type)
- || matches!(col_type, DataType::List(_))
- || matches!(col_type, DataType::LargeList(_))
- || matches!(col_type, DataType::FixedSizeList(_, _))
- || matches!(col_type, DataType::Null)
- || matches!(col_type, DataType::Boolean)
+ || col_type.is_string()
+ || col_type.is_null()
+ || matches!(
+ col_type,
+ DataType::List(_)
+ | DataType::LargeList(_)
+ | DataType::FixedSizeList(_, _)
+ | DataType::Boolean
+ )
{
Ok(col_type.clone())
} else if is_datetime(col_type) {
diff --git a/datafusion/physical-expr/src/expressions/negative.rs
b/datafusion/physical-expr/src/expressions/negative.rs
index 0c9476beba..c727c8fa5f 100644
--- a/datafusion/physical-expr/src/expressions/negative.rs
+++ b/datafusion/physical-expr/src/expressions/negative.rs
@@ -37,7 +37,7 @@ use datafusion_expr::statistics::Distribution::{
};
use datafusion_expr::{
ColumnarValue,
- type_coercion::{is_interval, is_null, is_signed_numeric, is_timestamp},
+ type_coercion::{is_interval, is_signed_numeric, is_timestamp},
};
/// Negative expression
@@ -190,7 +190,7 @@ pub fn negative(
input_schema: &Schema,
) -> Result<Arc<dyn PhysicalExpr>> {
let data_type = arg.data_type(input_schema)?;
- if is_null(&data_type) {
+ if data_type.is_null() {
Ok(arg)
} else if !is_signed_numeric(&data_type)
&& !is_interval(&data_type)
diff --git a/datafusion/pruning/src/pruning_predicate.rs
b/datafusion/pruning/src/pruning_predicate.rs
index 5f1b4233b5..d0cb067442 100644
--- a/datafusion/pruning/src/pruning_predicate.rs
+++ b/datafusion/pruning/src/pruning_predicate.rs
@@ -1205,13 +1205,6 @@ fn is_compare_op(op: Operator) -> bool {
)
}
-fn is_string_type(data_type: &DataType) -> bool {
- matches!(
- data_type,
- DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View
- )
-}
-
// The pruning logic is based on the comparing the min/max bounds.
// Must make sure the two type has order.
// For example, casts from string to numbers is not correct.
@@ -1233,7 +1226,7 @@ fn verify_support_type_for_prune(from_type: &DataType,
to_type: &DataType) -> Re
// If both types are strings or both are not strings (number, timestamp,
etc)
// then we can compare them.
// PruningPredicate does not support casting of strings to numbers and
such.
- if is_string_type(from_type) == is_string_type(to_type) {
+ if from_type.is_string() == to_type.is_string() {
Ok(())
} else {
plan_err!(
@@ -4681,7 +4674,7 @@ mod tests {
true,
// s1 ["AB", "A\u{10ffff}\u{10ffff}\u{10ffff}"] ==> some rows
could pass (must keep)
true,
- // s1 ["A\u{10ffff}\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> no
row match. (min, max) maybe truncate
+ // s1 ["A\u{10ffff}\u{10ffff}", "A\u{10ffff}\u{10ffff}"] ==> no
row match. (min, max) maybe truncate
// original (min, max) maybe ("A\u{10ffff}\u{10ffff}\u{10ffff}",
"A\u{10ffff}\u{10ffff}\u{10ffff}\u{10ffff}")
true,
];
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]