This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 7f29cb050a Add `arrow_try_cast` UDF (#21130)
7f29cb050a is described below

commit 7f29cb050acd055c5cb52d620282fe9cb8ac2af0
Author: Adrian Garcia Badaracco <[email protected]>
AuthorDate: Tue Mar 24 08:38:16 2026 -0500

    Add `arrow_try_cast` UDF (#21130)
    
    ## Which issue does this PR close?
    
    N/A - new feature
    
    ## Rationale for this change
    
    `arrow_cast(expr, 'DataType')` casts to Arrow data types specified as
    strings but errors on failure. `try_cast(expr AS type)` returns NULL on
    failure but only works with SQL types. There's currently no way to
    attempt a cast to a specific Arrow type and get NULL on failure instead
    of an error.
    
    ## What changes are included in this PR?
    
    Adds a new `arrow_try_cast(expression, datatype)` scalar function that
    combines the behavior of `arrow_cast` and `try_cast`:
    - Accepts Arrow data type strings (like `arrow_cast`)
    - Returns NULL on cast failure instead of erroring (like `try_cast`)
    
    Implementation details:
    - Reuses `arrow_cast`'s `data_type_from_args` helper (made `pub(crate)`
    and given a `name` parameter so its errors name the calling function)
    - Simplifies to `Expr::TryCast` during optimization (vs `Expr::Cast` for
    `arrow_cast`)
    - Registered alongside existing core functions
    
    ## Are these changes tested?
    
    Yes — new sqllogictest file `arrow_try_cast.slt` covering:
    - Successful casts (Int64, Float64, LargeUtf8, Dictionary)
    - Failed cast returning NULL
    - Same-type passthrough
    - NULL input
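    - Invalid type string and non-string type argument errors
    - Column inputs that exercise physical execution (not constant folding)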
    - Multiple casts in one query
    
    ## Are there any user-facing changes?
    
    New `arrow_try_cast` SQL function available.
    
    🤖 Generated with [Claude Code](https://claude.com/claude-code)
    
    ---------
    
    Co-authored-by: Claude Opus 4.6 (1M context) <[email protected]>
---
 datafusion/functions/src/core/arrow_cast.rs        |   8 +-
 .../src/core/{arrow_cast.rs => arrow_try_cast.rs}  | 110 ++++++---------------
 datafusion/functions/src/core/mod.rs               |   9 +-
 .../sqllogictest/test_files/arrow_try_cast.slt     | 109 ++++++++++++++++++++
 docs/source/user-guide/sql/scalar_functions.md     |  27 +++++
 5 files changed, 178 insertions(+), 85 deletions(-)

diff --git a/datafusion/functions/src/core/arrow_cast.rs 
b/datafusion/functions/src/core/arrow_cast.rs
index e555081e41..3e0a23f1ad 100644
--- a/datafusion/functions/src/core/arrow_cast.rs
+++ b/datafusion/functions/src/core/arrow_cast.rs
@@ -163,7 +163,7 @@ impl ScalarUDFImpl for ArrowCastFunc {
         info: &SimplifyContext,
     ) -> Result<ExprSimplifyResult> {
         // convert this into a real cast
-        let target_type = data_type_from_args(&args)?;
+        let target_type = data_type_from_args(self.name(), &args)?;
         // remove second (type) argument
         args.pop().unwrap();
         let arg = args.pop().unwrap();
@@ -189,12 +189,12 @@ impl ScalarUDFImpl for ArrowCastFunc {
 }
 
 /// Returns the requested type from the arguments
-fn data_type_from_args(args: &[Expr]) -> Result<DataType> {
-    let [_, type_arg] = take_function_args("arrow_cast", args)?;
+pub(crate) fn data_type_from_args(name: &str, args: &[Expr]) -> 
Result<DataType> {
+    let [_, type_arg] = take_function_args(name, args)?;
 
     let Expr::Literal(ScalarValue::Utf8(Some(val)), _) = type_arg else {
         return exec_err!(
-            "arrow_cast requires its second argument to be a constant string, 
got {:?}",
+            "{name} requires its second argument to be a constant string, got 
{:?}",
             type_arg
         );
     };
diff --git a/datafusion/functions/src/core/arrow_cast.rs 
b/datafusion/functions/src/core/arrow_try_cast.rs
similarity index 57%
copy from datafusion/functions/src/core/arrow_cast.rs
copy to datafusion/functions/src/core/arrow_try_cast.rs
index e555081e41..a221c81e07 100644
--- a/datafusion/functions/src/core/arrow_cast.rs
+++ b/datafusion/functions/src/core/arrow_try_cast.rs
@@ -15,14 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! [`ArrowCastFunc`]: Implementation of the `arrow_cast`
+//! [`ArrowTryCastFunc`]: Implementation of the `arrow_try_cast`
 
 use arrow::datatypes::{DataType, Field, FieldRef};
 use arrow::error::ArrowError;
 use datafusion_common::{
-    Result, ScalarValue, arrow_datafusion_err, datatype::DataTypeExt,
-    exec_datafusion_err, exec_err, internal_err, types::logical_string,
-    utils::take_function_args,
+    Result, arrow_datafusion_err, datatype::DataTypeExt, exec_datafusion_err, 
exec_err,
+    internal_err, types::logical_string, utils::take_function_args,
 };
 use std::any::Any;
 
@@ -33,52 +32,25 @@ use datafusion_expr::{
 };
 use datafusion_macros::user_doc;
 
-/// Implements casting to arbitrary arrow types (rather than SQL types)
-///
-/// Note that the `arrow_cast` function is somewhat special in that its
-/// return depends only on the *value* of its second argument (not its type)
-///
-/// It is implemented by calling the same underlying arrow `cast` kernel as
-/// normal SQL casts.
-///
-/// For example to cast to `int` using SQL  (which is then mapped to the arrow
-/// type `Int32`)
-///
-/// ```sql
-/// select cast(column_x as int) ...
-/// ```
-///
-/// Use the `arrow_cast` function to cast to a specific arrow type
+use super::arrow_cast::data_type_from_args;
+
+/// Like [`arrow_cast`](super::arrow_cast::ArrowCastFunc) but returns NULL on 
cast failure instead of erroring.
 ///
-/// For example
-/// ```sql
-/// select arrow_cast(column_x, 'Float64')
-/// ```
+/// This is implemented by simplifying `arrow_try_cast(expr, 'Type')` into
+/// `Expr::TryCast` during optimization.
 #[user_doc(
     doc_section(label = "Other Functions"),
-    description = "Casts a value to a specific Arrow data type.",
-    syntax_example = "arrow_cast(expression, datatype)",
+    description = "Casts a value to a specific Arrow data type, returning NULL 
if the cast fails.",
+    syntax_example = "arrow_try_cast(expression, datatype)",
     sql_example = r#"```sql
-> select
-  arrow_cast(-5,    'Int8') as a,
-  arrow_cast('foo', 'Dictionary(Int32, Utf8)') as b,
-  arrow_cast('bar', 'LargeUtf8') as c;
-
-+----+-----+-----+
-| a  | b   | c   |
-+----+-----+-----+
-| -5 | foo | bar |
-+----+-----+-----+
-
-> select
-  arrow_cast('2023-01-02T12:53:02', 'Timestamp(µs, "+08:00")') as d,
-  arrow_cast('2023-01-02T12:53:02', 'Timestamp(µs)') as e;
-
-+---------------------------+---------------------+
-| d                         | e                   |
-+---------------------------+---------------------+
-| 2023-01-02T12:53:02+08:00 | 2023-01-02T12:53:02 |
-+---------------------------+---------------------+
+> select arrow_try_cast('123', 'Int64') as a,
+         arrow_try_cast('not_a_number', 'Int64') as b;
+
++-----+------+
+| a   | b    |
++-----+------+
+| 123 | NULL |
++-----+------+
 ```"#,
     argument(
         name = "expression",
@@ -90,17 +62,17 @@ use datafusion_macros::user_doc;
     )
 )]
 #[derive(Debug, PartialEq, Eq, Hash)]
-pub struct ArrowCastFunc {
+pub struct ArrowTryCastFunc {
     signature: Signature,
 }
 
-impl Default for ArrowCastFunc {
+impl Default for ArrowTryCastFunc {
     fn default() -> Self {
         Self::new()
     }
 }
 
-impl ArrowCastFunc {
+impl ArrowTryCastFunc {
     pub fn new() -> Self {
         Self {
             signature: Signature::coercible(
@@ -114,13 +86,13 @@ impl ArrowCastFunc {
     }
 }
 
-impl ScalarUDFImpl for ArrowCastFunc {
+impl ScalarUDFImpl for ArrowTryCastFunc {
     fn as_any(&self) -> &dyn Any {
         self
     }
 
     fn name(&self) -> &str {
-        "arrow_cast"
+        "arrow_try_cast"
     }
 
     fn signature(&self) -> &Signature {
@@ -132,8 +104,7 @@ impl ScalarUDFImpl for ArrowCastFunc {
     }
 
     fn return_field_from_args(&self, args: ReturnFieldArgs) -> 
Result<FieldRef> {
-        let nullable = args.arg_fields.iter().any(|f| f.is_nullable());
-
+        // TryCast can always return NULL (on cast failure), so always nullable
         let [_, type_arg] = take_function_args(self.name(), 
args.scalar_arguments)?;
 
         type_arg
@@ -146,7 +117,9 @@ impl ScalarUDFImpl for ArrowCastFunc {
                     )
                 },
                 |casted_type| match casted_type.parse::<DataType>() {
-                    Ok(data_type) => Ok(Field::new(self.name(), data_type, 
nullable).into()),
+                    Ok(data_type) => {
+                        Ok(Field::new(self.name(), data_type, true).into())
+                    }
                     Err(ArrowError::ParseError(e)) => 
Err(exec_datafusion_err!("{e}")),
                     Err(e) => Err(arrow_datafusion_err!(e)),
                 },
@@ -154,7 +127,7 @@ impl ScalarUDFImpl for ArrowCastFunc {
     }
 
     fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> 
Result<ColumnarValue> {
-        internal_err!("arrow_cast should have been simplified to cast")
+        internal_err!("arrow_try_cast should have been simplified to try_cast")
     }
 
     fn simplify(
@@ -162,24 +135,20 @@ impl ScalarUDFImpl for ArrowCastFunc {
         mut args: Vec<Expr>,
         info: &SimplifyContext,
     ) -> Result<ExprSimplifyResult> {
-        // convert this into a real cast
-        let target_type = data_type_from_args(&args)?;
+        let target_type = data_type_from_args(self.name(), &args)?;
         // remove second (type) argument
         args.pop().unwrap();
         let arg = args.pop().unwrap();
 
         let source_type = info.get_data_type(&arg)?;
         let new_expr = if source_type == target_type {
-            // the argument's data type is already the correct type
             arg
         } else {
-            // Use an actual cast to get the correct type
-            Expr::Cast(datafusion_expr::Cast {
+            Expr::TryCast(datafusion_expr::TryCast {
                 expr: Box::new(arg),
                 field: target_type.into_nullable_field_ref(),
             })
         };
-        // return the newly written argument to DataFusion
         Ok(ExprSimplifyResult::Simplified(new_expr))
     }
 
@@ -187,22 +156,3 @@ impl ScalarUDFImpl for ArrowCastFunc {
         self.doc()
     }
 }
-
-/// Returns the requested type from the arguments
-fn data_type_from_args(args: &[Expr]) -> Result<DataType> {
-    let [_, type_arg] = take_function_args("arrow_cast", args)?;
-
-    let Expr::Literal(ScalarValue::Utf8(Some(val)), _) = type_arg else {
-        return exec_err!(
-            "arrow_cast requires its second argument to be a constant string, 
got {:?}",
-            type_arg
-        );
-    };
-
-    val.parse().map_err(|e| match e {
-        // If the data type cannot be parsed, return a Plan error to signal an
-        // error in the input rather than a more general ArrowError
-        ArrowError::ParseError(e) => exec_datafusion_err!("{e}"),
-        e => arrow_datafusion_err!(e),
-    })
-}
diff --git a/datafusion/functions/src/core/mod.rs 
b/datafusion/functions/src/core/mod.rs
index a14d563737..e8737612a1 100644
--- a/datafusion/functions/src/core/mod.rs
+++ b/datafusion/functions/src/core/mod.rs
@@ -22,6 +22,7 @@ use std::sync::Arc;
 
 pub mod arrow_cast;
 pub mod arrow_metadata;
+pub mod arrow_try_cast;
 pub mod arrowtypeof;
 pub mod coalesce;
 pub mod expr_ext;
@@ -42,6 +43,7 @@ pub mod version;
 
 // create UDFs
 make_udf_function!(arrow_cast::ArrowCastFunc, arrow_cast);
+make_udf_function!(arrow_try_cast::ArrowTryCastFunc, arrow_try_cast);
 make_udf_function!(nullif::NullIfFunc, nullif);
 make_udf_function!(nvl::NVLFunc, nvl);
 make_udf_function!(nvl2::NVL2Func, nvl2);
@@ -67,7 +69,11 @@ pub mod expr_fn {
         arg1 arg2
     ),(
         arrow_cast,
-        "Returns value2 if value1 is NULL; otherwise it returns value1",
+        "Casts a value to a specific Arrow data type",
+        arg1 arg2
+    ),(
+        arrow_try_cast,
+        "Casts a value to a specific Arrow data type, returning NULL if the 
cast fails",
         arg1 arg2
     ),(
         nvl,
@@ -140,6 +146,7 @@ pub fn functions() -> Vec<Arc<ScalarUDF>> {
     vec![
         nullif(),
         arrow_cast(),
+        arrow_try_cast(),
         arrow_metadata(),
         nvl(),
         nvl2(),
diff --git a/datafusion/sqllogictest/test_files/arrow_try_cast.slt 
b/datafusion/sqllogictest/test_files/arrow_try_cast.slt
new file mode 100644
index 0000000000..fffb340798
--- /dev/null
+++ b/datafusion/sqllogictest/test_files/arrow_try_cast.slt
@@ -0,0 +1,109 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+##########
+# Tests for arrow_try_cast: like arrow_cast but returns NULL on cast failure
+##########
+
+# Successful cast to Float64
+query R
+select arrow_try_cast(123, 'Float64');
+----
+123
+
+# Successful cast to Int64
+query I
+select arrow_try_cast('123', 'Int64');
+----
+123
+
+# Failed cast returns NULL
+query I
+select arrow_try_cast('not_a_number', 'Int64');
+----
+NULL
+
+# Same-type passthrough
+query I
+select arrow_try_cast(1, 'Int32');
+----
+1
+
+# Cast to LargeUtf8
+query T
+select arrow_try_cast('foo', 'LargeUtf8');
+----
+foo
+
+# Cast integer to string
+query T
+select arrow_try_cast(42, 'Utf8');
+----
+42
+
+# Cast to dictionary type
+query T
+select arrow_try_cast('bar', 'Dictionary(Int32, Utf8)');
+----
+bar
+
+# NULL input stays NULL
+query I
+select arrow_try_cast(NULL, 'Int64');
+----
+NULL
+
+# Error on invalid type string
+statement error
+select arrow_try_cast(1, 'NotAType');
+
+# Error when second argument is not a string constant
+statement error
+select arrow_try_cast(1, 123);
+
+# Multiple arrow_try_cast in one query
+query IT
+select arrow_try_cast('456', 'Int64') as a,
+       arrow_try_cast(789, 'Utf8') as b;
+----
+456 789
+
+# Tests that exercise physical execution (not constant folding)
+
+# Cast column values to Int64, with mixed valid/null/invalid inputs
+query I
+select arrow_try_cast(a, 'Int64') from (values('100'), (NULL), ('foo')) t(a);
+----
+100
+NULL
+NULL
+
+# Cast column values to Float64
+query R
+select arrow_try_cast(a, 'Float64') from (values('3.14'), ('not_num'), (NULL)) 
t(a);
+----
+3.14
+NULL
+NULL
+
+# Cast integer column to Utf8
+query T
+select arrow_try_cast(a, 'Utf8') from (values(1), (2), (NULL)) t(a);
+----
+1
+2
+NULL
diff --git a/docs/source/user-guide/sql/scalar_functions.md 
b/docs/source/user-guide/sql/scalar_functions.md
index 918bae0f7d..5a8ef4db3d 100644
--- a/docs/source/user-guide/sql/scalar_functions.md
+++ b/docs/source/user-guide/sql/scalar_functions.md
@@ -5185,6 +5185,7 @@ union_tag(union_expression)
 
 - [arrow_cast](#arrow_cast)
 - [arrow_metadata](#arrow_metadata)
+- [arrow_try_cast](#arrow_try_cast)
 - [arrow_typeof](#arrow_typeof)
 - [get_field](#get_field)
 - [version](#version)
@@ -5257,6 +5258,32 @@ arrow_metadata(expression[, key])
 +-------------------------------+
 ```
 
+### `arrow_try_cast`
+
+Casts a value to a specific Arrow data type, returning NULL if the cast fails.
+
+```sql
+arrow_try_cast(expression, datatype)
+```
+
+#### Arguments
+
+- **expression**: Expression to cast. The expression can be a constant, 
column, or function, and any combination of operators.
+- **datatype**: [Arrow data 
type](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html) name to 
cast to, as a string. The format is the same as that returned by 
[`arrow_typeof`]
+
+#### Example
+
+```sql
+> select arrow_try_cast('123', 'Int64') as a,
+         arrow_try_cast('not_a_number', 'Int64') as b;
+
++-----+------+
+| a   | b    |
++-----+------+
+| 123 | NULL |
++-----+------+
+```
+
 ### `arrow_typeof`
 
 Returns the name of the underlying [Arrow data 
type](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html) of the 
expression.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to