This is an automated email from the ASF dual-hosted git repository.

timsaucer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 015dd76f Pass Field information back and forth when using scalar UDFs (#1299)
015dd76f is described below

commit 015dd76f9fdc8fe74cce1c87fa348b20698903fd
Author: Tim Saucer <[email protected]>
AuthorDate: Wed Feb 4 08:54:45 2026 -0500

    Pass Field information back and forth when using scalar UDFs (#1299)
    
    * Pass Field information back and forth when using scalar UDFs
    
    * Add ArrowArrayExportable class and use it to create pyarrow arrays for python UDFs
    
    * Minor user documentation update
    
    * Update naming from type to field where appropriate
    
    * Add unit test to check field inputs
    
    * Update docstring
    
    * Add text to user documentation on passing field information for scalar UDFs
    
    * Minor change requested in code review
    
    * Make type hints match outer
---
 .../user-guide/common-operations/udf-and-udfa.rst  |  11 ++
 pyproject.toml                                     |   1 +
 python/datafusion/user_defined.py                  |  79 +++++++----
 python/tests/test_udf.py                           |  86 +++++++++++-
 src/array.rs                                       |  82 ++++++++++++
 src/lib.rs                                         |   1 +
 src/udf.rs                                         | 146 ++++++++++++++++-----
 uv.lock                                            |  55 +-------
 8 files changed, 348 insertions(+), 113 deletions(-)

diff --git a/docs/source/user-guide/common-operations/udf-and-udfa.rst b/docs/source/user-guide/common-operations/udf-and-udfa.rst
index 0830fa81..d554e1e2 100644
--- a/docs/source/user-guide/common-operations/udf-and-udfa.rst
+++ b/docs/source/user-guide/common-operations/udf-and-udfa.rst
@@ -90,6 +90,17 @@ converting to Python objects to do the evaluation.
 
     df.select(col("a"), is_null_arr(col("a")).alias("is_null")).show()
 
+In this example we passed the PyArrow ``DataType`` when we defined the function
+by calling ``udf()``. If you need additional control, such as specifying
+metadata or nullability of the input or output, you can instead specify a
+PyArrow ``Field``.
+
+If you need to write a custom function but do not want to incur the performance
+cost of converting to Python objects and back, a more advanced approach is to
+write Rust based UDFs and to expose them to Python. There is an example in the
+`DataFusion blog <https://datafusion.apache.org/blog/2024/11/19/datafusion-python-udf-comparisons/>`_
+describing how to do this.
+
 Aggregate Functions
 -------------------
 
diff --git a/pyproject.toml b/pyproject.toml
index 9ad7dab8..497943a3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -141,6 +141,7 @@ dev = [
     "maturin>=1.8.1",
     "numpy>1.25.0;python_version<'3.14'",
     "numpy>=2.3.2;python_version>='3.14'",
+    "pyarrow>=19.0.0",
     "pre-commit>=4.3.0",
     "pyyaml>=6.0.3",
     "pytest>=7.4.4",
diff --git a/python/datafusion/user_defined.py b/python/datafusion/user_defined.py
index e6543e76..5dd62656 100644
--- a/python/datafusion/user_defined.py
+++ b/python/datafusion/user_defined.py
@@ -34,7 +34,7 @@ if TYPE_CHECKING:
     from _typeshed import CapsuleType as _PyCapsule
 
     _R = TypeVar("_R", bound=pa.DataType)
-    from collections.abc import Callable
+    from collections.abc import Callable, Sequence
 
 
 class Volatility(Enum):
@@ -81,6 +81,27 @@ class Volatility(Enum):
         return self.name.lower()
 
 
+def data_type_or_field_to_field(value: pa.DataType | pa.Field, name: str) -> pa.Field:
+    """Helper function to return a Field from either a Field or DataType."""
+    if isinstance(value, pa.Field):
+        return value
+    return pa.field(name, type=value)
+
+
+def data_types_or_fields_to_field_list(
+    inputs: Sequence[pa.Field | pa.DataType] | pa.Field | pa.DataType,
+) -> list[pa.Field]:
+    """Helper function to return a list of Fields."""
+    if isinstance(inputs, pa.DataType):
+        return [pa.field("value", type=inputs)]
+    if isinstance(inputs, pa.Field):
+        return [inputs]
+
+    return [
+        data_type_or_field_to_field(v, f"value_{idx}") for (idx, v) in enumerate(inputs)
+    ]
+
+
 class ScalarUDFExportable(Protocol):
     """Type hint for object that has __datafusion_scalar_udf__ PyCapsule."""
 
@@ -103,8 +124,8 @@ class ScalarUDF:
         self,
         name: str,
         func: Callable[..., _R],
-        input_types: pa.DataType | list[pa.DataType],
-        return_type: _R,
+        input_fields: list[pa.Field],
+        return_field: _R,
         volatility: Volatility | str,
     ) -> None:
         """Instantiate a scalar user-defined function (UDF).
@@ -114,10 +135,10 @@ class ScalarUDF:
         if hasattr(func, "__datafusion_scalar_udf__"):
             self._udf = df_internal.ScalarUDF.from_pycapsule(func)
             return
-        if isinstance(input_types, pa.DataType):
-            input_types = [input_types]
+        if isinstance(input_fields, pa.DataType):
+            input_fields = [input_fields]
         self._udf = df_internal.ScalarUDF(
-            name, func, input_types, return_type, str(volatility)
+            name, func, input_fields, return_field, str(volatility)
         )
 
     def __repr__(self) -> str:
@@ -136,8 +157,8 @@ class ScalarUDF:
     @overload
     @staticmethod
     def udf(
-        input_types: list[pa.DataType],
-        return_type: _R,
+        input_fields: Sequence[pa.DataType | pa.Field] | pa.DataType | pa.Field,
+        return_field: pa.DataType | pa.Field,
         volatility: Volatility | str,
         name: str | None = None,
     ) -> Callable[..., ScalarUDF]: ...
@@ -146,8 +167,8 @@ class ScalarUDF:
     @staticmethod
     def udf(
         func: Callable[..., _R],
-        input_types: list[pa.DataType],
-        return_type: _R,
+        input_fields: Sequence[pa.DataType | pa.Field] | pa.DataType | pa.Field,
+        return_field: pa.DataType | pa.Field,
         volatility: Volatility | str,
         name: str | None = None,
     ) -> ScalarUDF: ...
@@ -163,20 +184,24 @@ class ScalarUDF:
         This class can be used both as either a function or a decorator.
 
         Usage:
-            - As a function: ``udf(func, input_types, return_type, volatility, name)``.
-            - As a decorator: ``@udf(input_types, return_type, volatility, name)``.
+            - As a function: ``udf(func, input_fields, return_field, volatility, name)``.
+            - As a decorator: ``@udf(input_fields, return_field, volatility, name)``.
               When used a decorator, do **not** pass ``func`` explicitly.
 
+        In lieu of passing a PyArrow Field, you can pass a DataType for simplicity.
+        When you do so, it will be assumed that the nullability of the inputs and
+        output are True and that they have no metadata.
+
         Args:
             func (Callable, optional): Only needed when calling as a function.
                Skip this argument when using `udf` as a decorator. If you have a Rust
                backed ScalarUDF within a PyCapsule, you can pass this parameter
                and ignore the rest. They will be determined directly from the
                underlying function. See the online documentation for more information.
-            input_types (list[pa.DataType]): The data types of the arguments
-                to ``func``. This list must be of the same length as the number of
-                arguments.
-            return_type (_R): The data type of the return value from the function.
+            input_fields (list[pa.Field | pa.DataType]): The data types or Fields
+                of the arguments to ``func``. This list must be of the same length
+                as the number of arguments.
+            return_field (_R): The field of the return value from the function.
             volatility (Volatility | str): See `Volatility` for allowed values.
             name (Optional[str]): A descriptive name for the function.
 
@@ -196,12 +221,12 @@ class ScalarUDF:
             @udf([pa.int32()], pa.int32(), "volatile", "double_it")
             def double_udf(x):
                 return x * 2
-        """
+        """  # noqa: W505 E501
 
         def _function(
             func: Callable[..., _R],
-            input_types: list[pa.DataType],
-            return_type: _R,
+            input_fields: Sequence[pa.DataType | pa.Field] | pa.DataType | pa.Field,
+            return_field: pa.DataType | pa.Field,
             volatility: Volatility | str,
             name: str | None = None,
         ) -> ScalarUDF:
@@ -213,23 +238,25 @@ class ScalarUDF:
                     name = func.__qualname__.lower()
                 else:
                     name = func.__class__.__name__.lower()
+            input_fields = data_types_or_fields_to_field_list(input_fields)
+            return_field = data_type_or_field_to_field(return_field, "value")
             return ScalarUDF(
                 name=name,
                 func=func,
-                input_types=input_types,
-                return_type=return_type,
+                input_fields=input_fields,
+                return_field=return_field,
                 volatility=volatility,
             )
 
         def _decorator(
-            input_types: list[pa.DataType],
-            return_type: _R,
+            input_fields: Sequence[pa.DataType | pa.Field] | pa.DataType | pa.Field,
+            return_field: _R,
             volatility: Volatility | str,
             name: str | None = None,
         ) -> Callable:
             def decorator(func: Callable) -> Callable:
                 udf_caller = ScalarUDF.udf(
-                    func, input_types, return_type, volatility, name
+                    func, input_fields, return_field, volatility, name
                 )
 
                 @functools.wraps(func)
@@ -260,8 +287,8 @@ class ScalarUDF:
         return ScalarUDF(
             name=name,
             func=func,
-            input_types=None,
-            return_type=None,
+            input_fields=None,
+            return_field=None,
             volatility=None,
         )
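
For readers following the new helper functions added near the top of this file, a quick sketch of how they normalize the arguments (the import path matches this patch; the example values are made up):

    import pyarrow as pa
    from datafusion.user_defined import (
        data_type_or_field_to_field,
        data_types_or_fields_to_field_list,
    )

    # A bare DataType becomes a single nullable field named "value".
    data_types_or_fields_to_field_list(pa.int64())
    # -> [pyarrow.Field<value: int64>]

    # A mixed list keeps Fields as-is and wraps DataTypes as value_<idx>.
    data_types_or_fields_to_field_list([pa.field("a", pa.utf8(), nullable=False), pa.int32()])
    # -> [pyarrow.Field<a: string not null>, pyarrow.Field<value_1: int32>]

    # Return annotations get the same treatment, using the fixed name "value".
    data_type_or_field_to_field(pa.float64(), "value")
    # -> pyarrow.Field<value: double>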
 
diff --git a/python/tests/test_udf.py b/python/tests/test_udf.py
index a6c04755..c90668ef 100644
--- a/python/tests/test_udf.py
+++ b/python/tests/test_udf.py
@@ -17,7 +17,8 @@
 
 import pyarrow as pa
 import pytest
-from datafusion import column, udf
+from datafusion import SessionContext, column, udf
+from datafusion import functions as f
 
 
 @pytest.fixture
@@ -124,3 +125,86 @@ def test_udf_with_parameters_decorator(df) -> None:
     result = df2.collect()[0].column(0)
 
     assert result == pa.array([False, True, True])
+
+
+def test_udf_with_metadata(ctx) -> None:
+    from uuid import UUID
+
+    @udf([pa.string()], pa.uuid(), "stable")
+    def uuid_from_string(uuid_string):
+        return pa.array((UUID(s).bytes for s in uuid_string.to_pylist()), pa.uuid())
+
+    @udf([pa.uuid()], pa.int64(), "stable")
+    def uuid_version(uuid):
+        return pa.array(s.version for s in uuid.to_pylist())
+
+    batch = pa.record_batch({"idx": pa.array(range(5))})
+    results = (
+        ctx.create_dataframe([[batch]])
+        .with_column("uuid_string", f.uuid())
+        .with_column("uuid", uuid_from_string(column("uuid_string")))
+        .select(uuid_version(column("uuid").alias("uuid_version")))
+        .collect()
+    )
+
+    assert results[0][0].to_pylist() == [4, 4, 4, 4, 4]
+
+
+def test_udf_with_nullability(ctx: SessionContext) -> None:
+    import pyarrow.compute as pc
+
+    field_nullable_i64 = pa.field("with_nulls", type=pa.int64(), nullable=True)
+    field_non_nullable_i64 = pa.field("no_nulls", type=pa.int64(), nullable=False)
+
+    @udf([field_nullable_i64], field_nullable_i64, "stable")
+    def nullable_abs(input_col):
+        return pc.abs(input_col)
+
+    @udf([field_non_nullable_i64], field_non_nullable_i64, "stable")
+    def non_nullable_abs(input_col):
+        return pc.abs(input_col)
+
+    batch = pa.record_batch(
+        {
+            "with_nulls": pa.array([-2, None, 0, 1, 2]),
+            "no_nulls": pa.array([-2, -1, 0, 1, 2]),
+        },
+        schema=pa.schema(
+            [
+                field_nullable_i64,
+                field_non_nullable_i64,
+            ]
+        ),
+    )
+    ctx.register_record_batches("t", [[batch]])
+    df = ctx.table("t")
+
+    # Input matches expected, nullable
+    df_result = df.select(nullable_abs(column("with_nulls")))
+    returned_field = df_result.schema().field(0)
+    assert returned_field.nullable
+    results = df_result.collect()
+    assert results[0][0].to_pylist() == [2, None, 0, 1, 2]
+
+    # Input coercible to expected, nullable
+    df_result = df.select(nullable_abs(column("no_nulls")))
+    returned_field = df_result.schema().field(0)
+    assert returned_field.nullable
+    results = df_result.collect()
+    assert results[0][0].to_pylist() == [2, 1, 0, 1, 2]
+
+    # Input matches expected, no nulls
+    df_result = df.select(non_nullable_abs(column("no_nulls")))
+    returned_field = df_result.schema().field(0)
+    assert not returned_field.nullable
+    results = df_result.collect()
+    assert results[0][0].to_pylist() == [2, 1, 0, 1, 2]
+
+    # Invalid - requires non-nullable input but that is not possible
+    df_result = df.select(non_nullable_abs(column("with_nulls")))
+    returned_field = df_result.schema().field(0)
+    assert not returned_field.nullable
+
+    with pytest.raises(Exception) as e_info:
+        _results = df_result.collect()
+    assert "InvalidArgumentError" in str(e_info)
diff --git a/src/array.rs b/src/array.rs
new file mode 100644
index 00000000..4dbd708c
--- /dev/null
+++ b/src/array.rs
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::sync::Arc;
+
+use arrow::array::{Array, ArrayRef};
+use arrow::datatypes::{Field, FieldRef};
+use arrow::ffi::{FFI_ArrowArray, FFI_ArrowSchema};
+use arrow::pyarrow::ToPyArrow;
+use pyo3::prelude::{PyAnyMethods, PyCapsuleMethods};
+use pyo3::types::PyCapsule;
+use pyo3::{pyclass, pymethods, Bound, PyAny, PyResult, Python};
+
+use crate::errors::PyDataFusionResult;
+use crate::utils::validate_pycapsule;
+
+/// A Python object which implements the Arrow PyCapsule for importing
+/// into other libraries.
+#[pyclass(name = "ArrowArrayExportable", module = "datafusion", frozen)]
+#[derive(Clone)]
+pub struct PyArrowArrayExportable {
+    array: ArrayRef,
+    field: FieldRef,
+}
+
+#[pymethods]
+impl PyArrowArrayExportable {
+    #[pyo3(signature = (requested_schema=None))]
+    fn __arrow_c_array__<'py>(
+        &'py self,
+        py: Python<'py>,
+        requested_schema: Option<Bound<'py, PyCapsule>>,
+    ) -> PyDataFusionResult<(Bound<'py, PyCapsule>, Bound<'py, PyCapsule>)> {
+        let field = if let Some(schema_capsule) = requested_schema {
+            validate_pycapsule(&schema_capsule, "arrow_schema")?;
+
+            let schema_ptr = unsafe { schema_capsule.reference::<FFI_ArrowSchema>() };
+            let desired_field = Field::try_from(schema_ptr)?;
+
+            Arc::new(desired_field)
+        } else {
+            Arc::clone(&self.field)
+        };
+
+        let ffi_schema = FFI_ArrowSchema::try_from(&field)?;
+        let schema_capsule = PyCapsule::new(py, ffi_schema, Some(cr"arrow_schema".into()))?;
+
+        let ffi_array = FFI_ArrowArray::new(&self.array.to_data());
+        let array_capsule = PyCapsule::new(py, ffi_array, Some(cr"arrow_array".into()))?;
+
+        Ok((schema_capsule, array_capsule))
+    }
+}
+
+impl ToPyArrow for PyArrowArrayExportable {
+    fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyAny>> {
+        let module = py.import("pyarrow")?;
+        let method = module.getattr("array")?;
+        let array = method.call((self.clone(),), None)?;
+        Ok(array)
+    }
+}
+
+impl PyArrowArrayExportable {
+    pub fn new(array: ArrayRef, field: FieldRef) -> Self {
+        Self { array, field }
+    }
+}
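
The new ArrowArrayExportable implements the Arrow PyCapsule protocol, which is why the ToPyArrow impl above can simply hand the object to pyarrow.array() (pyarrow >= 19.0.0 per the pyproject change). A toy Python analogue, purely to illustrate the protocol (this class is not part of the patch, and unlike the Rust type it does not carry a custom Field):

    import pyarrow as pa

    class ArrayWithField:
        # Holds an array plus a Field and exposes them via __arrow_c_array__,
        # mirroring the shape of the Rust struct above.
        def __init__(self, array: pa.Array, field: pa.Field) -> None:
            self._array = array
            self._field = field

        def __arrow_c_array__(self, requested_schema=None):
            # Return the (schema capsule, array capsule) pair that consumers import;
            # here we simply delegate to the wrapped pyarrow array.
            return self._array.__arrow_c_array__(requested_schema)

    wrapped = ArrayWithField(pa.array([1, 2, 3]), pa.field("ids", pa.int64(), nullable=False))
    imported = pa.array(wrapped)  # consumes the capsules rather than copying element by element
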
diff --git a/src/lib.rs b/src/lib.rs
index 9483a525..eda50fe1 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -52,6 +52,7 @@ pub mod store;
 pub mod table;
 pub mod unparser;
 
+mod array;
 #[cfg(feature = "substrait")]
 pub mod substrait;
 #[allow(clippy::borrow_deref_ref)]
diff --git a/src/udf.rs b/src/udf.rs
index c5d25a4b..3eec936c 100644
--- a/src/udf.rs
+++ b/src/udf.rs
@@ -15,67 +15,143 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use std::any::Any;
+use std::hash::{Hash, Hasher};
 use std::sync::Arc;
 
-use datafusion::arrow::array::{make_array, Array, ArrayData, ArrayRef};
+use arrow::datatypes::{Field, FieldRef};
+use arrow::pyarrow::ToPyArrow;
+use datafusion::arrow::array::{make_array, ArrayData};
 use datafusion::arrow::datatypes::DataType;
-use datafusion::arrow::pyarrow::{FromPyArrow, PyArrowType, ToPyArrow};
+use datafusion::arrow::pyarrow::{FromPyArrow, PyArrowType};
+use datafusion::common::internal_err;
 use datafusion::error::DataFusionError;
-use datafusion::logical_expr::function::ScalarFunctionImplementation;
-use datafusion::logical_expr::{create_udf, ColumnarValue, ScalarUDF, ScalarUDFImpl};
+use datafusion::logical_expr::{
+    ColumnarValue, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature,
+    Volatility,
+};
 use datafusion_ffi::udf::FFI_ScalarUDF;
 use pyo3::prelude::*;
 use pyo3::types::{PyCapsule, PyTuple};
 
+use crate::array::PyArrowArrayExportable;
 use crate::errors::{py_datafusion_err, to_datafusion_err, PyDataFusionResult};
 use crate::expr::PyExpr;
 use crate::utils::{parse_volatility, validate_pycapsule};
 
-/// Create a Rust callable function from a python function that expects pyarrow arrays
-fn pyarrow_function_to_rust(
+/// This struct holds the Python written function that is a
+/// ScalarUDF.
+#[derive(Debug)]
+struct PythonFunctionScalarUDF {
+    name: String,
     func: Py<PyAny>,
-) -> impl Fn(&[ArrayRef]) -> Result<ArrayRef, DataFusionError> {
-    move |args: &[ArrayRef]| -> Result<ArrayRef, DataFusionError> {
+    signature: Signature,
+    return_field: FieldRef,
+}
+
+impl PythonFunctionScalarUDF {
+    fn new(
+        name: String,
+        func: Py<PyAny>,
+        input_fields: Vec<Field>,
+        return_field: Field,
+        volatility: Volatility,
+    ) -> Self {
+        let input_types = input_fields.iter().map(|f| f.data_type().clone()).collect();
+        let signature = Signature::exact(input_types, volatility);
+        Self {
+            name,
+            func,
+            signature,
+            return_field: Arc::new(return_field),
+        }
+    }
+}
+
+impl Eq for PythonFunctionScalarUDF {}
+impl PartialEq for PythonFunctionScalarUDF {
+    fn eq(&self, other: &Self) -> bool {
+        self.name == other.name
+            && self.signature == other.signature
+            && self.return_field == other.return_field
+            && Python::attach(|py| self.func.bind(py).eq(other.func.bind(py)).unwrap_or(false))
+    }
+}
+
+impl Hash for PythonFunctionScalarUDF {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.name.hash(state);
+        self.signature.hash(state);
+        self.return_field.hash(state);
+
+        Python::attach(|py| {
+            let py_hash = self.func.bind(py).hash().unwrap_or(0); // Handle unhashable objects
+
+            state.write_isize(py_hash);
+        });
+    }
+}
+
+impl ScalarUDFImpl for PythonFunctionScalarUDF {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    fn signature(&self) -> &Signature {
+        &self.signature
+    }
+
+    fn return_type(&self, _arg_types: &[DataType]) -> datafusion::common::Result<DataType> {
+        internal_err!(
+            "return_field should not be called when return_field_from_args is implemented."
+        )
+    }
+
+    fn return_field_from_args(
+        &self,
+        _args: ReturnFieldArgs,
+    ) -> datafusion::common::Result<FieldRef> {
+        Ok(Arc::clone(&self.return_field))
+    }
+
+    fn invoke_with_args(
+        &self,
+        args: ScalarFunctionArgs,
+    ) -> datafusion::common::Result<ColumnarValue> {
+        let num_rows = args.number_rows;
         Python::attach(|py| {
             // 1. cast args to Pyarrow arrays
             let py_args = args
-                .iter()
-                .map(|arg| {
-                    arg.into_data()
+                .args
+                .into_iter()
+                .zip(args.arg_fields)
+                .map(|(arg, field)| {
+                    let array = arg.to_array(num_rows)?;
+                    PyArrowArrayExportable::new(array, field)
                         .to_pyarrow(py)
-                        .map_err(|e| DataFusionError::Execution(format!("{e:?}")))
+                        .map_err(to_datafusion_err)
                 })
                 .collect::<Result<Vec<_>, _>>()?;
            let py_args = PyTuple::new(py, py_args).map_err(to_datafusion_err)?;
 
             // 2. call function
-            let value = func
+            let value = self
+                .func
                 .call(py, py_args, None)
                 .map_err(|e| DataFusionError::Execution(format!("{e:?}")))?;
 
             // 3. cast to arrow::array::Array
             let array_data = ArrayData::from_pyarrow_bound(value.bind(py))
                 .map_err(|e| DataFusionError::Execution(format!("{e:?}")))?;
-            Ok(make_array(array_data))
+            Ok(ColumnarValue::Array(make_array(array_data)))
         })
     }
 }
 
-/// Create a DataFusion's UDF implementation from a python function
-/// that expects pyarrow arrays. This is more efficient as it performs
-/// a zero-copy of the contents.
-fn to_scalar_function_impl(func: Py<PyAny>) -> ScalarFunctionImplementation {
-    // Make the python function callable from rust
-    let pyarrow_func = pyarrow_function_to_rust(func);
-
-    // Convert input/output from datafusion ColumnarValue to arrow arrays
-    Arc::new(move |args: &[ColumnarValue]| {
-        let array_refs = ColumnarValue::values_to_arrays(args)?;
-        let array_result = pyarrow_func(&array_refs)?;
-        Ok(array_result.into())
-    })
-}
-
 /// Represents a PyScalarUDF
 #[pyclass(frozen, name = "ScalarUDF", module = "datafusion", subclass)]
 #[derive(Debug, Clone)]
@@ -88,19 +164,21 @@ impl PyScalarUDF {
     #[new]
     #[pyo3(signature=(name, func, input_types, return_type, volatility))]
     fn new(
-        name: &str,
+        name: String,
         func: Py<PyAny>,
-        input_types: PyArrowType<Vec<DataType>>,
-        return_type: PyArrowType<DataType>,
+        input_types: PyArrowType<Vec<Field>>,
+        return_type: PyArrowType<Field>,
         volatility: &str,
     ) -> PyResult<Self> {
-        let function = create_udf(
+        let py_function = PythonFunctionScalarUDF::new(
             name,
+            func,
             input_types.0,
             return_type.0,
             parse_volatility(volatility)?,
-            to_scalar_function_impl(func),
         );
+        let function = ScalarUDF::new_from_impl(py_function);
+
         Ok(Self { function })
     }
 
diff --git a/uv.lock b/uv.lock
index 92a10c7c..74361100 100644
--- a/uv.lock
+++ b/uv.lock
@@ -253,8 +253,7 @@ wheels = [
 name = "datafusion"
 source = { editable = "." }
 dependencies = [
-    { name = "pyarrow", version = "18.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" },
-    { name = "pyarrow", version = "22.0.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" },
+    { name = "pyarrow" },
     { name = "typing-extensions", marker = "python_full_version < '3.13'" },
 ]
 
@@ -265,6 +264,7 @@ dev = [
    { name = "numpy", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14'" },
    { name = "numpy", version = "2.3.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14'" },
     { name = "pre-commit" },
+    { name = "pyarrow" },
     { name = "pygithub" },
     { name = "pytest" },
     { name = "pytest-asyncio" },
@@ -298,6 +298,7 @@ dev = [
    { name = "numpy", marker = "python_full_version < '3.14'", specifier = ">1.25.0" },
    { name = "numpy", marker = "python_full_version >= '3.14'", specifier = ">=2.3.2" },
     { name = "pre-commit", specifier = ">=4.3.0" },
+    { name = "pyarrow", specifier = ">=19.0.0" },
     { name = "pygithub", specifier = "==2.5.0" },
     { name = "pytest", specifier = ">=7.4.4" },
     { name = "pytest-asyncio", specifier = ">=0.23.3" },
@@ -920,60 +921,10 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842 },
 ]
 
-[[package]]
-name = "pyarrow"
-version = "18.1.0"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "python_full_version >= '3.12' and python_full_version < '3.14'",
-    "python_full_version == '3.11.*'",
-    "python_full_version < '3.11'",
-]
-sdist = { url = "https://files.pythonhosted.org/packages/7f/7b/640785a9062bb00314caa8a387abce547d2a420cf09bd6c715fe659ccffb/pyarrow-18.1.0.tar.gz", hash = "sha256:9386d3ca9c145b5539a1cfc75df07757dff870168c959b473a0bccbc3abc8c73", size = 1118671 }
-wheels = [
-    { url = 
"https://files.pythonhosted.org/packages/1a/bb/8d4a1573f66e0684f190dd2b55fd0b97a7214de8882d58a3867e777bf640/pyarrow-18.1.0-cp310-cp310-macosx_12_0_arm64.whl";,
 hash = 
"sha256:e21488d5cfd3d8b500b3238a6c4b075efabc18f0f6d80b29239737ebd69caa6c", size 
= 29531620 },
-    { url = 
"https://files.pythonhosted.org/packages/30/90/893acfad917533b624a97b9e498c0e8393908508a0a72d624fe935e632bf/pyarrow-18.1.0-cp310-cp310-macosx_12_0_x86_64.whl";,
 hash = 
"sha256:b516dad76f258a702f7ca0250885fc93d1fa5ac13ad51258e39d402bd9e2e1e4", size 
= 30836521 },
-    { url = 
"https://files.pythonhosted.org/packages/a3/2a/526545a7464b5fb2fa6e2c4bad16ca90e59e1843025c534fd907b7f73e5a/pyarrow-18.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl";,
 hash = 
"sha256:4f443122c8e31f4c9199cb23dca29ab9427cef990f283f80fe15b8e124bcc49b", size 
= 39213905 },
-    { url = 
"https://files.pythonhosted.org/packages/8a/77/4b3fab91a30e19e233e738d0c5eca5a8f6dd05758bc349a2ca262c65de79/pyarrow-18.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl";,
 hash = 
"sha256:c0a03da7f2758645d17b7b4f83c8bffeae5bbb7f974523fe901f36288d2eab71", size 
= 40128881 },
-    { url = 
"https://files.pythonhosted.org/packages/aa/e2/a88e16c5e45e562449c52305bd3bc2f9d704295322d3434656e7ccac1444/pyarrow-18.1.0-cp310-cp310-manylinux_2_28_aarch64.whl";,
 hash = 
"sha256:ba17845efe3aa358ec266cf9cc2800fa73038211fb27968bfa88acd09261a470", size 
= 38627517 },
-    { url = 
"https://files.pythonhosted.org/packages/6d/84/8037c20005ccc7b869726465be0957bd9c29cfc88612962030f08292ad06/pyarrow-18.1.0-cp310-cp310-manylinux_2_28_x86_64.whl";,
 hash = 
"sha256:3c35813c11a059056a22a3bef520461310f2f7eea5c8a11ef9de7062a23f8d56", size 
= 40060187 },
-    { url = 
"https://files.pythonhosted.org/packages/2a/38/d6435c723ff73df8ae74626ea778262fbcc2b9b0d1a4f3db915b61711b05/pyarrow-18.1.0-cp310-cp310-win_amd64.whl";,
 hash = 
"sha256:9736ba3c85129d72aefa21b4f3bd715bc4190fe4426715abfff90481e7d00812", size 
= 25118314 },
-    { url = 
"https://files.pythonhosted.org/packages/9e/4d/a4988e7d82f4fbc797715db4185939a658eeffb07a25bab7262bed1ea076/pyarrow-18.1.0-cp311-cp311-macosx_12_0_arm64.whl";,
 hash = 
"sha256:eaeabf638408de2772ce3d7793b2668d4bb93807deed1725413b70e3156a7854", size 
= 29554860 },
-    { url = 
"https://files.pythonhosted.org/packages/59/03/3a42c5c1e4bd4c900ab62aa1ff6b472bdb159ba8f1c3e5deadab7222244f/pyarrow-18.1.0-cp311-cp311-macosx_12_0_x86_64.whl";,
 hash = 
"sha256:3b2e2239339c538f3464308fd345113f886ad031ef8266c6f004d49769bb074c", size 
= 30867076 },
-    { url = 
"https://files.pythonhosted.org/packages/75/7e/332055ac913373e89256dce9d14b7708f55f7bd5be631456c897f0237738/pyarrow-18.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl";,
 hash = 
"sha256:f39a2e0ed32a0970e4e46c262753417a60c43a3246972cfc2d3eb85aedd01b21", size 
= 39212135 },
-    { url = 
"https://files.pythonhosted.org/packages/8c/64/5099cdb325828722ef7ffeba9a4696f238eb0cdeae227f831c2d77fcf1bd/pyarrow-18.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl";,
 hash = 
"sha256:e31e9417ba9c42627574bdbfeada7217ad8a4cbbe45b9d6bdd4b62abbca4c6f6", size 
= 40125195 },
-    { url = 
"https://files.pythonhosted.org/packages/83/88/1938d783727db1b178ff71bc6a6143d7939e406db83a9ec23cad3dad325c/pyarrow-18.1.0-cp311-cp311-manylinux_2_28_aarch64.whl";,
 hash = 
"sha256:01c034b576ce0eef554f7c3d8c341714954be9b3f5d5bc7117006b85fcf302fe", size 
= 38641884 },
-    { url = 
"https://files.pythonhosted.org/packages/5e/b5/9e14e9f7590e0eaa435ecea84dabb137284a4dbba7b3c337b58b65b76d95/pyarrow-18.1.0-cp311-cp311-manylinux_2_28_x86_64.whl";,
 hash = 
"sha256:f266a2c0fc31995a06ebd30bcfdb7f615d7278035ec5b1cd71c48d56daaf30b0", size 
= 40076877 },
-    { url = 
"https://files.pythonhosted.org/packages/4d/a3/817ac7fe0891a2d66e247e223080f3a6a262d8aefd77e11e8c27e6acf4e1/pyarrow-18.1.0-cp311-cp311-win_amd64.whl";,
 hash = 
"sha256:d4f13eee18433f99adefaeb7e01d83b59f73360c231d4782d9ddfaf1c3fbde0a", size 
= 25119811 },
-    { url = 
"https://files.pythonhosted.org/packages/6a/50/12829e7111b932581e51dda51d5cb39207a056c30fe31ef43f14c63c4d7e/pyarrow-18.1.0-cp312-cp312-macosx_12_0_arm64.whl";,
 hash = 
"sha256:9f3a76670b263dc41d0ae877f09124ab96ce10e4e48f3e3e4257273cee61ad0d", size 
= 29514620 },
-    { url = 
"https://files.pythonhosted.org/packages/d1/41/468c944eab157702e96abab3d07b48b8424927d4933541ab43788bb6964d/pyarrow-18.1.0-cp312-cp312-macosx_12_0_x86_64.whl";,
 hash = 
"sha256:da31fbca07c435be88a0c321402c4e31a2ba61593ec7473630769de8346b54ee", size 
= 30856494 },
-    { url = 
"https://files.pythonhosted.org/packages/68/f9/29fb659b390312a7345aeb858a9d9c157552a8852522f2c8bad437c29c0a/pyarrow-18.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl";,
 hash = 
"sha256:543ad8459bc438efc46d29a759e1079436290bd583141384c6f7a1068ed6f992", size 
= 39203624 },
-    { url = 
"https://files.pythonhosted.org/packages/6e/f6/19360dae44200e35753c5c2889dc478154cd78e61b1f738514c9f131734d/pyarrow-18.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl";,
 hash = 
"sha256:0743e503c55be0fdb5c08e7d44853da27f19dc854531c0570f9f394ec9671d54", size 
= 40139341 },
-    { url = 
"https://files.pythonhosted.org/packages/bb/e6/9b3afbbcf10cc724312e824af94a2e993d8ace22994d823f5c35324cebf5/pyarrow-18.1.0-cp312-cp312-manylinux_2_28_aarch64.whl";,
 hash = 
"sha256:d4b3d2a34780645bed6414e22dda55a92e0fcd1b8a637fba86800ad737057e33", size 
= 38618629 },
-    { url = 
"https://files.pythonhosted.org/packages/3a/2e/3b99f8a3d9e0ccae0e961978a0d0089b25fb46ebbcfb5ebae3cca179a5b3/pyarrow-18.1.0-cp312-cp312-manylinux_2_28_x86_64.whl";,
 hash = 
"sha256:c52f81aa6f6575058d8e2c782bf79d4f9fdc89887f16825ec3a66607a5dd8e30", size 
= 40078661 },
-    { url = 
"https://files.pythonhosted.org/packages/76/52/f8da04195000099d394012b8d42c503d7041b79f778d854f410e5f05049a/pyarrow-18.1.0-cp312-cp312-win_amd64.whl";,
 hash = 
"sha256:0ad4892617e1a6c7a551cfc827e072a633eaff758fa09f21c4ee548c30bcaf99", size 
= 25092330 },
-    { url = 
"https://files.pythonhosted.org/packages/cb/87/aa4d249732edef6ad88899399047d7e49311a55749d3c373007d034ee471/pyarrow-18.1.0-cp313-cp313-macosx_12_0_arm64.whl";,
 hash = 
"sha256:84e314d22231357d473eabec709d0ba285fa706a72377f9cc8e1cb3c8013813b", size 
= 29497406 },
-    { url = 
"https://files.pythonhosted.org/packages/3c/c7/ed6adb46d93a3177540e228b5ca30d99fc8ea3b13bdb88b6f8b6467e2cb7/pyarrow-18.1.0-cp313-cp313-macosx_12_0_x86_64.whl";,
 hash = 
"sha256:f591704ac05dfd0477bb8f8e0bd4b5dc52c1cadf50503858dce3a15db6e46ff2", size 
= 30835095 },
-    { url = 
"https://files.pythonhosted.org/packages/41/d7/ed85001edfb96200ff606943cff71d64f91926ab42828676c0fc0db98963/pyarrow-18.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl";,
 hash = 
"sha256:acb7564204d3c40babf93a05624fc6a8ec1ab1def295c363afc40b0c9e66c191", size 
= 39194527 },
-    { url = 
"https://files.pythonhosted.org/packages/59/16/35e28eab126342fa391593415d79477e89582de411bb95232f28b131a769/pyarrow-18.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl";,
 hash = 
"sha256:74de649d1d2ccb778f7c3afff6085bd5092aed4c23df9feeb45dd6b16f3811aa", size 
= 40131443 },
-    { url = 
"https://files.pythonhosted.org/packages/0c/95/e855880614c8da20f4cd74fa85d7268c725cf0013dc754048593a38896a0/pyarrow-18.1.0-cp313-cp313-manylinux_2_28_aarch64.whl";,
 hash = 
"sha256:f96bd502cb11abb08efea6dab09c003305161cb6c9eafd432e35e76e7fa9b90c", size 
= 38608750 },
-    { url = 
"https://files.pythonhosted.org/packages/54/9d/f253554b1457d4fdb3831b7bd5f8f00f1795585a606eabf6fec0a58a9c38/pyarrow-18.1.0-cp313-cp313-manylinux_2_28_x86_64.whl";,
 hash = 
"sha256:36ac22d7782554754a3b50201b607d553a8d71b78cdf03b33c1125be4b52397c", size 
= 40066690 },
-    { url = 
"https://files.pythonhosted.org/packages/2f/58/8912a2563e6b8273e8aa7b605a345bba5a06204549826f6493065575ebc0/pyarrow-18.1.0-cp313-cp313-win_amd64.whl";,
 hash = 
"sha256:25dbacab8c5952df0ca6ca0af28f50d45bd31c1ff6fcf79e2d120b4a65ee7181", size 
= 25081054 },
-    { url = 
"https://files.pythonhosted.org/packages/82/f9/d06ddc06cab1ada0c2f2fd205ac8c25c2701182de1b9c4bf7a0a44844431/pyarrow-18.1.0-cp313-cp313t-macosx_12_0_arm64.whl";,
 hash = 
"sha256:6a276190309aba7bc9d5bd2933230458b3521a4317acfefe69a354f2fe59f2bc", size 
= 29525542 },
-    { url = 
"https://files.pythonhosted.org/packages/ab/94/8917e3b961810587ecbdaa417f8ebac0abb25105ae667b7aa11c05876976/pyarrow-18.1.0-cp313-cp313t-macosx_12_0_x86_64.whl";,
 hash = 
"sha256:ad514dbfcffe30124ce655d72771ae070f30bf850b48bc4d9d3b25993ee0e386", size 
= 30829412 },
-    { url = 
"https://files.pythonhosted.org/packages/5e/e3/3b16c3190f3d71d3b10f6758d2d5f7779ef008c4fd367cedab3ed178a9f7/pyarrow-18.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl";,
 hash = 
"sha256:aebc13a11ed3032d8dd6e7171eb6e86d40d67a5639d96c35142bd568b9299324", size 
= 39119106 },
-    { url = 
"https://files.pythonhosted.org/packages/1d/d6/5d704b0d25c3c79532f8c0639f253ec2803b897100f64bcb3f53ced236e5/pyarrow-18.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl";,
 hash = 
"sha256:d6cf5c05f3cee251d80e98726b5c7cc9f21bab9e9783673bac58e6dfab57ecc8", size 
= 40090940 },
-    { url = 
"https://files.pythonhosted.org/packages/37/29/366bc7e588220d74ec00e497ac6710c2833c9176f0372fe0286929b2d64c/pyarrow-18.1.0-cp313-cp313t-manylinux_2_28_aarch64.whl";,
 hash = 
"sha256:11b676cd410cf162d3f6a70b43fb9e1e40affbc542a1e9ed3681895f2962d3d9", size 
= 38548177 },
-    { url = 
"https://files.pythonhosted.org/packages/c8/11/fabf6ecabb1fe5b7d96889228ca2a9158c4c3bb732e3b8ee3f7f6d40b703/pyarrow-18.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl";,
 hash = 
"sha256:b76130d835261b38f14fc41fdfb39ad8d672afb84c447126b84d5472244cfaba", size 
= 40043567 },
-]
-
 [[package]]
 name = "pyarrow"
 version = "22.0.0"
source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "python_full_version >= '3.14'",
-]
sdist = { url = "https://files.pythonhosted.org/packages/30/53/04a7fdc63e6056116c9ddc8b43bc28c12cdd181b85cbeadb79278475f3ae/pyarrow-22.0.0.tar.gz", hash = "sha256:3d600dc583260d845c7d8a6db540339dd883081925da2bd1c5cb808f720b3cd9", size = 1151151 }
 wheels = [
    { url = "https://files.pythonhosted.org/packages/d9/9b/cb3f7e0a345353def531ca879053e9ef6b9f38ed91aebcf68b09ba54dec0/pyarrow-22.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:77718810bd3066158db1e95a63c160ad7ce08c6b0710bc656055033e39cdad88", size = 34223968 },


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

