This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new e559dd0  ARROW-9440: [Python] Expose Fill Null kernel
e559dd0 is described below

commit e559dd080a27875bab3d5cdb0da115c62e2f60bb
Author: c-jamie <[email protected]>
AuthorDate: Mon Jul 13 19:53:47 2020 -0500

    ARROW-9440: [Python] Expose Fill Null kernel
    
    Closes #7736 from c-jamie/ARROW-9440
    
    Lead-authored-by: c-jamie <[email protected]>
    Co-authored-by: Wes McKinney <[email protected]>
    Signed-off-by: Wes McKinney <[email protected]>
---
 python/pyarrow/array.pxi             |  6 ++++
 python/pyarrow/compute.py            | 41 +++++++++++++++++++++++
 python/pyarrow/includes/libarrow.pxd |  1 +
 python/pyarrow/scalar.pxi            | 13 ++++++++
 python/pyarrow/table.pxi             |  6 ++++
 python/pyarrow/tests/test_compute.py | 63 ++++++++++++++++++++++++++++++++++++
 python/pyarrow/tests/test_scalars.py |  9 ++++++
 7 files changed, 139 insertions(+)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 1cffd37..1dcff02 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -1004,6 +1004,12 @@ cdef class Array(_PandasConvertible):
         """
         return _pc().is_valid(self)
 
+    def fill_null(self, fill_value):
+        """
+        See pyarrow.compute.fill_null for usage.
+        """
+        return _pc().fill_null(self, fill_value)
+
     def __getitem__(self, key):
         """
         Slice or return value at given index
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index c8443ed..b8e678f 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -24,6 +24,7 @@ from pyarrow._compute import (  # noqa
     call_function,
     TakeOptions
 )
+import pyarrow as pa
 import pyarrow._compute as _pc
 
 
@@ -259,3 +260,43 @@ def take(data, indices, boundscheck=True):
     """
     options = TakeOptions(boundscheck)
     return call_function('take', [data, indices], options)
+
+
+def fill_null(values, fill_value):
+    """
+    Replace each null element in values with fill_value. The fill_value must be
+    the same type as values or able to be implicitly cast to the array's
+    type.
+
+    Parameters
+    ----------
+    values : Array, ChunkedArray
+        Input in which each null element is replaced with fill_value.
+    fill_value : Scalar-like object
+        Either a pyarrow.Scalar or any python object coercible to a
+        Scalar. If not the same type as values, a cast will be attempted.
+
+    Returns
+    -------
+    result : depends on inputs
+
+    Examples
+    --------
+    >>> import pyarrow as pa
+    >>> arr = pa.array([1, 2, None, 3], type=pa.int8())
+    >>> fill_value = pa.scalar(5, type=pa.int8())
+    >>> arr.fill_null(fill_value)
+    <pyarrow.lib.Int8Array object at 0x7f95437f01a0>
+    [
+      1,
+      2,
+      5,
+      3
+    ]
+    """
+    if not isinstance(fill_value, pa.Scalar):
+        fill_value = pa.scalar(fill_value, type=values.type)
+    elif values.type != fill_value.type:
+        fill_value = pa.scalar(fill_value.as_py(), type=values.type)
+
+    return call_function("fill_null", [values, fill_value])
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 213ef24..c8e7c5b 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -887,6 +887,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         c_bool is_valid
         c_string ToString() const
         c_bool Equals(const CScalar& other) const
+        CResult[shared_ptr[CScalar]] CastTo(shared_ptr[CDataType] to) const
 
     cdef cppclass CScalarHash" arrow::Scalar::Hash":
         size_t operator()(const shared_ptr[CScalar]& scalar) const
diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
index 903faae..248d926 100644
--- a/python/pyarrow/scalar.pxi
+++ b/python/pyarrow/scalar.pxi
@@ -63,6 +63,19 @@ cdef class Scalar:
         """
         return self.wrapped.get().is_valid
 
+    def cast(self, object target_type):
+        """
+        Attempt a safe cast to target data type.
+        """
+        cdef:
+            DataType type = ensure_type(target_type)
+            shared_ptr[CScalar] result
+
+        with nogil:
+            result = GetResultValue(self.wrapped.get().CastTo(type.sp_type))
+
+        return Scalar.wrap(result)
+
     def __repr__(self):
         return '<pyarrow.{}: {!r}>'.format(
             self.__class__.__name__, self.as_py()
diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi
index 08e3f75..688d668 100644
--- a/python/pyarrow/table.pxi
+++ b/python/pyarrow/table.pxi
@@ -191,6 +191,12 @@ cdef class ChunkedArray(_PandasConvertible):
         except TypeError:
             return NotImplemented
 
+    def fill_null(self, fill_value):
+        """
+        See pyarrow.compute.fill_null docstring for usage.
+        """
+        return _pc().fill_null(self, fill_value)
+
     def equals(self, ChunkedArray other):
         """
         Return whether the contents of two chunked arrays are equal.
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index ca30a82..59f004f 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -484,3 +484,66 @@ def test_is_null():
     result = arr.is_valid()
     expected = pa.chunked_array([[True, True], [True, False]])
     assert result.equals(expected)
+
+
+def test_fill_null():
+    arr = pa.array([1, 2, None, 4], type=pa.int8())
+    fill_value = pa.array([5], type=pa.int8())
+    with pytest.raises(TypeError):
+        arr.fill_null(fill_value)
+
+    arr = pa.array([None, None, None, None], type=pa.null())
+    fill_value = pa.scalar(None, type=pa.null())
+    result = arr.fill_null(fill_value)
+    expected = pa.array([None, None, None, None])
+    assert result.equals(expected)
+
+
+@pytest.mark.parametrize('arrow_type', numerical_arrow_types)
+def test_fill_null_array(arrow_type):
+    arr = pa.array([1, 2, None, 4], type=arrow_type)
+    fill_value = pa.scalar(5, type=arrow_type)
+    result = arr.fill_null(fill_value)
+    expected = pa.array([1, 2, 5, 4], type=arrow_type)
+    assert result.equals(expected)
+
+    # Implicit conversions
+    result = arr.fill_null(5)
+    assert result.equals(expected)
+
+    # ARROW-9451: Unsigned integers allow this for some reason
+    if not pa.types.is_unsigned_integer(arr.type):
+        with pytest.raises((ValueError, TypeError)):
+            arr.fill_null('5')
+
+    result = arr.fill_null(pa.scalar(5, type='int8'))
+    assert result.equals(expected)
+
+
+@pytest.mark.parametrize('arrow_type', numerical_arrow_types)
+def test_fill_null_chunked_array(arrow_type):
+    fill_value = pa.scalar(5, type=arrow_type)
+    arr = pa.chunked_array([pa.array([None, 2, 3, 4], type=arrow_type)])
+    result = arr.fill_null(fill_value)
+    expected = pa.chunked_array([pa.array([5, 2, 3, 4], type=arrow_type)])
+    assert result.equals(expected)
+
+    arr = pa.chunked_array([
+        pa.array([1, 2], type=arrow_type),
+        pa.array([], type=arrow_type),
+        pa.array([None, 4], type=arrow_type)
+    ])
+    expected = pa.chunked_array([
+        pa.array([1, 2], type=arrow_type),
+        pa.array([], type=arrow_type),
+        pa.array([5, 4], type=arrow_type)
+    ])
+    result = arr.fill_null(fill_value)
+    assert result.equals(expected)
+
+    # Implicit conversions
+    result = arr.fill_null(5)
+    assert result.equals(expected)
+
+    result = arr.fill_null(pa.scalar(5, type='int8'))
+    assert result.equals(expected)
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index 81b2c3f..8a778bf 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -181,6 +181,15 @@ def test_time():
             assert s.as_py() == t
 
 
+def test_cast():
+    val = pa.scalar(5, type='int8')
+    assert val.cast('int64') == pa.scalar(5, type='int64')
+    assert val.cast('uint32') == pa.scalar(5, type='uint32')
+    assert val.cast('string') == pa.scalar('5', type='string')
+    with pytest.raises(ValueError):
+        pa.scalar('foo').cast('int32')
+
+
 @pytest.mark.pandas
 def test_timestamp():
     import pandas as pd

Reply via email to