This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new ba8b7be ARROW-9504: [C++/Python] Segmentation fault on ChunkedArray.take
ba8b7be is described below
commit ba8b7be81c1e0f2857fca2dbe4efc154cfe0f932
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Thu Jul 16 15:27:41 2020 +0200
ARROW-9504: [C++/Python] Segmentation fault on ChunkedArray.take
Closes #7783 from kszucs/ARROW-9504
Authored-by: Krisztián Szűcs <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
python/pyarrow/_compute.pyx | 12 +++++----
python/pyarrow/tests/test_compute.py | 49 ++++++++++++++++++++++++++++++++++++
2 files changed, 56 insertions(+), 5 deletions(-)
diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index b97e347..15db02c 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -174,10 +174,12 @@ num_kernels: {}
def num_kernels(self):
return self.base_func.num_kernels()
- def call(self, args, FunctionOptions options=None):
+ def call(self, args, FunctionOptions options=None,
+ MemoryPool memory_pool=None):
cdef:
const CFunctionOptions* c_options = NULL
- CExecContext* c_exec_ctx = NULL
+ CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
+ CExecContext c_exec_ctx = CExecContext(pool)
vector[CDatum] c_args
CDatum result
@@ -189,7 +191,7 @@ num_kernels: {}
with nogil:
result = GetResultValue(self.base_func.Execute(c_args,
c_options,
- c_exec_ctx))
+ &c_exec_ctx))
return wrap_datum(result)
@@ -291,9 +293,9 @@ def function_registry():
return _global_func_registry
-def call_function(name, args, options=None):
+def call_function(name, args, options=None, memory_pool=None):
func = _global_func_registry.get_function(name)
- return func.call(args, options=options)
+ return func.call(args, options=options, memory_pool=memory_pool)
cdef class FunctionOptions:
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index f93366d..bdc057d 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -284,6 +284,55 @@ def test_take_indices_types():
arr.take(indices)
+def test_take_on_chunked_array():
+ # ARROW-9504
+ arr = pa.chunked_array([
+ [
+ "a",
+ "b",
+ "c",
+ "d",
+ "e"
+ ],
+ [
+ "f",
+ "g",
+ "h",
+ "i",
+ "j"
+ ]
+ ])
+
+ indices = np.array([0, 5, 1, 6, 9, 2])
+ result = arr.take(indices)
+ expected = pa.chunked_array([["a", "f", "b", "g", "j", "c"]])
+ assert result.equals(expected)
+
+ indices = pa.chunked_array([[1], [9, 2]])
+ result = arr.take(indices)
+ expected = pa.chunked_array([
+ [
+ "b"
+ ],
+ [
+ "j",
+ "c"
+ ]
+ ])
+ assert result.equals(expected)
+
+
+def test_call_function_with_memory_pool():
+ arr = pa.array(["foo", "bar", "baz"])
+ indices = np.array([2, 2, 1])
+ result1 = arr.take(indices)
+ result2 = pc.call_function('take', [arr, indices],
+ memory_pool=pa.default_memory_pool())
+ expected = pa.array(["baz", "baz", "bar"])
+ assert result1.equals(expected)
+ assert result2.equals(expected)
+
+
@pytest.mark.parametrize('ordered', [False, True])
def test_take_dictionary(ordered):
arr = pa.DictionaryArray.from_arrays([0, 1, 2, 0, 1, 2], ['a', 'b', 'c'],