This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new cab3e216e1 ARROW-17018: [C++][Python] Timedelta dtype metadata base
unit is globally mutated by the Table.to_pandas() method (#13553)
cab3e216e1 is described below
commit cab3e216e17ce8422a15f91480bb408a052b578c
Author: Phillip Cloud <[email protected]>
AuthorDate: Fri Jul 8 22:05:16 2022 -0400
ARROW-17018: [C++][Python] Timedelta dtype metadata base unit is globally
mutated by the Table.to_pandas() method (#13553)
Authored-by: Phillip Cloud <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
cpp/src/arrow/python/numpy_internal.h | 5 +++--
python/pyarrow/tests/test_pandas.py | 12 ++++++++++++
2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/cpp/src/arrow/python/numpy_internal.h b/cpp/src/arrow/python/numpy_internal.h
index 50d1a0fcb7..b9b632f9f9 100644
--- a/cpp/src/arrow/python/numpy_internal.h
+++ b/cpp/src/arrow/python/numpy_internal.h
@@ -167,8 +167,9 @@ inline bool PyBoolScalar_Check(PyObject* obj) {
}
static inline PyArray_Descr* GetSafeNumPyDtype(int type) {
- if (type == NPY_DATETIME) {
- // It is not safe to mutate the result of DescrFromType
+ if (type == NPY_DATETIME || type == NPY_TIMEDELTA) {
+ // It is not safe to mutate the result of DescrFromType for datetime and
+ // timedelta descriptors
return PyArray_DescrNewFromType(type);
} else {
return PyArray_DescrFromType(type);
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 215bf2fb8c..d3cc7fd063 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -4473,3 +4473,15 @@ def test_timestamp_as_object_non_nanosecond(resolution, tz, dt):
def test_threaded_pandas_import():
invoke_script("pandas_threaded_import.py")
+
+
+def test_does_not_mutate_timedelta_dtype():
+ expected = np.dtype('<m8')
+
+ assert np.dtype(np.timedelta64) == expected
+
+ df = pd.DataFrame({"a": [np.timedelta64()]})
+ t = pa.Table.from_pandas(df)
+ t.to_pandas()
+
+ assert np.dtype(np.timedelta64) == expected