This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new cab3e216e1 ARROW-17018: [C++][Python] Timedelta dtype metadata base unit is globally mutated by the Table.to_pandas() method (#13553)
cab3e216e1 is described below

commit cab3e216e17ce8422a15f91480bb408a052b578c
Author: Phillip Cloud <[email protected]>
AuthorDate: Fri Jul 8 22:05:16 2022 -0400

    ARROW-17018: [C++][Python] Timedelta dtype metadata base unit is globally mutated by the Table.to_pandas() method (#13553)
    
    Authored-by: Phillip Cloud <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 cpp/src/arrow/python/numpy_internal.h |  5 +++--
 python/pyarrow/tests/test_pandas.py   | 12 ++++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/python/numpy_internal.h b/cpp/src/arrow/python/numpy_internal.h
index 50d1a0fcb7..b9b632f9f9 100644
--- a/cpp/src/arrow/python/numpy_internal.h
+++ b/cpp/src/arrow/python/numpy_internal.h
@@ -167,8 +167,9 @@ inline bool PyBoolScalar_Check(PyObject* obj) {
 }
 
 static inline PyArray_Descr* GetSafeNumPyDtype(int type) {
-  if (type == NPY_DATETIME) {
-    // It is not safe to mutate the result of DescrFromType
+  if (type == NPY_DATETIME || type == NPY_TIMEDELTA) {
+    // It is not safe to mutate the result of DescrFromType for datetime and
+    // timedelta descriptors
     return PyArray_DescrNewFromType(type);
   } else {
     return PyArray_DescrFromType(type);
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 215bf2fb8c..d3cc7fd063 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -4473,3 +4473,15 @@ def test_timestamp_as_object_non_nanosecond(resolution, tz, dt):
 
 def test_threaded_pandas_import():
     invoke_script("pandas_threaded_import.py")
+
+
+def test_does_not_mutate_timedelta_dtype():
+    expected = np.dtype('<m8')
+
+    assert np.dtype(np.timedelta64) == expected
+
+    df = pd.DataFrame({"a": [np.timedelta64()]})
+    t = pa.Table.from_pandas(df)
+    t.to_pandas()
+
+    assert np.dtype(np.timedelta64) == expected

Reply via email to