This is an automated email from the ASF dual-hosted git repository.
alenka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 618d5fbe84 GH-47389: [Python] CSV and JSON options lack a nice repr/str (#47397)
618d5fbe84 is described below
commit 618d5fbe84ba417123b0341bc1f255a014128311
Author: Nic Crane <[email protected]>
AuthorDate: Wed Mar 25 08:38:36 2026 +0000
GH-47389: [Python] CSV and JSON options lack a nice repr/str (#47397)
### Rationale for this change
CSV and JSON options lack a nice repr/str dunder method
### What changes are included in this PR?
Add both these methods
### Are these changes tested?
Will be once it's ready for review
### Are there any user-facing changes?
No
* GitHub Issue: #47389
Lead-authored-by: Nic Crane <[email protected]>
Co-authored-by: AlenkaF <[email protected]>
Co-authored-by: Alenka Frim <[email protected]>
Signed-off-by: AlenkaF <[email protected]>
---
python/pyarrow/_csv.pyx | 68 +++++++++++++++++++++++++++++++++++++++
python/pyarrow/_json.pyx | 22 +++++++++++++
python/pyarrow/tests/test_csv.py | 50 ++++++++++++++++++++++++++++
python/pyarrow/tests/test_json.py | 15 +++++++++
4 files changed, 155 insertions(+)
diff --git a/python/pyarrow/_csv.pyx b/python/pyarrow/_csv.pyx
index ed9d20beb6..79985530af 100644
--- a/python/pyarrow/_csv.pyx
+++ b/python/pyarrow/_csv.pyx
@@ -332,6 +332,22 @@ cdef class ReadOptions(_Weakrefable):
except TypeError:
return False
+ def _repr_base(self):
+ return (f"""
+ use_threads={self.use_threads},
+ block_size={self.block_size},
+ skip_rows={self.skip_rows},
+ skip_rows_after_names={self.skip_rows_after_names},
+ column_names={self.column_names},
+ autogenerate_column_names={self.autogenerate_column_names},
+ encoding={self.encoding!r}""")
+
+ def __repr__(self):
+ return (f"<pyarrow.csv.ReadOptions>({self._repr_base()})")
+
+ def __str__(self):
+ return (f"ReadOptions({self._repr_base()})")
+
cdef class ParseOptions(_Weakrefable):
"""
@@ -585,6 +601,23 @@ cdef class ParseOptions(_Weakrefable):
except TypeError:
return False
+ def _repr_base(self):
+ return (f"""
+ delimiter={self.delimiter!r},
+ quote_char={self.quote_char!r},
+ double_quote={self.double_quote},
+ escape_char={self.escape_char!r},
+ newlines_in_values={self.newlines_in_values},
+ ignore_empty_lines={self.ignore_empty_lines},
+ invalid_row_handler={getattr(self.invalid_row_handler, '__name__',
+ self.invalid_row_handler)}""")
+
+ def __repr__(self):
+ return (f"<pyarrow.csv.ParseOptions>({self._repr_base()})")
+
+ def __str__(self):
+ return (f"ParseOptions({self._repr_base()})")
+
cdef class _ISO8601(_Weakrefable):
"""
@@ -1108,6 +1141,28 @@ cdef class ConvertOptions(_Weakrefable):
except TypeError:
return False
+ def _repr_base(self):
+ return (f"""
+ check_utf8={self.check_utf8},
+ column_types={self.column_types},
+ null_values={self.null_values},
+ true_values={self.true_values},
+ false_values={self.false_values},
+ decimal_point={self.decimal_point!r},
+ strings_can_be_null={self.strings_can_be_null},
+ quoted_strings_can_be_null={self.quoted_strings_can_be_null},
+ include_columns={self.include_columns},
+ include_missing_columns={self.include_missing_columns},
+ auto_dict_encode={self.auto_dict_encode},
+ auto_dict_max_cardinality={self.auto_dict_max_cardinality},
+ timestamp_parsers={[str(i) for i in self.timestamp_parsers]}""")
+
+ def __repr__(self):
+ return (f"<pyarrow.csv.ConvertOptions>({self._repr_base()})")
+
+ def __str__(self):
+ return (f"ConvertOptions({self._repr_base()})")
+
cdef _get_reader(input_file, ReadOptions read_options,
shared_ptr[CInputStream]* out):
@@ -1459,6 +1514,19 @@ cdef class WriteOptions(_Weakrefable):
def validate(self):
check_status(self.options.get().Validate())
+ def _repr_base(self):
+ return (f"""
+ include_header={self.include_header},
+ batch_size={self.batch_size},
+ delimiter={self.delimiter!r},
+ quoting_style={self.quoting_style!r}""")
+
+ def __repr__(self):
+ return (f"<pyarrow.csv.WriteOptions>({self._repr_base()})")
+
+ def __str__(self):
+ return (f"WriteOptions({self._repr_base()})")
+
cdef _get_write_options(WriteOptions write_options, CCSVWriteOptions* out):
if write_options is None:
diff --git a/python/pyarrow/_json.pyx b/python/pyarrow/_json.pyx
index 07e615dd5e..f8373feeab 100644
--- a/python/pyarrow/_json.pyx
+++ b/python/pyarrow/_json.pyx
@@ -105,6 +105,16 @@ cdef class ReadOptions(_Weakrefable):
except TypeError:
return False
+ def __repr__(self):
+ return (f"""<pyarrow.json.ReadOptions>(
+ use_threads={self.use_threads},
+ block_size={self.block_size})""")
+
+ def __str__(self):
+ return (f"""ReadOptions(
+ use_threads={self.use_threads},
+ block_size={self.block_size})""")
+
@staticmethod
cdef ReadOptions wrap(CJSONReadOptions options):
out = ReadOptions()
@@ -244,6 +254,18 @@ cdef class ParseOptions(_Weakrefable):
except TypeError:
return False
+ def _repr_base(self):
+ return (f"""
+ explicit_schema={self.explicit_schema},
+ newlines_in_values={self.newlines_in_values},
+ unexpected_field_behavior={self.unexpected_field_behavior!r}""")
+
+ def __repr__(self):
+ return (f"<pyarrow.json.ParseOptions>({self._repr_base()})")
+
+ def __str__(self):
+ return (f"ParseOptions({self._repr_base()})")
+
@staticmethod
cdef ParseOptions wrap(CJSONParseOptions options):
out = ParseOptions()
diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py
index dce605c715..d608d2bee5 100644
--- a/python/pyarrow/tests/test_csv.py
+++ b/python/pyarrow/tests/test_csv.py
@@ -213,6 +213,18 @@ def test_read_options(pickle_module):
opts.column_names = ('a', 'b')
opts.validate()
+ expected_repr_inner = """
+ use_threads=True,
+ block_size=1048576,
+ skip_rows=0,
+ skip_rows_after_names=0,
+ column_names=['a', 'b'],
+ autogenerate_column_names=True,
+ encoding='utf8'"""
+
+ assert repr(opts) == f"<pyarrow.csv.ReadOptions>({expected_repr_inner})"
+ assert str(opts) == f"ReadOptions({expected_repr_inner})"
+
def test_parse_options(pickle_module):
cls = ParseOptions
@@ -273,6 +285,18 @@ def test_parse_options(pickle_module):
opts.escape_char = "\r"
opts.validate()
+ expected_repr_inner = r"""
+ delimiter=',',
+ quote_char='"',
+ double_quote=True,
+ escape_char='\r',
+ newlines_in_values=False,
+ ignore_empty_lines=True,
+ invalid_row_handler=None"""
+
+ assert repr(opts) == f"<pyarrow.csv.ParseOptions>({expected_repr_inner})"
+ assert str(opts) == f"ParseOptions({expected_repr_inner})"
+
def test_convert_options(pickle_module):
cls = ConvertOptions
@@ -354,6 +378,23 @@ def test_convert_options(pickle_module):
assert opts.auto_dict_max_cardinality == 999
assert opts.timestamp_parsers == [ISO8601, '%Y-%m-%d']
+ expected_repr_inner = ("""
+ check_utf8=True,
+ column_types={'a': DataType(null)},
+ null_values=['N', 'nn'],
+ true_values=['T', 'tt'],
+ false_values=['F', 'ff'],
+ decimal_point='.',
+ strings_can_be_null=False,
+ quoted_strings_can_be_null=True,
+ include_columns=[],
+ include_missing_columns=False,
+ auto_dict_encode=False,
+ auto_dict_max_cardinality=999,
+ timestamp_parsers=['ISO8601', '%Y-%m-%d']""")
+ assert repr(opts) == f"<pyarrow.csv.ConvertOptions>({expected_repr_inner})"
+ assert str(opts) == f"ConvertOptions({expected_repr_inner})"
+
def test_write_options():
cls = WriteOptions
@@ -378,6 +419,15 @@ def test_write_options():
opts.batch_size = 0
opts.validate()
+ expected_repr_inner = """
+ include_header=True,
+ batch_size=0,
+ delimiter=',',
+ quoting_style='needed'"""
+
+ assert repr(opts) == f"<pyarrow.csv.WriteOptions>({expected_repr_inner})"
+ assert str(opts) == f"WriteOptions({expected_repr_inner})"
+
class BaseTestCSV(abc.ABC):
"""Common tests which are shared by streaming and non streaming readers"""
diff --git a/python/pyarrow/tests/test_json.py b/python/pyarrow/tests/test_json.py
index c3f9fe333b..8d5e6f43db 100644
--- a/python/pyarrow/tests/test_json.py
+++ b/python/pyarrow/tests/test_json.py
@@ -80,6 +80,13 @@ def test_read_options(pickle_module):
assert opts.block_size == 1234
assert opts.use_threads is False
+ expected_repr_inner = """
+ use_threads=False,
+ block_size=1234"""
+
+ assert repr(opts) == f"<pyarrow.json.ReadOptions>({expected_repr_inner})"
+ assert str(opts) == f"ReadOptions({expected_repr_inner})"
+
check_options_class_pickling(cls, pickler=pickle_module,
block_size=1234,
use_threads=False)
@@ -94,6 +101,14 @@ def test_parse_options(pickle_module):
opts.newlines_in_values = True
assert opts.newlines_in_values is True
+ expected_repr_inner = """
+ explicit_schema=None,
+ newlines_in_values=True,
+ unexpected_field_behavior='infer'"""
+
+ assert repr(opts) == f"<pyarrow.json.ParseOptions>({expected_repr_inner})"
+ assert str(opts) == f"ParseOptions({expected_repr_inner})"
+
schema = pa.schema([pa.field('foo', pa.int32())])
opts.explicit_schema = schema
assert opts.explicit_schema == schema