This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 658618e ARROW-7208: [Python][Parquet] Raise better error message when
passing a directory path instead of a file path to ParquetFile
658618e is described below
commit 658618ecd540bc6af76efa608cd1ff7b7938ba4c
Author: Wes McKinney <[email protected]>
AuthorDate: Sun Jul 12 22:31:18 2020 -0500
ARROW-7208: [Python][Parquet] Raise better error message when passing a
directory path instead of a file path to ParquetFile
Closes #7722 from wesm/ARROW-7208
Authored-by: Wes McKinney <[email protected]>
Signed-off-by: Wes McKinney <[email protected]>
---
python/pyarrow/io.pxi | 9 +++++++++
python/pyarrow/tests/test_parquet.py | 9 +++++++++
2 files changed, 18 insertions(+)
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 8f8cbd1..76a058d 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -776,11 +776,19 @@ def memory_map(path, mode='r'):
-------
mmap : MemoryMappedFile
"""
+ _check_is_file(path)
+
cdef MemoryMappedFile mmap = MemoryMappedFile()
mmap._open(path, mode)
return mmap
+cdef _check_is_file(path):
+ if os.path.isdir(path):
+ raise IOError("Expected file path, but {0} is a directory"
+ .format(path))
+
+
def create_memory_map(path, size):
"""
Create a file of the given size and memory-map it.
@@ -807,6 +815,7 @@ cdef class OSFile(NativeFile):
object path
def __cinit__(self, path, mode='r', MemoryPool memory_pool=None):
+ _check_is_file(path)
self.path = path
cdef:
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 539c444..410eee1 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -3448,6 +3448,15 @@ def test_empty_row_groups(tempdir):
assert reader.read_row_group(i).equals(table)
+def test_parquet_file_pass_directory_instead_of_file(tempdir):
+ # ARROW-7208
+ path = tempdir / 'directory'
+ os.mkdir(str(path))
+
+ with pytest.raises(IOError, match="Expected file path"):
+ pq.ParquetFile(path)
+
+
@pytest.mark.pandas
@parametrize_legacy_dataset
def test_parquet_writer_with_caller_provided_filesystem(use_legacy_dataset):