This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new 1112f7a  bug: Support read_parquet glob file paths (#34)
1112f7a is described below

commit 1112f7a7fe2145b93418329fe81fa245d19e11ef
Author: Peter Nguyen <[email protected]>
AuthorDate: Fri Sep 5 22:09:15 2025 -0700

    bug: Support read_parquet glob file paths (#34)
    
    * Add 'test_read_parquet_local_glob' python test
    
    * Fix glob behavior by only converting to globs once
    
    * Clean up
---
 python/sedonadb/tests/test_context.py  | 14 ++++++++++++++
 rust/sedona-geoparquet/src/provider.rs | 24 ++++++++++++++----------
 2 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/python/sedonadb/tests/test_context.py b/python/sedonadb/tests/test_context.py
index fe1300c..ce8b102 100644
--- a/python/sedonadb/tests/test_context.py
+++ b/python/sedonadb/tests/test_context.py
@@ -35,6 +35,20 @@ def test_read_parquet(con, geoarrow_data):
     assert len(tab) == 244
 
 
+def test_read_parquet_local_glob(con, geoarrow_data):
+    # The above test uses .glob() method, this test uses the raw string
+    tab = con.read_parquet(
+        geoarrow_data / "example/files/*_geo.parquet"
+    ).to_arrow_table()
+    assert tab["geometry"].type.extension_name == "geoarrow.wkb"
+    assert len(tab) == 244
+
+    tab = con.read_parquet(
+        geoarrow_data / "example/files/example_polygon-*geo.parquet"
+    ).to_arrow_table()
+    assert len(tab) == 12
+
+
 def test_read_parquet_error(con):
     with pytest.raises(sedonadb._lib.SedonaError, match="No table paths were provided"):
         con.read_parquet([])
diff --git a/rust/sedona-geoparquet/src/provider.rs b/rust/sedona-geoparquet/src/provider.rs
index 485e57e..6fcb4df 100644
--- a/rust/sedona-geoparquet/src/provider.rs
+++ b/rust/sedona-geoparquet/src/provider.rs
@@ -24,7 +24,7 @@ use datafusion::{
         file_format::parquet::ParquetFormat,
         listing::{ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl},
     },
-    execution::{context::DataFilePaths, options::ReadOptions, SessionState},
+    execution::{options::ReadOptions, SessionState},
     prelude::{ParquetReadOptions, SessionConfig, SessionContext},
 };
 use datafusion_common::{exec_err, Result};
@@ -36,12 +36,11 @@ use crate::format::GeoParquetFormat;
 /// Because [ListingTable] implements `TableProvider`, this can be used to
 /// implement geo-aware Parquet reading with interfaces that are otherwise
 /// hard-coded to the built-in Parquet reader.
-pub async fn geoparquet_listing_table<P: DataFilePaths>(
+pub async fn geoparquet_listing_table(
     context: &SessionContext,
-    table_paths: P,
+    table_paths: Vec<ListingTableUrl>,
     options: GeoParquetReadOptions<'_>,
 ) -> Result<ListingTable> {
-    let table_paths = table_paths.to_urls()?;
     let session_config = context.copied_config();
     let listing_options =
         options.to_listing_options(&session_config, context.copied_table_options());
@@ -134,7 +133,9 @@ mod test {
         let data_dir = geoarrow_data_dir().unwrap();
         let tab = geoparquet_listing_table(
             &ctx,
-            format!("{data_dir}/example/files/*_geo.parquet"),
+            vec![
+                ListingTableUrl::parse(format!("{data_dir}/example/files/*_geo.parquet")).unwrap(),
+            ],
             GeoParquetReadOptions::default(),
         )
         .await
@@ -169,15 +170,18 @@ mod test {
     #[tokio::test]
     async fn listing_table_errors() {
         let ctx = SessionContext::new();
-        let err =
-            geoparquet_listing_table(&ctx, Vec::<String>::new(), GeoParquetReadOptions::default())
-                .await
-                .unwrap_err();
+        let err = geoparquet_listing_table(
+            &ctx,
+            Vec::<ListingTableUrl>::new(),
+            GeoParquetReadOptions::default(),
+        )
+        .await
+        .unwrap_err();
         assert_eq!(err.message(), "No table paths were provided");
 
         let err = geoparquet_listing_table(
             &ctx,
-            "foofy.wrongextension",
+            vec![ListingTableUrl::parse("foofy.wrongextension").unwrap()],
             GeoParquetReadOptions::default(),
         )
         .await

Reply via email to