This is an automated email from the ASF dual-hosted git repository.

fengzhang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new 5633898  feat: Add options parameter to read_parquet() for S3 configuration (#44)
5633898 is described below

commit 5633898f591c88b9d1c4db8f292d40cb330bfb0e
Author: Feng Zhang <[email protected]>
AuthorDate: Wed Sep 10 10:34:46 2025 -0700

    feat: Add options parameter to read_parquet() for S3 configuration (#44)
---
 Cargo.lock                                 | 172 ++++++++++-------------------
 python/sedonadb/Cargo.toml                 |   1 +
 python/sedonadb/python/sedonadb/context.py |  16 ++-
 python/sedonadb/src/context.rs             |  24 +++-
 python/sedonadb/tests/test_context.py      |  62 ++++++++++-
 rust/sedona-geoparquet/src/provider.rs     |  81 +++++++++++++-
 rust/sedona/src/context.rs                 |  35 +++---
 rust/sedona/src/object_storage.rs          | 110 +++++++++++++-----
 8 files changed, 333 insertions(+), 168 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index c630e5c..21d1617 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -124,12 +124,6 @@ version = "0.2.21"
 source = "registry+https://github.com/rust-lang/crates.io-index";
 checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
 
-[[package]]
-name = "android-tzdata"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0"
-
 [[package]]
 name = "android_system_properties"
 version = "0.1.5"
@@ -581,9 +575,9 @@ dependencies = [
 
 [[package]]
 name = "aws-lc-rs"
-version = "1.13.3"
+version = "1.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "5c953fe1ba023e6b7730c0d4b031d06f267f23a46167dcbd40316644b10a17ba"
+checksum = "94b8ff6c09cd57b16da53641caa860168b88c172a5ee163b0288d3d6eea12786"
 dependencies = [
  "aws-lc-sys",
  "zeroize",
@@ -591,11 +585,11 @@ dependencies = [
 
 [[package]]
 name = "aws-lc-sys"
-version = "0.30.0"
+version = "0.31.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "dbfd150b5dbdb988bcc8fb1fe787eb6b7ee6180ca24da683b61ea5405f3d43ff"
+checksum = "0e44d16778acaf6a9ec9899b92cebd65580b83f685446bf2e1f5d3d732f99dcd"
 dependencies = [
- "bindgen 0.69.5",
+ "bindgen 0.72.1",
  "cc",
  "cmake",
  "dunce",
@@ -932,32 +926,29 @@ dependencies = [
 
 [[package]]
 name = "bindgen"
-version = "0.69.5"
+version = "0.71.1"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
+checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
 dependencies = [
  "bitflags",
  "cexpr",
  "clang-sys",
- "itertools 0.12.1",
- "lazy_static",
- "lazycell",
+ "itertools 0.13.0",
  "log",
  "prettyplease",
  "proc-macro2",
  "quote",
  "regex",
- "rustc-hash 1.1.0",
+ "rustc-hash",
  "shlex",
  "syn 2.0.106",
- "which",
 ]
 
 [[package]]
 name = "bindgen"
-version = "0.71.1"
+version = "0.72.1"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
+checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
 dependencies = [
  "bitflags",
  "cexpr",
@@ -968,7 +959,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "regex",
- "rustc-hash 2.1.1",
+ "rustc-hash",
  "shlex",
  "syn 2.0.106",
 ]
@@ -1150,15 +1141,14 @@ checksum = 
"613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
 
 [[package]]
 name = "chrono"
-version = "0.4.41"
+version = "0.4.42"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d"
+checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
 dependencies = [
- "android-tzdata",
  "iana-time-zone",
  "num-traits",
  "serde",
- "windows-link 0.1.3",
+ "windows-link 0.2.0",
 ]
 
 [[package]]
@@ -1457,7 +1447,7 @@ dependencies = [
  "crossterm_winapi",
  "document-features",
  "parking_lot",
- "rustix 1.0.8",
+ "rustix",
  "winapi",
 ]
 
@@ -2383,12 +2373,12 @@ checksum = 
"877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
 
 [[package]]
 name = "errno"
-version = "0.3.13"
+version = "0.3.14"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad"
+checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
 dependencies = [
  "libc",
- "windows-sys 0.60.2",
+ "windows-sys 0.61.0",
 ]
 
 [[package]]
@@ -2410,7 +2400,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index";
 checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78"
 dependencies = [
  "cfg-if",
- "rustix 1.0.8",
+ "rustix",
  "windows-sys 0.59.0",
 ]
 
@@ -2627,7 +2617,7 @@ dependencies = [
 [[package]]
 name = "geo-generic-alg"
 version = "0.1.0"
-source = 
"git+https://github.com/wherobots/geo.git?branch=generic-alg#ebc0ca413d20c0106f832c59eebda386691c1456";
+source = 
"git+https://github.com/wherobots/geo.git?branch=generic-alg#86f5f3aba769d998336a3c36ac0a3d9f5536596f";
 dependencies = [
  "earcutr",
  "float_next_after",
@@ -2662,7 +2652,7 @@ dependencies = [
 [[package]]
 name = "geo-traits"
 version = "0.2.0"
-source = 
"git+https://github.com/wherobots/geo.git?branch=generic-alg#ebc0ca413d20c0106f832c59eebda386691c1456";
+source = 
"git+https://github.com/wherobots/geo.git?branch=generic-alg#86f5f3aba769d998336a3c36ac0a3d9f5536596f";
 dependencies = [
  "geo-types",
 ]
@@ -2679,7 +2669,7 @@ dependencies = [
 [[package]]
 name = "geo-traits-ext"
 version = "0.1.0"
-source = 
"git+https://github.com/wherobots/geo.git?branch=generic-alg#ebc0ca413d20c0106f832c59eebda386691c1456";
+source = 
"git+https://github.com/wherobots/geo.git?branch=generic-alg#86f5f3aba769d998336a3c36ac0a3d9f5536596f";
 dependencies = [
  "approx",
  "geo-traits 0.2.0",
@@ -2691,7 +2681,7 @@ dependencies = [
 [[package]]
 name = "geo-types"
 version = "0.7.16"
-source = 
"git+https://github.com/wherobots/geo.git?branch=generic-alg#ebc0ca413d20c0106f832c59eebda386691c1456";
+source = 
"git+https://github.com/wherobots/geo.git?branch=generic-alg#86f5f3aba769d998336a3c36ac0a3d9f5536596f";
 dependencies = [
  "approx",
  "num-traits",
@@ -2757,7 +2747,7 @@ dependencies = [
  "js-sys",
  "libc",
  "r-efi",
- "wasi 0.14.4+wasi-0.2.4",
+ "wasi 0.14.5+wasi-0.2.4",
  "wasm-bindgen",
 ]
 
@@ -3193,9 +3183,9 @@ dependencies = [
 
 [[package]]
 name = "indexmap"
-version = "2.11.0"
+version = "2.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9"
+checksum = "206a8042aec68fa4a62e8d3f7aa4ceb508177d9324faf261e1959e495b7a1921"
 dependencies = [
  "equivalent",
  "hashbrown 0.15.5",
@@ -3275,15 +3265,6 @@ dependencies = [
  "either",
 ]
 
-[[package]]
-name = "itertools"
-version = "0.12.1"
-source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
-dependencies = [
- "either",
-]
-
 [[package]]
 name = "itertools"
 version = "0.13.0"
@@ -3352,18 +3333,6 @@ dependencies = [
  "wasm-bindgen",
 ]
 
-[[package]]
-name = "lazy_static"
-version = "1.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
-
-[[package]]
-name = "lazycell"
-version = "1.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
-
 [[package]]
 name = "lexical-core"
 version = "1.0.5"
@@ -3541,15 +3510,9 @@ dependencies = [
 
 [[package]]
 name = "linux-raw-sys"
-version = "0.4.15"
-source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
-
-[[package]]
-name = "linux-raw-sys"
-version = "0.9.4"
+version = "0.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
+checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
 
 [[package]]
 name = "litemap"
@@ -4249,7 +4212,7 @@ dependencies = [
  "pin-project-lite",
  "quinn-proto",
  "quinn-udp",
- "rustc-hash 2.1.1",
+ "rustc-hash",
  "rustls",
  "socket2",
  "thiserror 2.0.16",
@@ -4269,7 +4232,7 @@ dependencies = [
  "lru-slab",
  "rand 0.9.2",
  "ring",
- "rustc-hash 2.1.1",
+ "rustc-hash",
  "rustls",
  "rustls-pki-types",
  "slab",
@@ -4602,12 +4565,6 @@ version = "0.1.26"
 source = "registry+https://github.com/rust-lang/crates.io-index";
 checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace"
 
-[[package]]
-name = "rustc-hash"
-version = "1.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
-
 [[package]]
 name = "rustc-hash"
 version = "2.1.1"
@@ -4625,28 +4582,15 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.38.44"
-source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
-dependencies = [
- "bitflags",
- "errno",
- "libc",
- "linux-raw-sys 0.4.15",
- "windows-sys 0.59.0",
-]
-
-[[package]]
-name = "rustix"
-version = "1.0.8"
+version = "1.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8"
+checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
 dependencies = [
  "bitflags",
  "errno",
  "libc",
- "linux-raw-sys 0.9.4",
- "windows-sys 0.60.2",
+ "linux-raw-sys",
+ "windows-sys 0.61.0",
 ]
 
 [[package]]
@@ -4752,11 +4696,11 @@ dependencies = [
 
 [[package]]
 name = "schannel"
-version = "0.1.27"
+version = "0.1.28"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d"
+checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1"
 dependencies = [
- "windows-sys 0.59.0",
+ "windows-sys 0.61.0",
 ]
 
 [[package]]
@@ -5189,6 +5133,7 @@ dependencies = [
  "pyo3",
  "sedona",
  "sedona-adbc",
+ "sedona-geoparquet",
  "sedona-proj",
  "sedona-schema",
  "sedona-tg",
@@ -5500,21 +5445,21 @@ dependencies = [
 
 [[package]]
 name = "target-lexicon"
-version = "0.13.2"
+version = "0.13.3"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a"
+checksum = "df7f62577c25e07834649fc3b39fafdc597c0a3527dc1c60129201ccfcbaa50c"
 
 [[package]]
 name = "tempfile"
-version = "3.21.0"
+version = "3.22.0"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "15b61f8f20e3a6f7e0649d825294eaf317edce30f82cf6026e7e4cb9222a7d1e"
+checksum = "84fa4d11fadde498443cca10fd3ac23c951f0dc59e080e9f4b93d4df4e4eea53"
 dependencies = [
  "fastrand",
  "getrandom 0.3.3",
  "once_cell",
- "rustix 1.0.8",
- "windows-sys 0.60.2",
+ "rustix",
+ "windows-sys 0.61.0",
 ]
 
 [[package]]
@@ -5971,9 +5916,18 @@ checksum = 
"ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
 
 [[package]]
 name = "wasi"
-version = "0.14.4+wasi-0.2.4"
+version = "0.14.5+wasi-0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index";
+checksum = "a4494f6290a82f5fe584817a676a34b9d6763e8d9d18204009fb31dceca98fd4"
+dependencies = [
+ "wasip2",
+]
+
+[[package]]
+name = "wasip2"
+version = "1.0.0+wasi-0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "88a5f4a424faf49c3c2c344f166f0662341d470ea185e939657aaff130f0ec4a"
+checksum = "03fa2761397e5bd52002cd7e73110c71af2109aca4e521a9f40473fe685b0a24"
 dependencies = [
  "wit-bindgen",
 ]
@@ -6083,18 +6037,6 @@ dependencies = [
  "wasm-bindgen",
 ]
 
-[[package]]
-name = "which"
-version = "4.4.2"
-source = "registry+https://github.com/rust-lang/crates.io-index";
-checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
-dependencies = [
- "either",
- "home",
- "once_cell",
- "rustix 0.38.44",
-]
-
 [[package]]
 name = "winapi"
 version = "0.3.9"
@@ -6435,7 +6377,7 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index";
 checksum = "af3a19837351dc82ba89f8a125e22a3c475f05aba604acc023d62b2739ae2909"
 dependencies = [
  "libc",
- "rustix 1.0.8",
+ "rustix",
 ]
 
 [[package]]
diff --git a/python/sedonadb/Cargo.toml b/python/sedonadb/Cargo.toml
index 4e8a28a..9c3f096 100644
--- a/python/sedonadb/Cargo.toml
+++ b/python/sedonadb/Cargo.toml
@@ -41,6 +41,7 @@ futures = { workspace = true }
 pyo3 = { version = "0.25.1", features = ["extension-module"] }
 sedona = { path = "../../rust/sedona" }
 sedona-adbc = { path = "../../rust/sedona-adbc" }
+sedona-geoparquet = { path = "../../rust/sedona-geoparquet" }
 sedona-schema = { path = "../../rust/sedona-schema" }
 sedona-proj = { path = "../../c/sedona-proj", default-features = false }
 sedona-tg = { path = "../../c/sedona-tg" }
diff --git a/python/sedonadb/python/sedonadb/context.py b/python/sedonadb/python/sedonadb/context.py
index 40323a1..8e50d8f 100644
--- a/python/sedonadb/python/sedonadb/context.py
+++ b/python/sedonadb/python/sedonadb/context.py
@@ -17,7 +17,7 @@
 import os
 import sys
 from pathlib import Path
-from typing import Iterable, Literal, Union, Any
+from typing import Any, Dict, Iterable, Literal, Optional, Union
 
 from sedonadb._lib import InternalContext, configure_proj_shared
 from sedonadb.dataframe import DataFrame, _create_data_frame
@@ -106,12 +106,18 @@ class SedonaContext:
         """
         self._impl.drop_view(name)
 
-    def read_parquet(self, table_paths: Union[str, Path, Iterable[str]]) -> DataFrame:
+    def read_parquet(
+        self,
+        table_paths: Union[str, Path, Iterable[str]],
+        options: Optional[Dict[str, Any]] = None,
+    ) -> DataFrame:
         """Create a [DataFrame][sedonadb.dataframe.DataFrame] from one or more Parquet files
 
         Args:
             table_paths: A str, Path, or iterable of paths containing URLs to Parquet
                 files.
+            options: Optional dictionary of options to pass to the Parquet reader.
+                For S3 access, use {"aws.skip_signature": True, "aws.region": "us-west-2"} for anonymous access to public buckets.
 
         Examples:
 
@@ -124,8 +130,12 @@ class SedonaContext:
         if isinstance(table_paths, (str, Path)):
             table_paths = [table_paths]
 
+        if options is None:
+            options = {}
+
         return DataFrame(
-            self._impl, self._impl.read_parquet([str(path) for path in table_paths])
+            self._impl,
+            self._impl.read_parquet([str(path) for path in table_paths], options),
         )
 
     def sql(self, sql: str) -> DataFrame:
diff --git a/python/sedonadb/src/context.rs b/python/sedonadb/src/context.rs
index 7b66806..0e39a7c 100644
--- a/python/sedonadb/src/context.rs
+++ b/python/sedonadb/src/context.rs
@@ -14,7 +14,7 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-use std::sync::Arc;
+use std::{collections::HashMap, sync::Arc};
 
 use pyo3::prelude::*;
 use sedona::context::SedonaContext;
@@ -74,11 +74,31 @@ impl InternalContext {
         &self,
         py: Python<'py>,
         table_paths: Vec<String>,
+        options: HashMap<String, PyObject>,
     ) -> Result<InternalDataFrame, PySedonaError> {
+        // Convert Python options to strings, filtering out None values
+        let rust_options: HashMap<String, String> = options
+            .into_iter()
+            .filter_map(|(k, v)| {
+                if v.is_none(py) {
+                    None
+                } else {
+                    v.bind(py)
+                        .str()
+                        .and_then(|s| s.extract())
+                        .map(|s: String| (k, s))
+                        .ok()
+                }
+            })
+            .collect();
+
+        let geo_options =
+            sedona_geoparquet::provider::GeoParquetReadOptions::from_table_options(rust_options)
+                .map_err(|e| PySedonaError::SedonaPython(format!("Invalid table options: {e}")))?;
         let df = wait_for_future(
             py,
             &self.runtime,
-            self.inner.read_parquet(table_paths, Default::default()),
+            self.inner.read_parquet(table_paths, geo_options),
         )??;
         Ok(InternalDataFrame::new(df, self.runtime.clone()))
     }
diff --git a/python/sedonadb/tests/test_context.py b/python/sedonadb/tests/test_context.py
index ce8b102..39ed012 100644
--- a/python/sedonadb/tests/test_context.py
+++ b/python/sedonadb/tests/test_context.py
@@ -63,7 +63,63 @@ def test_dynamic_object_stores():
     schema = pa.schema(con.read_parquet(url))
     assert schema.field("geometry").type.extension_name == "geoarrow.wkb"
 
-    # Fresh context
+
+def test_read_parquet_options_parameter(con, geoarrow_data):
+    """Test the options parameter functionality for read_parquet()"""
+    test_file = geoarrow_data / "quadrangles/files/quadrangles_100k_geo.parquet"
+
+    # Test 1: Backward compatibility - no options parameter
+    tab1 = con.read_parquet(test_file).to_arrow_table()
+    assert tab1["geometry"].type.extension_name == "geoarrow.wkb"
+
+    # Test 2: options=None (explicit None)
+    tab2 = con.read_parquet(test_file, options=None).to_arrow_table()
+    assert tab2["geometry"].type.extension_name == "geoarrow.wkb"
+    assert len(tab2) == len(tab1)  # Should be identical
+
+    # Test 3: Empty options dictionary
+    tab3 = con.read_parquet(test_file, options={}).to_arrow_table()
+    assert tab3["geometry"].type.extension_name == "geoarrow.wkb"
+    assert len(tab3) == len(tab1)  # Should be identical
+
+    # Test 4: Options with string values
+    tab4 = con.read_parquet(
+        test_file, options={"test.option": "value"}
+    ).to_arrow_table()
+    assert tab4["geometry"].type.extension_name == "geoarrow.wkb"
+    assert len(tab4) == len(
+        tab1
+    )  # Should be identical (option ignored but not errored)
+
+
+def test_read_geoparquet_s3_anonymous_access():
+    """Test reading from a public S3 bucket geoparquet file with anonymous access"""
     con = sedonadb.connect()
-    schema = pa.schema(con.sql(f"SELECT * FROM '{url}'"))
-    assert schema.field("geometry").type.extension_name == "geoarrow.wkb"
+    s3_url = "s3://wherobots-examples/data/onboarding_1/nyc_buildings.parquet"
+
+    # Use aws.skip_signature with region for anonymous access
+    tab = con.read_parquet(
+        s3_url, options={"aws.skip_signature": True, "aws.region": "us-west-2"}
+    ).to_arrow_table()
+    assert len(tab) > 0
+    assert "geom" in tab.schema.names  # This dataset uses 'geom' instead of 'geometry'
+
+
+def test_read_parquet_invalid_aws_option():
+    """Test that invalid AWS options are caught and provide helpful error messages"""
+    con = sedonadb.connect()
+    url = "s3://wherobots-examples/data/onboarding_1/nyc_buildings.parquet"
+
+    # Test with a misspelled AWS option
+    with pytest.raises(
+        sedonadb._lib.SedonaError,
+        match=r"Unknown AWS option 'aws\.skip_sig'\..*aws\.skip_signature",
+    ):
+        con.read_parquet(url, options={"aws.skip_sig": "true"})
+
+    # Test with completely unknown AWS option
+    with pytest.raises(
+        sedonadb._lib.SedonaError,
+        match="Unknown AWS option.*aws.unknown_option.*Valid options are",
+    ):
+        con.read_parquet(url, options={"aws.unknown_option": "value"})
diff --git a/rust/sedona-geoparquet/src/provider.rs b/rust/sedona-geoparquet/src/provider.rs
index 6fcb4df..5b5e059 100644
--- a/rust/sedona-geoparquet/src/provider.rs
+++ b/rust/sedona-geoparquet/src/provider.rs
@@ -14,7 +14,7 @@
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
-use std::sync::Arc;
+use std::{collections::HashMap, sync::Arc};
 
 use arrow_schema::SchemaRef;
 use async_trait::async_trait;
@@ -78,6 +78,7 @@ pub async fn geoparquet_listing_table(
 #[derive(Default, Clone)]
 pub struct GeoParquetReadOptions<'a> {
     inner: ParquetReadOptions<'a>,
+    table_options: Option<HashMap<String, String>>,
 }
 
 impl GeoParquetReadOptions<'_> {
@@ -85,6 +86,72 @@ impl GeoParquetReadOptions<'_> {
     pub fn new() -> Self {
         Default::default()
     }
+
+    /// Create GeoParquetReadOptions from table options HashMap
+    /// Validates that AWS options are spelled correctly to help catch user errors
+    pub fn from_table_options(options: HashMap<String, String>) -> Result<Self, String> {
+        // Validate AWS options to catch common misspellings
+        for key in options.keys() {
+            if key.starts_with("aws.") {
+                let common_aws_options = [
+                    "aws.access_key_id",
+                    "aws.secret_access_key",
+                    "aws.region",
+                    "aws.endpoint",
+                    "aws.skip_signature",
+                    "aws.nosign", // Alternative name for skip_signature
+                    "aws.bucket_name",
+                    "aws.use_ssl",
+                    "aws.force_path_style",
+                ];
+
+                if !common_aws_options.contains(&key.as_str()) {
+                    // Find potential matches for misspelled options
+                    let close_matches: Vec<&str> = common_aws_options
+                        .iter()
+                        .filter(|&&option| {
+                            // Check for similar starting patterns or abbreviations
+                            let key_start = &key[4..]; // Remove "aws." prefix
+                            let option_start = &option[4..]; // Remove "aws." prefix
+
+                            // Check if the key is a prefix of the option (abbreviation)
+                            // or if they share a common prefix of at least 4 characters
+                            option_start.starts_with(key_start)
+                                || key_start.starts_with(option_start)
+                                || (key_start.len() >= 4
+                                    && option_start.len() >= 4
+                                    && key_start[..4] == option_start[..4])
+                        })
+                        .cloned()
+                        .collect();
+
+                    if !close_matches.is_empty() {
+                        return Err(format!(
+                            "Unknown AWS option '{}'. Did you mean: {}?",
+                            key,
+                            close_matches.join(", ")
+                        ));
+                    } else {
+                        return Err(format!(
+                            "Unknown AWS option '{}'. Valid options are: {}",
+                            key,
+                            common_aws_options.join(", ")
+                        ));
+                    }
+                }
+            }
+        }
+
+        Ok(GeoParquetReadOptions {
+            inner: ParquetReadOptions::default(),
+            table_options: Some(options),
+        })
+    }
+
+    /// Get the table options
+    pub fn table_options(&self) -> Option<&HashMap<String, String>> {
+        self.table_options.as_ref()
+    }
 }
 
 #[async_trait]
@@ -92,8 +159,18 @@ impl ReadOptions<'_> for GeoParquetReadOptions<'_> {
     fn to_listing_options(
         &self,
         config: &SessionConfig,
-        table_options: TableOptions,
+        mut table_options: TableOptions,
     ) -> ListingOptions {
+        // Merge custom table options if provided
+        if let Some(ref custom_options) = self.table_options {
+            for (key, value) in custom_options {
+                if let Err(_e) = table_options.set(key, value) {
+                    // Silently continue for now - unknown options are ignored for compatibility
+                    // The validation happens in from_table_options() method
+                }
+            }
+        }
+
         let mut options = self.inner.to_listing_options(config, table_options);
         if let Some(parquet_format) = options.format.as_any().downcast_ref::<ParquetFormat>() {
             options.format = Arc::new(GeoParquetFormat::new(parquet_format));
diff --git a/rust/sedona/src/context.rs b/rust/sedona/src/context.rs
index dec90d1..3ca8fc6 100644
--- a/rust/sedona/src/context.rs
+++ b/rust/sedona/src/context.rs
@@ -19,7 +19,6 @@ use std::{collections::VecDeque, sync::Arc};
 use crate::exec::create_plan_from_sql;
 use crate::{
     catalog::DynamicObjectStoreCatalog,
-    object_storage::ensure_object_store_registered,
     random_geometry_provider::RandomGeometryFunction,
     show::{show_batches, DisplayTableOptions},
 };
@@ -220,18 +219,28 @@ impl SedonaContext {
         options: GeoParquetReadOptions<'_>,
     ) -> Result<DataFrame> {
         let urls = table_paths.to_urls()?;
-        let provider =
-            match geoparquet_listing_table(&self.ctx, urls.clone(), options.clone()).await {
-                Ok(provider) => provider,
-                Err(e) => {
-                    if urls.is_empty() {
-                        return Err(e);
-                    }
-
-                    ensure_object_store_registered(&mut self.ctx.state(), urls[0].as_str()).await?;
-                    geoparquet_listing_table(&self.ctx, urls, options).await?
-                }
-            };
+
+        // Pre-register object store with our custom options before creating GeoParquetReadOptions
+        if !urls.is_empty() {
+            use crate::object_storage::ensure_object_store_registered_with_options;
+            // Extract the table options from GeoParquetReadOptions for object store registration
+            let table_options_map = options.table_options().cloned().unwrap_or_default();
+
+            // TODO: Consider registering object stores per-bucket instead of per-scheme to avoid
+            // authentication conflicts. Currently, if a user first accesses a public S3 bucket with
+            // aws.skip_signature=true and then tries to access a private bucket, the cached object
+            // store will still have skip_signature enabled, preventing authentication to the private
+            // bucket. A per-bucket registration approach would solve this by using bucket-specific
+            // cache keys like "s3://bucket-name" instead of just "s3://".
+            ensure_object_store_registered_with_options(
+                &mut self.ctx.state(),
+                urls[0].as_str(),
+                Some(&table_options_map),
+            )
+            .await?;
+        }
+
+        let provider = geoparquet_listing_table(&self.ctx, urls, options).await?;
 
         self.ctx.read_table(Arc::new(provider))
     }
diff --git a/rust/sedona/src/object_storage.rs b/rust/sedona/src/object_storage.rs
index 22bdfa0..f6d9065 100644
--- a/rust/sedona/src/object_storage.rs
+++ b/rust/sedona/src/object_storage.rs
@@ -52,7 +52,11 @@ use object_store::ClientOptions;
 
 use crate::context::SedonaContext;
 
-pub async fn ensure_object_store_registered(state: &mut SessionState, name: &str) -> Result<()> {
+pub async fn ensure_object_store_registered_with_options(
+    state: &mut SessionState,
+    name: &str,
+    custom_options: Option<&HashMap<String, String>>,
+) -> Result<()> {
     let optimized_name = substitute_tilde(name.to_owned());
     let table_url = ListingTableUrl::parse(optimized_name.as_str())?;
     let scheme = table_url.scheme();
@@ -67,17 +71,38 @@ pub async fn ensure_object_store_registered(state: &mut SessionState, name: &str
         Err(_) => {
             let mut builder = SessionStateBuilder::from(state.clone());
 
-            // Register the store for this URL. Here we don't have access
-            // to any command options so the only choice is to use an empty collection
+            // Register the store for this URL with custom options if provided
             match scheme {
                 "s3" | "oss" | "cos" => {
                     if let Some(table_options) = builder.table_options() {
-                        table_options.extensions.insert(AwsOptions::default())
+                        let mut aws_options = AwsOptions::default();
+
+                        // Process custom options if provided
+                        if let Some(options) = custom_options {
+                            for (key, value) in options {
+                                if key.starts_with("aws.") {
+                                    let _ = aws_options.set(key, value);
+                                }
+                            }
+                        }
+
+                        table_options.extensions.insert(aws_options);
                     }
                 }
                 "gs" | "gcs" => {
                     if let Some(table_options) = builder.table_options() {
-                        table_options.extensions.insert(GcpOptions::default())
+                        let mut gcp_options = GcpOptions::default();
+
+                        // Process custom options if provided
+                        if let Some(options) = custom_options {
+                            for (key, value) in options {
+                                if key.starts_with("gcp.") {
+                                    let _ = gcp_options.set(key, value);
+                                }
+                            }
+                        }
+
+                        table_options.extensions.insert(gcp_options);
                     }
                 }
                 _ => {}
@@ -97,6 +122,11 @@ pub async fn ensure_object_store_registered(state: &mut SessionState, name: &str
     Ok(())
 }
 
+// Backward compatibility wrapper
+pub async fn ensure_object_store_registered(state: &mut SessionState, name: &str) -> Result<()> {
+    ensure_object_store_registered_with_options(state, name, None).await
+}
+
 pub fn substitute_tilde(cur: String) -> String {
     if let Some(usr_dir_path) = home_dir() {
         if let Some(usr_dir) = usr_dir_path.to_str() {
@@ -120,37 +150,51 @@ pub async fn get_s3_object_store_builder(
         region,
         endpoint,
         allow_http,
+        skip_signature,
     } = aws_options;
 
     let bucket_name = get_bucket_name(url)?;
-    let mut builder = AmazonS3Builder::from_env().with_bucket_name(bucket_name);
 
-    if let (Some(access_key_id), Some(secret_access_key)) = (access_key_id, secret_access_key) {
-        builder = builder
-            .with_access_key_id(access_key_id)
-            .with_secret_access_key(secret_access_key);
-
-        if let Some(session_token) = session_token {
-            builder = builder.with_token(session_token);
-        }
+    let mut builder = if let Some(true) = skip_signature {
+        // For anonymous access, create builder without loading environment credentials
+        // and immediately set skip_signature to prevent credential fetching
+        AmazonS3Builder::new()
+            .with_bucket_name(bucket_name)
+            .with_skip_signature(true)
     } else {
-        let config = aws_config::defaults(BehaviorVersion::latest()).load().await;
-        if let Some(region) = config.region() {
-            builder = builder.with_region(region.to_string());
-        }
+        // For authenticated access, load from environment as usual
+        AmazonS3Builder::from_env().with_bucket_name(bucket_name)
+    };
+
+    // Handle authenticated access (only if not skipping signature)
+    if !matches!(skip_signature, Some(true)) {
+        if let (Some(access_key_id), Some(secret_access_key)) = (access_key_id, secret_access_key) {
+            builder = builder
+                .with_access_key_id(access_key_id)
+                .with_secret_access_key(secret_access_key);
 
-        let credentials = config
-            .credentials_provider()
-            .ok_or_else(|| {
-                DataFusionError::ObjectStore(Box::new(object_store::Error::Generic {
-                    store: "S3",
-                    source: "Failed to get S3 credentials from the environment".into(),
-                }))
-            })?
-            .clone();
-
-        let credentials = Arc::new(S3CredentialProvider { credentials });
-        builder = builder.with_credentials(credentials);
+            if let Some(session_token) = session_token {
+                builder = builder.with_token(session_token);
+            }
+        } else {
+            let config = aws_config::defaults(BehaviorVersion::latest()).load().await;
+            if let Some(region) = config.region() {
+                builder = builder.with_region(region.to_string());
+            }
+
+            let credentials = config
+                .credentials_provider()
+                .ok_or_else(|| {
+                    DataFusionError::ObjectStore(Box::new(object_store::Error::Generic {
+                        store: "S3",
+                        source: "Failed to get S3 credentials from the environment".into(),
+                    }))
+                })?
+                .clone();
+
+            let credentials = Arc::new(S3CredentialProvider { credentials });
+            builder = builder.with_credentials(credentials);
+        }
     }
 
     if let Some(region) = region {
@@ -293,6 +337,8 @@ pub struct AwsOptions {
     pub endpoint: Option<String>,
     /// Allow HTTP (otherwise will always use https)
     pub allow_http: Option<bool>,
+    /// Skip request signing for anonymous access
+    pub skip_signature: Option<bool>,
 }
 
 impl ExtensionOptions for AwsOptions {
@@ -330,6 +376,9 @@ impl ExtensionOptions for AwsOptions {
             "allow_http" => {
                 self.allow_http.set(rem, value)?;
             }
+            "skip_signature" | "SKIP_SIGNATURE" => {
+                self.skip_signature.set(rem, value)?;
+            }
             _ => {
                 return config_err!("Config value \"{}\" not found on AwsOptions", rem);
             }
@@ -366,6 +415,7 @@ impl ExtensionOptions for AwsOptions {
         self.region.visit(&mut v, "region", "");
         self.endpoint.visit(&mut v, "endpoint", "");
         self.allow_http.visit(&mut v, "allow_http", "");
+        self.skip_signature.visit(&mut v, "skip_signature", "");
         v.0
     }
 }


Reply via email to