This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 447a4bd334 [GH-2125] Fix STAC reader to accept tuples for datetime parameter (#2126)
447a4bd334 is described below
commit 447a4bd3347f2d552b2f6f1c7ac53b3246cd82f3
Author: Feng Zhang <[email protected]>
AuthorDate: Sat Jul 19 22:43:05 2025 -0700
[GH-2125] Fix STAC reader to accept tuples for datetime parameter (#2126)
Summary
Fixed a bug where the STAC reader would return 0 items when a tuple was passed as the datetime parameter, while the same data as a list would return the expected results.
Problem
The CollectionClient.load_items_df() method only checked for the list type when handling the datetime parameter, so tuples were ignored and no temporal filtering was applied.
Before:
t = ("2025-01-01", "2025-02-01")
items = client.search(datetime=t) # Returns 0 items
After:
t = ("2025-01-01", "2025-02-01")
items = client.search(datetime=t) # Returns expected items
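For illustration, a minimal standalone sketch of the normalization the fix performs. The normalize_datetime_interval helper name is hypothetical and not part of the patch; only the list/tuple branch is shown:

def normalize_datetime_interval(value):
    # Mirror the fixed check: a list OR a tuple of ISO-8601 strings is
    # normalized to a single [start, end] interval.
    if isinstance(value, (list, tuple)) and isinstance(value[0], str):
        return [list(value)]
    return value

# A tuple now produces the same interval a list does.
assert normalize_datetime_interval(("2025-01-01", "2025-02-01")) == [["2025-01-01", "2025-02-01"]]
assert normalize_datetime_interval(["2025-01-01", "2025-02-01"]) == [["2025-01-01", "2025-02-01"]]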
---
python/sedona/spark/stac/collection_client.py | 6 +++--
python/tests/stac/test_collection_client.py | 38 +++++++++++++++++++++++++++
2 files changed, 42 insertions(+), 2 deletions(-)
diff --git a/python/sedona/spark/stac/collection_client.py b/python/sedona/spark/stac/collection_client.py
index c34098af04..0d04c06575 100644
--- a/python/sedona/spark/stac/collection_client.py
+++ b/python/sedona/spark/stac/collection_client.py
@@ -366,8 +366,10 @@ class CollectionClient:
         if datetime:
             if isinstance(datetime, (str, python_datetime.datetime)):
                 datetime = [self._expand_date(str(datetime))]
-            elif isinstance(datetime, list) and isinstance(datetime[0], str):
-                datetime = [datetime]
+            elif isinstance(datetime, (list, tuple)) and isinstance(
+                datetime[0], str
+            ):
+                datetime = [list(datetime)]
         # Apply spatial and temporal filters
         df = self._apply_spatial_temporal_filters(df, bbox, datetime)
         # Limit the number of items if max_items is specified
diff --git a/python/tests/stac/test_collection_client.py b/python/tests/stac/test_collection_client.py
index 4f73002894..f50a811daa 100644
--- a/python/tests/stac/test_collection_client.py
+++ b/python/tests/stac/test_collection_client.py
@@ -187,3 +187,41 @@ class TestStacReader(TestBase):
         # Optionally, you can load the file back and check its contents
         df_loaded = collection.spark.read.format("geoparquet").load(output_path)
         assert df_loaded.count() == 20, "Loaded GeoParquet file is empty"
+
+    def test_get_items_with_tuple_datetime(self) -> None:
+        """Test that tuples are properly handled as datetime input (same as lists)."""
+        client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
+        collection = client.get_collection("aster-l1t")
+
+        # Test with tuple instead of list
+        datetime_tuple = ("2006-12-01T00:00:00Z", "2006-12-27T02:00:00Z")
+        items_with_tuple = list(collection.get_items(datetime=datetime_tuple))
+
+        # Test with list for comparison
+        datetime_list = ["2006-12-01T00:00:00Z", "2006-12-27T02:00:00Z"]
+        items_with_list = list(collection.get_items(datetime=datetime_list))
+
+        # Both should return the same number of items
+        assert items_with_tuple is not None
+        assert items_with_list is not None
+        assert len(items_with_tuple) == len(items_with_list)
+        assert len(items_with_tuple) == 16
+
+    def test_get_dataframe_with_tuple_datetime(self) -> None:
+        """Test that tuples are properly handled as datetime input for dataframes."""
+        client = Client.open(STAC_URLS["PLANETARY-COMPUTER"])
+        collection = client.get_collection("aster-l1t")
+
+        # Test with tuple instead of list
+        datetime_tuple = ("2006-01-01T00:00:00Z", "2007-01-01T00:00:00Z")
+        df_with_tuple = collection.get_dataframe(datetime=datetime_tuple)
+
+        # Test with list for comparison
+        datetime_list = ["2006-01-01T00:00:00Z", "2007-01-01T00:00:00Z"]
+        df_with_list = collection.get_dataframe(datetime=datetime_list)
+
+        # Both should return the same count
+        assert df_with_tuple is not None
+        assert df_with_list is not None
+        assert df_with_tuple.count() == df_with_list.count()
+        assert df_with_tuple.count() > 0