This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new b66e768155 [SEDONA-680] Remove rasterio from mandatory dependencies (#1692)
b66e768155 is described below

commit b66e768155866a38ba2e3404f1151cac14fad5ea
Author: Jia Yu <[email protected]>
AuthorDate: Sat Nov 23 00:34:11 2024 -0800

    [SEDONA-680] Remove rasterio from mandatory dependencies (#1692)
---
 .github/workflows/python.yml | 19 +++++++++++++++----
 docs/tutorial/raster.md      |  3 +++
 python/sedona/sql/types.py   | 27 +++++++++++++++++++++++----
 python/setup.py              | 10 ++++++++--
 4 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 12956ffd93..aaca28df05 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -143,14 +143,25 @@ jobs:
       - env:
           PYTHON_VERSION: ${{ matrix.python }}
         run: find spark-shaded/target -name sedona-*.jar -exec cp {} ${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars/ \;
-      - env:
+      - name: Run tests
+        env:
           PYTHON_VERSION: ${{ matrix.python }}
         run: |
           export SPARK_HOME=${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark
           cd python
           source ${VENV_PATH}/bin/activate
-          pytest tests
-      - env:
+          pytest -v tests
+      - name: Run basic tests without rasterio
+        env:
+          PYTHON_VERSION: ${{ matrix.python }}
+        run: |
+          export SPARK_HOME=${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark
+          cd python
+          source ${VENV_PATH}/bin/activate
+          pip uninstall -y rasterio
+          pytest -v tests/core/test_rdd.py tests/sql/test_dataframe_api.py
+      - name: Run Spark Connect tests
+        env:
           PYTHON_VERSION: ${{ matrix.python }}
         run: |
           if [ ! -f "${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark/sbin/start-connect-server.sh" ]
@@ -165,4 +176,4 @@ jobs:
           cd python
           source ${VENV_PATH}/bin/activate
           pip install "pyspark[connect]==${SPARK_VERSION}"
-          pytest tests/sql/test_dataframe_api.py
+          pytest -v tests/sql/test_dataframe_api.py
diff --git a/docs/tutorial/raster.md b/docs/tutorial/raster.md
index 5384f82541..7d7df586eb 100644
--- a/docs/tutorial/raster.md
+++ b/docs/tutorial/raster.md
@@ -615,6 +615,9 @@ raster.as_numpy_masked() # numpy array with nodata values masked as nan
 If you want to work with the raster data using `rasterio`, you can retrieve a `rasterio.DatasetReader` object using the
 `as_rasterio` method.
 
+!!!note
+    You need to have the `rasterio` package installed (version >= 1.2.10) to use this method. You can install it using `pip install rasterio`.
+
 ```python
 ds = raster.as_rasterio()  # rasterio.DatasetReader object
 # Work with the raster using rasterio
diff --git a/python/sedona/sql/types.py b/python/sedona/sql/types.py
index 1d7a693f1e..c966d451ca 100644
--- a/python/sedona/sql/types.py
+++ b/python/sedona/sql/types.py
@@ -17,8 +17,21 @@
 
 from pyspark.sql.types import BinaryType, UserDefinedType
 
-from ..raster import raster_serde
-from ..raster.sedona_raster import SedonaRaster
+# Only support RasterType when rasterio is installed
+try:
+    import rasterio
+except ImportError:
+    rasterio = None
+
+if rasterio is not None:
+    from ..raster import raster_serde
+    from ..raster.sedona_raster import SedonaRaster
+else:
+    # We'll skip RasterType UDT registration and raise error when deserializing
+    # RasterUDT objects if rasterio is not installed
+    raster_serde = None
+    SedonaRaster = None
+
 from ..utils import geometry_serde
 
 
@@ -57,7 +70,12 @@ class RasterType(UserDefinedType):
         raise NotImplementedError("RasterType.serialize is not implemented yet")
 
     def deserialize(self, datum):
-        return raster_serde.deserialize(datum)
+        if raster_serde is not None:
+            return raster_serde.deserialize(datum)
+        else:
+            raise NotImplementedError(
+                "rasterio is not installed. Please install it to support RasterType deserialization"
+            )
 
     @classmethod
     def module(cls):
@@ -71,4 +89,5 @@ class RasterType(UserDefinedType):
         return "org.apache.spark.sql.sedona_sql.UDT.RasterUDT"
 
 
-SedonaRaster.__UDT__ = RasterType()
+if SedonaRaster is not None:
+    SedonaRaster.__UDT__ = RasterType()
diff --git a/python/setup.py b/python/setup.py
index e4dccbd8f1..d0770fcc02 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -58,12 +58,18 @@ setup(
     long_description=long_description,
     long_description_content_type="text/markdown",
     python_requires=">=3.6",
-    install_requires=["attrs", "shapely>=1.7.0", "rasterio>=1.2.10"],
+    install_requires=["attrs", "shapely>=1.7.0"],
     extras_require={
         "spark": ["pyspark>=2.3.0"],
         "pydeck-map": ["geopandas", "pydeck==0.8.0"],
         "kepler-map": ["geopandas", "keplergl==0.3.2"],
-        "all": ["pyspark>=2.3.0", "geopandas", "pydeck==0.8.0", "keplergl==0.3.2"],
+        "all": [
+            "pyspark>=2.3.0",
+            "geopandas",
+            "pydeck==0.8.0",
+            "keplergl==0.3.2",
+            "rasterio>=1.2.10",
+        ],
     },
     project_urls={
         "Documentation": "https://sedona.apache.org",

Reply via email to