(sedona) branch master updated: [CI] Add pre-commit hook `pyupgrade` to auto upgrade Python syntax (#1638)

jiayu Mon, 31 Mar 2025 16:34:38 -0700

This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git



The following commit(s) were added to refs/heads/master by this push:
     new 343f79c907 [CI] Add pre-commit hook `pyupgrade` to auto upgrade Python syntax (#1638)
343f79c907 is described below

commit 343f79c90712c562f407787fb947139bfdeaeb90
Author: John Bampton <[email protected]>
AuthorDate: Tue Apr 1 09:16:06 2025 +1000

    [CI] Add pre-commit hook `pyupgrade` to auto upgrade Python syntax (#1638)
    
    "A tool (and pre-commit hook) to automatically upgrade syntax for newer versions of the language."
    
    https://github.com/asottile/pyupgrade
---
 .pre-commit-config.yaml                            |  5 +++
 python/sedona/core/jvm/config.py                   |  4 +--
 python/sedona/geopandas/base.py                    |  2 +-
 python/sedona/geopandas/geodataframe.py            | 18 +++++------
 python/sedona/maps/SedonaPyDeck.py                 |  2 +-
 python/sedona/raster/meta.py                       |  4 +--
 python/sedona/raster/raster_serde.py               |  4 +--
 python/sedona/sql/dataframe_api.py                 |  2 +-
 python/sedona/utils/decorators.py                  |  2 +-
 python/sedona/utils/geometry_serde.py              |  4 +--
 python/sedona/utils/geometry_serde_general.py      |  6 ++--
 python/sedona/utils/meta.py                        |  2 +-
 python/setup.py                                    |  2 +-
 .../core/test_avoiding_python_jvm_serde_df.py      | 32 +++++++++----------
 python/tests/geopandas/test_geodataframe.py        |  2 +-
 python/tests/geopandas/test_geoseries.py           |  8 ++---
 python/tests/geopandas/test_sjoin.py               |  4 +--
 python/tests/sql/test_function.py                  | 36 +++++++++-------------
 python/tests/stats/test_dbscan.py                  |  2 +-
 19 files changed, 66 insertions(+), 75 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 67ca1cf07a..07ccb8922b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -125,6 +125,11 @@ repos:
           - --license-filepath
           - .github/workflows/license-templates/LICENSE.txt
           - --fuzzy-match-generates-todo
+  - repo: https://github.com/asottile/pyupgrade
+    rev: v3.19.1
+    hooks:
+      - id: pyupgrade
+        args: [--py37-plus]
   - repo: https://github.com/psf/black-pre-commit-mirror
     rev: 25.1.0
     hooks:
diff --git a/python/sedona/core/jvm/config.py b/python/sedona/core/jvm/config.py
index 1775712796..09650ed51b 100644
--- a/python/sedona/core/jvm/config.py
+++ b/python/sedona/core/jvm/config.py
@@ -28,7 +28,7 @@ from pyspark.sql import SparkSession
 
 from sedona.utils.decorators import classproperty
 
-string_types = (type(b""), type(""))
+string_types = (bytes, str)
 
 
 def is_greater_or_equal_version(version_a: str, version_b: str) -> bool:
@@ -190,7 +190,7 @@ class SparkJars:
         try:
             used_jar_files = java_spark_conf.get(value)
         except Py4JJavaError:
-            error_message = "Didn't find the value of {} from SparkConf".format(value)
+            error_message = f"Didn't find the value of {value} from SparkConf"
             logging.info(error_message)
 
         return used_jar_files, error_message
diff --git a/python/sedona/geopandas/base.py b/python/sedona/geopandas/base.py
index c7cbc39ca3..b70725930f 100644
--- a/python/sedona/geopandas/base.py
+++ b/python/sedona/geopandas/base.py
@@ -40,7 +40,7 @@ from sedona.geopandas._typing import GeoFrameLike
 bool_type = bool
 
 
-class GeoFrame(object, metaclass=ABCMeta):
+class GeoFrame(metaclass=ABCMeta):
     """
     A base class for both GeoDataFrame and GeoSeries.
     """
diff --git a/python/sedona/geopandas/geodataframe.py b/python/sedona/geopandas/geodataframe.py
index bdef237c1f..e22451f0c5 100644
--- a/python/sedona/geopandas/geodataframe.py
+++ b/python/sedona/geopandas/geodataframe.py
@@ -174,31 +174,31 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
 
     def _reduce_for_geostat_function(
         self,
-        sfun: Callable[["GeoSeries"], Column],
+        sfun: Callable[[GeoSeries], Column],
         name: str,
-        axis: Optional[Axis] = None,
+        axis: Axis | None = None,
         numeric_only: bool = True,
         skipna: bool = True,
         **kwargs: Any,
-    ) -> Union["GeoSeries", Scalar]:
+    ) -> GeoSeries | Scalar:
         # Implementation of the abstract method
         raise NotImplementedError("This method is not implemented yet.")
 
     @property
-    def dtypes(self) -> Union[gpd.GeoSeries, pd.Series, Dtype]:
+    def dtypes(self) -> gpd.GeoSeries | pd.Series | Dtype:
         # Implementation of the abstract method
         raise NotImplementedError("This method is not implemented yet.")
 
-    def to_geopandas(self) -> Union[gpd.GeoDataFrame, pd.Series]:
+    def to_geopandas(self) -> gpd.GeoDataFrame | pd.Series:
         # Implementation of the abstract method
         raise NotImplementedError("This method is not implemented yet.")
 
-    def _to_geopandas(self) -> Union[gpd.GeoDataFrame, pd.Series]:
+    def _to_geopandas(self) -> gpd.GeoDataFrame | pd.Series:
         # Implementation of the abstract method
         raise NotImplementedError("This method is not implemented yet.")
 
     @property
-    def geoindex(self) -> "GeoIndex":
+    def geoindex(self) -> GeoIndex:
         # Implementation of the abstract method
         raise NotImplementedError("This method is not implemented yet.")
 
@@ -232,7 +232,7 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
             return self
 
     @property
-    def area(self) -> "GeoDataFrame":
+    def area(self) -> GeoDataFrame:
         """
         Returns a GeoDataFrame containing the area of each geometry expressed in the units of the CRS.
 
@@ -517,7 +517,7 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
         mitre_limit=5.0,
         single_sided=False,
         **kwargs,
-    ) -> "GeoDataFrame":
+    ) -> GeoDataFrame:
         """
         Returns a GeoDataFrame with all geometries buffered by the specified distance.
 
diff --git a/python/sedona/maps/SedonaPyDeck.py b/python/sedona/maps/SedonaPyDeck.py
index 343389841b..a553817ec8 100644
--- a/python/sedona/maps/SedonaPyDeck.py
+++ b/python/sedona/maps/SedonaPyDeck.py
@@ -315,7 +315,7 @@ class SedonaPyDeck:
         :return: fill_color string for pydeck map
         """
         plot_max = gdf[plot_col].max()
-        return "[85, 183, 177, ({0} / {1}) * 255 + 15]".format(plot_col, plot_max)
+        return f"[85, 183, 177, ({plot_col} / {plot_max}) * 255 + 15]"
 
     @classmethod
     def _create_coord_column_(cls, gdf, geometry_col, add_points=False):
diff --git a/python/sedona/raster/meta.py b/python/sedona/raster/meta.py
index 38c5283ba7..f440b009ad 100644
--- a/python/sedona/raster/meta.py
+++ b/python/sedona/raster/meta.py
@@ -104,8 +104,8 @@ class AffineTransform:
 
     def __repr__(self):
         return (
-            "[ {} {} {}\n".format(self.scale_x, self.skew_x, self.ip_x)
-            + "  {} {} {}\n".format(self.skew_y, self.scale_y, self.ip_y)
+            f"[ {self.scale_x} {self.skew_x} {self.ip_x}\n"
+            + f"  {self.skew_y} {self.scale_y} {self.ip_y}\n"
             + "   0  0  1 ]"
         )
 
diff --git a/python/sedona/raster/raster_serde.py b/python/sedona/raster/raster_serde.py
index 90dd651bee..1d86531e03 100644
--- a/python/sedona/raster/raster_serde.py
+++ b/python/sedona/raster/raster_serde.py
@@ -63,7 +63,7 @@ def _deserialize(bio: BytesIO, raster_type: int) -> SedonaRaster:
             width, height, bands_meta, affine_trans, crs_wkt, awt_raster
         )
     else:
-        raise ValueError("unsupported raster_type: {}".format(raster_type))
+        raise ValueError(f"unsupported raster_type: {raster_type}")
 
 
 def _read_grid_envelope(bio: BytesIO) -> Tuple[int, int, int, int]:
@@ -183,7 +183,7 @@ def _read_data_buffer(bio: BytesIO) -> DataBuffer:
         elif data_type == DataBuffer.TYPE_DOUBLE:
             np_array = np.frombuffer(bio.read(8 * bank_size), dtype=np.float64)
         else:
-            raise ValueError("unknown data_type {}".format(data_type))
+            raise ValueError(f"unknown data_type {data_type}")
 
         banks.append(np_array)
 
diff --git a/python/sedona/sql/dataframe_api.py b/python/sedona/sql/dataframe_api.py
index b1639a97bf..4c73a8af07 100644
--- a/python/sedona/sql/dataframe_api.py
+++ b/python/sedona/sql/dataframe_api.py
@@ -123,7 +123,7 @@ def _get_readable_name_for_type(type: Type) -> str:
     :rtype: str
     """
     if isinstance(type, typing._GenericAlias) and type.__origin__._name == "Union":
-        return f"Union[{', '.join((_strip_extra_from_class_name(str(x)) for x in type.__args__))}]"
+        return f"Union[{', '.join(_strip_extra_from_class_name(str(x)) for x in type.__args__)}]"
     return _strip_extra_from_class_name(str(type))
 
 
diff --git a/python/sedona/utils/decorators.py b/python/sedona/utils/decorators.py
index 90de8d16e3..2794baba18 100644
--- a/python/sedona/utils/decorators.py
+++ b/python/sedona/utils/decorators.py
@@ -20,7 +20,7 @@ from typing import Callable, Iterable, List, TypeVar
 T = TypeVar("T")
 
 
-class classproperty(object):
+class classproperty:
 
     def __init__(self, f):
         self.f = f
diff --git a/python/sedona/utils/geometry_serde.py b/python/sedona/utils/geometry_serde.py
index f9c23e1128..d0645b6ada 100644
--- a/python/sedona/utils/geometry_serde.py
+++ b/python/sedona/utils/geometry_serde.py
@@ -42,9 +42,7 @@ try:
                     ".dll"
                 ):
                     return os.path.join(lib_dirpath, filename)
-        raise RuntimeError(
-            "geos_c DLL not found in {}\\[S|s]hapely.libs".format(packages_dir)
-        )
+        raise RuntimeError(f"geos_c DLL not found in {packages_dir}\\[S|s]hapely.libs")
 
     if shapely.__version__.startswith("2."):
         if sys.platform != "win32":
diff --git a/python/sedona/utils/geometry_serde_general.py b/python/sedona/utils/geometry_serde_general.py
index 6c1a821340..89b3e64ecd 100644
--- a/python/sedona/utils/geometry_serde_general.py
+++ b/python/sedona/utils/geometry_serde_general.py
@@ -82,7 +82,7 @@ class CoordinateType:
         elif geom._ndim == 3:
             return CoordinateType.XYZ
         else:
-            raise ValueError("Invalid coordinate dimension: {}".format(geom._ndim))
+            raise ValueError(f"Invalid coordinate dimension: {geom._ndim}")
 
     @staticmethod
     def bytes_per_coord(coord_type: int) -> int:
@@ -233,7 +233,7 @@ def deserialize(buffer: bytes) -> Optional[BaseGeometry]:
     elif geom_type == GeometryTypeID.GEOMETRYCOLLECTION:
         geom = deserialize_geometry_collection(geom_buffer)
     else:
-        raise ValueError("Unsupported geometry type ID: {}".format(geom_type))
+        raise ValueError(f"Unsupported geometry type ID: {geom_type}")
     return geom, geom_buffer.ints_offset
 
 
@@ -546,7 +546,7 @@ def serialize_shapely_1_empty_geom(geom: BaseGeometry) -> bytearray:
         geom_type = GeometryTypeID.MULTIPOLYGON
         total_size = 12
     else:
-        raise ValueError("Invalid empty geometry collection object: {}".format(geom))
+        raise ValueError(f"Invalid empty geometry collection object: {geom}")
     return create_buffer_for_geom(geom_type, CoordinateType.XY, total_size, 0)
 
 
diff --git a/python/sedona/utils/meta.py b/python/sedona/utils/meta.py
index c6c6945d41..0234a60c1a 100644
--- a/python/sedona/utils/meta.py
+++ b/python/sedona/utils/meta.py
@@ -79,7 +79,7 @@ class MultiMethod:
 
             if parm.annotation is inspect.Parameter.empty:
                 raise InvalidParametersException(
-                    "Argument {} must be annotated with a type".format(name)
+                    f"Argument {name} must be annotated with a type"
                 )
             if parm.default is not inspect.Parameter.empty:
                 self._methods[tuple(types)] = meth
diff --git a/python/setup.py b/python/setup.py
index d0770fcc02..a6bc7e9560 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -21,7 +21,7 @@ from setuptools import Extension, find_packages, setup
 
 from sedona import version
 
-with open("README.md", "r") as fh:
+with open("README.md") as fh:
     long_description = fh.read()
 
 extension_args = {}
diff --git a/python/tests/core/test_avoiding_python_jvm_serde_df.py b/python/tests/core/test_avoiding_python_jvm_serde_df.py
index 66b8a27b62..117429d996 100644
--- a/python/tests/core/test_avoiding_python_jvm_serde_df.py
+++ b/python/tests/core/test_avoiding_python_jvm_serde_df.py
@@ -165,24 +165,20 @@ class TestOmitPythonJvmSerdeToDf(TestBase):
         right_geometries = self.__row_to_list(right_geometries_raw)
 
         # Ignore the ordering of these
-        assert set(geom[0] for geom in left_geometries) == set(
-            [
-                "POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))",
-                "POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))",
-                "POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))",
-                "POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
-                "POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
-            ]
-        )
-        assert set(geom[0] for geom in right_geometries) == set(
-            [
-                "POINT (-3 5)",
-                "POINT (11 5)",
-                "POINT (4 3)",
-                "POINT (-1 -1)",
-                "POINT (-4 -5)",
-            ]
-        )
+        assert {geom[0] for geom in left_geometries} == {
+            "POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))",
+            "POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))",
+            "POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))",
+            "POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
+            "POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
+        }
+        assert {geom[0] for geom in right_geometries} == {
+            "POINT (-3 5)",
+            "POINT (11 5)",
+            "POINT (4 3)",
+            "POINT (-1 -1)",
+            "POINT (-4 -5)",
+        }
 
     def test_range_query_flat_to_df(self):
         poi_point_rdd = WktReader.readToGeometryRDD(
diff --git a/python/tests/geopandas/test_geodataframe.py b/python/tests/geopandas/test_geodataframe.py
index 502b521526..30fcf24520 100644
--- a/python/tests/geopandas/test_geodataframe.py
+++ b/python/tests/geopandas/test_geodataframe.py
@@ -56,7 +56,7 @@ class TestDataframe(TestBase):
             },
             index=[10, 20, 30, 40, 50, 60],
         )
-        assert psdf.count().count() is 3
+        assert psdf.count().count() == 3
 
     def test_type_single_geometry_column(self):
         # Create a GeoDataFrame with a single geometry column and additional attributes
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index 52e60ab3f7..2589726929 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -56,7 +56,7 @@ class TestSeries(TestBase):
             },
             index=[10, 20, 30, 40, 50, 60],
         )
-        assert psdf.count().count() is 3
+        assert psdf.count().count() == 3
 
     def test_internal_st_function(self):
         # this is to make sure the spark session works with internal sedona udfs
@@ -82,19 +82,19 @@ class TestSeries(TestBase):
         area = self.g1.area
         assert area is not None
         assert type(area) is GeoSeries
-        assert area.count() is 2
+        assert area.count() == 2
 
     def test_buffer(self):
         buffer = self.g1.buffer(0.2)
         assert buffer is not None
         assert type(buffer) is GeoSeries
-        assert buffer.count() is 2
+        assert buffer.count() == 2
 
     def test_buffer_then_area(self):
         area = self.g1.buffer(0.2).area
         assert area is not None
         assert type(area) is GeoSeries
-        assert area.count() is 2
+        assert area.count() == 2
 
     def test_buffer_then_geoparquet(self):
         temp_file_path = os.path.join(
diff --git a/python/tests/geopandas/test_sjoin.py b/python/tests/geopandas/test_sjoin.py
index f9e1c6f680..bdca4d56f3 100644
--- a/python/tests/geopandas/test_sjoin.py
+++ b/python/tests/geopandas/test_sjoin.py
@@ -42,7 +42,7 @@ class TestSpatialJoin(TestBase):
         joined = sjoin(left, right)
         assert joined is not None
         assert type(joined) is GeoSeries
-        assert joined.count() is 4
+        assert joined.count() == 4
 
     def test_sjoin_method2(self):
         left = self.g1
@@ -50,4 +50,4 @@ class TestSpatialJoin(TestBase):
         joined = left.sjoin(right)
         assert joined is not None
         assert type(joined) is GeoSeries
-        assert joined.count() is 4
+        assert joined.count() == 4
diff --git a/python/tests/sql/test_function.py b/python/tests/sql/test_function.py
index da9f2bc253..93ab59f24f 100644
--- a/python/tests/sql/test_function.py
+++ b/python/tests/sql/test_function.py
@@ -1967,14 +1967,10 @@ class TestPredicateJoin(TestBase):
         )
 
         # then result should be as expected
-        assert set(
-            [
-                el[0]
-                for el in geometry_df_collected.selectExpr(
-                    "ST_AsText(collected)"
-                ).collect()
-            ]
-        ) == {
+        assert {
+            el[0]
+            for el in geometry_df_collected.selectExpr("ST_AsText(collected)").collect()
+        } == {
             "MULTILINESTRING ((1 2, 3 4), (3 4, 4 5))",
             "MULTIPOINT ((1 2), (-2 3))",
             "MULTIPOLYGON (((1 2, 1 4, 3 4, 3 2, 1 2)), ((0.5 0.5, 5 0, 5 5, 0 5, 0.5 0.5)))",
@@ -2000,14 +1996,10 @@ class TestPredicateJoin(TestBase):
         )
 
         # then result should be calculated
-        assert set(
-            [
-                el[0]
-                for el in geometry_df_collected.selectExpr(
-                    "ST_AsText(collected)"
-                ).collect()
-            ]
-        ) == {
+        assert {
+            el[0]
+            for el in geometry_df_collected.selectExpr("ST_AsText(collected)").collect()
+        } == {
             "MULTILINESTRING ((1 2, 3 4), (3 4, 4 5))",
             "MULTIPOINT ((1 2), (-2 3))",
             "MULTIPOLYGON (((1 2, 1 4, 3 4, 3 2, 1 2)), ((0.5 0.5, 5 0, 5 5, 0 5, 0.5 0.5)))",
@@ -2036,7 +2028,7 @@ class TestPredicateJoin(TestBase):
         }
         for input_geom, expected_geom in test_cases.items():
             reversed_geometry = self.spark.sql(
-                "select ST_AsText(ST_Reverse(ST_GeomFromText({})))".format(input_geom)
+                f"select ST_AsText(ST_Reverse(ST_GeomFromText({input_geom})))"
             )
             assert reversed_geometry.take(1)[0][0] == expected_geom
 
@@ -2134,7 +2126,7 @@ class TestPredicateJoin(TestBase):
 
         for input_geom, expected_geom in tests1.items():
             geom_2d = self.spark.sql(
-                "select ST_AsText(ST_Force_2D(ST_GeomFromText({})))".format(input_geom)
+                f"select ST_AsText(ST_Force_2D(ST_GeomFromText({input_geom})))"
             )
             assert geom_2d.take(1)[0][0] == expected_geom
 
@@ -2147,7 +2139,7 @@ class TestPredicateJoin(TestBase):
 
         for input_geom, expected_geom in tests1.items():
             geom_2d = self.spark.sql(
-                "select ST_AsText(ST_Force2D(ST_GeomFromText({})))".format(input_geom)
+                f"select ST_AsText(ST_Force2D(ST_GeomFromText({input_geom})))"
             )
             assert geom_2d.take(1)[0][0] == expected_geom
 
@@ -2171,7 +2163,7 @@ class TestPredicateJoin(TestBase):
 
         for input_geom, expected_geom in tests.items():
             areal_geom = self.spark.sql(
-                "select ST_AsText(ST_BuildArea(ST_GeomFromText({})))".format(input_geom)
+                f"select ST_AsText(ST_BuildArea(ST_GeomFromText({input_geom})))"
             )
             assert areal_geom.take(1)[0][0] == expected_geom
 
@@ -2244,7 +2236,7 @@ class TestPredicateJoin(TestBase):
         ]
         for input_geom in test_cases:
             cell_ids = self.spark.sql(
-                "select ST_S2CellIDs(ST_GeomFromText({}), 6)".format(input_geom)
+                f"select ST_S2CellIDs(ST_GeomFromText({input_geom}), 6)"
             ).take(1)[0][0]
             assert isinstance(cell_ids, list)
             assert isinstance(cell_ids[0], int)
@@ -2272,7 +2264,7 @@ class TestPredicateJoin(TestBase):
         ]
         for input_geom in test_cases:
             cell_ids = self.spark.sql(
-                "select ST_H3CellIDs(ST_GeomFromText({}), 6, true)".format(input_geom)
+                f"select ST_H3CellIDs(ST_GeomFromText({input_geom}), 6, true)"
             ).take(1)[0][0]
             assert isinstance(cell_ids, list)
             assert isinstance(cell_ids[0], int)
diff --git a/python/tests/stats/test_dbscan.py b/python/tests/stats/test_dbscan.py
index 0d816b6dce..29080d93fb 100644
--- a/python/tests/stats/test_dbscan.py
+++ b/python/tests/stats/test_dbscan.py
@@ -92,7 +92,7 @@ class TestDBScan(TestBase):
 
         clusters = {
             frozenset([y[0] for y in clusters_members if y[1] == x])
-            for x in set([y[1] for y in clusters_members])
+            for x in {y[1] for y in clusters_members}
         }
 
         return clusters

(sedona) branch master updated: [CI] Add pre-commit hook `pyupgrade` to auto upgrade Python syntax (#1638)

Reply via email to