This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 343f79c907 [CI] Add pre-commit hook `pyupgrade` to auto upgrade Python syntax (#1638)
343f79c907 is described below
commit 343f79c90712c562f407787fb947139bfdeaeb90
Author: John Bampton <[email protected]>
AuthorDate: Tue Apr 1 09:16:06 2025 +1000
[CI] Add pre-commit hook `pyupgrade` to auto upgrade Python syntax (#1638)
"A tool (and pre-commit hook) to automatically upgrade syntax for newer
versions of the language."
https://github.com/asottile/pyupgrade
---
.pre-commit-config.yaml | 5 +++
python/sedona/core/jvm/config.py | 4 +--
python/sedona/geopandas/base.py | 2 +-
python/sedona/geopandas/geodataframe.py | 18 +++++------
python/sedona/maps/SedonaPyDeck.py | 2 +-
python/sedona/raster/meta.py | 4 +--
python/sedona/raster/raster_serde.py | 4 +--
python/sedona/sql/dataframe_api.py | 2 +-
python/sedona/utils/decorators.py | 2 +-
python/sedona/utils/geometry_serde.py | 4 +--
python/sedona/utils/geometry_serde_general.py | 6 ++--
python/sedona/utils/meta.py | 2 +-
python/setup.py | 2 +-
.../core/test_avoiding_python_jvm_serde_df.py | 32 +++++++++----------
python/tests/geopandas/test_geodataframe.py | 2 +-
python/tests/geopandas/test_geoseries.py | 8 ++---
python/tests/geopandas/test_sjoin.py | 4 +--
python/tests/sql/test_function.py | 36 +++++++++-------------
python/tests/stats/test_dbscan.py | 2 +-
19 files changed, 66 insertions(+), 75 deletions(-)
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 67ca1cf07a..07ccb8922b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -125,6 +125,11 @@ repos:
- --license-filepath
- .github/workflows/license-templates/LICENSE.txt
- --fuzzy-match-generates-todo
+ - repo: https://github.com/asottile/pyupgrade
+ rev: v3.19.1
+ hooks:
+ - id: pyupgrade
+ args: [--py37-plus]
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 25.1.0
hooks:
diff --git a/python/sedona/core/jvm/config.py b/python/sedona/core/jvm/config.py
index 1775712796..09650ed51b 100644
--- a/python/sedona/core/jvm/config.py
+++ b/python/sedona/core/jvm/config.py
@@ -28,7 +28,7 @@ from pyspark.sql import SparkSession
from sedona.utils.decorators import classproperty
-string_types = (type(b""), type(""))
+string_types = (bytes, str)
def is_greater_or_equal_version(version_a: str, version_b: str) -> bool:
@@ -190,7 +190,7 @@ class SparkJars:
try:
used_jar_files = java_spark_conf.get(value)
except Py4JJavaError:
- error_message = "Didn't find the value of {} from SparkConf".format(value)
+ error_message = f"Didn't find the value of {value} from SparkConf"
logging.info(error_message)
return used_jar_files, error_message
diff --git a/python/sedona/geopandas/base.py b/python/sedona/geopandas/base.py
index c7cbc39ca3..b70725930f 100644
--- a/python/sedona/geopandas/base.py
+++ b/python/sedona/geopandas/base.py
@@ -40,7 +40,7 @@ from sedona.geopandas._typing import GeoFrameLike
bool_type = bool
-class GeoFrame(object, metaclass=ABCMeta):
+class GeoFrame(metaclass=ABCMeta):
"""
A base class for both GeoDataFrame and GeoSeries.
"""
diff --git a/python/sedona/geopandas/geodataframe.py b/python/sedona/geopandas/geodataframe.py
index bdef237c1f..e22451f0c5 100644
--- a/python/sedona/geopandas/geodataframe.py
+++ b/python/sedona/geopandas/geodataframe.py
@@ -174,31 +174,31 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
def _reduce_for_geostat_function(
self,
- sfun: Callable[["GeoSeries"], Column],
+ sfun: Callable[[GeoSeries], Column],
name: str,
- axis: Optional[Axis] = None,
+ axis: Axis | None = None,
numeric_only: bool = True,
skipna: bool = True,
**kwargs: Any,
- ) -> Union["GeoSeries", Scalar]:
+ ) -> GeoSeries | Scalar:
# Implementation of the abstract method
raise NotImplementedError("This method is not implemented yet.")
@property
- def dtypes(self) -> Union[gpd.GeoSeries, pd.Series, Dtype]:
+ def dtypes(self) -> gpd.GeoSeries | pd.Series | Dtype:
# Implementation of the abstract method
raise NotImplementedError("This method is not implemented yet.")
- def to_geopandas(self) -> Union[gpd.GeoDataFrame, pd.Series]:
+ def to_geopandas(self) -> gpd.GeoDataFrame | pd.Series:
# Implementation of the abstract method
raise NotImplementedError("This method is not implemented yet.")
- def _to_geopandas(self) -> Union[gpd.GeoDataFrame, pd.Series]:
+ def _to_geopandas(self) -> gpd.GeoDataFrame | pd.Series:
# Implementation of the abstract method
raise NotImplementedError("This method is not implemented yet.")
@property
- def geoindex(self) -> "GeoIndex":
+ def geoindex(self) -> GeoIndex:
# Implementation of the abstract method
raise NotImplementedError("This method is not implemented yet.")
@@ -232,7 +232,7 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
return self
@property
- def area(self) -> "GeoDataFrame":
+ def area(self) -> GeoDataFrame:
"""
Returns a GeoDataFrame containing the area of each geometry expressed
in the units of the CRS.
@@ -517,7 +517,7 @@ class GeoDataFrame(GeoFrame, pspd.DataFrame):
mitre_limit=5.0,
single_sided=False,
**kwargs,
- ) -> "GeoDataFrame":
+ ) -> GeoDataFrame:
"""
Returns a GeoDataFrame with all geometries buffered by the specified
distance.
diff --git a/python/sedona/maps/SedonaPyDeck.py b/python/sedona/maps/SedonaPyDeck.py
index 343389841b..a553817ec8 100644
--- a/python/sedona/maps/SedonaPyDeck.py
+++ b/python/sedona/maps/SedonaPyDeck.py
@@ -315,7 +315,7 @@ class SedonaPyDeck:
:return: fill_color string for pydeck map
"""
plot_max = gdf[plot_col].max()
- return "[85, 183, 177, ({0} / {1}) * 255 + 15]".format(plot_col, plot_max)
+ return f"[85, 183, 177, ({plot_col} / {plot_max}) * 255 + 15]"
@classmethod
def _create_coord_column_(cls, gdf, geometry_col, add_points=False):
diff --git a/python/sedona/raster/meta.py b/python/sedona/raster/meta.py
index 38c5283ba7..f440b009ad 100644
--- a/python/sedona/raster/meta.py
+++ b/python/sedona/raster/meta.py
@@ -104,8 +104,8 @@ class AffineTransform:
def __repr__(self):
return (
- "[ {} {} {}\n".format(self.scale_x, self.skew_x, self.ip_x)
- + " {} {} {}\n".format(self.skew_y, self.scale_y, self.ip_y)
+ f"[ {self.scale_x} {self.skew_x} {self.ip_x}\n"
+ + f" {self.skew_y} {self.scale_y} {self.ip_y}\n"
+ " 0 0 1 ]"
)
diff --git a/python/sedona/raster/raster_serde.py b/python/sedona/raster/raster_serde.py
index 90dd651bee..1d86531e03 100644
--- a/python/sedona/raster/raster_serde.py
+++ b/python/sedona/raster/raster_serde.py
@@ -63,7 +63,7 @@ def _deserialize(bio: BytesIO, raster_type: int) -> SedonaRaster:
width, height, bands_meta, affine_trans, crs_wkt, awt_raster
)
else:
- raise ValueError("unsupported raster_type: {}".format(raster_type))
+ raise ValueError(f"unsupported raster_type: {raster_type}")
def _read_grid_envelope(bio: BytesIO) -> Tuple[int, int, int, int]:
@@ -183,7 +183,7 @@ def _read_data_buffer(bio: BytesIO) -> DataBuffer:
elif data_type == DataBuffer.TYPE_DOUBLE:
np_array = np.frombuffer(bio.read(8 * bank_size), dtype=np.float64)
else:
- raise ValueError("unknown data_type {}".format(data_type))
+ raise ValueError(f"unknown data_type {data_type}")
banks.append(np_array)
diff --git a/python/sedona/sql/dataframe_api.py b/python/sedona/sql/dataframe_api.py
index b1639a97bf..4c73a8af07 100644
--- a/python/sedona/sql/dataframe_api.py
+++ b/python/sedona/sql/dataframe_api.py
@@ -123,7 +123,7 @@ def _get_readable_name_for_type(type: Type) -> str:
:rtype: str
"""
if isinstance(type, typing._GenericAlias) and type.__origin__._name == "Union":
- return f"Union[{', '.join((_strip_extra_from_class_name(str(x)) for x in type.__args__))}]"
+ return f"Union[{', '.join(_strip_extra_from_class_name(str(x)) for x in type.__args__)}]"
return _strip_extra_from_class_name(str(type))
diff --git a/python/sedona/utils/decorators.py b/python/sedona/utils/decorators.py
index 90de8d16e3..2794baba18 100644
--- a/python/sedona/utils/decorators.py
+++ b/python/sedona/utils/decorators.py
@@ -20,7 +20,7 @@ from typing import Callable, Iterable, List, TypeVar
T = TypeVar("T")
-class classproperty(object):
+class classproperty:
def __init__(self, f):
self.f = f
diff --git a/python/sedona/utils/geometry_serde.py b/python/sedona/utils/geometry_serde.py
index f9c23e1128..d0645b6ada 100644
--- a/python/sedona/utils/geometry_serde.py
+++ b/python/sedona/utils/geometry_serde.py
@@ -42,9 +42,7 @@ try:
".dll"
):
return os.path.join(lib_dirpath, filename)
- raise RuntimeError(
- "geos_c DLL not found in {}\\[S|s]hapely.libs".format(packages_dir)
- )
+ raise RuntimeError(f"geos_c DLL not found in {packages_dir}\\[S|s]hapely.libs")
if shapely.__version__.startswith("2."):
if sys.platform != "win32":
diff --git a/python/sedona/utils/geometry_serde_general.py b/python/sedona/utils/geometry_serde_general.py
index 6c1a821340..89b3e64ecd 100644
--- a/python/sedona/utils/geometry_serde_general.py
+++ b/python/sedona/utils/geometry_serde_general.py
@@ -82,7 +82,7 @@ class CoordinateType:
elif geom._ndim == 3:
return CoordinateType.XYZ
else:
- raise ValueError("Invalid coordinate dimension: {}".format(geom._ndim))
+ raise ValueError(f"Invalid coordinate dimension: {geom._ndim}")
@staticmethod
def bytes_per_coord(coord_type: int) -> int:
@@ -233,7 +233,7 @@ def deserialize(buffer: bytes) -> Optional[BaseGeometry]:
elif geom_type == GeometryTypeID.GEOMETRYCOLLECTION:
geom = deserialize_geometry_collection(geom_buffer)
else:
- raise ValueError("Unsupported geometry type ID: {}".format(geom_type))
+ raise ValueError(f"Unsupported geometry type ID: {geom_type}")
return geom, geom_buffer.ints_offset
@@ -546,7 +546,7 @@ def serialize_shapely_1_empty_geom(geom: BaseGeometry) -> bytearray:
geom_type = GeometryTypeID.MULTIPOLYGON
total_size = 12
else:
- raise ValueError("Invalid empty geometry collection object: {}".format(geom))
+ raise ValueError(f"Invalid empty geometry collection object: {geom}")
return create_buffer_for_geom(geom_type, CoordinateType.XY, total_size, 0)
diff --git a/python/sedona/utils/meta.py b/python/sedona/utils/meta.py
index c6c6945d41..0234a60c1a 100644
--- a/python/sedona/utils/meta.py
+++ b/python/sedona/utils/meta.py
@@ -79,7 +79,7 @@ class MultiMethod:
if parm.annotation is inspect.Parameter.empty:
raise InvalidParametersException(
- "Argument {} must be annotated with a type".format(name)
+ f"Argument {name} must be annotated with a type"
)
if parm.default is not inspect.Parameter.empty:
self._methods[tuple(types)] = meth
diff --git a/python/setup.py b/python/setup.py
index d0770fcc02..a6bc7e9560 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -21,7 +21,7 @@ from setuptools import Extension, find_packages, setup
from sedona import version
-with open("README.md", "r") as fh:
+with open("README.md") as fh:
long_description = fh.read()
extension_args = {}
diff --git a/python/tests/core/test_avoiding_python_jvm_serde_df.py b/python/tests/core/test_avoiding_python_jvm_serde_df.py
index 66b8a27b62..117429d996 100644
--- a/python/tests/core/test_avoiding_python_jvm_serde_df.py
+++ b/python/tests/core/test_avoiding_python_jvm_serde_df.py
@@ -165,24 +165,20 @@ class TestOmitPythonJvmSerdeToDf(TestBase):
right_geometries = self.__row_to_list(right_geometries_raw)
# Ignore the ordering of these
- assert set(geom[0] for geom in left_geometries) == set(
- [
- "POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))",
- "POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))",
- "POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))",
- "POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
- "POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
- ]
- )
- assert set(geom[0] for geom in right_geometries) == set(
- [
- "POINT (-3 5)",
- "POINT (11 5)",
- "POINT (4 3)",
- "POINT (-1 -1)",
- "POINT (-4 -5)",
- ]
- )
+ assert {geom[0] for geom in left_geometries} == {
+ "POLYGON ((0 4, -3 3, -8 6, -6 8, -2 9, 0 4))",
+ "POLYGON ((10 3, 10 6, 14 6, 14 3, 10 3))",
+ "POLYGON ((2 2, 2 4, 3 5, 7 5, 9 3, 8 1, 4 1, 2 2))",
+ "POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
+ "POLYGON ((-1 -1, -1 -3, -2 -5, -6 -8, -5 -2, -3 -2, -1 -1))",
+ }
+ assert {geom[0] for geom in right_geometries} == {
+ "POINT (-3 5)",
+ "POINT (11 5)",
+ "POINT (4 3)",
+ "POINT (-1 -1)",
+ "POINT (-4 -5)",
+ }
def test_range_query_flat_to_df(self):
poi_point_rdd = WktReader.readToGeometryRDD(
diff --git a/python/tests/geopandas/test_geodataframe.py b/python/tests/geopandas/test_geodataframe.py
index 502b521526..30fcf24520 100644
--- a/python/tests/geopandas/test_geodataframe.py
+++ b/python/tests/geopandas/test_geodataframe.py
@@ -56,7 +56,7 @@ class TestDataframe(TestBase):
},
index=[10, 20, 30, 40, 50, 60],
)
- assert psdf.count().count() is 3
+ assert psdf.count().count() == 3
def test_type_single_geometry_column(self):
# Create a GeoDataFrame with a single geometry column and additional attributes
diff --git a/python/tests/geopandas/test_geoseries.py b/python/tests/geopandas/test_geoseries.py
index 52e60ab3f7..2589726929 100644
--- a/python/tests/geopandas/test_geoseries.py
+++ b/python/tests/geopandas/test_geoseries.py
@@ -56,7 +56,7 @@ class TestSeries(TestBase):
},
index=[10, 20, 30, 40, 50, 60],
)
- assert psdf.count().count() is 3
+ assert psdf.count().count() == 3
def test_internal_st_function(self):
# this is to make sure the spark session works with internal sedona udfs
@@ -82,19 +82,19 @@ class TestSeries(TestBase):
area = self.g1.area
assert area is not None
assert type(area) is GeoSeries
- assert area.count() is 2
+ assert area.count() == 2
def test_buffer(self):
buffer = self.g1.buffer(0.2)
assert buffer is not None
assert type(buffer) is GeoSeries
- assert buffer.count() is 2
+ assert buffer.count() == 2
def test_buffer_then_area(self):
area = self.g1.buffer(0.2).area
assert area is not None
assert type(area) is GeoSeries
- assert area.count() is 2
+ assert area.count() == 2
def test_buffer_then_geoparquet(self):
temp_file_path = os.path.join(
diff --git a/python/tests/geopandas/test_sjoin.py b/python/tests/geopandas/test_sjoin.py
index f9e1c6f680..bdca4d56f3 100644
--- a/python/tests/geopandas/test_sjoin.py
+++ b/python/tests/geopandas/test_sjoin.py
@@ -42,7 +42,7 @@ class TestSpatialJoin(TestBase):
joined = sjoin(left, right)
assert joined is not None
assert type(joined) is GeoSeries
- assert joined.count() is 4
+ assert joined.count() == 4
def test_sjoin_method2(self):
left = self.g1
@@ -50,4 +50,4 @@ class TestSpatialJoin(TestBase):
joined = left.sjoin(right)
assert joined is not None
assert type(joined) is GeoSeries
- assert joined.count() is 4
+ assert joined.count() == 4
diff --git a/python/tests/sql/test_function.py b/python/tests/sql/test_function.py
index da9f2bc253..93ab59f24f 100644
--- a/python/tests/sql/test_function.py
+++ b/python/tests/sql/test_function.py
@@ -1967,14 +1967,10 @@ class TestPredicateJoin(TestBase):
)
# then result should be as expected
- assert set(
- [
- el[0]
- for el in geometry_df_collected.selectExpr(
- "ST_AsText(collected)"
- ).collect()
- ]
- ) == {
+ assert {
+ el[0]
+ for el in geometry_df_collected.selectExpr("ST_AsText(collected)").collect()
+ } == {
"MULTILINESTRING ((1 2, 3 4), (3 4, 4 5))",
"MULTIPOINT ((1 2), (-2 3))",
"MULTIPOLYGON (((1 2, 1 4, 3 4, 3 2, 1 2)), ((0.5 0.5, 5 0, 5 5, 0 5, 0.5 0.5)))",
@@ -2000,14 +1996,10 @@ class TestPredicateJoin(TestBase):
)
# then result should be calculated
- assert set(
- [
- el[0]
- for el in geometry_df_collected.selectExpr(
- "ST_AsText(collected)"
- ).collect()
- ]
- ) == {
+ assert {
+ el[0]
+ for el in geometry_df_collected.selectExpr("ST_AsText(collected)").collect()
+ } == {
"MULTILINESTRING ((1 2, 3 4), (3 4, 4 5))",
"MULTIPOINT ((1 2), (-2 3))",
"MULTIPOLYGON (((1 2, 1 4, 3 4, 3 2, 1 2)), ((0.5 0.5, 5 0, 5 5, 0 5, 0.5 0.5)))",
@@ -2036,7 +2028,7 @@ class TestPredicateJoin(TestBase):
}
for input_geom, expected_geom in test_cases.items():
reversed_geometry = self.spark.sql(
- "select ST_AsText(ST_Reverse(ST_GeomFromText({})))".format(input_geom)
+ f"select ST_AsText(ST_Reverse(ST_GeomFromText({input_geom})))"
)
assert reversed_geometry.take(1)[0][0] == expected_geom
@@ -2134,7 +2126,7 @@ class TestPredicateJoin(TestBase):
for input_geom, expected_geom in tests1.items():
geom_2d = self.spark.sql(
- "select ST_AsText(ST_Force_2D(ST_GeomFromText({})))".format(input_geom)
+ f"select ST_AsText(ST_Force_2D(ST_GeomFromText({input_geom})))"
)
assert geom_2d.take(1)[0][0] == expected_geom
@@ -2147,7 +2139,7 @@ class TestPredicateJoin(TestBase):
for input_geom, expected_geom in tests1.items():
geom_2d = self.spark.sql(
- "select ST_AsText(ST_Force2D(ST_GeomFromText({})))".format(input_geom)
+ f"select ST_AsText(ST_Force2D(ST_GeomFromText({input_geom})))"
)
assert geom_2d.take(1)[0][0] == expected_geom
@@ -2171,7 +2163,7 @@ class TestPredicateJoin(TestBase):
for input_geom, expected_geom in tests.items():
areal_geom = self.spark.sql(
- "select ST_AsText(ST_BuildArea(ST_GeomFromText({})))".format(input_geom)
+ f"select ST_AsText(ST_BuildArea(ST_GeomFromText({input_geom})))"
)
assert areal_geom.take(1)[0][0] == expected_geom
@@ -2244,7 +2236,7 @@ class TestPredicateJoin(TestBase):
]
for input_geom in test_cases:
cell_ids = self.spark.sql(
- "select ST_S2CellIDs(ST_GeomFromText({}), 6)".format(input_geom)
+ f"select ST_S2CellIDs(ST_GeomFromText({input_geom}), 6)"
).take(1)[0][0]
assert isinstance(cell_ids, list)
assert isinstance(cell_ids[0], int)
@@ -2272,7 +2264,7 @@ class TestPredicateJoin(TestBase):
]
for input_geom in test_cases:
cell_ids = self.spark.sql(
- "select ST_H3CellIDs(ST_GeomFromText({}), 6, true)".format(input_geom)
+ f"select ST_H3CellIDs(ST_GeomFromText({input_geom}), 6, true)"
).take(1)[0][0]
assert isinstance(cell_ids, list)
assert isinstance(cell_ids[0], int)
diff --git a/python/tests/stats/test_dbscan.py b/python/tests/stats/test_dbscan.py
index 0d816b6dce..29080d93fb 100644
--- a/python/tests/stats/test_dbscan.py
+++ b/python/tests/stats/test_dbscan.py
@@ -92,7 +92,7 @@ class TestDBScan(TestBase):
clusters = {
frozenset([y[0] for y in clusters_members if y[1] == x])
- for x in set([y[1] for y in clusters_members])
+ for x in {y[1] for y in clusters_members}
}
return clusters