This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch prepare-1.7.2 in repository https://gitbox.apache.org/repos/asf/sedona.git
commit 2a19d9410b08ad1306b9f11b7cf4f9a9d49642bc Author: Olly Beagley <[email protected]> AuthorDate: Wed Apr 30 05:23:14 2025 +0100 [GH-1910] Import geopandas only when type checking (#1929) Co-authored-by: Olly Beagley <[email protected]> --- python/sedona/utils/geoarrow.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/sedona/utils/geoarrow.py b/python/sedona/utils/geoarrow.py index b4a539dfa4..25ec012a8f 100644 --- a/python/sedona/utils/geoarrow.py +++ b/python/sedona/utils/geoarrow.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. import itertools -from typing import List, Callable +from typing import TYPE_CHECKING, Callable, List # We may be able to achieve streaming rather than complete materialization by using # with the ArrowStreamSerializer (instead of the ArrowCollectSerializer) @@ -27,12 +27,14 @@ from pyspark.sql import DataFrame from pyspark.sql.types import StructType, StructField, DataType, ArrayType, MapType from sedona.sql.types import GeometryType -import geopandas as gpd from pyspark.sql.pandas.types import ( from_arrow_type, ) from pyspark.sql.pandas.serializers import ArrowStreamPandasSerializer +if TYPE_CHECKING: + import geopandas as gpd + def dataframe_to_arrow(df, crs=None): """ @@ -272,7 +274,7 @@ def _deduplicate_field_names(dt: DataType) -> DataType: return dt -def infer_schema(gdf: gpd.GeoDataFrame) -> StructType: +def infer_schema(gdf: "gpd.GeoDataFrame") -> StructType: import pyarrow as pa fields = gdf.dtypes.reset_index().values.tolist() @@ -307,7 +309,7 @@ def infer_schema(gdf: gpd.GeoDataFrame) -> StructType: # Modified backport from Spark 4.0 # https://github.com/apache/spark/blob/3515b207c41d78194d11933cd04bddc21f8418dd/python/pyspark/sql/pandas/conversion.py#L632 -def create_spatial_dataframe(spark: SparkSession, gdf: gpd.GeoDataFrame) -> DataFrame: +def create_spatial_dataframe(spark: SparkSession, gdf: "gpd.GeoDataFrame") -> DataFrame: from pyspark.sql.pandas.types import ( to_arrow_type, )
