This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 607c383f4b [SEDONA-708] Sedona should use PyArrow to get GeoPandas (#1794)
607c383f4b is described below
commit 607c383f4bb6df5763a597cb98dbb6c8d390513c
Author: Jia Yu <[email protected]>
AuthorDate: Thu Feb 6 21:18:40 2025 -0800
[SEDONA-708] Sedona should use PyArrow to get GeoPandas (#1794)
* Initial commit
* Separate the logic between Pandas and GeoPandas
* Work with GeoPandas < 1.0.0
* Refine the code structure
---
python/sedona/maps/SedonaMapUtils.py | 16 +++++++++++++---
python/sedona/raster_utils/SedonaUtils.py | 5 ++++-
2 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/python/sedona/maps/SedonaMapUtils.py b/python/sedona/maps/SedonaMapUtils.py
index 66fa283d1d..ddca721c45 100644
--- a/python/sedona/maps/SedonaMapUtils.py
+++ b/python/sedona/maps/SedonaMapUtils.py
@@ -18,6 +18,8 @@
import json
from sedona.sql.types import GeometryType
+from sedona.utils.geoarrow import dataframe_to_arrow
+from packaging.version import parse
class SedonaMapUtils:
@@ -34,17 +36,25 @@ class SedonaMapUtils:
"""
if geometry_col is None:
geometry_col = SedonaMapUtils.__get_geometry_col__(df)
- pandas_df = df.toPandas()
+
+ # Convert the dataframe to arrow format, then to geopandas dataframe
+ # This is faster than converting directly to geopandas dataframe via toPandas
if (
geometry_col is None
): # No geometry column found even after searching schema, return Pandas Dataframe
- return pandas_df
+ data_pyarrow = dataframe_to_arrow(df)
+ return data_pyarrow.to_pandas()
try:
import geopandas as gpd
except ImportError:
msg = "GeoPandas is missing. You can install it manually or via apache-sedona[kepler-map] or apache-sedona[pydeck-map]."
raise ImportError(msg) from None
- geo_df = gpd.GeoDataFrame(pandas_df, geometry=geometry_col)
+ # From GeoPandas 1.0.0 onwards, the from_arrow method is available
+ if parse(gpd.__version__) >= parse("1.0.0"):
+ data_pyarrow = dataframe_to_arrow(df)
+ geo_df = gpd.GeoDataFrame.from_arrow(data_pyarrow)
+ else:
+ geo_df = gpd.GeoDataFrame(df.toPandas(), geometry=geometry_col)
if geometry_col != "geometry" and rename is True:
geo_df.rename_geometry("geometry", inplace=True)
return geo_df
diff --git a/python/sedona/raster_utils/SedonaUtils.py b/python/sedona/raster_utils/SedonaUtils.py
index 5f7304f3ff..d35fcd6210 100644
--- a/python/sedona/raster_utils/SedonaUtils.py
+++ b/python/sedona/raster_utils/SedonaUtils.py
@@ -15,10 +15,13 @@
# specific language governing permissions and limitations
# under the License.
+from sedona.maps.SedonaMapUtils import SedonaMapUtils
+
class SedonaUtils:
@classmethod
def display_image(cls, df):
from IPython.display import HTML, display
- display(HTML(df.toPandas().to_html(escape=False)))
+ pdf = SedonaMapUtils.__convert_to_gdf_or_pdf__(df, rename=False)
+ display(HTML(pdf.to_html(escape=False)))