Copilot commented on code in PR #561:
URL: https://github.com/apache/sedona-db/pull/561#discussion_r2747529388
##########
rust/sedona-geoparquet/src/file_opener.rs:
##########
@@ -371,6 +453,110 @@ fn parse_column_coverings(
.collect()
}
+/// Calculates a Vec of [GeoStatistics] based on Parquet-native GeoStatistics
+///
+/// Each element is either a [GeoStatistics] populated with a [BoundingBox]
+/// or [GeoStatistics::unspecified], which is a value that will ensure that
+/// any spatial predicate that references those statistics will evaluate to
+/// true.
+fn row_group_native_geo_stats(
+ row_group_metadata: &RowGroupMetaData,
+ column_indices: &[usize],
+) -> Vec<GeoStatistics> {
+ column_indices
+ .iter()
+ .map(|column_index| {
+ let native_geo_stats_opt =
row_group_metadata.column(*column_index).geo_statistics();
+ native_geo_stats_opt
+ .map(parquet_geo_stats_to_sedona_geo_stats)
+ .unwrap_or(GeoStatistics::unspecified())
+ })
+ .collect()
+}
+
+/// Convert Parquet [GeospatialStatistics] into Sedona [GeoStatistics]
+///
+/// This also sanity checks the Parquet statistics for non-finite or non-sensical
+/// ranges, treating the information as unknown if it fails the sanity check.
+fn parquet_geo_stats_to_sedona_geo_stats(
+ parquet_geo_stats: &GeospatialStatistics,
+) -> GeoStatistics {
+ let mut out = GeoStatistics::unspecified();
+
+ if let Some(native_bbox) = parquet_geo_stats.bounding_box() {
+ let x_range = (native_bbox.get_xmin(), native_bbox.get_xmax());
+ let y_range = (native_bbox.get_ymin(), native_bbox.get_ymax());
+ let z_range = match (native_bbox.get_zmin(), native_bbox.get_zmax()) {
+ (Some(lo), Some(hi)) => Some(Interval::new(lo, hi)),
+ _ => None,
+ };
+ let m_range = match (native_bbox.get_mmin(), native_bbox.get_mmax()) {
+ (Some(lo), Some(hi)) => Some(Interval::new(lo, hi)),
+ _ => None,
+ };
+
+ let bbox = BoundingBox::xyzm(x_range, y_range, z_range, m_range);
+
+ // Sanity check the bbox statistics. If the sanity check fails, don't set
+ // a bounding box for pruning. Note that the x width can be < 0 (wraparound).
+ let mut bbox_is_valid =
+ bbox.x().width().is_finite() && bbox.y().width().is_finite() &&
bbox.y().width() >= 0.0;
+ if let Some(z) = bbox.z() {
+ bbox_is_valid = bbox_is_valid && z.width().is_finite() &&
z.width() >= 0.0;
+ }
+ if let Some(m) = bbox.m() {
+ bbox_is_valid = bbox_is_valid && m.width().is_finite() &&
m.width() >= 0.0;
+ }
+
+ if bbox_is_valid {
+ out = out.with_bbox(Some(bbox));
+ }
+ }
+
+ if let Some(native_geometry_types) = parquet_geo_stats.geospatial_types() {
+ let mut geometry_types = GeometryTypeAndDimensionsSet::new();
+ let mut geometry_types_valid = true;
+ for wkb_id in native_geometry_types {
+ if *wkb_id < 0 {
+ geometry_types_valid = false;
+ break;
+ }
+
+ match GeometryTypeAndDimensions::try_from_wkb_id(*wkb_id as u32) {
+ Ok(type_and_dim) =>
geometry_types.insert_or_ignore(&type_and_dim),
+ Err(_) => {
+ geometry_types_valid = false;
+ break;
+ }
+ }
+ }
+
+ if !geometry_types.is_empty() && geometry_types_valid {
+ out = out.with_geometry_types(Some(geometry_types))
+ }
+ }
+
+ out
+}
+
+/// Calculates column indices for top-level columns of file_schema
+///
+/// We need to build a list of top-level indices, where the indices refer to the
+/// flattened list of columns (e.g., `.column(i)` in row group metadata).
+/// anyway.
Review Comment:
The incomplete comment fragment 'anyway.' should be removed, or completed so
that it explains what is meant.
```suggestion
```
##########
python/sedonadb/tests/io/test_parquet.py:
##########
@@ -21,10 +21,11 @@
import geopandas
import geopandas.testing
+import pyarrow as pa
import pytest
+import sedonadb
import shapely
from pyarrow import parquet
Review Comment:
Imports are not properly ordered. The 'from pyarrow import parquet' import
should appear before 'import sedonadb' and 'import shapely' to maintain
alphabetical ordering of import groups.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]