This is an automated email from the ASF dual-hosted git repository.

kontinuation pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git


The following commit(s) were added to refs/heads/main by this push:
     new 3ed337e  Fix distinguishing the joined geospatial types for ST_KNN 
(#87)
3ed337e is described below

commit 3ed337e6b55494870fed38561c7717ba84adf15a
Author: Kristin Cowalcijk <[email protected]>
AuthorDate: Tue Sep 16 11:45:40 2025 +0800

    Fix distinguishing the joined geospatial types for ST_KNN (#87)
    
    This PR fixes a regression introduced by 
https://github.com/apache/sedona-db/pull/57, which added geography type 
checking to prevent optimized spatial joins from being used with unsupported 
geography types.
    
    The problematic patch incorrectly matches `left` and `right` expressions in 
`KNNPredicate` to query plans. Actually, how `left` and `right` maps to the 
original query plans depends on the value of `probe_side` field. This is quite 
misleading and may be addressed in a future PR by renaming `left` and `right` 
to `build` and `probe`.
    
    The function for checking if geospatial types are supported now returns 
`Result<bool>` to propagate errors during the field type extraction and 
matching process. This prevents silent matching failures due to bugs from 
happening.
---
 rust/sedona-spatial-join/src/optimizer.rs | 109 ++++++++++++++++++++++++------
 1 file changed, 89 insertions(+), 20 deletions(-)

diff --git a/rust/sedona-spatial-join/src/optimizer.rs 
b/rust/sedona-spatial-join/src/optimizer.rs
index 6440da2..0a0ea54 100644
--- a/rust/sedona-spatial-join/src/optimizer.rs
+++ b/rust/sedona-spatial-join/src/optimizer.rs
@@ -147,7 +147,7 @@ impl SpatialJoinOptimizer {
                     &spatial_predicate,
                     &left.schema(),
                     &right.schema(),
-                ) {
+                )? {
                     return Ok(None);
                 }
 
@@ -190,7 +190,7 @@ impl SpatialJoinOptimizer {
                     &spatial_predicate,
                     &hash_join.left().schema(),
                     &hash_join.right().schema(),
-                ) {
+                )? {
                     return Ok(None);
                 }
 
@@ -877,26 +877,40 @@ fn is_spatial_predicate_supported(
     spatial_predicate: &SpatialPredicate,
     left_schema: &Schema,
     right_schema: &Schema,
-) -> bool {
+) -> Result<bool> {
     /// Only spatial predicates working with planar geometry are supported for 
optimization.
     /// Geography (spherical) types are explicitly excluded and will not 
trigger optimized spatial joins.
-    fn is_geometry_type_supported(expr: &Arc<dyn PhysicalExpr>, schema: 
&Schema) -> bool {
-        let Ok(left_return_field) = expr.return_field(schema) else {
-            return false;
-        };
-        let Ok(sedona_type) = 
SedonaType::from_storage_field(&left_return_field) else {
-            return false;
-        };
+    fn is_geometry_type_supported(expr: &Arc<dyn PhysicalExpr>, schema: 
&Schema) -> Result<bool> {
+        let left_return_field = expr.return_field(schema)?;
+        let sedona_type = SedonaType::from_storage_field(&left_return_field)?;
         let matcher = ArgMatcher::is_geometry();
-        matcher.match_type(&sedona_type)
+        Ok(matcher.match_type(&sedona_type))
     }
 
     match spatial_predicate {
         SpatialPredicate::Relation(RelationPredicate { left, right, .. })
-        | SpatialPredicate::Distance(DistancePredicate { left, right, .. })
-        | SpatialPredicate::KNearestNeighbors(KNNPredicate { left, right, .. 
}) => {
-            is_geometry_type_supported(left, left_schema)
-                && is_geometry_type_supported(right, right_schema)
+        | SpatialPredicate::Distance(DistancePredicate { left, right, .. }) => 
{
+            Ok(is_geometry_type_supported(left, left_schema)?
+                && is_geometry_type_supported(right, right_schema)?)
+        }
+        SpatialPredicate::KNearestNeighbors(KNNPredicate {
+            left,
+            right,
+            probe_side,
+            ..
+        }) => {
+            let (left, right) = match probe_side {
+                JoinSide::Left => (left, right),
+                JoinSide::Right => (right, left),
+                _ => {
+                    return sedona_internal_err!(
+                        "Invalid probe side in KNN predicate: {:?}",
+                        probe_side
+                    )
+                }
+            };
+            Ok(is_geometry_type_supported(left, left_schema)?
+                && is_geometry_type_supported(right, right_schema)?)
         }
     }
 }
@@ -2479,11 +2493,7 @@ mod tests {
             SpatialRelationType::Intersects,
         );
         let spatial_pred = SpatialPredicate::Relation(rel_pred);
-        assert!(super::is_spatial_predicate_supported(
-            &spatial_pred,
-            &schema,
-            &schema
-        ));
+        assert!(super::is_spatial_predicate_supported(&spatial_pred, &schema, 
&schema).unwrap());
 
         // Geography field (should NOT be supported)
         let geog_field = WKB_GEOGRAPHY.to_storage_field("geog", 
false).unwrap();
@@ -2499,6 +2509,65 @@ mod tests {
             &spatial_pred_geog,
             &geog_schema,
             &geog_schema
+        )
+        .unwrap());
+    }
+
+    #[test]
+    fn test_is_knn_predicate_supported() {
+        // ST_KNN(left, right)
+        let left_schema = Arc::new(Schema::new(vec![WKB_GEOMETRY
+            .to_storage_field("geom", false)
+            .unwrap()]));
+        let right_schema = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int32, false),
+            WKB_GEOMETRY.to_storage_field("geom", false).unwrap(),
+        ]));
+        let left_col_expr = Arc::new(Column::new("geom", 0)) as Arc<dyn 
PhysicalExpr>;
+        let right_col_expr = Arc::new(Column::new("geom", 1)) as Arc<dyn 
PhysicalExpr>;
+        let knn_pred = SpatialPredicate::KNearestNeighbors(KNNPredicate::new(
+            left_col_expr.clone(),
+            right_col_expr.clone(),
+            5,
+            false,
+            JoinSide::Left,
+        ));
+        assert!(
+            super::is_spatial_predicate_supported(&knn_pred, &left_schema, 
&right_schema).unwrap()
+        );
+
+        // ST_KNN(right, left)
+        let knn_pred = SpatialPredicate::KNearestNeighbors(KNNPredicate::new(
+            right_col_expr.clone(),
+            left_col_expr.clone(),
+            5,
+            false,
+            JoinSide::Right,
         ));
+        assert!(
+            super::is_spatial_predicate_supported(&knn_pred, &left_schema, 
&right_schema).unwrap()
+        );
+
+        // ST_KNN with geography (should NOT be supported)
+        let left_geog_schema = Arc::new(Schema::new(vec![WKB_GEOGRAPHY
+            .to_storage_field("geog", false)
+            .unwrap()]));
+        assert!(!super::is_spatial_predicate_supported(
+            &knn_pred,
+            &left_geog_schema,
+            &right_schema
+        )
+        .unwrap());
+
+        let right_geog_schema = Arc::new(Schema::new(vec![
+            Field::new("id", DataType::Int32, false),
+            WKB_GEOGRAPHY.to_storage_field("geog", false).unwrap(),
+        ]));
+        assert!(!super::is_spatial_predicate_supported(
+            &knn_pred,
+            &left_schema,
+            &right_geog_schema
+        )
+        .unwrap());
     }
 }

Reply via email to