petern48 commented on code in PR #171:
URL: https://github.com/apache/sedona-db/pull/171#discussion_r2400821600


##########
rust/sedona-functions/src/st_haszm.rs:
##########
@@ -107,28 +107,63 @@ impl SedonaScalarKernel for STHasZm {
     }
 }
 
-fn invoke_scalar(item: &Wkb, dim_index: usize) -> Result<Option<bool>> {
-    match item.as_type() {
-        geo_traits::GeometryType::GeometryCollection(collection) => {
-            use geo_traits::GeometryCollectionTrait;
-            if collection.num_geometries() == 0 {
-                Ok(Some(false))
-            } else {
-                // PostGIS doesn't allow creating a GeometryCollection with 
geometries of different dimensions
-                // so we can just check the dimension of the first one
-                let first_geom = unsafe { collection.geometry_unchecked(0) };
-                invoke_scalar(first_geom, dim_index)
-            }
-        }
-        _ => {
-            let geom_dim = item.dim();
-            match dim_index {
-                2 => Ok(Some(matches!(geom_dim, Dimensions::Xyz | 
Dimensions::Xyzm))),
-                3 => Ok(Some(matches!(geom_dim, Dimensions::Xym | 
Dimensions::Xyzm))),
-                _ => sedona_internal_err!("unexpected dim_index"),
-            }
+/// Fast-path inference of geometry type name from raw WKB bytes
+/// An error will be thrown for invalid WKB bytes input
+///
+/// Spec: https://libgeos.org/specifications/wkb/
+fn infer_haszm(buf: &[u8], dim_index: usize) -> Result<Option<bool>> {
+    if buf.len() < 5 {
+        return sedona_internal_err!("Invalid WKB: buffer too small ({} 
bytes)", buf.len());
+    }
+
+    let byte_order = buf[0];
+    let code = match byte_order {
+        0 => u32::from_be_bytes([buf[1], buf[2], buf[3], buf[4]]),
+        1 => u32::from_le_bytes([buf[1], buf[2], buf[3], buf[4]]),
+        other => return sedona_internal_err!("Unexpected byte order: {other}"),
+    };
+
+    // 0000 -> xy or unspecified
+    // 1000 -> xyz
+    // 2000 -> xym
+    // 3000 -> xyzm
+    match code / 1000 {
+        // If xy, it's possible we need to infer the dimension
+        0 => {}
+        1 => return Ok(Some(dim_index == 2)),
+        2 => return Ok(Some(dim_index == 3)),
+        3 => return Ok(Some(true)),
+        _ => return sedona_internal_err!("Unexpected code: {code}"),
+    };
+
+    // If GeometryCollection (7), we need to check the dimension of the first 
geometry
+    if code & 0x7 == 7 {
+        // The next 4 bytes are the number of geometries in the collection
+        let num_geometries = match byte_order {
+            0 => u32::from_be_bytes([buf[5], buf[6], buf[7], buf[8]]),
+            1 => u32::from_le_bytes([buf[5], buf[6], buf[7], buf[8]]),
+            other => return sedona_internal_err!("Unexpected byte order: 
{other}"),
+        };
+        // Check the dimension of the first geometry since they all have to be 
the same dimension
+        // Note: Attempting to create the following geometries error and are 
thus not possible to create:
+        // - Nested geometry dimension doesn't match the **specified** geom 
collection z-dimension
+        //   - GEOMETRYCOLLECTION M (POINT Z (1 1 1))
+        // - Nested geometry doesn't have the specified dimension
+        //   - GEOMETRYCOLLECTION Z (POINT (1 1))
+        // - Nested geometries have different dimensions
+        //   - GEOMETRYCOLLECTION (POINT Z (1 1 1), POINT (1 1))
+        if num_geometries >= 1 {
+            return infer_haszm(&buf[9..], dim_index);
         }
+        // If empty geometry (num_geometries == 0), fallback to below logic to 
check the geom collection's dimension
+        // GEOMETRY COLLECTION Z EMPTY hasz -> true
     }
+
+    // TODO: Last check: check how many dimensions the 1st coordinate has (all 
other coordinates must have the same)
+    // e.g handle this case: POINT (0 0 0) -> xyz dimension, POINT (0 0 0 0) 
-> xyzm dimension

Review Comment:
   Based on what you're saying, this logic would be unnecessary, so I will remove it.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to