This is an automated email from the ASF dual-hosted git repository.
kontinuation pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 7097a94 chore: Add assert_scalar_equal_wkb_geometry_topologically to
compare WKBs topologically (#192)
7097a94 is described below
commit 7097a94faf30db882a746282b9b5d528c4b6fd6f
Author: Kristin Cowalcijk <[email protected]>
AuthorDate: Wed Oct 8 11:46:39 2025 +0800
chore: Add assert_scalar_equal_wkb_geometry_topologically to compare WKBs
topologically (#192)
This is part of the forked dependency elimination plan
https://github.com/apache/sedona-db/pull/165. We've hit some overlay operation
result assertion errors after upgrading geo to the latest 0.31.0 release. The
failures were caused by geo producing different, but topologically equivalent
results, and we are asserting on the byte-level equality of resulting WKBs.
This patch adds `assert_scalar_equal_wkb_geometry_topologically` to
sedona-testing behind a feature flag "geo". We'll switch to use
`assert_scalar_equal_wkb_geometry_topologically` for testing overlay ST
functions (ST_Union_Aggr and ST_Intersection_Aggr) after upgrading geo. This
function is also useful in many other scenarios, so we add this function in its
dedicated PR.
---
Cargo.lock | 18 +++++-
rust/sedona-testing/Cargo.toml | 4 +-
rust/sedona-testing/src/compare.rs | 120 +++++++++++++++++++++++++++++++++----
3 files changed, 128 insertions(+), 14 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 56a5058..af9e3f1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -52,9 +52,21 @@ dependencies = [
[[package]]
name = "adbc_core"
-version = "0.18.0"
-source =
"git+https://github.com/apache/arrow-adbc?rev=1ba248290cd299c4969b679463bcd54c217cf2e4#1ba248290cd299c4969b679463bcd54c217cf2e4"
+version = "0.20.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5b891479797b5588e320f7fd3caf15faba311cf8f8a76911195b6a3d55304eb"
+dependencies = [
+ "arrow-array",
+ "arrow-schema",
+]
+
+[[package]]
+name = "adbc_ffi"
+version = "0.20.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c30e0f28a8363a76a8ec802934223ea5811ea658308c7403c8beb9aa86fa808f"
dependencies = [
+ "adbc_core",
"arrow-array",
"arrow-schema",
]
@@ -4787,6 +4799,7 @@ name = "sedona-adbc"
version = "0.2.0"
dependencies = [
"adbc_core",
+ "adbc_ffi",
"arrow-array",
"arrow-schema",
"datafusion",
@@ -5090,6 +5103,7 @@ dependencies = [
"datafusion-common",
"datafusion-expr",
"datafusion-physical-expr",
+ "geo",
"geo-traits 0.2.0",
"geo-types",
"parquet",
diff --git a/rust/sedona-testing/Cargo.toml b/rust/sedona-testing/Cargo.toml
index c7e0783..d0c058f 100644
--- a/rust/sedona-testing/Cargo.toml
+++ b/rust/sedona-testing/Cargo.toml
@@ -32,6 +32,7 @@ rstest = { workspace = true }
[features]
default = []
+geo = ["dep:geo"]
criterion = ["dep:criterion"]
[dependencies]
@@ -42,7 +43,7 @@ criterion = { workspace = true, optional = true }
datafusion-common = { workspace = true }
datafusion-expr = { workspace = true }
datafusion-physical-expr = { workspace = true }
-geo-traits = { workspace = true }
+geo-traits = { workspace = true, features = ["geo-types"] }
geo-types = { workspace = true }
parquet = { workspace = true, features = ["arrow", "snap", "zstd"] }
rand = { workspace = true }
@@ -52,3 +53,4 @@ sedona-expr = { path = "../sedona-expr" }
sedona-schema = { path = "../sedona-schema" }
wkb = { workspace = true }
wkt = { workspace = true }
+geo = { workspace = true, optional = true }
diff --git a/rust/sedona-testing/src/compare.rs
b/rust/sedona-testing/src/compare.rs
index dac2789..780b2ff 100644
--- a/rust/sedona-testing/src/compare.rs
+++ b/rust/sedona-testing/src/compare.rs
@@ -104,7 +104,25 @@ pub fn assert_array_equal(actual: &ArrayRef, expected:
&ArrayRef) {
/// Panics if the values' are not equal, generating reasonable failure
messages for geometry
/// arrays where the default failure message would otherwise be uninformative.
pub fn assert_scalar_equal_wkb_geometry(actual: &ScalarValue, expected_wkt:
Option<&str>) {
- assert_scalar_equal(actual, &create_scalar(expected_wkt, &WKB_GEOMETRY));
+ let expected = create_scalar(expected_wkt, &WKB_GEOMETRY);
+ assert_eq!(actual.data_type(), DataType::Binary);
+ assert_wkb_scalar_equal(actual, &expected, false);
+}
+
+/// Assert a [`ScalarValue`] is a WKB_GEOMETRY scalar corresponding to the
given WKT. This function
+/// compares the geometries topologically, so two geometries that are not
byte-wise equal but are
+/// topologically equal will be considered equal.
+///
+/// Panics if the values' are not topologically equal, generating reasonable
failure messages for geometry
+/// arrays where the default failure message would otherwise be uninformative.
+#[cfg(feature = "geo")]
+pub fn assert_scalar_equal_wkb_geometry_topologically(
+ actual: &ScalarValue,
+ expected_wkt: Option<&str>,
+) {
+ let expected = create_scalar(expected_wkt, &WKB_GEOMETRY);
+ assert_eq!(actual.data_type(), DataType::Binary);
+ assert_wkb_scalar_equal(actual, &expected, true);
}
/// Assert two [`ScalarValue`]s are equal
@@ -123,7 +141,7 @@ pub fn assert_scalar_equal(actual: &ScalarValue, expected:
&ScalarValue) {
(SedonaType::Arrow(_), SedonaType::Arrow(_)) =>
assert_arrow_scalar_equal(actual, expected),
(SedonaType::Wkb(_, _), SedonaType::Wkb(_, _))
| (SedonaType::WkbView(_, _), SedonaType::WkbView(_, _)) => {
- assert_wkb_scalar_equal(actual, expected);
+ assert_wkb_scalar_equal(actual, expected, false);
}
(_, _) => unreachable!(),
}
@@ -160,11 +178,21 @@ where
for (i, (actual_item, expected_item)) in zip(actual, expected).enumerate()
{
let actual_label = format!("actual Array element #{i}");
let expected_label = format!("expected Array element #{i}");
- assert_wkb_value_equal(actual_item, expected_item, &actual_label,
&expected_label);
+ assert_wkb_value_equal(
+ actual_item,
+ expected_item,
+ &actual_label,
+ &expected_label,
+ false,
+ );
}
}
-fn assert_wkb_scalar_equal(actual: &ScalarValue, expected: &ScalarValue) {
+fn assert_wkb_scalar_equal(
+ actual: &ScalarValue,
+ expected: &ScalarValue,
+ compare_topologically: bool,
+) {
match (actual, expected) {
(ScalarValue::Binary(maybe_actual_wkb),
ScalarValue::Binary(maybe_expected_wkb))
| (
@@ -176,6 +204,7 @@ fn assert_wkb_scalar_equal(actual: &ScalarValue, expected:
&ScalarValue) {
maybe_expected_wkb.as_deref(),
"actual WKB scalar",
"expected WKB scalar",
+ compare_topologically,
);
}
(_, _) => {
@@ -189,6 +218,7 @@ fn assert_wkb_value_equal(
expected: Option<&[u8]>,
actual_label: &str,
expected_label: &str,
+ compare_topologically: bool,
) {
match (actual, expected) {
(None, None) => {}
@@ -205,12 +235,56 @@ fn assert_wkb_value_equal(
)
}
(Some(actual_wkb), Some(expected_wkb)) => {
+ // Quick test: if the binary of the WKB is the same, they are equal
if actual_wkb != expected_wkb {
- let (actual_wkt, expected_wkt) = (format_wkb(actual_wkb),
format_wkb(expected_wkb));
- panic!("{actual_label} != {expected_label}\n{actual_label}:\n
{actual_wkt}\n{expected_label}:\n {expected_wkt}")
+ let is_equals = if compare_topologically {
+ compare_wkb_topologically(expected_wkb, actual_wkb)
+ } else {
+ false
+ };
+
+ if !is_equals {
+ let (actual_wkt, expected_wkt) =
+ (format_wkb(actual_wkb), format_wkb(expected_wkb));
+ panic!("{actual_label} !=
{expected_label}\n{actual_label}:\n {actual_wkt}\n{expected_label}:\n
{expected_wkt}")
+ }
+ }
+ }
+ }
+}
+
+fn compare_wkb_topologically(
+ #[allow(unused)] expected_wkb: &[u8],
+ #[allow(unused)] actual_wkb: &[u8],
+) -> bool {
+ #[cfg(feature = "geo")]
+ {
+ use geo::Relate;
+ use geo_traits::to_geo::ToGeoGeometry;
+ use geo_traits::Dimensions;
+ use geo_traits::GeometryTrait;
+
+ let expected = wkb::reader::read_wkb(expected_wkb);
+ let actual = wkb::reader::read_wkb(actual_wkb);
+ match (expected, actual) {
+ (Ok(expected_geom), Ok(actual_geom)) => {
+ if expected_geom.dim() == Dimensions::Xy && actual_geom.dim()
== Dimensions::Xy {
+ let expected_geom = expected_geom.to_geometry();
+ let actual_geom = actual_geom.to_geometry();
+ expected_geom.relate(&actual_geom).is_equal_topo()
+ } else {
+ // geo crate does not support 3D/4D geometry operations,
so we fall back to using the result
+ // of byte-wise comparison
+ false
+ }
}
+ _ => false,
}
}
+ #[cfg(not(feature = "geo"))]
+ {
+ panic!("Topological comparison requires the 'geo' feature to be
enabled");
+ }
}
fn format_wkb(value: &[u8]) -> String {
@@ -357,20 +431,20 @@ actual Array element #0 is POINT(1 2), expected Array
element #0 is null")]
#[test]
fn wkb_value_equal() {
- assert_wkb_value_equal(None, None, "lhs", "rhs");
- assert_wkb_value_equal(Some(&[]), Some(&[]), "lhs", "rhs");
+ assert_wkb_value_equal(None, None, "lhs", "rhs", false);
+ assert_wkb_value_equal(Some(&[]), Some(&[]), "lhs", "rhs", false);
}
#[test]
#[should_panic(expected = "lhs != rhs:\nlhs is POINT(1 2), rhs is null")]
fn wkb_value_expected_null() {
- assert_wkb_value_equal(Some(&POINT), None, "lhs", "rhs");
+ assert_wkb_value_equal(Some(&POINT), None, "lhs", "rhs", false);
}
#[test]
#[should_panic(expected = "lhs != rhs:\nlhs is null, rhs is POINT(1 2)")]
fn wkb_value_actual_null() {
- assert_wkb_value_equal(None, Some(&POINT), "lhs", "rhs");
+ assert_wkb_value_equal(None, Some(&POINT), "lhs", "rhs", false);
}
#[test]
@@ -380,7 +454,31 @@ lhs:
rhs:
POINT(1 2)")]
fn wkb_value_values_not_equal() {
- assert_wkb_value_equal(Some(&[]), Some(&POINT), "lhs", "rhs");
+ assert_wkb_value_equal(Some(&[]), Some(&POINT), "lhs", "rhs", false);
+ }
+
+ #[cfg(feature = "geo")]
+ #[test]
+ fn wkb_value_equal_topologically() {
+ use crate::create::make_wkb;
+ assert_wkb_value_equal(Some(&POINT), Some(&POINT), "lhs", "rhs", true);
+ let lhs = make_wkb("POLYGON ((0 0, 1 0, 0 1, 0 0))");
+ let rhs = make_wkb("POLYGON ((0 0, 0 1, 1 0, 0 0))");
+ assert_wkb_value_equal(Some(&lhs), Some(&rhs), "lhs", "rhs", true);
+ }
+
+ #[cfg(feature = "geo")]
+ #[test]
+ #[should_panic(expected = "lhs != rhs
+lhs:
+ POLYGON((0 0,1 0,0 1,0 0))
+rhs:
+ POLYGON((0 0,1 0,0 0))")]
+ fn wkb_value_not_equal_topologically() {
+ use crate::create::make_wkb;
+ let lhs = make_wkb("POLYGON ((0 0, 1 0, 0 1, 0 0))");
+ let rhs = make_wkb("POLYGON ((0 0, 1 0, 0 0))");
+ assert_wkb_value_equal(Some(&lhs), Some(&rhs), "lhs", "rhs", true);
}
#[test]