This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 7ea6f494 feat(rust/sedona-functions): Implement `ST_GeomFromWKBUnchecked()` (#533)
7ea6f494 is described below
commit 7ea6f4947b64004fc3e3da805f51907cfd51a936
Author: Yongting You <[email protected]>
AuthorDate: Thu Jan 22 13:56:06 2026 +0800
feat(rust/sedona-functions): Implement `ST_GeomFromWKBUnchecked()` (#533)
---
python/sedonadb/tests/functions/test_functions.py | 47 +++++++-
rust/sedona-functions/src/register.rs | 1 +
rust/sedona-functions/src/st_geomfromwkb.rs | 128 +++++++++++++++++++++-
3 files changed, 170 insertions(+), 6 deletions(-)
diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py
index 28f2e901..1c16b6d3 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -14,10 +14,12 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+import math
+
+import pyarrow
import pytest
import shapely
from sedonadb.testing import PostGIS, SedonaDB, geom_or_null, val_or_null
-import math
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@@ -1668,6 +1670,49 @@ def test_st_geomfromwkb(eng, geom):
eng.assert_query_result(f"SELECT ST_GeomFromWKB({wkb})", expected)
+# `ST_GeomFromWKBUnchecked` is not available in PostGIS
[email protected]("eng", [SedonaDB])
[email protected](
+ ("geom"),
+ [
+ "POINT (1 1)",
+ "POINT EMPTY",
+ "LINESTRING EMPTY",
+ "POLYGON EMPTY",
+ "GEOMETRYCOLLECTION EMPTY",
+ "POLYGON ((0 0, 1 0, 1 1, 0 1, 0 0))",
+ "MULTILINESTRING ((0 0, 1 1), (1 1, 2 2))",
+ "GEOMETRYCOLLECTION (POINT (0 0), LINESTRING (0 0, 1 1), POLYGON ((0
0, 0 1, 1 1, 1 0, 0 0)))",
+ ],
+)
+def test_st_geomfromwkbunchecked(eng, geom):
+ eng = eng.create_or_skip()
+
+ expected = geom
+ if geom == "POINT EMPTY":
+ # arrow-c returns POINT (nan nan) instead of POINT EMPTY
+ expected = "POINT (nan nan)"
+
+ wkb = shapely.from_wkt(geom).wkb
+ wkb = "0x" + wkb.hex()
+
+ eng.assert_query_result(f"SELECT ST_GeomFromWKBUnchecked({wkb})", expected)
+
+
[email protected]("eng", [SedonaDB])
+def test_st_geomfromwkbunchecked_invalid_wkb(eng):
+ eng = eng.create_or_skip()
+
+ # Invalid WKB payload can still convert to geometry column
+ eng.assert_query_result(
+ "SELECT ST_AsBinary(ST_GeomFromWKBUnchecked(0x01))", b"\x01"
+ )
+
+ # Using invalid WKB elsewhere may result in undefined behavior.
+ with pytest.raises(pyarrow.lib.ArrowInvalid, match="failed to fill whole
buffer"):
+ eng.execute_and_collect("SELECT
ST_AsText(ST_GeomFromWKBUnchecked(0x01))")
+
+
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("geom", "index", "expected"),
diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs
index 80dbbd82..8161923a 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -81,6 +81,7 @@ pub fn default_function_set() -> FunctionSet {
crate::st_geometrytype::st_geometry_type_udf,
crate::st_geomfromwkb::st_geogfromwkb_udf,
crate::st_geomfromwkb::st_geomfromwkb_udf,
+ crate::st_geomfromwkb::st_geomfromwkbunchecked_udf,
crate::st_geomfromwkt::st_geogfromwkt_udf,
crate::st_geomfromwkt::st_geomfromwkt_udf,
crate::st_geomfromwkt::st_geomfromewkt_udf,
diff --git a/rust/sedona-functions/src/st_geomfromwkb.rs b/rust/sedona-functions/src/st_geomfromwkb.rs
index 680cc72e..b218747c 100644
--- a/rust/sedona-functions/src/st_geomfromwkb.rs
+++ b/rust/sedona-functions/src/st_geomfromwkb.rs
@@ -44,6 +44,21 @@ pub fn st_geomfromwkb_udf() -> SedonaScalarUDF {
)
}
+/// ST_GeomFromWKBUnchecked() scalar UDF implementation
+///
+/// An implementation of WKB reading using GeoRust's wkb crate without validation.
+pub fn st_geomfromwkbunchecked_udf() -> SedonaScalarUDF {
+    SedonaScalarUDF::new(
+        "st_geomfromwkbunchecked",
+        vec![Arc::new(STGeomFromWKB {
+            validate: false,
+            out_type: WKB_VIEW_GEOMETRY,
+        })],
+        Volatility::Immutable,
+        Some(doc_unchecked("ST_GeomFromWKBUnchecked", "Geometry")),
+    )
+}
+
/// ST_GeogFromWKB() scalar UDF implementation
///
/// An implementation of WKB reading using GeoRust's wkb crate.
@@ -77,6 +92,29 @@ fn doc(name: &str, out_type_name: &str) -> Documentation {
.build()
}
+/// Documentation for `ST_GeomFromWKBUnchecked()`.
+///
+/// Parameterized for reuse if `ST_GeogFromWKBUnchecked()` is implemented in the future.
+fn doc_unchecked(name: &str, out_type_name: &str) -> Documentation {
+    Documentation::builder(
+        DOC_SECTION_OTHER,
+        format!(
+            "Construct a {out_type_name} from WKB without validation. Invalid WKB input may result in undefined behavior."
+        ),
+        format!("{name} (Wkb: Binary)"),
+    )
+    .with_argument(
+        "WKB",
+        format!(
+            "binary: Well-known binary representation of the {}",
+            out_type_name.to_lowercase()
+        ),
+    )
+    .with_sql_example(format!("SELECT {name}([01 02 00 00 00 02 00 00 00 00 00 00 00 84 D6 00 C0 00 00 00 00 80 B5 D6 BF 00 00 00 60 E1 EF F7 BF 00 00 00 80 07 5D E5 BF])"))
+    .with_related_udf("ST_AsText")
+    .build()
+}
+
#[derive(Debug)]
struct STGeomFromWKB {
validate: bool,
@@ -117,7 +155,7 @@ impl SedonaScalarKernel for STGeomFromWKB {
#[cfg(test)]
mod tests {
- use arrow_array::BinaryArray;
+ use arrow_array::{ArrayRef, BinaryArray, BinaryViewArray};
use datafusion_common::scalar::ScalarValue;
use datafusion_expr::ScalarUDF;
use rstest::rstest;
@@ -134,6 +172,9 @@ mod tests {
0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xf0, 0x3f, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x40,
];
+ const INVALID_WKB_LEN: [u8; 0] = [];
+ const INVALID_WKB_CONTENT: [u8; 5] = [0x01, 0x00, 0x00, 0x00, 0x00];
+ const INVALID_WKBS: [&[u8]; 2] = [&INVALID_WKB_LEN, &INVALID_WKB_CONTENT];
#[test]
fn udf_metadata() {
@@ -144,6 +185,10 @@ mod tests {
let geom_from_wkb: ScalarUDF = st_geomfromwkb_udf().into();
assert_eq!(geom_from_wkb.name(), "st_geomfromwkb");
assert!(geom_from_wkb.documentation().is_some());
+
+ let geom_from_wkb_unchecked: ScalarUDF =
st_geomfromwkbunchecked_udf().into();
+ assert_eq!(geom_from_wkb_unchecked.name(), "st_geomfromwkbunchecked");
+ assert!(geom_from_wkb_unchecked.documentation().is_some());
}
#[rstest]
@@ -176,16 +221,89 @@ mod tests {
);
}
+    #[rstest]
+    fn udf_unchecked(#[values(DataType::Binary, DataType::BinaryView)] data_type: DataType) {
+        let udf = st_geomfromwkbunchecked_udf();
+        let tester = ScalarUdfTester::new(
+            udf.clone().into(),
+            vec![SedonaType::Arrow(data_type.clone())],
+        );
+
+        assert_eq!(tester.return_type().unwrap(), WKB_VIEW_GEOMETRY);
+
+        assert_scalar_equal(
+            &tester.invoke_scalar(POINT12.to_vec()).unwrap(),
+            &create_scalar(Some("POINT (1 2)"), &WKB_VIEW_GEOMETRY),
+        );
+
+        assert_scalar_equal(
+            &tester.invoke_scalar(ScalarValue::Null).unwrap(),
+            &create_scalar(None, &WKB_VIEW_GEOMETRY),
+        );
+
+        let binary_array: BinaryArray = [Some(POINT12), None, Some(POINT12)].iter().collect();
+        assert_array_equal(
+            &tester.invoke_array(Arc::new(binary_array)).unwrap(),
+            &create_array(
+                &[Some("POINT (1 2)"), None, Some("POINT (1 2)")],
+                &WKB_VIEW_GEOMETRY,
+            ),
+        );
+    }
+
#[test]
fn invalid_wkb() {
let udf = st_geomfromwkb_udf();
let tester = ScalarUdfTester::new(udf.into(),
vec![SedonaType::Arrow(DataType::Binary)]);
- let err = tester
- .invoke_scalar(ScalarValue::Binary(Some(vec![])))
- .unwrap_err();
+ for invalid in INVALID_WKBS {
+ let _err = tester
+ .invoke_scalar(ScalarValue::Binary(Some(invalid.to_vec())))
+ .unwrap_err();
+ }
+ }
- assert_eq!(err.message(), "failed to fill whole buffer");
+    #[rstest]
+    fn unchecked_invalid_wkb(
+        #[values(DataType::Binary, DataType::BinaryView)] data_type: DataType,
+    ) {
+        let udf = st_geomfromwkbunchecked_udf();
+        let tester = ScalarUdfTester::new(udf.into(), vec![SedonaType::Arrow(data_type.clone())]);
+
+        for invalid in INVALID_WKBS {
+            let invalid_scalar = match data_type {
+                DataType::Binary => ScalarValue::Binary(Some(invalid.to_vec())),
+                DataType::BinaryView => ScalarValue::BinaryView(Some(invalid.to_vec())),
+                _ => unreachable!(),
+            };
+
+            assert_scalar_equal(
+                &tester.invoke_scalar(invalid_scalar).unwrap(),
+                &ScalarValue::BinaryView(Some(invalid.to_vec())),
+            );
+
+            let input_array: ArrayRef = match data_type {
+                DataType::Binary => Arc::new(
+                    [Some(invalid), None, Some(invalid)]
+                        .iter()
+                        .collect::<BinaryArray>(),
+                ),
+                DataType::BinaryView => Arc::new(
+                    [Some(invalid), None, Some(invalid)]
+                        .iter()
+                        .collect::<BinaryViewArray>(),
+                ),
+                _ => unreachable!(),
+            };
+
+            let expected_array: BinaryViewArray =
+                [Some(invalid), None, Some(invalid)].iter().collect();
+
+            assert_array_equal(
+                &tester.invoke_array(input_array).unwrap(),
+                &(Arc::new(expected_array) as ArrayRef),
+            );
+        }
     }
#[test]