This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-db.git
The following commit(s) were added to refs/heads/main by this push:
new 0551583 feat(sql): Implement ST_StartPoint() and ST_EndPoint() (#245)
0551583 is described below
commit 0551583a675da1cf5cc61cb7c7a61352051f1fc6
Author: Hiroaki Yutani <[email protected]>
AuthorDate: Wed Oct 29 12:35:41 2025 +0900
feat(sql): Implement ST_StartPoint() and ST_EndPoint() (#245)
Co-authored-by: Dewey Dunnington <[email protected]>
---
benchmarks/test_functions.py | 36 +++
python/sedonadb/tests/functions/test_functions.py | 65 +++++
rust/sedona-functions/benches/native-functions.rs | 16 ++
rust/sedona-functions/src/lib.rs | 1 +
rust/sedona-functions/src/register.rs | 2 +
rust/sedona-functions/src/st_start_point.rs | 300 ++++++++++++++++++++++
rust/sedona-geometry/src/wkb_factory.rs | 58 ++++-
7 files changed, 477 insertions(+), 1 deletion(-)
diff --git a/benchmarks/test_functions.py b/benchmarks/test_functions.py
index d8ec008..cf0efd6 100644
--- a/benchmarks/test_functions.py
+++ b/benchmarks/test_functions.py
@@ -203,3 +203,39 @@ class TestBenchFunctions(TestBenchBase):
eng.execute_and_collect(f"SELECT ST_Perimeter(geom1) from {table}")
benchmark(queries)
+
+ @pytest.mark.parametrize(
+ "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
+ )
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "collections_simple",
+ "segments_large",
+ ],
+ )
+ def test_st_start_point(self, benchmark, eng, table):
+ eng = self._get_eng(eng)
+
+ def queries():
+ eng.execute_and_collect(f"SELECT ST_StartPoint(geom1) from {table}")
+
+ benchmark(queries)
+
+ @pytest.mark.parametrize(
+ "eng", [SedonaDBSingleThread, PostGISSingleThread, DuckDBSingleThread]
+ )
+ @pytest.mark.parametrize(
+ "table",
+ [
+ "collections_simple",
+ "segments_large",
+ ],
+ )
+ def test_st_end_point(self, benchmark, eng, table):
+ eng = self._get_eng(eng)
+
+ def queries():
+ eng.execute_and_collect(f"SELECT ST_EndPoint(geom1) from {table}")
+
+ benchmark(queries)
diff --git a/python/sedonadb/tests/functions/test_functions.py b/python/sedonadb/tests/functions/test_functions.py
index 327b3ec..a219e4f 100644
--- a/python/sedonadb/tests/functions/test_functions.py
+++ b/python/sedonadb/tests/functions/test_functions.py
@@ -1016,6 +1016,71 @@ def test_st_pointm(eng, x, y, m, expected):
)
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+ ("geometry", "expected"),
+ [
+ (None, None),
+ ("POINT EMPTY", None),
+ ("LINESTRING EMPTY", None),
+ ("POLYGON EMPTY", None),
+ ("MULTIPOINT EMPTY", None),
+ ("MULTILINESTRING EMPTY", None),
+ ("MULTIPOLYGON EMPTY", None),
+ ("GEOMETRYCOLLECTION EMPTY", None),
+ ("LINESTRING (1 2, 3 4, 5 6)", "POINT (1 2)"),
+ ("LINESTRING Z (1 2 3, 3 4 5, 5 6 7)", "POINT Z (1 2 3)"),
+ ("LINESTRING M (1 2 3, 3 4 5, 5 6 7)", "POINT M (1 2 3)"),
+ ("LINESTRING ZM (1 2 3 4, 3 4 5 6, 5 6 7 8)", "POINT ZM (1 2 3 4)"),
+ ("POINT (1 2)", "POINT (1 2)"),
+ ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))", "POINT (0 0)"),
+ ("MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0)", "POINT (0 0)"),
+ ("MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))", "POINT (1 2)"),
+ ("MULTIPOLYGON (((0 0, 10 0, 10 10, 0 10, 0 0)))", "POINT (0 0)"),
+ ("GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6))", "POINT (1 2)"),
+ (
+ "GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6))))",
+ "POINT (1 2)",
+ ),
+ ],
+)
+def test_st_start_point(eng, geometry, expected):
+ eng = eng.create_or_skip()
+ eng.assert_query_result(
+ f"SELECT ST_StartPoint({geom_or_null(geometry)})",
+ expected,
+ )
+
+
+@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
+@pytest.mark.parametrize(
+ ("geometry", "expected"),
+ [
+ (None, None),
+ ("POINT EMPTY", None),
+ ("LINESTRING EMPTY", None),
+ ("POLYGON EMPTY", None),
+ ("MULTIPOINT EMPTY", None),
+ ("MULTILINESTRING EMPTY", None),
+ ("MULTIPOLYGON EMPTY", None),
+ ("GEOMETRYCOLLECTION EMPTY", None),
+ ("LINESTRING (1 2, 3 4, 5 6)", "POINT (5 6)"),
+ ("LINESTRING Z (1 2 3, 3 4 5, 5 6 7)", "POINT Z (5 6 7)"),
+ ("LINESTRING M (1 2 3, 3 4 5, 5 6 7)", "POINT M (5 6 7)"),
+ ("LINESTRING ZM (1 2 3 4, 3 4 5 6, 5 6 7 8)", "POINT ZM (5 6 7 8)"),
+ ("POINT (1 2)", None),
+ ("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))", None),
+ ("MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))", None),
+ ],
+)
+def test_st_end_point(eng, geometry, expected):
+ eng = eng.create_or_skip()
+ eng.assert_query_result(
+ f"SELECT ST_EndPoint({geom_or_null(geometry)})",
+ expected,
+ )
+
+
@pytest.mark.parametrize("eng", [SedonaDB, PostGIS])
@pytest.mark.parametrize(
("x", "y", "z", "m", "expected"),
diff --git a/rust/sedona-functions/benches/native-functions.rs b/rust/sedona-functions/benches/native-functions.rs
index f7d4ed2..af89a46 100644
--- a/rust/sedona-functions/benches/native-functions.rs
+++ b/rust/sedona-functions/benches/native-functions.rs
@@ -126,6 +126,22 @@ fn criterion_benchmark(c: &mut Criterion) {
),
);
+ benchmark::scalar(
+ c,
+ &f,
+ "native",
+ "st_startpoint",
+ BenchmarkArgs::Array(LineString(10)),
+ );
+
+ benchmark::scalar(
+ c,
+ &f,
+ "native",
+ "st_endpoint",
+ BenchmarkArgs::Array(LineString(10)),
+ );
+
benchmark::scalar(c, &f, "native", "st_x", Point);
benchmark::scalar(c, &f, "native", "st_y", Point);
benchmark::scalar(c, &f, "native", "st_z", Point);
diff --git a/rust/sedona-functions/src/lib.rs b/rust/sedona-functions/src/lib.rs
index cf7608d..42ee3dc 100644
--- a/rust/sedona-functions/src/lib.rs
+++ b/rust/sedona-functions/src/lib.rs
@@ -49,6 +49,7 @@ mod st_point;
mod st_pointzm;
mod st_setsrid;
mod st_srid;
+mod st_start_point;
mod st_transform;
pub mod st_union_aggr;
mod st_xyzm;
diff --git a/rust/sedona-functions/src/register.rs b/rust/sedona-functions/src/register.rs
index 0e0d871..30a9007 100644
--- a/rust/sedona-functions/src/register.rs
+++ b/rust/sedona-functions/src/register.rs
@@ -92,6 +92,8 @@ pub fn default_function_set() -> FunctionSet {
crate::st_setsrid::st_set_srid_udf,
crate::st_srid::st_crs_udf,
crate::st_srid::st_srid_udf,
+ crate::st_start_point::st_end_point_udf,
+ crate::st_start_point::st_start_point_udf,
crate::st_xyzm::st_m_udf,
crate::st_xyzm::st_x_udf,
crate::st_xyzm::st_y_udf,
diff --git a/rust/sedona-functions/src/st_start_point.rs b/rust/sedona-functions/src/st_start_point.rs
new file mode 100644
index 0000000..4cf7887
--- /dev/null
+++ b/rust/sedona-functions/src/st_start_point.rs
@@ -0,0 +1,300 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+use arrow_array::builder::BinaryBuilder;
+use datafusion_common::error::Result;
+use datafusion_expr::{
+ scalar_doc_sections::DOC_SECTION_OTHER, ColumnarValue, Documentation, Volatility,
+};
+use geo_traits::{
+ CoordTrait, GeometryCollectionTrait, GeometryTrait, LineStringTrait, MultiLineStringTrait,
+ MultiPointTrait, MultiPolygonTrait, PointTrait, PolygonTrait,
+};
+use sedona_common::sedona_internal_err;
+use sedona_expr::scalar_udf::{SedonaScalarKernel, SedonaScalarUDF};
+use sedona_geometry::{
+ error::SedonaGeometryError,
+ wkb_factory::{write_wkb_coord_trait, write_wkb_point_header, WKB_MIN_PROBABLE_BYTES},
+};
+use sedona_schema::{
+ datatypes::{SedonaType, WKB_GEOMETRY},
+ matchers::ArgMatcher,
+};
+use std::{io::Write, sync::Arc};
+
+use crate::executor::WkbExecutor;
+
+/// ST_StartPoint() scalar UDF
+///
+/// Native implementation to get the start point of a geometry
+pub fn st_start_point_udf() -> SedonaScalarUDF {
+ SedonaScalarUDF::new(
+ "st_startpoint",
+ vec![Arc::new(STStartOrEndPoint::new(true))],
+ Volatility::Immutable,
+ Some(st_start_point_doc()),
+ )
+}
+
+fn st_start_point_doc() -> Documentation {
+ Documentation::builder(
+ DOC_SECTION_OTHER,
+ "Returns the start point of a geometry. Returns NULL if the geometry is empty.",
+ "ST_StartPoint (geom: Geometry)",
+ )
+ .with_argument("geom", "geometry: Input geometry")
+ .with_sql_example("SELECT ST_StartPoint(ST_GeomFromWKT('LINESTRING(0 1, 2 3, 4 5)'))")
+ .build()
+}
+
+/// ST_EndPoint() scalar UDF
+///
+/// Native implementation to get the end point of a geometry
+pub fn st_end_point_udf() -> SedonaScalarUDF {
+ SedonaScalarUDF::new(
+ "st_endpoint",
+ vec![Arc::new(STStartOrEndPoint::new(false))],
+ Volatility::Immutable,
+ Some(st_end_point_doc()),
+ )
+}
+
+fn st_end_point_doc() -> Documentation {
+ Documentation::builder(
+ DOC_SECTION_OTHER,
+ "Returns the end point of a LINESTRING geometry. Returns NULL if the geometry is empty or not a LINESTRING.",
+ "ST_EndPoint (geom: Geometry)",
+ )
+ .with_argument("geom", "geometry: Input geometry")
+ .with_sql_example("SELECT ST_EndPoint(ST_GeomFromWKT('LINESTRING(0 1, 2 3, 4 5)'))")
+ .build()
+}
+
+#[derive(Debug)]
+struct STStartOrEndPoint {
+ from_start: bool,
+}
+
+impl STStartOrEndPoint {
+ fn new(from_start: bool) -> Self {
+ STStartOrEndPoint { from_start }
+ }
+}
+
+impl SedonaScalarKernel for STStartOrEndPoint {
+ fn return_type(&self, args: &[SedonaType]) -> Result<Option<SedonaType>> {
+ let matcher = ArgMatcher::new(vec![ArgMatcher::is_geometry()], WKB_GEOMETRY);
+
+ matcher.match_args(args)
+ }
+
+ fn invoke_batch(
+ &self,
+ arg_types: &[SedonaType],
+ args: &[ColumnarValue],
+ ) -> Result<ColumnarValue> {
+ let executor = WkbExecutor::new(arg_types, args);
+ let mut builder = BinaryBuilder::with_capacity(
+ executor.num_iterations(),
+ WKB_MIN_PROBABLE_BYTES * executor.num_iterations(),
+ );
+
+ executor.execute_wkb_void(|maybe_wkb| {
+ if let Some(wkb) = maybe_wkb {
+ if let Some(coord) = extract_start_or_end_coord(&wkb, self.from_start) {
+ if write_wkb_point_from_coord(&mut builder, coord).is_err() {
+ return sedona_internal_err!("Failed to write WKB point header");
+ };
+ builder.append_value([]);
+ return Ok(());
+ }
+ }
+
+ builder.append_null();
+ Ok(())
+ })?;
+
+ executor.finish(Arc::new(builder.finish()))
+ }
+}
+
+fn write_wkb_point_from_coord(
+ buf: &mut impl Write,
+ coord: impl CoordTrait<T = f64>,
+) -> Result<(), SedonaGeometryError> {
+ write_wkb_point_header(buf, coord.dim())?;
+ write_wkb_coord_trait(buf, &coord)
+}
+
+// - ST_StartPoint returns result for all types of geometries
+// - ST_EndPoint returns result only for LINESTRING
+fn extract_start_or_end_coord<'a>(
+ wkb: &'a wkb::reader::Wkb<'a>,
+ from_start: bool,
+) -> Option<wkb::reader::Coord<'a>> {
+ match (wkb.as_type(), from_start) {
+ (geo_traits::GeometryType::Point(point), true) => point.coord(),
+ (geo_traits::GeometryType::LineString(line_string), true) => line_string.coord(0),
+ (geo_traits::GeometryType::LineString(line_string), false) => {
+ match line_string.num_coords() {
+ 0 => None,
+ n => line_string.coord(n - 1),
+ }
+ }
+ (geo_traits::GeometryType::Polygon(polygon), true) => match polygon.exterior() {
+ Some(ring) => ring.coord(0),
+ None => None,
+ },
+ (geo_traits::GeometryType::MultiPoint(multi_point), true) => match multi_point.point(0) {
+ Some(point) => point.coord(),
+ None => None,
+ },
+ (geo_traits::GeometryType::MultiLineString(multi_line_string), true) => {
+ match multi_line_string.line_string(0) {
+ Some(line_string) => line_string.coord(0),
+ None => None,
+ }
+ }
+ (geo_traits::GeometryType::MultiPolygon(multi_polygon), true) => {
+ match multi_polygon.polygon(0) {
+ Some(polygon) => match polygon.exterior() {
+ Some(ring) => ring.coord(0),
+ None => None,
+ },
+ None => None,
+ }
+ }
+ (geo_traits::GeometryType::GeometryCollection(geometry_collection), true) => {
+ match geometry_collection.geometry(0) {
+ Some(geometry) => extract_start_or_end_coord(geometry, from_start),
+ None => None,
+ }
+ }
+ (geo_traits::GeometryType::Rect(_), true) => None,
+ (geo_traits::GeometryType::Triangle(_), true) => None,
+ (geo_traits::GeometryType::Line(_), true) => None,
+ _ => None,
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use datafusion_expr::ScalarUDF;
+ use rstest::rstest;
+ use sedona_schema::datatypes::WKB_VIEW_GEOMETRY;
+ use sedona_testing::{
+ compare::assert_array_equal, create::create_array, testers::ScalarUdfTester,
+ };
+
+ use super::*;
+
+ #[test]
+ fn udf_metadata() {
+ let st_start_point_udf: ScalarUDF = st_start_point_udf().into();
+ assert_eq!(st_start_point_udf.name(), "st_startpoint");
+ assert!(st_start_point_udf.documentation().is_some());
+
+ let st_end_point_udf: ScalarUDF = st_end_point_udf().into();
+ assert_eq!(st_end_point_udf.name(), "st_endpoint");
+ assert!(st_end_point_udf.documentation().is_some());
+ }
+
+ #[rstest]
+ fn udf(#[values(WKB_GEOMETRY, WKB_VIEW_GEOMETRY)] sedona_type: SedonaType) {
+ let tester_start_point =
+ ScalarUdfTester::new(st_start_point_udf().into(), vec![sedona_type.clone()]);
+ let tester_end_point =
+ ScalarUdfTester::new(st_end_point_udf().into(), vec![sedona_type.clone()]);
+
+ let input = create_array(
+ &[
+ Some("LINESTRING (1 2, 3 4, 5 6)"),
+ Some("LINESTRING Z (1 2 3, 3 4 5, 5 6 7)"),
+ Some("LINESTRING M (1 2 3, 3 4 5, 5 6 7)"),
+ Some("LINESTRING ZM (1 2 3 4, 3 4 5 6, 5 6 7 8)"),
+ Some("POINT (1 2)"),
+ Some("POLYGON ((0 0, 10 0, 10 10, 0 10, 0 0))"),
+ Some("MULTIPOINT (0 0, 10 0, 10 10, 0 10, 0 0)"),
+ Some("MULTILINESTRING ((1 2, 3 4), (5 6, 7 8))"),
+ Some("MULTIPOLYGON (((0 0, 10 0, 10 10, 0 10, 0 0)))"),
+ Some("GEOMETRYCOLLECTION (POINT (1 2), LINESTRING (3 4, 5 6))"),
+ Some("POINT EMPTY"),
+ Some("LINESTRING EMPTY"),
+ Some("POLYGON EMPTY"),
+ Some("MULTIPOINT EMPTY"),
+ Some("MULTILINESTRING EMPTY"),
+ Some("MULTIPOLYGON EMPTY"),
+ Some("GEOMETRYCOLLECTION EMPTY"),
+ None,
+ ],
+ &sedona_type,
+ );
+
+ let expected_start_point = create_array(
+ &[
+ Some("POINT (1 2)"),
+ Some("POINT Z (1 2 3)"),
+ Some("POINT M (1 2 3)"),
+ Some("POINT ZM (1 2 3 4)"),
+ Some("POINT (1 2)"),
+ Some("POINT (0 0)"),
+ Some("POINT (0 0)"),
+ Some("POINT (1 2)"),
+ Some("POINT (0 0)"),
+ Some("POINT (1 2)"),
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ ],
+ &WKB_GEOMETRY,
+ );
+
+ let result_start_point = tester_start_point.invoke_array(input.clone()).unwrap();
+ assert_array_equal(&result_start_point, &expected_start_point);
+
+ let expected_end_point = create_array(
+ &[
+ Some("POINT (5 6)"),
+ Some("POINT Z (5 6 7)"),
+ Some("POINT M (5 6 7)"),
+ Some("POINT ZM (5 6 7 8)"),
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ ],
+ &WKB_GEOMETRY,
+ );
+
+ let result_end_point = tester_end_point.invoke_array(input).unwrap();
+ assert_array_equal(&result_end_point, &expected_end_point);
+ }
+}
diff --git a/rust/sedona-geometry/src/wkb_factory.rs b/rust/sedona-geometry/src/wkb_factory.rs
index 000788f..9db1d29 100644
--- a/rust/sedona-geometry/src/wkb_factory.rs
+++ b/rust/sedona-geometry/src/wkb_factory.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
use crate::error::SedonaGeometryError;
-use geo_traits::Dimensions;
+use geo_traits::{CoordTrait, Dimensions};
use std::io::Write;
pub const WKB_MIN_PROBABLE_BYTES: usize = 21;
@@ -392,6 +392,37 @@ where
Ok(())
}
+/// Write a single coordinate of CoordTrait to WKB
+/// This function always writes little endian coordinates.
+pub fn write_wkb_coord_trait<C>(buf: &mut impl Write, coord: &C) -> Result<(), SedonaGeometryError>
+where
+ C: CoordTrait<T = f64>,
+{
+ match coord.dim().size() {
+ 2 => {
+ let coord_tuple = coord.x_y();
+ write_wkb_coord(buf, coord_tuple)
+ }
+ 3 => {
+ let coord_tuple: (<C as CoordTrait>::T, _, _) =
+ (coord.x(), coord.y(), coord.nth_or_panic(2));
+ write_wkb_coord(buf, coord_tuple)
+ }
+ 4 => {
+ let coord_tuple = (
+ coord.x(),
+ coord.y(),
+ coord.nth_or_panic(2),
+ coord.nth_or_panic(3),
+ );
+ write_wkb_coord(buf, coord_tuple)
+ }
+ _ => Err(SedonaGeometryError::Invalid(
+ "Unsupported number of dimensions".to_string(),
+ )),
+ }
+}
+
/// Write multiple coordinates to WKB
///
/// This function takes an iterator of coordinates and writes them to the provided buffer.
@@ -537,6 +568,31 @@ mod test {
check_bytes(&wkb, "POINT ZM(12 13 14 15)");
}
+ #[test]
+ fn test_write_wkb_coord_trait() {
+ let cases = [
+ (None, None, "POINT(0 1)"),
+ (Some(2.0), None, "POINT Z(0 1 2)"),
+ (None, Some(3.0), "POINT M(0 1 3)"),
+ (Some(2.0), Some(3.0), "POINT ZM(0 1 2 3)"),
+ ];
+ let mut wkb = vec![];
+
+ for (z, m, expected) in cases {
+ let coord = wkt::types::Coord {
+ x: 0.0,
+ y: 1.0,
+ z,
+ m,
+ };
+
+ wkb.clear();
+ write_wkb_point_header(&mut wkb, coord.dim()).unwrap();
+ write_wkb_coord_trait(&mut wkb, &coord).unwrap();
+ check_bytes(&wkb, expected);
+ }
+ }
+
#[test]
fn test_wkb_linestring() {
let wkt: Wkt = Wkt::from_str("LINESTRING EMPTY").unwrap();