This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new a5207a81cc [GH-2261] add new constructor ST_GeogFromEKWB (#2260)
a5207a81cc is described below
commit a5207a81cccc147ce203832bbdb74438504cb307
Author: Zhuocheng Shang <[email protected]>
AuthorDate: Wed Aug 13 23:23:10 2025 -0700
[GH-2261] add new constructor ST_GeogFromEKWB (#2260)
* add new constructor ST_GeogFromEKWB
* test SRID preserved
* add API doc
* fix EWKB SRID output
* ci: retrigger workflow
* ci: retrigger workflow
* fix miss ST_GeogFromWKT header
---
.../sedona/common/Geography/ConstructorsTest.java | 37 ++++++++++++++++++
docs/api/sql/geography/Constructor.md | 45 ++++++++++++++++++++++
.../scala/org/apache/sedona/sql/UDF/Catalog.scala | 3 +-
.../expressions/geography/Constructors.scala | 15 ++++++++
.../sedona_sql/expressions/st_constructors.scala | 10 +++--
.../geography/ConstructorsDataFrameAPITest.scala | 15 +++++++-
.../sedona/sql/geography/ConstructorsTest.scala | 29 ++++++++++++++
7 files changed, 149 insertions(+), 5 deletions(-)
diff --git
a/common/src/test/java/org/apache/sedona/common/Geography/ConstructorsTest.java
b/common/src/test/java/org/apache/sedona/common/Geography/ConstructorsTest.java
index 4d1100c446..4d2075920e 100644
---
a/common/src/test/java/org/apache/sedona/common/Geography/ConstructorsTest.java
+++
b/common/src/test/java/org/apache/sedona/common/Geography/ConstructorsTest.java
@@ -24,6 +24,7 @@ import com.google.common.geometry.S2LatLng;
import com.google.common.geometry.S2Point;
import org.apache.sedona.common.S2Geography.Geography;
import org.apache.sedona.common.S2Geography.SinglePointGeography;
+import org.apache.sedona.common.S2Geography.WKBReader;
import org.apache.sedona.common.S2Geography.WKBWriter;
import org.apache.sedona.common.geography.Constructors;
import org.junit.Test;
@@ -86,4 +87,40 @@ public class ConstructorsTest {
assertEquals("POINT (-64 45)", result.toString());
assertEquals(0, result.getSRID());
}
+
+ @Test
+ public void testGeogFromEWKB() throws ParseException {
+ String ewkbString =
"01010000A0E61000000000000000000000000000000000F03F0000000000000040";
+ byte[] wkbBytes = WKBReader.hexToBytes(ewkbString);
+ Geography result = Constructors.geogFromWKB(wkbBytes);
+ String expectedGeom = "SRID=4326; POINT (0 1)";
+ assertEquals(expectedGeom, result.toString());
+ assertEquals(4326, result.getSRID());
+
+ ewkbString =
+
"0103000020E61000000100000005000000000000000000000000000000000000000000000000000000000000000000F03F000000000000F03F000000000000F03F000000000000F03F000000000000000000000000000000000000000000000000";
+ wkbBytes = WKBReader.hexToBytes(ewkbString);
+ result = Constructors.geogFromWKB(wkbBytes);
+ expectedGeom = "SRID=4326; POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))";
+ assertEquals(expectedGeom, result.toString());
+ assertEquals(4326, result.getSRID());
+
+ ewkbString =
+ "0106000020E610000002000000"
+ + "01030000000100000004000000"
+ + "00000000000000000000000000000000"
+ + "000000000000F03F0000000000000000"
+ + "000000000000F03F000000000000F03F"
+ + "00000000000000000000000000000000"
+ + "01030000000100000004000000"
+ + "000000000000F0BF000000000000F0BF"
+ + "000000000000F0BF0000000000000000"
+ + "0000000000000000000000000000F0BF"
+ + "000000000000F0BF000000000000F0BF";
+ wkbBytes = WKBReader.hexToBytes(ewkbString);
+ result = Constructors.geogFromWKB(wkbBytes);
+ expectedGeom = "SRID=4326; MULTIPOLYGON (((0 0, 1 0, 1 1, 0 0)), ((-1 -1,
-1 0, 0 -1, -1 -1)))";
+ assertEquals(expectedGeom, result.toString());
+ assertEquals(4326, result.getSRID());
+ }
}
diff --git a/docs/api/sql/geography/Constructor.md
b/docs/api/sql/geography/Constructor.md
index d9dc329252..8cf2d9b74d 100644
--- a/docs/api/sql/geography/Constructor.md
+++ b/docs/api/sql/geography/Constructor.md
@@ -17,6 +17,50 @@
under the License.
-->
+## ST_GeogFromWKB
+
+Introduction: Construct a Geography from WKB Binary.
+
+Format:
+
+`ST_GeogFromWKB (Wkb: Binary)`
+
+Since: `v1.8.0`
+
+SQL Example
+
+```sql
+SELECT ST_GeogFromWKB([01 02 00 00 00 02 00 00 00 00 00 00 00 84 d6 00 c0 00
00 00 00 80 b5 d6 bf 00 00 00 60 e1 ef f7 bf 00 00 00 80 07 5d e5 bf])
+```
+
+Output:
+
+```
+LINESTRING (-2.1 -0.4, -1.5 -0.7)
+```
+
+## ST_GeogFromEWKB
+
+Introduction: Construct a Geography from EWKB Binary. This function is an
alias of [ST_GeogFromWKB](#st_geogfromwkb).
+
+Format:
+
+`ST_GeogFromEWKB (EWkb: Binary)`
+
+Since: `v1.8.0`
+
+SQL Example
+
+```sql
+SELECT ST_GeogFromEWKB([01 02 00 00 20 E6 10 00 00 02 00 00 00 00 00 00 00 84
D6 00 C0 00 00 00 00 80 B5 D6 BF 00 00 00 60 E1 EF F7 BF 00 00 00 80 07 5D E5
BF])
+```
+
+Output:
+
+```
+SRID=4326; LINESTRING (-2.1 -0.4, -1.5 -0.7)
+```
+
## ST_GeogFromWKT
Introduction: Construct a Geography from WKT. If SRID is not set, it defaults
to 0 (unknown).
@@ -58,6 +102,7 @@ SRID=4326; LINESTRING (1 2, 3 4, 5 6)
Introduction: Construct a Geography from OGC Extended WKT.
Format:
+
`ST_GeogFromEWKT (EWkt: String)`
Since: `v1.8.0`
diff --git
a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
index b119620493..4b948c63a5 100644
--- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
@@ -22,7 +22,7 @@ import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.sql.sedona_sql.expressions.collect.ST_Collect
import org.apache.spark.sql.sedona_sql.expressions.raster._
import org.apache.spark.sql.sedona_sql.expressions._
-import
org.apache.spark.sql.sedona_sql.expressions.geography.{ST_GeogCollFromText,
ST_GeogFromEWKT, ST_GeogFromText, ST_GeogFromWKB, ST_GeogFromWKT}
+import
org.apache.spark.sql.sedona_sql.expressions.geography.{ST_GeogCollFromText,
ST_GeogFromEWKB, ST_GeogFromEWKT, ST_GeogFromText, ST_GeogFromWKB,
ST_GeogFromWKT}
import org.locationtech.jts.geom.Geometry
import org.locationtech.jts.operation.buffer.BufferParameters
@@ -46,6 +46,7 @@ object Catalog extends AbstractCatalog {
function[ST_GeogFromWKT](0),
function[ST_GeogFromText](0),
function[ST_GeogFromWKB](0),
+ function[ST_GeogFromEWKB](0),
function[ST_GeogFromEWKT](),
function[ST_GeomFromWKT](0),
function[ST_GeomFromEWKT](),
diff --git
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/geography/Constructors.scala
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/geography/Constructors.scala
index ba390220f1..13eb40ee81 100644
---
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/geography/Constructors.scala
+++
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/geography/Constructors.scala
@@ -93,3 +93,18 @@ private[apache] case class ST_GeogFromWKB(inputExpressions:
Seq[Expression])
copy(inputExpressions = newChildren)
}
}
+
+/**
+ * Return a Geography from an EWKB string
+ *
+ * @param inputExpressions
+ * This function takes a single EWKB input; the SRID is read from the EWKB
itself. The input format must be WKB binary array
+ * / string.
+ */
+private[apache] case class ST_GeogFromEWKB(inputExpressions: Seq[Expression])
+ extends InferredExpression(Constructors.geogFromWKB(_: Array[Byte])) {
+
+ protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) =
{
+ copy(inputExpressions = newChildren)
+ }
+}
diff --git
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala
index eaa0add57d..7dcfa2b115 100644
---
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala
+++
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.sedona_sql.expressions
import org.apache.spark.sql.Column
import org.apache.spark.sql.sedona_sql.DataFrameShims._
+import
org.apache.spark.sql.sedona_sql.expressions.geography.{ST_GeogCollFromText,
ST_GeogFromEWKB, ST_GeogFromText, ST_GeogFromWKB, ST_GeogFromWKT}
import
org.apache.spark.sql.sedona_sql.expressions.geography.{ST_GeogCollFromText,
ST_GeogFromEWKT, ST_GeogFromText, ST_GeogFromWKB, ST_GeogFromWKT}
object st_constructors {
@@ -90,15 +91,15 @@ object st_constructors {
def ST_GeomFromEWKT(wkt: Column): Column =
wrapExpression[ST_GeomFromEWKT](wkt)
def ST_GeomFromEWKT(wkt: String): Column =
wrapExpression[ST_GeomFromEWKT](wkt)
- def ST_GeogFromEWKT(wkt: Column): Column =
wrapExpression[ST_GeogFromEWKT](wkt)
- def ST_GeogFromEWKT(wkt: String): Column =
wrapExpression[ST_GeogFromEWKT](wkt)
-
def ST_GeogFromWKT(wkt: Column): Column =
wrapExpression[ST_GeogFromWKT](wkt, 0)
def ST_GeogFromWKT(wkt: String): Column =
wrapExpression[ST_GeogFromWKT](wkt, 0)
def ST_GeogFromWKT(wkt: Column, srid: Column): Column =
wrapExpression[ST_GeogFromWKT](wkt, srid)
def ST_GeogFromWKT(wkt: String, srid: Int): Column =
wrapExpression[ST_GeogFromWKT](wkt, srid)
+ def ST_GeogFromEWKT(wkt: Column): Column =
wrapExpression[ST_GeogFromEWKT](wkt)
+ def ST_GeogFromEWKT(wkt: String): Column =
wrapExpression[ST_GeogFromEWKT](wkt)
+
def ST_GeogFromText(wkt: Column): Column =
wrapExpression[ST_GeogFromText](wkt, 0)
def ST_GeogFromText(wkt: String): Column =
wrapExpression[ST_GeogFromText](wkt, 0)
def ST_GeogFromText(wkt: Column, srid: Column): Column =
@@ -111,6 +112,9 @@ object st_constructors {
wrapExpression[ST_GeogFromWKB](wkb, srid)
def ST_GeogFromWKB(wkb: String, srid: Int): Column =
wrapExpression[ST_GeogFromWKB](wkb, srid)
+ def ST_GeogFromEWKB(wkb: Column): Column =
wrapExpression[ST_GeogFromEWKB](wkb)
+ def ST_GeogFromEWKB(wkb: String): Column =
wrapExpression[ST_GeogFromEWKB](wkb)
+
def ST_LineFromText(wkt: Column): Column =
wrapExpression[ST_LineFromText](wkt)
def ST_LineFromText(wkt: String): Column =
wrapExpression[ST_LineFromText](wkt)
diff --git
a/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsDataFrameAPITest.scala
b/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsDataFrameAPITest.scala
index b44515871b..24625e9445 100644
---
a/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsDataFrameAPITest.scala
+++
b/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsDataFrameAPITest.scala
@@ -21,7 +21,6 @@ package org.apache.sedona.sql.geography
import org.apache.sedona.common.S2Geography.{Geography, WKBReader}
import org.apache.sedona.sql.TestBaseScala
import org.apache.spark.sql.functions.col
-import org.apache.spark.sql.sedona_sql.expressions.geography.ST_GeogFromEWKT
import org.apache.spark.sql.sedona_sql.expressions.{implicits, st_constructors}
import org.junit.Assert.{assertEquals, assertFalse, assertTrue}
import org.locationtech.jts.geom.PrecisionModel
@@ -63,6 +62,20 @@ class ConstructorsDataFrameAPITest extends TestBaseScala {
assert(actualResult == expectedResult)
}
+ it("passed ST_GeogFromEWKB") {
+ val wkbSeq = Seq[Array[Byte]](
+ Array[Byte](1, 2, 0, 0, 32, -26, 16, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, -124,
-42, 0, -64, 0, 0,
+ 0, 0, -128, -75, -42, -65, 0, 0, 0, 96, -31, -17, -9, -65, 0, 0, 0,
-128, 7, 93, -27,
+ -65))
+ val df = wkbSeq.toDF("wkb") select (st_constructors.ST_GeogFromEWKB("wkb"))
+ val actualResult = df.take(1)(0).get(0).asInstanceOf[Geography].toString()
+ val expectedResult = {
+ "SRID=4326; LINESTRING (-2.1 -0.4, -1.5 -0.7)"
+ }
+ assert(df.take(1)(0).get(0).asInstanceOf[Geography].getSRID == 4326)
+ assert(actualResult == expectedResult)
+ }
+
it("passed st_geomfromewkt") {
val df = sparkSession
.sql("SELECT 'SRID=4269;POINT(0.0 1.0)' AS wkt")
diff --git
a/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsTest.scala
b/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsTest.scala
index 5a07b45edd..6f4a9b376d 100644
---
a/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsTest.scala
+++
b/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsTest.scala
@@ -118,4 +118,33 @@ class ConstructorsTest extends TestBaseScala {
val nullGeom = sparkSession.sql("SELECT ST_GeogFromWKB(null)")
assert(nullGeom.first().isNullAt(0))
}
+
+ it("Passed ST_GeogFromEWKB") {
+ // UTF-8 encoded WKB String
+ val mixedWkbGeometryInputLocation =
+ getClass.getResource("/county_small_wkb.tsv").getPath
+ val polygonWkbDf = sparkSession.read
+ .format("csv")
+ .option("delimiter", "\t")
+ .option("header", "false")
+ .load(mixedWkbGeometryInputLocation)
+ polygonWkbDf.createOrReplaceTempView("polygontable")
+ val polygonDf = sparkSession.sql(
+ "select ST_GeogFromEWKB(polygontable._c0) as countyshape from
polygontable")
+ assert(polygonDf.count() == 100)
+ // RAW binary array
+ val wkbSeq = Seq[Array[Byte]](
+ Array[Byte](1, 2, 0, 0, 32, -26, 16, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, -124,
-42, 0, -64, 0, 0,
+ 0, 0, -128, -75, -42, -65, 0, 0, 0, 96, -31, -17, -9, -65, 0, 0, 0,
-128, 7, 93, -27,
+ -65))
+ val rawWkbDf = wkbSeq.toDF("wkb")
+ rawWkbDf.createOrReplaceTempView("rawWKBTable")
+ val geography =
+ sparkSession.sql("SELECT ST_GeogFromEWKB(rawWKBTable.wkb) as countyshape
from rawWKBTable")
+ val expectedGeog = {
+ "SRID=4326; LINESTRING (-2.1 -0.4, -1.5 -0.7)"
+ }
+ assert(geography.first().getAs[Geography](0).getSRID == 4326)
+ assert(geography.first().getAs[Geography](0).toString.equals(expectedGeog))
+ }
}