This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git


The following commit(s) were added to refs/heads/master by this push:
     new a5207a81cc [GH-2261] add new constructor ST_GeogFromEKWB (#2260)
a5207a81cc is described below

commit a5207a81cccc147ce203832bbdb74438504cb307
Author: Zhuocheng Shang <[email protected]>
AuthorDate: Wed Aug 13 23:23:10 2025 -0700

    [GH-2261] add new constructor ST_GeogFromEKWB (#2260)
    
    * add new constructor ST_GeogFromEKWB
    
    * test SRID preserved
    
    * add API doc
    
    * fix EWKB SRID output
    
    * ci: retrigger workflow
    
    * ci: retrigger workflow
    
    * fix miss ST_GeogFromWKT header
---
 .../sedona/common/Geography/ConstructorsTest.java  | 37 ++++++++++++++++++
 docs/api/sql/geography/Constructor.md              | 45 ++++++++++++++++++++++
 .../scala/org/apache/sedona/sql/UDF/Catalog.scala  |  3 +-
 .../expressions/geography/Constructors.scala       | 15 ++++++++
 .../sedona_sql/expressions/st_constructors.scala   | 10 +++--
 .../geography/ConstructorsDataFrameAPITest.scala   | 15 +++++++-
 .../sedona/sql/geography/ConstructorsTest.scala    | 29 ++++++++++++++
 7 files changed, 149 insertions(+), 5 deletions(-)

diff --git 
a/common/src/test/java/org/apache/sedona/common/Geography/ConstructorsTest.java 
b/common/src/test/java/org/apache/sedona/common/Geography/ConstructorsTest.java
index 4d1100c446..4d2075920e 100644
--- 
a/common/src/test/java/org/apache/sedona/common/Geography/ConstructorsTest.java
+++ 
b/common/src/test/java/org/apache/sedona/common/Geography/ConstructorsTest.java
@@ -24,6 +24,7 @@ import com.google.common.geometry.S2LatLng;
 import com.google.common.geometry.S2Point;
 import org.apache.sedona.common.S2Geography.Geography;
 import org.apache.sedona.common.S2Geography.SinglePointGeography;
+import org.apache.sedona.common.S2Geography.WKBReader;
 import org.apache.sedona.common.S2Geography.WKBWriter;
 import org.apache.sedona.common.geography.Constructors;
 import org.junit.Test;
@@ -86,4 +87,40 @@ public class ConstructorsTest {
     assertEquals("POINT (-64 45)", result.toString());
     assertEquals(0, result.getSRID());
   }
+
+  @Test
+  public void testGeogFromEWKB() throws ParseException {
+    String ewkbString = 
"01010000A0E61000000000000000000000000000000000F03F0000000000000040";
+    byte[] wkbBytes = WKBReader.hexToBytes(ewkbString);
+    Geography result = Constructors.geogFromWKB(wkbBytes);
+    String expectedGeom = "SRID=4326; POINT (0 1)";
+    assertEquals(expectedGeom, result.toString());
+    assertEquals(4326, result.getSRID());
+
+    ewkbString =
+        
"0103000020E61000000100000005000000000000000000000000000000000000000000000000000000000000000000F03F000000000000F03F000000000000F03F000000000000F03F000000000000000000000000000000000000000000000000";
+    wkbBytes = WKBReader.hexToBytes(ewkbString);
+    result = Constructors.geogFromWKB(wkbBytes);
+    expectedGeom = "SRID=4326; POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0))";
+    assertEquals(expectedGeom, result.toString());
+    assertEquals(4326, result.getSRID());
+
+    ewkbString =
+        "0106000020E610000002000000"
+            + "01030000000100000004000000"
+            + "00000000000000000000000000000000"
+            + "000000000000F03F0000000000000000"
+            + "000000000000F03F000000000000F03F"
+            + "00000000000000000000000000000000"
+            + "01030000000100000004000000"
+            + "000000000000F0BF000000000000F0BF"
+            + "000000000000F0BF0000000000000000"
+            + "0000000000000000000000000000F0BF"
+            + "000000000000F0BF000000000000F0BF";
+    wkbBytes = WKBReader.hexToBytes(ewkbString);
+    result = Constructors.geogFromWKB(wkbBytes);
+    expectedGeom = "SRID=4326; MULTIPOLYGON (((0 0, 1 0, 1 1, 0 0)), ((-1 -1, 
-1 0, 0 -1, -1 -1)))";
+    assertEquals(expectedGeom, result.toString());
+    assertEquals(4326, result.getSRID());
+  }
 }
diff --git a/docs/api/sql/geography/Constructor.md 
b/docs/api/sql/geography/Constructor.md
index d9dc329252..8cf2d9b74d 100644
--- a/docs/api/sql/geography/Constructor.md
+++ b/docs/api/sql/geography/Constructor.md
@@ -17,6 +17,50 @@
  under the License.
  -->
 
+## ST_GeogFromWKB
+
+Introduction: Construct a Geography from WKB Binary.
+
+Format:
+
+`ST_GeogFromWKB (Wkb: Binary)`
+
+Since: `v1.8.0`
+
+SQL Example
+
+```sql
+SELECT ST_GeogFromWKB([01 02 00 00 00 02 00 00 00 00 00 00 00 84 d6 00 c0 00 
00 00 00 80 b5 d6 bf 00 00 00 60 e1 ef f7 bf 00 00 00 80 07 5d e5 bf])
+```
+
+Output:
+
+```
+LINESTRING (-2.1 -0.4, -1.5 -0.7)
+```
+
+## ST_GeogFromEWKB
+
+Introduction: Construct a Geography from EWKB Binary. This function is an 
alias of [ST_GeogFromWKB](#st_geogfromwkb).
+
+Format:
+
+`ST_GeogFromEWKB (EWkb: Binary)`
+
+Since: `v1.8.0`
+
+SQL Example
+
+```sql
+SELECT ST_GeogFromEWKB([01 02 00 00 20 E6 10 00 00 02 00 00 00 00 00 00 00 84 
D6 00 C0 00 00 00 00 80 B5 D6 BF 00 00 00 60 E1 EF F7 BF 00 00 00 80 07 5D E5 
BF])
+```
+
+Output:
+
+```
+SRID=4326; LINESTRING (-2.1 -0.4, -1.5 -0.7)
+```
+
 ## ST_GeogFromWKT
 
 Introduction: Construct a Geography from WKT. If SRID is not set, it defaults 
to 0 (unknown).
@@ -58,6 +102,7 @@ SRID=4326; LINESTRING (1 2, 3 4, 5 6)
 Introduction: Construct a Geography from OGC Extended WKT.
 
 Format:
+
 `ST_GeogFromEWKT (EWkt: String)`
 
 Since: `v1.8.0`
diff --git 
a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala 
b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
index b119620493..4b948c63a5 100644
--- a/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
+++ b/spark/common/src/main/scala/org/apache/sedona/sql/UDF/Catalog.scala
@@ -22,7 +22,7 @@ import org.apache.spark.sql.expressions.Aggregator
 import org.apache.spark.sql.sedona_sql.expressions.collect.ST_Collect
 import org.apache.spark.sql.sedona_sql.expressions.raster._
 import org.apache.spark.sql.sedona_sql.expressions._
-import 
org.apache.spark.sql.sedona_sql.expressions.geography.{ST_GeogCollFromText, 
ST_GeogFromEWKT, ST_GeogFromText, ST_GeogFromWKB, ST_GeogFromWKT}
+import 
org.apache.spark.sql.sedona_sql.expressions.geography.{ST_GeogCollFromText, 
ST_GeogFromEWKB, ST_GeogFromEWKT, ST_GeogFromText, ST_GeogFromWKB, 
ST_GeogFromWKT}
 import org.locationtech.jts.geom.Geometry
 import org.locationtech.jts.operation.buffer.BufferParameters
 
@@ -46,6 +46,7 @@ object Catalog extends AbstractCatalog {
     function[ST_GeogFromWKT](0),
     function[ST_GeogFromText](0),
     function[ST_GeogFromWKB](0),
+    function[ST_GeogFromEWKB](0),
     function[ST_GeogFromEWKT](),
     function[ST_GeomFromWKT](0),
     function[ST_GeomFromEWKT](),
diff --git 
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/geography/Constructors.scala
 
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/geography/Constructors.scala
index ba390220f1..13eb40ee81 100644
--- 
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/geography/Constructors.scala
+++ 
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/geography/Constructors.scala
@@ -93,3 +93,18 @@ private[apache] case class ST_GeogFromWKB(inputExpressions: 
Seq[Expression])
     copy(inputExpressions = newChildren)
   }
 }
+
+/**
+ * Return a Geography from an EWKB binary array
+ *
+ * @param inputExpressions
+ *   This function takes the EWKB input to parse. The format must be an EWKB
+ *   binary array / string.
+ */
+private[apache] case class ST_GeogFromEWKB(inputExpressions: Seq[Expression])
+    extends InferredExpression(Constructors.geogFromWKB(_: Array[Byte])) {
+
+  protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]) = 
{
+    copy(inputExpressions = newChildren)
+  }
+}
diff --git 
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala
 
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala
index eaa0add57d..7dcfa2b115 100644
--- 
a/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala
+++ 
b/spark/common/src/main/scala/org/apache/spark/sql/sedona_sql/expressions/st_constructors.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.sedona_sql.expressions
 
 import org.apache.spark.sql.Column
 import org.apache.spark.sql.sedona_sql.DataFrameShims._
+import 
org.apache.spark.sql.sedona_sql.expressions.geography.{ST_GeogCollFromText, 
ST_GeogFromEWKB, ST_GeogFromText, ST_GeogFromWKB, ST_GeogFromWKT}
 import 
org.apache.spark.sql.sedona_sql.expressions.geography.{ST_GeogCollFromText, 
ST_GeogFromEWKT, ST_GeogFromText, ST_GeogFromWKB, ST_GeogFromWKT}
 
 object st_constructors {
@@ -90,15 +91,15 @@ object st_constructors {
   def ST_GeomFromEWKT(wkt: Column): Column = 
wrapExpression[ST_GeomFromEWKT](wkt)
   def ST_GeomFromEWKT(wkt: String): Column = 
wrapExpression[ST_GeomFromEWKT](wkt)
 
-  def ST_GeogFromEWKT(wkt: Column): Column = 
wrapExpression[ST_GeogFromEWKT](wkt)
-  def ST_GeogFromEWKT(wkt: String): Column = 
wrapExpression[ST_GeogFromEWKT](wkt)
-
   def ST_GeogFromWKT(wkt: Column): Column = 
wrapExpression[ST_GeogFromWKT](wkt, 0)
   def ST_GeogFromWKT(wkt: String): Column = 
wrapExpression[ST_GeogFromWKT](wkt, 0)
   def ST_GeogFromWKT(wkt: Column, srid: Column): Column =
     wrapExpression[ST_GeogFromWKT](wkt, srid)
   def ST_GeogFromWKT(wkt: String, srid: Int): Column = 
wrapExpression[ST_GeogFromWKT](wkt, srid)
 
+  def ST_GeogFromEWKT(wkt: Column): Column = 
wrapExpression[ST_GeogFromEWKT](wkt)
+  def ST_GeogFromEWKT(wkt: String): Column = 
wrapExpression[ST_GeogFromEWKT](wkt)
+
   def ST_GeogFromText(wkt: Column): Column = 
wrapExpression[ST_GeogFromText](wkt, 0)
   def ST_GeogFromText(wkt: String): Column = 
wrapExpression[ST_GeogFromText](wkt, 0)
   def ST_GeogFromText(wkt: Column, srid: Column): Column =
@@ -111,6 +112,9 @@ object st_constructors {
     wrapExpression[ST_GeogFromWKB](wkb, srid)
   def ST_GeogFromWKB(wkb: String, srid: Int): Column = 
wrapExpression[ST_GeogFromWKB](wkb, srid)
 
+  def ST_GeogFromEWKB(wkb: Column): Column = 
wrapExpression[ST_GeogFromEWKB](wkb)
+  def ST_GeogFromEWKB(wkb: String): Column = 
wrapExpression[ST_GeogFromEWKB](wkb)
+
   def ST_LineFromText(wkt: Column): Column = 
wrapExpression[ST_LineFromText](wkt)
   def ST_LineFromText(wkt: String): Column = 
wrapExpression[ST_LineFromText](wkt)
 
diff --git 
a/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsDataFrameAPITest.scala
 
b/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsDataFrameAPITest.scala
index b44515871b..24625e9445 100644
--- 
a/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsDataFrameAPITest.scala
+++ 
b/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsDataFrameAPITest.scala
@@ -21,7 +21,6 @@ package org.apache.sedona.sql.geography
 import org.apache.sedona.common.S2Geography.{Geography, WKBReader}
 import org.apache.sedona.sql.TestBaseScala
 import org.apache.spark.sql.functions.col
-import org.apache.spark.sql.sedona_sql.expressions.geography.ST_GeogFromEWKT
 import org.apache.spark.sql.sedona_sql.expressions.{implicits, st_constructors}
 import org.junit.Assert.{assertEquals, assertFalse, assertTrue}
 import org.locationtech.jts.geom.PrecisionModel
@@ -63,6 +62,20 @@ class ConstructorsDataFrameAPITest extends TestBaseScala {
     assert(actualResult == expectedResult)
   }
 
+  it("passed ST_GeogFromEWKB") {
+    val wkbSeq = Seq[Array[Byte]](
+      Array[Byte](1, 2, 0, 0, 32, -26, 16, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, -124, 
-42, 0, -64, 0, 0,
+        0, 0, -128, -75, -42, -65, 0, 0, 0, 96, -31, -17, -9, -65, 0, 0, 0, 
-128, 7, 93, -27,
+        -65))
+    val df = wkbSeq.toDF("wkb") select (st_constructors.ST_GeogFromEWKB("wkb"))
+    val actualResult = df.take(1)(0).get(0).asInstanceOf[Geography].toString()
+    val expectedResult = {
+      "SRID=4326; LINESTRING (-2.1 -0.4, -1.5 -0.7)"
+    }
+    assert(df.take(1)(0).get(0).asInstanceOf[Geography].getSRID == 4326)
+    assert(actualResult == expectedResult)
+  }
+
   it("passed st_geomfromewkt") {
     val df = sparkSession
       .sql("SELECT 'SRID=4269;POINT(0.0 1.0)' AS wkt")
diff --git 
a/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsTest.scala
 
b/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsTest.scala
index 5a07b45edd..6f4a9b376d 100644
--- 
a/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsTest.scala
+++ 
b/spark/common/src/test/scala/org/apache/sedona/sql/geography/ConstructorsTest.scala
@@ -118,4 +118,33 @@ class ConstructorsTest extends TestBaseScala {
     val nullGeom = sparkSession.sql("SELECT ST_GeogFromWKB(null)")
     assert(nullGeom.first().isNullAt(0))
   }
+
+  it("Passed ST_GeogFromEWKB") {
+    // UTF-8 encoded WKB String
+    val mixedWkbGeometryInputLocation =
+      getClass.getResource("/county_small_wkb.tsv").getPath
+    val polygonWkbDf = sparkSession.read
+      .format("csv")
+      .option("delimiter", "\t")
+      .option("header", "false")
+      .load(mixedWkbGeometryInputLocation)
+    polygonWkbDf.createOrReplaceTempView("polygontable")
+    val polygonDf = sparkSession.sql(
+      "select ST_GeogFromEWKB(polygontable._c0) as countyshape from 
polygontable")
+    assert(polygonDf.count() == 100)
+    // RAW binary array
+    val wkbSeq = Seq[Array[Byte]](
+      Array[Byte](1, 2, 0, 0, 32, -26, 16, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, -124, 
-42, 0, -64, 0, 0,
+        0, 0, -128, -75, -42, -65, 0, 0, 0, 96, -31, -17, -9, -65, 0, 0, 0, 
-128, 7, 93, -27,
+        -65))
+    val rawWkbDf = wkbSeq.toDF("wkb")
+    rawWkbDf.createOrReplaceTempView("rawWKBTable")
+    val geography =
+      sparkSession.sql("SELECT ST_GeogFromEWKB(rawWKBTable.wkb) as countyshape 
from rawWKBTable")
+    val expectedGeog = {
+      "SRID=4326; LINESTRING (-2.1 -0.4, -1.5 -0.7)"
+    }
+    assert(geography.first().getAs[Geography](0).getSRID == 4326)
+    assert(geography.first().getAs[Geography](0).toString.equals(expectedGeog))
+  }
 }

Reply via email to