This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/sedona.git
The following commit(s) were added to refs/heads/master by this push:
new 6a7d0cc722 [GH-2308] Fix the precision lost issue in OSM PBF reader
(#2312)
6a7d0cc722 is described below
commit 6a7d0cc72257f757dd89c86b1dffa15090c7e8f8
Author: Jia Yu <[email protected]>
AuthorDate: Wed Aug 27 09:25:50 2025 -0700
[GH-2308] Fix the precision lost issue in OSM PBF reader (#2312)
---
.../osmpbf/extractors/NodeExtractor.java | 4 ++--
.../org/apache/sedona/sql/OsmReaderTest.scala | 28 ++++++++++++++++++++--
2 files changed, 28 insertions(+), 4 deletions(-)
diff --git a/spark/common/src/main/java/org/apache/sedona/sql/datasources/osmpbf/extractors/NodeExtractor.java b/spark/common/src/main/java/org/apache/sedona/sql/datasources/osmpbf/extractors/NodeExtractor.java
index 39e98537cd..9ec3147b1e 100644
--- a/spark/common/src/main/java/org/apache/sedona/sql/datasources/osmpbf/extractors/NodeExtractor.java
+++ b/spark/common/src/main/java/org/apache/sedona/sql/datasources/osmpbf/extractors/NodeExtractor.java
@@ -52,8 +52,8 @@ public class NodeExtractor {
// https://wiki.openstreetmap.org/wiki/PBF_Format
// latitude = .000000001 * (lat_offset + (granularity * lat))
// longitude = .000000001 * (lon_offset + (granularity * lon))
- float lat = (float) (.000000001 * (latOffset + (latitude * granularity)));
- float lon = (float) (.000000001 * (lonOffset + (longitude * granularity)));
+ double lat = .000000001 * (latOffset + (latitude * granularity));
+ double lon = .000000001 * (lonOffset + (longitude * granularity));
HashMap<String, String> tags =
TagsResolver.resolveTags(node.getKeysCount(), node::getKeys,
node::getVals, stringTable);
diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/OsmReaderTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/OsmReaderTest.scala
index 6bedc8f97a..177f113011 100644
--- a/spark/common/src/test/scala/org/apache/sedona/sql/OsmReaderTest.scala
+++ b/spark/common/src/test/scala/org/apache/sedona/sql/OsmReaderTest.scala
@@ -62,7 +62,7 @@ class OsmReaderTest extends TestBaseScala with Matchers {
.collect() should contain theSameElementsAs Array(
Node(1002, 48.86, 2.35, Map("amenity" -> "cafe", "name" -> "Cafe de Paris")),
Node(1003, 30.12, 22.23, Map("amenity" -> "bakery", "name" -> "Delicious Pastries")),
- Node(1001, 52.52, 13.40, Map("amenity" -> "restaurant", "name" -> "Curry 36")))
+ Node(1001, 52.52, 13.41, Map("amenity" -> "restaurant", "name" -> "Curry 36")))
}
it("should parse dense nodes") {
@@ -79,7 +79,7 @@ class OsmReaderTest extends TestBaseScala with Matchers {
.collect() should contain theSameElementsAs Array(
Node(1002, 48.86, 2.35, Map("amenity" -> "cafe", "name" -> "Cafe de Paris")),
Node(1003, 30.12, 22.23, Map("amenity" -> "bakery", "name" -> "Delicious Pastries")),
- Node(1001, 52.52, 13.40, Map("amenity" -> "restaurant", "name" -> "Curry 36")))
+ Node(1001, 52.52, 13.41, Map("amenity" -> "restaurant", "name" -> "Curry 36")))
}
it("should be able to read from osm file on s3") {
@@ -206,6 +206,30 @@ class OsmReaderTest extends TestBaseScala with Matchers {
relationsList.length shouldEqual (expectedRelationsList.length)
relationsList should contain theSameElementsAs expectedRelationsList
}
+
+ it("should not lose precision due to float to double conversion") {
+ // Test for accuracy loss bug in NodeExtractor and DenseNodeExtractor
+ val node = sparkSession.read
+ .format("osmpbf")
+ .load(nodesPath)
+ .where("kind == 'node'")
+ .select("location.latitude", "location.longitude")
+ .first()
+
+ val latitude = node.getDouble(0)
+ val longitude = node.getDouble(1)
+
+ // Check that coordinates maintain precision beyond float limits
+ val latAsFloat = latitude.toFloat
+ val lonAsFloat = longitude.toFloat
+
+ // If there's a difference, it indicates potential precision loss from float arithmetic
+ val latDiff = Math.abs(latitude - latAsFloat)
+ val lonDiff = Math.abs(longitude - lonAsFloat)
+
+ // For high-precision coordinates, there should be some difference
+ (latDiff > 1e-10 || lonDiff > 1e-10) shouldBe true
+ }
}
private def prepareFile(name: String, path: String, minioClient:
MinioClient): String = {