This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch osm-precision in repository https://gitbox.apache.org/repos/asf/sedona.git
commit e2f6ba4469c1f8f01000c2663ce4847bf728b2f2 Author: Jia Yu <[email protected]> AuthorDate: Wed Aug 27 01:19:24 2025 -0700 Fix the precision lost issue --- .../osmpbf/extractors/NodeExtractor.java | 4 ++-- .../org/apache/sedona/sql/OsmReaderTest.scala | 28 ++++++++++++++++++++-- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/spark/common/src/main/java/org/apache/sedona/sql/datasources/osmpbf/extractors/NodeExtractor.java b/spark/common/src/main/java/org/apache/sedona/sql/datasources/osmpbf/extractors/NodeExtractor.java index 39e98537cd..9ec3147b1e 100644 --- a/spark/common/src/main/java/org/apache/sedona/sql/datasources/osmpbf/extractors/NodeExtractor.java +++ b/spark/common/src/main/java/org/apache/sedona/sql/datasources/osmpbf/extractors/NodeExtractor.java @@ -52,8 +52,8 @@ public class NodeExtractor { // https://wiki.openstreetmap.org/wiki/PBF_Format // latitude = .000000001 * (lat_offset + (granularity * lat)) // longitude = .000000001 * (lon_offset + (granularity * lon)) - float lat = (float) (.000000001 * (latOffset + (latitude * granularity))); - float lon = (float) (.000000001 * (lonOffset + (longitude * granularity))); + double lat = .000000001 * (latOffset + (latitude * granularity)); + double lon = .000000001 * (lonOffset + (longitude * granularity)); HashMap<String, String> tags = TagsResolver.resolveTags(node.getKeysCount(), node::getKeys, node::getVals, stringTable); diff --git a/spark/common/src/test/scala/org/apache/sedona/sql/OsmReaderTest.scala b/spark/common/src/test/scala/org/apache/sedona/sql/OsmReaderTest.scala index 6bedc8f97a..177f113011 100644 --- a/spark/common/src/test/scala/org/apache/sedona/sql/OsmReaderTest.scala +++ b/spark/common/src/test/scala/org/apache/sedona/sql/OsmReaderTest.scala @@ -62,7 +62,7 @@ class OsmReaderTest extends TestBaseScala with Matchers { .collect() should contain theSameElementsAs Array( Node(1002, 48.86, 2.35, Map("amenity" -> "cafe", "name" -> "Cafe de Paris")), Node(1003, 30.12, 22.23, Map("amenity" -> "bakery", "name" -> "Delicious Pastries")), - Node(1001, 52.52, 13.40, Map("amenity" -> "restaurant", "name" -> "Curry 36"))) + Node(1001, 52.52, 13.41, Map("amenity" -> "restaurant", "name" -> "Curry 36"))) } it("should parse dense nodes") { @@ -79,7 +79,7 @@ class OsmReaderTest extends TestBaseScala with Matchers { .collect() should contain theSameElementsAs Array( Node(1002, 48.86, 2.35, Map("amenity" -> "cafe", "name" -> "Cafe de Paris")), Node(1003, 30.12, 22.23, Map("amenity" -> "bakery", "name" -> "Delicious Pastries")), - Node(1001, 52.52, 13.40, Map("amenity" -> "restaurant", "name" -> "Curry 36"))) + Node(1001, 52.52, 13.41, Map("amenity" -> "restaurant", "name" -> "Curry 36"))) } it("should be able to read from osm file on s3") { @@ -206,6 +206,30 @@ class OsmReaderTest extends TestBaseScala with Matchers { relationsList.length shouldEqual (expectedRelationsList.length) relationsList should contain theSameElementsAs expectedRelationsList } + + it("should not lose precision due to float to double conversion") { + // Test for accuracy loss bug in NodeExtractor and DenseNodeExtractor + val node = sparkSession.read + .format("osmpbf") + .load(nodesPath) + .where("kind == 'node'") + .select("location.latitude", "location.longitude") + .first() + + val latitude = node.getDouble(0) + val longitude = node.getDouble(1) + + // Check that coordinates maintain precision beyond float limits + val latAsFloat = latitude.toFloat + val lonAsFloat = longitude.toFloat + + // If there's a difference, it indicates potential precision loss from float arithmetic + val latDiff = Math.abs(latitude - latAsFloat) + val lonDiff = Math.abs(longitude - lonAsFloat) + + // For high-precision coordinates, there should be some difference + (latDiff > 1e-10 || lonDiff > 1e-10) shouldBe true + } } private def prepareFile(name: String, path: String, minioClient: MinioClient): String = {
