This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-spatialbench.git


The following commit(s) were added to refs/heads/main by this push:
     new b930cd0  Fix duplicate geometries issue and update default configs to improve coverage (#2)
b930cd0 is described below

commit b930cd0f9c1e6753fd6e7ea7b6c15d412835130f
Author: Pranav Toggi <[email protected]>
AuthorDate: Fri Aug 29 15:55:23 2025 -0700

    Fix duplicate geometries issue and update default configs to improve coverage (#2)
---
 spatialbench-arrow/src/lib.rs           |  20 +++++------
 spatialbench-config.yml                 |   6 ++--
 spatialbench/data/sf-v1/building.tbl.gz | Bin 47029 -> 32501 bytes
 spatialbench/data/sf-v1/trip.tbl.gz     | Bin 226051 -> 269490 bytes
 spatialbench/src/generators.rs          |  10 +++---
 spatialbench/src/lib.rs                 |   6 ++--
 spatialbench/src/spider.rs              |  59 +++++++++++++++++++++++++-------
 spatialbench/src/spider_defaults.rs     |   6 ++--
 8 files changed, 70 insertions(+), 37 deletions(-)

diff --git a/spatialbench-arrow/src/lib.rs b/spatialbench-arrow/src/lib.rs
index e1b73ae..a9e8b85 100644
--- a/spatialbench-arrow/src/lib.rs
+++ b/spatialbench-arrow/src/lib.rs
@@ -21,16 +21,16 @@
 //!   
"+-----------+-----------+-------------+--------------+---------------------+---------------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+",
 //!   "| t_tripkey | t_custkey | t_driverkey | t_vehiclekey | t_pickuptime     
   | t_dropofftime       | t_fare  | t_tip   | t_totalamount | t_distance | 
t_pickuploc                                | t_dropoffloc                       
        |",
 //!   
"+-----------+-----------+-------------+--------------+---------------------+---------------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+",
-//!   "| 1         | 215       | 1           | 1            | 
1997-07-24T06:58:22 | 1997-07-24T13:59:54 | 0.00034 | 0.00002 | 0.00037       | 
0.00014    | 010100000000000000009f65c000000000008056c0 | 
0101000000ea6f323f719f65c0a190cff1f28856c0 |",
-//!   "| 2         | 172       | 1           | 1            | 
1997-12-24T08:47:14 | 1997-12-24T09:28:57 | 0.00003 | 0.00000 | 0.00004       | 
0.00001    | 010100000000000000800165c000000000001835c0 | 
01010000007707047c0f0165c0e360c2aa721735c0 |",
-//!   "| 3         | 46        | 1           | 1            | 
1993-06-27T13:27:07 | 1993-06-27T13:34:51 | 0.00000 | 0.00000 | 0.00000       | 
0.00000    | 010100000000000000007265c000000000809953c0 | 
0101000000123a01b00e7265c0fc9862509e9953c0 |",
-//!   "| 4         | 40        | 1           | 1            | 
1996-08-02T04:14:27 | 1996-08-02T05:29:32 | 0.00005 | 0.00000 | 0.00005       | 
0.00002    | 010100000000000000800f56c00000000000c63bc0 | 
01010000005c186d7e111056c0435fb4a6fdcb3bc0 |",
-//!   "| 5         | 232       | 1           | 1            | 
1996-08-23T12:48:20 | 1996-08-23T13:36:15 | 0.00002 | 0.00000 | 0.00003       | 
0.00001    | 010100000000000000406460c00000000000da4640 | 
01010000003da9a3a1ae6460c00036836c17db4640 |",
-//!   "| 6         | 46        | 1           | 1            | 
1994-11-16T16:39:14 | 1994-11-16T17:26:07 | 0.00003 | 0.00000 | 0.00003       | 
0.00001    | 010100000000000000002666c000000000806f40c0 | 
01010000009fbda7303e2666c0cdb6cb65c06d40c0 |",
-//!   "| 7         | 284       | 1           | 1            | 
1996-01-20T06:18:56 | 1996-01-20T06:18:56 | 0.00000 | 0.00000 | 0.00000       | 
0.00000    | 010100000000000000002963c00000000000e040c0 | 
010100000000000000002963c00000000000e040c0 |",
-//!   "| 8         | 233       | 1           | 1            | 
1995-01-09T23:26:54 | 1995-01-10T00:16:28 | 0.00003 | 0.00000 | 0.00003       | 
0.00001    | 010100000000000000008056c000000000c03955c0 | 
0101000000c0e91ba00d8156c06e03b14bd83955c0 |",
-//!   "| 9         | 178       | 1           | 1            | 
1993-10-13T11:07:04 | 1993-10-13T12:42:27 | 0.00005 | 0.00001 | 0.00007       | 
0.00003    | 010100000000000000005366c00000000000e050c0 | 
0101000000a6ef3504e75266c0448c538406e250c0 |",
-//!   "| 10        | 118       | 1           | 1            | 
1994-11-08T21:05:58 | 1994-11-08T21:21:29 | 0.00001 | 0.00000 | 0.00001       | 
0.00000    | 010100000000000000008066c000000000c07456c0 | 
01010000001459106fe27f66c08d065341837456c0 |",
+//!   "| 1         | 215       | 1           | 1            | 
1997-07-24T06:58:22 | 1997-07-24T13:59:54 | 0.00034 | 0.00002 | 0.00037       | 
0.00014    | 01010000006c5ace4aff9e65c0e479ba04f19b4d40 | 
010100000055ca008a709f65c0a1581b210b8a4d40 |",
+//!   "| 2         | 172       | 1           | 1            | 
1997-12-24T08:47:14 | 1997-12-24T09:28:57 | 0.00003 | 0.00000 | 0.00004       | 
0.00001    | 0101000000e711ed7431fd64c02fab0bd352644140 | 
01010000005d19f1f0c0fc64c0bd7aaa7d99644140 |",
+//!   "| 3         | 46        | 1           | 1            | 
1993-06-27T13:27:07 | 1993-06-27T13:34:51 | 0.00000 | 0.00000 | 0.00000       | 
0.00000    | 0101000000a620e461467165c033cd2a2279fd4340 | 
0101000000b85ae511557165c03a9b65813cfd4340 |",
+//!   "| 4         | 40        | 1           | 1            | 
1996-08-02T04:14:27 | 1996-08-02T05:29:32 | 0.00005 | 0.00000 | 0.00005       | 
0.00002    | 010100000060a28b97b80756c095bfd60000fb4d40 | 
0101000000bcbaf8154a0856c0f38f7c2d01f84d40 |",
+//!   "| 5         | 232       | 1           | 1            | 
1996-08-23T12:48:20 | 1996-08-23T13:36:15 | 0.00002 | 0.00000 | 0.00003       | 
0.00001    | 010100000096c4fe57c25b60c00080d1c19f8664bf | 
0101000000d46da2f9305c60c0031ad78540aa783f |",
+//!   "| 6         | 46        | 1           | 1            | 
1994-11-16T16:39:14 | 1994-11-16T17:26:07 | 0.00003 | 0.00000 | 0.00003       | 
0.00001    | 0101000000c356bf886c2266c000fa5635520004c0 | 
0101000000611467b9aa2266c0b566129258e403c0 |",
+//!   "| 7         | 284       | 1           | 1            | 
1996-01-20T06:18:56 | 1996-01-20T06:18:56 | 0.00000 | 0.00000 | 0.00000       | 
0.00000    | 010100000097a0d0fc7b2563c074fb9b06fbf54340 | 
010100000097a0d0fc7b2563c074fb9b06fbf54340 |",
+//!   "| 8         | 233       | 1           | 1            | 
1995-01-09T23:26:54 | 1995-01-10T00:16:28 | 0.00003 | 0.00000 | 0.00003       | 
0.00001    | 01010000002c7986ba597f56c0a27a6b60ab544340 | 
0101000000ec62a25a678056c0c77309c97a544340 |",
+//!   "| 9         | 178       | 1           | 1            | 
1993-10-13T11:07:04 | 1993-10-13T12:42:27 | 0.00005 | 0.00001 | 0.00007       | 
0.00003    | 0101000000b3295778975166c09078680effff4840 | 
010100000059198d7c7e5166c00760c105f2fb4840 |",
+//!   "| 10        | 118       | 1           | 1            | 
1994-11-08T21:05:58 | 1994-11-08T21:21:29 | 0.00001 | 0.00000 | 0.00001       | 
0.00000    | 01010000004900edfdfc7f66c0c58ec6a17eef5240 | 
01010000005d59fd6cdf7f66c038887360bbef5240 |",
 //!   
"+-----------+-----------+-------------+--------------+---------------------+---------------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+"
 //! ]);
 //! ```
diff --git a/spatialbench-config.yml b/spatialbench-config.yml
index dce10a7..df56384 100644
--- a/spatialbench-config.yml
+++ b/spatialbench-config.yml
@@ -3,7 +3,7 @@ trip:
   geom_type: point
   dim: 2
   seed: 42
-  affine: [360.0, 0.0, -180.0, 0.0, 180.0, -90.0]
+  affine: [360.0, 0.0, -180.0, 0.0, -160.0, 80.0]
   width: 0.0
   height: 0.0
   maxseg: 0
@@ -11,14 +11,14 @@ trip:
   params:
     type: bit
     probability: 0.2
-    digits: 10
+    digits: 30
 
 building:
   dist_type: sierpinski
   geom_type: polygon
   dim: 2
   seed: 12345
-  affine: [360.0, 0.0, -180.0, 0.0, 180.0, -90.0]
+  affine: [360.0, 0.0, -180.0, 0.0, -160.0, 80.0]
   width: 0.0
   height: 0.0
   maxseg: 5
diff --git a/spatialbench/data/sf-v1/building.tbl.gz b/spatialbench/data/sf-v1/building.tbl.gz
index a2befa1..26e2359 100644
Binary files a/spatialbench/data/sf-v1/building.tbl.gz and b/spatialbench/data/sf-v1/building.tbl.gz differ
diff --git a/spatialbench/data/sf-v1/trip.tbl.gz b/spatialbench/data/sf-v1/trip.tbl.gz
index d1e895b..15b75ac 100644
Binary files a/spatialbench/data/sf-v1/trip.tbl.gz and b/spatialbench/data/sf-v1/trip.tbl.gz differ
diff --git a/spatialbench/src/generators.rs b/spatialbench/src/generators.rs
index 02e635d..e8e4db7 100644
--- a/spatialbench/src/generators.rs
+++ b/spatialbench/src/generators.rs
@@ -1580,11 +1580,11 @@ impl ZoneGenerator {
 
         let query = format!(
             "SELECT
-                id as z_gersid,
-                country as z_country,
+                COALESCE(id, '') as z_gersid,
+                COALESCE(country, '') as z_country,
                 COALESCE(region, '') as z_region,
                 COALESCE(names.primary, '') as z_name,
-                subtype as z_subtype,
+                COALESCE(subtype, '') as z_subtype,
                 ST_AsWKB(geometry) as z_boundary
              FROM read_parquet('{}', hive_partitioning=1)
              WHERE {}
@@ -1814,7 +1814,7 @@ mod tests {
         // Check first Trip
         let first = &trips[1];
         assert_eq!(first.t_tripkey, 2);
-        assert_eq!(first.to_string(), "2|172|1|1|1997-12-24 
08:47:14|1997-12-24 09:28:57|0.03|0.00|0.04|0.01|POINT(-168.046875 
-21.09375)|POINT(-168.03314019 -21.09159343)|");
+        assert_eq!(first.to_string(), "2|172|1|1|1997-12-24 
08:47:14|1997-12-24 09:28:57|0.03|0.00|0.04|0.01|POINT(-167.9122872 
34.7837776)|POINT(-167.89855239 34.78593417)|");
     }
 
     #[test]
@@ -1840,7 +1840,7 @@ mod tests {
         // Check first Building
         let first = &buildings[1];
         assert_eq!(first.b_buildingkey, 2);
-        assert_eq!(first.to_string(), "2|blush|POLYGON((-53.95503773947216 
-4.59336925079586,-53.95553716203489 -4.603649450495837,-53.952720010369774 
-4.601933644900541,-53.95223340198092 -4.601479576109057,-53.95084475390658 
-4.598929409235666,-53.95503773947216 -4.59336925079586))|")
+        assert_eq!(first.to_string(), "2|blush|POLYGON((-83.0378916 
76.8271904,-83.0573244 76.8261504,-83.05935840000001 76.835232,-83.0469492 
76.8372976,-83.0348352 76.8317088,-83.0378916 76.8271904))|")
     }
 
     #[test]
diff --git a/spatialbench/src/lib.rs b/spatialbench/src/lib.rs
index 47a61cf..628cb89 100644
--- a/spatialbench/src/lib.rs
+++ b/spatialbench/src/lib.rs
@@ -21,9 +21,9 @@
 //!    .collect::<Vec<_>>();
 //!  assert_eq!(
 //!   trips.join("\n"),"\
-//!     1|215|1|1|1997-07-24 06:58:22|1997-07-24 
13:59:54|0.34|0.02|0.37|0.14|POINT(-172.96875 -90.0)|POINT(-172.98257408 
-90.13982816)|\n\
-//!     2|172|1|1|1997-12-24 08:47:14|1997-12-24 
09:28:57|0.03|0.00|0.04|0.01|POINT(-168.046875 -21.09375)|POINT(-168.03314019 
-21.09159343)|\n\
-//!     3|46|1|1|1993-06-27 13:27:07|1993-06-27 
13:34:51|0.00|0.00|0.00|0.00|POINT(-171.5625 -78.3984375)|POINT(-171.56429291 
-78.40028772)|"
+//!     1|215|1|1|1997-07-24 06:58:22|1997-07-24 
13:59:54|0.34|0.02|0.37|0.14|POINT(-172.9686636 59.2182928)|POINT(-172.98248768 
59.07846464)|\n\
+//!     2|172|1|1|1997-12-24 08:47:14|1997-12-24 
09:28:57|0.03|0.00|0.04|0.01|POINT(-167.9122872 34.7837776)|POINT(-167.89855239 
34.78593417)|\n\
+//!     3|46|1|1|1993-06-27 13:27:07|1993-06-27 
13:34:51|0.00|0.00|0.00|0.00|POINT(-171.5398416 39.9802592)|POINT(-171.54163451 
39.97840898)|"
 //!   );
 //! ```
 //!
diff --git a/spatialbench/src/spider.rs b/spatialbench/src/spider.rs
index d518f9f..c85e508 100644
--- a/spatialbench/src/spider.rs
+++ b/spatialbench/src/spider.rs
@@ -3,6 +3,8 @@ use rand::rngs::StdRng;
 use rand::{Rng, SeedableRng};
 use std::f64::consts::PI;
 
+const GEOMETRY_PRECISION: f64 = 100_000_000.0;
+
 #[derive(Debug, Clone, Copy)]
 pub enum DistributionType {
     Uniform,
@@ -87,6 +89,7 @@ impl SpiderGenerator {
             DistributionParams::Normal { mu, sigma } => {
                 let x = rand_normal(rng, mu, sigma).clamp(0.0, 1.0);
                 let y = rand_normal(rng, mu, sigma).clamp(0.0, 1.0);
+
                 match self.config.geom_type {
                     GeomType::Point => generate_point_geom((x, y), &self.config),
                     GeomType::Box => generate_box_geom((x, y), &self.config, rng),
@@ -154,7 +157,7 @@ impl SpiderGenerator {
         let a = (0.0, 0.0);
         let b = (1.0, 0.0);
         let c = (0.5, (3.0f64).sqrt() / 2.0);
-        for _ in 0..10 {
+        for _ in 0..27 {
             match rng.gen_range(0..3) {
                 0 => {
                     x = (x + a.0) / 2.0;
@@ -220,9 +223,8 @@ fn spider_bit(rng: &mut StdRng, prob: f64, digits: u32) -> f64 {
 }
 
 pub fn generate_point_geom(center: (f64, f64), config: &SpiderConfig) -> Geometry {
-    let (x, y) = config
-        .affine
-        .map_or(center, |aff| apply_affine(center.0, center.1, &aff));
+    let (x, y) = round_coordinates(center.0, center.1, GEOMETRY_PRECISION);
+    let (x, y) = config.affine.map_or((x, y), |aff| apply_affine(x, y, &aff));
     Geometry::Point(Point::new(x, y))
 }
 
@@ -240,7 +242,8 @@ pub fn generate_box_geom(center: (f64, f64), config: &SpiderConfig, rng: &mut St
 
     let coords: Vec<_> = corners
         .iter()
-        .map(|&(x, y)| config.affine.map_or((x, y), |aff| apply_affine(x, y, &aff)))
+        .map(|&(x, y)| round_coordinates(x, y, GEOMETRY_PRECISION))
+        .map(|(x, y)| config.affine.map_or((x, y), |aff| apply_affine(x, y, &aff)))
         .map(|(x, y)| coord! { x: x, y: y })
         .collect();
 
@@ -259,6 +262,7 @@ pub fn generate_polygon_geom(
         rng.gen_range(0..=(config.maxseg - min_segs)) + min_segs
     };
 
+    // Sample angles and sort for a simple, non-self-intersecting polygon
     let mut angles: Vec<f64> = (0..num_segments)
         .map(|_| rand_unit(rng) * 2.0 * PI)
         .collect();
@@ -266,17 +270,46 @@ pub fn generate_polygon_geom(
 
     let mut coords = angles
         .iter()
-        .map(|angle| {
-            let (x, y) = (
-                center.0 + config.polysize * angle.cos(),
-                center.1 + config.polysize * angle.sin(),
-            );
-            config.affine.map_or((x, y), |aff| apply_affine(x, y, &aff))
+        .map(|&angle| {
+            // 1) Propose vertex around center
+            let x0 = center.0 + config.polysize * angle.cos();
+            let y0 = center.1 + config.polysize * angle.sin();
+
+            // 2) Clamp in unit square BEFORE affine to keep it in [0,1]^2
+            let x1 = x0.clamp(0.0, 1.0);
+            let y1 = y0.clamp(0.0, 1.0);
+
+            // 3) Round coordinates before affine transformation
+            let (x2, y2) = round_coordinates(x1, y1, GEOMETRY_PRECISION);
+
+            // 4) Apply affine transformation
+            let (xg, yg) = if let Some(aff) = config.affine {
+                apply_affine(x2, y2, &aff)
+            } else {
+                (x2, y2)
+            };
+
+            coord! { x: xg, y: yg }
         })
-        .map(|(x, y)| coord! { x: x, y: y })
         .collect::<Vec<_>>();
 
-    coords.push(coords[0]); // close the ring
+    // Close ring
+    if let Some(first) = coords.first().cloned() {
+        coords.push(first);
+    }
 
     Geometry::Polygon(Polygon::new(LineString::from(coords), vec![]))
 }
+
+#[inline]
+fn round_coordinate(coord: f64, precision: f64) -> f64 {
+    (coord * precision).round() / precision
+}
+
+#[inline]
+fn round_coordinates(x: f64, y: f64, precision: f64) -> (f64, f64) {
+    (
+        round_coordinate(x, precision),
+        round_coordinate(y, precision),
+    )
+}
diff --git a/spatialbench/src/spider_defaults.rs b/spatialbench/src/spider_defaults.rs
index acd41f1..883c360 100644
--- a/spatialbench/src/spider_defaults.rs
+++ b/spatialbench/src/spider_defaults.rs
@@ -8,8 +8,8 @@ impl SpiderDefaults {
     const FULL_WORLD_AFFINE: [f64; 6] = [
         360.0, // Scale X to cover full longitude range (-180° to 180°)
         0.0, -180.0, // Offset X to start at -180° (west edge of map)
-        0.0, 180.0, // Scale Y to cover full latitude range (-90° to 90°)
-        -90.0, // Offset Y to start at -90° (south edge of map)
+        0.0, -160.0, // Scale Y: maps unit square [0,1] to latitude range [80°, -80°]
+        80.0,   // Offset Y to start at 80° (north edge of map)
     ];
 
     pub fn trip_default() -> SpiderGenerator {
@@ -30,7 +30,7 @@ impl SpiderDefaults {
 
             params: DistributionParams::Bit {
                 probability: 0.2,
-                digits: 10,
+                digits: 30,
             },
         };
         SpiderGenerator::new(config)

Reply via email to