This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-spatialbench.git
The following commit(s) were added to refs/heads/main by this push:
new b930cd0 Fix duplicate geometries issue and update default configs to improve coverage (#2)
b930cd0 is described below
commit b930cd0f9c1e6753fd6e7ea7b6c15d412835130f
Author: Pranav Toggi <[email protected]>
AuthorDate: Fri Aug 29 15:55:23 2025 -0700
Fix duplicate geometries issue and update default configs to improve coverage (#2)
---
spatialbench-arrow/src/lib.rs | 20 +++++------
spatialbench-config.yml | 6 ++--
spatialbench/data/sf-v1/building.tbl.gz | Bin 47029 -> 32501 bytes
spatialbench/data/sf-v1/trip.tbl.gz | Bin 226051 -> 269490 bytes
spatialbench/src/generators.rs | 10 +++---
spatialbench/src/lib.rs | 6 ++--
spatialbench/src/spider.rs | 59 +++++++++++++++++++++++++-------
spatialbench/src/spider_defaults.rs | 6 ++--
8 files changed, 70 insertions(+), 37 deletions(-)
diff --git a/spatialbench-arrow/src/lib.rs b/spatialbench-arrow/src/lib.rs
index e1b73ae..a9e8b85 100644
--- a/spatialbench-arrow/src/lib.rs
+++ b/spatialbench-arrow/src/lib.rs
@@ -21,16 +21,16 @@
//!
"+-----------+-----------+-------------+--------------+---------------------+---------------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+",
//! "| t_tripkey | t_custkey | t_driverkey | t_vehiclekey | t_pickuptime
| t_dropofftime | t_fare | t_tip | t_totalamount | t_distance |
t_pickuploc | t_dropoffloc
|",
//!
"+-----------+-----------+-------------+--------------+---------------------+---------------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+",
-//! "| 1 | 215 | 1 | 1 |
1997-07-24T06:58:22 | 1997-07-24T13:59:54 | 0.00034 | 0.00002 | 0.00037 |
0.00014 | 010100000000000000009f65c000000000008056c0 |
0101000000ea6f323f719f65c0a190cff1f28856c0 |",
-//! "| 2 | 172 | 1 | 1 |
1997-12-24T08:47:14 | 1997-12-24T09:28:57 | 0.00003 | 0.00000 | 0.00004 |
0.00001 | 010100000000000000800165c000000000001835c0 |
01010000007707047c0f0165c0e360c2aa721735c0 |",
-//! "| 3 | 46 | 1 | 1 |
1993-06-27T13:27:07 | 1993-06-27T13:34:51 | 0.00000 | 0.00000 | 0.00000 |
0.00000 | 010100000000000000007265c000000000809953c0 |
0101000000123a01b00e7265c0fc9862509e9953c0 |",
-//! "| 4 | 40 | 1 | 1 |
1996-08-02T04:14:27 | 1996-08-02T05:29:32 | 0.00005 | 0.00000 | 0.00005 |
0.00002 | 010100000000000000800f56c00000000000c63bc0 |
01010000005c186d7e111056c0435fb4a6fdcb3bc0 |",
-//! "| 5 | 232 | 1 | 1 |
1996-08-23T12:48:20 | 1996-08-23T13:36:15 | 0.00002 | 0.00000 | 0.00003 |
0.00001 | 010100000000000000406460c00000000000da4640 |
01010000003da9a3a1ae6460c00036836c17db4640 |",
-//! "| 6 | 46 | 1 | 1 |
1994-11-16T16:39:14 | 1994-11-16T17:26:07 | 0.00003 | 0.00000 | 0.00003 |
0.00001 | 010100000000000000002666c000000000806f40c0 |
01010000009fbda7303e2666c0cdb6cb65c06d40c0 |",
-//! "| 7 | 284 | 1 | 1 |
1996-01-20T06:18:56 | 1996-01-20T06:18:56 | 0.00000 | 0.00000 | 0.00000 |
0.00000 | 010100000000000000002963c00000000000e040c0 |
010100000000000000002963c00000000000e040c0 |",
-//! "| 8 | 233 | 1 | 1 |
1995-01-09T23:26:54 | 1995-01-10T00:16:28 | 0.00003 | 0.00000 | 0.00003 |
0.00001 | 010100000000000000008056c000000000c03955c0 |
0101000000c0e91ba00d8156c06e03b14bd83955c0 |",
-//! "| 9 | 178 | 1 | 1 |
1993-10-13T11:07:04 | 1993-10-13T12:42:27 | 0.00005 | 0.00001 | 0.00007 |
0.00003 | 010100000000000000005366c00000000000e050c0 |
0101000000a6ef3504e75266c0448c538406e250c0 |",
-//! "| 10 | 118 | 1 | 1 |
1994-11-08T21:05:58 | 1994-11-08T21:21:29 | 0.00001 | 0.00000 | 0.00001 |
0.00000 | 010100000000000000008066c000000000c07456c0 |
01010000001459106fe27f66c08d065341837456c0 |",
+//! "| 1 | 215 | 1 | 1 |
1997-07-24T06:58:22 | 1997-07-24T13:59:54 | 0.00034 | 0.00002 | 0.00037 |
0.00014 | 01010000006c5ace4aff9e65c0e479ba04f19b4d40 |
010100000055ca008a709f65c0a1581b210b8a4d40 |",
+//! "| 2 | 172 | 1 | 1 |
1997-12-24T08:47:14 | 1997-12-24T09:28:57 | 0.00003 | 0.00000 | 0.00004 |
0.00001 | 0101000000e711ed7431fd64c02fab0bd352644140 |
01010000005d19f1f0c0fc64c0bd7aaa7d99644140 |",
+//! "| 3 | 46 | 1 | 1 |
1993-06-27T13:27:07 | 1993-06-27T13:34:51 | 0.00000 | 0.00000 | 0.00000 |
0.00000 | 0101000000a620e461467165c033cd2a2279fd4340 |
0101000000b85ae511557165c03a9b65813cfd4340 |",
+//! "| 4 | 40 | 1 | 1 |
1996-08-02T04:14:27 | 1996-08-02T05:29:32 | 0.00005 | 0.00000 | 0.00005 |
0.00002 | 010100000060a28b97b80756c095bfd60000fb4d40 |
0101000000bcbaf8154a0856c0f38f7c2d01f84d40 |",
+//! "| 5 | 232 | 1 | 1 |
1996-08-23T12:48:20 | 1996-08-23T13:36:15 | 0.00002 | 0.00000 | 0.00003 |
0.00001 | 010100000096c4fe57c25b60c00080d1c19f8664bf |
0101000000d46da2f9305c60c0031ad78540aa783f |",
+//! "| 6 | 46 | 1 | 1 |
1994-11-16T16:39:14 | 1994-11-16T17:26:07 | 0.00003 | 0.00000 | 0.00003 |
0.00001 | 0101000000c356bf886c2266c000fa5635520004c0 |
0101000000611467b9aa2266c0b566129258e403c0 |",
+//! "| 7 | 284 | 1 | 1 |
1996-01-20T06:18:56 | 1996-01-20T06:18:56 | 0.00000 | 0.00000 | 0.00000 |
0.00000 | 010100000097a0d0fc7b2563c074fb9b06fbf54340 |
010100000097a0d0fc7b2563c074fb9b06fbf54340 |",
+//! "| 8 | 233 | 1 | 1 |
1995-01-09T23:26:54 | 1995-01-10T00:16:28 | 0.00003 | 0.00000 | 0.00003 |
0.00001 | 01010000002c7986ba597f56c0a27a6b60ab544340 |
0101000000ec62a25a678056c0c77309c97a544340 |",
+//! "| 9 | 178 | 1 | 1 |
1993-10-13T11:07:04 | 1993-10-13T12:42:27 | 0.00005 | 0.00001 | 0.00007 |
0.00003 | 0101000000b3295778975166c09078680effff4840 |
010100000059198d7c7e5166c00760c105f2fb4840 |",
+//! "| 10 | 118 | 1 | 1 |
1994-11-08T21:05:58 | 1994-11-08T21:21:29 | 0.00001 | 0.00000 | 0.00001 |
0.00000 | 01010000004900edfdfc7f66c0c58ec6a17eef5240 |
01010000005d59fd6cdf7f66c038887360bbef5240 |",
//!
"+-----------+-----------+-------------+--------------+---------------------+---------------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+"
//! ]);
//! ```
diff --git a/spatialbench-config.yml b/spatialbench-config.yml
index dce10a7..df56384 100644
--- a/spatialbench-config.yml
+++ b/spatialbench-config.yml
@@ -3,7 +3,7 @@ trip:
geom_type: point
dim: 2
seed: 42
- affine: [360.0, 0.0, -180.0, 0.0, 180.0, -90.0]
+ affine: [360.0, 0.0, -180.0, 0.0, -160.0, 80.0]
width: 0.0
height: 0.0
maxseg: 0
@@ -11,14 +11,14 @@ trip:
params:
type: bit
probability: 0.2
- digits: 10
+ digits: 30
building:
dist_type: sierpinski
geom_type: polygon
dim: 2
seed: 12345
- affine: [360.0, 0.0, -180.0, 0.0, 180.0, -90.0]
+ affine: [360.0, 0.0, -180.0, 0.0, -160.0, 80.0]
width: 0.0
height: 0.0
maxseg: 5
diff --git a/spatialbench/data/sf-v1/building.tbl.gz b/spatialbench/data/sf-v1/building.tbl.gz
index a2befa1..26e2359 100644
Binary files a/spatialbench/data/sf-v1/building.tbl.gz and b/spatialbench/data/sf-v1/building.tbl.gz differ
diff --git a/spatialbench/data/sf-v1/trip.tbl.gz b/spatialbench/data/sf-v1/trip.tbl.gz
index d1e895b..15b75ac 100644
Binary files a/spatialbench/data/sf-v1/trip.tbl.gz and b/spatialbench/data/sf-v1/trip.tbl.gz differ
diff --git a/spatialbench/src/generators.rs b/spatialbench/src/generators.rs
index 02e635d..e8e4db7 100644
--- a/spatialbench/src/generators.rs
+++ b/spatialbench/src/generators.rs
@@ -1580,11 +1580,11 @@ impl ZoneGenerator {
let query = format!(
"SELECT
- id as z_gersid,
- country as z_country,
+ COALESCE(id, '') as z_gersid,
+ COALESCE(country, '') as z_country,
COALESCE(region, '') as z_region,
COALESCE(names.primary, '') as z_name,
- subtype as z_subtype,
+ COALESCE(subtype, '') as z_subtype,
ST_AsWKB(geometry) as z_boundary
FROM read_parquet('{}', hive_partitioning=1)
WHERE {}
@@ -1814,7 +1814,7 @@ mod tests {
// Check first Trip
let first = &trips[1];
assert_eq!(first.t_tripkey, 2);
- assert_eq!(first.to_string(), "2|172|1|1|1997-12-24
08:47:14|1997-12-24 09:28:57|0.03|0.00|0.04|0.01|POINT(-168.046875
-21.09375)|POINT(-168.03314019 -21.09159343)|");
+ assert_eq!(first.to_string(), "2|172|1|1|1997-12-24
08:47:14|1997-12-24 09:28:57|0.03|0.00|0.04|0.01|POINT(-167.9122872
34.7837776)|POINT(-167.89855239 34.78593417)|");
}
#[test]
@@ -1840,7 +1840,7 @@ mod tests {
// Check first Building
let first = &buildings[1];
assert_eq!(first.b_buildingkey, 2);
- assert_eq!(first.to_string(), "2|blush|POLYGON((-53.95503773947216
-4.59336925079586,-53.95553716203489 -4.603649450495837,-53.952720010369774
-4.601933644900541,-53.95223340198092 -4.601479576109057,-53.95084475390658
-4.598929409235666,-53.95503773947216 -4.59336925079586))|")
+ assert_eq!(first.to_string(), "2|blush|POLYGON((-83.0378916
76.8271904,-83.0573244 76.8261504,-83.05935840000001 76.835232,-83.0469492
76.8372976,-83.0348352 76.8317088,-83.0378916 76.8271904))|")
}
#[test]
diff --git a/spatialbench/src/lib.rs b/spatialbench/src/lib.rs
index 47a61cf..628cb89 100644
--- a/spatialbench/src/lib.rs
+++ b/spatialbench/src/lib.rs
@@ -21,9 +21,9 @@
//! .collect::<Vec<_>>();
//! assert_eq!(
//! trips.join("\n"),"\
-//! 1|215|1|1|1997-07-24 06:58:22|1997-07-24
13:59:54|0.34|0.02|0.37|0.14|POINT(-172.96875 -90.0)|POINT(-172.98257408
-90.13982816)|\n\
-//! 2|172|1|1|1997-12-24 08:47:14|1997-12-24
09:28:57|0.03|0.00|0.04|0.01|POINT(-168.046875 -21.09375)|POINT(-168.03314019
-21.09159343)|\n\
-//! 3|46|1|1|1993-06-27 13:27:07|1993-06-27
13:34:51|0.00|0.00|0.00|0.00|POINT(-171.5625 -78.3984375)|POINT(-171.56429291
-78.40028772)|"
+//! 1|215|1|1|1997-07-24 06:58:22|1997-07-24
13:59:54|0.34|0.02|0.37|0.14|POINT(-172.9686636 59.2182928)|POINT(-172.98248768
59.07846464)|\n\
+//! 2|172|1|1|1997-12-24 08:47:14|1997-12-24
09:28:57|0.03|0.00|0.04|0.01|POINT(-167.9122872 34.7837776)|POINT(-167.89855239
34.78593417)|\n\
+//! 3|46|1|1|1993-06-27 13:27:07|1993-06-27
13:34:51|0.00|0.00|0.00|0.00|POINT(-171.5398416 39.9802592)|POINT(-171.54163451
39.97840898)|"
//! );
//! ```
//!
diff --git a/spatialbench/src/spider.rs b/spatialbench/src/spider.rs
index d518f9f..c85e508 100644
--- a/spatialbench/src/spider.rs
+++ b/spatialbench/src/spider.rs
@@ -3,6 +3,8 @@ use rand::rngs::StdRng;
use rand::{Rng, SeedableRng};
use std::f64::consts::PI;
+const GEOMETRY_PRECISION: f64 = 100_000_000.0;
+
#[derive(Debug, Clone, Copy)]
pub enum DistributionType {
Uniform,
@@ -87,6 +89,7 @@ impl SpiderGenerator {
DistributionParams::Normal { mu, sigma } => {
let x = rand_normal(rng, mu, sigma).clamp(0.0, 1.0);
let y = rand_normal(rng, mu, sigma).clamp(0.0, 1.0);
+
match self.config.geom_type {
GeomType::Point => generate_point_geom((x, y),
&self.config),
GeomType::Box => generate_box_geom((x, y), &self.config,
rng),
@@ -154,7 +157,7 @@ impl SpiderGenerator {
let a = (0.0, 0.0);
let b = (1.0, 0.0);
let c = (0.5, (3.0f64).sqrt() / 2.0);
- for _ in 0..10 {
+ for _ in 0..27 {
match rng.gen_range(0..3) {
0 => {
x = (x + a.0) / 2.0;
@@ -220,9 +223,8 @@ fn spider_bit(rng: &mut StdRng, prob: f64, digits: u32) -> f64 {
}
pub fn generate_point_geom(center: (f64, f64), config: &SpiderConfig) -> Geometry {
- let (x, y) = config
- .affine
- .map_or(center, |aff| apply_affine(center.0, center.1, &aff));
+ let (x, y) = round_coordinates(center.0, center.1, GEOMETRY_PRECISION);
+ let (x, y) = config.affine.map_or((x, y), |aff| apply_affine(x, y, &aff));
Geometry::Point(Point::new(x, y))
}
@@ -240,7 +242,8 @@ pub fn generate_box_geom(center: (f64, f64), config: &SpiderConfig, rng: &mut St
let coords: Vec<_> = corners
.iter()
- .map(|&(x, y)| config.affine.map_or((x, y), |aff| apply_affine(x, y,
&aff)))
+ .map(|&(x, y)| round_coordinates(x, y, GEOMETRY_PRECISION))
+ .map(|(x, y)| config.affine.map_or((x, y), |aff| apply_affine(x, y,
&aff)))
.map(|(x, y)| coord! { x: x, y: y })
.collect();
@@ -259,6 +262,7 @@ pub fn generate_polygon_geom(
rng.gen_range(0..=(config.maxseg - min_segs)) + min_segs
};
+ // Sample angles and sort for a simple, non-self-intersecting polygon
let mut angles: Vec<f64> = (0..num_segments)
.map(|_| rand_unit(rng) * 2.0 * PI)
.collect();
@@ -266,17 +270,46 @@ pub fn generate_polygon_geom(
let mut coords = angles
.iter()
- .map(|angle| {
- let (x, y) = (
- center.0 + config.polysize * angle.cos(),
- center.1 + config.polysize * angle.sin(),
- );
- config.affine.map_or((x, y), |aff| apply_affine(x, y, &aff))
+ .map(|&angle| {
+ // 1) Propose vertex around center
+ let x0 = center.0 + config.polysize * angle.cos();
+ let y0 = center.1 + config.polysize * angle.sin();
+
+ // 2) Clamp in unit square BEFORE affine to keep it in [0,1]^2
+ let x1 = x0.clamp(0.0, 1.0);
+ let y1 = y0.clamp(0.0, 1.0);
+
+ // 3) Round coordinates before affine transformation
+ let (x2, y2) = round_coordinates(x1, y1, GEOMETRY_PRECISION);
+
+ // 4) Apply affine transformation
+ let (xg, yg) = if let Some(aff) = config.affine {
+ apply_affine(x2, y2, &aff)
+ } else {
+ (x2, y2)
+ };
+
+ coord! { x: xg, y: yg }
})
- .map(|(x, y)| coord! { x: x, y: y })
.collect::<Vec<_>>();
- coords.push(coords[0]); // close the ring
+ // Close ring
+ if let Some(first) = coords.first().cloned() {
+ coords.push(first);
+ }
Geometry::Polygon(Polygon::new(LineString::from(coords), vec![]))
}
+
+#[inline]
+fn round_coordinate(coord: f64, precision: f64) -> f64 {
+ (coord * precision).round() / precision
+}
+
+#[inline]
+fn round_coordinates(x: f64, y: f64, precision: f64) -> (f64, f64) {
+ (
+ round_coordinate(x, precision),
+ round_coordinate(y, precision),
+ )
+}
diff --git a/spatialbench/src/spider_defaults.rs b/spatialbench/src/spider_defaults.rs
index acd41f1..883c360 100644
--- a/spatialbench/src/spider_defaults.rs
+++ b/spatialbench/src/spider_defaults.rs
@@ -8,8 +8,8 @@ impl SpiderDefaults {
const FULL_WORLD_AFFINE: [f64; 6] = [
360.0, // Scale X to cover full longitude range (-180° to 180°)
0.0, -180.0, // Offset X to start at -180° (west edge of map)
- 0.0, 180.0, // Scale Y to cover full latitude range (-90° to 90°)
- -90.0, // Offset Y to start at -90° (south edge of map)
+ 0.0, -160.0, // Scale Y: maps unit square [0,1] to latitude range [80°, -80°]
+ 80.0, // Offset Y to start at 80° (north edge of map)
];
pub fn trip_default() -> SpiderGenerator {
@@ -30,7 +30,7 @@ impl SpiderDefaults {
params: DistributionParams::Bit {
probability: 0.2,
- digits: 10,
+ digits: 30,
},
};
SpiderGenerator::new(config)