This is an automated email from the ASF dual-hosted git repository.
jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-spatialbench.git
The following commit(s) were added to refs/heads/main by this push:
new eb82013 Improve the base zone table and update the trip table complexity (#3)
eb82013 is described below
commit eb82013139299fb9ea2fc8a3972a240f0b70477b
Author: Jia Yu <[email protected]>
AuthorDate: Tue Sep 2 09:39:51 2025 -0700
Improve the base zone table and update the trip table complexity (#3)
* Update
* Fix format
* Fix failed files
* fix doc tests
---------
Co-authored-by: Pranav Toggi <[email protected]>
---
spatialbench-arrow/src/lib.rs | 20 ++++++-------
spatialbench-cli/tests/cli_integration.rs | 2 +-
spatialbench-config.yml | 2 +-
spatialbench/data/sf-v1/building.tbl.gz | Bin 32501 -> 32501 bytes
spatialbench/data/sf-v1/customer.tbl.gz | Bin 542440 -> 542440 bytes
spatialbench/data/sf-v1/driver.tbl.gz | Bin 9350 -> 9350 bytes
spatialbench/data/sf-v1/trip.tbl.gz | Bin 269490 -> 280781 bytes
spatialbench/data/sf-v1/vehicle.tbl.gz | Bin 955 -> 955 bytes
spatialbench/src/generators.rs | 45 +++++++++++++++++++-----------
spatialbench/src/lib.rs | 6 ++--
spatialbench/src/spider_defaults.rs | 2 +-
11 files changed, 45 insertions(+), 32 deletions(-)
diff --git a/spatialbench-arrow/src/lib.rs b/spatialbench-arrow/src/lib.rs
index a9e8b85..6b4dc19 100644
--- a/spatialbench-arrow/src/lib.rs
+++ b/spatialbench-arrow/src/lib.rs
@@ -21,16 +21,16 @@
//!
"+-----------+-----------+-------------+--------------+---------------------+---------------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+",
//! "| t_tripkey | t_custkey | t_driverkey | t_vehiclekey | t_pickuptime
| t_dropofftime | t_fare | t_tip | t_totalamount | t_distance |
t_pickuploc | t_dropoffloc
|",
//!
"+-----------+-----------+-------------+--------------+---------------------+---------------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+",
-//! "| 1 | 215 | 1 | 1 |
1997-07-24T06:58:22 | 1997-07-24T13:59:54 | 0.00034 | 0.00002 | 0.00037 |
0.00014 | 01010000006c5ace4aff9e65c0e479ba04f19b4d40 |
010100000055ca008a709f65c0a1581b210b8a4d40 |",
-//! "| 2 | 172 | 1 | 1 |
1997-12-24T08:47:14 | 1997-12-24T09:28:57 | 0.00003 | 0.00000 | 0.00004 |
0.00001 | 0101000000e711ed7431fd64c02fab0bd352644140 |
01010000005d19f1f0c0fc64c0bd7aaa7d99644140 |",
-//! "| 3 | 46 | 1 | 1 |
1993-06-27T13:27:07 | 1993-06-27T13:34:51 | 0.00000 | 0.00000 | 0.00000 |
0.00000 | 0101000000a620e461467165c033cd2a2279fd4340 |
0101000000b85ae511557165c03a9b65813cfd4340 |",
-//! "| 4 | 40 | 1 | 1 |
1996-08-02T04:14:27 | 1996-08-02T05:29:32 | 0.00005 | 0.00000 | 0.00005 |
0.00002 | 010100000060a28b97b80756c095bfd60000fb4d40 |
0101000000bcbaf8154a0856c0f38f7c2d01f84d40 |",
-//! "| 5 | 232 | 1 | 1 |
1996-08-23T12:48:20 | 1996-08-23T13:36:15 | 0.00002 | 0.00000 | 0.00003 |
0.00001 | 010100000096c4fe57c25b60c00080d1c19f8664bf |
0101000000d46da2f9305c60c0031ad78540aa783f |",
-//! "| 6 | 46 | 1 | 1 |
1994-11-16T16:39:14 | 1994-11-16T17:26:07 | 0.00003 | 0.00000 | 0.00003 |
0.00001 | 0101000000c356bf886c2266c000fa5635520004c0 |
0101000000611467b9aa2266c0b566129258e403c0 |",
-//! "| 7 | 284 | 1 | 1 |
1996-01-20T06:18:56 | 1996-01-20T06:18:56 | 0.00000 | 0.00000 | 0.00000 |
0.00000 | 010100000097a0d0fc7b2563c074fb9b06fbf54340 |
010100000097a0d0fc7b2563c074fb9b06fbf54340 |",
-//! "| 8 | 233 | 1 | 1 |
1995-01-09T23:26:54 | 1995-01-10T00:16:28 | 0.00003 | 0.00000 | 0.00003 |
0.00001 | 01010000002c7986ba597f56c0a27a6b60ab544340 |
0101000000ec62a25a678056c0c77309c97a544340 |",
-//! "| 9 | 178 | 1 | 1 |
1993-10-13T11:07:04 | 1993-10-13T12:42:27 | 0.00005 | 0.00001 | 0.00007 |
0.00003 | 0101000000b3295778975166c09078680effff4840 |
010100000059198d7c7e5166c00760c105f2fb4840 |",
-//! "| 10 | 118 | 1 | 1 |
1994-11-08T21:05:58 | 1994-11-08T21:21:29 | 0.00001 | 0.00000 | 0.00001 |
0.00000 | 01010000004900edfdfc7f66c0c58ec6a17eef5240 |
01010000005d59fd6cdf7f66c038887360bbef5240 |",
+//! "| 1 | 215 | 1 | 1 |
1997-07-24T06:58:22 | 1997-07-24T13:59:54 | 0.00034 | 0.00002 | 0.00037 |
0.00014 | 01010000000c63c1b3f39e65c0e4086355ce984840 |
0101000000f6d2f3f2649f65c0a1e7c371e8864840 |",
+//! "| 2 | 172 | 1 | 1 |
1997-12-24T08:47:14 | 1997-12-24T09:28:57 | 0.00003 | 0.00000 | 0.00004 |
0.00001 | 01010000007ea1ecd22bfd64c0e885e05dd3282c40 |
0101000000f5a8f04ebbfc64c021c45b08ee292c40 |",
+//! "| 3 | 46 | 1 | 1 |
1993-06-27T13:27:07 | 1993-06-27T13:34:51 | 0.00000 | 0.00000 | 0.00000 |
0.00000 | 01010000007bbe66b96c865fc0b898b047f2e63d40 |
01010000009f3269198a865fc0c834260679e63d40 |",
+//! "| 4 | 40 | 1 | 1 |
1996-08-02T04:14:27 | 1996-08-02T05:29:32 | 0.00005 | 0.00000 | 0.00005 |
0.00002 | 01010000008e90813cbb0456c0987384679dec4d40 |
0101000000eba8eeba4c0556c0f6432a949ee94d40 |",
+//! "| 5 | 232 | 1 | 1 |
1996-08-23T12:48:20 | 1996-08-23T13:36:15 | 0.00002 | 0.00000 | 0.00003 |
0.00001 | 01010000005da8fc6b79e75dc0c8c5bd9e540049c0 |
0101000000d7fa43af56e85dc0c98f3a323dff48c0 |",
+//! "| 6 | 46 | 1 | 1 |
1994-11-16T16:39:14 | 1994-11-16T17:26:07 | 0.00003 | 0.00000 | 0.00003 |
0.00001 | 0101000000406716574b700740c8dbb694984c2ac0 |
01010000009eff262dbf600740ffb6e52b9a452ac0 |",
+//! "| 7 | 284 | 1 | 1 |
1996-01-20T06:18:56 | 1996-01-20T06:18:56 | 0.00000 | 0.00000 | 0.00000 |
0.00000 | 01010000002028b7ed7bbd61c090cde90d52eb3d40 |
01010000002028b7ed7bbd61c08fcde90d52eb3d40 |",
+//! "| 8 | 233 | 1 | 1 |
1995-01-09T23:26:54 | 1995-01-10T00:16:28 | 0.00003 | 0.00000 | 0.00003 |
0.00001 | 010100000095eeaeb321ab53c0a8da13c9fca83740 |
010100000056d8ca532fac53c0f2cc4f9a9ba83740 |",
+//! "| 9 | 178 | 1 | 1 |
1993-10-13T11:07:04 | 1993-10-13T12:42:27 | 0.00005 | 0.00001 | 0.00007 |
0.00003 | 0101000000d4be1479ed1756c000b14d2a1a6beb3f |
0101000000209e8081bb1756c0568f8700d867ea3f |",
+//! "| 10 | 118 | 1 | 1 |
1994-11-08T21:05:58 | 1994-11-08T21:21:29 | 0.00001 | 0.00000 | 0.00001 |
0.00000 | 0101000000b0251de5609e35c07455eaa39d544440 |
010100000047ee9f5d749d35c05948442117554440 |",
//!
"+-----------+-----------+-------------+--------------+---------------------+---------------------+---------+---------+---------------+------------+--------------------------------------------+--------------------------------------------+"
//! ]);
//! ```
diff --git a/spatialbench-cli/tests/cli_integration.rs b/spatialbench-cli/tests/cli_integration.rs
index 88d34ee..6b3bbb9 100644
--- a/spatialbench-cli/tests/cli_integration.rs
+++ b/spatialbench-cli/tests/cli_integration.rs
@@ -84,7 +84,7 @@ fn test_spatialbench_cli_parts() {
// Create a temporary directory
let temp_dir = tempdir().expect("Failed to create temporary directory");
- // generate 4 parts of the trip table with scale factor 0.1
+ // generate 4 parts of the trip table with scale factor 0.001
// into directories /part1, /part2, /part3, /part4
// use threads to run the command concurrently to minimize the time taken
let num_parts = 4;
diff --git a/spatialbench-config.yml b/spatialbench-config.yml
index df56384..eb414fc 100644
--- a/spatialbench-config.yml
+++ b/spatialbench-config.yml
@@ -10,7 +10,7 @@ trip:
polysize: 0.0
params:
type: bit
- probability: 0.2
+ probability: 0.35
digits: 30
building:
diff --git a/spatialbench/data/sf-v1/building.tbl.gz b/spatialbench/data/sf-v1/building.tbl.gz
index 26e2359..bfdb04e 100644
Binary files a/spatialbench/data/sf-v1/building.tbl.gz and
b/spatialbench/data/sf-v1/building.tbl.gz differ
diff --git a/spatialbench/data/sf-v1/customer.tbl.gz b/spatialbench/data/sf-v1/customer.tbl.gz
index 6334db1..b526b48 100644
Binary files a/spatialbench/data/sf-v1/customer.tbl.gz and
b/spatialbench/data/sf-v1/customer.tbl.gz differ
diff --git a/spatialbench/data/sf-v1/driver.tbl.gz b/spatialbench/data/sf-v1/driver.tbl.gz
index bc3b7c9..6745854 100644
Binary files a/spatialbench/data/sf-v1/driver.tbl.gz and
b/spatialbench/data/sf-v1/driver.tbl.gz differ
diff --git a/spatialbench/data/sf-v1/trip.tbl.gz b/spatialbench/data/sf-v1/trip.tbl.gz
index 15b75ac..8b672f7 100644
Binary files a/spatialbench/data/sf-v1/trip.tbl.gz and
b/spatialbench/data/sf-v1/trip.tbl.gz differ
diff --git a/spatialbench/data/sf-v1/vehicle.tbl.gz b/spatialbench/data/sf-v1/vehicle.tbl.gz
index 9f6d184..6f82178 100644
Binary files a/spatialbench/data/sf-v1/vehicle.tbl.gz and
b/spatialbench/data/sf-v1/vehicle.tbl.gz differ
diff --git a/spatialbench/src/generators.rs b/spatialbench/src/generators.rs
index e8e4db7..bbc6ec0 100644
--- a/spatialbench/src/generators.rs
+++ b/spatialbench/src/generators.rs
@@ -1450,10 +1450,10 @@ impl ZoneGenerator {
/// Get zone subtypes based on scale factor
fn get_zone_subtypes_for_scale_factor(scale_factor: f64) -> Vec<&'static
str> {
- let mut subtypes = vec!["microhood", "macrohood"];
+ let mut subtypes = vec!["microhood", "macrohood", "county"];
if scale_factor >= 10.0 {
- subtypes.extend_from_slice(&["neighborhood", "county"]);
+ subtypes.extend_from_slice(&["neighborhood"]);
}
if scale_factor >= 100.0 {
@@ -1578,6 +1578,9 @@ impl ZoneGenerator {
)
};
+ // Combine subtype filter with is_land filter
+ let combined_filter = format!("{} AND is_land = true", subtype_filter);
+
let query = format!(
"SELECT
COALESCE(id, '') as z_gersid,
@@ -1589,7 +1592,7 @@ impl ZoneGenerator {
FROM read_parquet('{}', hive_partitioning=1)
WHERE {}
LIMIT {} OFFSET {};",
- zones_url, subtype_filter, zones_per_part, offset
+ zones_url, combined_filter, zones_per_part, offset
);
debug!("Generated partition query: {}", query);
@@ -1814,7 +1817,7 @@ mod tests {
// Check first Trip
let first = &trips[1];
assert_eq!(first.t_tripkey, 2);
- assert_eq!(first.to_string(), "2|172|1|1|1997-12-24
08:47:14|1997-12-24 09:28:57|0.03|0.00|0.04|0.01|POINT(-167.9122872
34.7837776)|POINT(-167.89855239 34.78593417)|");
+ assert_eq!(first.to_string(), "2|172|1|1|1997-12-24
08:47:14|1997-12-24 09:28:57|0.03|0.00|0.04|0.01|POINT(-167.9115996
14.079737600000001)|POINT(-167.89786479 14.08189417)|");
}
#[test]
@@ -1849,28 +1852,38 @@ mod tests {
let generator = ZoneGenerator::new(0.001, 1, 1);
let zones: Vec<_> = generator.into_iter().collect();
- assert_eq!(zones.len(), 118);
+ assert_eq!(zones.len(), 158);
- // Check first Driver
+ // Check first zone
let first = &zones[0];
assert_eq!(first.z_zonekey, 1);
- assert_eq!(
- first.to_string(),
- "1|635d3a50-3055-44a6-8968-7e7d65dd3f61|WF|WF-UV|Place
Sagato-Soane|microhood|POLYGON((-176.1735809 -13.28369,-176.1737479
-13.283821,-176.1738536 -13.2838989,-176.173536 -13.2842404,-176.1725987
-13.2833717,-176.1725033 -13.2833872,-176.1724121 -13.2833876,-176.1723319
-13.283372,-176.1722686 -13.2833485,-176.1720379 -13.283278,-176.172337
-13.2830551,-176.17235 -13.2830455,-176.1724748 -13.283002,-176.1725888
-13.2829915,-176.1727488 -13.2830245,-176.1728399 -13.2830431,-17 [...]
- )
+ // The first zone is now a county due to the is_land filter and county being in base subtypes
+ assert_eq!(first.z_subtype, "county");
+ // Verify the string format matches the expected pattern (but don't check exact content since it's dynamic)
+ let expected_pattern = format!(
+ "{}|{}|{}|{}|{}|{}|{:?}|",
+ first.z_zonekey,
+ first.z_gersid,
+ first.z_country,
+ first.z_region,
+ first.z_name,
+ first.z_subtype,
+ first.z_boundary
+ );
+ assert_eq!(first.to_string(), expected_pattern);
}
#[test]
fn test_zone_subtype_filters() {
- // Test scale factor 0-10: should only include microhood and macrohood
+ // Test scale factor 0-10: should include microhood, macrohood, and county
let subtypes_0_10 =
ZoneGenerator::get_zone_subtypes_for_scale_factor(5.0);
- assert_eq!(subtypes_0_10, vec!["microhood", "macrohood"]);
+ assert_eq!(subtypes_0_10, vec!["microhood", "macrohood", "county"]);
- // Test scale factor 10-100: should include microhood, macrohood,
neighborhood, county
+ // Test scale factor 10-100: should include microhood, macrohood, county, and neighborhood
let subtypes_10_100 =
ZoneGenerator::get_zone_subtypes_for_scale_factor(50.0);
assert_eq!(
subtypes_10_100,
- vec!["microhood", "macrohood", "neighborhood", "county"]
+ vec!["microhood", "macrohood", "county", "neighborhood"]
);
// Test scale factor 100-1000: should include all except country
@@ -1880,8 +1893,8 @@ mod tests {
vec![
"microhood",
"macrohood",
- "neighborhood",
"county",
+ "neighborhood",
"localadmin",
"locality",
"region",
@@ -1896,8 +1909,8 @@ mod tests {
vec![
"microhood",
"macrohood",
- "neighborhood",
"county",
+ "neighborhood",
"localadmin",
"locality",
"region",
diff --git a/spatialbench/src/lib.rs b/spatialbench/src/lib.rs
index 628cb89..db9c36a 100644
--- a/spatialbench/src/lib.rs
+++ b/spatialbench/src/lib.rs
@@ -21,9 +21,9 @@
//! .collect::<Vec<_>>();
//! assert_eq!(
//! trips.join("\n"),"\
-//! 1|215|1|1|1997-07-24 06:58:22|1997-07-24
13:59:54|0.34|0.02|0.37|0.14|POINT(-172.9686636 59.2182928)|POINT(-172.98248768
59.07846464)|\n\
-//! 2|172|1|1|1997-12-24 08:47:14|1997-12-24
09:28:57|0.03|0.00|0.04|0.01|POINT(-167.9122872 34.7837776)|POINT(-167.89855239
34.78593417)|\n\
-//! 3|46|1|1|1993-06-27 13:27:07|1993-06-27
13:34:51|0.00|0.00|0.00|0.00|POINT(-171.5398416 39.9802592)|POINT(-171.54163451
39.97840898)|"
+//! 1|215|1|1|1997-07-24 06:58:22|1997-07-24
13:59:54|0.34|0.02|0.37|0.14|POINT(-172.9672488 49.1937968)|POINT(-172.98107288
49.05396864)|\n\
+//! 2|172|1|1|1997-12-24 08:47:14|1997-12-24
09:28:57|0.03|0.00|0.04|0.01|POINT(-167.9115996
14.079737600000001)|POINT(-167.89786479 14.08189417)|\n\
+//! 3|46|1|1|1993-06-27 13:27:07|1993-06-27
13:34:51|0.00|0.00|0.00|0.00|POINT(-126.100386
29.902134399999994)|POINT(-126.10217891 29.90028418)|"
//! );
//! ```
//!
diff --git a/spatialbench/src/spider_defaults.rs b/spatialbench/src/spider_defaults.rs
index 883c360..f521dd0 100644
--- a/spatialbench/src/spider_defaults.rs
+++ b/spatialbench/src/spider_defaults.rs
@@ -29,7 +29,7 @@ impl SpiderDefaults {
polysize: 0.0,
params: DistributionParams::Bit {
- probability: 0.2,
+ probability: 0.35,
digits: 30,
},
};