This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-spatialbench.git

commit 4bf49418d0b57ed2f3424305c0df0212d7a8661d
Author: Pranav Toggi <[email protected]>
AuthorDate: Fri Jun 27 08:34:37 2025 -0700

    add support for box and polygon types
---
 tpchgen-arrow/src/trip.rs     |  20 +--
 tpchgen/src/csv.rs            |   4 +-
 tpchgen/src/dates.rs          |  38 +++++-
 tpchgen/src/generators.rs     |  71 +++++-----
 tpchgen/src/spider.rs         | 296 ++++++++++++++++++++----------------------
 tpchgen/src/spider_presets.rs | 104 ++++++++++++++-
 6 files changed, 316 insertions(+), 217 deletions(-)

diff --git a/tpchgen-arrow/src/trip.rs b/tpchgen-arrow/src/trip.rs
index 93b6c61..afab560 100644
--- a/tpchgen-arrow/src/trip.rs
+++ b/tpchgen-arrow/src/trip.rs
@@ -1,6 +1,6 @@
 use crate::conversions::{decimal128_array_from_iter, to_arrow_date32};
 use crate::{DEFAULT_BATCH_SIZE, RecordBatchIterator};
-use arrow::array::{Date32Array, Float64Array, Int64Array, RecordBatch};
+use arrow::array::{Date32Array, Int64Array, RecordBatch, StringViewArray};
 use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use std::sync::{Arc, LazyLock, Mutex};
 use tpchgen::generators::{Trip, TripGenerator, TripGeneratorIterator};
@@ -79,10 +79,8 @@ impl Iterator for TripArrow {
         let t_tip = decimal128_array_from_iter(rows.iter().map(|row| row.t_tip));
         let t_totalamount = decimal128_array_from_iter(rows.iter().map(|row| row.t_totalamount));
         let t_distance = decimal128_array_from_iter(rows.iter().map(|row| row.t_distance));
-        let t_pickupx = Float64Array::from_iter_values(rows.iter().map(|row| row.t_pickupx));
-        let t_pickupy = Float64Array::from_iter_values(rows.iter().map(|row| row.t_pickupy));
-        let t_dropoffx = Float64Array::from_iter_values(rows.iter().map(|row| row.t_dropoffx));
-        let t_dropoffy = Float64Array::from_iter_values(rows.iter().map(|row| row.t_dropoffy));
+        let t_pickuploc = StringViewArray::from_iter_values(rows.iter().map(|row| row.t_pickuploc.clone()));
+        let t_dropoffloc = StringViewArray::from_iter_values(rows.iter().map(|row| row.t_dropoffloc.clone()));
 
         let batch = RecordBatch::try_new(
             Arc::clone(&self.schema),
@@ -97,10 +95,8 @@ impl Iterator for TripArrow {
                 Arc::new(t_tip),
                 Arc::new(t_totalamount),
                 Arc::new(t_distance),
-                Arc::new(t_pickupx),
-                Arc::new(t_pickupy),
-                Arc::new(t_dropoffx),
-                Arc::new(t_dropoffy),
+                Arc::new(t_pickuploc),
+                Arc::new(t_dropoffloc),
             ],
         )
             .unwrap();
@@ -124,9 +120,7 @@ fn make_trip_schema() -> SchemaRef {
         Field::new("t_tip", DataType::Decimal128(15, 2), false),
         Field::new("t_totalamount", DataType::Decimal128(15, 2), false),
         Field::new("t_distance", DataType::Decimal128(15, 2), false),
-        Field::new("t_pickupx", DataType::Float64, false),
-        Field::new("t_pickupy", DataType::Float64, false),
-        Field::new("t_dropoffx", DataType::Float64, false),
-        Field::new("t_dropoffy", DataType::Float64, false),
+        Field::new("t_pickuploc", DataType::Utf8View, false),
+        Field::new("t_dropoffloc", DataType::Utf8View, false),
     ]))
 }
\ No newline at end of file
diff --git a/tpchgen/src/csv.rs b/tpchgen/src/csv.rs
index d0fbea5..c8b126f 100644
--- a/tpchgen/src/csv.rs
+++ b/tpchgen/src/csv.rs
@@ -432,8 +432,8 @@ impl Display for TripCsv {
             self.inner.t_tip,
             self.inner.t_totalamount,
             self.inner.t_distance,
-            self.inner.t_pickupx,
-            self.inner.t_pickupy,
+            self.inner.t_pickuploc,
+            self.inner.t_dropoffloc,
         )
     }
 }
diff --git a/tpchgen/src/dates.rs b/tpchgen/src/dates.rs
index f6630b2..954ed57 100644
--- a/tpchgen/src/dates.rs
+++ b/tpchgen/src/dates.rs
@@ -37,22 +37,48 @@ static JULIAN_DATE: LazyLock<Vec<i32>> = LazyLock::new(|| {
 pub struct GenerateUtils;
 
 impl GenerateUtils {
-    /// Calculates row count for a specific part of the data
+    /// Calculates row count with linear scaling (original behavior)
     pub fn calculate_row_count(
         scale_base: i32,
         scale_factor: f64,
         part: i32,
         part_count: i32,
     ) -> i64 {
-        let total_row_count = (scale_base as f64 * scale_factor) as i64;
-        let mut row_count = total_row_count / part_count as i64;
+        Self::calculate_scaled_row_count(scale_base, scale_factor, part, part_count, false)
+    }
+
+    /// Calculates row count with logarithmic scaling (for buildings)
+    pub fn calculate_logarithmic_row_count(
+        scale_base: i32,
+        scale_factor: f64,
+        part: i32,
+        part_count: i32,
+    ) -> i64 {
+        Self::calculate_scaled_row_count(scale_base, scale_factor, part, part_count, true)
+    }
+
+    /// Internal implementation for row count calculation with scaling option
+    fn calculate_scaled_row_count(
+        scale_base: i32,
+        scale_factor: f64,
+        part: i32,
+        part_count: i32,
+        log_scale: bool,
+    ) -> i64 {
+        let total_row_count = if log_scale {
+            (scale_base as f64 * (1.0 + scale_factor.log2())) as i64
+        } else {
+            (scale_base as f64 * scale_factor) as i64
+        };
+
+        let rows_per_part = total_row_count / part_count as i64;
 
         if part == part_count {
             // for the last part, add the remainder rows
-            row_count += total_row_count % part_count as i64;
+            rows_per_part + (total_row_count % part_count as i64)
+        } else {
+            rows_per_part
         }
-
-        row_count
     }
 
     /// Calculates start index for a specific part of the data
diff --git a/tpchgen/src/generators.rs b/tpchgen/src/generators.rs
index a87c39e..3a482d6 100644
--- a/tpchgen/src/generators.rs
+++ b/tpchgen/src/generators.rs
@@ -392,7 +392,7 @@ pub struct VehicleGenerator<'a> {
 
 impl<'a> VehicleGenerator<'a> {
     /// Base scale for vehicle generation
-    const SCALE_BASE: i32 = 200_000;
+    const SCALE_BASE: i32 = 100;
 
     // Constants for vehicle generation
     const NAME_WORDS: i32 = 5;
@@ -653,7 +653,7 @@ pub struct DriverGenerator<'a> {
 
 impl<'a> DriverGenerator<'a> {
     /// Base scale for Driver generation
-    const SCALE_BASE: i32 = 10_000;
+    const SCALE_BASE: i32 = 500;
 
     /// Base scale for vehicle-driver generation
     const DRIVERS_PER_VEHICLE: i32 = 4;
@@ -942,7 +942,7 @@ pub struct CustomerGenerator<'a> {
 
 impl<'a> CustomerGenerator<'a> {
     /// Base scale for customer generation
-    const SCALE_BASE: i32 = 150_000;
+    const SCALE_BASE: i32 = 30_000;
 
     // Constants for customer generation
     const ACCOUNT_BALANCE_MIN: i32 = -99999;
@@ -1999,18 +1999,16 @@ pub struct Trip {
     /// Trip distance
     pub t_distance: TPCHDecimal,
     /// Trip pickup coordinates
-    pub t_pickupx: f64,
-    pub t_pickupy: f64,
+    pub t_pickuploc: String,
     /// Trip dropoff coordinates
-    pub t_dropoffx: f64,
-    pub t_dropoffy: f64,
+    pub t_dropoffloc: String,
 }
 
 impl fmt::Display for Trip {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(
             f,
-            "{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|",
+            "{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|",
             self.t_tripkey,
             self.t_custkey,
             self.t_driverkey,
@@ -2021,10 +2019,8 @@ impl fmt::Display for Trip {
             self.t_tip,
             self.t_totalamount,
             self.t_distance,
-            self.t_pickupx,
-            self.t_pickupy,
-            self.t_dropoffx,
-            self.t_dropoffy,
+            self.t_pickuploc,
+            self.t_dropoffloc,
         )
     }
 }
@@ -2043,7 +2039,7 @@ pub struct TripGenerator {
 
 impl TripGenerator {
     /// Base scale for trip generation
-    const SCALE_BASE: i32 = 500_000;
+    const SCALE_BASE: i32 = 6_000_000;
 
     // Constants for trip generation
     const FARE_MIN_PER_MILE: i32 = 150; // $1.50 per mile
@@ -2062,7 +2058,7 @@ impl TripGenerator {
             Distributions::static_default(),
             TextPool::get_or_init_default(),
             crate::kde::default_distance_kde(),
-            SpiderPresets::for_trip_pickups(),
+            SpiderPresets::for_trip_pickups4(),
         )
     }
 
@@ -2247,16 +2243,25 @@ impl TripGeneratorIterator {
         let distance = TPCHDecimal((distance_value * 100.0) as i64);
 
         // Pickup
-        let (pickup_x, pickup_y) = self.spatial_gen.generate_pickup_point(trip_key as u64);
+        let pickuploc = self.spatial_gen.generate(trip_key as u64);
+
+        // Extract just the coordinates part by removing "POINT (" and ")"
+        let coords_str = pickuploc.trim_start_matches("POINT (").trim_end_matches(")");
+        let coords: Vec<&str> = coords_str.split_whitespace().collect();
+
+        // Parse the coordinates directly
+        let pickup_x = coords[0].parse::<f64>().unwrap();
+        let pickup_y = coords[1].parse::<f64>().unwrap();
 
         // Angle
-        let angle_seed = crate::spider::spider_seed_for_index(trip_key as u64, 1234);
+        let angle_seed = spider_seed_for_index(trip_key as u64, 1234);
         let mut angle_rng = StdRng::seed_from_u64(angle_seed);
         let angle: f64 = angle_rng.gen::<f64>() * std::f64::consts::TAU;
 
         // Dropoff via polar projection
         let dropoff_x = pickup_x + distance_value * angle.cos();
         let dropoff_y = pickup_y + distance_value * angle.sin();
+        let dropoffloc = format!("POINT ({} {})", dropoff_x, dropoff_y);
 
         // Fix multiplication of f64 by integers by using f64 literals
         let fare_per_mile = self.fare_per_mile_random.next_value() as f64;
@@ -2287,10 +2292,8 @@ impl TripGeneratorIterator {
             t_tip: tip,
             t_totalamount: total,
             t_distance: distance,
-            t_pickupx: pickup_x,
-            t_pickupy: pickup_y,
-            t_dropoffx: dropoff_x,
-            t_dropoffy: dropoff_y,
+            t_pickuploc: pickuploc,
+            t_dropoffloc: dropoffloc,
         }
     }
 }
@@ -2396,7 +2399,7 @@ impl<'a> BuildingGenerator<'a> {
 
     /// Return the row count for the given scale factor and generator part count
     pub fn calculate_row_count(scale_factor: f64, part: i32, part_count: i32) -> i64 {
-        GenerateUtils::calculate_row_count(Self::SCALE_BASE, scale_factor, part, part_count)
+        GenerateUtils::calculate_logarithmic_row_count(Self::SCALE_BASE, scale_factor, part, part_count)
     }
 
     /// Returns an iterator over the part rows
@@ -2429,7 +2432,6 @@ impl<'a> IntoIterator for &'a BuildingGenerator<'a> {
 #[derive(Debug)]
 pub struct BuildingGeneratorIterator<'a> {
     name_random: RandomStringSequence<'a>,
-    wkt_random: RandomText<'a>,
     spatial_gen: SpiderGenerator,
 
     start_index: i64,
@@ -2462,7 +2464,6 @@ impl<'a> BuildingGeneratorIterator<'a> {
 
         BuildingGeneratorIterator {
             name_random,
-            wkt_random,
             start_index,
             row_count,
             spatial_gen,
@@ -2474,10 +2475,7 @@ impl<'a> BuildingGeneratorIterator<'a> {
     /// Creates a part with the given key
     fn make_building(&mut self, building_key: i64) -> Building<'a> {
         let name = self.name_random.next_value();
-
-        let seed = spider_seed_for_index(building_key as u64, 1234);
-        let mut rng = StdRng::seed_from_u64(seed);
-        let wkt = self.spatial_gen.generate_parcel(&mut rng);
+        let wkt = self.spatial_gen.generate(building_key as u64);
 
         Building {
             b_buildingkey: building_key,
@@ -2498,7 +2496,6 @@ impl<'a> Iterator for BuildingGeneratorIterator<'a> {
         let building = self.make_building(self.start_index + self.index + 1);
 
         self.name_random.row_finished();
-        self.wkt_random.row_finished();
 
         self.index += 1;
 
@@ -2637,7 +2634,7 @@ mod tests {
         let trips: Vec<_> = generator.iter().collect();
 
         // Should have 0.01 * 1,000,000 = 10,000 trips
-        assert_eq!(trips.len(), 5000);
+        assert_eq!(trips.len(), 200);
 
         // Check first trip
         let first = &trips[0];
@@ -2658,7 +2655,7 @@ mod tests {
 
         // Verify the string format matches the expected pattern
         let expected_pattern = format!(
-            "{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|",
+            "{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|",
             first.t_tripkey,
             first.t_custkey,
             first.t_driverkey,
@@ -2669,27 +2666,25 @@ mod tests {
             first.t_tip,
             first.t_totalamount,
             first.t_distance,
-            first.t_pickupx,
-            first.t_pickupy,
-            first.t_dropoffx,
-            first.t_dropoffy,
+            first.t_pickuploc,
+            first.t_dropoffloc,
         );
         assert_eq!(first.to_string(), expected_pattern);
 
         // Check first Trip
         let first = &trips[1];
         assert_eq!(first.t_tripkey, 2);
-        assert_eq!(first.to_string(), "2|851|1286|1285|1997-12-25|1997-12-25|0.03|0.00|0.04|0.01|-102.20681068856331|34.032813907715486|-102.19307587853756|34.03497048015551|")
+        assert_eq!(first.to_string(), "2|851|1286|1285|1997-12-25|1997-12-25|0.03|0.00|0.04|0.01|POINT (-102.44792625704861 37.56233603076481)|POINT (-102.43419144702285 37.56449260320483)|")
     }
 
     #[test]
     fn test_building_generation() {
         // Create a generator with a small scale factor
-        let generator = BuildingGenerator::new(0.01, 1, 1);
+        let generator = BuildingGenerator::new(1.0, 1, 1);
         let buildings: Vec<_> = generator.iter().collect();
 
         // Should have 0.01 * 20,000 = 200 buildings
-        assert_eq!(buildings.len(), 200);
+        assert_eq!(buildings.len(), 20_000);
 
         // Check first building
         let first = &buildings[0];
@@ -2707,7 +2702,7 @@ mod tests {
         // Check first Building
         let first = &buildings[1];
         assert_eq!(first.b_buildingkey, 2);
-        assert_eq!(first.to_string(), "2|blush|lar accounts amo|")
+        assert_eq!(first.to_string(), "2|blush|POLYGON ((-102.2154579691 40.5193652499, -102.2133112848 40.5193652499, -102.2133112848 40.5207006446, -102.2154579691 40.5207006446, -102.2154579691 40.5193652499))|")
     }
 
     #[test]
diff --git a/tpchgen/src/spider.rs b/tpchgen/src/spider.rs
index 7d1042b..9693d86 100644
--- a/tpchgen/src/spider.rs
+++ b/tpchgen/src/spider.rs
@@ -1,3 +1,4 @@
+use std::f64::consts::PI;
 use rand::{Rng, SeedableRng};
 use rand::rngs::StdRng;
 
@@ -8,7 +9,6 @@ pub enum DistributionType {
     Diagonal,
     Sierpinski,
     Bit,
-    Parcel,
 }
 
 #[derive(Debug, Clone, Copy)]
@@ -18,15 +18,6 @@ pub enum GeomType {
     Point,
 }
 
-#[derive(Debug, Clone)]
-pub struct BoxWithDepth {
-    pub depth: i32,
-    pub x: f64,
-    pub y: f64,
-    pub w: f64,
-    pub h: f64,
-}
-
 #[derive(Debug, Clone)]
 pub enum DistributionParams {
     None,
@@ -66,7 +57,7 @@ impl SpiderGenerator {
         Self { config }
     }
 
-    pub fn generate_point(&self, index: u64) -> (f64, f64) {
+    pub fn generate(&self, index: u64) -> String {
         let seed = spider_seed_for_index(index, self.config.seed as u64);
         let mut rng = StdRng::seed_from_u64(seed);
 
@@ -75,57 +66,77 @@ impl SpiderGenerator {
             DistributionType::Normal => self.generate_normal(&mut rng),
             DistributionType::Diagonal => self.generate_diagonal(&mut rng),
             DistributionType::Bit => self.generate_bit(&mut rng),
-            DistributionType::Sierpinski => self.generate_sierpinski(&mut rng),
-            _ => (rng.gen(), rng.gen())
+            DistributionType::Sierpinski => self.generate_sierpinski(&mut rng)
         }
-
     }
 
-    fn generate_uniform(&self, rng: &mut StdRng) -> (f64, f64) {
-        (rand_unit(rng), rand_unit(rng))
+    fn generate_uniform(&self, rng: &mut StdRng) -> String {
+        let x = rand_unit(rng);
+        let y = rand_unit(rng);
+
+        match self.config.geom_type {
+            GeomType::Point => generate_point_wkt((x, y), &self.config),
+            GeomType::Box => generate_box_wkt((x, y), &self.config, rng),
+            GeomType::Polygon => generate_polygon_wkt((x, y), &self.config, rng),
+        }
     }
 
-    fn generate_normal(&self, rng: &mut StdRng) -> (f64, f64) {
-        if let DistributionParams::Normal { mu, sigma } = self.config.params {
-            let x = rand_normal(rng, mu, sigma).clamp(0.0, 1.0);
-            let y = rand_normal(rng, mu, sigma).clamp(0.0, 1.0);
-            (x, y)
-        } else {
-            // Default values or error handling
-            (rng.gen(), rng.gen())
+    fn generate_normal(&self, rng: &mut StdRng) -> String {
+        match self.config.params {
+            DistributionParams::Normal { mu, sigma } => {
+                let x = rand_normal(rng, mu, sigma).clamp(0.0, 1.0);
+                let y = rand_normal(rng, mu, sigma).clamp(0.0, 1.0);
+                match self.config.geom_type {
+                    GeomType::Point => generate_point_wkt((x, y), &self.config),
+                    GeomType::Box => generate_box_wkt((x, y), &self.config, rng),
+                    GeomType::Polygon => generate_polygon_wkt((x, y), &self.config, rng),
+                }
+            },
+            _ => panic!("Expected Normal distribution parameters but got {:?}", self.config.params)
         }
     }
 
-    fn generate_diagonal(&self, rng: &mut StdRng) -> (f64, f64) {
-        if let DistributionParams::Diagonal { percentage, buffer } = self.config.params {
-            if rng.gen::<f64>() < percentage {
-                let v = rng.gen();
-                (v, v)
-            } else {
-                let c: f64 = rng.gen();
-                let d: f64 = rand_normal(rng, 0.0, buffer / 5.0);
-                let x: f64 = (c + d / f64::sqrt(2.0)).clamp(0.0, 1.0);
-                let y: f64 = (c - d / f64::sqrt(2.0)).clamp(0.0, 1.0);
-                (x, y)
-            }
-        } else {
-            // Default values or error handling
-            (rng.gen(), rng.gen())
+    fn generate_diagonal(&self, rng: &mut StdRng) -> String {
+        match self.config.params {
+            DistributionParams::Diagonal { percentage, buffer } => {
+                let (x, y) = if rng.gen::<f64>() < percentage {
+                    let v = rng.gen();
+                    (v, v)
+                } else {
+                    let c: f64 = rng.gen();
+                    let d: f64 = rand_normal(rng, 0.0, buffer / 5.0);
+                    let x: f64 = (c + d / f64::sqrt(2.0)).clamp(0.0, 1.0);
+                    let y: f64 = (c - d / f64::sqrt(2.0)).clamp(0.0, 1.0);
+                    (x, y)
+                };
+
+                match self.config.geom_type {
+                    GeomType::Point => generate_point_wkt((x, y), &self.config),
+                    GeomType::Box => generate_box_wkt((x, y), &self.config, rng),
+                    GeomType::Polygon => generate_polygon_wkt((x, y), &self.config, rng),
+                }
+            },
+            _ => panic!("Expected Diagonal distribution parameters but got {:?}", self.config.params)
         }
     }
 
-    fn generate_bit(&self, rng: &mut StdRng) -> (f64, f64) {
-        if let DistributionParams::Bit { probability, digits } = self.config.params {
-            let x = spider_bit(rng, probability, digits);
-            let y = spider_bit(rng, probability, digits);
-            (x, y)
-        } else {
-            // Default values or error handling
-            (rng.gen(), rng.gen())
+    fn generate_bit(&self, rng: &mut StdRng) -> String {
+        match self.config.params {
+            DistributionParams::Bit { probability, digits } => {
+                let x = spider_bit(rng, probability, digits);
+                let y = spider_bit(rng, probability, digits);
+
+                match self.config.geom_type {
+                    GeomType::Point => generate_point_wkt((x, y), &self.config),
+                    GeomType::Box => generate_box_wkt((x, y), &self.config, rng),
+                    GeomType::Polygon => generate_polygon_wkt((x, y), &self.config, rng),
+                }
+            },
+            _ => panic!("Expected Bit distribution parameters but got {:?}", self.config.params)
         }
     }
 
-    fn generate_sierpinski(&self, rng: &mut StdRng) -> (f64, f64) {
+    fn generate_sierpinski(&self, rng: &mut StdRng) -> String {
         let (mut x, mut y) = (0.0, 0.0);
         let a = (0.0, 0.0);
         let b = (1.0, 0.0);
@@ -137,100 +148,11 @@ impl SpiderGenerator {
                 _ => { x = (x + c.0) / 2.0; y = (y + c.1) / 2.0; }
             }
         }
-        (x, y)
-    }
-
-    pub fn generate_parcel(&self, rng: &mut StdRng) -> String {
-        if let DistributionParams::Parcel { srange, dither } = self.config.params {
-            let mut box_stack = vec![BoxWithDepth {
-                depth: 0,
-                x: 0.0,
-                y: 0.0,
-                w: 1.0,
-                h: 1.0,
-            }];
-
-            // Pick a depth based on dim (log2) or fixed depth
-            let depth_limit = 6; // You can make this configurable if needed
-
-            for _ in 0..depth_limit {
-                let b = box_stack.pop().unwrap();
-                let (b1, b2) = if b.w > b.h {
-                    let split = b.w * (srange + rand_unit(rng) * (1.0 - 2.0 * srange));
-                    (
-                        BoxWithDepth { depth: b.depth + 1, x: b.x, y: b.y, w: split, h: b.h },
-                        BoxWithDepth { depth: b.depth + 1, x: b.x + split, y: b.y, w: b.w - split, h: b.h },
-                    )
-                } else {
-                    let split = b.h * (srange + rand_unit(rng) * (1.0 - 2.0 * srange));
-                    (
-                        BoxWithDepth { depth: b.depth + 1, x: b.x, y: b.y, w: b.w, h: split },
-                        BoxWithDepth { depth: b.depth + 1, x: b.x, y: b.y + split, w: b.w, h: b.h - split },
-                    )
-                };
-
-                // Randomly pick one of the two
-                if rng.gen_bool(0.5) {
-                    box_stack.push(b1);
-                } else {
-                    box_stack.push(b2);
-                }
-            }
-
-            let mut b = box_stack.pop().unwrap();
-
-            // Apply dither
-            let dx = b.w * dither * (rand_unit(rng) - 0.5);
-            let dy = b.h * dither * (rand_unit(rng) - 0.5);
-            b.x += dx / 2.0;
-            b.y += dy / 2.0;
-            b.w -= dx;
-            b.h -= dy;
-
-            // Pick random point inside the box
-            let _x = b.x + rand_unit(rng) * b.w;
-            let _y = b.y + rand_unit(rng) * b.h;
-
-            self.box_to_wkt(&b)
-        } else {
-            self.box_to_wkt(&BoxWithDepth {
-                depth: 0,
-                x: 0.0,
-                y: 0.0,
-                w: 1.0,
-                h: 1.0,
-            })
-        }
-    }
-
-    fn box_to_wkt(&self, b: &BoxWithDepth) -> String {
-        let corners = [
-            (b.x, b.y),
-            (b.x + b.w, b.y),
-            (b.x + b.w, b.y + b.h),
-            (b.x, b.y + b.h),
-            (b.x, b.y),
-        ];
-
-        let affine = self.config.affine.unwrap_or([1.0, 0.0, 0.0, 0.0, 1.0, 0.0]);
-
-        let coords: Vec<String> = corners
-            .iter()
-            .map(|&(x, y)| {
-                let (tx, ty) = apply_affine(x, y, &affine);
-                format!("{:.6} {:.6}", tx, ty)
-            })
-            .collect();
-
-        format!("POLYGON (({}))", coords.join(", "))
-    }
 
-    pub fn generate_pickup_point(&self, trip_id: u64) -> (f64, f64) {
-        let (x, y) = self.generate_point(trip_id);
-        if let Some(aff) = &self.config.affine {
-            apply_affine(x, y, aff)
-        } else {
-            (x, y)
+        match self.config.geom_type {
+            GeomType::Point => generate_point_wkt((x, y), &self.config),
+            GeomType::Box => generate_box_wkt((x, y), &self.config, rng),
+            GeomType::Polygon => generate_polygon_wkt((x, y), &self.config, rng),
         }
     }
 }
@@ -267,16 +189,82 @@ fn spider_bit(rng: &mut StdRng, prob: f64, digits: u32) -> f64 {
         .sum()
 }
 
-// impl Default for SpiderGenerator {
-//     fn default() -> Self {
-//         let config = SpiderConfig {
-//             dist: SpiderDistribution::Uniform,
-//             global_seed: 42,
-//             affine: Some([
-//                 58.368269, 0.0, -125.244606, // scale X to 58.37°, offset to -125.24°
-//                 0.0, 25.175375, 24.006328,    // scale Y to 25.18°, offset to 24.00°
-//             ]),
-//         };
-//         SpiderGenerator::new(config)
-//     }
-// }
\ No newline at end of file
+pub fn generate_point_wkt(center: (f64, f64), config: &SpiderConfig) -> String {
+    let (x, y) = if let Some(aff) = &config.affine {
+        apply_affine(center.0, center.1, aff)
+    } else {
+        center
+    };
+    format!("POINT ({} {})", x, y)
+}
+
+pub fn generate_box_wkt(center: (f64, f64), config: &SpiderConfig, rng: &mut StdRng) -> String {
+    let half_width = rand_unit(rng) * config.width / 2.0;
+    let half_height = rand_unit(rng) * config.height / 2.0;
+
+    let corners = [
+        (center.0 - half_width, center.1 - half_height), // lower-left
+        (center.0 + half_width, center.1 - half_height), // lower-right
+        (center.0 + half_width, center.1 + half_height), // upper-right
+        (center.0 - half_width, center.1 + half_height), // upper-left
+        (center.0 - half_width, center.1 - half_height), // close ring
+    ];
+
+    let coords: Vec<String> = corners.iter().map(|&(x, y)| {
+        let (tx, ty) = if let Some(aff) = &config.affine {
+            apply_affine(x, y, aff)
+        } else {
+            (x, y)
+        };
+        format!("{:.10} {:.10}", tx, ty)
+    }).collect();
+
+    format!("POLYGON (({}))", coords.join(", "))
+}
+
+pub fn generate_polygon_wkt(center: (f64, f64), config: &SpiderConfig, rng: &mut StdRng) -> String {
+    let min_segs = 3;
+    let num_segments = if config.maxseg <= 3 {
+        3
+    } else {
+        rng.gen_range(0..=(config.maxseg - min_segs)) + min_segs
+    };
+
+    // Generate random angles
+    let mut angles: Vec<f64> = (0..num_segments)
+        .map(|_| rand_unit(rng) * 2.0 * PI)
+        .collect();
+
+    // Sort angles to form a valid polygon
+    angles.sort_by(|a, b| a.partial_cmp(b).unwrap());
+
+    let mut coords = Vec::with_capacity((num_segments + 1) as usize);
+
+    for angle in &angles {
+        let local = (
+            center.0 + config.polysize * angle.cos(),
+            center.1 + config.polysize * angle.sin(),
+        );
+        let (tx, ty) = if let Some(aff) = &config.affine {
+            apply_affine(local.0, local.1, aff)
+        } else {
+            local
+        };
+        coords.push(format!("{:.10} {:.10}", tx, ty));
+    }
+
+    // Close the ring by repeating the first point
+    let first_angle = angles[0];
+    let local0 = (
+        center.0 + config.polysize * first_angle.cos(),
+        center.1 + config.polysize * first_angle.sin(),
+    );
+    let (tx0, ty0) = if let Some(aff) = &config.affine {
+        apply_affine(local0.0, local0.1, aff)
+    } else {
+        local0
+    };
+    coords.push(format!("{:.10} {:.10}", tx0, ty0));
+
+    format!("POLYGON (({}))", coords.join(", "))
+}
\ No newline at end of file
diff --git a/tpchgen/src/spider_presets.rs b/tpchgen/src/spider_presets.rs
index e6827b0..ee11806 100644
--- a/tpchgen/src/spider_presets.rs
+++ b/tpchgen/src/spider_presets.rs
@@ -27,9 +27,105 @@ impl SpiderPresets {
         SpiderGenerator::new(config)
     }
 
+    pub fn for_trip_pickups2() -> SpiderGenerator {
+        let config = SpiderConfig {
+            dist_type: DistributionType::Diagonal,
+            geom_type: GeomType::Point,
+            dim: 2,
+            seed: 42,
+            affine: Some([
+                58.368269, 0.0, -125.244606, // scale X to 58.37°, offset to -125.24°
+                0.0, 25.175375, 24.006328,   // scale Y to 25.18°, offset to 24.00°
+            ]),
+
+            // geometry = box
+            width: 0.0,
+            height: 0.0,
+
+            // geometry = polygon
+            maxseg: 0,
+            polysize: 0.0,
+
+            params: DistributionParams::Diagonal { percentage: 0.5, buffer: 0.5},
+        };
+        SpiderGenerator::new(config)
+    }
+
+    pub fn for_trip_pickups3() -> SpiderGenerator {
+        let config = SpiderConfig {
+            dist_type: DistributionType::Sierpinski,
+            geom_type: GeomType::Point,
+            dim: 2,
+            seed: 42,
+            affine: Some([
+                58.368269, 0.0, -125.244606, // scale X to 58.37°, offset to -125.24°
+                0.0, 25.175375, 24.006328,   // scale Y to 25.18°, offset to 24.00°
+            ]),
+
+            // geometry = box
+            width: 0.0,
+            height: 0.0,
+
+            // geometry = polygon
+            maxseg: 0,
+            polysize: 0.0,
+
+            params: DistributionParams::None,
+        };
+        SpiderGenerator::new(config)
+    }
+
+    pub fn for_trip_pickups4() -> SpiderGenerator {
+        let config = SpiderConfig {
+            dist_type: DistributionType::Bit,
+            geom_type: GeomType::Point,
+            dim: 2,
+            seed: 42,
+            affine: Some([
+                58.368269, 0.0, -125.244606, // scale X to 58.37°, offset to -125.24°
+                0.0, 25.175375, 24.006328,   // scale Y to 25.18°, offset to 24.00°
+            ]),
+
+            // geometry = box
+            width: 0.0,
+            height: 0.0,
+
+            // geometry = polygon
+            maxseg: 0,
+            polysize: 0.0,
+
+            params: DistributionParams::Bit { probability: 0.2, digits: 10},
+        };
+        SpiderGenerator::new(config)
+    }
+
+    pub fn for_trip_pickups5() -> SpiderGenerator {
+        let config = SpiderConfig {
+            dist_type: DistributionType::Normal,
+            geom_type: GeomType::Point,
+            dim: 2,
+            seed: 42,
+            affine: Some([
+                58.368269, 0.0, -125.244606, // scale X to 58.37°, offset to -125.24°
+                0.0, 25.175375, 24.006328,   // scale Y to 25.18°, offset to 24.00°
+            ]),
+
+            // geometry = box
+            width: 0.0,
+            height: 0.0,
+
+            // geometry = polygon
+            maxseg: 0,
+            polysize: 0.0,
+
+            params: DistributionParams::Normal {mu: 0.5, sigma: 0.1},
+        };
+        SpiderGenerator::new(config)
+    }
+
     pub fn for_building_polygons() -> SpiderGenerator {
         let config = SpiderConfig {
-            dist_type: DistributionType::Parcel,
+            dist_type: DistributionType::Bit,
             geom_type: GeomType::Box,
             dim: 2,
             seed: 12345,
@@ -39,14 +135,14 @@ impl SpiderPresets {
             ]),
 
             // geometry = box
-            width: 0.0,
-            height: 0.0,
+            width: 0.00005,
+            height: 0.0001,
 
             // geometry = polygon
             maxseg: 0,
             polysize: 0.0,
 
-            params: DistributionParams::Parcel { srange: 0.1, dither: 2.0 },
+            params: DistributionParams::Bit { probability: 0.5, digits: 20},
         };
         SpiderGenerator::new(config)
     }

Reply via email to