This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/sedona-spatialbench.git
commit 4bf49418d0b57ed2f3424305c0df0212d7a8661d Author: Pranav Toggi <[email protected]> AuthorDate: Fri Jun 27 08:34:37 2025 -0700 add support for box and polygon types --- tpchgen-arrow/src/trip.rs | 20 +-- tpchgen/src/csv.rs | 4 +- tpchgen/src/dates.rs | 38 +++++- tpchgen/src/generators.rs | 71 +++++----- tpchgen/src/spider.rs | 296 ++++++++++++++++++++---------------------- tpchgen/src/spider_presets.rs | 104 ++++++++++++++- 6 files changed, 316 insertions(+), 217 deletions(-) diff --git a/tpchgen-arrow/src/trip.rs b/tpchgen-arrow/src/trip.rs index 93b6c61..afab560 100644 --- a/tpchgen-arrow/src/trip.rs +++ b/tpchgen-arrow/src/trip.rs @@ -1,6 +1,6 @@ use crate::conversions::{decimal128_array_from_iter, to_arrow_date32}; use crate::{DEFAULT_BATCH_SIZE, RecordBatchIterator}; -use arrow::array::{Date32Array, Float64Array, Int64Array, RecordBatch}; +use arrow::array::{Date32Array, Int64Array, RecordBatch, StringViewArray}; use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; use std::sync::{Arc, LazyLock, Mutex}; use tpchgen::generators::{Trip, TripGenerator, TripGeneratorIterator}; @@ -79,10 +79,8 @@ impl Iterator for TripArrow { let t_tip = decimal128_array_from_iter(rows.iter().map(|row| row.t_tip)); let t_totalamount = decimal128_array_from_iter(rows.iter().map(|row| row.t_totalamount)); let t_distance = decimal128_array_from_iter(rows.iter().map(|row| row.t_distance)); - let t_pickupx = Float64Array::from_iter_values(rows.iter().map(|row| row.t_pickupx)); - let t_pickupy = Float64Array::from_iter_values(rows.iter().map(|row| row.t_pickupy)); - let t_dropoffx = Float64Array::from_iter_values(rows.iter().map(|row| row.t_dropoffx)); - let t_dropoffy = Float64Array::from_iter_values(rows.iter().map(|row| row.t_dropoffy)); + let t_pickuploc = StringViewArray::from_iter_values(rows.iter().map(|row| row.t_pickuploc.clone())); + let t_dropoffloc = StringViewArray::from_iter_values(rows.iter().map(|row| row.t_dropoffloc.clone())); let batch = RecordBatch::try_new( Arc::clone(&self.schema), @@ -97,10 +95,8 @@ impl Iterator for TripArrow { Arc::new(t_tip), Arc::new(t_totalamount), Arc::new(t_distance), - Arc::new(t_pickupx), - Arc::new(t_pickupy), - Arc::new(t_dropoffx), - Arc::new(t_dropoffy), + Arc::new(t_pickuploc), + Arc::new(t_dropoffloc), ], ) .unwrap(); @@ -124,9 +120,7 @@ fn make_trip_schema() -> SchemaRef { Field::new("t_tip", DataType::Decimal128(15, 2), false), Field::new("t_totalamount", DataType::Decimal128(15, 2), false), Field::new("t_distance", DataType::Decimal128(15, 2), false), - Field::new("t_pickupx", DataType::Float64, false), - Field::new("t_pickupy", DataType::Float64, false), - Field::new("t_dropoffx", DataType::Float64, false), - Field::new("t_dropoffy", DataType::Float64, false), + Field::new("t_pickuploc", DataType::Utf8View, false), + Field::new("t_dropoffloc", DataType::Utf8View, false), ])) } \ No newline at end of file diff --git a/tpchgen/src/csv.rs b/tpchgen/src/csv.rs index d0fbea5..c8b126f 100644 --- a/tpchgen/src/csv.rs +++ b/tpchgen/src/csv.rs @@ -432,8 +432,8 @@ impl Display for TripCsv { self.inner.t_tip, self.inner.t_totalamount, self.inner.t_distance, - self.inner.t_pickupx, - self.inner.t_pickupy, + self.inner.t_pickuploc, + self.inner.t_dropoffloc, ) } } diff --git a/tpchgen/src/dates.rs b/tpchgen/src/dates.rs index f6630b2..954ed57 100644 --- a/tpchgen/src/dates.rs +++ b/tpchgen/src/dates.rs @@ -37,22 +37,48 @@ static JULIAN_DATE: LazyLock<Vec<i32>> = LazyLock::new(|| { pub struct GenerateUtils; impl GenerateUtils { - /// Calculates row count for a specific part of the data + /// Calculates row count with linear scaling (original behavior) pub fn calculate_row_count( scale_base: i32, scale_factor: f64, part: i32, part_count: i32, ) -> i64 { - let total_row_count = (scale_base as f64 * scale_factor) as i64; - let mut row_count = total_row_count / part_count as i64; + Self::calculate_scaled_row_count(scale_base, scale_factor, part, part_count, false) + } + + /// Calculates row count with logarithmic scaling (for buildings) + pub fn calculate_logarithmic_row_count( + scale_base: i32, + scale_factor: f64, + part: i32, + part_count: i32, + ) -> i64 { + Self::calculate_scaled_row_count(scale_base, scale_factor, part, part_count, true) + } + + /// Internal implementation for row count calculation with scaling option + fn calculate_scaled_row_count( + scale_base: i32, + scale_factor: f64, + part: i32, + part_count: i32, + log_scale: bool, + ) -> i64 { + let total_row_count = if log_scale { + (scale_base as f64 * (1.0 + scale_factor.log2())) as i64 + } else { + (scale_base as f64 * scale_factor) as i64 + }; + + let rows_per_part = total_row_count / part_count as i64; if part == part_count { // for the last part, add the remainder rows - row_count += total_row_count % part_count as i64; + rows_per_part + (total_row_count % part_count as i64) + } else { + rows_per_part } - - row_count } /// Calculates start index for a specific part of the data diff --git a/tpchgen/src/generators.rs b/tpchgen/src/generators.rs index a87c39e..3a482d6 100644 --- a/tpchgen/src/generators.rs +++ b/tpchgen/src/generators.rs @@ -392,7 +392,7 @@ pub struct VehicleGenerator<'a> { impl<'a> VehicleGenerator<'a> { /// Base scale for vehicle generation - const SCALE_BASE: i32 = 200_000; + const SCALE_BASE: i32 = 100; // Constants for vehicle generation const NAME_WORDS: i32 = 5; @@ -653,7 +653,7 @@ pub struct DriverGenerator<'a> { impl<'a> DriverGenerator<'a> { /// Base scale for Driver generation - const SCALE_BASE: i32 = 10_000; + const SCALE_BASE: i32 = 500; /// Base scale for vehicle-driver generation const DRIVERS_PER_VEHICLE: i32 = 4; @@ -942,7 +942,7 @@ pub struct CustomerGenerator<'a> { impl<'a> CustomerGenerator<'a> { /// Base scale for customer generation - const SCALE_BASE: i32 = 150_000; + const SCALE_BASE: i32 = 30_000; // Constants for customer generation const ACCOUNT_BALANCE_MIN: i32 = -99999; @@ -1999,18 +1999,16 @@ pub struct Trip { /// Trip distance pub t_distance: TPCHDecimal, /// Trip pickup coordinates - pub t_pickupx: f64, - pub t_pickupy: f64, + pub t_pickuploc: String, /// Trip dropoff coordinates - pub t_dropoffx: f64, - pub t_dropoffy: f64, + pub t_dropoffloc: String, } impl fmt::Display for Trip { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, - "{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|", + "{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|", self.t_tripkey, self.t_custkey, self.t_driverkey, @@ -2021,10 +2019,8 @@ impl fmt::Display for Trip { self.t_tip, self.t_totalamount, self.t_distance, - self.t_pickupx, - self.t_pickupy, - self.t_dropoffx, - self.t_dropoffy, + self.t_pickuploc, + self.t_dropoffloc, ) } } @@ -2043,7 +2039,7 @@ pub struct TripGenerator { impl TripGenerator { /// Base scale for trip generation - const SCALE_BASE: i32 = 500_000; + const SCALE_BASE: i32 = 6_000_000; // Constants for trip generation const FARE_MIN_PER_MILE: i32 = 150; // $1.50 per mile @@ -2062,7 +2058,7 @@ impl TripGenerator { Distributions::static_default(), TextPool::get_or_init_default(), crate::kde::default_distance_kde(), - SpiderPresets::for_trip_pickups(), + SpiderPresets::for_trip_pickups4(), ) } @@ -2247,16 +2243,25 @@ impl TripGeneratorIterator { let distance = TPCHDecimal((distance_value * 100.0) as i64); // Pickup - let (pickup_x, pickup_y) = self.spatial_gen.generate_pickup_point(trip_key as u64); + let pickuploc = self.spatial_gen.generate(trip_key as u64); + + // Extract just the coordinates part by removing "POINT (" and ")" + let coords_str = pickuploc.trim_start_matches("POINT (").trim_end_matches(")"); + let coords: Vec<&str> = coords_str.split_whitespace().collect(); + + // Parse the coordinates directly + let pickup_x = coords[0].parse::<f64>().unwrap(); + let pickup_y = coords[1].parse::<f64>().unwrap(); // Angle - let angle_seed = crate::spider::spider_seed_for_index(trip_key as u64, 1234); + let angle_seed = spider_seed_for_index(trip_key as u64, 1234); let mut angle_rng = StdRng::seed_from_u64(angle_seed); let angle: f64 = angle_rng.gen::<f64>() * std::f64::consts::TAU; // Dropoff via polar projection let dropoff_x = pickup_x + distance_value * angle.cos(); let dropoff_y = pickup_y + distance_value * angle.sin(); + let dropoffloc = format!("POINT ({} {})", dropoff_x, dropoff_y); // Fix multiplication of f64 by integers by using f64 literals let fare_per_mile = self.fare_per_mile_random.next_value() as f64; @@ -2287,10 +2292,8 @@ impl TripGeneratorIterator { t_tip: tip, t_totalamount: total, t_distance: distance, - t_pickupx: pickup_x, - t_pickupy: pickup_y, - t_dropoffx: dropoff_x, - t_dropoffy: dropoff_y, + t_pickuploc: pickuploc, + t_dropoffloc: dropoffloc, } } } @@ -2396,7 +2399,7 @@ impl<'a> BuildingGenerator<'a> { /// Return the row count for the given scale factor and generator part count pub fn calculate_row_count(scale_factor: f64, part: i32, part_count: i32) -> i64 { - GenerateUtils::calculate_row_count(Self::SCALE_BASE, scale_factor, part, part_count) + GenerateUtils::calculate_logarithmic_row_count(Self::SCALE_BASE, scale_factor, part, part_count) } /// Returns an iterator over the part rows @@ -2429,7 +2432,6 @@ impl<'a> IntoIterator for &'a BuildingGenerator<'a> { #[derive(Debug)] pub struct BuildingGeneratorIterator<'a> { name_random: RandomStringSequence<'a>, - wkt_random: RandomText<'a>, spatial_gen: SpiderGenerator, start_index: i64, @@ -2462,7 +2464,6 @@ impl<'a> BuildingGeneratorIterator<'a> { BuildingGeneratorIterator { name_random, - wkt_random, start_index, row_count, spatial_gen, @@ -2474,10 +2475,7 @@ impl<'a> BuildingGeneratorIterator<'a> { /// Creates a part with the given key fn make_building(&mut self, building_key: i64) -> Building<'a> { let name = self.name_random.next_value(); - - let seed = spider_seed_for_index(building_key as u64, 1234); - let mut rng = StdRng::seed_from_u64(seed); - let wkt = self.spatial_gen.generate_parcel(&mut rng); + let wkt = self.spatial_gen.generate(building_key as u64); Building { b_buildingkey: building_key, @@ -2498,7 +2496,6 @@ impl<'a> Iterator for BuildingGeneratorIterator<'a> { let building = self.make_building(self.start_index + self.index + 1); self.name_random.row_finished(); - self.wkt_random.row_finished(); self.index += 1; @@ -2637,7 +2634,7 @@ mod tests { let trips: Vec<_> = generator.iter().collect(); // Should have 0.01 * 1,000,000 = 10,000 trips - assert_eq!(trips.len(), 5000); + assert_eq!(trips.len(), 200); // Check first trip let first = &trips[0]; @@ -2658,7 +2655,7 @@ mod tests { // Verify the string format matches the expected pattern let expected_pattern = format!( - "{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|", + "{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|", first.t_tripkey, first.t_custkey, first.t_driverkey, @@ -2669,27 +2666,25 @@ mod tests { first.t_tip, first.t_totalamount, first.t_distance, - first.t_pickupx, - first.t_pickupy, - first.t_dropoffx, - first.t_dropoffy, + first.t_pickuploc, + first.t_dropoffloc, ); assert_eq!(first.to_string(), expected_pattern); // Check first Trip let first = &trips[1]; assert_eq!(first.t_tripkey, 2); - assert_eq!(first.to_string(), "2|851|1286|1285|1997-12-25|1997-12-25|0.03|0.00|0.04|0.01|-102.20681068856331|34.032813907715486|-102.19307587853756|34.03497048015551|") + assert_eq!(first.to_string(), "2|851|1286|1285|1997-12-25|1997-12-25|0.03|0.00|0.04|0.01|POINT (-102.44792625704861 37.56233603076481)|POINT (-102.43419144702285 37.56449260320483)|") } #[test] fn test_building_generation() { // Create a generator with a small scale factor - let generator = BuildingGenerator::new(0.01, 1, 1); + let generator = BuildingGenerator::new(1.0, 1, 1); let buildings: Vec<_> = generator.iter().collect(); // Should have 0.01 * 20,000 = 200 buildings - assert_eq!(buildings.len(), 200); + assert_eq!(buildings.len(), 20_000); // Check first building let first = &buildings[0]; @@ -2707,7 +2702,7 @@ mod tests { // Check first Building let first = &buildings[1]; assert_eq!(first.b_buildingkey, 2); - assert_eq!(first.to_string(), "2|blush|lar accounts amo|") + assert_eq!(first.to_string(), "2|blush|POLYGON ((-102.2154579691 40.5193652499, -102.2133112848 40.5193652499, -102.2133112848 40.5207006446, -102.2154579691 40.5207006446, -102.2154579691 40.5193652499))|") } #[test] diff --git a/tpchgen/src/spider.rs b/tpchgen/src/spider.rs index 7d1042b..9693d86 100644 --- a/tpchgen/src/spider.rs +++ b/tpchgen/src/spider.rs @@ -1,3 +1,4 @@ +use std::f64::consts::PI; use rand::{Rng, SeedableRng}; use rand::rngs::StdRng; @@ -8,7 +9,6 @@ pub enum DistributionType { Diagonal, Sierpinski, Bit, - Parcel, } #[derive(Debug, Clone, Copy)] @@ -18,15 +18,6 @@ pub enum GeomType { Point, } -#[derive(Debug, Clone)] -pub struct BoxWithDepth { - pub depth: i32, - pub x: f64, - pub y: f64, - pub w: f64, - pub h: f64, -} - #[derive(Debug, Clone)] pub enum DistributionParams { None, @@ -66,7 +57,7 @@ impl SpiderGenerator { Self { config } } - pub fn generate_point(&self, index: u64) -> (f64, f64) { + pub fn generate(&self, index: u64) -> String { let seed = spider_seed_for_index(index, self.config.seed as u64); let mut rng = StdRng::seed_from_u64(seed); @@ -75,57 +66,77 @@ impl SpiderGenerator { DistributionType::Normal => self.generate_normal(&mut rng), DistributionType::Diagonal => self.generate_diagonal(&mut rng), DistributionType::Bit => self.generate_bit(&mut rng), - DistributionType::Sierpinski => self.generate_sierpinski(&mut rng), - _ => (rng.gen(), rng.gen()) + DistributionType::Sierpinski => self.generate_sierpinski(&mut rng) } - } - fn generate_uniform(&self, rng: &mut StdRng) -> (f64, f64) { - (rand_unit(rng), rand_unit(rng)) + fn generate_uniform(&self, rng: &mut StdRng) -> String { + let x = rand_unit(rng); + let y = rand_unit(rng); + + match self.config.geom_type { + GeomType::Point => generate_point_wkt((x, y), &self.config), + GeomType::Box => generate_box_wkt((x, y), &self.config, rng), + GeomType::Polygon => generate_polygon_wkt((x, y), &self.config, rng), + } } - fn generate_normal(&self, rng: &mut StdRng) -> (f64, f64) { - if let DistributionParams::Normal { mu, sigma } = self.config.params { - let x = rand_normal(rng, mu, sigma).clamp(0.0, 1.0); - let y = rand_normal(rng, mu, sigma).clamp(0.0, 1.0); - (x, y) - } else { - // Default values or error handling - (rng.gen(), rng.gen()) + fn generate_normal(&self, rng: &mut StdRng) -> String { + match self.config.params { + DistributionParams::Normal { mu, sigma } => { + let x = rand_normal(rng, mu, sigma).clamp(0.0, 1.0); + let y = rand_normal(rng, mu, sigma).clamp(0.0, 1.0); + match self.config.geom_type { + GeomType::Point => generate_point_wkt((x, y), &self.config), + GeomType::Box => generate_box_wkt((x, y), &self.config, rng), + GeomType::Polygon => generate_polygon_wkt((x, y), &self.config, rng), + } + }, + _ => panic!("Expected Normal distribution parameters but got {:?}", self.config.params) } } - fn generate_diagonal(&self, rng: &mut StdRng) -> (f64, f64) { - if let DistributionParams::Diagonal { percentage, buffer } = self.config.params { - if rng.gen::<f64>() < percentage { - let v = rng.gen(); - (v, v) - } else { - let c: f64 = rng.gen(); - let d: f64 = rand_normal(rng, 0.0, buffer / 5.0); - let x: f64 = (c + d / f64::sqrt(2.0)).clamp(0.0, 1.0); - let y: f64 = (c - d / f64::sqrt(2.0)).clamp(0.0, 1.0); - (x, y) - } - } else { - // Default values or error handling - (rng.gen(), rng.gen()) + fn generate_diagonal(&self, rng: &mut StdRng) -> String { + match self.config.params { + DistributionParams::Diagonal { percentage, buffer } => { + let (x, y) = if rng.gen::<f64>() < percentage { + let v = rng.gen(); + (v, v) + } else { + let c: f64 = rng.gen(); + let d: f64 = rand_normal(rng, 0.0, buffer / 5.0); + let x: f64 = (c + d / f64::sqrt(2.0)).clamp(0.0, 1.0); + let y: f64 = (c - d / f64::sqrt(2.0)).clamp(0.0, 1.0); + (x, y) + }; + + match self.config.geom_type { + GeomType::Point => generate_point_wkt((x, y), &self.config), + GeomType::Box => generate_box_wkt((x, y), &self.config, rng), + GeomType::Polygon => generate_polygon_wkt((x, y), &self.config, rng), + } + }, + _ => panic!("Expected Diagonal distribution parameters but got {:?}", self.config.params) } } - fn generate_bit(&self, rng: &mut StdRng) -> (f64, f64) { - if let DistributionParams::Bit { probability, digits } = self.config.params { - let x = spider_bit(rng, probability, digits); - let y = spider_bit(rng, probability, digits); - (x, y) - } else { - // Default values or error handling - (rng.gen(), rng.gen()) + fn generate_bit(&self, rng: &mut StdRng) -> String { + match self.config.params { + DistributionParams::Bit { probability, digits } => { + let x = spider_bit(rng, probability, digits); + let y = spider_bit(rng, probability, digits); + + match self.config.geom_type { + GeomType::Point => generate_point_wkt((x, y), &self.config), + GeomType::Box => generate_box_wkt((x, y), &self.config, rng), + GeomType::Polygon => generate_polygon_wkt((x, y), &self.config, rng), + } + }, + _ => panic!("Expected Bit distribution parameters but got {:?}", self.config.params) } } - fn generate_sierpinski(&self, rng: &mut StdRng) -> (f64, f64) { + fn generate_sierpinski(&self, rng: &mut StdRng) -> String { let (mut x, mut y) = (0.0, 0.0); let a = (0.0, 0.0); let b = (1.0, 0.0); @@ -137,100 +148,11 @@ impl SpiderGenerator { _ => { x = (x + c.0) / 2.0; y = (y + c.1) / 2.0; } } } - (x, y) - } - - pub fn generate_parcel(&self, rng: &mut StdRng) -> String { - if let DistributionParams::Parcel { srange, dither } = self.config.params { - let mut box_stack = vec![BoxWithDepth { - depth: 0, - x: 0.0, - y: 0.0, - w: 1.0, - h: 1.0, - }]; - - // Pick a depth based on dim (log2) or fixed depth - let depth_limit = 6; // You can make this configurable if needed - - for _ in 0..depth_limit { - let b = box_stack.pop().unwrap(); - let (b1, b2) = if b.w > b.h { - let split = b.w * (srange + rand_unit(rng) * (1.0 - 2.0 * srange)); - ( - BoxWithDepth { depth: b.depth + 1, x: b.x, y: b.y, w: split, h: b.h }, - BoxWithDepth { depth: b.depth + 1, x: b.x + split, y: b.y, w: b.w - split, h: b.h }, - ) - } else { - let split = b.h * (srange + rand_unit(rng) * (1.0 - 2.0 * srange)); - ( - BoxWithDepth { depth: b.depth + 1, x: b.x, y: b.y, w: b.w, h: split }, - BoxWithDepth { depth: b.depth + 1, x: b.x, y: b.y + split, w: b.w, h: b.h - split }, - ) - }; - - // Randomly pick one of the two - if rng.gen_bool(0.5) { - box_stack.push(b1); - } else { - box_stack.push(b2); - } - } - - let mut b = box_stack.pop().unwrap(); - - // Apply dither - let dx = b.w * dither * (rand_unit(rng) - 0.5); - let dy = b.h * dither * (rand_unit(rng) - 0.5); - b.x += dx / 2.0; - b.y += dy / 2.0; - b.w -= dx; - b.h -= dy; - - // Pick random point inside the box - let _x = b.x + rand_unit(rng) * b.w; - let _y = b.y + rand_unit(rng) * b.h; - - self.box_to_wkt(&b) - } else { - self.box_to_wkt(&BoxWithDepth { - depth: 0, - x: 0.0, - y: 0.0, - w: 1.0, - h: 1.0, - }) - } - } - - fn box_to_wkt(&self, b: &BoxWithDepth) -> String { - let corners = [ - (b.x, b.y), - (b.x + b.w, b.y), - (b.x + b.w, b.y + b.h), - (b.x, b.y + b.h), - (b.x, b.y), - ]; - - let affine = self.config.affine.unwrap_or([1.0, 0.0, 0.0, 0.0, 1.0, 0.0]); - - let coords: Vec<String> = corners - .iter() - .map(|&(x, y)| { - let (tx, ty) = apply_affine(x, y, &affine); - format!("{:.6} {:.6}", tx, ty) - }) - .collect(); - - format!("POLYGON (({}))", coords.join(", ")) - } - pub fn generate_pickup_point(&self, trip_id: u64) -> (f64, f64) { - let (x, y) = self.generate_point(trip_id); - if let Some(aff) = &self.config.affine { - apply_affine(x, y, aff) - } else { - (x, y) + match self.config.geom_type { + GeomType::Point => generate_point_wkt((x, y), &self.config), + GeomType::Box => generate_box_wkt((x, y), &self.config, rng), + GeomType::Polygon => generate_polygon_wkt((x, y), &self.config, rng), } } } @@ -267,16 +189,82 @@ fn spider_bit(rng: &mut StdRng, prob: f64, digits: u32) -> f64 { .sum() } -// impl Default for SpiderGenerator { -// fn default() -> Self { -// let config = SpiderConfig { -// dist: SpiderDistribution::Uniform, -// global_seed: 42, -// affine: Some([ -// 58.368269, 0.0, -125.244606, // scale X to 58.37°, offset to -125.24° -// 0.0, 25.175375, 24.006328, // scale Y to 25.18°, offset to 24.00° -// ]), -// }; -// SpiderGenerator::new(config) -// } -// } \ No newline at end of file +pub fn generate_point_wkt(center: (f64, f64), config: &SpiderConfig) -> String { + let (x, y) = if let Some(aff) = &config.affine { + apply_affine(center.0, center.1, aff) + } else { + center + }; + format!("POINT ({} {})", x, y) +} + +pub fn generate_box_wkt(center: (f64, f64), config: &SpiderConfig, rng: &mut StdRng) -> String { + let half_width = rand_unit(rng) * config.width / 2.0; + let half_height = rand_unit(rng) * config.height / 2.0; + + let corners = [ + (center.0 - half_width, center.1 - half_height), // lower-left + (center.0 + half_width, center.1 - half_height), // lower-right + (center.0 + half_width, center.1 + half_height), // upper-right + (center.0 - half_width, center.1 + half_height), // upper-left + (center.0 - half_width, center.1 - half_height), // close ring + ]; + + let coords: Vec<String> = corners.iter().map(|&(x, y)| { + let (tx, ty) = if let Some(aff) = &config.affine { + apply_affine(x, y, aff) + } else { + (x, y) + }; + format!("{:.10} {:.10}", tx, ty) + }).collect(); + + format!("POLYGON (({}))", coords.join(", ")) +} + +pub fn generate_polygon_wkt(center: (f64, f64), config: &SpiderConfig, rng: &mut StdRng) -> String { + let min_segs = 3; + let num_segments = if config.maxseg <= 3 { + 3 + } else { + rng.gen_range(0..=(config.maxseg - min_segs)) + min_segs + }; + + // Generate random angles + let mut angles: Vec<f64> = (0..num_segments) + .map(|_| rand_unit(rng) * 2.0 * PI) + .collect(); + + // Sort angles to form a valid polygon + angles.sort_by(|a, b| a.partial_cmp(b).unwrap()); + + let mut coords = Vec::with_capacity((num_segments + 1) as usize); + + for angle in &angles { + let local = ( + center.0 + config.polysize * angle.cos(), + center.1 + config.polysize * angle.sin(), + ); + let (tx, ty) = if let Some(aff) = &config.affine { + apply_affine(local.0, local.1, aff) + } else { + local + }; + coords.push(format!("{:.10} {:.10}", tx, ty)); + } + + // Close the ring by repeating the first point + let first_angle = angles[0]; + let local0 = ( + center.0 + config.polysize * first_angle.cos(), + center.1 + config.polysize * first_angle.sin(), + ); + let (tx0, ty0) = if let Some(aff) = &config.affine { + apply_affine(local0.0, local0.1, aff) + } else { + local0 + }; + coords.push(format!("{:.10} {:.10}", tx0, ty0)); + + format!("POLYGON (({}))", coords.join(", ")) +} \ No newline at end of file diff --git a/tpchgen/src/spider_presets.rs b/tpchgen/src/spider_presets.rs index e6827b0..ee11806 100644 --- a/tpchgen/src/spider_presets.rs +++ b/tpchgen/src/spider_presets.rs @@ -27,9 +27,105 @@ impl SpiderPresets { SpiderGenerator::new(config) } + pub fn for_trip_pickups2() -> SpiderGenerator { + let config = SpiderConfig { + dist_type: DistributionType::Diagonal, + geom_type: GeomType::Point, + dim: 2, + seed: 42, + affine: Some([ + 58.368269, 0.0, -125.244606, // scale X to 58.37°, offset to -125.24° + 0.0, 25.175375, 24.006328, // scale Y to 25.18°, offset to 24.00° + ]), + + // geometry = box + width: 0.0, + height: 0.0, + + // geometry = polygon + maxseg: 0, + polysize: 0.0, + + params: DistributionParams::Diagonal { percentage: 0.5, buffer: 0.5}, + }; + SpiderGenerator::new(config) + } + + pub fn for_trip_pickups3() -> SpiderGenerator { + let config = SpiderConfig { + dist_type: DistributionType::Sierpinski, + geom_type: GeomType::Point, + dim: 2, + seed: 42, + affine: Some([ + 58.368269, 0.0, -125.244606, // scale X to 58.37°, offset to -125.24° + 0.0, 25.175375, 24.006328, // scale Y to 25.18°, offset to 24.00° + ]), + + // geometry = box + width: 0.0, + height: 0.0, + + // geometry = polygon + maxseg: 0, + polysize: 0.0, + + params: DistributionParams::None, + }; + SpiderGenerator::new(config) + } + + pub fn for_trip_pickups4() -> SpiderGenerator { + let config = SpiderConfig { + dist_type: DistributionType::Bit, + geom_type: GeomType::Point, + dim: 2, + seed: 42, + affine: Some([ + 58.368269, 0.0, -125.244606, // scale X to 58.37°, offset to -125.24° + 0.0, 25.175375, 24.006328, // scale Y to 25.18°, offset to 24.00° + ]), + + // geometry = box + width: 0.0, + height: 0.0, + + // geometry = polygon + maxseg: 0, + polysize: 0.0, + + params: DistributionParams::Bit { probability: 0.2, digits: 10}, + }; + SpiderGenerator::new(config) + } + + pub fn for_trip_pickups5() -> SpiderGenerator { + let config = SpiderConfig { + dist_type: DistributionType::Normal, + geom_type: GeomType::Point, + dim: 2, + seed: 42, + affine: Some([ + 58.368269, 0.0, -125.244606, // scale X to 58.37°, offset to -125.24° + 0.0, 25.175375, 24.006328, // scale Y to 25.18°, offset to 24.00° + ]), + + // geometry = box + width: 0.0, + height: 0.0, + + // geometry = polygon + maxseg: 0, + polysize: 0.0, + + params: DistributionParams::Normal {mu: 0.5, sigma: 0.1}, + }; + SpiderGenerator::new(config) + } + pub fn for_building_polygons() -> SpiderGenerator { let config = SpiderConfig { - dist_type: DistributionType::Parcel, + dist_type: DistributionType::Bit, geom_type: GeomType::Box, dim: 2, seed: 12345, @@ -39,14 +135,14 @@ impl SpiderPresets { ]), // geometry = box - width: 0.0, - height: 0.0, + width: 0.00005, + height: 0.0001, // geometry = polygon maxseg: 0, polysize: 0.0, - params: DistributionParams::Parcel { srange: 0.1, dither: 2.0 }, + params: DistributionParams::Bit { probability: 0.5, digits: 20}, }; SpiderGenerator::new(config) }
