This is an automated email from the ASF dual-hosted git repository.

jiayu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/sedona-spatialbench.git

commit fbdeafdd0cb0b355145b47bc0d85c566055ff044
Author: Pranav Toggi <[email protected]>
AuthorDate: Wed Jun 25 13:13:32 2025 -0700

    add skeleton for Building
---
 tpchgen-arrow/src/building.rs  |  85 +++++++++
 tpchgen-arrow/src/lib.rs       |   2 +
 tpchgen-arrow/tests/reparse.rs |  14 +-
 tpchgen-cli/src/csv.rs         |   5 +-
 tpchgen-cli/src/main.rs        |  27 ++-
 tpchgen-cli/src/tbl.rs         |   3 +-
 tpchgen/Cargo.toml             |   1 -
 tpchgen/src/csv.rs             |  46 ++++-
 tpchgen/src/generators.rs      | 397 +++++++++++++++++++++++++++++++----------
 9 files changed, 469 insertions(+), 111 deletions(-)

diff --git a/tpchgen-arrow/src/building.rs b/tpchgen-arrow/src/building.rs
new file mode 100644
index 0000000..34a6ccb
--- /dev/null
+++ b/tpchgen-arrow/src/building.rs
@@ -0,0 +1,85 @@
+use crate::conversions::string_view_array_from_display_iter;
+use crate::{DEFAULT_BATCH_SIZE, RecordBatchIterator};
+use arrow::array::{Int64Array, RecordBatch, StringViewArray};
+use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
+use std::sync::{Arc, LazyLock};
+use tpchgen::generators::{BuildingGenerator, BuildingGeneratorIterator};
+
+/// Generate [`Building`]s in [`RecordBatch`] format
+///
+/// [`Building`]: tpchgen::generators::Building
+///
+/// # Example
+/// ```
+/// # use tpchgen::generators::{BuildingGenerator};
+/// # use tpchgen_arrow::BuildingArrow;
+///
+/// // Create a SF=1.0 generator and wrap it in an Arrow generator
+/// let generator = BuildingGenerator::new(1.0, 1, 1);
+/// let mut arrow_generator = BuildingArrow::new(generator)
+///   .with_batch_size(10);
+/// // Read the first batch
+/// let batch = arrow_generator.next().unwrap();
+/// ```
+pub struct BuildingArrow {
+    inner: BuildingGeneratorIterator<'static>,
+    batch_size: usize,
+}
+
+impl BuildingArrow {
+    pub fn new(generator: BuildingGenerator<'static>) -> Self {
+        Self {
+            inner: generator.iter(),
+            batch_size: DEFAULT_BATCH_SIZE,
+        }
+    }
+
+    /// Set the batch size
+    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
+        self.batch_size = batch_size;
+        self
+    }
+}
+
+impl RecordBatchIterator for BuildingArrow {
+    fn schema(&self) -> &SchemaRef {
+        &BUILDING_SCHEMA
+    }
+}
+
+impl Iterator for BuildingArrow {
+    type Item = RecordBatch;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Get next rows to convert
+        let rows: Vec<_> = self.inner.by_ref().take(self.batch_size).collect();
+        if rows.is_empty() {
+            return None;
+        }
+
+        let buildingkey = Int64Array::from_iter_values(rows.iter().map(|r| 
r.b_buildingkey));
+        let name = string_view_array_from_display_iter(rows.iter().map(|r| 
&r.b_name));
+        let polygon_wkt = 
StringViewArray::from_iter_values(rows.iter().map(|r| r.b_polygonwkt));
+
+        let batch = RecordBatch::try_new(
+            Arc::clone(self.schema()),
+            vec![
+                Arc::new(buildingkey),
+                Arc::new(name),
+                Arc::new(polygon_wkt),
+            ],
+        )
+            .unwrap();
+        Some(batch)
+    }
+}
+
+/// Schema for the Building
+static BUILDING_SCHEMA: LazyLock<SchemaRef> = 
LazyLock::new(make_building_schema);
+fn make_building_schema() -> SchemaRef {
+    Arc::new(Schema::new(vec![
+        Field::new("b_buildingkey", DataType::Int64, false),
+        Field::new("b_name", DataType::Utf8View, false),
+        Field::new("b_polygonwkt", DataType::Utf8View, false),
+    ]))
+}
\ No newline at end of file
diff --git a/tpchgen-arrow/src/lib.rs b/tpchgen-arrow/src/lib.rs
index 151fdf6..f729803 100644
--- a/tpchgen-arrow/src/lib.rs
+++ b/tpchgen-arrow/src/lib.rs
@@ -43,6 +43,7 @@ mod vehicle;
 mod region;
 mod driver;
 mod trip;
+mod building;
 
 use arrow::array::RecordBatch;
 use arrow::datatypes::SchemaRef;
@@ -54,6 +55,7 @@ pub use vehicle::VehicleArrow;
 pub use region::RegionArrow;
 pub use driver::DriverArrow;
 pub use trip::TripArrow;
+pub use building::BuildingArrow;
 
 /// Iterator of Arrow [`RecordBatch`] that also knows its schema
 pub trait RecordBatchIterator: Iterator<Item = RecordBatch> + Send {
diff --git a/tpchgen-arrow/tests/reparse.rs b/tpchgen-arrow/tests/reparse.rs
index acd0ae7..045a684 100644
--- a/tpchgen-arrow/tests/reparse.rs
+++ b/tpchgen-arrow/tests/reparse.rs
@@ -6,16 +6,16 @@ use arrow::datatypes::SchemaRef;
 use std::io::Write;
 use std::sync::Arc;
 use tpchgen::csv::{
-    CustomerCsv, LineItemCsv, NationCsv, OrderCsv, VehicleCsv, RegionCsv, 
DriverCsv,
+    CustomerCsv, LineItemCsv, NationCsv, OrderCsv, VehicleCsv, RegionCsv, 
DriverCsv, TripCsv, BuildingCsv
 };
 use tpchgen::generators::{
     Customer, CustomerGenerator, LineItem, LineItemGenerator, Nation, 
NationGenerator, Order,
     OrderGenerator, Vehicle, VehicleGenerator, Region, RegionGenerator,
-    Driver, DriverGenerator,
+    Driver, DriverGenerator, TripGenerator, BuildingGenerator
 };
 use tpchgen_arrow::{
     CustomerArrow, LineItemArrow, NationArrow, OrderArrow, VehicleArrow,
-    RecordBatchIterator, RegionArrow, DriverArrow,
+    RecordBatchIterator, RegionArrow, DriverArrow, TripArrow, BuildingArrow
 };
 
 /// Macro that defines tests for tbl for a given type
@@ -44,12 +44,16 @@ test_row_type!(nation_tbl, NationGenerator, NationArrow, 
Test::tbl());
 test_row_type!(nation_csv, NationGenerator, NationArrow, Test::csv());
 test_row_type!(order_tbl, OrderGenerator, OrderArrow, Test::tbl());
 test_row_type!(order_csv, OrderGenerator, OrderArrow, Test::csv());
-test_row_type!(part_tbl, VehicleGenerator, VehicleArrow, Test::tbl());
-test_row_type!(part_csv, VehicleGenerator, VehicleArrow, Test::csv());
+test_row_type!(vehicle_tbl, VehicleGenerator, VehicleArrow, Test::tbl());
+test_row_type!(vehicle_csv, VehicleGenerator, VehicleArrow, Test::csv());
 test_row_type!(region_tbl, RegionGenerator, RegionArrow, Test::tbl());
 test_row_type!(region_csv, RegionGenerator, RegionArrow, Test::csv());
 test_row_type!(driver_tbl, DriverGenerator, DriverArrow, Test::tbl());
 test_row_type!(driver_csv, DriverGenerator, DriverArrow, Test::csv());
+// test_row_type!(trip_tbl, TripGenerator, TripArrow, Test::tbl());
+// test_row_type!(trip_csv, TripGenerator, TripArrow, Test::csv());
+// test_row_type!(building_tbl, BuildingGenerator, BuildingArrow, Test::tbl());
+// test_row_type!(building_csv, BuildingGenerator, BuildingArrow, Test::csv());
 
 /// Common trait for writing rows in TBL and CSV format
 trait RowType {
diff --git a/tpchgen-cli/src/csv.rs b/tpchgen-cli/src/csv.rs
index 9d01ed0..f58bff7 100644
--- a/tpchgen-cli/src/csv.rs
+++ b/tpchgen-cli/src/csv.rs
@@ -2,10 +2,10 @@
 use super::generate::Source;
 use std::io::Write;
 use tpchgen::csv::{
-    CustomerCsv, LineItemCsv, NationCsv, OrderCsv, VehicleCsv, RegionCsv, 
DriverCsv, TripCsv
+    CustomerCsv, LineItemCsv, NationCsv, OrderCsv, VehicleCsv, RegionCsv, 
DriverCsv, TripCsv, BuildingCsv
 };
 use tpchgen::generators::{
-    CustomerGenerator, LineItemGenerator, NationGenerator, OrderGenerator, 
VehicleGenerator, RegionGenerator, DriverGenerator, TripGenerator,
+    CustomerGenerator, LineItemGenerator, NationGenerator, OrderGenerator, 
VehicleGenerator, RegionGenerator, DriverGenerator, TripGenerator, 
BuildingGenerator,
 };
 
 /// Define a Source that writes the table in CSV format
@@ -49,3 +49,4 @@ define_csv_source!(CustomerCsvSource, 
CustomerGenerator<'static>, CustomerCsv);
 define_csv_source!(OrderCsvSource, OrderGenerator<'static>, OrderCsv);
 define_csv_source!(LineItemCsvSource, LineItemGenerator<'static>, LineItemCsv);
 define_csv_source!(TripCsvSource, TripGenerator, TripCsv);
+define_csv_source!(BuildingCsvSource, BuildingGenerator<'static>, BuildingCsv);
diff --git a/tpchgen-cli/src/main.rs b/tpchgen-cli/src/main.rs
index d518d6f..fa2449a 100644
--- a/tpchgen-cli/src/main.rs
+++ b/tpchgen-cli/src/main.rs
@@ -62,12 +62,12 @@ use std::str::FromStr;
 use std::time::Instant;
 use tpchgen::distribution::Distributions;
 use tpchgen::generators::{
-    CustomerGenerator, LineItemGenerator, NationGenerator, OrderGenerator, 
VehicleGenerator, RegionGenerator, DriverGenerator, TripGenerator,
+    CustomerGenerator, LineItemGenerator, NationGenerator, OrderGenerator, 
VehicleGenerator, RegionGenerator, DriverGenerator, TripGenerator, 
BuildingGenerator,
 };
 use tpchgen::text::TextPool;
 use tpchgen_arrow::{
     CustomerArrow, LineItemArrow, NationArrow, OrderArrow, VehicleArrow,
-    RecordBatchIterator, RegionArrow, DriverArrow, TripArrow
+    RecordBatchIterator, RegionArrow, DriverArrow, TripArrow, BuildingArrow,
 };
 
 #[derive(Parser)]
@@ -138,6 +138,7 @@ enum Table {
     Orders,
     Lineitem,
     Trip,
+    Building,
 }
 
 impl Display for Table {
@@ -179,6 +180,7 @@ impl TypedValueParser for TableValueParser {
                 clap::builder::PossibleValue::new("orders").help("Orders table 
(alias: O)"),
                 clap::builder::PossibleValue::new("lineitem").help("LineItem 
table (alias: L)"),
                 clap::builder::PossibleValue::new("trip").help("Trip table 
(alias: T)"),
+                clap::builder::PossibleValue::new("building").help("Building table 
(alias: b)"),
             ]
             .into_iter(),
         ))
@@ -204,6 +206,7 @@ impl FromStr for Table {
             "O" | "orders" => Ok(Table::Orders),
             "L" | "lineitem" => Ok(Table::Lineitem),
             "T" | "trip" => Ok(Table::Trip),
+            "b" | "building" => Ok(Table::Building),
             _ => Err("Invalid table name {s}"),
         }
     }
@@ -219,7 +222,8 @@ impl Table {
             Table::Customer => "customer",
             Table::Orders => "orders",
             Table::Lineitem => "lineitem",
-            Table::Trip => "Trip",
+            Table::Trip => "trip",
+            Table::Building => "building",
         }
     }
 }
@@ -316,12 +320,13 @@ impl Cli {
             match table {
                 Table::Nation => self.generate_nation().await?,
                 Table::Region => self.generate_region().await?,
-                Table::Vehicle => self.generate_part().await?,
+                Table::Vehicle => self.generate_vehicle().await?,
                 Table::Driver => self.generate_driver().await?,
                 Table::Customer => self.generate_customer().await?,
                 Table::Orders => self.generate_orders().await?,
                 Table::Lineitem => self.generate_lineitem().await?,
                 Table::Trip => self.generate_trip().await?,
+                Table::Building => self.generate_building().await?,
             }
         }
 
@@ -346,7 +351,7 @@ impl Cli {
         RegionArrow
     );
     define_generate!(
-        generate_part,
+        generate_vehicle,
         Table::Vehicle,
         VehicleGenerator,
         VehicleTblSource,
@@ -393,6 +398,14 @@ impl Cli {
         TripCsvSource,
         TripArrow
     );
+    define_generate!(
+        generate_building,
+        Table::Building,
+        BuildingGenerator,
+        BuildingTblSource,
+        BuildingCsvSource,
+        BuildingArrow
+    );
 
     /// return the output filename for the given table
     fn output_filename(&self, table: Table) -> String {
@@ -454,6 +467,10 @@ impl Cli {
                 (128, row_count)
             },
             &Table::Trip => (130, 
TripGenerator::calculate_row_count(self.scale_factor, 1, 1)),
+            Table::Building => (
+                115,
+                BuildingGenerator::calculate_row_count(self.scale_factor, 1, 
1),
+            ),
         };
         // target chunks of about 16MB (use 15MB to ensure we don't exceed the 
target size)
         let target_chunk_size_bytes = 15 * 1024 * 1024;
diff --git a/tpchgen-cli/src/tbl.rs b/tpchgen-cli/src/tbl.rs
index 441a128..4ea9158 100644
--- a/tpchgen-cli/src/tbl.rs
+++ b/tpchgen-cli/src/tbl.rs
@@ -2,7 +2,7 @@
 
 use super::generate::Source;
 use std::io::Write;
-use tpchgen::generators::{CustomerGenerator, LineItemGenerator, 
NationGenerator, OrderGenerator, VehicleGenerator, RegionGenerator, 
DriverGenerator, TripGenerator};
+use tpchgen::generators::{CustomerGenerator, LineItemGenerator, 
NationGenerator, OrderGenerator, VehicleGenerator, RegionGenerator, 
DriverGenerator, TripGenerator, BuildingGenerator};
 
 /// Define a Source that writes the table in TBL format
 macro_rules! define_tbl_source {
@@ -43,3 +43,4 @@ define_tbl_source!(CustomerTblSource, 
CustomerGenerator<'static>);
 define_tbl_source!(OrderTblSource, OrderGenerator<'static>);
 define_tbl_source!(LineItemTblSource, LineItemGenerator<'static>);
 define_tbl_source!(TripTblSource, TripGenerator);
+define_tbl_source!(BuildingTblSource, BuildingGenerator<'static>);
diff --git a/tpchgen/Cargo.toml b/tpchgen/Cargo.toml
index dc10c10..7ed89f8 100644
--- a/tpchgen/Cargo.toml
+++ b/tpchgen/Cargo.toml
@@ -13,7 +13,6 @@ license = { workspace = true }
 # See ../ARCHITECTURE.md for more details
 [dependencies]
 rand = { version = "0.8", features = ["small_rng"] }
-rand_distr = "0.4.3"
 
 [dev-dependencies]
 flate2 = "1.1.0"
diff --git a/tpchgen/src/csv.rs b/tpchgen/src/csv.rs
index 9331f9b..d0fbea5 100644
--- a/tpchgen/src/csv.rs
+++ b/tpchgen/src/csv.rs
@@ -1,6 +1,6 @@
 //! CSV formatting support for the row struct objects generated by the library.
 
-use crate::generators::{Customer, LineItem, Nation, Order, Vehicle, Region, 
Driver, Trip};
+use crate::generators::{Customer, LineItem, Nation, Order, Vehicle, Region, 
Driver, Trip, Building};
 use core::fmt;
 use std::fmt::Display;
 
@@ -437,3 +437,47 @@ impl Display for TripCsv {
         )
     }
 }
+
+/// Write [`Building`]s in CSV format.
+///
+/// # Example
+/// ```
+/// # use tpchgen::generators::BuildingGenerator;
+/// # use tpchgen::csv::BuildingCsv;
+/// # use std::fmt::Write;
+/// // Output the first 3 rows in CSV format
+/// let generator = BuildingGenerator::new(1.0, 1, 1);
+/// let mut csv = String::new();
+/// writeln!(&mut csv, "{}", BuildingCsv::header()).unwrap(); // write header
+/// for line in generator.iter().take(3) {
+///   // write line using CSV formatter
+///   writeln!(&mut csv, "{}", BuildingCsv::new(line)).unwrap();
+/// }
+/// ```
+pub struct BuildingCsv<'a> {
+    inner: Building<'a>,
+}
+
+impl<'a> BuildingCsv<'a> {
+    pub fn new(inner: Building<'a>) -> Self {
+        Self { inner }
+    }
+
+    /// Returns the CSV header for the Building table
+    pub fn header() -> &'static str {
+        "b_buildingkey,b_name,b_polygonwkt"
+    }
+}
+
+impl Display for BuildingCsv<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            // note: only the polygon WKT field is quoted, presumably because WKT text may contain commas
+            "{},{},\"{}\"",
+            self.inner.b_buildingkey,
+            self.inner.b_name,
+            self.inner.b_polygonwkt,
+        )
+    }
+}
\ No newline at end of file
diff --git a/tpchgen/src/generators.rs b/tpchgen/src/generators.rs
index 694ac88..7d27dd8 100644
--- a/tpchgen/src/generators.rs
+++ b/tpchgen/src/generators.rs
@@ -5,7 +5,7 @@ use crate::distribution::Distribution;
 use crate::distribution::Distributions;
 use crate::random::RandomPhoneNumber;
 use crate::random::RowRandomInt;
-use crate::random::{PhoneNumberInstance, RandomBoundedLong};
+use crate::random::{PhoneNumberInstance, RandomBoundedLong, 
StringSequenceInstance};
 use crate::random::{RandomAlphaNumeric, RandomAlphaNumericInstance};
 use crate::text::TextPool;
 use core::fmt;
@@ -33,7 +33,7 @@ impl Default for NationGenerator<'_> {
 impl<'a> NationGenerator<'a> {
     /// Creates a new NationGenerator with default distributions and text pool
     ///
-    /// Nations does not depend on the scale factor or the vehicle number. The 
signature of
+    /// Nations does not depend on the scale factor or the part number. The 
signature of
     /// this method is provided to be consistent with the other generators, 
but the
     /// parameters are ignored. You can use [`NationGenerator::default`] to 
create a
     /// default generator.
@@ -224,7 +224,7 @@ impl Default for RegionGenerator<'_> {
 impl<'a> RegionGenerator<'a> {
     /// Creates a new RegionGenerator with default distributions and text pool
     ///
-    /// Regions does not depend on the scale factor or the vehicle number. The 
signature of
+    /// Regions does not depend on the scale factor or the part number. The 
signature of
     /// this method is provided to be consistent with the other generators, 
but the
     /// parameters are ignored. You can use [`RegionGenerator::default`] to 
create a
     /// default generator.
@@ -383,8 +383,8 @@ impl fmt::Display for Vehicle<'_> {
 #[derive(Debug, Clone)]
 pub struct VehicleGenerator<'a> {
     scale_factor: f64,
-    vehicle: i32,
-    vehicle_count: i32,
+    part: i32,
+    part_count: i32,
     distributions: &'a Distributions,
     text_pool: &'a TextPool,
 }
@@ -407,12 +407,12 @@ impl<'a> VehicleGenerator<'a> {
     ///
     /// Note the generator's lifetime is `&'static`. See [`NationGenerator`] 
for
     /// more details.
-    pub fn new(scale_factor: f64, vehicle: i32, vehicle_count: i32) -> 
VehicleGenerator<'static> {
+    pub fn new(scale_factor: f64, part: i32, part_count: i32) -> 
VehicleGenerator<'static> {
         // Note: use explicit lifetime to ensure this remains `&'static`
         Self::new_with_distributions_and_text_pool(
             scale_factor,
-            vehicle,
-            vehicle_count,
+            part,
+            part_count,
             Distributions::static_default(),
             TextPool::get_or_init_default(),
         )
@@ -421,26 +421,26 @@ impl<'a> VehicleGenerator<'a> {
     /// Creates a VehicleGenerator with specified distributions and text pool
     pub fn new_with_distributions_and_text_pool<'b>(
         scale_factor: f64,
-        vehicle: i32,
-        vehicle_count: i32,
+        part: i32,
+        part_count: i32,
         distributions: &'b Distributions,
         text_pool: &'b TextPool,
     ) -> VehicleGenerator<'b> {
         VehicleGenerator {
             scale_factor,
-            vehicle,
-            vehicle_count,
+            part,
+            part_count,
             distributions,
             text_pool,
         }
     }
 
-    /// Return the row count for the given scale factor and generator vehicle 
count
-    pub fn calculate_row_count(scale_factor: f64, vehicle: i32, vehicle_count: 
i32) -> i64 {
-        GenerateUtils::calculate_row_count(Self::SCALE_BASE, scale_factor, 
vehicle, vehicle_count)
+    /// Return the row count for the given scale factor and generator part 
count
+    pub fn calculate_row_count(scale_factor: f64, part: i32, part_count: i32) 
-> i64 {
+        GenerateUtils::calculate_row_count(Self::SCALE_BASE, scale_factor, 
part, part_count)
     }
 
-    /// Returns an iterator over the vehicle rows
+    /// Returns an iterator over the vehicle rows
     pub fn iter(&self) -> VehicleGeneratorIterator<'a> {
         VehicleGeneratorIterator::new(
             self.distributions,
@@ -448,10 +448,10 @@ impl<'a> VehicleGenerator<'a> {
             GenerateUtils::calculate_start_index(
                 Self::SCALE_BASE,
                 self.scale_factor,
-                self.vehicle,
-                self.vehicle_count,
+                self.part,
+                self.part_count,
             ),
-            Self::calculate_row_count(self.scale_factor, self.vehicle, 
self.vehicle_count),
+            Self::calculate_row_count(self.scale_factor, self.part, 
self.part_count),
         )
     }
 }
@@ -644,8 +644,8 @@ impl fmt::Display for Driver {
 #[derive(Debug, Clone)]
 pub struct DriverGenerator<'a> {
     scale_factor: f64,
-    vehicle: i32,
-    vehicle_count: i32,
+    part: i32,
+    part_count: i32,
     distributions: &'a Distributions,
     text_pool: &'a TextPool,
 }
@@ -676,12 +676,12 @@ impl<'a> DriverGenerator<'a> {
     ///
     /// Note the generator's lifetime is `&'static`. See [`NationGenerator`] 
for
     /// more details.
-    pub fn new(scale_factor: f64, vehicle: i32, vehicle_count: i32) -> 
DriverGenerator<'static> {
+    pub fn new(scale_factor: f64, part: i32, part_count: i32) -> 
DriverGenerator<'static> {
         // Note: use explicit lifetime to ensure this remains `&'static`
         Self::new_with_distributions_and_text_pool(
             scale_factor,
-            vehicle,
-            vehicle_count,
+            part,
+            part_count,
             Distributions::static_default(),
             TextPool::get_or_init_default(),
         )
@@ -690,23 +690,23 @@ impl<'a> DriverGenerator<'a> {
     /// Creates a DriverGenerator with specified distributions and text pool
     pub fn new_with_distributions_and_text_pool<'b>(
         scale_factor: f64,
-        vehicle: i32,
-        vehicle_count: i32,
+        part: i32,
+        part_count: i32,
         distributions: &'b Distributions,
         text_pool: &'b TextPool,
     ) -> DriverGenerator<'b> {
         DriverGenerator {
             scale_factor,
-            vehicle,
-            vehicle_count,
+            part,
+            part_count,
             distributions,
             text_pool,
         }
     }
 
-    /// Return the row count for the given scale factor and generator vehicle 
count
-    pub fn calculate_row_count(scale_factor: f64, vehicle: i32, vehicle_count: 
i32) -> i64 {
-        GenerateUtils::calculate_row_count(Self::SCALE_BASE, scale_factor, 
vehicle, vehicle_count)
+    /// Return the row count for the given scale factor and generator part 
count
+    pub fn calculate_row_count(scale_factor: f64, part: i32, part_count: i32) 
-> i64 {
+        GenerateUtils::calculate_row_count(Self::SCALE_BASE, scale_factor, 
part, part_count)
     }
 
     /// Returns an iterator over the Driver rows
@@ -717,10 +717,10 @@ impl<'a> DriverGenerator<'a> {
             GenerateUtils::calculate_start_index(
                 Self::SCALE_BASE,
                 self.scale_factor,
-                self.vehicle,
-                self.vehicle_count,
+                self.part,
+                self.part_count,
             ),
-            Self::calculate_row_count(self.scale_factor, self.vehicle, 
self.vehicle_count),
+            Self::calculate_row_count(self.scale_factor, self.part, 
self.part_count),
         )
     }
 }
@@ -933,8 +933,8 @@ impl fmt::Display for Customer<'_> {
 #[derive(Debug, Clone)]
 pub struct CustomerGenerator<'a> {
     scale_factor: f64,
-    vehicle: i32,
-    vehicle_count: i32,
+    part: i32,
+    part_count: i32,
     distributions: &'a Distributions,
     text_pool: &'a TextPool,
 }
@@ -953,12 +953,12 @@ impl<'a> CustomerGenerator<'a> {
     ///
     /// Note the generator's lifetime is `&'static`. See [`NationGenerator`] 
for
     /// more details.
-    pub fn new(scale_factor: f64, vehicle: i32, vehicle_count: i32) -> 
CustomerGenerator<'static> {
+    pub fn new(scale_factor: f64, part: i32, part_count: i32) -> 
CustomerGenerator<'static> {
         // Note: use explicit lifetime to ensure this remains `&'static`
         Self::new_with_distributions_and_text_pool(
             scale_factor,
-            vehicle,
-            vehicle_count,
+            part,
+            part_count,
             Distributions::static_default(),
             TextPool::get_or_init_default(),
         )
@@ -967,23 +967,23 @@ impl<'a> CustomerGenerator<'a> {
     /// Creates a CustomerGenerator with specified distributions and text pool
     pub fn new_with_distributions_and_text_pool<'b>(
         scale_factor: f64,
-        vehicle: i32,
-        vehicle_count: i32,
+        part: i32,
+        part_count: i32,
         distributions: &'b Distributions,
         text_pool: &'b TextPool,
     ) -> CustomerGenerator<'b> {
         CustomerGenerator {
             scale_factor,
-            vehicle,
-            vehicle_count,
+            part,
+            part_count,
             distributions,
             text_pool,
         }
     }
 
-    /// Return the row count for the given scale factor and generator vehicle 
count
-    pub fn calculate_row_count(scale_factor: f64, vehicle: i32, vehicle_count: 
i32) -> i64 {
-        GenerateUtils::calculate_row_count(Self::SCALE_BASE, scale_factor, 
vehicle, vehicle_count)
+    /// Return the row count for the given scale factor and generator part 
count
+    pub fn calculate_row_count(scale_factor: f64, part: i32, part_count: i32) 
-> i64 {
+        GenerateUtils::calculate_row_count(Self::SCALE_BASE, scale_factor, 
part, part_count)
     }
 
     /// Returns an iterator over the customer rows
@@ -994,10 +994,10 @@ impl<'a> CustomerGenerator<'a> {
             GenerateUtils::calculate_start_index(
                 Self::SCALE_BASE,
                 self.scale_factor,
-                self.vehicle,
-                self.vehicle_count,
+                self.part,
+                self.part_count,
             ),
-            Self::calculate_row_count(self.scale_factor, self.vehicle, 
self.vehicle_count),
+            Self::calculate_row_count(self.scale_factor, self.part, 
self.part_count),
         )
     }
 }
@@ -1202,8 +1202,8 @@ impl fmt::Display for Order<'_> {
 #[derive(Debug, Clone)]
 pub struct OrderGenerator<'a> {
     scale_factor: f64,
-    vehicle: i32,
-    vehicle_count: i32,
+    part: i32,
+    part_count: i32,
     distributions: &'a Distributions,
     text_pool: &'a TextPool,
 }
@@ -1230,12 +1230,12 @@ impl<'a> OrderGenerator<'a> {
     ///
     /// Note the generator's lifetime is `&'static`. See [`NationGenerator`] 
for
     /// more details.
-    pub fn new(scale_factor: f64, vehicle: i32, vehicle_count: i32) -> 
OrderGenerator<'static> {
+    pub fn new(scale_factor: f64, part: i32, part_count: i32) -> 
OrderGenerator<'static> {
         // Note: use explicit lifetime to ensure this remains `&'static`
         Self::new_with_distributions_and_text_pool(
             scale_factor,
-            vehicle,
-            vehicle_count,
+            part,
+            part_count,
             Distributions::static_default(),
             TextPool::get_or_init_default(),
         )
@@ -1244,23 +1244,23 @@ impl<'a> OrderGenerator<'a> {
     /// Creates a OrderGenerator with specified distributions and text pool
     pub fn new_with_distributions_and_text_pool<'b>(
         scale_factor: f64,
-        vehicle: i32,
-        vehicle_count: i32,
+        part: i32,
+        part_count: i32,
         distributions: &'b Distributions,
         text_pool: &'b TextPool,
     ) -> OrderGenerator<'b> {
         OrderGenerator {
             scale_factor,
-            vehicle,
-            vehicle_count,
+            part,
+            part_count,
             distributions,
             text_pool,
         }
     }
 
-    /// Return the row count for the given scale factor and generator vehicle 
count
-    pub fn calculate_row_count(scale_factor: f64, vehicle: i32, vehicle_count: 
i32) -> i64 {
-        GenerateUtils::calculate_row_count(Self::SCALE_BASE, scale_factor, 
vehicle, vehicle_count)
+    /// Return the row count for the given scale factor and generator part 
count
+    pub fn calculate_row_count(scale_factor: f64, part: i32, part_count: i32) 
-> i64 {
+        GenerateUtils::calculate_row_count(Self::SCALE_BASE, scale_factor, 
part, part_count)
     }
 
     /// Returns an iterator over the order rows
@@ -1272,10 +1272,10 @@ impl<'a> OrderGenerator<'a> {
             GenerateUtils::calculate_start_index(
                 Self::SCALE_BASE,
                 self.scale_factor,
-                self.vehicle,
-                self.vehicle_count,
+                self.part,
+                self.part_count,
             ),
-            Self::calculate_row_count(self.scale_factor, self.vehicle, 
self.vehicle_count),
+            Self::calculate_row_count(self.scale_factor, self.part, 
self.part_count),
         )
     }
 
@@ -1571,8 +1571,8 @@ impl fmt::Display for LineItem<'_> {
 #[derive(Debug, Clone)]
 pub struct LineItemGenerator<'a> {
     scale_factor: f64,
-    vehicle: i32,
-    vehicle_count: i32,
+    part: i32,
+    part_count: i32,
     distributions: &'a Distributions,
     text_pool: &'a TextPool,
 }
@@ -1601,11 +1601,11 @@ impl<'a> LineItemGenerator<'a> {
     ///
     /// Note the generator's lifetime is `&'static`. See [`NationGenerator`] 
for
     /// more details.
-    pub fn new(scale_factor: f64, vehicle: i32, vehicle_count: i32) -> 
LineItemGenerator<'static> {
+    pub fn new(scale_factor: f64, part: i32, part_count: i32) -> 
LineItemGenerator<'static> {
         Self::new_with_distributions_and_text_pool(
             scale_factor,
-            vehicle,
-            vehicle_count,
+            part,
+            part_count,
             Distributions::static_default(),
             TextPool::get_or_init_default(),
         )
@@ -1614,15 +1614,15 @@ impl<'a> LineItemGenerator<'a> {
     /// Creates a LineItemGenerator with specified distributions and text pool
     pub fn new_with_distributions_and_text_pool<'b>(
         scale_factor: f64,
-        vehicle: i32,
-        vehicle_count: i32,
+        part: i32,
+        part_count: i32,
         distributions: &'b Distributions,
         text_pool: &'b TextPool,
     ) -> LineItemGenerator<'b> {
         LineItemGenerator {
             scale_factor,
-            vehicle,
-            vehicle_count,
+            part,
+            part_count,
             distributions,
             text_pool,
         }
@@ -1637,14 +1637,14 @@ impl<'a> LineItemGenerator<'a> {
             GenerateUtils::calculate_start_index(
                 OrderGenerator::SCALE_BASE,
                 self.scale_factor,
-                self.vehicle,
-                self.vehicle_count,
+                self.part,
+                self.part_count,
             ),
             GenerateUtils::calculate_row_count(
                 OrderGenerator::SCALE_BASE,
                 self.scale_factor,
-                self.vehicle,
-                self.vehicle_count,
+                self.part,
+                self.part_count,
             ),
         )
     }
@@ -2032,8 +2032,8 @@ impl fmt::Display for Trip {
 #[derive(Debug, Clone)]
 pub struct TripGenerator {
     scale_factor: f64,
-    vehicle: i32,
-    vehicle_count: i32,
+    part: i32,
+    part_count: i32,
     distributions: Distributions,
     text_pool: TextPool,
     distance_kde: crate::kde::DistanceKDE,
@@ -2053,11 +2053,11 @@ impl TripGenerator {
     const TRIP_DURATION_MAX_PER_MILE: i32 = 3; // max 3 minutes per mile
 
     /// Creates a new TripGenerator with the given scale factor
-    pub fn new(scale_factor: f64, vehicle: i32, vehicle_count: i32) -> 
TripGenerator {
+    pub fn new(scale_factor: f64, part: i32, part_count: i32) -> TripGenerator 
{
         Self::new_with_distributions_and_text_pool(
             scale_factor,
-            vehicle,
-            vehicle_count,
+            part,
+            part_count,
             Distributions::static_default(),
             TextPool::get_or_init_default(),
             crate::kde::default_distance_kde(),
@@ -2068,8 +2068,8 @@ impl TripGenerator {
     /// Creates a TripGenerator with specified distributions and text pool
     pub fn new_with_distributions_and_text_pool<'b>(
         scale_factor: f64,
-        vehicle: i32,
-        vehicle_count: i32,
+        part: i32,
+        part_count: i32,
         distributions: &'b Distributions,
         text_pool: &'b TextPool,
         distance_kde: crate::kde::DistanceKDE,
@@ -2077,8 +2077,8 @@ impl TripGenerator {
     ) -> TripGenerator {
         TripGenerator {
             scale_factor,
-            vehicle,
-            vehicle_count,
+            part,
+            part_count,
             distributions: distributions.clone(),
             text_pool: text_pool.clone(),
             distance_kde,
@@ -2086,9 +2086,9 @@ impl TripGenerator {
         }
     }
 
-    /// Return the row count for the given scale factor and generator vehicle 
count
-    pub fn calculate_row_count(scale_factor: f64, vehicle: i32, vehicle_count: 
i32) -> i64 {
-        GenerateUtils::calculate_row_count(Self::SCALE_BASE, scale_factor, 
vehicle, vehicle_count)
+    /// Return the row count for the given scale factor and generator part 
count
+    pub fn calculate_row_count(scale_factor: f64, part: i32, part_count: i32) 
-> i64 {
+        GenerateUtils::calculate_row_count(Self::SCALE_BASE, scale_factor, 
part, part_count)
     }
 
     /// Returns an iterator over the trip rows
@@ -2100,14 +2100,14 @@ impl TripGenerator {
             GenerateUtils::calculate_start_index(
                 Self::SCALE_BASE,
                 self.scale_factor,
-                self.vehicle,
-                self.vehicle_count,
+                self.part,
+                self.part_count,
             ),
             GenerateUtils::calculate_row_count(
                 Self::SCALE_BASE,
                 self.scale_factor,
-                self.vehicle,
-                self.vehicle_count,
+                self.part,
+                self.part_count,
             ),
             self.distance_kde.clone(), // Add the KDE model
             self.spatial_gen.clone(),
@@ -2319,6 +2319,179 @@ impl<'a> Iterator for TripGeneratorIterator {
     }
 }
 
+/// A single building row; its `Display` impl renders a `|`-delimited record
+#[derive(Debug, Clone, PartialEq)]
+pub struct Building<'a> {
+    /// Unique identifier for the building (the generator assigns 1-based keys)
+    pub b_buildingkey: i64,
+    /// Name of the building, produced by a random string sequence
+    pub b_name: StringSequenceInstance<'a>,
+    /// Building polygon as WKT. NOTE(review): the skeleton fills this with random text
+    pub b_polygonwkt: &'a str,
+}
+
+impl Display for Building<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "{}|{}|{}|", // key|name|wkt, every field followed by a '|'
+            self.b_buildingkey,
+            self.b_name,
+            self.b_polygonwkt,
+        )
+    }
+}
+
+/// Generator for [`Building`] rows; borrows its distributions and text pool for `'a`
+pub struct BuildingGenerator<'a> {
+    scale_factor: f64,
+    part: i32, // NOTE(review): presumably the index of the slice to generate - confirm
+    part_count: i32, // NOTE(review): presumably the total number of slices - confirm
+    distributions: &'a Distributions,
+    text_pool: &'a TextPool,
+}
+
+impl<'a> BuildingGenerator<'a> {
+    /// Base scale for building generation (the unit test expects 200 rows at scale 0.01)
+    const SCALE_BASE: i32 = 20_000;
+    const NAME_WORDS: i32 = 1; // passed to the name sequence; presumably words per name
+    const COMMENT_AVERAGE_LENGTH: i32 = 14; // passed (as f64) to the text that fills `b_polygonwkt`
+
+    /// Creates a new BuildingGenerator with the given scale factor
+    ///
+    /// Note the generator's lifetime is `&'static`. See [`NationGenerator`] 
for
+    /// more details.
+    pub fn new(scale_factor: f64, part: i32, part_count: i32) -> 
BuildingGenerator<'static> {
+        // Note: use explicit lifetime to ensure this remains `&'static`
+        Self::new_with_distributions_and_text_pool(
+            scale_factor,
+            part,
+            part_count,
+            Distributions::static_default(),
+            TextPool::get_or_init_default(),
+        )
+    }
+
+    /// Creates a BuildingGenerator with specified distributions and text pool
+    pub fn new_with_distributions_and_text_pool<'b>(
+        scale_factor: f64,
+        part: i32,
+        part_count: i32,
+        distributions: &'b Distributions,
+        text_pool: &'b TextPool,
+    ) -> BuildingGenerator<'b> {
+        BuildingGenerator {
+            scale_factor,
+            part,
+            part_count,
+            distributions,
+            text_pool,
+        }
+    }
+
+    /// Return the row count for the given scale factor and generator part 
count
+    pub fn calculate_row_count(scale_factor: f64, part: i32, part_count: i32) 
-> i64 {
+        GenerateUtils::calculate_row_count(Self::SCALE_BASE, scale_factor, 
part, part_count)
+    }
+
+    /// Returns an iterator over the building rows
+    pub fn iter(&self) -> BuildingGeneratorIterator<'a> {
+        BuildingGeneratorIterator::new(
+            self.distributions,
+            self.text_pool,
+            GenerateUtils::calculate_start_index(
+                Self::SCALE_BASE,
+                self.scale_factor,
+                self.part,
+                self.part_count,
+            ),
+            Self::calculate_row_count(self.scale_factor, self.part, 
self.part_count),
+        )
+    }
+}
+
+impl<'a> IntoIterator for &'a BuildingGenerator<'a> { // allows `for b in &generator`
+    type Item = Building<'a>;
+    type IntoIter = BuildingGeneratorIterator<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter() // same iterator as the inherent `iter` method
+    }
+}
+
+/// Iterator that yields `row_count` [`Building`] rows, starting after `start_index`
+#[derive(Debug)]
+pub struct BuildingGeneratorIterator<'a> {
+    name_random: RandomStringSequence<'a>, // source of `b_name`
+    wkt_random: RandomText<'a>, // source of `b_polygonwkt`
+
+    start_index: i64, // rows skipped before this iterator's first row
+    row_count: i64, // total number of rows this iterator yields
+    index: i64, // rows yielded so far
+}
+
+impl<'a> BuildingGeneratorIterator<'a> {
+    fn new(
+        distributions: &'a Distributions,
+        text_pool: &'a TextPool,
+        start_index: i64,
+        row_count: i64,
+    ) -> Self {
+        let mut name_random = RandomStringSequence::new(
+            709314158,
+            BuildingGenerator::NAME_WORDS,
+            distributions.part_colors(), // names are drawn from the part-colors distribution
+        );
+        let mut wkt_random = RandomText::new(
+            804159733,
+            text_pool,
+            BuildingGenerator::COMMENT_AVERAGE_LENGTH as f64,
+        );
+
+        // Advance both generators by `start_index` rows to reach the starting position
+        name_random.advance_rows(start_index);
+        wkt_random.advance_rows(start_index);
+
+        BuildingGeneratorIterator {
+            name_random,
+            wkt_random,
+            start_index,
+            row_count,
+            index: 0,
+        }
+    }
+
+    /// Creates a building with the given key, drawing the next name and text values
+    fn make_building(&mut self, building_key: i64) -> Building<'a> {
+        let name = self.name_random.next_value();
+
+        Building {
+            b_buildingkey: building_key,
+            b_name: name,
+            b_polygonwkt: self.wkt_random.next_value(),
+        }
+    }
+}
+
+impl<'a> Iterator for BuildingGeneratorIterator<'a> {
+    type Item = Building<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.index >= self.row_count { // all requested rows were produced
+            return None;
+        }
+
+        let building = self.make_building(self.start_index + self.index + 1); // 1-based key
+
+        self.name_random.row_finished();
+        self.wkt_random.row_finished();
+
+        self.index += 1;
+
+        Some(building)
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -2471,7 +2644,7 @@ mod tests {
 
         // Verify the string format matches the expected pattern
         let expected_pattern = format!(
-            "{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|",
+            "{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|",
             first.t_tripkey,
             first.t_custkey,
             first.t_driverkey,
@@ -2481,14 +2654,46 @@ mod tests {
             first.t_fare,
             first.t_tip,
             first.t_totalamount,
-            first.t_distance
+            first.t_distance,
+            first.t_pickupx,
+            first.t_pickupy,
+            first.t_dropoffx,
+            first.t_dropoffy,
         );
         assert_eq!(first.to_string(), expected_pattern);
 
         // Check first Trip
         let first = &trips[1];
         assert_eq!(first.t_tripkey, 2);
-        assert_eq!(first.to_string(), 
"2|851|1286|1285|1997-12-24|1997-12-24|37.00|6.00|43.00|1.40|")
+        assert_eq!(first.to_string(), 
"2|851|1286|1285|1997-12-25|1997-12-25|0.03|0.00|0.04|0.01|-102.20681068856331|34.032813907715486|-102.19307587853756|34.03497048015551|")
+    }
+
+    #[test]
+    fn test_building_generation() {
+        // Create a generator with a small scale factor
+        let generator = BuildingGenerator::new(0.01, 1, 1);
+        let buildings: Vec<_> = generator.iter().collect();
+
+        // Should have 0.01 * 20,000 = 200 buildings
+        assert_eq!(buildings.len(), 200);
+
+        // Check first building
+        let first = &buildings[0];
+        assert_eq!(first.b_buildingkey, 1);
+
+        // Verify the string format matches the expected pattern
+        let expected_pattern = format!(
+            "{}|{}|{}|",
+            first.b_buildingkey,
+            first.b_name,
+            first.b_polygonwkt,
+        );
+        assert_eq!(first.to_string(), expected_pattern);
+
+        // Check second Building (index 1, key 2) against its exact rendered row
+        let first = &buildings[1];
+        assert_eq!(first.b_buildingkey, 2);
+        assert_eq!(first.to_string(), "2|blush|lar accounts amo|")
     }
 
     #[test]


Reply via email to