This is an automated email from the ASF dual-hosted git repository. jiayu pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/sedona-spatialbench.git
commit 638897ff157cd841c94256aa407ba19299963430 Author: Pranav Toggi <[email protected]> AuthorDate: Thu Jul 10 22:44:24 2025 -0700 clean up 1 --- tpchgen-arrow/src/lib.rs | 8 - tpchgen-arrow/src/lineitem.rs | 174 ------ tpchgen-arrow/src/nation.rs | 108 ---- tpchgen-arrow/src/order.rs | 129 ---- tpchgen-arrow/src/region.rs | 96 --- tpchgen-arrow/tests/reparse.rs | 12 - tpchgen-cli/src/csv.rs | 4 - tpchgen-cli/src/main.rs | 71 --- tpchgen-cli/src/tbl.rs | 4 - tpchgen/src/csv.rs | 220 ------- tpchgen/src/generators.rs | 1295 +--------------------------------------- 11 files changed, 1 insertion(+), 2120 deletions(-) diff --git a/tpchgen-arrow/src/lib.rs b/tpchgen-arrow/src/lib.rs index a6384ae..24e045d 100644 --- a/tpchgen-arrow/src/lib.rs +++ b/tpchgen-arrow/src/lib.rs @@ -38,10 +38,6 @@ mod building; pub mod conversions; mod customer; mod driver; -// mod lineitem; -// mod nation; -// mod order; -// mod region; mod trip; mod vehicle; mod zone; @@ -51,10 +47,6 @@ use arrow::datatypes::SchemaRef; pub use building::BuildingArrow; pub use customer::CustomerArrow; pub use driver::DriverArrow; -// pub use lineitem::LineItemArrow; -// pub use nation::NationArrow; -// pub use order::OrderArrow; -// pub use region::RegionArrow; pub use trip::TripArrow; pub use vehicle::VehicleArrow; pub use zone::ZoneArrow; diff --git a/tpchgen-arrow/src/lineitem.rs b/tpchgen-arrow/src/lineitem.rs deleted file mode 100644 index deb0728..0000000 --- a/tpchgen-arrow/src/lineitem.rs +++ /dev/null @@ -1,174 +0,0 @@ -// use crate::conversions::{decimal128_array_from_iter, to_arrow_date32}; -// use crate::{DEFAULT_BATCH_SIZE, RecordBatchIterator}; -// use arrow::array::{ -// Date32Array, Decimal128Array, Int32Array, Int64Array, RecordBatch, StringViewArray, -// }; -// use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -// use std::sync::{Arc, LazyLock}; -// use tpchgen::generators::{LineItemGenerator, LineItemGeneratorIterator}; -// -// /// Generate [`LineItem`]s in [`RecordBatch`] 
format -// /// -// /// [`LineItem`]: tpchgen::generators::LineItem -// /// -// /// # Example -// /// ``` -// /// # use tpchgen::generators::LineItemGenerator; -// /// # use tpchgen_arrow::LineItemArrow; -// /// -// /// // Create a SF=1.0 generator and wrap it in an Arrow generator -// /// let generator = LineItemGenerator::new(1.0, 1, 1); -// /// let mut arrow_generator = LineItemArrow::new(generator) -// /// .with_batch_size(10); -// /// // Read the first 10 batches -// /// let batch = arrow_generator.next().unwrap(); -// /// // compare the output by pretty printing it -// /// let formatted_batches = arrow::util::pretty::pretty_format_batches(&[batch]) -// /// .unwrap() -// /// .to_string(); -// /// let lines = formatted_batches.lines().collect::<Vec<_>>(); -// /// assert_eq!(lines, vec![ -// /// "+------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+-------------------+------------+-------------------------------------+", -// /// "| l_orderkey | l_vehiclekey | l_suppkey | l_linenumber | l_quantity | l_extendedprice | l_discount | l_tax | l_returnflag | l_linestatus | l_shipdate | l_commitdate | l_receiptdate | l_shipinstruct | l_shipmode | l_comment |", -// /// "+------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+-------------------+------------+-------------------------------------+", -// /// "| 1 | 155190 | 7706 | 1 | 17.00 | 21168.23 | 0.04 | 0.02 | N | O | 1996-03-13 | 1996-02-12 | 1996-03-22 | DELIVER IN PERSON | TRUCK | egular courts above the |", -// /// "| 1 | 67310 | 7311 | 2 | 36.00 | 45983.16 | 0.09 | 0.06 | N | O | 1996-04-12 | 1996-02-28 | 1996-04-20 | TAKE BACK RETURN | MAIL | ly final dependencies: slyly bold |", -// /// "| 1 | 63700 | 3701 | 3 | 8.00 | 13309.60 | 0.10 | 0.02 | N | O | 1996-01-29 | 
1996-03-05 | 1996-01-31 | TAKE BACK RETURN | REG AIR | riously. regular, express dep |", -// /// "| 1 | 2132 | 4633 | 4 | 28.00 | 28955.64 | 0.09 | 0.06 | N | O | 1996-04-21 | 1996-03-30 | 1996-05-16 | NONE | AIR | lites. fluffily even de |", -// /// "| 1 | 24027 | 1534 | 5 | 24.00 | 22824.48 | 0.10 | 0.04 | N | O | 1996-03-30 | 1996-03-14 | 1996-04-01 | NONE | FOB | pending foxes. slyly re |", -// /// "| 1 | 15635 | 638 | 6 | 32.00 | 49620.16 | 0.07 | 0.02 | N | O | 1996-01-30 | 1996-02-07 | 1996-02-03 | DELIVER IN PERSON | MAIL | arefully slyly ex |", -// /// "| 2 | 106170 | 1191 | 1 | 38.00 | 44694.46 | 0.00 | 0.05 | N | O | 1997-01-28 | 1997-01-14 | 1997-02-02 | TAKE BACK RETURN | RAIL | ven requests. deposits breach a |", -// /// "| 3 | 4297 | 1798 | 1 | 45.00 | 54058.05 | 0.06 | 0.00 | R | F | 1994-02-02 | 1994-01-04 | 1994-02-23 | NONE | AIR | ongside of the furiously brave acco |", -// /// "| 3 | 19036 | 6540 | 2 | 49.00 | 46796.47 | 0.10 | 0.00 | R | F | 1993-11-09 | 1993-12-20 | 1993-11-24 | TAKE BACK RETURN | RAIL | unusual accounts. eve |", -// /// "| 3 | 128449 | 3474 | 3 | 27.00 | 39890.88 | 0.06 | 0.07 | A | F | 1994-01-16 | 1993-11-22 | 1994-01-23 | DELIVER IN PERSON | SHIP | nal foxes wake. |", -// /// "+------------+-----------+-----------+--------------+------------+-----------------+------------+-------+--------------+--------------+------------+--------------+---------------+-------------------+------------+-------------------------------------+" -// /// ]); -// /// ``` -// // # TODOs: -// // 1. create individual column iterators to avoid a copy into rows -// // 2. 
Maybe Recycle buffers (don't reallocate new ones all the time) :thinking: -// // Based off code / types from DataFusion -// // https://github.com/apache/datafusion/blob/a1ae15826245097e7c12d4f0ed3425b25af6c431/benchmarks/src/tpch/mod.rs#L104-L103 -// pub struct LineItemArrow { -// inner: LineItemGeneratorIterator<'static>, -// batch_size: usize, -// } -// -// impl LineItemArrow { -// pub fn new(generator: LineItemGenerator<'static>) -> Self { -// Self { -// inner: generator.iter(), -// batch_size: DEFAULT_BATCH_SIZE, -// } -// } -// -// /// Set the batch size -// pub fn with_batch_size(mut self, batch_size: usize) -> Self { -// self.batch_size = batch_size; -// self -// } -// } -// -// impl RecordBatchIterator for LineItemArrow { -// fn schema(&self) -> &SchemaRef { -// &LINEITEM_SCHEMA -// } -// } -// -// impl Iterator for LineItemArrow { -// type Item = RecordBatch; -// -// /// Generate the next batch of data, if there is one -// fn next(&mut self) -> Option<Self::Item> { -// // Get next rows to convert -// let rows: Vec<_> = self.inner.by_ref().take(self.batch_size).collect(); -// if rows.is_empty() { -// return None; -// } -// -// // Convert column by column -// let l_orderkey = Int64Array::from_iter_values(rows.iter().map(|row| row.l_orderkey)); -// let l_vehiclekey = Int64Array::from_iter_values(rows.iter().map(|row| row.l_vehiclekey)); -// let l_suppkey = Int64Array::from_iter_values(rows.iter().map(|row| row.l_suppkey)); -// let l_linenumber = Int32Array::from_iter_values(rows.iter().map(|row| row.l_linenumber)); -// let l_quantity = Decimal128Array::from_iter_values(rows.iter().map(|row| { -// // Convert the i64 to Arrow Decimal(15,2) -// // TODO it is supposed to be decimal in the spec -// (row.l_quantity as i128) * 100 -// })) -// .with_precision_and_scale(15, 2) -// .unwrap(); -// let l_extended_price = -// decimal128_array_from_iter(rows.iter().map(|row| row.l_extendedprice)); -// let l_discount = decimal128_array_from_iter(rows.iter().map(|row| 
row.l_discount)); -// let l_tax = decimal128_array_from_iter(rows.iter().map(|row| row.l_tax)); -// let l_returnflag = -// StringViewArray::from_iter_values(rows.iter().map(|row| row.l_returnflag)); -// let l_linestatus = -// StringViewArray::from_iter_values(rows.iter().map(|row| row.l_linestatus)); -// let l_shipdate = Date32Array::from_iter_values( -// rows.iter().map(|row| row.l_shipdate).map(to_arrow_date32), -// ); -// let l_commitdate = Date32Array::from_iter_values( -// rows.iter().map(|row| row.l_commitdate).map(to_arrow_date32), -// ); -// let l_receiptdate = Date32Array::from_iter_values( -// rows.iter() -// .map(|row| row.l_receiptdate) -// .map(to_arrow_date32), -// ); -// let l_shipinstruct = -// StringViewArray::from_iter_values(rows.iter().map(|row| row.l_shipinstruct)); -// let l_shipmode = StringViewArray::from_iter_values(rows.iter().map(|row| row.l_shipmode)); -// let l_comment = StringViewArray::from_iter_values(rows.iter().map(|row| row.l_comment)); -// -// let batch = RecordBatch::try_new( -// Arc::clone(self.schema()), -// vec![ -// Arc::new(l_orderkey), -// Arc::new(l_vehiclekey), -// Arc::new(l_suppkey), -// Arc::new(l_linenumber), -// Arc::new(l_quantity), -// Arc::new(l_extended_price), -// Arc::new(l_discount), -// Arc::new(l_tax), -// Arc::new(l_returnflag), -// Arc::new(l_linestatus), -// Arc::new(l_shipdate), -// Arc::new(l_commitdate), -// Arc::new(l_receiptdate), -// Arc::new(l_shipinstruct), -// Arc::new(l_shipmode), -// Arc::new(l_comment), -// ], -// ) -// .unwrap(); -// -// Some(batch) -// } -// } -// -// /// Schema for the LineItem table -// static LINEITEM_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(make_lineitem_schema); -// -// fn make_lineitem_schema() -> SchemaRef { -// Arc::new(Schema::new(vec![ -// Field::new("l_orderkey", DataType::Int64, false), -// Field::new("l_vehiclekey", DataType::Int64, false), -// Field::new("l_suppkey", DataType::Int64, false), -// Field::new("l_linenumber", DataType::Int32, false), -// 
Field::new("l_quantity", DataType::Decimal128(15, 2), false), -// Field::new("l_extendedprice", DataType::Decimal128(15, 2), false), -// Field::new("l_discount", DataType::Decimal128(15, 2), false), -// Field::new("l_tax", DataType::Decimal128(15, 2), false), -// Field::new("l_returnflag", DataType::Utf8View, false), -// Field::new("l_linestatus", DataType::Utf8View, false), -// Field::new("l_shipdate", DataType::Date32, false), -// Field::new("l_commitdate", DataType::Date32, false), -// Field::new("l_receiptdate", DataType::Date32, false), -// Field::new("l_shipinstruct", DataType::Utf8View, false), -// Field::new("l_shipmode", DataType::Utf8View, false), -// Field::new("l_comment", DataType::Utf8View, false), -// ])) -// } diff --git a/tpchgen-arrow/src/nation.rs b/tpchgen-arrow/src/nation.rs deleted file mode 100644 index 02491d9..0000000 --- a/tpchgen-arrow/src/nation.rs +++ /dev/null @@ -1,108 +0,0 @@ -// use crate::{DEFAULT_BATCH_SIZE, RecordBatchIterator}; -// use arrow::array::{Int64Array, RecordBatch, StringViewArray}; -// use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -// use std::sync::{Arc, LazyLock}; -// use tpchgen::generators::{NationGenerator, NationGeneratorIterator}; -// -// /// Generate [`Nation`]s in [`RecordBatch`] format -// /// -// /// [`Nation`]: tpchgen::generators::Nation -// /// -// /// # Example -// /// ``` -// /// # use tpchgen::generators::{NationGenerator}; -// /// # use tpchgen_arrow::NationArrow; -// /// -// /// // Create a SF=1.0 generator and wrap it in an Arrow generator -// /// let generator = NationGenerator::new(1.0, 1, 1); -// /// let mut arrow_generator = NationArrow::new(generator) -// /// .with_batch_size(10); -// /// // Read the first 10 batches -// /// let batch = arrow_generator.next().unwrap(); -// /// // compare the output by pretty printing it -// /// let formatted_batches = arrow::util::pretty::pretty_format_batches(&[batch]) -// /// .unwrap() -// /// .to_string(); -// /// let lines = 
formatted_batches.lines().collect::<Vec<_>>(); -// /// assert_eq!(lines, vec![ -// /// "+-------------+-----------+-------------+--------------------------------------------------------------------------------------------------------------------+", -// /// "| n_nationkey | n_name | n_regionkey | n_comment |", -// /// "+-------------+-----------+-------------+--------------------------------------------------------------------------------------------------------------------+", -// /// "| 0 | ALGERIA | 0 | haggle. carefully final deposits detect slyly agai |", -// /// "| 1 | ARGENTINA | 1 | al foxes promise slyly according to the regular accounts. bold requests alon |", -// /// "| 2 | BRAZIL | 1 | y alongside of the pending deposits. carefully special packages are about the ironic forges. slyly special |", -// /// "| 3 | CANADA | 1 | eas hang ironic, silent packages. slyly regular packages are furiously over the tithes. fluffily bold |", -// /// "| 4 | EGYPT | 4 | y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d |", -// /// "| 5 | ETHIOPIA | 0 | ven packages wake quickly. regu |", -// /// "| 6 | FRANCE | 3 | refully final requests. regular, ironi |", -// /// "| 7 | GERMANY | 3 | l platelets. regular accounts x-ray: unusual, regular acco |", -// /// "| 8 | INDIA | 2 | ss excuses cajole slyly across the packages. deposits print aroun |", -// /// "| 9 | INDONESIA | 2 | slyly express asymptotes. regular deposits haggle slyly. carefully ironic hockey players sleep blithely. 
carefull |", -// /// "+-------------+-----------+-------------+--------------------------------------------------------------------------------------------------------------------+" -// /// ]); -// /// ``` -// pub struct NationArrow { -// inner: NationGeneratorIterator<'static>, -// batch_size: usize, -// } -// -// impl NationArrow { -// pub fn new(generator: NationGenerator<'static>) -> Self { -// Self { -// inner: generator.iter(), -// batch_size: DEFAULT_BATCH_SIZE, -// } -// } -// -// /// Set the batch size -// pub fn with_batch_size(mut self, batch_size: usize) -> Self { -// self.batch_size = batch_size; -// self -// } -// } -// -// impl RecordBatchIterator for NationArrow { -// fn schema(&self) -> &SchemaRef { -// &NATION_SCHEMA -// } -// } -// -// impl Iterator for NationArrow { -// type Item = RecordBatch; -// -// fn next(&mut self) -> Option<Self::Item> { -// // Get next rows to convert -// let rows: Vec<_> = self.inner.by_ref().take(self.batch_size).collect(); -// if rows.is_empty() { -// return None; -// } -// -// let n_nationkey = Int64Array::from_iter_values(rows.iter().map(|r| r.n_nationkey)); -// let n_name = StringViewArray::from_iter_values(rows.iter().map(|r| r.n_name)); -// let n_regionkey = Int64Array::from_iter_values(rows.iter().map(|r| r.n_regionkey)); -// let n_comment = StringViewArray::from_iter_values(rows.iter().map(|r| r.n_comment)); -// -// let batch = RecordBatch::try_new( -// Arc::clone(self.schema()), -// vec![ -// Arc::new(n_nationkey), -// Arc::new(n_name), -// Arc::new(n_regionkey), -// Arc::new(n_comment), -// ], -// ) -// .unwrap(); -// Some(batch) -// } -// } -// -// /// Schema for the Nation -// static NATION_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(make_nation_schema); -// fn make_nation_schema() -> SchemaRef { -// Arc::new(Schema::new(vec![ -// Field::new("n_nationkey", DataType::Int64, false), -// Field::new("n_name", DataType::Utf8View, false), -// Field::new("n_regionkey", DataType::Int64, false), -// 
Field::new("n_comment", DataType::Utf8View, false), -// ])) -// } diff --git a/tpchgen-arrow/src/order.rs b/tpchgen-arrow/src/order.rs deleted file mode 100644 index 615473e..0000000 --- a/tpchgen-arrow/src/order.rs +++ /dev/null @@ -1,129 +0,0 @@ -// use crate::conversions::{ -// decimal128_array_from_iter, string_view_array_from_display_iter, to_arrow_date32, -// }; -// use crate::{DEFAULT_BATCH_SIZE, RecordBatchIterator}; -// use arrow::array::{Date32Array, Int32Array, Int64Array, RecordBatch, StringViewArray}; -// use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -// use std::sync::{Arc, LazyLock}; -// use tpchgen::generators::{OrderGenerator, OrderGeneratorIterator}; -// -// /// Generate [`Order`]s in [`RecordBatch`] format -// /// -// /// [`Order`]: tpchgen::generators::Order -// /// -// /// # Example -// /// ``` -// /// # use tpchgen::generators::{OrderGenerator}; -// /// # use tpchgen_arrow::OrderArrow; -// /// -// /// // Create a SF=1.0 generator and wrap it in an Arrow generator -// /// let generator = OrderGenerator::new(1.0, 1, 1); -// /// let mut arrow_generator = OrderArrow::new(generator) -// /// .with_batch_size(10); -// /// // Read the first 10 batches -// /// let batch = arrow_generator.next().unwrap(); -// /// // compare the output by pretty printing it -// /// let formatted_batches = arrow::util::pretty::pretty_format_batches(&[batch]) -// /// .unwrap() -// /// .to_string(); -// /// let lines = formatted_batches.lines().collect::<Vec<_>>(); -// /// assert_eq!(lines, vec![ -// /// "+------------+-----------+---------------+--------------+-------------+-----------------+-----------------+----------------+---------------------------------------------------------------------------+", -// /// "| o_orderkey | o_custkey | o_orderstatus | o_totalprice | o_orderdate | o_orderpriority | o_clerk | o_shippriority | o_comment |", -// /// 
"+------------+-----------+---------------+--------------+-------------+-----------------+-----------------+----------------+---------------------------------------------------------------------------+", -// /// "| 1 | 36901 | O | 173665.47 | 1996-01-02 | 5-LOW | Clerk#000000951 | 0 | nstructions sleep furiously among |", -// /// "| 2 | 78002 | O | 46929.18 | 1996-12-01 | 1-URGENT | Clerk#000000880 | 0 | foxes. pending accounts at the pending, silent asymptot |", -// /// "| 3 | 123314 | F | 193846.25 | 1993-10-14 | 5-LOW | Clerk#000000955 | 0 | sly final accounts boost. carefully regular ideas cajole carefully. depos |", -// /// "| 4 | 136777 | O | 32151.78 | 1995-10-11 | 5-LOW | Clerk#000000124 | 0 | sits. slyly regular warthogs cajole. regular, regular theodolites acro |", -// /// "| 5 | 44485 | F | 144659.20 | 1994-07-30 | 5-LOW | Clerk#000000925 | 0 | quickly. bold deposits sleep slyly. packages use slyly |", -// /// "| 6 | 55624 | F | 58749.59 | 1992-02-21 | 4-NOT SPECIFIED | Clerk#000000058 | 0 | ggle. special, final requests are against the furiously specia |", -// /// "| 7 | 39136 | O | 252004.18 | 1996-01-10 | 2-HIGH | Clerk#000000470 | 0 | ly special requests |", -// /// "| 32 | 130057 | O | 208660.75 | 1995-07-16 | 2-HIGH | Clerk#000000616 | 0 | ise blithely bold, regular requests. quickly unusual dep |", -// /// "| 33 | 66958 | F | 163243.98 | 1993-10-27 | 3-MEDIUM | Clerk#000000409 | 0 | uriously. furiously final request |", -// /// "| 34 | 61001 | O | 58949.67 | 1998-07-21 | 3-MEDIUM | Clerk#000000223 | 0 | ly final packages. fluffily final deposits wake blithely ideas. 
spe |", -// /// "+------------+-----------+---------------+--------------+-------------+-----------------+-----------------+----------------+---------------------------------------------------------------------------+" -// /// ]); -// /// ``` -// pub struct OrderArrow { -// inner: OrderGeneratorIterator<'static>, -// batch_size: usize, -// } -// -// impl OrderArrow { -// pub fn new(generator: OrderGenerator<'static>) -> Self { -// Self { -// inner: generator.iter(), -// batch_size: DEFAULT_BATCH_SIZE, -// } -// } -// -// /// Set the batch size -// pub fn with_batch_size(mut self, batch_size: usize) -> Self { -// self.batch_size = batch_size; -// self -// } -// } -// -// impl RecordBatchIterator for OrderArrow { -// fn schema(&self) -> &SchemaRef { -// &ORDER_SCHEMA -// } -// } -// -// impl Iterator for OrderArrow { -// type Item = RecordBatch; -// -// fn next(&mut self) -> Option<Self::Item> { -// // Get next rows to convert -// let rows: Vec<_> = self.inner.by_ref().take(self.batch_size).collect(); -// if rows.is_empty() { -// return None; -// } -// -// let o_orderkey = Int64Array::from_iter_values(rows.iter().map(|r| r.o_orderkey)); -// let o_custkey = Int64Array::from_iter_values(rows.iter().map(|r| r.o_custkey)); -// let o_orderstatus = -// string_view_array_from_display_iter(rows.iter().map(|r| r.o_orderstatus)); -// let o_totalprice = decimal128_array_from_iter(rows.iter().map(|r| r.o_totalprice)); -// let o_orderdate = -// Date32Array::from_iter_values(rows.iter().map(|r| r.o_orderdate).map(to_arrow_date32)); -// let o_orderpriority = -// StringViewArray::from_iter_values(rows.iter().map(|r| r.o_orderpriority)); -// let o_clerk = string_view_array_from_display_iter(rows.iter().map(|r| r.o_clerk)); -// let o_shippriority = Int32Array::from_iter_values(rows.iter().map(|r| r.o_shippriority)); -// let o_comment = StringViewArray::from_iter_values(rows.iter().map(|r| r.o_comment)); -// -// let batch = RecordBatch::try_new( -// Arc::clone(self.schema()), -// vec![ 
-// Arc::new(o_orderkey), -// Arc::new(o_custkey), -// Arc::new(o_orderstatus), -// Arc::new(o_totalprice), -// Arc::new(o_orderdate), -// Arc::new(o_orderpriority), -// Arc::new(o_clerk), -// Arc::new(o_shippriority), -// Arc::new(o_comment), -// ], -// ) -// .unwrap(); -// Some(batch) -// } -// } -// -// /// Schema for the Order -// static ORDER_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(make_order_schema); -// fn make_order_schema() -> SchemaRef { -// Arc::new(Schema::new(vec![ -// Field::new("o_orderkey", DataType::Int64, false), -// Field::new("o_custkey", DataType::Int64, false), -// Field::new("o_orderstatus", DataType::Utf8View, false), -// Field::new("o_totalprice", DataType::Decimal128(15, 2), false), -// Field::new("o_orderdate", DataType::Date32, false), -// Field::new("o_orderpriority", DataType::Utf8View, false), -// Field::new("o_clerk", DataType::Utf8View, false), -// Field::new("o_shippriority", DataType::Int32, false), -// Field::new("o_comment", DataType::Utf8View, false), -// ])) -// } diff --git a/tpchgen-arrow/src/region.rs b/tpchgen-arrow/src/region.rs deleted file mode 100644 index 51d62de..0000000 --- a/tpchgen-arrow/src/region.rs +++ /dev/null @@ -1,96 +0,0 @@ -// use crate::{DEFAULT_BATCH_SIZE, RecordBatchIterator}; -// use arrow::array::{Int64Array, RecordBatch, StringViewArray}; -// use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; -// use std::sync::{Arc, LazyLock}; -// use tpchgen::generators::{RegionGenerator, RegionGeneratorIterator}; -// -// /// Generate [`Region`]s in [`RecordBatch`] format -// /// -// /// [`Region`]: tpchgen::generators::Region -// /// -// /// # Example -// /// ``` -// /// # use tpchgen::generators::{RegionGenerator}; -// /// # use tpchgen_arrow::RegionArrow; -// /// -// /// // Create a SF=1.0 generator and wrap it in an Arrow generator -// /// let generator = RegionGenerator::new(1.0, 1, 1); -// /// let mut arrow_generator = RegionArrow::new(generator) -// /// .with_batch_size(10); -// /// // Read the 
first 10 batches -// /// let batch = arrow_generator.next().unwrap(); -// /// // compare the output by pretty printing it -// /// let formatted_batches = arrow::util::pretty::pretty_format_batches(&[batch]) -// /// .unwrap() -// /// .to_string(); -// /// let lines = formatted_batches.lines().collect::<Vec<_>>(); -// /// assert_eq!(lines, vec![ -// /// "+-------------+-------------+---------------------------------------------------------------------------------------------------------------------+", -// /// "| r_regionkey | r_name | r_comment |", -// /// "+-------------+-------------+---------------------------------------------------------------------------------------------------------------------+", -// /// "| 0 | AFRICA | lar deposits. blithely final packages cajole. regular waters are final requests. regular accounts are according to |", -// /// "| 1 | AMERICA | hs use ironic, even requests. s |", -// /// "| 2 | ASIA | ges. thinly even pinto beans ca |", -// /// "| 3 | EUROPE | ly final courts cajole furiously final excuse |", -// /// "| 4 | MIDDLE EAST | uickly special accounts cajole carefully blithely close requests. 
carefully final asymptotes haggle furiousl |", -// /// "+-------------+-------------+---------------------------------------------------------------------------------------------------------------------+" -// /// ]); -// /// ``` -// pub struct RegionArrow { -// inner: RegionGeneratorIterator<'static>, -// batch_size: usize, -// } -// -// impl RegionArrow { -// pub fn new(generator: RegionGenerator<'static>) -> Self { -// Self { -// inner: generator.iter(), -// batch_size: DEFAULT_BATCH_SIZE, -// } -// } -// -// /// Set the batch size -// pub fn with_batch_size(mut self, batch_size: usize) -> Self { -// self.batch_size = batch_size; -// self -// } -// } -// -// impl RecordBatchIterator for RegionArrow { -// fn schema(&self) -> &SchemaRef { -// &REGION_SCHEMA -// } -// } -// -// impl Iterator for RegionArrow { -// type Item = RecordBatch; -// -// fn next(&mut self) -> Option<Self::Item> { -// // Get next rows to convert -// let rows: Vec<_> = self.inner.by_ref().take(self.batch_size).collect(); -// if rows.is_empty() { -// return None; -// } -// -// let r_regionkey = Int64Array::from_iter_values(rows.iter().map(|r| r.r_regionkey)); -// let r_name = StringViewArray::from_iter_values(rows.iter().map(|r| r.r_name)); -// let r_comment = StringViewArray::from_iter_values(rows.iter().map(|r| r.r_comment)); -// -// let batch = RecordBatch::try_new( -// Arc::clone(self.schema()), -// vec![Arc::new(r_regionkey), Arc::new(r_name), Arc::new(r_comment)], -// ) -// .unwrap(); -// Some(batch) -// } -// } -// -// /// Schema for the Region -// static REGION_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(make_region_schema); -// fn make_region_schema() -> SchemaRef { -// Arc::new(Schema::new(vec![ -// Field::new("r_regionkey", DataType::Int64, false), -// Field::new("r_name", DataType::Utf8View, false), -// Field::new("r_comment", DataType::Utf8View, false), -// ])) -// } diff --git a/tpchgen-arrow/tests/reparse.rs b/tpchgen-arrow/tests/reparse.rs index f98fd30..6d35d4a 100644 ---
a/tpchgen-arrow/tests/reparse.rs +++ b/tpchgen-arrow/tests/reparse.rs @@ -35,16 +35,8 @@ macro_rules! test_row_type { test_row_type!(customer_tbl, CustomerGenerator, CustomerArrow, Test::tbl()); test_row_type!(customer_csv, CustomerGenerator, CustomerArrow, Test::csv()); -// test_row_type!(lineitem_tbl, LineItemGenerator, LineItemArrow, Test::tbl()); -// test_row_type!(lineitem_csv, LineItemGenerator, LineItemArrow, Test::csv()); -// test_row_type!(nation_tbl, NationGenerator, NationArrow, Test::tbl()); -// test_row_type!(nation_csv, NationGenerator, NationArrow, Test::csv()); -// test_row_type!(order_tbl, OrderGenerator, OrderArrow, Test::tbl()); -// test_row_type!(order_csv, OrderGenerator, OrderArrow, Test::csv()); test_row_type!(vehicle_tbl, VehicleGenerator, VehicleArrow, Test::tbl()); test_row_type!(vehicle_csv, VehicleGenerator, VehicleArrow, Test::csv()); -// test_row_type!(region_tbl, RegionGenerator, RegionArrow, Test::tbl()); -// test_row_type!(region_csv, RegionGenerator, RegionArrow, Test::csv()); test_row_type!(driver_tbl, DriverGenerator, DriverArrow, Test::tbl()); test_row_type!(driver_csv, DriverGenerator, DriverArrow, Test::csv()); test_row_type!(trip_tbl, TripGenerator, TripArrow, Test::tbl()); @@ -82,11 +74,7 @@ macro_rules! impl_row_type { } impl_row_type!(Customer<'_>, CustomerCsv); -// impl_row_type!(LineItem<'_>, LineItemCsv); -// impl_row_type!(Nation<'_>, NationCsv); -// impl_row_type!(Order<'_>, OrderCsv); impl_row_type!(Vehicle<'_>, VehicleCsv); -// impl_row_type!(Region<'_>, RegionCsv); impl_row_type!(Driver, DriverCsv); impl_row_type!(Trip, TripCsv); impl_row_type!(Building<'_>, BuildingCsv); diff --git a/tpchgen-cli/src/csv.rs b/tpchgen-cli/src/csv.rs index dd88c0f..93048d0 100644 --- a/tpchgen-cli/src/csv.rs +++ b/tpchgen-cli/src/csv.rs @@ -40,13 +40,9 @@ macro_rules! 
define_csv_source { } // Define .csv sources for all tables -// define_csv_source!(NationCsvSource, NationGenerator<'static>, NationCsv); -// define_csv_source!(RegionCsvSource, RegionGenerator<'static>, RegionCsv); define_csv_source!(VehicleCsvSource, VehicleGenerator<'static>, VehicleCsv); define_csv_source!(DriverCsvSource, DriverGenerator<'static>, DriverCsv); define_csv_source!(CustomerCsvSource, CustomerGenerator<'static>, CustomerCsv); -// define_csv_source!(OrderCsvSource, OrderGenerator<'static>, OrderCsv); -// define_csv_source!(LineItemCsvSource, LineItemGenerator<'static>, LineItemCsv); define_csv_source!(TripCsvSource, TripGenerator, TripCsv); define_csv_source!(BuildingCsvSource, BuildingGenerator<'static>, BuildingCsv); define_csv_source!(ZoneCsvSource, ZoneGenerator, ZoneCsv); diff --git a/tpchgen-cli/src/main.rs b/tpchgen-cli/src/main.rs index 8cca245..e708789 100644 --- a/tpchgen-cli/src/main.rs +++ b/tpchgen-cli/src/main.rs @@ -131,13 +131,9 @@ struct Cli { #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] enum Table { - // Nation, - // Region, Vehicle, Driver, Customer, - // Orders, - // Lineitem, Trip, Building, Zone, @@ -174,13 +170,9 @@ impl TypedValueParser for TableValueParser { ) -> Option<Box<dyn Iterator<Item = clap::builder::PossibleValue> + '_>> { Some(Box::new( [ - // clap::builder::PossibleValue::new("region").help("Region table (alias: r)"), - // clap::builder::PossibleValue::new("nation").help("Nation table (alias: n)"), clap::builder::PossibleValue::new("driver").help("Driver table (alias: d)"), clap::builder::PossibleValue::new("customer").help("Customer table (alias: c)"), clap::builder::PossibleValue::new("vehicle").help("Vehicle table (alias: V)"), - // clap::builder::PossibleValue::new("orders").help("Orders table (alias: O)"), - // clap::builder::PossibleValue::new("lineitem").help("LineItem table (alias: L)"), clap::builder::PossibleValue::new("trip").help("Trip table (alias: T)"), 
clap::builder::PossibleValue::new("building").help("Building table (alias: b)"), clap::builder::PossibleValue::new("zone").help("Zone table (alias: z)"), @@ -201,13 +193,9 @@ impl FromStr for Table { /// only support the exclusive abbreviations. fn from_str(s: &str) -> Result<Self, Self::Err> { match s { - // "n" | "nation" => Ok(Table::Nation), - // "r" | "region" => Ok(Table::Region), "d" | "driver" => Ok(Table::Driver), "V" | "vehicle" => Ok(Table::Vehicle), "c" | "customer" => Ok(Table::Customer), - // "O" | "orders" => Ok(Table::Orders), - // "L" | "lineitem" => Ok(Table::Lineitem), "T" | "trip" => Ok(Table::Trip), "b" | "building" => Ok(Table::Building), "z" | "zone" => Ok(Table::Zone), @@ -219,13 +207,9 @@ impl FromStr for Table { impl Table { fn name(&self) -> &'static str { match self { - // Table::Nation => "nation", - // Table::Region => "region", Table::Vehicle => "vehicle", Table::Driver => "driver", Table::Customer => "customer", - // Table::Orders => "orders", - // Table::Lineitem => "lineitem", Table::Trip => "trip", Table::Building => "building", Table::Zone => "zone", @@ -300,13 +284,9 @@ impl Cli { tables.clone() } else { vec![ - // Table::Nation, - // Table::Region, Table::Vehicle, Table::Driver, Table::Customer, - // Table::Orders, - // Table::Lineitem, Table::Trip, Table::Building, ] @@ -324,13 +304,9 @@ impl Cli { // Generate each table for table in tables { match table { - // Table::Nation => self.generate_nation().await?, - // Table::Region => self.generate_region().await?, Table::Vehicle => self.generate_vehicle().await?, Table::Driver => self.generate_driver().await?, Table::Customer => self.generate_customer().await?, - // Table::Orders => self.generate_orders().await?, - // Table::Lineitem => self.generate_lineitem().await?, Table::Trip => self.generate_trip().await?, Table::Building => self.generate_building().await?, Table::Zone => self.generate_zone().await?, @@ -340,23 +316,6 @@ impl Cli { info!("Generation complete!"); Ok(()) } - - 
// define_generate!( - // generate_nation, - // Table::Nation, - // NationGenerator, - // NationTblSource, - // NationCsvSource, - // NationArrow - // ); - // define_generate!( - // generate_region, - // Table::Region, - // RegionGenerator, - // RegionTblSource, - // RegionCsvSource, - // RegionArrow - // ); define_generate!( generate_vehicle, Table::Vehicle, @@ -381,22 +340,6 @@ impl Cli { CustomerCsvSource, CustomerArrow ); - // define_generate!( - // generate_orders, - // Table::Orders, - // OrderGenerator, - // OrderTblSource, - // OrderCsvSource, - // OrderArrow - // ); - // define_generate!( - // generate_lineitem, - // Table::Lineitem, - // LineItemGenerator, - // LineItemTblSource, - // LineItemCsvSource, - // LineItemArrow - // ); define_generate!( generate_trip, Table::Trip, @@ -455,8 +398,6 @@ impl Cli { // Avg row size is an estimate of the average row size in bytes from the first 100 rows // of the table in tbl format let (avg_row_size_bytes, row_count) = match table { - // Table::Nation => (88, 1), - // Table::Region => (77, 1), Table::Vehicle => ( 115, VehicleGenerator::calculate_row_count(self.scale_factor, 1, 1), @@ -469,18 +410,6 @@ impl Cli { 160, CustomerGenerator::calculate_row_count(self.scale_factor, 1, 1), ), - // Table::Orders => ( - // 114, - // OrderGenerator::calculate_row_count(self.scale_factor, 1, 1), - // ), - // Table::Lineitem => { - // // there are on average 4 line items per order. - // // For example, in SF=10, - // // * orders has 15,000,000 rows - // // * lineitem has around 60,000,000 rows - // let row_count = 4 * OrderGenerator::calculate_row_count(self.scale_factor, 1, 1); - // (128, row_count) - // }, &Table::Trip => ( 130, TripGenerator::calculate_row_count(self.scale_factor, 1, 1), diff --git a/tpchgen-cli/src/tbl.rs b/tpchgen-cli/src/tbl.rs index c0d8540..29de2a3 100644 --- a/tpchgen-cli/src/tbl.rs +++ b/tpchgen-cli/src/tbl.rs @@ -38,13 +38,9 @@ macro_rules! 
define_tbl_source { } // Define .tbl sources for all tables -// define_tbl_source!(NationTblSource, NationGenerator<'static>); -// define_tbl_source!(RegionTblSource, RegionGenerator<'static>); define_tbl_source!(VehicleTblSource, VehicleGenerator<'static>); define_tbl_source!(DriverTblSource, DriverGenerator<'static>); define_tbl_source!(CustomerTblSource, CustomerGenerator<'static>); -// define_tbl_source!(OrderTblSource, OrderGenerator<'static>); -// define_tbl_source!(LineItemTblSource, LineItemGenerator<'static>); define_tbl_source!(TripTblSource, TripGenerator); define_tbl_source!(BuildingTblSource, BuildingGenerator<'static>); define_tbl_source!(ZoneTblSource, ZoneGenerator); diff --git a/tpchgen/src/csv.rs b/tpchgen/src/csv.rs index f4631c6..5521d87 100644 --- a/tpchgen/src/csv.rs +++ b/tpchgen/src/csv.rs @@ -4,104 +4,6 @@ use crate::generators::{Building, Customer, Driver, Trip, Vehicle, Zone}; use core::fmt; use std::fmt::Display; -// Write [`Nation`]s in CSV format. -// -// # Example -// ``` -// # use tpchgen::generators::NationGenerator; -// # use tpchgen::csv::NationCsv; -// # use std::fmt::Write; -// // Output the first 3 rows in CSV format -// let generator = NationGenerator::default(); -// let mut csv = String::new(); -// writeln!(&mut csv, "{}", NationCsv::header()).unwrap(); // write header -// for line in generator.iter().take(3) { -// // write line using CSV formatter -// writeln!(&mut csv, "{}", NationCsv::new(line)).unwrap(); -// } -// assert_eq!( -// csv, -// "n_nationkey,n_name,n_regionkey,n_comment\n\ -// 0,ALGERIA,0,\" haggle. carefully final deposits detect slyly agai\"\n\ -// 1,ARGENTINA,1,\"al foxes promise slyly according to the regular accounts. bold requests alon\"\n\ -// 2,BRAZIL,1,\"y alongside of the pending deposits. carefully special packages are about the ironic forges. 
slyly special \"\n" -// ); -// ``` -// pub struct NationCsv<'a> { -// inner: Nation<'a>, -// } -// -// impl<'a> NationCsv<'a> { -// pub fn new(inner: Nation<'a>) -> Self { -// Self { inner } -// } -// -// /// Returns the CSV header for the Nation table -// pub fn header() -> &'static str { -// "n_nationkey,n_name,n_regionkey,n_comment" -// } -// } -// -// impl Display for NationCsv<'_> { -// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -// write!( -// f, -// // note must quote the comment field as it may contain commas -// "{},{},{},\"{}\"", -// self.inner.n_nationkey, self.inner.n_name, self.inner.n_regionkey, self.inner.n_comment -// ) -// } -// } - -// Write [`Region`]s in CSV format. -// -// # Example -// ``` -// # use tpchgen::generators::{RegionGenerator}; -// # use tpchgen::csv::{NationCsv, RegionCsv}; -// # use std::fmt::Write; -// let generator = RegionGenerator::default(); -// // Output the first 3 rows in CSV format -// let mut csv = String::new(); -// writeln!(&mut csv, "{}", RegionCsv::header()).unwrap(); // write header -// for line in generator.iter().take(3) { -// // write line using CSV formatter -// writeln!(&mut csv, "{}", RegionCsv::new(line)).unwrap(); -// } -// assert_eq!( -// csv, -// "r_regionkey,r_name,r_comment\n\ -// 0,AFRICA,\"lar deposits. blithely final packages cajole. regular waters are final requests. regular accounts are according to \"\n\ -// 1,AMERICA,\"hs use ironic, even requests. s\"\n\ -// 2,ASIA,\"ges. 
thinly even pinto beans ca\"\n" -// ); -// ``` -// pub struct RegionCsv<'a> { -// inner: Region<'a>, -// } -// -// impl<'a> RegionCsv<'a> { -// pub fn new(inner: Region<'a>) -> Self { -// Self { inner } -// } -// -// /// Returns the CSV header for the Region table -// pub fn header() -> &'static str { -// "r_regionkey,r_name,r_comment" -// } -// } -// -// impl Display for RegionCsv<'_> { -// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -// write!( -// f, -// // note must quote the comment field as it may contain commas -// "{},{},\"{}\"", -// self.inner.r_regionkey, self.inner.r_name, self.inner.r_comment -// ) -// } -// } - /// Write [`Vehicle`]s in CSV format. /// /// # Example @@ -263,128 +165,6 @@ impl Display for CustomerCsv<'_> { } } -// Write [`Order`]s in CSV format. -// -// # Example -// ``` -// # use tpchgen::generators::OrderGenerator; -// # use tpchgen::csv::OrderCsv; -// # use std::fmt::Write; -// // Output the first 3 rows in CSV format -// let generator = OrderGenerator::new(1.0, 1, 1); -// let mut csv = String::new(); -// writeln!(&mut csv, "{}", OrderCsv::header()).unwrap(); // write header -// for line in generator.iter().take(3) { -// // write line using CSV formatter -// writeln!(&mut csv, "{}", OrderCsv::new(line)).unwrap(); -// } -// assert_eq!( -// csv, -// "o_orderkey,o_custkey,o_orderstatus,o_totalprice,o_orderdate,o_orderpriority,o_clerk,o_shippriority,o_comment\n\ -// 1,36901,O,173665.47,1996-01-02,5-LOW,Clerk#000000951,0,\"nstructions sleep furiously among \"\n\ -// 2,78002,O,46929.18,1996-12-01,1-URGENT,Clerk#000000880,0,\" foxes. pending accounts at the pending, silent asymptot\"\n\ -// 3,123314,F,193846.25,1993-10-14,5-LOW,Clerk#000000955,0,\"sly final accounts boost. carefully regular ideas cajole carefully. 
depos\"\n" -// ); -// ``` -// pub struct OrderCsv<'a> { -// inner: Order<'a>, -// } -// -// impl<'a> OrderCsv<'a> { -// pub fn new(inner: Order<'a>) -> Self { -// Self { inner } -// } -// -// /// Returns the CSV header for the Order table -// pub fn header() -> &'static str { -// "o_orderkey,o_custkey,o_orderstatus,o_totalprice,o_orderdate,o_orderpriority,o_clerk,o_shippriority,o_comment" -// } -// } -// -// impl Display for OrderCsv<'_> { -// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -// write!( -// f, -// // note must quote the comment field as it may contain commas -// "{},{},{},{},{},{},{},{},\"{}\"", -// self.inner.o_orderkey, -// self.inner.o_custkey, -// self.inner.o_orderstatus, -// self.inner.o_totalprice, -// self.inner.o_orderdate, -// self.inner.o_orderpriority, -// self.inner.o_clerk, -// self.inner.o_shippriority, -// self.inner.o_comment -// ) -// } -// } -// -// /// Write [`LineItem`]s in CSV format. -// /// -// /// # Example -// /// ``` -// /// # use tpchgen::generators::LineItemGenerator; -// /// # use tpchgen::csv::LineItemCsv; -// /// # use std::fmt::Write; -// /// // Output the first 3 rows in CSV format -// /// let generator = LineItemGenerator::new(1.0, 1, 1); -// /// let mut csv = String::new(); -// /// writeln!(&mut csv, "{}", LineItemCsv::header()).unwrap(); // write header -// /// for line in generator.iter().take(3) { -// /// // write line using CSV formatter -// /// writeln!(&mut csv, "{}", LineItemCsv::new(line)).unwrap(); -// /// } -// /// assert_eq!( -// /// csv, -// /// "l_orderkey,l_vehiclekey,l_suppkey,l_linenumber,l_quantity,l_extendedprice,l_discount,l_tax,l_returnflag,l_linestatus,l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment\n\ -// /// 1,155190,7706,1,17,21168.23,0.04,0.02,N,O,1996-03-13,1996-02-12,1996-03-22,DELIVER IN PERSON,TRUCK,\"egular courts above the\"\n\ -// /// 1,67310,7311,2,36,45983.16,0.09,0.06,N,O,1996-04-12,1996-02-28,1996-04-20,TAKE BACK RETURN,MAIL,\"ly final 
dependencies: slyly bold \"\n\ -// /// 1,63700,3701,3,8,13309.60,0.10,0.02,N,O,1996-01-29,1996-03-05,1996-01-31,TAKE BACK RETURN,REG AIR,\"riously. regular, express dep\"\n" -// /// ); -// /// ``` -// /// -// /// [crate documentation]: crate -// pub struct LineItemCsv<'a> { -// inner: LineItem<'a>, -// } -// impl<'a> LineItemCsv<'a> { -// pub fn new(inner: LineItem<'a>) -> Self { -// Self { inner } -// } -// -// /// Returns the CSV header for the LineItem table -// pub fn header() -> &'static str { -// "l_orderkey,l_vehiclekey,l_suppkey,l_linenumber,l_quantity,l_extendedprice,l_discount,l_tax,l_returnflag,l_linestatus,l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment" -// } -// } -// -// impl Display for LineItemCsv<'_> { -// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -// write!( -// f, -// // note must quote the comment field as it may contain commas -// "{},{},{},{},{},{},{},{},{},{},{},{},{},{},{},\"{}\"", -// self.inner.l_orderkey, -// self.inner.l_vehiclekey, -// self.inner.l_suppkey, -// self.inner.l_linenumber, -// self.inner.l_quantity, -// self.inner.l_extendedprice, -// self.inner.l_discount, -// self.inner.l_tax, -// self.inner.l_returnflag, -// self.inner.l_linestatus, -// self.inner.l_shipdate, -// self.inner.l_commitdate, -// self.inner.l_receiptdate, -// self.inner.l_shipinstruct, -// self.inner.l_shipmode, -// self.inner.l_comment -// ) -// } -// } - /// Write [`Trip`]s in CSV format. 
/// /// # Example diff --git a/tpchgen/src/generators.rs b/tpchgen/src/generators.rs index 44b77b1..04c3b17 100644 --- a/tpchgen/src/generators.rs +++ b/tpchgen/src/generators.rs @@ -18,300 +18,6 @@ use rand::{Rng, SeedableRng}; use std::fmt; use std::fmt::Display; -// /// Generator for Nation table data -// #[derive(Debug, Clone)] -// pub struct NationGenerator<'a> { -// distributions: &'a Distributions, -// text_pool: &'a TextPool, -// } -// -// impl Default for NationGenerator<'_> { -// fn default() -> Self { -// // arguments are ignored -// Self::new(1.0, 1, 1) -// } -// } -// -// impl<'a> NationGenerator<'a> { -// /// Creates a new NationGenerator with default distributions and text pool -// /// -// /// Nations does not depend on the scale factor or the part number. The signature of -// /// this method is provided to be consistent with the other generators, but the -// /// parameters are ignored. You can use [`NationGenerator::default`] to create a -// /// default generator. -// /// -// /// The generator's lifetime is `&'static` because it references global -// /// [`Distribution]`s and thus can be shared safely between threads. 
-// pub fn new(_scale_factor: f64, _vehicle: i32, _vehicle_count: i32) -> NationGenerator<'static> { -// // Note: use explicit lifetime to ensure this remains `&'static` -// Self::new_with_distributions_and_text_pool( -// Distributions::static_default(), -// TextPool::get_or_init_default(), -// ) -// } -// -// /// Creates a NationGenerator with the specified distributions and text pool -// pub fn new_with_distributions_and_text_pool<'b>( -// distributions: &'b Distributions, -// text_pool: &'b TextPool, -// ) -> NationGenerator<'b> { -// NationGenerator { -// distributions, -// text_pool, -// } -// } -// -// /// Returns an iterator over the nation rows -// pub fn iter(&self) -> NationGeneratorIterator<'a> { -// NationGeneratorIterator::new(self.distributions.nations(), self.text_pool) -// } -// } -// -// impl<'a> IntoIterator for NationGenerator<'a> { -// type Item = Nation<'a>; -// type IntoIter = NationGeneratorIterator<'a>; -// -// fn into_iter(self) -> Self::IntoIter { -// self.iter() -// } -// } -// -// /// The NATION table -// /// -// /// The Display trait is implemented to format the line item data as a string -// /// in the default TPC-H 'tbl' format. -// /// -// /// ```text -// /// 0|ALGERIA|0| haggle. carefully final deposits detect slyly agai| -// /// 1|ARGENTINA|1|al foxes promise slyly according to the regular accounts. bold requests alon| -// /// ``` -// #[derive(Debug, Clone, PartialEq, Eq)] -// pub struct Nation<'a> { -// /// Primary key (0-24) -// pub n_nationkey: i64, -// /// Nation name -// pub n_name: &'a str, -// /// Foreign key to REGION -// pub n_regionkey: i64, -// /// Variable length comment -// pub n_comment: &'a str, -// } -// -// impl Display for Nation<'_> { -// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -// write!( -// f, -// "{}|{}|{}|{}|", -// self.n_nationkey, self.n_name, self.n_regionkey, self.n_comment -// ) -// } -// } -// -// impl<'a> Nation<'a> { -// /// Create a new `nation` record with the specified values. 
-// pub fn new(n_nationkey: i64, n_name: &'a str, n_regionkey: i64, n_comment: &'a str) -> Self { -// Nation { -// n_nationkey, -// n_name, -// n_regionkey, -// n_comment, -// } -// } -// } -// -// /// Iterator that generates Nation rows -// #[derive(Debug)] -// pub struct NationGeneratorIterator<'a> { -// nations: &'a Distribution, -// comment_random: RandomText<'a>, -// index: usize, -// } -// -// impl<'a> NationGeneratorIterator<'a> { -// const COMMENT_AVERAGE_LENGTH: i32 = 72; -// -// fn new(nations: &'a Distribution, text_pool: &'a TextPool) -> Self { -// NationGeneratorIterator { -// nations, -// comment_random: RandomText::new( -// 606179079, -// text_pool, -// Self::COMMENT_AVERAGE_LENGTH as f64, -// ), -// index: 0, -// } -// } -// } -// -// impl<'a> Iterator for NationGeneratorIterator<'a> { -// type Item = Nation<'a>; -// -// fn next(&mut self) -> Option<Self::Item> { -// if self.index >= self.nations.size() { -// return None; -// } -// -// let nation = Nation { -// // n_nationkey -// n_nationkey: self.index as i64, -// // n_name -// n_name: self.nations.get_value(self.index), -// // n_regionkey -// n_regionkey: self.nations.get_weight(self.index) as i64, -// // n_comment -// n_comment: self.comment_random.next_value(), -// }; -// -// self.comment_random.row_finished(); -// self.index += 1; -// -// Some(nation) -// } -// } -// -// /// The REGION table -// /// -// /// The Display trait is implemented to format the line item data as a string -// /// in the default TPC-H 'tbl' format. -// /// -// /// ```text -// /// 0|AFRICA|lar deposits. blithely final packages cajole. regular waters are final requests. regular accounts are according to | -// /// 1|AMERICA|hs use ironic, even requests. 
s| -// /// ``` -// #[derive(Debug, Clone, PartialEq, Eq)] -// pub struct Region<'a> { -// /// Primary key (0-4) -// pub r_regionkey: i64, -// /// Region name (AFRICA, AMERICA, ASIA, EUROPE, MIDDLE EAST) -// pub r_name: &'a str, -// /// Variable length comment -// pub r_comment: &'a str, -// } -// -// impl Display for Region<'_> { -// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { -// write!( -// f, -// "{}|{}|{}|", -// self.r_regionkey, self.r_name, self.r_comment -// ) -// } -// } -// -// impl<'a> Region<'a> { -// /// Creates a new `region` record with the specified values. -// pub fn new(r_regionkey: i64, r_name: &'a str, r_comment: &'a str) -> Self { -// Region { -// r_regionkey, -// r_name, -// r_comment, -// } -// } -// } -// -// /// Generator for Region table data -// #[derive(Debug, Clone)] -// pub struct RegionGenerator<'a> { -// distributions: &'a Distributions, -// text_pool: &'a TextPool, -// } -// -// impl Default for RegionGenerator<'_> { -// fn default() -> Self { -// // arguments are ignored -// Self::new(1.0, 1, 1) -// } -// } -// -// impl<'a> RegionGenerator<'a> { -// /// Creates a new RegionGenerator with default distributions and text pool -// /// -// /// Regions does not depend on the scale factor or the part number. The signature of -// /// this method is provided to be consistent with the other generators, but the -// /// parameters are ignored. You can use [`RegionGenerator::default`] to create a -// /// default generator. -// /// -// /// Note the generator's lifetime is `&'static`. See [`NationGenerator`] for -// /// more details. 
-// pub fn new(_scale_factor: f64, _vehicle: i32, _vehicle_count: i32) -> RegionGenerator<'static> { -// // Note: use explicit lifetime to ensure this remains `&'static` -// Self::new_with_distributions_and_text_pool( -// Distributions::static_default(), -// TextPool::get_or_init_default(), -// ) -// } -// -// /// Creates a RegionGenerator with the specified distributions and text pool -// pub fn new_with_distributions_and_text_pool<'b>( -// distributions: &'b Distributions, -// text_pool: &'b TextPool, -// ) -> RegionGenerator<'b> { -// RegionGenerator { -// distributions, -// text_pool, -// } -// } -// -// /// Returns an iterator over the region rows -// pub fn iter(&self) -> RegionGeneratorIterator<'a> { -// RegionGeneratorIterator::new(self.distributions.regions(), self.text_pool) -// } -// } -// -// impl<'a> IntoIterator for &'a RegionGenerator<'a> { -// type Item = Region<'a>; -// type IntoIter = RegionGeneratorIterator<'a>; -// -// fn into_iter(self) -> Self::IntoIter { -// self.iter() -// } -// } -// -// /// Iterator that generates Region rows -// #[derive(Debug)] -// pub struct RegionGeneratorIterator<'a> { -// regions: &'a Distribution, -// comment_random: RandomText<'a>, -// index: usize, -// } -// -// impl<'a> RegionGeneratorIterator<'a> { -// const COMMENT_AVERAGE_LENGTH: i32 = 72; -// -// fn new(regions: &'a Distribution, text_pool: &'a TextPool) -> Self { -// RegionGeneratorIterator { -// regions, -// comment_random: RandomText::new( -// 1500869201, -// text_pool, -// Self::COMMENT_AVERAGE_LENGTH as f64, -// ), -// index: 0, -// } -// } -// } -// -// impl<'a> Iterator for RegionGeneratorIterator<'a> { -// type Item = Region<'a>; -// -// fn next(&mut self) -> Option<Self::Item> { -// if self.index >= self.regions.size() { -// return None; -// } -// -// let region = Region { -// r_regionkey: self.index as i64, -// r_name: self.regions.get_value(self.index), -// r_comment: self.comment_random.next_value(), -// }; -// -// 
self.comment_random.row_finished(); -// self.index += 1; -// -// Some(region) -// } -// } - /// A Vehicle Manufacturer, formatted as `"Manufacturer#<n>"` #[derive(Debug, Clone, Copy, PartialEq)] pub struct VehicleManufacturerName(i32); @@ -1106,869 +812,6 @@ impl<'a> Iterator for CustomerGeneratorIterator<'a> { } } -// /// A clerk name, formatted as `"Clerk#<n>"` -// #[derive(Debug, Clone, Copy, PartialEq)] -// pub struct ClerkName(i32); -// -// impl ClerkName { -// /// Creates a new ClerkName with the given value -// pub fn new(value: i32) -> Self { -// ClerkName(value) -// } -// } -// -// impl Display for ClerkName { -// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -// write!(f, "Clerk#{:09}", self.0) -// } -// } -// -// /// Order status (F=final, O=open, P=pending) -// #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd)] -// pub enum OrderStatus { -// /// Fulfilled - all line items shipped -// Fulfilled, -// /// Open - no line items shipped -// Open, -// /// Partially fulfilled - some line items shipped -// Pending, -// } -// -// impl OrderStatus { -// pub fn as_str(&self) -> &'static str { -// match self { -// OrderStatus::Fulfilled => "F", -// OrderStatus::Open => "O", -// OrderStatus::Pending => "P", -// } -// } -// } -// -// impl Display for OrderStatus { -// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -// write!(f, "{}", self.as_str()) -// } -// } -// -// /// The ORDERS table -// /// -// /// The Display trait is implemented to format the line item data as a string -// /// in the default TPC-H 'tbl' format. -// /// -// /// ```text -// /// 1|37|O|131251.81|1996-01-02|5-LOW|Clerk#000000951|0|nstructions sleep furiously among | -// /// 2|79|O|40183.29|1996-12-01|1-URGENT|Clerk#000000880|0| foxes. 
pending accounts at the pending, silent asymptot| -// /// ``` -// #[derive(Debug, Clone, PartialEq)] -// pub struct Order<'a> { -// /// Primary key -// pub o_orderkey: i64, -// /// Foreign key to CUSTOMER -// pub o_custkey: i64, -// /// Order status (F=final, O=open, P=pending) -// pub o_orderstatus: OrderStatus, -// /// Order total price -// pub o_totalprice: TPCHDecimal, -// /// Order date -// pub o_orderdate: TPCHDate, -// /// Order priority -// pub o_orderpriority: &'a str, -// /// Clerk who processed the order. -// pub o_clerk: ClerkName, -// /// Order shipping priority -// pub o_shippriority: i32, -// /// Variable length comment -// pub o_comment: &'a str, -// } -// -// impl Display for Order<'_> { -// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -// write!( -// f, -// "{}|{}|{}|{}|{}|{}|{}|{}|{}|", -// self.o_orderkey, -// self.o_custkey, -// self.o_orderstatus, -// self.o_totalprice, -// self.o_orderdate, -// self.o_orderpriority, -// self.o_clerk, -// self.o_shippriority, -// self.o_comment -// ) -// } -// } -// -// /// Generator for Order table data -// #[derive(Debug, Clone)] -// pub struct OrderGenerator<'a> { -// scale_factor: f64, -// part: i32, -// part_count: i32, -// distributions: &'a Distributions, -// text_pool: &'a TextPool, -// } -// -// impl<'a> OrderGenerator<'a> { -// /// Base scale for order generation -// pub const SCALE_BASE: i32 = 1_500_000; -// -// // Constants for order generation -// const CUSTOMER_MORTALITY: i32 = 3; // portion with no orders -// const ORDER_DATE_MIN: i32 = dates::MIN_GENERATE_DATE; -// const ORDER_DATE_MAX: i32 = -// Self::ORDER_DATE_MIN + (dates::TOTAL_DATE_RANGE - LineItemGenerator::ITEM_SHIP_DAYS - 1); -// const CLERK_SCALE_BASE: i32 = 1000; -// -// const LINE_COUNT_MIN: i32 = 1; -// pub const LINE_COUNT_MAX: i32 = 7; -// -// const COMMENT_AVERAGE_LENGTH: i32 = 49; -// -// const ORDER_KEY_SPARSE_BITS: i32 = 2; -// const ORDER_KEY_SPARSE_KEEP: i32 = 3; -// /// Creates a new OrderGenerator with the given 
scale factor -// /// -// /// Note the generator's lifetime is `&'static`. See [`NationGenerator`] for -// /// more details. -// pub fn new(scale_factor: f64, part: i32, part_count: i32) -> OrderGenerator<'static> { -// // Note: use explicit lifetime to ensure this remains `&'static` -// Self::new_with_distributions_and_text_pool( -// scale_factor, -// part, -// part_count, -// Distributions::static_default(), -// TextPool::get_or_init_default(), -// ) -// } -// -// /// Creates a OrderGenerator with specified distributions and text pool -// pub fn new_with_distributions_and_text_pool<'b>( -// scale_factor: f64, -// part: i32, -// part_count: i32, -// distributions: &'b Distributions, -// text_pool: &'b TextPool, -// ) -> OrderGenerator<'b> { -// OrderGenerator { -// scale_factor, -// part, -// part_count, -// distributions, -// text_pool, -// } -// } -// -// /// Return the row count for the given scale factor and generator part count -// pub fn calculate_row_count(scale_factor: f64, part: i32, part_count: i32) -> i64 { -// GenerateUtils::calculate_row_count(Self::SCALE_BASE, scale_factor, part, part_count) -// } -// -// /// Returns an iterator over the order rows -// pub fn iter(&self) -> OrderGeneratorIterator<'a> { -// OrderGeneratorIterator::new( -// self.distributions, -// self.text_pool, -// self.scale_factor, -// GenerateUtils::calculate_start_index( -// Self::SCALE_BASE, -// self.scale_factor, -// self.part, -// self.part_count, -// ), -// Self::calculate_row_count(self.scale_factor, self.part, self.part_count), -// ) -// } -// -// /// Creates the order date random generator -// pub fn create_order_date_random() -> RandomBoundedInt { -// RandomBoundedInt::new(1066728069, Self::ORDER_DATE_MIN, Self::ORDER_DATE_MAX) -// } -// -// /// Creates the line count random generator -// pub fn create_line_count_random() -> RandomBoundedInt { -// RandomBoundedInt::new(1434868289, Self::LINE_COUNT_MIN, Self::LINE_COUNT_MAX) -// } -// -// /// Creates an order key from an 
index -// pub fn make_order_key(order_index: i64) -> i64 { -// let low_bits = order_index & ((1 << Self::ORDER_KEY_SPARSE_KEEP) - 1); -// -// let mut ok = order_index; -// ok >>= Self::ORDER_KEY_SPARSE_KEEP; -// ok <<= Self::ORDER_KEY_SPARSE_BITS; -// ok <<= Self::ORDER_KEY_SPARSE_KEEP; -// ok += low_bits; -// -// ok -// } -// } -// -// impl<'a> IntoIterator for &'a OrderGenerator<'a> { -// type Item = Order<'a>; -// type IntoIter = OrderGeneratorIterator<'a>; -// -// fn into_iter(self) -> Self::IntoIter { -// self.iter() -// } -// } -// -// /// Iterator that generates Order rows -// #[derive(Debug)] -// pub struct OrderGeneratorIterator<'a> { -// order_date_random: RandomBoundedInt, -// line_count_random: RandomBoundedInt, -// customer_key_random: RandomBoundedLong, -// order_priority_random: RandomString<'a>, -// clerk_random: RandomBoundedInt, -// comment_random: RandomText<'a>, -// -// // For line item simulation to determine order status -// line_quantity_random: RandomBoundedInt, -// line_discount_random: RandomBoundedInt, -// line_tax_random: RandomBoundedInt, -// line_vehicle_key_random: RandomBoundedLong, -// line_ship_date_random: RandomBoundedInt, -// -// start_index: i64, -// row_count: i64, -// max_customer_key: i64, -// -// index: i64, -// } -// impl<'a> OrderGeneratorIterator<'a> { -// fn new( -// distributions: &'a Distributions, -// text_pool: &'a TextPool, -// scale_factor: f64, -// start_index: i64, -// row_count: i64, -// ) -> Self { -// let mut order_date_random = OrderGenerator::create_order_date_random(); -// let mut line_count_random = OrderGenerator::create_line_count_random(); -// -// let max_customer_key = (CustomerGenerator::SCALE_BASE as f64 * scale_factor) as i64; -// -// let mut customer_key_random = -// RandomBoundedLong::new(851767375, scale_factor >= 30000.0, 1, max_customer_key); -// -// let mut order_priority_random = -// RandomString::new(591449447, distributions.order_priority()); -// -// let max_clerk = (scale_factor * 
OrderGenerator::CLERK_SCALE_BASE as f64) -// .max(OrderGenerator::CLERK_SCALE_BASE as f64) as i32; -// let mut clerk_random = RandomBoundedInt::new(1171034773, 1, max_clerk); -// -// let mut comment_random = RandomText::new( -// 276090261, -// text_pool, -// OrderGenerator::COMMENT_AVERAGE_LENGTH as f64, -// ); -// -// // For line item simulation -// let mut line_quantity_random = LineItemGenerator::create_quantity_random(); -// let mut line_discount_random = LineItemGenerator::create_discount_random(); -// let mut line_tax_random = LineItemGenerator::create_tax_random(); -// let mut line_vehicle_key_random = -// LineItemGenerator::create_vehicle_key_random(scale_factor); -// let mut line_ship_date_random = LineItemGenerator::create_ship_date_random(); -// -// // Advance all generators to the starting position -// order_date_random.advance_rows(start_index); -// line_count_random.advance_rows(start_index); -// customer_key_random.advance_rows(start_index); -// order_priority_random.advance_rows(start_index); -// clerk_random.advance_rows(start_index); -// comment_random.advance_rows(start_index); -// -// line_quantity_random.advance_rows(start_index); -// line_discount_random.advance_rows(start_index); -// line_tax_random.advance_rows(start_index); -// line_vehicle_key_random.advance_rows(start_index); -// line_ship_date_random.advance_rows(start_index); -// -// OrderGeneratorIterator { -// order_date_random, -// line_count_random, -// customer_key_random, -// order_priority_random, -// clerk_random, -// comment_random, -// line_quantity_random, -// line_discount_random, -// line_tax_random, -// line_vehicle_key_random, -// line_ship_date_random, -// start_index, -// row_count, -// max_customer_key, -// index: 0, -// } -// } -// -// /// Creates an order with the given index -// fn make_order(&mut self, index: i64) -> Order<'a> { -// let order_key = OrderGenerator::make_order_key(index); -// -// let order_date = self.order_date_random.next_value(); -// -// // 
generate customer key, taking into account customer mortality rate -// let mut customer_key = self.customer_key_random.next_value(); -// let mut delta = 1; -// while customer_key % OrderGenerator::CUSTOMER_MORTALITY as i64 == 0 { -// customer_key += delta; -// customer_key = customer_key.min(self.max_customer_key); -// delta *= -1; -// } -// -// let mut total_price = 0; -// let mut shipped_count = 0; -// -// let line_count = self.line_count_random.next_value(); -// for _ in 0..line_count { -// let quantity = self.line_quantity_random.next_value(); -// let discount = self.line_discount_random.next_value(); -// let tax = self.line_tax_random.next_value(); -// -// let vehicle_key = self.line_vehicle_key_random.next_value(); -// -// let vehicle_price = VehicleGeneratorIterator::calculate_vehicle_price(vehicle_key); -// let extended_price = vehicle_price * quantity as i64; -// let discounted_price = extended_price * (100 - discount as i64); -// total_price += ((discounted_price / 100) * (100 + tax as i64)) / 100; -// -// let ship_date = self.line_ship_date_random.next_value() + order_date; -// if TPCHDate::is_in_past(ship_date) { -// shipped_count += 1; -// } -// } -// -// let order_status = if shipped_count == line_count { -// OrderStatus::Fulfilled -// } else if shipped_count > 0 { -// OrderStatus::Pending -// } else { -// OrderStatus::Open -// }; -// -// let clerk_id = self.clerk_random.next_value(); -// let clerk_name = ClerkName::new(clerk_id); -// -// Order { -// o_orderkey: order_key, -// o_custkey: customer_key, -// o_orderstatus: order_status, -// o_totalprice: TPCHDecimal(total_price), -// o_orderdate: TPCHDate::new(order_date, 0, 0), -// o_orderpriority: self.order_priority_random.next_value(), -// o_clerk: clerk_name, -// o_shippriority: 0, // Fixed value per TPC-H spec -// o_comment: self.comment_random.next_value(), -// } -// } -// } -// -// impl<'a> Iterator for OrderGeneratorIterator<'a> { -// type Item = Order<'a>; -// -// fn next(&mut self) -> 
Option<Self::Item> { -// if self.index >= self.row_count { -// return None; -// } -// -// let order = self.make_order(self.start_index + self.index + 1); -// -// self.order_date_random.row_finished(); -// self.line_count_random.row_finished(); -// self.customer_key_random.row_finished(); -// self.order_priority_random.row_finished(); -// self.clerk_random.row_finished(); -// self.comment_random.row_finished(); -// -// self.line_quantity_random.row_finished(); -// self.line_discount_random.row_finished(); -// self.line_tax_random.row_finished(); -// self.line_vehicle_key_random.row_finished(); -// self.line_ship_date_random.row_finished(); -// -// self.index += 1; -// -// Some(order) -// } -// } -// -// /// The LINEITEM table -// /// -// /// The Display trait is implemented to format the line item data as a string -// /// in the default TPC-H 'tbl' format. -// /// -// /// Example -// /// ```text -// /// 1|156|4|1|17|17954.55|0.04|0.02|N|O|1996-03-13|1996-02-12|1996-03-22|DELIVER IN PERSON|TRUCK|egular courts above the| -// /// 1|68|9|2|36|34850.16|0.09|0.06|N|O|1996-04-12|1996-02-28|1996-04-20|TAKE BACK RETURN|MAIL|ly final dependencies: slyly bold | -// /// ``` -// #[derive(Debug, Clone, PartialEq)] -// pub struct LineItem<'a> { -// /// Foreign key to ORDERS -// pub l_orderkey: i64, -// /// Foreign key to VEHICLE -// pub l_vehiclekey: i64, -// /// Foreign key to Driver -// pub l_suppkey: i64, -// /// Line item number within order -// pub l_linenumber: i32, -// /// Quantity ordered -// // TODO: Spec has this as decimal. 
-// pub l_quantity: i64, -// /// Extended price (l_quantity * p_retailprice) -// pub l_extendedprice: TPCHDecimal, -// /// Discount percentage -// pub l_discount: TPCHDecimal, -// /// Tax percentage -// pub l_tax: TPCHDecimal, -// /// Return flag (R=returned, A=accepted, null=pending) -// pub l_returnflag: &'a str, -// /// Line status (O=ordered, F=fulfilled) -// pub l_linestatus: &'static str, -// /// Date shipped -// pub l_shipdate: TPCHDate, -// /// Date committed to ship -// pub l_commitdate: TPCHDate, -// /// Date received -// pub l_receiptdate: TPCHDate, -// /// Shipping instructions -// pub l_shipinstruct: &'a str, -// /// Shipping mode -// pub l_shipmode: &'a str, -// /// Variable length comment -// pub l_comment: &'a str, -// } -// -// impl Display for LineItem<'_> { -// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { -// write!( -// f, -// "{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|{}|", -// self.l_orderkey, -// self.l_vehiclekey, -// self.l_suppkey, -// self.l_linenumber, -// self.l_quantity, -// self.l_extendedprice, -// self.l_discount, -// self.l_tax, -// self.l_returnflag, -// self.l_linestatus, -// self.l_shipdate, -// self.l_commitdate, -// self.l_receiptdate, -// self.l_shipinstruct, -// self.l_shipmode, -// self.l_comment -// ) -// } -// } -// -// /// Generator for LineItem table data -// #[derive(Debug, Clone)] -// pub struct LineItemGenerator<'a> { -// scale_factor: f64, -// part: i32, -// part_count: i32, -// distributions: &'a Distributions, -// text_pool: &'a TextPool, -// } -// -// impl<'a> LineItemGenerator<'a> { -// // Constants for line item generation -// const QUANTITY_MIN: i32 = 1; -// const QUANTITY_MAX: i32 = 50; -// const TAX_MIN: TPCHDecimal = TPCHDecimal(0); // 0.00 -// const TAX_MAX: TPCHDecimal = TPCHDecimal(8); // 0.08 -// const DISCOUNT_MIN: TPCHDecimal = TPCHDecimal(0); // 0.00 -// const DISCOUNT_MAX: TPCHDecimal = TPCHDecimal(10); // 0.10 -// const VEHICLE_KEY_MIN: i32 = 1; -// const SHIP_DATE_MIN: i32 = 1; -// 
const SHIP_DATE_MAX: i32 = 121; -// const COMMIT_DATE_MIN: i32 = 30; -// const COMMIT_DATE_MAX: i32 = 90; -// const RECEIPT_DATE_MIN: i32 = 1; -// const RECEIPT_DATE_MAX: i32 = 30; -// -// pub const ITEM_SHIP_DAYS: i32 = Self::SHIP_DATE_MAX + Self::RECEIPT_DATE_MAX; -// -// const COMMENT_AVERAGE_LENGTH: i32 = 27; -// -// /// Creates a new LineItemGenerator with the given scale factor -// /// -// /// Note the generator's lifetime is `&'static`. See [`NationGenerator`] for -// /// more details. -// pub fn new(scale_factor: f64, part: i32, part_count: i32) -> LineItemGenerator<'static> { -// Self::new_with_distributions_and_text_pool( -// scale_factor, -// part, -// part_count, -// Distributions::static_default(), -// TextPool::get_or_init_default(), -// ) -// } -// -// /// Creates a LineItemGenerator with specified distributions and text pool -// pub fn new_with_distributions_and_text_pool<'b>( -// scale_factor: f64, -// part: i32, -// part_count: i32, -// distributions: &'b Distributions, -// text_pool: &'b TextPool, -// ) -> LineItemGenerator<'b> { -// LineItemGenerator { -// scale_factor, -// part, -// part_count, -// distributions, -// text_pool, -// } -// } -// -// /// Returns an iterator over the line item rows -// pub fn iter(&self) -> LineItemGeneratorIterator<'a> { -// LineItemGeneratorIterator::new( -// self.distributions, -// self.text_pool, -// self.scale_factor, -// GenerateUtils::calculate_start_index( -// OrderGenerator::SCALE_BASE, -// self.scale_factor, -// self.part, -// self.part_count, -// ), -// GenerateUtils::calculate_row_count( -// OrderGenerator::SCALE_BASE, -// self.scale_factor, -// self.part, -// self.part_count, -// ), -// ) -// } -// -// /// Creates a quantity random generator -// pub fn create_quantity_random() -> RandomBoundedInt { -// RandomBoundedInt::new_with_seeds_per_row( -// 209208115, -// Self::QUANTITY_MIN, -// Self::QUANTITY_MAX, -// OrderGenerator::LINE_COUNT_MAX, -// ) -// } -// -// /// Creates a discount random generator 
-// pub fn create_discount_random() -> RandomBoundedInt { -// RandomBoundedInt::new_with_seeds_per_row( -// 554590007, -// Self::DISCOUNT_MIN.0 as i32, -// Self::DISCOUNT_MAX.0 as i32, -// OrderGenerator::LINE_COUNT_MAX, -// ) -// } -// -// /// Creates a tax random generator -// pub fn create_tax_random() -> RandomBoundedInt { -// RandomBoundedInt::new_with_seeds_per_row( -// 721958466, -// Self::TAX_MIN.0 as i32, -// Self::TAX_MAX.0 as i32, -// OrderGenerator::LINE_COUNT_MAX, -// ) -// } -// -// /// Creates a vehicle key random generator -// pub fn create_vehicle_key_random(scale_factor: f64) -> RandomBoundedLong { -// // If scale_factor >= 30000, use long `RandomBoundedLong` otherwise -// // use `RandomBoundedInt` to avoid overflow. -// RandomBoundedLong::new_with_seeds_per_row( -// 1808217256, -// scale_factor >= 30000.0, -// Self::VEHICLE_KEY_MIN as i64, -// (VehicleGenerator::SCALE_BASE as f64 * scale_factor) as i64, -// OrderGenerator::LINE_COUNT_MAX, -// ) -// } -// -// /// Creates a ship date random generator -// pub fn create_ship_date_random() -> RandomBoundedInt { -// RandomBoundedInt::new_with_seeds_per_row( -// 1769349045, -// Self::SHIP_DATE_MIN, -// Self::SHIP_DATE_MAX, -// OrderGenerator::LINE_COUNT_MAX, -// ) -// } -// } -// -// impl<'a> IntoIterator for &'a LineItemGenerator<'a> { -// type Item = LineItem<'a>; -// type IntoIter = LineItemGeneratorIterator<'a>; -// -// fn into_iter(self) -> Self::IntoIter { -// self.iter() -// } -// } -// -// /// Iterator that generates LineItem rows -// #[derive(Debug)] -// pub struct LineItemGeneratorIterator<'a> { -// order_date_random: RandomBoundedInt, -// line_count_random: RandomBoundedInt, -// -// quantity_random: RandomBoundedInt, -// discount_random: RandomBoundedInt, -// tax_random: RandomBoundedInt, -// -// line_vehicle_key_random: RandomBoundedLong, -// -// driver_number_random: RandomBoundedInt, -// -// ship_date_random: RandomBoundedInt, -// commit_date_random: RandomBoundedInt, -// 
receipt_date_random: RandomBoundedInt, -// -// returned_flag_random: RandomString<'a>, -// ship_instructions_random: RandomString<'a>, -// ship_mode_random: RandomString<'a>, -// -// comment_random: RandomText<'a>, -// -// scale_factor: f64, -// start_index: i64, -// row_count: i64, -// -// index: i64, -// order_date: i32, -// line_count: i32, -// line_number: i32, -// } -// -// impl<'a> LineItemGeneratorIterator<'a> { -// fn new( -// distributions: &'a Distributions, -// text_pool: &'a TextPool, -// scale_factor: f64, -// start_index: i64, -// row_count: i64, -// ) -> Self { -// let mut order_date_random = OrderGenerator::create_order_date_random(); -// let mut line_count_random = OrderGenerator::create_line_count_random(); -// -// let mut quantity_random = LineItemGenerator::create_quantity_random(); -// let mut discount_random = LineItemGenerator::create_discount_random(); -// let mut tax_random = LineItemGenerator::create_tax_random(); -// -// let mut line_vehicle_key_random = -// LineItemGenerator::create_vehicle_key_random(scale_factor); -// -// let mut driver_number_random = RandomBoundedInt::new_with_seeds_per_row( -// 2095021727, -// 0, -// 3, -// OrderGenerator::LINE_COUNT_MAX, -// ); -// -// let mut ship_date_random = LineItemGenerator::create_ship_date_random(); -// let mut commit_date_random = RandomBoundedInt::new_with_seeds_per_row( -// 904914315, -// LineItemGenerator::COMMIT_DATE_MIN, -// LineItemGenerator::COMMIT_DATE_MAX, -// OrderGenerator::LINE_COUNT_MAX, -// ); -// let mut receipt_date_random = RandomBoundedInt::new_with_seeds_per_row( -// 373135028, -// LineItemGenerator::RECEIPT_DATE_MIN, -// LineItemGenerator::RECEIPT_DATE_MAX, -// OrderGenerator::LINE_COUNT_MAX, -// ); -// -// let mut returned_flag_random = RandomString::new_with_expected_row_count( -// 717419739, -// distributions.return_flags(), -// OrderGenerator::LINE_COUNT_MAX, -// ); -// let mut ship_instructions_random = RandomString::new_with_expected_row_count( -// 1371272478, -// 
distributions.ship_instructions(), -// OrderGenerator::LINE_COUNT_MAX, -// ); -// let mut ship_mode_random = RandomString::new_with_expected_row_count( -// 675466456, -// distributions.ship_modes(), -// OrderGenerator::LINE_COUNT_MAX, -// ); -// let mut comment_random = RandomText::new_with_expected_row_count( -// 1095462486, -// text_pool, -// LineItemGenerator::COMMENT_AVERAGE_LENGTH as f64, -// OrderGenerator::LINE_COUNT_MAX, -// ); -// -// // Advance all generators to the starting position -// order_date_random.advance_rows(start_index); -// line_count_random.advance_rows(start_index); -// -// quantity_random.advance_rows(start_index); -// discount_random.advance_rows(start_index); -// tax_random.advance_rows(start_index); -// -// line_vehicle_key_random.advance_rows(start_index); -// -// driver_number_random.advance_rows(start_index); -// -// ship_date_random.advance_rows(start_index); -// commit_date_random.advance_rows(start_index); -// receipt_date_random.advance_rows(start_index); -// -// returned_flag_random.advance_rows(start_index); -// ship_instructions_random.advance_rows(start_index); -// ship_mode_random.advance_rows(start_index); -// -// comment_random.advance_rows(start_index); -// -// // generate information for initial order -// let order_date = order_date_random.next_value(); -// let line_count = line_count_random.next_value() - 1; -// -// LineItemGeneratorIterator { -// order_date_random, -// line_count_random, -// quantity_random, -// discount_random, -// tax_random, -// line_vehicle_key_random, -// driver_number_random, -// ship_date_random, -// commit_date_random, -// receipt_date_random, -// returned_flag_random, -// ship_instructions_random, -// ship_mode_random, -// comment_random, -// scale_factor, -// start_index, -// row_count, -// index: 0, -// order_date, -// line_count, -// line_number: 0, -// } -// } -// -// /// Creates a line item with the given order index -// fn make_line_item(&mut self, order_index: i64) -> LineItem<'a> { -// 
let order_key = OrderGenerator::make_order_key(order_index); -// -// let quantity = self.quantity_random.next_value(); -// let discount = self.discount_random.next_value(); -// let tax = self.tax_random.next_value(); -// -// let vehicle_key = self.line_vehicle_key_random.next_value(); -// -// // let driver_number = self.driver_number_random.next_value() as i64; -// let driver_key = DriverGeneratorIterator::select_driver( -// vehicle_key, -// self.line_number as i64, -// self.scale_factor, -// ); -// -// let vehicle_price = VehicleGeneratorIterator::calculate_vehicle_price(vehicle_key); -// let extended_price = vehicle_price * quantity as i64; -// -// let mut ship_date = self.ship_date_random.next_value(); -// ship_date += self.order_date; -// let mut commit_date = self.commit_date_random.next_value(); -// commit_date += self.order_date; -// let mut receipt_date = self.receipt_date_random.next_value(); -// receipt_date += ship_date; -// -// let returned_flag = if TPCHDate::is_in_past(receipt_date) { -// self.returned_flag_random.next_value() -// } else { -// "N" -// }; -// -// let status = if TPCHDate::is_in_past(ship_date) { -// "F" // Fulfilled -// } else { -// "O" // Open -// }; -// -// let ship_instructions = self.ship_instructions_random.next_value(); -// let ship_mode = self.ship_mode_random.next_value(); -// let comment = self.comment_random.next_value(); -// -// LineItem { -// l_orderkey: order_key, -// l_vehiclekey: vehicle_key, -// l_suppkey: driver_key, -// l_linenumber: self.line_number + 1, -// l_quantity: quantity as i64, -// l_extendedprice: TPCHDecimal(extended_price), -// l_discount: TPCHDecimal(discount as i64), -// l_tax: TPCHDecimal(tax as i64), -// l_returnflag: returned_flag, -// l_linestatus: status, -// l_shipdate: TPCHDate::new(ship_date, 0, 0), -// l_commitdate: TPCHDate::new(commit_date, 0, 0), -// l_receiptdate: TPCHDate::new(receipt_date, 0, 0), -// l_shipinstruct: ship_instructions, -// l_shipmode: ship_mode, -// l_comment: comment, -// 
} -// } -// } -// -// impl<'a> Iterator for LineItemGeneratorIterator<'a> { -// type Item = LineItem<'a>; -// -// fn next(&mut self) -> Option<Self::Item> { -// if self.index >= self.row_count { -// return None; -// } -// -// let line_item = self.make_line_item(self.start_index + self.index + 1); -// self.line_number += 1; -// -// // advance next row only when all lines for the order have been produced -// if self.line_number > self.line_count { -// self.order_date_random.row_finished(); -// self.line_count_random.row_finished(); -// -// self.quantity_random.row_finished(); -// self.discount_random.row_finished(); -// self.tax_random.row_finished(); -// -// self.line_vehicle_key_random.row_finished(); -// self.driver_number_random.row_finished(); -// -// self.ship_date_random.row_finished(); -// self.commit_date_random.row_finished(); -// self.receipt_date_random.row_finished(); -// -// self.returned_flag_random.row_finished(); -// self.ship_instructions_random.row_finished(); -// self.ship_mode_random.row_finished(); -// -// self.comment_random.row_finished(); -// -// self.index += 1; -// -// // generate information for next order -// self.line_count = self.line_count_random.next_value() - 1; -// self.order_date = self.order_date_random.next_value(); -// self.line_number = 0; -// } -// -// Some(line_item) -// } -// } - /// The TRIP table (fact table) /// /// The Display trait is implemented to format the trip data as a string @@ -2758,24 +1601,6 @@ impl Iterator for ZoneGeneratorIterator { mod tests { use super::*; - // #[test] - // fn test_nation_generator() { - // let generator = NationGenerator::default(); - // let nations: Vec<_> = generator.iter().collect(); - // - // // TPC-H typically has 25 nations - // assert_eq!(nations.len(), 25); - // } - // - // #[test] - // fn test_region_generator() { - // let generator = RegionGenerator::default(); - // let regions: Vec<_> = generator.iter().collect(); - // - // // TPC-H typically has 5 regions - // 
assert_eq!(regions.len(), 5); - // } - #[test] fn test_vehicle_generation() { // Create a generator with a small scale factor @@ -2842,48 +1667,7 @@ mod tests { ); assert_eq!(first.to_string(), expected_pattern); } - - // #[test] - // fn test_order_generation() { - // // Create a generator with a small scale factor - // let generator = OrderGenerator::new(0.01, 1, 1); - // let orders: Vec<_> = generator.iter().collect(); - // - // // Should have 0.01 * 1,500,000 = 15,000 orders - // assert_eq!(orders.len(), 15000); - // - // // Check first order - // let first = &orders[0]; - // assert_eq!(first.o_orderkey, OrderGenerator::make_order_key(1)); - // assert!(first.o_custkey > 0); - // assert!(first.o_totalprice > TPCHDecimal::ZERO); - // - // // Check order status distribution - // let status_counts = - // orders - // .iter() - // .fold(std::collections::HashMap::new(), |mut acc, order| { - // *acc.entry(&order.o_orderstatus).or_insert(0) += 1; - // acc - // }); - // - // // Should have multiple order statuses - // assert!(status_counts.len() >= 2); - // - // // Check customer key distribution - no customer with mortality factor - // assert!(orders - // .iter() - // .all(|o| o.o_custkey % OrderGenerator::CUSTOMER_MORTALITY as i64 != 0)); - // - // // Check order key sparsity - // for (i, order) in orders.iter().enumerate() { - // assert_eq!( - // order.o_orderkey, - // OrderGenerator::make_order_key(i as i64 + 1) - // ); - // } - // } - + #[test] fn test_trip_generation() { // Create a generator with a small scale factor @@ -2976,81 +1760,4 @@ mod tests { "1|54bea793-2dc6-47b0-a4c1-5b96f17e66a3|Chatham Islands Territory|county|MULTIPOLYGON (((-176.2418754 -44.4327352, -176.2396744 -44.4349882, -176.2379244 -44.4330281, -176.2384204 -44.4312342, -176.2418754 -44.4327352)), ((-176.165218 -44.3563138, -176.1650533 -44.3413916, -176.1773808 -44.3358569, -176.18558 -44.3493409, -176.165218 -44.3563138)), ((-176.2463812 -44.3292996, -176.25687 -44.3447818, -176.2382722 
-44.3507201, -176.2271372 -44.334208, -176.2025537 -44.3268945, [...] ) } - - // #[test] - // fn test_make_order_key() { - // // Test order key generation logic - // assert_eq!(OrderGenerator::make_order_key(1), 1); // Low values are preserved - // assert_eq!(OrderGenerator::make_order_key(8), 32); // 8 becomes 1000000 - // assert_eq!(OrderGenerator::make_order_key(9), 32 + 1); // 9 becomes 1000001 - // assert_eq!(OrderGenerator::make_order_key(10), 32 + 2); // 10 becomes 1000010 - // } - - // #[test] - // fn test_line_item_generation() { - // // Create a generator with a small scale factor - // let generator = LineItemGenerator::new(0.01, 1, 1); - // let line_items: Vec<_> = generator.iter().collect(); - // - // // Check first line item - // let first = &line_items[0]; - // assert_eq!(first.l_orderkey, OrderGenerator::make_order_key(1)); - // assert_eq!(first.l_linenumber, 1); - // assert!(first.l_vehiclekey > 0); - // assert!(first.l_suppkey > 0); - // - // assert!(first.l_quantity >= LineItemGenerator::QUANTITY_MIN as i64); - // assert!(first.l_quantity <= LineItemGenerator::QUANTITY_MAX as i64); - // - // assert!(first.l_discount >= LineItemGenerator::DISCOUNT_MIN); - // assert!(first.l_discount <= LineItemGenerator::DISCOUNT_MAX); - // - // assert!(first.l_tax >= LineItemGenerator::TAX_MIN); - // assert!(first.l_tax <= LineItemGenerator::TAX_MAX); - // - // // Verify line numbers are sequential per order - // let mut order_lines = std::collections::HashMap::new(); - // for line in &line_items { - // order_lines - // .entry(line.l_orderkey) - // .or_insert_with(Vec::new) - // .push(line.l_linenumber); - // } - // - // // Check each order's line numbers - // for (_, lines) in order_lines { - // let mut sorted_lines = lines.clone(); - // sorted_lines.sort(); - // - // // Line numbers should start at 1 and be sequential - // for (i, line_num) in sorted_lines.iter().enumerate() { - // assert_eq!(*line_num, (i + 1) as i32); - // } - // } - // - // // Verify return 
flags and line status distributions - // let return_flags: std::collections::HashSet<_> = - // line_items.iter().map(|l| &l.l_returnflag).collect(); - // - // assert!(return_flags.len() > 1); - // - // let line_statuses: std::collections::HashSet<_> = - // line_items.iter().map(|l| &l.l_linestatus).collect(); - // - // assert!(!line_statuses.is_empty()); - // } - // - // #[test] - // fn check_iter_static_lifetimes() { - // // Lifetimes of iterators should be independent of the generator that - // // created it. This test case won't compile if that's not the case. - // - // let _iter: NationGeneratorIterator<'static> = NationGenerator::default().iter(); - // let _iter: RegionGeneratorIterator<'static> = RegionGenerator::default().iter(); - // let _iter: VehicleGeneratorIterator<'static> = VehicleGenerator::new(0.1, 1, 1).iter(); - // let _iter: DriverGeneratorIterator<'static> = DriverGenerator::new(0.1, 1, 1).iter(); - // let _iter: CustomerGeneratorIterator<'static> = CustomerGenerator::new(0.1, 1, 1).iter(); - // let _iter: OrderGeneratorIterator<'static> = OrderGenerator::new(0.1, 1, 1).iter(); - // let _iter: LineItemGeneratorIterator<'static> = LineItemGenerator::new(0.1, 1, 1).iter(); - // } }
