This is an automated email from the ASF dual-hosted git repository.
prantogg pushed a commit to branch support-multipart
in repository https://gitbox.apache.org/repos/asf/sedona-spatialbench.git
The following commit(s) were added to refs/heads/support-multipart by this push:
new b071003 Improve code readability and logging in multi-part generation
b071003 is described below
commit b0710032f31c72623959e63a8efe41c38561fe91
Author: Pranav Toggi <[email protected]>
AuthorDate: Sat Oct 25 21:14:20 2025 -0700
Improve code readability and logging in multi-part generation
---
tpchgen-cli/src/runner.rs | 5 ++---
tpchgen-cli/src/zone/config.rs | 1 -
tpchgen-cli/src/zone/main.rs | 5 +++--
tpchgen-cli/src/zone/writer.rs | 16 ++++++++--------
4 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/tpchgen-cli/src/runner.rs b/tpchgen-cli/src/runner.rs
index 3afb694..e62ba78 100644
--- a/tpchgen-cli/src/runner.rs
+++ b/tpchgen-cli/src/runner.rs
@@ -11,11 +11,10 @@ use std::io;
use std::io::BufWriter;
use tokio::task::{JoinError, JoinSet};
use tpchgen::generators::{
- CustomerGenerator, TripGenerator, VehicleGenerator, BuildingGenerator, DriverGenerator,
+ BuildingGenerator, CustomerGenerator, DriverGenerator, TripGenerator, VehicleGenerator,
};
use tpchgen_arrow::{
- CustomerArrow, TripArrow, VehicleArrow,
- RecordBatchIterator, BuildingArrow, DriverArrow,
+ BuildingArrow, CustomerArrow, DriverArrow, RecordBatchIterator, TripArrow, VehicleArrow,
};
/// Runs multiple [`OutputPlan`]s in parallel, managing the number of threads
diff --git a/tpchgen-cli/src/zone/config.rs b/tpchgen-cli/src/zone/config.rs
index 5fe25fa..0bbfa32 100644
--- a/tpchgen-cli/src/zone/config.rs
+++ b/tpchgen-cli/src/zone/config.rs
@@ -52,5 +52,4 @@ impl ZoneDfArgs {
self.output_dir.join("zone.parquet")
}
}
-
}
diff --git a/tpchgen-cli/src/zone/main.rs b/tpchgen-cli/src/zone/main.rs
index b37d97c..5afd899 100644
--- a/tpchgen-cli/src/zone/main.rs
+++ b/tpchgen-cli/src/zone/main.rs
@@ -19,13 +19,14 @@ pub async fn generate_zone(
OutputFormat::Parquet => {
let parts = parts.unwrap_or(1);
- if part.is_some() {
+ if let Some(part_num) = part {
// Single part mode - use LIMIT/OFFSET
+ info!("Generating part {} of {} for zone table", part_num, parts);
let args = ZoneDfArgs::new(
1.0f64.max(scale_factor),
output_dir,
parts,
- part.unwrap(),
+ part_num,
parquet_row_group_bytes,
parquet_compression,
);
diff --git a/tpchgen-cli/src/zone/writer.rs b/tpchgen-cli/src/zone/writer.rs
index 8bf89db..7aff003 100644
--- a/tpchgen-cli/src/zone/writer.rs
+++ b/tpchgen-cli/src/zone/writer.rs
@@ -48,9 +48,9 @@ impl ParquetWriter {
// Check if file already exists
if self.output_path.exists() {
info!(
- "{} already exists, skipping generation",
- self.output_path.display()
- );
+ "{} already exists, skipping generation",
+ self.output_path.display()
+ );
return Ok(());
}
@@ -70,11 +70,11 @@ impl ParquetWriter {
// Rename temp file to final output
std::fs::rename(&temp_path, &self.output_path).map_err(|e| {
anyhow::anyhow!(
- "Failed to rename {:?} to {:?}: {}",
- temp_path,
- self.output_path,
- e
- )
+ "Failed to rename {:?} to {:?}: {}",
+ temp_path,
+ self.output_path,
+ e
+ )
})?;
let duration = t0.elapsed();