Copilot commented on code in PR #57:
URL: 
https://github.com/apache/sedona-spatialbench/pull/57#discussion_r2463272924


##########
spatialbench-cli/tests/cli_integration.rs:
##########
@@ -229,11 +382,62 @@ fn test_spatialbench_cli_parts() {
     for thread in threads {
         thread.join().expect("Thread panicked");
     }
-    // Read the generated files into a single buffer and compare them
-    // to the contents of the reference file
+    verify_table(temp_dir.path(), "trip", num_parts, "v1");

Review Comment:
   The test generates the 'orders' table (line 374) but verifies the 'trip' 
table. This should verify 'orders' instead.



##########
spatialbench-cli/tests/cli_integration.rs:
##########
@@ -229,11 +382,62 @@ fn test_spatialbench_cli_parts() {
     for thread in threads {
         thread.join().expect("Thread panicked");
     }
-    // Read the generated files into a single buffer and compare them
-    // to the contents of the reference file
+    verify_table(temp_dir.path(), "trip", num_parts, "v1");
+}
+
+/// Create all tables using --parts option and verify the output layouts
+#[test]
+fn test_spatialbench_cli_parts_all_tables() {
+    let temp_dir = tempdir().expect("Failed to create temporary directory");
+
+    let num_parts = 8;
+    let output_dir = temp_dir.path().to_path_buf();
+    Command::cargo_bin("spatialbench-cli")
+        .expect("Binary not found")
+        .arg("--scale-factor")
+        .arg("0.51")
+        .arg("--format")
+        .arg("tbl")
+        .arg("--tables")
+        .arg("building,driver,vehicle,customer")
+        .arg("--output-dir")
+        .arg(&output_dir)
+        .arg("--parts")
+        .arg(num_parts.to_string())
+        .assert()
+        .success();
+
+    Command::cargo_bin("spatialbench-cli")
+        .expect("Binary not found")
+        .arg("--scale-factor")
+        .arg("0.001")
+        .arg("--format")
+        .arg("tbl")
+        .arg("--tables")
+        .arg("trip")
+        .arg("--output-dir")
+        .arg(&output_dir)
+        .arg("--parts")
+        .arg(num_parts.to_string())
+        .assert()
+        .success();
+
+    verify_table(temp_dir.path(), "trip", num_parts, "v1");
+    verify_table(temp_dir.path(), "customer", num_parts, "v1");
+    // Note, building, vehicle and driver have only a single part regardless 
of --parts
+    verify_table(temp_dir.path(), "building", 1, "v1");
+    verify_table(temp_dir.path(), "vehicle", 1, "v1");
+    verify_table(temp_dir.path(), "driver", 1, "v1");
+}
+
+/// Read the N files from `output_dir/table_name/table_name.part.tml` into a

Review Comment:
   Corrected spelling of 'tml' to 'tbl'.
   ```suggestion
   /// Read the N files from `output_dir/table_name/table_name.part.tbl` into a
   ```



##########
spatialbench-cli/src/output_plan.rs:
##########
@@ -0,0 +1,267 @@
+//! * [`OutputLocation`]: where to output the generated data
+//! * [`OutputPlan`]: an output file that will be generated
+//! * [`OutputPlanGenerator`]: plans the output files to be generated
+
+use crate::plan::GenerationPlan;
+use crate::{OutputFormat, Table};
+use log::debug;
+use parquet::basic::Compression;
+use std::collections::HashSet;
+use std::fmt::{Display, Formatter};
+use std::io;
+use std::path::PathBuf;
+
+/// Where a partition will be output
+#[derive(Debug, Clone, PartialEq)]
+pub enum OutputLocation {
+    /// Output to a file
+    File(PathBuf),
+    /// Output to stdout
+    Stdout,
+}
+
+impl Display for OutputLocation {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            OutputLocation::File(path) => {
+                let Some(file) = path.file_name() else {
+                    return write!(f, "{}", path.display());
+                };
+                // Display the file name only, not the full path
+                write!(f, "{}", file.to_string_lossy())
+            }
+            OutputLocation::Stdout => write!(f, "Stdout"),
+        }
+    }
+}
+
+/// Describes an output partition (file) that will be generated
+#[derive(Debug, Clone, PartialEq)]
+pub struct OutputPlan {
+    /// The table
+    table: Table,
+    /// The scale factor
+    scale_factor: f64,
+    /// The output format (TODO don't depend back on something in main)
+    output_format: OutputFormat,
+    /// If the output is parquet, what compression level to use
+    parquet_compression: Compression,
+    /// Where to output
+    output_location: OutputLocation,
+    /// Plan for generating the table
+    generation_plan: GenerationPlan,
+}
+
+impl OutputPlan {
+    pub fn new(
+        table: Table,
+        scale_factor: f64,
+        output_format: OutputFormat,
+        parquet_compression: Compression,
+        output_location: OutputLocation,
+        generation_plan: GenerationPlan,
+    ) -> Self {
+        Self {
+            table,
+            scale_factor,
+            output_format,
+            parquet_compression,
+            output_location,
+            generation_plan,
+        }
+    }
+
+    /// Return the table this partition is for
+    pub fn table(&self) -> Table {
+        self.table
+    }
+
+    /// Return the scale factor for this partition
+    pub fn scale_factor(&self) -> f64 {
+        self.scale_factor
+    }
+
+    /// Return the output format for this partition
+    pub fn output_format(&self) -> OutputFormat {
+        self.output_format
+    }
+
+    /// return the output location
+    pub fn output_location(&self) -> &OutputLocation {
+        &self.output_location
+    }
+
+    /// Return the parquet compression level for this partition
+    pub fn parquet_compression(&self) -> Compression {
+        self.parquet_compression
+    }
+
+    /// Return the number of chunks part(ition) count (the number of data 
chunks
+    /// in the underlying generation plan)
+    pub fn chunk_count(&self) -> usize {
+        self.generation_plan.chunk_count()
+    }
+
+    /// return the generation plan for this partition
+    pub fn generation_plan(&self) -> &GenerationPlan {
+        &self.generation_plan
+    }
+}
+
+impl Display for OutputPlan {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "table {} (SF={}, {} chunks) to {}",
+            self.table,
+            self.scale_factor,
+            self.chunk_count(),
+            self.output_location
+        )
+    }
+}
+
+/// Plans the creation of output files
+pub struct OutputPlanGenerator {
+    format: OutputFormat,
+    scale_factor: f64,
+    parquet_compression: Compression,
+    parquet_row_group_bytes: i64,
+    stdout: bool,
+    output_dir: PathBuf,
+    /// The generated output plans
+    output_plans: Vec<OutputPlan>,
+    /// Output directores that have been created so far

Review Comment:
   Corrected spelling of 'directores' to 'directories'.
   ```suggestion
       /// Output directories that have been created so far
   ```



##########
spatialbench-cli/src/main.rs:
##########
@@ -142,11 +130,11 @@ struct Cli {
     /// Target size in row group bytes in Parquet files
     ///
     /// Row groups are the typical unit of parallel processing and compression
-    /// in Parquet. With many query engines, smaller row groups enable better
+    /// with many query engines. Therfore, smaller row groups enable better

Review Comment:
   Corrected spelling of 'Therfore' to 'Therefore'.
   ```suggestion
       /// with many query engines. Therefore, smaller row groups enable better
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to