Copilot commented on code in PR #59:
URL:
https://github.com/apache/sedona-spatialbench/pull/59#discussion_r2467624206
##########
spatialbench-cli/src/plan.rs:
##########
@@ -263,8 +263,9 @@ impl OutputSize {
// Scale based on zone subtype count for the scale factor
match scale_factor {
sf if sf < 10.0 => 1332,
- sf if sf < 100.0 => 2000,
- _ => 4258,
+ sf if sf < 100.0 => 4445,
+ sf if sf < 100.0 => 5220,
Review Comment:
Duplicate condition `sf if sf < 100.0` appears on consecutive lines. The
second condition will never be evaluated. The second line should likely check
`sf if sf < 1000.0` to match the pattern of scale factor ranges.
```suggestion
sf if sf < 1000.0 => 5220,
```
##########
spatialbench-cli/tests/cli_integration.rs:
##########
@@ -778,6 +778,120 @@ fn test_zone_generation_tbl_fails() {
));
}
+#[tokio::test]
+async fn test_trip_output_file_size() {
+ let temp_dir = tempdir().expect("Failed to create temporary directory");
+
+ // Generate Trip table with max file size of 2MB
+ Command::cargo_bin("spatialbench-cli")
+ .expect("Binary not found")
+ .arg("--format")
+ .arg("parquet")
+ .arg("--scale-factor")
+ .arg("0.01")
+ .arg("--tables")
+ .arg("trip")
+ .arg("--output-dir")
+ .arg(temp_dir.path())
+ .arg("--mb-per-file")
+ .arg("2")
+ .assert()
+ .success();
+
+ let trip_dir = temp_dir.path().join("trip");
+ let output_file_size_mb = 2 * 1024 * 1024; // 2MB in bytes
+
+ // Verify all files are under the max size
+ for entry in fs::read_dir(&trip_dir).expect("Failed to read trip
directory") {
+ let entry = entry.expect("Failed to read directory entry");
+ let metadata = entry.metadata().expect("Failed to get file metadata");
+ let file_size = metadata.len();
+
+ assert!(
+ file_size <= output_file_size_mb,
+ "File {} size ({} bytes) exceeds max size ({} bytes)",
+ entry.file_name().to_string_lossy(),
+ file_size,
+ output_file_size_mb
+ );
+ }
+
+ // Verify multiple files were created
+ let file_count = fs::read_dir(&trip_dir)
+ .expect("Failed to read trip directory")
+ .count();
+ assert!(file_count > 1, "Expected multiple files with 10MB limit");
Review Comment:
Assertion message references '10MB limit' but the test uses a 2MB limit
(line 797).
##########
spatialbench-cli/src/zone/partition.rs:
##########
@@ -1,22 +1,24 @@
+use crate::zone::stats::ZoneTableStats;
use arrow_array::RecordBatch;
use datafusion::prelude::*;
-use log::info;
+use log::{debug, info};
pub struct PartitionStrategy {
offset: i64,
limit: i64,
}
impl PartitionStrategy {
- pub fn calculate(total_rows: i64, parts: i32, part: i32) -> Self {
- let parts = parts as i64;
- let i = (part as i64) - 1;
+ pub fn calculate(total_rows: i64, parts: Option<i32>, part: Option<i32>)
-> Self {
+ let parts = parts.unwrap_or(1);
+ let part = part.unwrap_or(1);
+ let i = part - 1;
- let base = total_rows / parts;
- let rem = total_rows % parts;
+ let base = total_rows / parts as i64;
+ let rem = total_rows % parts as i64;
- let limit = base + if i < rem { 1 } else { 0 };
- let offset = i * base + std::cmp::min(i, rem);
+ let limit = base + if i < rem as i32 { 1 } else { 0 };
Review Comment:
Type mismatch in comparison: `i` is `i32` (from line 15) but `rem` is `i64`
(from line 18). The cast should be applied to `i` instead: `if (i as i64) <
rem`.
```suggestion
let limit = base + if (i as i64) < rem { 1 } else { 0 };
```
##########
spatialbench-cli/tests/cli_integration.rs:
##########
@@ -778,6 +778,120 @@ fn test_zone_generation_tbl_fails() {
));
}
+#[tokio::test]
+async fn test_trip_output_file_size() {
+ let temp_dir = tempdir().expect("Failed to create temporary directory");
+
+ // Generate Trip table with max file size of 2MB
+ Command::cargo_bin("spatialbench-cli")
+ .expect("Binary not found")
+ .arg("--format")
+ .arg("parquet")
+ .arg("--scale-factor")
+ .arg("0.01")
+ .arg("--tables")
+ .arg("trip")
+ .arg("--output-dir")
+ .arg(temp_dir.path())
+ .arg("--mb-per-file")
+ .arg("2")
+ .assert()
+ .success();
+
+ let trip_dir = temp_dir.path().join("trip");
+ let output_file_size_mb = 2 * 1024 * 1024; // 2MB in bytes
+
+ // Verify all files are under the max size
+ for entry in fs::read_dir(&trip_dir).expect("Failed to read trip
directory") {
+ let entry = entry.expect("Failed to read directory entry");
+ let metadata = entry.metadata().expect("Failed to get file metadata");
+ let file_size = metadata.len();
+
+ assert!(
+ file_size <= output_file_size_mb,
+ "File {} size ({} bytes) exceeds max size ({} bytes)",
+ entry.file_name().to_string_lossy(),
+ file_size,
+ output_file_size_mb
+ );
+ }
+
+ // Verify multiple files were created
+ let file_count = fs::read_dir(&trip_dir)
+ .expect("Failed to read trip directory")
+ .count();
+ assert!(file_count > 1, "Expected multiple files with 10MB limit");
+}
+
+#[tokio::test]
+async fn test_customer_output_file_size() {
+ let temp_dir = tempdir().expect("Failed to create temporary directory");
+
+ // Generate Customer table with approx file size of 1MB
+ Command::cargo_bin("spatialbench-cli")
+ .expect("Binary not found")
+ .arg("--format")
+ .arg("parquet")
+ .arg("--scale-factor")
+ .arg("1")
+ .arg("--tables")
+ .arg("customer")
+ .arg("--output-dir")
+ .arg(temp_dir.path())
+ .arg("--mb-per-file")
+ .arg("1")
+ .assert()
+ .success();
+
+ let customer_dir = temp_dir.path().join("customer");
+ let output_file_size_mb = 1 * 1024 * 1024; // 1MB in bytes
+
+ // Verify all files are under the max size
+ for entry in fs::read_dir(&customer_dir).expect("Failed to read customer
directory") {
+ let entry = entry.expect("Failed to read directory entry");
+ let metadata = entry.metadata().expect("Failed to get file metadata");
+ let file_size = metadata.len();
+
+ assert!(
+ file_size <= output_file_size_mb,
+ "File {} size ({} bytes) exceeds max size ({} bytes)",
+ entry.file_name().to_string_lossy(),
+ file_size,
+ output_file_size_mb
+ );
+ }
+
+ // Verify multiple files were created
+ let file_count = fs::read_dir(&customer_dir)
+ .expect("Failed to read trip directory")
+ .count();
+ assert!(file_count > 1, "Expected multiple files with 10MB limit");
Review Comment:
Assertion message references '10MB limit' but the test uses a 1MB limit
(line 842).
```suggestion
assert!(file_count > 1, "Expected multiple files with 1MB limit");
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]