Copilot commented on code in PR #56:
URL: 
https://github.com/apache/sedona-spatialbench/pull/56#discussion_r2441518927


##########
spatialbench-cli/tests/cli_integration.rs:
##########
@@ -316,23 +316,23 @@ async fn test_write_parquet_row_group_size_default() {
         vec![
             RowGroups {
                 table: "customer",
-                row_group_bytes: vec![2600113],
+                row_group_bytes: vec![2599669],
             },
             RowGroups {
                 table: "trip",
-                row_group_bytes: vec![123519959, 123486809, 123476361, 
123492237],
+                row_group_bytes: vec![123493205, 123460055, 123449607, 
123465483],
             },
             RowGroups {
                 table: "driver",
-                row_group_bytes: vec![41594],
+                row_group_bytes: vec![41361],

Review Comment:
   [nitpick] Multiple hard-coded row group byte sizes make this test brittle; 
every internal change to parquet sizing requires editing these magic numbers. 
Consider asserting on row group count plus acceptable size ranges or relative 
ordering instead of exact byte-for-byte equality, and/or document how these 
values were derived.



##########
spatialbench-cli/tests/cli_integration.rs:
##########
@@ -316,23 +316,23 @@ async fn test_write_parquet_row_group_size_default() {
         vec![
             RowGroups {
                 table: "customer",
-                row_group_bytes: vec![2600113],
+                row_group_bytes: vec![2599669],

Review Comment:
   [nitpick] Multiple hard-coded row group byte sizes make this test brittle; 
every internal change to parquet sizing requires editing these magic numbers. 
Consider asserting on row group count plus acceptable size ranges or relative 
ordering instead of exact byte-for-byte equality, and/or document how these 
values were derived.



##########
spatialbench-cli/tests/cli_integration.rs:
##########
@@ -316,23 +316,23 @@ async fn test_write_parquet_row_group_size_default() {
         vec![
             RowGroups {
                 table: "customer",
-                row_group_bytes: vec![2600113],
+                row_group_bytes: vec![2599669],
             },
             RowGroups {
                 table: "trip",
-                row_group_bytes: vec![123519959, 123486809, 123476361, 
123492237],
+                row_group_bytes: vec![123493205, 123460055, 123449607, 
123465483],

Review Comment:
   [nitpick] Multiple hard-coded row group byte sizes make this test brittle; 
every internal change to parquet sizing requires editing these magic numbers. 
Consider asserting on row group count plus acceptable size ranges or relative 
ordering instead of exact byte-for-byte equality, and/or document how these 
values were derived.



##########
spatialbench-cli/tests/cli_integration.rs:
##########
@@ -316,23 +316,23 @@ async fn test_write_parquet_row_group_size_default() {
         vec![
             RowGroups {
                 table: "customer",
-                row_group_bytes: vec![2600113],
+                row_group_bytes: vec![2599669],
             },
             RowGroups {
                 table: "trip",
-                row_group_bytes: vec![123519959, 123486809, 123476361, 
123492237],
+                row_group_bytes: vec![123493205, 123460055, 123449607, 
123465483],
             },
             RowGroups {
                 table: "driver",
-                row_group_bytes: vec![41594],
+                row_group_bytes: vec![41361],
             },
             RowGroups {
                 table: "vehicle",
-                row_group_bytes: vec![5393],
+                row_group_bytes: vec![5214],

Review Comment:
   [nitpick] Multiple hard-coded row group byte sizes make this test brittle; 
every internal change to parquet sizing requires editing these magic numbers. 
Consider asserting on row group count plus acceptable size ranges or relative 
ordering instead of exact byte-for-byte equality, and/or document how these 
values were derived.



##########
spatialbench-arrow/Cargo.toml:
##########
@@ -9,11 +9,11 @@ readme = "README.md"
 license = "Apache-2.0"
 
 [dependencies]
-arrow = { version = "55.2", default-features = false, features = 
["prettyprint"] }
+arrow = { version = "56.2.0", default-features = false, features = 
["prettyprint"] }

Review Comment:
   [nitpick] Exact patch pinning differs from previous looser minor spec; if 
strict pinning is intentional, add a comment explaining the reason (e.g. 
reproducible benchmarks). Otherwise consider reverting to a minor-compatible 
specification (56.2) for easier maintenance.
   ```suggestion
   arrow = { version = "56.2", default-features = false, features = 
["prettyprint"] }
   ```



##########
spatialbench-cli/Cargo.toml:
##########
@@ -23,10 +23,10 @@ env_logger = "0.11.7"
 serde = { version = "1.0.219", features = ["derive"] }
 anyhow = "1.0.99"
 serde_yaml = "0.9.33"
-datafusion = "47.0.0"
+datafusion = "50.2.0"
 object_store = { version = "0.12.4", features = ["aws"] }
-arrow-array = "55.2.0"
-arrow-schema = "55.2.0"
+arrow-array = "56.2.0"
+arrow-schema = "56.2.0"

Review Comment:
   [nitpick] Version specifications changed from a minor form (e.g. 55.2) to 
fully pinned patch versions (56.2.0). Pinning all to exact patch can impede 
receiving compatible patch updates; consider using a consistent semver style 
(e.g. 56.2) unless exact pinning is intentionally required and documented.



##########
spatialbench-cli/tests/cli_integration.rs:
##########
@@ -316,23 +316,23 @@ async fn test_write_parquet_row_group_size_default() {
         vec![
             RowGroups {
                 table: "customer",
-                row_group_bytes: vec![2600113],
+                row_group_bytes: vec![2599669],
             },
             RowGroups {
                 table: "trip",
-                row_group_bytes: vec![123519959, 123486809, 123476361, 
123492237],
+                row_group_bytes: vec![123493205, 123460055, 123449607, 
123465483],
             },
             RowGroups {
                 table: "driver",
-                row_group_bytes: vec![41594],
+                row_group_bytes: vec![41361],
             },
             RowGroups {
                 table: "vehicle",
-                row_group_bytes: vec![5393],
+                row_group_bytes: vec![5214],
             },
             RowGroups {
                 table: "building",
-                row_group_bytes: vec![2492865],
+                row_group_bytes: vec![2492359],

Review Comment:
   [nitpick] Multiple hard-coded row group byte sizes make this test brittle; 
every internal change to parquet sizing requires editing these magic numbers. 
Consider asserting on row group count plus acceptable size ranges or relative 
ordering instead of exact byte-for-byte equality, and/or document how these 
values were derived.



##########
spatialbench-cli/tests/cli_integration.rs:
##########
@@ -363,7 +363,7 @@ async fn test_zone_write_parquet_row_group_size_default() {
         output_dir.path(),
         vec![RowGroups {
             table: "zone",
-            row_group_bytes: vec![91351103],
+            row_group_bytes: vec![86288517],

Review Comment:
   [nitpick] Single large magic number for expected row group size is fragile; 
a tolerance-based check (e.g. within ±X bytes) or validating number of row 
groups and approximate total size would reduce churn when parquet internals 
change.



##########
spatialbench-cli/tests/cli_integration.rs:
##########
@@ -390,27 +390,27 @@ async fn test_write_parquet_row_group_size_20mb() {
         vec![
             RowGroups {
                 table: "customer",
-                row_group_bytes: vec![2600113],
+                row_group_bytes: vec![2599669],
             },
             RowGroups {
                 table: "trip",
                 row_group_bytes: vec![
-                    24361422, 24361685, 24350928, 24348682, 24353605, 
24335813, 24358941, 24343011,
-                    24345967, 24361312, 24337627, 24345972, 24348724, 
24361400, 24361528, 24346264,
-                    24351137, 24338412, 24348304, 24361680, 24351433,
+                    24356144, 24356407, 24345650, 24343404, 24348327, 
24330535, 24353663, 24337733,
+                    24340689, 24356034, 24332349, 24340694, 24343446, 
24356122, 24356250, 24340986,
+                    24345859, 24333134, 24343026, 24356402, 24346155,
                 ],
             },
             RowGroups {
                 table: "driver",
-                row_group_bytes: vec![41594],
+                row_group_bytes: vec![41361],
             },
             RowGroups {
                 table: "vehicle",
-                row_group_bytes: vec![5393],
+                row_group_bytes: vec![5214],
             },
             RowGroups {
                 table: "building",
-                row_group_bytes: vec![2492865],
+                row_group_bytes: vec![2492359],

Review Comment:
   [nitpick] Extensive lists of exact byte sizes amplify maintenance burden; 
prefer deriving expectations (e.g. computing sizes then asserting invariants 
like consistent range, non-zero, or total size) or summarizing with documented 
rationale rather than enumerating every value verbatim.



##########
spatialbench-cli/tests/cli_integration.rs:
##########
@@ -390,27 +390,27 @@ async fn test_write_parquet_row_group_size_20mb() {
         vec![
             RowGroups {
                 table: "customer",
-                row_group_bytes: vec![2600113],
+                row_group_bytes: vec![2599669],

Review Comment:
   [nitpick] Extensive lists of exact byte sizes amplify maintenance burden; 
prefer deriving expectations (e.g. computing sizes then asserting invariants 
like consistent range, non-zero, or total size) or summarizing with documented 
rationale rather than enumerating every value verbatim.



##########
spatialbench-cli/tests/cli_integration.rs:
##########
@@ -390,27 +390,27 @@ async fn test_write_parquet_row_group_size_20mb() {
         vec![
             RowGroups {
                 table: "customer",
-                row_group_bytes: vec![2600113],
+                row_group_bytes: vec![2599669],
             },
             RowGroups {
                 table: "trip",
                 row_group_bytes: vec![
-                    24361422, 24361685, 24350928, 24348682, 24353605, 
24335813, 24358941, 24343011,
-                    24345967, 24361312, 24337627, 24345972, 24348724, 
24361400, 24361528, 24346264,
-                    24351137, 24338412, 24348304, 24361680, 24351433,
+                    24356144, 24356407, 24345650, 24343404, 24348327, 
24330535, 24353663, 24337733,
+                    24340689, 24356034, 24332349, 24340694, 24343446, 
24356122, 24356250, 24340986,
+                    24345859, 24333134, 24343026, 24356402, 24346155,
                 ],
             },
             RowGroups {
                 table: "driver",
-                row_group_bytes: vec![41594],
+                row_group_bytes: vec![41361],
             },
             RowGroups {
                 table: "vehicle",
-                row_group_bytes: vec![5393],
+                row_group_bytes: vec![5214],

Review Comment:
   [nitpick] Extensive lists of exact byte sizes amplify maintenance burden; 
prefer deriving expectations (e.g. computing sizes then asserting invariants 
like consistent range, non-zero, or total size) or summarizing with documented 
rationale rather than enumerating every value verbatim.



##########
spatialbench-cli/tests/cli_integration.rs:
##########
@@ -390,27 +390,27 @@ async fn test_write_parquet_row_group_size_20mb() {
         vec![
             RowGroups {
                 table: "customer",
-                row_group_bytes: vec![2600113],
+                row_group_bytes: vec![2599669],
             },
             RowGroups {
                 table: "trip",
                 row_group_bytes: vec![
-                    24361422, 24361685, 24350928, 24348682, 24353605, 
24335813, 24358941, 24343011,
-                    24345967, 24361312, 24337627, 24345972, 24348724, 
24361400, 24361528, 24346264,
-                    24351137, 24338412, 24348304, 24361680, 24351433,
+                    24356144, 24356407, 24345650, 24343404, 24348327, 
24330535, 24353663, 24337733,
+                    24340689, 24356034, 24332349, 24340694, 24343446, 
24356122, 24356250, 24340986,
+                    24345859, 24333134, 24343026, 24356402, 24346155,

Review Comment:
   [nitpick] Extensive lists of exact byte sizes amplify maintenance burden; 
prefer deriving expectations (e.g. computing sizes then asserting invariants 
like consistent range, non-zero, or total size) or summarizing with documented 
rationale rather than enumerating every value verbatim.



##########
spatialbench-cli/Cargo.toml:
##########
@@ -10,8 +10,8 @@ license = { workspace = true }
 repository = { workspace = true }
 
 [dependencies]
-arrow = "55.2"
-parquet = "55.2"
+arrow = "56.2.0"
+parquet = "56.2.0"

Review Comment:
   [nitpick] Version specifications changed from a minor form (e.g. 55.2) to 
fully pinned patch versions (56.2.0). Pinning all to exact patch can impede 
receiving compatible patch updates; consider using a consistent semver style 
(e.g. 56.2) unless exact pinning is intentionally required and documented.



##########
spatialbench-cli/tests/cli_integration.rs:
##########
@@ -390,27 +390,27 @@ async fn test_write_parquet_row_group_size_20mb() {
         vec![
             RowGroups {
                 table: "customer",
-                row_group_bytes: vec![2600113],
+                row_group_bytes: vec![2599669],
             },
             RowGroups {
                 table: "trip",
                 row_group_bytes: vec![
-                    24361422, 24361685, 24350928, 24348682, 24353605, 
24335813, 24358941, 24343011,
-                    24345967, 24361312, 24337627, 24345972, 24348724, 
24361400, 24361528, 24346264,
-                    24351137, 24338412, 24348304, 24361680, 24351433,
+                    24356144, 24356407, 24345650, 24343404, 24348327, 
24330535, 24353663, 24337733,
+                    24340689, 24356034, 24332349, 24340694, 24343446, 
24356122, 24356250, 24340986,
+                    24345859, 24333134, 24343026, 24356402, 24346155,
                 ],
             },
             RowGroups {
                 table: "driver",
-                row_group_bytes: vec![41594],
+                row_group_bytes: vec![41361],

Review Comment:
   [nitpick] Extensive lists of exact byte sizes amplify maintenance burden; 
prefer deriving expectations (e.g. computing sizes then asserting invariants 
like consistent range, non-zero, or total size) or summarizing with documented 
rationale rather than enumerating every value verbatim.



##########
spatialbench-arrow/Cargo.toml:
##########
@@ -9,11 +9,11 @@ readme = "README.md"
 license = "Apache-2.0"
 
 [dependencies]
-arrow = { version = "55.2", default-features = false, features = 
["prettyprint"] }
+arrow = { version = "56.2.0", default-features = false, features = 
["prettyprint"] }
 spatialbench = { path = "../spatialbench", version = "1.1.1" }
 geo = { workspace = true }
 geozero = { workspace = true }
 
 [dev-dependencies]
-arrow-csv = "55.2"
+arrow-csv = "56.2.0"

Review Comment:
   [nitpick] Exact patch pinning differs from previous looser minor spec; if 
strict pinning is intentional, add a comment explaining the reason (e.g. 
reproducible benchmarks). Otherwise consider reverting to a minor-compatible 
specification (56.2) for easier maintenance.
   ```suggestion
   arrow-csv = "56.2"
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to