prantogg commented on code in PR #46:
URL: 
https://github.com/apache/sedona-spatialbench/pull/46#discussion_r2427795583


##########
spatialbench-cli/src/zone_df.rs:
##########
@@ -0,0 +1,501 @@
+use std::{path::PathBuf, sync::Arc, time::Instant};
+
+use anyhow::{anyhow, Result};
+use arrow_array::RecordBatch;
+use arrow_schema::{Schema, SchemaRef};
+use datafusion::{
+    common::config::ConfigOptions, execution::runtime_env::RuntimeEnvBuilder, 
prelude::*,
+    sql::TableReference,
+};
+
+use crate::plan::DEFAULT_PARQUET_ROW_GROUP_BYTES;
+use datafusion::execution::runtime_env::RuntimeEnv;
+use log::{debug, info};
+use object_store::aws::AmazonS3Builder;
+use object_store::ObjectStore;
+use parquet::{
+    arrow::ArrowWriter, basic::Compression as ParquetCompression,
+    file::properties::WriterProperties,
+};
+use url::Url;
+
+const OVERTURE_RELEASE_DATE: &str = "2025-08-20.1";
+const OVERTURE_S3_BUCKET: &str = "overturemaps-us-west-2";
+const OVERTURE_S3_PREFIX: &str = "release";
+
+fn zones_parquet_url() -> String {
+    format!(
+        "s3://{}/{}/{}/theme=divisions/type=division_area/",
+        OVERTURE_S3_BUCKET, OVERTURE_S3_PREFIX, OVERTURE_RELEASE_DATE
+    )
+}
+
+fn subtypes_for_scale_factor(sf: f64) -> Vec<&'static str> {
+    let mut v = vec!["microhood", "macrohood", "county"];
+    if sf >= 10.0 {
+        v.push("neighborhood");
+    }
+    if sf >= 100.0 {
+        v.extend_from_slice(&["localadmin", "locality", "region", 
"dependency"]);
+    }
+    if sf >= 1000.0 {
+        v.push("country");
+    }
+    v
+}
+
+fn estimated_total_rows_for_sf(sf: f64) -> i64 {
+    let mut total = 0i64;
+    for s in subtypes_for_scale_factor(sf) {
+        total += match s {
+            "microhood" => 74797,
+            "macrohood" => 42619,
+            "neighborhood" => 298615,
+            "county" => 38679,
+            "localadmin" => 19007,
+            "locality" => 555834,
+            "region" => 3905,
+            "dependency" => 53,
+            "country" => 219,

Review Comment:
   The updated counts are correct. The previous counts included the
   `isLand = False` condition, and that's why they were larger.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to