Copilot commented on code in PR #385:
URL: https://github.com/apache/sedona-db/pull/385#discussion_r2573334805


##########
rust/sedona-expr/src/statistics.rs:
##########
@@ -55,21 +55,24 @@ pub struct GeoStatistics {
 }
 
 impl GeoStatistics {
+    /// Statistics representing unspecified information
+    pub const UNSPECIFIED: GeoStatistics = Self {
+        bbox: None,
+        geometry_types: None,
+        total_geometries: None,
+        total_size_bytes: None,
+        total_points: None,
+        puntal_count: None,
+        lineal_count: None,
+        polygonal_count: None,
+        collection_count: None,
+        total_envelope_width: None,
+        total_envelope_height: None,
+    };
+
     /// Create statistics representing unspecified information
     pub fn unspecified() -> Self {
-        Self {
-            bbox: None,
-            geometry_types: None,
-            total_geometries: None,
-            total_size_bytes: None,
-            total_points: None,
-            puntal_count: None,
-            lineal_count: None,
-            polygonal_count: None,
-            collection_count: None,
-            total_envelope_width: None,
-            total_envelope_height: None,
-        }
+        Self::UNSPECIFIED.clone()

Review Comment:
   Cloning `UNSPECIFIED` on every call defeats the purpose of making it a 
const. Consider either keeping the original method implementation that 
constructs a new instance inline, or document why cloning is necessary here. 
The const is only beneficial if it's used directly without cloning.
   ```suggestion
           Self {
               bbox: None,
               geometry_types: None,
               total_geometries: None,
               total_size_bytes: None,
               total_points: None,
               puntal_count: None,
               lineal_count: None,
               polygonal_count: None,
               collection_count: None,
               total_envelope_width: None,
               total_envelope_height: None,
           }
   ```



##########
rust/sedona-expr/src/spatial_filter.rs:
##########
@@ -389,6 +394,81 @@ impl SpatialFilter {
     }
 }
 
+/// Table GeoStatistics
+///
+/// Enables providing a collection of GeoStatistics to 
[SpatialFilter::evaluate]
+/// such that attempts to access out-of-bounds values results in a readable
+/// error.
+pub enum TableGeoStatistics {
+    /// Provide statistics for every Column in the table. These must be
+    /// [GeoStatistics::unspecified] for non-spatial columns.
+    ///
+    /// These are resolved using [Column::index].
+    ByPosition(Vec<GeoStatistics>),
+
+    /// Provide statistics for specific named columns. Columns not included
+    /// are treated as [GeoStatistics::unspecified].
+    ///
+    /// These are resolved using [Column::name]. This may be used for logical
+    /// expressions (where columns are resolved by name) or as a workaround
+    /// for physical expressions where the index is relative to a projected
+    /// schema <https://github.com/apache/sedona-db/issues/389>.
+    ByName(HashMap<String, GeoStatistics>),
+}
+
+impl TableGeoStatistics {
+    /// Construct TableGeoStatistics with no columns
+    pub fn empty() -> Self {
+        TableGeoStatistics::ByPosition(vec![])
+    }
+
+    /// Construct TableGeoStatistics from a slice of all column statistics and 
a schema
+    pub fn try_from_stats_and_schema(
+        column_stats: &[GeoStatistics],
+        schema: &Schema,
+    ) -> Result<Self> {
+        let mut stats_map = HashMap::new();
+        for i in schema.geometry_column_indices()? {
+            stats_map.insert(schema.field(i).name().to_string(), 
column_stats[i].clone());
+        }
+        Ok(Self::ByName(stats_map))
+    }
+
+    /// For a given [Column], obtain [GeoStatistics]
+    ///
+    /// This will error if the provided statistics have an index out of bounds.
+    /// Names that cannot be resolved will be treated as unspecified.
+    fn get(&self, column: &Column) -> Result<&GeoStatistics> {
+        match self {
+            Self::ByPosition(items) => {
+                if column.index() >= items.len() {
+                    sedona_internal_err!(
+                        "Can't obtain GeoStatistics for column at index {} 
from schema with {} columns",
+                        column.index(),
+                        items.len()
+                    )
+                } else {
+                    Ok(&items[column.index()])
+                }
+            }
+            Self::ByName(items) => {
+                if let Some(item) = items.get(column.name()) {
+                    Ok(item)
+                } else {
+                    Ok(&GeoStatistics::UNSPECIFIED)
+                }
+            }
+        }
+    }
+}
+
+// Useful for testing (create from a single GeoStatistics)
+impl From<GeoStatistics> for TableGeoStatistics {
+    fn from(value: GeoStatistics) -> Self {
+        TableGeoStatistics::ByPosition(vec![value.clone()])

Review Comment:
   The `value` parameter is already owned, so cloning it is unnecessary. Change 
to `TableGeoStatistics::ByPosition(vec![value])`.
   ```suggestion
           TableGeoStatistics::ByPosition(vec![value])
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to