adriangb commented on code in PR #22300:
URL: https://github.com/apache/datafusion/pull/22300#discussion_r3306026898


##########
datafusion/catalog/src/table.rs:
##########
@@ -467,6 +469,27 @@ impl<'a> ScanArgs<'a> {
     pub fn limit(&self) -> Option<usize> {
         self.limit
     }
+
+    /// Set the statistics the caller would like the provider to answer for
+    /// this scan, if it can do so cheaply.
+    ///
+    /// Providers read these via [`Self::statistics_requests()`]; anything a

Review Comment:
   3c29d2ebae



##########
datafusion/expr-common/src/statistics.rs:
##########
@@ -1694,3 +1694,44 @@ mod tests {
         all_ops.into_iter().collect()
     }
 }
+
+// ---------------------------------------------------------------------------

Review Comment:
   6f3cfc94ea



##########
datafusion/expr-common/src/statistics.rs:
##########
@@ -1694,3 +1694,44 @@ mod tests {
         all_ops.into_iter().collect()
     }
 }
+
+// ---------------------------------------------------------------------------
+// Query-aware statistics requests.
+//
+// A small extension to the existing `Statistics` model: instead of "give me
+// everything you have for every column", a caller can ask for a specific list
+// of stats by name. `StatisticsRequest` is just that vocabulary — DataFusion
+// itself does not populate or consume it. It exists so a request can be
+// threaded from a `TableScan` (see `TableScan::statistics_requests`) through
+// `ScanArgs::statistics_requests` to a `TableProvider`, which is enough for a
+// query-aware statistics feature to be implemented outside of DataFusion.
+// ---------------------------------------------------------------------------
+
+use datafusion_common::Column;
+
+/// A statistic a caller would like a provider to supply, if it can do so
+/// cheaply.
+///
+/// Each variant maps onto a field of [`datafusion_common::Statistics`] /
+/// [`datafusion_common::ColumnStatistics`], so a provider that already
+/// populates one can answer the request trivially.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub enum StatisticsRequest {
+    /// Smallest non-null value of `column`.
+    Min(Column),

Review Comment:
   c147c379f0



##########
datafusion/expr/src/logical_plan/tree_node.rs:
##########
@@ -608,23 +608,15 @@ impl LogicalPlan {
                     Transformed::new(plan, exprs.transformed, exprs.tnr)
                 }
             }
-            LogicalPlan::TableScan(TableScan {
-                table_name,
-                source,
-                projection,
-                projected_schema,
-                filters,
-                fetch,
-            }) => filters.map_elements(f)?.update_data(|filters| {
-                LogicalPlan::TableScan(TableScan {
-                    table_name,
-                    source,
-                    projection,
-                    projected_schema,
-                    filters,
-                    fetch,
+            LogicalPlan::TableScan(mut scan) => {

Review Comment:
   23d03a415e



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to