alamb commented on code in PR #14364:
URL: https://github.com/apache/datafusion/pull/14364#discussion_r1934520683


##########
datafusion/substrait/Cargo.toml:
##########
@@ -46,6 +46,7 @@ url = { workspace = true }
 
 [dev-dependencies]
 datafusion = { workspace = true, features = ["nested_expressions"] }
+datafusion-catalog = { workspace = true }

Review Comment:
   I think since datafusion re-exports `datafusion_catalog` we should be able 
to avoid this dependency:
   
   
https://github.com/apache/datafusion/blob/9ca6c2557488c1c608182542c1be96889b64fe29/datafusion/core/src/lib.rs#L731-L730



##########
datafusion/catalog/src/lib.rs:
##########
@@ -18,23 +18,264 @@
 //! Interfaces and default implementations of catalogs and schemas.
 //!
 //! Implementations
+//! * Information schema: [`information_schema`]
 //! * Simple memory based catalog: [`MemoryCatalogProviderList`], 
[`MemoryCatalogProvider`], [`MemorySchemaProvider`]
 
 pub mod memory;
+pub use datafusion_sql::{ResolvedTableReference, TableReference};
 pub use memory::{
     MemoryCatalogProvider, MemoryCatalogProviderList, MemorySchemaProvider,
 };
+use std::collections::BTreeSet;
+use std::ops::ControlFlow;
 
 mod r#async;
 mod catalog;
 mod dynamic_file;
+pub mod information_schema;
 mod schema;
 mod session;
 mod table;
-
 pub use catalog::*;
 pub use dynamic_file::catalog::*;
 pub use r#async::*;
 pub use schema::*;
 pub use session::*;
 pub use table::*;
+pub mod streaming;
+
+/// Collects all tables and views referenced in the SQL statement. CTEs are 
collected separately.
+/// This can be used to determine which tables need to be in the catalog for a 
query to be planned.
+///
+/// # Returns
+///
+/// A `(table_refs, ctes)` tuple, the first element contains table and view 
references and the second
+/// element contains any CTE aliases that were defined and possibly referenced.
+///
+/// ## Example
+///
+/// ```
+/// # use datafusion_sql::parser::DFParser;
+/// # use datafusion_catalog::resolve_table_references;
+/// let query = "SELECT a FROM foo where x IN (SELECT y FROM bar)";
+/// let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap();
+/// let (table_refs, ctes) = resolve_table_references(&statement, 
true).unwrap();
+/// assert_eq!(table_refs.len(), 2);
+/// assert_eq!(table_refs[0].to_string(), "bar");
+/// assert_eq!(table_refs[1].to_string(), "foo");
+/// assert_eq!(ctes.len(), 0);
+/// ```
+///
+/// ## Example with CTEs  
+///  
+/// ```  
+/// # use datafusion_sql::parser::DFParser;  
+/// # use datafusion_catalog::resolve_table_references;
+/// let query = "with my_cte as (values (1), (2)) SELECT * from my_cte;";  
+/// let statement = DFParser::parse_sql(query).unwrap().pop_back().unwrap();  
+/// let (table_refs, ctes) = resolve_table_references(&statement, 
true).unwrap();  
+/// assert_eq!(table_refs.len(), 0);
+/// assert_eq!(ctes.len(), 1);  
+/// assert_eq!(ctes[0].to_string(), "my_cte");  
+/// ```
+pub fn resolve_table_references(
+    statement: &datafusion_sql::parser::Statement,

Review Comment:
   I think since this function belongs in `datafusion-sql` as it is walking 
over the sql parse tree 
   
   perhaps we can put it in 
`https://github.com/apache/datafusion/tree/main/datafusion/sql/src/resolve.rs` 
or something and then remove the dependency of `datafusion-catalog` on 
`datafusion-sql`
   
   I think we could do this in a follow on PR as well



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to