phillipleblanc commented on code in PR #13880: URL: https://github.com/apache/datafusion/pull/13880#discussion_r1895160201
########## datafusion/sql/src/unparser/mod.rs: ########## @@ -105,13 +109,33 @@ impl<'a> Unparser<'a> { self.pretty = pretty; self } + + /// Add a custom unparser for user defined logical nodes + /// + /// DataFusion allows user to define custom logical nodes. This method allows to add custom child unparsers for these nodes. + /// Implementation of [`UserDefinedLogicalNodeUnparser`] can be added to the root unparser to handle custom logical nodes. + /// + /// The child unparsers are called iteratively. + /// There are two methods in [`Unparser`] will be called: + /// - `extension_to_statement`: This method is called when the custom logical node is a custom statement. + /// If multiple child unparsers return a non-None value, the last unparsing result will be returned. + /// - `extension_to_sql`: This method is called when the custom logical node is part of a statement. + /// If multiple child unparsers are registered for the same custom logical node, all of them will be called in order. Review Comment: I think this should also short-circuit and only do the first one? ########## datafusion/sql/src/unparser/mod.rs: ########## @@ -105,13 +109,33 @@ impl<'a> Unparser<'a> { self.pretty = pretty; self } + + /// Add a custom unparser for user defined logical nodes + /// + /// DataFusion allows user to define custom logical nodes. This method allows to add custom child unparsers for these nodes. + /// Implementation of [`UserDefinedLogicalNodeUnparser`] can be added to the root unparser to handle custom logical nodes. + /// + /// The child unparsers are called iteratively. + /// There are two methods in [`Unparser`] will be called: + /// - `extension_to_statement`: This method is called when the custom logical node is a custom statement. + /// If multiple child unparsers return a non-None value, the last unparsing result will be returned. + /// - `extension_to_sql`: This method is called when the custom logical node is part of a statement. + /// If multiple child unparsers are registered for the same custom logical node, all of them will be called in order. + pub fn with_udlp_unparsers( Review Comment: Not a fan of this name - `udlp` takes effort to understand what it means. How about renaming `udlp_*` to `extension_*`? i.e. `with_extension_unparsers`. It conveys the same meaning in an easier to understand way. ########## datafusion/sql/tests/cases/plan_to_sql.rs: ########## @@ -1406,3 +1414,150 @@ fn test_join_with_no_conditions() { "SELECT * FROM j1 CROSS JOIN j2", ); } + +#[derive(Debug, PartialEq, Eq, Hash, PartialOrd)] +struct MockUserDefinedLogicalPlan { + input: LogicalPlan, +} + +impl UserDefinedLogicalNodeCore for MockUserDefinedLogicalPlan { + fn name(&self) -> &str { + "MockUserDefinedLogicalPlan" + } + + fn inputs(&self) -> Vec<&LogicalPlan> { + vec![&self.input] + } + + fn schema(&self) -> &DFSchemaRef { + self.input.schema() + } + + fn expressions(&self) -> Vec<Expr> { + vec![] + } + + fn fmt_for_explain(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "MockUserDefinedLogicalPlan") + } + + fn with_exprs_and_inputs( + &self, + _exprs: Vec<Expr>, + inputs: Vec<LogicalPlan>, + ) -> Result<Self> { + Ok(MockUserDefinedLogicalPlan { + input: inputs.into_iter().next().unwrap(), + }) + } +} + +struct MockStatementUnparser {} + +impl UserDefinedLogicalNodeUnparser for MockStatementUnparser { + fn unparse_to_statement( + &self, + node: &dyn UserDefinedLogicalNode, + unparser: &Unparser, + ) -> Result<Option<Statement>> { + if let Some(plan) = node.as_any().downcast_ref::<MockUserDefinedLogicalPlan>() { + let input = unparser.plan_to_sql(&plan.input)?; + Ok(Some(input)) + } else { + Ok(None) + } + } +} + +#[test] +fn test_unparse_udlp_to_statement() -> Result<()> { + let dialect = GenericDialect {}; + let statement = Parser::new(&dialect) + .try_with_sql("SELECT * FROM j1")? + .parse_statement()?; + let state = MockSessionState::default(); + let context = MockContextProvider { state }; + let sql_to_rel = SqlToRel::new(&context); + let plan = sql_to_rel.sql_statement_to_plan(statement)?; + + let udlp = MockUserDefinedLogicalPlan { input: plan }; + let extension = LogicalPlan::Extension(Extension { + node: Arc::new(udlp), + }); + let unparser = + Unparser::default().with_udlp_unparsers(vec![Arc::new(MockStatementUnparser {})]); + let sql = unparser.plan_to_sql(&extension)?; + let expected = "SELECT * FROM j1"; + assert_eq!(sql.to_string(), expected); + + if let Some(err) = plan_to_sql(&extension).err() { + assert_contains!( + err.to_string(), + "This feature is not implemented: Unsupported extension node: MockUserDefinedLogicalPlan"); + } else { + panic!("Expected error"); + } + Ok(()) +} + +struct MockSqlUnparser {} + +impl UserDefinedLogicalNodeUnparser for MockSqlUnparser { + fn unparse( + &self, + node: &dyn UserDefinedLogicalNode, + unparser: &Unparser, + _query: &mut Option<&mut QueryBuilder>, + _select: &mut Option<&mut SelectBuilder>, + relation: &mut Option<&mut RelationBuilder>, + ) -> Result<()> { + if let Some(plan) = node.as_any().downcast_ref::<MockUserDefinedLogicalPlan>() { + let Statement::Query(input) = unparser.plan_to_sql(&plan.input)? else { + return Ok(()); + }; + let mut derived_builder = DerivedRelationBuilder::default(); + derived_builder.subquery(input); + derived_builder.lateral(false); + if let Some(rel) = relation { + rel.derived(derived_builder); + } + } + Ok(()) + } +} + +#[test] +fn test_unparse_udlp_to_sql() -> Result<()> { + let dialect = GenericDialect {}; + let statement = Parser::new(&dialect) + .try_with_sql("SELECT * FROM j1")? + .parse_statement()?; + let state = MockSessionState::default(); + let context = MockContextProvider { state }; + let sql_to_rel = SqlToRel::new(&context); + let plan = sql_to_rel.sql_statement_to_plan(statement)?; + + let udlp = MockUserDefinedLogicalPlan { input: plan }; + let extension = LogicalPlan::Extension(Extension { + node: Arc::new(udlp), + }); + + let plan = LogicalPlanBuilder::from(extension) + .project(vec![col("j1_id").alias("user_id")])? + .build()?; + let unparser = + Unparser::default().with_udlp_unparsers(vec![Arc::new(MockSqlUnparser {})]); + let sql = unparser.plan_to_sql(&plan)?; + let expected = "SELECT j1.j1_id AS user_id FROM (SELECT * FROM j1)"; + assert_eq!(sql.to_string(), expected); + + if let Some(err) = plan_to_sql(&plan).err() { + assert_eq!( + err.to_string(), + "External error: `relation` must be initialized" Review Comment: This error is expected? ########## datafusion/sql/src/unparser/udlp_unparser.rs: ########## Review Comment: I would also rename this file to `extension_unparser.rs` ########## datafusion/sql/src/unparser/plan.rs: ########## @@ -121,6 +124,40 @@ impl Unparser<'_> { } } + /// Try to unparse a [UserDefinedLogicalNode] to a SQL statement. + /// If multiple unparsers are registered for the same [UserDefinedLogicalNode], + /// the last unparsing result will be returned. + fn extension_to_statement( + &self, + node: &dyn UserDefinedLogicalNode, + ) -> Result<ast::Statement> { + let mut statement = None; + for unparser in &self.udlp_unparsers { + statement = unparser.unparse_to_statement(node, self)?; + } + if let Some(statement) = statement { + Ok(statement) + } else { + not_impl_err!("Unsupported extension node: {node:?}") + } + } + + /// Try to unparse a [UserDefinedLogicalNode] to a SQL statement. + /// If multiple unparsers are registered for the same [UserDefinedLogicalNode], + /// all of them will be called in order. + fn extension_to_sql( + &self, + node: &dyn UserDefinedLogicalNode, + query: &mut Option<&mut QueryBuilder>, + select: &mut Option<&mut SelectBuilder>, + relation: &mut Option<&mut RelationBuilder>, + ) -> Result<()> { + for unparser in &self.udlp_unparsers { Review Comment: Yeah, if none of the extension unparsers were able to process it, it should throw an error IMO ########## datafusion/sql/src/unparser/mod.rs: ########## @@ -105,13 +109,33 @@ impl<'a> Unparser<'a> { self.pretty = pretty; self } + + /// Add a custom unparser for user defined logical nodes + /// + /// DataFusion allows user to define custom logical nodes. This method allows to add custom child unparsers for these nodes. + /// Implementation of [`UserDefinedLogicalNodeUnparser`] can be added to the root unparser to handle custom logical nodes. + /// + /// The child unparsers are called iteratively. + /// There are two methods in [`Unparser`] will be called: + /// - `extension_to_statement`: This method is called when the custom logical node is a custom statement. + /// If multiple child unparsers return a non-None value, the last unparsing result will be returned. Review Comment: Yeah, I would also expect this to short-circuit and have the first one win. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org