alamb commented on code in PR #11186:
URL: https://github.com/apache/datafusion/pull/11186#discussion_r1664788104
##########
datafusion-examples/examples/parse_sql_expr.rs:
##########
@@ -135,7 +135,9 @@ async fn query_parquet_demo() -> Result<()> {
/// DataFusion can parse a SQL text and convert it back to SQL using
[`Unparser`].
async fn round_trip_parse_sql_expr_demo() -> Result<()> {
- let sql = "((int_col < 5) OR (double_col = 8))";
+ // unparser can also remove extra parentheses,
+ // so `((int_col < 5) OR (double_col = 8))` will also produce the same SQL
Review Comment:
👍
##########
datafusion/sql/tests/cases/plan_to_sql.rs:
##########
@@ -314,3 +310,78 @@ fn test_table_references_in_plan_to_sql() {
"SELECT \"table\".id, \"table\".\"value\" FROM \"table\"",
);
}
+
+#[test]
+fn test_pretty_roundtrip() -> Result<()> {
+ let schema = Schema::new(vec![
+ Field::new("id", DataType::Utf8, false),
+ Field::new("age", DataType::Utf8, false),
+ ]);
+
+ let df_schema = DFSchema::try_from(schema)?;
+
+ let context = MockContextProvider::default();
+ let sql_to_rel = SqlToRel::new(&context);
+
+ let unparser = Unparser::default();
+
+ let sql_to_pretty_unparse = vec![
+ ("((id < 5) OR (age = 8))", "id < 5 OR age = 8"),
+ ("((id + 5) * (age * 8))", "(id + 5) * age * 8"),
+ ("(3 + (5 * 6) * 3)", "3 + 5 * 6 * 3"),
+ ("((3 * (5 + 6)) * 3)", "3 * (5 + 6) * 3"),
+ ("((3 AND (5 OR 6)) * 3)", "(3 AND (5 OR 6)) * 3"),
+ ("((3 + (5 + 6)) * 3)", "(3 + 5 + 6) * 3"),
+ ("((3 + (5 + 6)) + 3)", "3 + 5 + 6 + 3"),
+ ("3 + 5 + 6 + 3", "3 + 5 + 6 + 3"),
+ ("3 + (5 + (6 + 3))", "3 + 5 + 6 + 3"),
+ ("3 + ((5 + 6) + 3)", "3 + 5 + 6 + 3"),
+ ("(3 + 5) + (6 + 3)", "3 + 5 + 6 + 3"),
+ ("((3 + 5) + (6 + 3))", "3 + 5 + 6 + 3"),
+ (
+ "((id > 10) OR (age BETWEEN 10 AND 20))",
+ "id > 10 OR age BETWEEN 10 AND 20",
+ ),
+ (
+ "((id > 10) * (age BETWEEN 10 AND 20))",
+ "(id > 10) * (age BETWEEN 10 AND 20)",
+ ),
+ ("id - (age - 8)", "id - (age - 8)"),
+ ("((id - age) - 8)", "id - age - 8"),
+ ("(id OR (age - 8))", "id OR age - 8"),
+ ("(id / (age - 8))", "id / (age - 8)"),
+ ("((id / age) * 8)", "id / age * 8"),
+ ("((age + 10) < 20) IS TRUE", "(age + 10 < 20) IS TRUE"),
+ (
+ "(20 > (age + 5)) IS NOT FALSE",
+ "(20 > age + 5) IS NOT FALSE",
+ ),
+ ("(true AND false) IS FALSE", "(true AND false) IS FALSE"),
+ ("true AND (false IS FALSE)", "true AND false IS FALSE"),
+ ];
+
+ for (sql, pretty) in sql_to_pretty_unparse.iter() {
+ let sql_expr = Parser::new(&GenericDialect {})
+ .try_with_sql(sql)?
+ .parse_expr()?;
+ let expr =
+ sql_to_rel.sql_to_expr(sql_expr, &df_schema, &mut
PlannerContext::new())?;
+ let round_trip_sql = unparser.expr_to_sql(&expr)?.to_string();
+ assert_eq!(pretty.to_string(), round_trip_sql);
+
+ // verify that the pretty string parses to the same underlying Expr
+ let pretty_sql_expr = Parser::new(&GenericDialect {})
+ .try_with_sql(pretty)?
+ .parse_expr()?;
+
+ let pretty_expr = sql_to_rel.sql_to_expr(
+ pretty_sql_expr,
+ &df_schema,
+ &mut PlannerContext::new(),
+ )?;
+
+ assert_eq!(expr.to_string(), pretty_expr.to_string());
Review Comment:
Shouldn't this me veriying that the expr are the same (not just that the
string is the same)?
##########
datafusion/sql/tests/cases/plan_to_sql.rs:
##########
@@ -314,3 +310,78 @@ fn test_table_references_in_plan_to_sql() {
"SELECT \"table\".id, \"table\".\"value\" FROM \"table\"",
);
}
+
+#[test]
+fn test_pretty_roundtrip() -> Result<()> {
+ let schema = Schema::new(vec![
+ Field::new("id", DataType::Utf8, false),
+ Field::new("age", DataType::Utf8, false),
+ ]);
+
+ let df_schema = DFSchema::try_from(schema)?;
+
+ let context = MockContextProvider::default();
+ let sql_to_rel = SqlToRel::new(&context);
+
+ let unparser = Unparser::default();
+
+ let sql_to_pretty_unparse = vec![
+ ("((id < 5) OR (age = 8))", "id < 5 OR age = 8"),
+ ("((id + 5) * (age * 8))", "(id + 5) * age * 8"),
+ ("(3 + (5 * 6) * 3)", "3 + 5 * 6 * 3"),
+ ("((3 * (5 + 6)) * 3)", "3 * (5 + 6) * 3"),
+ ("((3 AND (5 OR 6)) * 3)", "(3 AND (5 OR 6)) * 3"),
+ ("((3 + (5 + 6)) * 3)", "(3 + 5 + 6) * 3"),
+ ("((3 + (5 + 6)) + 3)", "3 + 5 + 6 + 3"),
+ ("3 + 5 + 6 + 3", "3 + 5 + 6 + 3"),
+ ("3 + (5 + (6 + 3))", "3 + 5 + 6 + 3"),
Review Comment:
this doesn't seem right to me -- this implies that `(3 + 5) + (6 + 3)` and
`3 + ((5 + 6) + 3)` are logically equivalent to each other as well as with `3 +
5 + 6 + 3`
I think `(3 + 5) + (6 + 3)` and `3 + ((5 + 6) + 3)` are different, right?
Aka the parenthesis are needed here
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]