mildbyte opened a new issue, #14633:
URL: https://github.com/apache/datafusion/issues/14633

   ### Describe the bug
   
   Running this query:
   
   ```
   SELECT
     c1,
     SUM(c2) FILTER (WHERE c2 >= 20) AS sum_c2,
     AVG(c3) FILTER (WHERE c3 <= 70) AS avg_c3
   FROM test_table GROUP BY c1
   ```
   
   causes a parser error:
   
   ```
   SQL(ParserError("Expected end of statement, found: ("), None)
   ```
   
   ### To Reproduce
   
   Repro repo with a lockfile is available on 
https://github.com/mildbyte/datafusion-agg-filter-repro:
   
   ```
   use datafusion::prelude::{SessionConfig, SessionContext};
   
   #[tokio::main]
   async fn main() {
       let sql = "SELECT
     c1,
     SUM(c2) FILTER (WHERE c2 >= 20) AS sum_c2,
     AVG(c3) FILTER (WHERE c3 <= 70) AS avg_c3
   FROM test_table GROUP BY c1";
       let config = SessionConfig::new();
       let context = SessionContext::new_with_config(config);
       let stream = context.sql(sql).await.unwrap();
   }
   ```
   
   ```
   cargo run
   ...
   warning: `datafusion-agg-filter-repro` (bin "datafusion-agg-filter-repro") 
generated 1 warning
       Finished `dev` profile [unoptimized + debuginfo] target(s) in 1m 01s
        Running `target/debug/datafusion-agg-filter-repro`
   
   thread 'main' panicked at src/main.rs:12:41:
   called `Result::unwrap()` on an `Err` value: SQL(ParserError("Expected end 
of statement, found: ("), None)
   note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
   ```
   
   
   ### Expected behavior
   
   Since I didn't create the tables, I don't expect the query to actually run, 
but I would expect it to get to the plan phase and error then instead of 
erroring in the parser.
   
   This test case is already covered in DF: 
https://github.com/apache/datafusion/blame/980931c5a70b8b08c918195803145f64f1ec901f/datafusion/sqllogictest/test_files/aggregate.slt#L4770-L4778
   
   ### Additional context
   
   ```
   rustc 1.86.0-nightly (9a1d156f3 2025-01-19)
   datafusion 45.0.0
   sqlparser 0.53.0
   ```
   
   This is the AST that's output by DF's parser (though I'm using 0.52.0 in 
this example). It looks like with the default dialect, it treats `SUM(c2) 
FILTER` as `FILTER` being an alias for `SUM(c2)`, then it considers the query 
to be over:
   
   <details>
   ```
   Statement(
       Query(
           Query {
               with: None,
               body: Select(
                   Select {
                       select_token: TokenWithSpan {
                           token: Word(
                               Word {
                                   value: "SELECT",
                                   quote_style: None,
                                   keyword: SELECT,
                               },
                           ),
                           span: Span(Location(0,0)..Location(0,0)),
                       },
                       distinct: None,
                       top: None,
                       top_before_distinct: false,
                       projection: [
                           UnnamedExpr(
                               Identifier(
                                   Ident {
                                       value: "c1",
                                       quote_style: None,
                                       span: Span(Location(0,0)..Location(0,0)),
                                   },
                               ),
                           ),
                           ExprWithAlias {
                               expr: Function(
                                   Function {
                                       name: ObjectName(
                                           [
                                               Ident {
                                                   value: "SUM",
                                                   quote_style: None,
                                                   span: 
Span(Location(0,0)..Location(0,0)),
                                               },
                                           ],
                                       ),
                                       uses_odbc_syntax: false,
                                       parameters: None,
                                       args: List(
                                           FunctionArgumentList {
                                               duplicate_treatment: None,
                                               args: [
                                                   Unnamed(
                                                       Expr(
                                                           Identifier(
                                                               Ident {
                                                                   value: "c2",
                                                                   quote_style: 
None,
                                                                   span: 
Span(Location(0,0)..Location(0,0)),
                                                               },
                                                           ),
                                                       ),
                                                   ),
                                               ],
                                               clauses: [],
                                           },
                                       ),
                                       filter: None,
                                       null_treatment: None,
                                       over: None,
                                       within_group: [],
                                   },
                               ),
                               alias: Ident {
                                   value: "FILTER",
                                   quote_style: None,
                                   span: Span(Location(0,0)..Location(0,0)),
                               },
                           },
                       ],
                       into: None,
                       from: [],
                       lateral_views: [],
                       prewhere: None,
                       selection: None,
                       group_by: Expressions(
                           [],
                           [],
                       ),
                       cluster_by: [],
                       distribute_by: [],
                       sort_by: [],
                       having: None,
                       named_window: [],
                       qualify: None,
                       window_before_qualify: false,
                       value_table_mode: None,
                       connect_by: None,
                   },
               ),
               order_by: None,
               limit: None,
               limit_by: [],
               offset: None,
               fetch: None,
               locks: [],
               for_clause: None,
               settings: None,
               format_clause: None,
           },
       ),
   )
   ```
   </details>
   
   


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to