iffyio commented on code in PR #1879:
URL: 
https://github.com/apache/datafusion-sqlparser-rs/pull/1879#discussion_r2147453124


##########
src/parser/mod.rs:
##########
@@ -9947,6 +9947,51 @@ impl<'a> Parser<'a> {
         Ok(IdentWithAlias { ident, alias })
     }
 
+    /// Parse `identifier [AS] identifier` where the AS keyword is optional
+    pub fn parse_identifier_with_optional_alias(&mut self) -> 
Result<IdentWithAlias, ParserError> {

Review Comment:
   ```suggestion
       fn parse_identifier_with_optional_alias(&mut self) -> 
Result<IdentWithAlias, ParserError> {
   ```



##########
src/parser/mod.rs:
##########
@@ -9947,6 +9947,51 @@ impl<'a> Parser<'a> {
         Ok(IdentWithAlias { ident, alias })
     }
 
+    /// Parse `identifier [AS] identifier` where the AS keyword is optional
+    pub fn parse_identifier_with_optional_alias(&mut self) -> 
Result<IdentWithAlias, ParserError> {
+        let ident = self.parse_identifier()?;
+        let _after_as = self.parse_keyword(Keyword::AS);
+        let alias = self.parse_identifier()?;
+        Ok(IdentWithAlias { ident, alias })
+    }
+
+    /// Parse comma-separated list of parenthesized queries for pipe operators
+    fn parse_pipe_operator_queries(&mut self) -> Result<Vec<Query>, 
ParserError> {
+        self.parse_comma_separated(|parser| {
+            parser.expect_token(&Token::LParen)?;
+            let query = parser.parse_query()?;
+            parser.expect_token(&Token::RParen)?;
+            Ok(*query)
+        })
+    }
+
+    /// Parse set quantifier for pipe operators that require DISTINCT. E.g. 
INTERSECT and EXCEPT
+    fn parse_distinct_required_set_quantifier(
+        &mut self,
+        operator_name: &str,
+    ) -> Result<SetQuantifier, ParserError> {
+        if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, 
Keyword::NAME]) {
+            Ok(SetQuantifier::DistinctByName)
+        } else if self.parse_keyword(Keyword::DISTINCT) {
+            Ok(SetQuantifier::Distinct)
+        } else {
+            Err(ParserError::ParserError(format!(
+                "{} pipe operator requires DISTINCT modifier",
+                operator_name
+            )))
+        }
+    }
+
+    /// Parse optional alias (with or without AS keyword) for pipe operators
+    fn parse_optional_pipe_alias(&mut self) -> Result<Option<Ident>, 
ParserError> {
+        if self.parse_keyword(Keyword::AS) {
+            Some(self.parse_identifier()).transpose()

Review Comment:
   ```suggestion
               Ok(Some(self.parse_identifier()?))
   ```
   thinking this variant would be more explicit in this case? (the transpose 
seemed to obfuscate a bit what was going on)



##########
src/ast/query.rs:
##########
@@ -2739,7 +2812,91 @@ impl fmt::Display for PipeOperator {
             PipeOperator::TableSample { sample } => {
                 write!(f, "{}", sample)
             }
+            PipeOperator::Rename { mappings } => {
+                write!(f, "RENAME {}", display_comma_separated(mappings))
+            }
+            PipeOperator::Union {
+                set_quantifier,
+                queries,
+            } => Self::fmt_set_operation(f, "UNION", set_quantifier, queries),
+            PipeOperator::Intersect {
+                set_quantifier,
+                queries,
+            } => Self::fmt_set_operation(f, "INTERSECT", set_quantifier, 
queries),
+            PipeOperator::Except {
+                set_quantifier,
+                queries,
+            } => Self::fmt_set_operation(f, "EXCEPT", set_quantifier, queries),
+            PipeOperator::Call { function, alias } => {
+                write!(f, "CALL {}", function)?;
+                Self::fmt_optional_alias(f, alias)
+            }
+            PipeOperator::Pivot {
+                aggregate_functions,
+                value_column,
+                value_source,
+                alias,
+            } => {
+                write!(
+                    f,
+                    "PIVOT({} FOR {} IN ({}))",
+                    display_comma_separated(aggregate_functions),
+                    Expr::CompoundIdentifier(value_column.to_vec()),
+                    value_source
+                )?;
+                Self::fmt_optional_alias(f, alias)
+            }
+            PipeOperator::Unpivot {
+                value_column,
+                name_column,
+                unpivot_columns,
+                alias,
+            } => {
+                write!(
+                    f,
+                    "UNPIVOT({} FOR {} IN ({}))",
+                    value_column,
+                    name_column,
+                    display_comma_separated(unpivot_columns)
+                )?;
+                Self::fmt_optional_alias(f, alias)
+            }
+            PipeOperator::Join(join) => write!(f, "{}", join),
+        }
+    }
+}
+
+impl PipeOperator {
+    /// Helper function to format optional alias for pipe operators
+    fn fmt_optional_alias(f: &mut fmt::Formatter<'_>, alias: &Option<Ident>) 
-> fmt::Result {
+        if let Some(alias) = alias {
+            write!(f, " AS {}", alias)?;
         }
+        Ok(())
+    }
+
+    /// Helper function to format set operations (UNION, INTERSECT, EXCEPT) 
with queries
+    fn fmt_set_operation(
+        f: &mut fmt::Formatter<'_>,
+        operation: &str,
+        set_quantifier: &SetQuantifier,
+        queries: &[Query],
+    ) -> fmt::Result {
+        write!(f, "{}", operation)?;
+        match set_quantifier {
+            SetQuantifier::None => {}
+            _ => {
+                write!(f, " {}", set_quantifier)?;
+            }
+        }
+        write!(f, " ")?;
+        for (i, query) in queries.iter().enumerate() {
+            if i > 0 {
+                write!(f, ", ")?;
+            }
+            write!(f, "({})", query)?;
+        }

Review Comment:
   can this use the `display_comma_separated()` helper function?



##########
src/parser/mod.rs:
##########
@@ -11142,6 +11200,217 @@ impl<'a> Parser<'a> {
                     let sample = 
self.parse_table_sample(TableSampleModifier::TableSample)?;
                     pipe_operators.push(PipeOperator::TableSample { sample });
                 }
+                Keyword::RENAME => {
+                    let mappings =
+                        
self.parse_comma_separated(Parser::parse_identifier_with_optional_alias)?;
+                    pipe_operators.push(PipeOperator::Rename { mappings });
+                }
+                Keyword::UNION => {
+                    let set_quantifier = 
self.parse_set_quantifier(&Some(SetOperator::Union));
+                    let queries = self.parse_pipe_operator_queries()?;
+                    pipe_operators.push(PipeOperator::Union {
+                        set_quantifier,
+                        queries,
+                    });
+                }
+                Keyword::INTERSECT => {
+                    let set_quantifier =
+                        
self.parse_distinct_required_set_quantifier("INTERSECT")?;
+                    let queries = self.parse_pipe_operator_queries()?;
+                    pipe_operators.push(PipeOperator::Intersect {
+                        set_quantifier,
+                        queries,
+                    });
+                }
+                Keyword::EXCEPT => {
+                    let set_quantifier = 
self.parse_distinct_required_set_quantifier("EXCEPT")?;
+                    let queries = self.parse_pipe_operator_queries()?;
+                    pipe_operators.push(PipeOperator::Except {
+                        set_quantifier,
+                        queries,
+                    });
+                }
+                Keyword::CALL => {
+                    let function_name = self.parse_object_name(false)?;
+                    let function_expr = self.parse_function(function_name)?;
+                    if let Expr::Function(function) = function_expr {
+                        let alias = self.parse_optional_pipe_alias()?;
+                        pipe_operators.push(PipeOperator::Call { function, 
alias });
+                    } else {
+                        return Err(ParserError::ParserError(
+                            "Expected function call after CALL".to_string(),
+                        ));
+                    }
+                }
+                Keyword::PIVOT => {
+                    self.expect_token(&Token::LParen)?;
+                    let aggregate_functions =
+                        
self.parse_comma_separated(Self::parse_aliased_function_call)?;
+                    self.expect_keyword_is(Keyword::FOR)?;
+                    let value_column = self.parse_period_separated(|p| 
p.parse_identifier())?;
+                    self.expect_keyword_is(Keyword::IN)?;
+
+                    self.expect_token(&Token::LParen)?;
+                    let value_source = if self.parse_keyword(Keyword::ANY) {
+                        let order_by = if 
self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
+                            
self.parse_comma_separated(Parser::parse_order_by_expr)?
+                        } else {
+                            vec![]
+                        };
+                        PivotValueSource::Any(order_by)
+                    } else if self.peek_sub_query() {
+                        PivotValueSource::Subquery(self.parse_query()?)
+                    } else {
+                        PivotValueSource::List(
+                            
self.parse_comma_separated(Self::parse_expr_with_alias)?,
+                        )
+                    };
+                    self.expect_token(&Token::RParen)?;
+                    self.expect_token(&Token::RParen)?;
+
+                    let alias = self.parse_optional_pipe_alias()?;
+
+                    pipe_operators.push(PipeOperator::Pivot {
+                        aggregate_functions,
+                        value_column,
+                        value_source,
+                        alias,
+                    });
+                }
+                Keyword::UNPIVOT => {
+                    self.expect_token(&Token::LParen)?;
+                    let value_column = self.parse_identifier()?;
+                    self.expect_keyword(Keyword::FOR)?;
+                    let name_column = self.parse_identifier()?;
+                    self.expect_keyword(Keyword::IN)?;
+
+                    self.expect_token(&Token::LParen)?;
+                    let unpivot_columns = 
self.parse_comma_separated(Parser::parse_identifier)?;
+                    self.expect_token(&Token::RParen)?;
+
+                    self.expect_token(&Token::RParen)?;
+
+                    let alias = self.parse_optional_pipe_alias()?;
+
+                    pipe_operators.push(PipeOperator::Unpivot {
+                        value_column,
+                        name_column,
+                        unpivot_columns,
+                        alias,
+                    });
+                }
+                Keyword::JOIN => {
+                    let relation = self.parse_table_factor()?;
+                    let constraint = self.parse_join_constraint(false)?;
+                    if matches!(constraint, JoinConstraint::None) {
+                        return Err(ParserError::ParserError(
+                            "JOIN in pipe syntax requires ON or USING 
clause".to_string(),
+                        ));
+                    }

Review Comment:
   thinking these constraint validations might not be ideal and we can skip 
them? in case the pipe syntax is later ported to other dialects, there the 
semantics might differ between dialects, [prev related 
behavior](https://github.com/apache/datafusion-sqlparser-rs/pull/1552)



##########
src/parser/mod.rs:
##########
@@ -9947,6 +9947,51 @@ impl<'a> Parser<'a> {
         Ok(IdentWithAlias { ident, alias })
     }
 
+    /// Parse `identifier [AS] identifier` where the AS keyword is optional
+    pub fn parse_identifier_with_optional_alias(&mut self) -> 
Result<IdentWithAlias, ParserError> {
+        let ident = self.parse_identifier()?;
+        let _after_as = self.parse_keyword(Keyword::AS);
+        let alias = self.parse_identifier()?;
+        Ok(IdentWithAlias { ident, alias })
+    }
+
+    /// Parse comma-separated list of parenthesized queries for pipe operators
+    fn parse_pipe_operator_queries(&mut self) -> Result<Vec<Query>, 
ParserError> {
+        self.parse_comma_separated(|parser| {
+            parser.expect_token(&Token::LParen)?;
+            let query = parser.parse_query()?;
+            parser.expect_token(&Token::RParen)?;
+            Ok(*query)
+        })
+    }
+
+    /// Parse set quantifier for pipe operators that require DISTINCT. E.g. 
INTERSECT and EXCEPT
+    fn parse_distinct_required_set_quantifier(
+        &mut self,
+        operator_name: &str,
+    ) -> Result<SetQuantifier, ParserError> {
+        if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, 
Keyword::NAME]) {
+            Ok(SetQuantifier::DistinctByName)
+        } else if self.parse_keyword(Keyword::DISTINCT) {
+            Ok(SetQuantifier::Distinct)
+        } else {
+            Err(ParserError::ParserError(format!(
+                "{} pipe operator requires DISTINCT modifier",
+                operator_name
+            )))
+        }
+    }
+
+    /// Parse optional alias (with or without AS keyword) for pipe operators
+    fn parse_optional_pipe_alias(&mut self) -> Result<Option<Ident>, 
ParserError> {

Review Comment:
   ```suggestion
       fn parse_identifier_optional_alias(&mut self) -> Result<Option<Ident>, 
ParserError> {
   ```
   maybe we could rename this to something more generic? feels like it could be 
reusable in other contexts than pipe operators



##########
src/parser/mod.rs:
##########
@@ -9947,6 +9947,51 @@ impl<'a> Parser<'a> {
         Ok(IdentWithAlias { ident, alias })
     }
 
+    /// Parse `identifier [AS] identifier` where the AS keyword is optional
+    pub fn parse_identifier_with_optional_alias(&mut self) -> 
Result<IdentWithAlias, ParserError> {
+        let ident = self.parse_identifier()?;
+        let _after_as = self.parse_keyword(Keyword::AS);
+        let alias = self.parse_identifier()?;
+        Ok(IdentWithAlias { ident, alias })
+    }
+
+    /// Parse comma-separated list of parenthesized queries for pipe operators
+    fn parse_pipe_operator_queries(&mut self) -> Result<Vec<Query>, 
ParserError> {
+        self.parse_comma_separated(|parser| {
+            parser.expect_token(&Token::LParen)?;
+            let query = parser.parse_query()?;
+            parser.expect_token(&Token::RParen)?;
+            Ok(*query)
+        })
+    }
+
+    /// Parse set quantifier for pipe operators that require DISTINCT. E.g. 
INTERSECT and EXCEPT
+    fn parse_distinct_required_set_quantifier(
+        &mut self,
+        operator_name: &str,
+    ) -> Result<SetQuantifier, ParserError> {
+        if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, 
Keyword::NAME]) {
+            Ok(SetQuantifier::DistinctByName)
+        } else if self.parse_keyword(Keyword::DISTINCT) {
+            Ok(SetQuantifier::Distinct)
+        } else {
+            Err(ParserError::ParserError(format!(
+                "{} pipe operator requires DISTINCT modifier",
+                operator_name
+            )))
+        }

Review Comment:
   Thinking since this block is a subset of the set_quantifier parsing logic 
[here](https://github.com/apache/datafusion-sqlparser-rs/blob/552def1f9ec99919222e41e18a8a517eaf582b93/src/parser/mod.rs#L11693-L11707),
 it might be good to reuse? one way could be we pull the latter out into a 
function and call it here, rejecting any returned upexpected variants. another 
would be that we update the latter to call this 
`parse_distinct_required_set_quantifier()` function



##########
src/parser/mod.rs:
##########
@@ -11142,6 +11200,217 @@ impl<'a> Parser<'a> {
                     let sample = 
self.parse_table_sample(TableSampleModifier::TableSample)?;
                     pipe_operators.push(PipeOperator::TableSample { sample });
                 }
+                Keyword::RENAME => {
+                    let mappings =
+                        
self.parse_comma_separated(Parser::parse_identifier_with_optional_alias)?;
+                    pipe_operators.push(PipeOperator::Rename { mappings });
+                }
+                Keyword::UNION => {
+                    let set_quantifier = 
self.parse_set_quantifier(&Some(SetOperator::Union));
+                    let queries = self.parse_pipe_operator_queries()?;
+                    pipe_operators.push(PipeOperator::Union {
+                        set_quantifier,
+                        queries,
+                    });
+                }
+                Keyword::INTERSECT => {
+                    let set_quantifier =
+                        
self.parse_distinct_required_set_quantifier("INTERSECT")?;
+                    let queries = self.parse_pipe_operator_queries()?;
+                    pipe_operators.push(PipeOperator::Intersect {
+                        set_quantifier,
+                        queries,
+                    });
+                }
+                Keyword::EXCEPT => {
+                    let set_quantifier = 
self.parse_distinct_required_set_quantifier("EXCEPT")?;
+                    let queries = self.parse_pipe_operator_queries()?;
+                    pipe_operators.push(PipeOperator::Except {
+                        set_quantifier,
+                        queries,
+                    });
+                }
+                Keyword::CALL => {
+                    let function_name = self.parse_object_name(false)?;
+                    let function_expr = self.parse_function(function_name)?;
+                    if let Expr::Function(function) = function_expr {
+                        let alias = self.parse_optional_pipe_alias()?;
+                        pipe_operators.push(PipeOperator::Call { function, 
alias });
+                    } else {
+                        return Err(ParserError::ParserError(
+                            "Expected function call after CALL".to_string(),
+                        ));
+                    }
+                }
+                Keyword::PIVOT => {
+                    self.expect_token(&Token::LParen)?;
+                    let aggregate_functions =
+                        
self.parse_comma_separated(Self::parse_aliased_function_call)?;
+                    self.expect_keyword_is(Keyword::FOR)?;
+                    let value_column = self.parse_period_separated(|p| 
p.parse_identifier())?;
+                    self.expect_keyword_is(Keyword::IN)?;
+
+                    self.expect_token(&Token::LParen)?;
+                    let value_source = if self.parse_keyword(Keyword::ANY) {
+                        let order_by = if 
self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
+                            
self.parse_comma_separated(Parser::parse_order_by_expr)?
+                        } else {
+                            vec![]
+                        };
+                        PivotValueSource::Any(order_by)
+                    } else if self.peek_sub_query() {
+                        PivotValueSource::Subquery(self.parse_query()?)
+                    } else {
+                        PivotValueSource::List(
+                            
self.parse_comma_separated(Self::parse_expr_with_alias)?,
+                        )
+                    };
+                    self.expect_token(&Token::RParen)?;
+                    self.expect_token(&Token::RParen)?;
+
+                    let alias = self.parse_optional_pipe_alias()?;
+
+                    pipe_operators.push(PipeOperator::Pivot {
+                        aggregate_functions,
+                        value_column,
+                        value_source,
+                        alias,
+                    });
+                }
+                Keyword::UNPIVOT => {
+                    self.expect_token(&Token::LParen)?;
+                    let value_column = self.parse_identifier()?;
+                    self.expect_keyword(Keyword::FOR)?;
+                    let name_column = self.parse_identifier()?;
+                    self.expect_keyword(Keyword::IN)?;
+
+                    self.expect_token(&Token::LParen)?;
+                    let unpivot_columns = 
self.parse_comma_separated(Parser::parse_identifier)?;
+                    self.expect_token(&Token::RParen)?;
+
+                    self.expect_token(&Token::RParen)?;
+
+                    let alias = self.parse_optional_pipe_alias()?;
+
+                    pipe_operators.push(PipeOperator::Unpivot {
+                        value_column,
+                        name_column,
+                        unpivot_columns,
+                        alias,
+                    });
+                }
+                Keyword::JOIN => {

Review Comment:
   for the joins, I'm wondering if its possible to pull out and reuse [the 
existing logic 
here](https://github.com/apache/datafusion-sqlparser-rs/blob/552def1f9ec99919222e41e18a8a517eaf582b93/src/parser/mod.rs#L12606-L12702)?
 e.g. here we'll `prev_token()` if needed whenever we see the next operator 
looks like a join and delegate to that helper function?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to