iffyio commented on code in PR #1879: URL: https://github.com/apache/datafusion-sqlparser-rs/pull/1879#discussion_r2147453124
########## src/parser/mod.rs: ########## @@ -9947,6 +9947,51 @@ impl<'a> Parser<'a> { Ok(IdentWithAlias { ident, alias }) } + /// Parse `identifier [AS] identifier` where the AS keyword is optional + pub fn parse_identifier_with_optional_alias(&mut self) -> Result<IdentWithAlias, ParserError> { Review Comment: ```suggestion fn parse_identifier_with_optional_alias(&mut self) -> Result<IdentWithAlias, ParserError> { ``` ########## src/parser/mod.rs: ########## @@ -9947,6 +9947,51 @@ impl<'a> Parser<'a> { Ok(IdentWithAlias { ident, alias }) } + /// Parse `identifier [AS] identifier` where the AS keyword is optional + pub fn parse_identifier_with_optional_alias(&mut self) -> Result<IdentWithAlias, ParserError> { + let ident = self.parse_identifier()?; + let _after_as = self.parse_keyword(Keyword::AS); + let alias = self.parse_identifier()?; + Ok(IdentWithAlias { ident, alias }) + } + + /// Parse comma-separated list of parenthesized queries for pipe operators + fn parse_pipe_operator_queries(&mut self) -> Result<Vec<Query>, ParserError> { + self.parse_comma_separated(|parser| { + parser.expect_token(&Token::LParen)?; + let query = parser.parse_query()?; + parser.expect_token(&Token::RParen)?; + Ok(*query) + }) + } + + /// Parse set quantifier for pipe operators that require DISTINCT. E.g. INTERSECT and EXCEPT + fn parse_distinct_required_set_quantifier( + &mut self, + operator_name: &str, + ) -> Result<SetQuantifier, ParserError> { + if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { + Ok(SetQuantifier::DistinctByName) + } else if self.parse_keyword(Keyword::DISTINCT) { + Ok(SetQuantifier::Distinct) + } else { + Err(ParserError::ParserError(format!( + "{} pipe operator requires DISTINCT modifier", + operator_name + ))) + } + } + + /// Parse optional alias (with or without AS keyword) for pipe operators + fn parse_optional_pipe_alias(&mut self) -> Result<Option<Ident>, ParserError> { + if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier()).transpose() Review Comment: ```suggestion Ok(Some(self.parse_identifier()?)) ``` thinking this variant would be more explicit in this case? (the transpose seemed to obfuscate a bit what was going on) ########## src/ast/query.rs: ########## @@ -2739,7 +2812,91 @@ impl fmt::Display for PipeOperator { PipeOperator::TableSample { sample } => { write!(f, "{}", sample) } + PipeOperator::Rename { mappings } => { + write!(f, "RENAME {}", display_comma_separated(mappings)) + } + PipeOperator::Union { + set_quantifier, + queries, + } => Self::fmt_set_operation(f, "UNION", set_quantifier, queries), + PipeOperator::Intersect { + set_quantifier, + queries, + } => Self::fmt_set_operation(f, "INTERSECT", set_quantifier, queries), + PipeOperator::Except { + set_quantifier, + queries, + } => Self::fmt_set_operation(f, "EXCEPT", set_quantifier, queries), + PipeOperator::Call { function, alias } => { + write!(f, "CALL {}", function)?; + Self::fmt_optional_alias(f, alias) + } + PipeOperator::Pivot { + aggregate_functions, + value_column, + value_source, + alias, + } => { + write!( + f, + "PIVOT({} FOR {} IN ({}))", + display_comma_separated(aggregate_functions), + Expr::CompoundIdentifier(value_column.to_vec()), + value_source + )?; + Self::fmt_optional_alias(f, alias) + } + PipeOperator::Unpivot { + value_column, + name_column, + unpivot_columns, + alias, + } => { + write!( + f, + "UNPIVOT({} FOR {} IN ({}))", + value_column, + name_column, + display_comma_separated(unpivot_columns) + )?; + Self::fmt_optional_alias(f, alias) + } + PipeOperator::Join(join) => write!(f, "{}", join), + } + } +} + +impl PipeOperator { + /// Helper function to format optional alias for pipe operators + fn fmt_optional_alias(f: &mut fmt::Formatter<'_>, alias: &Option<Ident>) -> fmt::Result { + if let Some(alias) = alias { + write!(f, " AS {}", alias)?; } + Ok(()) + } + + /// Helper function to format set operations (UNION, INTERSECT, EXCEPT) with queries + fn fmt_set_operation( + f: &mut fmt::Formatter<'_>, + operation: &str, + set_quantifier: &SetQuantifier, + queries: &[Query], + ) -> fmt::Result { + write!(f, "{}", operation)?; + match set_quantifier { + SetQuantifier::None => {} + _ => { + write!(f, " {}", set_quantifier)?; + } + } + write!(f, " ")?; + for (i, query) in queries.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "({})", query)?; + } Review Comment: can this use the `display_comma_separated()` helper function? ########## src/parser/mod.rs: ########## @@ -11142,6 +11200,217 @@ impl<'a> Parser<'a> { let sample = self.parse_table_sample(TableSampleModifier::TableSample)?; pipe_operators.push(PipeOperator::TableSample { sample }); } + Keyword::RENAME => { + let mappings = + self.parse_comma_separated(Parser::parse_identifier_with_optional_alias)?; + pipe_operators.push(PipeOperator::Rename { mappings }); + } + Keyword::UNION => { + let set_quantifier = self.parse_set_quantifier(&Some(SetOperator::Union)); + let queries = self.parse_pipe_operator_queries()?; + pipe_operators.push(PipeOperator::Union { + set_quantifier, + queries, + }); + } + Keyword::INTERSECT => { + let set_quantifier = + self.parse_distinct_required_set_quantifier("INTERSECT")?; + let queries = self.parse_pipe_operator_queries()?; + pipe_operators.push(PipeOperator::Intersect { + set_quantifier, + queries, + }); + } + Keyword::EXCEPT => { + let set_quantifier = self.parse_distinct_required_set_quantifier("EXCEPT")?; + let queries = self.parse_pipe_operator_queries()?; + pipe_operators.push(PipeOperator::Except { + set_quantifier, + queries, + }); + } + Keyword::CALL => { + let function_name = self.parse_object_name(false)?; + let function_expr = self.parse_function(function_name)?; + if let Expr::Function(function) = function_expr { + let alias = self.parse_optional_pipe_alias()?; + pipe_operators.push(PipeOperator::Call { function, alias }); + } else { + return Err(ParserError::ParserError( + "Expected function call after CALL".to_string(), + )); + } + } + Keyword::PIVOT => { + self.expect_token(&Token::LParen)?; + let aggregate_functions = + self.parse_comma_separated(Self::parse_aliased_function_call)?; + self.expect_keyword_is(Keyword::FOR)?; + let value_column = self.parse_period_separated(|p| p.parse_identifier())?; + self.expect_keyword_is(Keyword::IN)?; + + self.expect_token(&Token::LParen)?; + let value_source = if self.parse_keyword(Keyword::ANY) { + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + PivotValueSource::Any(order_by) + } else if self.peek_sub_query() { + PivotValueSource::Subquery(self.parse_query()?) + } else { + PivotValueSource::List( + self.parse_comma_separated(Self::parse_expr_with_alias)?, + ) + }; + self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen)?; + + let alias = self.parse_optional_pipe_alias()?; + + pipe_operators.push(PipeOperator::Pivot { + aggregate_functions, + value_column, + value_source, + alias, + }); + } + Keyword::UNPIVOT => { + self.expect_token(&Token::LParen)?; + let value_column = self.parse_identifier()?; + self.expect_keyword(Keyword::FOR)?; + let name_column = self.parse_identifier()?; + self.expect_keyword(Keyword::IN)?; + + self.expect_token(&Token::LParen)?; + let unpivot_columns = self.parse_comma_separated(Parser::parse_identifier)?; + self.expect_token(&Token::RParen)?; + + self.expect_token(&Token::RParen)?; + + let alias = self.parse_optional_pipe_alias()?; + + pipe_operators.push(PipeOperator::Unpivot { + value_column, + name_column, + unpivot_columns, + alias, + }); + } + Keyword::JOIN => { + let relation = self.parse_table_factor()?; + let constraint = self.parse_join_constraint(false)?; + if matches!(constraint, JoinConstraint::None) { + return Err(ParserError::ParserError( + "JOIN in pipe syntax requires ON or USING clause".to_string(), + )); + } Review Comment: thinking these constraint validations might not be ideal and we can skip them? in case the pipe syntax is later ported to other dialects, there the semantics might differ between dialects, [prev related behavior](https://github.com/apache/datafusion-sqlparser-rs/pull/1552) ########## src/parser/mod.rs: ########## @@ -9947,6 +9947,51 @@ impl<'a> Parser<'a> { Ok(IdentWithAlias { ident, alias }) } + /// Parse `identifier [AS] identifier` where the AS keyword is optional + pub fn parse_identifier_with_optional_alias(&mut self) -> Result<IdentWithAlias, ParserError> { + let ident = self.parse_identifier()?; + let _after_as = self.parse_keyword(Keyword::AS); + let alias = self.parse_identifier()?; + Ok(IdentWithAlias { ident, alias }) + } + + /// Parse comma-separated list of parenthesized queries for pipe operators + fn parse_pipe_operator_queries(&mut self) -> Result<Vec<Query>, ParserError> { + self.parse_comma_separated(|parser| { + parser.expect_token(&Token::LParen)?; + let query = parser.parse_query()?; + parser.expect_token(&Token::RParen)?; + Ok(*query) + }) + } + + /// Parse set quantifier for pipe operators that require DISTINCT. E.g. INTERSECT and EXCEPT + fn parse_distinct_required_set_quantifier( + &mut self, + operator_name: &str, + ) -> Result<SetQuantifier, ParserError> { + if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { + Ok(SetQuantifier::DistinctByName) + } else if self.parse_keyword(Keyword::DISTINCT) { + Ok(SetQuantifier::Distinct) + } else { + Err(ParserError::ParserError(format!( + "{} pipe operator requires DISTINCT modifier", + operator_name + ))) + } + } + + /// Parse optional alias (with or without AS keyword) for pipe operators + fn parse_optional_pipe_alias(&mut self) -> Result<Option<Ident>, ParserError> { Review Comment: ```suggestion fn parse_identifier_optional_alias(&mut self) -> Result<Option<Ident>, ParserError> { ``` maybe we could rename this to something more generic? feels like it could be reusable in other contexts than pipe operators ########## src/parser/mod.rs: ########## @@ -9947,6 +9947,51 @@ impl<'a> Parser<'a> { Ok(IdentWithAlias { ident, alias }) } + /// Parse `identifier [AS] identifier` where the AS keyword is optional + pub fn parse_identifier_with_optional_alias(&mut self) -> Result<IdentWithAlias, ParserError> { + let ident = self.parse_identifier()?; + let _after_as = self.parse_keyword(Keyword::AS); + let alias = self.parse_identifier()?; + Ok(IdentWithAlias { ident, alias }) + } + + /// Parse comma-separated list of parenthesized queries for pipe operators + fn parse_pipe_operator_queries(&mut self) -> Result<Vec<Query>, ParserError> { + self.parse_comma_separated(|parser| { + parser.expect_token(&Token::LParen)?; + let query = parser.parse_query()?; + parser.expect_token(&Token::RParen)?; + Ok(*query) + }) + } + + /// Parse set quantifier for pipe operators that require DISTINCT. E.g. INTERSECT and EXCEPT + fn parse_distinct_required_set_quantifier( + &mut self, + operator_name: &str, + ) -> Result<SetQuantifier, ParserError> { + if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { + Ok(SetQuantifier::DistinctByName) + } else if self.parse_keyword(Keyword::DISTINCT) { + Ok(SetQuantifier::Distinct) + } else { + Err(ParserError::ParserError(format!( + "{} pipe operator requires DISTINCT modifier", + operator_name + ))) + } Review Comment: Thinking since this block is a subset of the set_quantifier parsing logic [here](https://github.com/apache/datafusion-sqlparser-rs/blob/552def1f9ec99919222e41e18a8a517eaf582b93/src/parser/mod.rs#L11693-L11707), it might be good to reuse? one way could be we pull the latter out into a function and call it here, rejecting any returned upexpected variants. another would be that we update the latter to call this `parse_distinct_required_set_quantifier()` function ########## src/parser/mod.rs: ########## @@ -11142,6 +11200,217 @@ impl<'a> Parser<'a> { let sample = self.parse_table_sample(TableSampleModifier::TableSample)?; pipe_operators.push(PipeOperator::TableSample { sample }); } + Keyword::RENAME => { + let mappings = + self.parse_comma_separated(Parser::parse_identifier_with_optional_alias)?; + pipe_operators.push(PipeOperator::Rename { mappings }); + } + Keyword::UNION => { + let set_quantifier = self.parse_set_quantifier(&Some(SetOperator::Union)); + let queries = self.parse_pipe_operator_queries()?; + pipe_operators.push(PipeOperator::Union { + set_quantifier, + queries, + }); + } + Keyword::INTERSECT => { + let set_quantifier = + self.parse_distinct_required_set_quantifier("INTERSECT")?; + let queries = self.parse_pipe_operator_queries()?; + pipe_operators.push(PipeOperator::Intersect { + set_quantifier, + queries, + }); + } + Keyword::EXCEPT => { + let set_quantifier = self.parse_distinct_required_set_quantifier("EXCEPT")?; + let queries = self.parse_pipe_operator_queries()?; + pipe_operators.push(PipeOperator::Except { + set_quantifier, + queries, + }); + } + Keyword::CALL => { + let function_name = self.parse_object_name(false)?; + let function_expr = self.parse_function(function_name)?; + if let Expr::Function(function) = function_expr { + let alias = self.parse_optional_pipe_alias()?; + pipe_operators.push(PipeOperator::Call { function, alias }); + } else { + return Err(ParserError::ParserError( + "Expected function call after CALL".to_string(), + )); + } + } + Keyword::PIVOT => { + self.expect_token(&Token::LParen)?; + let aggregate_functions = + self.parse_comma_separated(Self::parse_aliased_function_call)?; + self.expect_keyword_is(Keyword::FOR)?; + let value_column = self.parse_period_separated(|p| p.parse_identifier())?; + self.expect_keyword_is(Keyword::IN)?; + + self.expect_token(&Token::LParen)?; + let value_source = if self.parse_keyword(Keyword::ANY) { + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + PivotValueSource::Any(order_by) + } else if self.peek_sub_query() { + PivotValueSource::Subquery(self.parse_query()?) + } else { + PivotValueSource::List( + self.parse_comma_separated(Self::parse_expr_with_alias)?, + ) + }; + self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen)?; + + let alias = self.parse_optional_pipe_alias()?; + + pipe_operators.push(PipeOperator::Pivot { + aggregate_functions, + value_column, + value_source, + alias, + }); + } + Keyword::UNPIVOT => { + self.expect_token(&Token::LParen)?; + let value_column = self.parse_identifier()?; + self.expect_keyword(Keyword::FOR)?; + let name_column = self.parse_identifier()?; + self.expect_keyword(Keyword::IN)?; + + self.expect_token(&Token::LParen)?; + let unpivot_columns = self.parse_comma_separated(Parser::parse_identifier)?; + self.expect_token(&Token::RParen)?; + + self.expect_token(&Token::RParen)?; + + let alias = self.parse_optional_pipe_alias()?; + + pipe_operators.push(PipeOperator::Unpivot { + value_column, + name_column, + unpivot_columns, + alias, + }); + } + Keyword::JOIN => { Review Comment: for the joins, I'm wondering if its possible to pull out and reuse [the existing logic here](https://github.com/apache/datafusion-sqlparser-rs/blob/552def1f9ec99919222e41e18a8a517eaf582b93/src/parser/mod.rs#L12606-L12702)? e.g. here we'll `prev_token()` if needed whenever we see the next operator looks like a join and delegate to that helper function? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org