mvzink commented on code in PR #1757: URL: https://github.com/apache/datafusion-sqlparser-rs/pull/1757#discussion_r1984139573
########## tests/sqlparser_common.rs: ########## @@ -14654,3 +14640,23 @@ fn parse_set_names() { dialects.verified_stmt("SET NAMES 'utf8'"); dialects.verified_stmt("SET NAMES UTF8 COLLATE bogus"); } + +#[test] +fn parse_multiple_set_statements() -> Result<(), ParserError> { + let dialects = all_dialects_where(|d| d.supports_comma_separated_set_assignments()); + let stmt = dialects.parse_sql_statements("SET @a = 1, b = 2")?; Review Comment: I think it would be preferable to have a `verified_stmt` here, and do more explicit checks for the variables and values (not just length checks) below. For one thing, if my other comment about MySQL style getting rendered as Snowflake style is correct, using `verified_stmt` here would catch that. ########## src/dialect/mysql.rs: ########## @@ -141,6 +141,10 @@ impl Dialect for MySqlDialect { fn supports_set_names(&self) -> bool { true } + + fn supports_comma_separated_set_assignments(&self) -> bool { + true Review Comment: Probably should set this to true in `GenericDialect` too. ########## src/parser/mod.rs: ########## @@ -10961,127 +10961,182 @@ impl<'a> Parser<'a> { }) } - pub fn parse_set(&mut self) -> Result<Statement, ParserError> { - let modifier = - self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); - if let Some(Keyword::HIVEVAR) = modifier { - self.expect_token(&Token::Colon)?; - } else if let Some(set_role_stmt) = - self.maybe_parse(|parser| parser.parse_set_role(modifier))? - { - return Ok(set_role_stmt); + fn parse_set_values( + &mut self, + parenthesized_assignment: bool, + ) -> Result<Vec<Expr>, ParserError> { + let mut values = vec![]; + + if parenthesized_assignment { + self.expect_token(&Token::LParen)?; } - let variables = if self.parse_keywords(&[Keyword::TIME, Keyword::ZONE]) { - OneOrManyWithParens::One(ObjectName::from(vec!["TIMEZONE".into()])) - } else if self.dialect.supports_parenthesized_set_variables() + loop { + let value = if let Some(expr) = self.try_parse_expr_sub_query()? { + expr + } else if let Ok(expr) = self.parse_expr() { + expr + } else { + self.expected("variable value", self.peek_token())? + }; + + values.push(value); + if self.consume_token(&Token::Comma) { + continue; + } + + if parenthesized_assignment { + self.expect_token(&Token::RParen)?; + } + return Ok(values); + } + } + + fn parse_set_assignment( + &mut self, + ) -> Result<(OneOrManyWithParens<ObjectName>, Expr), ParserError> { + let variables = if self.dialect.supports_parenthesized_set_variables() && self.consume_token(&Token::LParen) { - let variables = OneOrManyWithParens::Many( + let vars = OneOrManyWithParens::Many( self.parse_comma_separated(|parser: &mut Parser<'a>| parser.parse_identifier())? .into_iter() .map(|ident| ObjectName::from(vec![ident])) .collect(), ); self.expect_token(&Token::RParen)?; - variables + vars } else { OneOrManyWithParens::One(self.parse_object_name(false)?) }; - let names = matches!(&variables, OneOrManyWithParens::One(variable) if variable.to_string().eq_ignore_ascii_case("NAMES")); + if !(self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO)) { + return self.expected("assignment operator", self.peek_token()); + } - if names && self.dialect.supports_set_names() { - if self.parse_keyword(Keyword::DEFAULT) { - return Ok(Statement::SetNamesDefault {}); - } + let values = self.parse_expr()?; - let charset_name = self.parse_identifier()?; - let collation_name = if self.parse_one_of_keywords(&[Keyword::COLLATE]).is_some() { - Some(self.parse_literal_string()?) - } else { - None - }; + Ok((variables, values)) + } - return Ok(Statement::SetNames { - charset_name, - collation_name, - }); + fn parse_set(&mut self) -> Result<Statement, ParserError> { + let modifier = + self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); + + if let Some(Keyword::HIVEVAR) = modifier { + self.expect_token(&Token::Colon)?; } - let parenthesized_assignment = matches!(&variables, OneOrManyWithParens::Many(_)); + if let Some(set_role_stmt) = self.maybe_parse(|parser| parser.parse_set_role(modifier))? { + return Ok(set_role_stmt); + } - if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { - if parenthesized_assignment { - self.expect_token(&Token::LParen)?; - } + if self.dialect.supports_comma_separated_set_assignments() { + if let Ok(v) = + self.try_parse(|parser| parser.parse_comma_separated(Parser::parse_set_assignment)) + { Review Comment: If I'm reading this `if` branch correctly, this will actually translate MySQL-style syntax into Snowflake-style syntax (i.e. the test string below will parse the same as, and get `format`ted as, `SET (@a, b) = (1, 2)`, because we still just have a pair of `OneOrManyWithParens` in the `SetVariable`, and nothing to tell us which style was used. Please let me know if that's wrong. Otherwise, I don't know what would be the best way to modify/replace `SetVariable` to represent this distinction, but we'll need something. In line with a recent PR to improve `CASE` handling (#1733), I would suggest that if we parse the MySQL style, we should have an entirely different `Statement` variant along the lines of `SetVariables { assignments: Vec<(Scope, Ident, Expr)> }`. (Note that with scope, I'm looking ahead to #1694 because each variable assignment has its own scope, and it could be either `@@global.var` or `GLOBAR var` style, which isn't currently supported.) (And I would maybe even replace `Statement::SetVariable` with `Statement::Set(SetStatement)` to encapsulate all of these by having `parse_set` return `SetStatement`, but I don't really know whether others would find that desirable.) ########## src/parser/mod.rs: ########## @@ -10961,127 +10961,184 @@ impl<'a> Parser<'a> { }) } - pub fn parse_set(&mut self) -> Result<Statement, ParserError> { - let modifier = - self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); - if let Some(Keyword::HIVEVAR) = modifier { - self.expect_token(&Token::Colon)?; - } else if let Some(set_role_stmt) = - self.maybe_parse(|parser| parser.parse_set_role(modifier))? - { - return Ok(set_role_stmt); + fn parse_set_values( + &mut self, + parenthesized_assignment: bool, + ) -> Result<Vec<Expr>, ParserError> { + let mut values = vec![]; + + if parenthesized_assignment { + self.expect_token(&Token::LParen)?; } - let variables = if self.parse_keywords(&[Keyword::TIME, Keyword::ZONE]) { - OneOrManyWithParens::One(ObjectName::from(vec!["TIMEZONE".into()])) - } else if self.dialect.supports_parenthesized_set_variables() + loop { + let value = if let Some(expr) = self.try_parse_expr_sub_query()? { + expr + } else if let Ok(expr) = self.parse_expr() { + expr + } else { + self.expected("variable value", self.peek_token())? + }; + + values.push(value); + if self.consume_token(&Token::Comma) { + continue; + } + + if parenthesized_assignment { + self.expect_token(&Token::RParen)?; + } + return Ok(values); + } + } + + fn parse_set_assignment( + &mut self, + ) -> Result<(OneOrManyWithParens<ObjectName>, Expr), ParserError> { + let variables = if self.dialect.supports_parenthesized_set_variables() && self.consume_token(&Token::LParen) { - let variables = OneOrManyWithParens::Many( + let vars = OneOrManyWithParens::Many( self.parse_comma_separated(|parser: &mut Parser<'a>| parser.parse_identifier())? .into_iter() .map(|ident| ObjectName::from(vec![ident])) .collect(), ); self.expect_token(&Token::RParen)?; - variables + vars } else { OneOrManyWithParens::One(self.parse_object_name(false)?) }; - let names = matches!(&variables, OneOrManyWithParens::One(variable) if variable.to_string().eq_ignore_ascii_case("NAMES")); + if !(self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO)) { + return self.expected("assignment operator", self.peek_token()); + } - if names && self.dialect.supports_set_names() { - if self.parse_keyword(Keyword::DEFAULT) { - return Ok(Statement::SetNamesDefault {}); - } + let values = self.parse_expr()?; - let charset_name = self.parse_identifier()?; - let collation_name = if self.parse_one_of_keywords(&[Keyword::COLLATE]).is_some() { - Some(self.parse_literal_string()?) - } else { - None - }; + Ok((variables, values)) + } - return Ok(Statement::SetNames { - charset_name, - collation_name, - }); + fn parse_set(&mut self) -> Result<Statement, ParserError> { + let modifier = + self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); + + if let Some(Keyword::HIVEVAR) = modifier { + self.expect_token(&Token::Colon)?; } - let parenthesized_assignment = matches!(&variables, OneOrManyWithParens::Many(_)); + if let Some(set_role_stmt) = self.maybe_parse(|parser| parser.parse_set_role(modifier))? { + return Ok(set_role_stmt); + } - if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { - if parenthesized_assignment { - self.expect_token(&Token::LParen)?; - } + if self.dialect.supports_comma_separated_set_assignments() { + if let Ok(v) = self + .try_parse(|parser| Ok(parser.parse_comma_separated(Parser::parse_set_assignment)?)) + { + let (variables, values): (Vec<_>, Vec<_>) = v.into_iter().unzip(); - let mut values = vec![]; - loop { - let value = if let Some(expr) = self.try_parse_expr_sub_query()? { - expr - } else if let Ok(expr) = self.parse_expr() { - expr + let variables = if variables.len() == 1 { + variables.into_iter().next().unwrap() } else { - self.expected("variable value", self.peek_token())? + OneOrManyWithParens::Many(variables.into_iter().flatten().map(|v| v).collect()) }; - values.push(value); - if self.consume_token(&Token::Comma) { - continue; - } - - if parenthesized_assignment { - self.expect_token(&Token::RParen)?; - } return Ok(Statement::SetVariable { local: modifier == Some(Keyword::LOCAL), - hivevar: Some(Keyword::HIVEVAR) == modifier, + hivevar: modifier == Some(Keyword::HIVEVAR), variables, value: values, }); } } + let variables = if self.parse_keywords(&[Keyword::TIME, Keyword::ZONE]) { + OneOrManyWithParens::One(ObjectName::from(vec!["TIMEZONE".into()])) + } else if self.dialect.supports_parenthesized_set_variables() + && self.consume_token(&Token::LParen) + { + let variables = OneOrManyWithParens::Many( + self.parse_comma_separated(|parser: &mut Parser<'a>| parser.parse_identifier())? + .into_iter() + .map(|ident| ObjectName::from(vec![ident])) + .collect(), + ); + self.expect_token(&Token::RParen)?; + variables + } else { + OneOrManyWithParens::One(self.parse_object_name(false)?) + }; + + if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { + let parenthesized_assignment = matches!(&variables, OneOrManyWithParens::Many(_)); + let values = self.parse_set_values(parenthesized_assignment)?; + + return Ok(Statement::SetVariable { + local: modifier == Some(Keyword::LOCAL), + hivevar: modifier == Some(Keyword::HIVEVAR), + variables, + value: values, + }); + } + let OneOrManyWithParens::One(variable) = variables else { return self.expected("set variable", self.peek_token()); }; - if variable.to_string().eq_ignore_ascii_case("TIMEZONE") { - // for some db (e.g. postgresql), SET TIME ZONE <value> is an alias for SET TIMEZONE [TO|=] <value> - match self.parse_expr() { - Ok(expr) => Ok(Statement::SetTimeZone { - local: modifier == Some(Keyword::LOCAL), - value: expr, - }), - _ => self.expected("timezone value", self.peek_token())?, - } - } else if variable.to_string() == "CHARACTERISTICS" { - self.expect_keywords(&[Keyword::AS, Keyword::TRANSACTION])?; - Ok(Statement::SetTransaction { - modes: self.parse_transaction_modes()?, - snapshot: None, - session: true, - }) - } else if variable.to_string() == "TRANSACTION" && modifier.is_none() { - if self.parse_keyword(Keyword::SNAPSHOT) { - let snapshot_id = self.parse_value()?.value; + match variable.to_string().to_ascii_uppercase().as_str() { + "NAMES" if self.dialect.supports_set_names() => { + if self.parse_keyword(Keyword::DEFAULT) { + return Ok(Statement::SetNamesDefault {}); + } + let charset_name = self.parse_identifier()?; + let collation_name = if self.parse_one_of_keywords(&[Keyword::COLLATE]).is_some() { + Some(self.parse_literal_string()?) + } else { + None + }; + + return Ok(Statement::SetNames { + charset_name, + collation_name, + }); + } + "TIMEZONE" => match self.parse_expr() { + Ok(expr) => { + return Ok(Statement::SetTimeZone { + local: modifier == Some(Keyword::LOCAL), + value: expr, + }) + } + _ => return self.expected("timezone value", self.peek_token()), + }, + "CHARACTERISTICS" => { + self.expect_keywords(&[Keyword::AS, Keyword::TRANSACTION])?; + return Ok(Statement::SetTransaction { + modes: self.parse_transaction_modes()?, + snapshot: None, + session: true, + }); + } + "TRANSACTION" if modifier.is_none() => { + if self.parse_keyword(Keyword::SNAPSHOT) { Review Comment: This could probably be something more like: ```rust let snapshot = if self.parse_keyword(Keyword::SNAPSHOT) { self.parse_value()?.value } else { None } Ok(Statement::SetTransaction { snapshot, .. } ``` ########## src/parser/mod.rs: ########## @@ -10961,127 +10961,184 @@ impl<'a> Parser<'a> { }) } - pub fn parse_set(&mut self) -> Result<Statement, ParserError> { - let modifier = - self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); - if let Some(Keyword::HIVEVAR) = modifier { - self.expect_token(&Token::Colon)?; - } else if let Some(set_role_stmt) = - self.maybe_parse(|parser| parser.parse_set_role(modifier))? - { - return Ok(set_role_stmt); + fn parse_set_values( + &mut self, + parenthesized_assignment: bool, + ) -> Result<Vec<Expr>, ParserError> { + let mut values = vec![]; + + if parenthesized_assignment { + self.expect_token(&Token::LParen)?; } - let variables = if self.parse_keywords(&[Keyword::TIME, Keyword::ZONE]) { - OneOrManyWithParens::One(ObjectName::from(vec!["TIMEZONE".into()])) - } else if self.dialect.supports_parenthesized_set_variables() + loop { + let value = if let Some(expr) = self.try_parse_expr_sub_query()? { + expr + } else if let Ok(expr) = self.parse_expr() { + expr + } else { + self.expected("variable value", self.peek_token())? + }; + + values.push(value); + if self.consume_token(&Token::Comma) { + continue; + } + + if parenthesized_assignment { + self.expect_token(&Token::RParen)?; + } + return Ok(values); + } + } + + fn parse_set_assignment( + &mut self, + ) -> Result<(OneOrManyWithParens<ObjectName>, Expr), ParserError> { + let variables = if self.dialect.supports_parenthesized_set_variables() && self.consume_token(&Token::LParen) { - let variables = OneOrManyWithParens::Many( + let vars = OneOrManyWithParens::Many( self.parse_comma_separated(|parser: &mut Parser<'a>| parser.parse_identifier())? .into_iter() .map(|ident| ObjectName::from(vec![ident])) .collect(), ); self.expect_token(&Token::RParen)?; - variables + vars } else { OneOrManyWithParens::One(self.parse_object_name(false)?) }; - let names = matches!(&variables, OneOrManyWithParens::One(variable) if variable.to_string().eq_ignore_ascii_case("NAMES")); + if !(self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO)) { + return self.expected("assignment operator", self.peek_token()); + } - if names && self.dialect.supports_set_names() { - if self.parse_keyword(Keyword::DEFAULT) { - return Ok(Statement::SetNamesDefault {}); - } + let values = self.parse_expr()?; - let charset_name = self.parse_identifier()?; - let collation_name = if self.parse_one_of_keywords(&[Keyword::COLLATE]).is_some() { - Some(self.parse_literal_string()?) - } else { - None - }; + Ok((variables, values)) + } - return Ok(Statement::SetNames { - charset_name, - collation_name, - }); + fn parse_set(&mut self) -> Result<Statement, ParserError> { + let modifier = + self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); + + if let Some(Keyword::HIVEVAR) = modifier { + self.expect_token(&Token::Colon)?; } - let parenthesized_assignment = matches!(&variables, OneOrManyWithParens::Many(_)); + if let Some(set_role_stmt) = self.maybe_parse(|parser| parser.parse_set_role(modifier))? { + return Ok(set_role_stmt); + } - if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { - if parenthesized_assignment { - self.expect_token(&Token::LParen)?; - } + if self.dialect.supports_comma_separated_set_assignments() { + if let Ok(v) = self + .try_parse(|parser| Ok(parser.parse_comma_separated(Parser::parse_set_assignment)?)) + { + let (variables, values): (Vec<_>, Vec<_>) = v.into_iter().unzip(); - let mut values = vec![]; - loop { - let value = if let Some(expr) = self.try_parse_expr_sub_query()? { - expr - } else if let Ok(expr) = self.parse_expr() { - expr + let variables = if variables.len() == 1 { + variables.into_iter().next().unwrap() } else { - self.expected("variable value", self.peek_token())? + OneOrManyWithParens::Many(variables.into_iter().flatten().map(|v| v).collect()) }; - values.push(value); - if self.consume_token(&Token::Comma) { - continue; - } - - if parenthesized_assignment { - self.expect_token(&Token::RParen)?; - } return Ok(Statement::SetVariable { local: modifier == Some(Keyword::LOCAL), - hivevar: Some(Keyword::HIVEVAR) == modifier, + hivevar: modifier == Some(Keyword::HIVEVAR), variables, value: values, }); } } + let variables = if self.parse_keywords(&[Keyword::TIME, Keyword::ZONE]) { + OneOrManyWithParens::One(ObjectName::from(vec!["TIMEZONE".into()])) + } else if self.dialect.supports_parenthesized_set_variables() + && self.consume_token(&Token::LParen) + { + let variables = OneOrManyWithParens::Many( + self.parse_comma_separated(|parser: &mut Parser<'a>| parser.parse_identifier())? + .into_iter() + .map(|ident| ObjectName::from(vec![ident])) + .collect(), + ); + self.expect_token(&Token::RParen)?; + variables + } else { + OneOrManyWithParens::One(self.parse_object_name(false)?) + }; + + if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { + let parenthesized_assignment = matches!(&variables, OneOrManyWithParens::Many(_)); + let values = self.parse_set_values(parenthesized_assignment)?; + + return Ok(Statement::SetVariable { + local: modifier == Some(Keyword::LOCAL), + hivevar: modifier == Some(Keyword::HIVEVAR), + variables, + value: values, + }); + } + let OneOrManyWithParens::One(variable) = variables else { return self.expected("set variable", self.peek_token()); }; - if variable.to_string().eq_ignore_ascii_case("TIMEZONE") { - // for some db (e.g. postgresql), SET TIME ZONE <value> is an alias for SET TIMEZONE [TO|=] <value> - match self.parse_expr() { - Ok(expr) => Ok(Statement::SetTimeZone { - local: modifier == Some(Keyword::LOCAL), - value: expr, - }), - _ => self.expected("timezone value", self.peek_token())?, - } - } else if variable.to_string() == "CHARACTERISTICS" { - self.expect_keywords(&[Keyword::AS, Keyword::TRANSACTION])?; - Ok(Statement::SetTransaction { - modes: self.parse_transaction_modes()?, - snapshot: None, - session: true, - }) - } else if variable.to_string() == "TRANSACTION" && modifier.is_none() { - if self.parse_keyword(Keyword::SNAPSHOT) { - let snapshot_id = self.parse_value()?.value; + match variable.to_string().to_ascii_uppercase().as_str() { Review Comment: Would be nice to avoid allocating a new `String` for every variable for this check (though only doing it once with this refactor is an improvement). I think you could match on the `ObjectName` and only apply this check if there's a single `Ident`, and just directly compare to its value (with a series of if-else `ident.value.eq_ignore_ascii_case`). That said—unless it turns out that these different specially named set statements can also be done multiply (e.g. `SET NAMES utf8, TIMEZONE utc`), which I doubt and we already don't support—I actually think it would be simpler to just handle these cases individually up front, instead of passing them through this intermediary `variable`. As an example, the non-locality introduced by checking for `TIME ZONE` and turning it into `TIMEZONE` earlier in the function, knowing it will be handled specially here, just makes this difficult to read, has a needless performance hit, and doesn't buy us anything as far as I can tell. I don't thinking starting `parse_set` with a series of checks for these keywords would be a problem, before jumping into arbitrarily-named variable assignment parsing. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org