This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch gh-readonly-queue/main/pr-2280-7c4eac3098063d191337ecb9ac0f695ac205de67
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git

commit df0d56cfa28ae556a4dd3740c6150af46415bf12
Author: Ayman Elkfrawy <[email protected]>
AuthorDate: Thu Mar 26 08:24:15 2026 -0700

    Fix the tokenization of `<` edge cases (#2280)
---
 src/tokenizer.rs | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 102 insertions(+), 8 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 5ca686d4..c055db8f 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1627,6 +1627,9 @@ impl<'a> Tokenizer<'a> {
                             chars.next();
                             match chars.peek() {
                                 Some('>') => self.consume_for_binop(chars, "<=>", Token::Spaceship),
+                                // `<=+` and `<=-` are not valid combined operators; treat `<=` as
+                                // the operator and leave `+`/`-` to be tokenized separately.
+                                Some('+') | Some('-') => Ok(Some(Token::LtEq)),
                                 _ => self.start_binop(chars, "<=", Token::LtEq),
                             }
                         }
@@ -1646,13 +1649,15 @@ impl<'a> Tokenizer<'a> {
                             }
                         }
                         Some('<') => self.consume_for_binop(chars, "<<", Token::ShiftLeft),
+                        // `<+` is not a valid combined operator; treat `<` as the operator
+                        // and leave `+` to be tokenized separately.
+                        Some('+') => Ok(Some(Token::Lt)),
                         Some('-') if self.dialect.supports_geometric_types() => {
-                            chars.next(); // consume
-                            match chars.peek() {
-                                Some('>') => {
-                                    self.consume_for_binop(chars, "<->", Token::TwoWayArrow)
-                                }
-                                _ => self.start_binop_opt(chars, "<-", None),
+                            if chars.peekable.clone().nth(1) == Some('>') {
+                                chars.next(); // consume `-`
+                                self.consume_for_binop(chars, "<->", Token::TwoWayArrow)
+                            } else {
+                                Ok(Some(Token::Lt))
                             }
                         }
                         Some('^') if self.dialect.supports_geometric_types() => {
@@ -2628,9 +2633,10 @@ fn take_char_from_hex_digits(
 mod tests {
     use super::*;
     use crate::dialect::{
-        BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, SQLiteDialect,
+        BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect,
+        PostgreSqlDialect, SQLiteDialect,
     };
-    use crate::test_utils::{all_dialects_except, all_dialects_where};
+    use crate::test_utils::{all_dialects, all_dialects_except, all_dialects_where};
     use core::fmt::Debug;
 
     #[test]
@@ -4420,4 +4426,92 @@ mod tests {
             tokens,
         );
     }
+
+    #[test]
+    fn tokenize_lt() {
+        all_dialects().tokenizes_to(
+            "select a <-50",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::make_word("a", None),
+                Token::Whitespace(Whitespace::Space),
+                Token::Lt,
+                Token::Minus,
+                Token::Number("50".to_string(), false),
+            ],
+        );
+        all_dialects().tokenizes_to(
+            "select a <+50",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::make_word("a", None),
+                Token::Whitespace(Whitespace::Space),
+                Token::Lt,
+                Token::Plus,
+                Token::Number("50".to_string(), false),
+            ],
+        );
+        all_dialects().tokenizes_to(
+            "select a <=-50",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::make_word("a", None),
+                Token::Whitespace(Whitespace::Space),
+                Token::LtEq,
+                Token::Minus,
+                Token::Number("50".to_string(), false),
+            ],
+        );
+        all_dialects().tokenizes_to(
+            "select a <=+50",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::make_word("a", None),
+                Token::Whitespace(Whitespace::Space),
+                Token::LtEq,
+                Token::Plus,
+                Token::Number("50".to_string(), false),
+            ],
+        );
+        all_dialects_where(|d| d.supports_geometric_types()).tokenizes_to(
+            "select a <->b",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::make_word("a", None),
+                Token::Whitespace(Whitespace::Space),
+                Token::TwoWayArrow,
+                Token::make_word("b", None),
+            ],
+        );
+
+        all_dialects().tokenizes_to(
+            "select a <-b",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::make_word("a", None),
+                Token::Whitespace(Whitespace::Space),
+                Token::Lt,
+                Token::Minus,
+                Token::make_word("b", None),
+            ],
+        );
+        all_dialects().tokenizes_to(
+            "select a <+b",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::make_word("a", None),
+                Token::Whitespace(Whitespace::Space),
+                Token::Lt,
+                Token::Plus,
+                Token::make_word("b", None),
+            ],
+        );
+    }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
