This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch gh-readonly-queue/main/pr-2280-7c4eac3098063d191337ecb9ac0f695ac205de67
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git

commit df0d56cfa28ae556a4dd3740c6150af46415bf12
Author: Ayman Elkfrawy <[email protected]>
AuthorDate: Thu Mar 26 08:24:15 2026 -0700

    Fix the tokenization of `<` edge cases (#2280)
---
 src/tokenizer.rs | 110 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 102 insertions(+), 8 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 5ca686d4..c055db8f 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1627,6 +1627,9 @@ impl<'a> Tokenizer<'a> {
                             chars.next();
                             match chars.peek() {
                                 Some('>') => self.consume_for_binop(chars, "<=>", Token::Spaceship),
+                                // `<=+` and `<=-` are not valid combined operators; treat `<=` as
+                                // the operator and leave `+`/`-` to be tokenized separately.
+                                Some('+') | Some('-') => Ok(Some(Token::LtEq)),
                                 _ => self.start_binop(chars, "<=", Token::LtEq),
                             }
                         }
@@ -1646,13 +1649,15 @@ impl<'a> Tokenizer<'a> {
                             }
                         }
                         Some('<') => self.consume_for_binop(chars, "<<", Token::ShiftLeft),
+                        // `<+` is not a valid combined operator; treat `<` as the operator
+                        // and leave `+` to be tokenized separately.
+                        Some('+') => Ok(Some(Token::Lt)),
                         Some('-') if self.dialect.supports_geometric_types() => {
-                            chars.next(); // consume
-                            match chars.peek() {
-                                Some('>') => {
-                                    self.consume_for_binop(chars, "<->", Token::TwoWayArrow)
-                                }
-                                _ => self.start_binop_opt(chars, "<-", None),
+                            if chars.peekable.clone().nth(1) == Some('>') {
+                                chars.next(); // consume `-`
+                                self.consume_for_binop(chars, "<->", Token::TwoWayArrow)
+                            } else {
+                                Ok(Some(Token::Lt))
                             }
                         }
                         Some('^') if self.dialect.supports_geometric_types() => {
@@ -2628,9 +2633,10 @@ fn take_char_from_hex_digits(
 mod tests {
     use super::*;
     use crate::dialect::{
-        BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, SQLiteDialect,
+        BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect,
+        PostgreSqlDialect, SQLiteDialect,
     };
-    use crate::test_utils::{all_dialects_except, all_dialects_where};
+    use crate::test_utils::{all_dialects, all_dialects_except, all_dialects_where};
     use core::fmt::Debug;
 
     #[test]
@@ -4420,4 +4426,92 @@ mod tests {
             tokens,
         );
     }
+
+    #[test]
+    fn tokenize_lt() {
+        all_dialects().tokenizes_to(
+            "select a <-50",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::make_word("a", None),
+                Token::Whitespace(Whitespace::Space),
+                Token::Lt,
+                Token::Minus,
+                Token::Number("50".to_string(), false),
+            ],
+        );
+        all_dialects().tokenizes_to(
+            "select a <+50",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::make_word("a", None),
+                Token::Whitespace(Whitespace::Space),
+                Token::Lt,
+                Token::Plus,
+                Token::Number("50".to_string(), false),
+            ],
+        );
+        all_dialects().tokenizes_to(
+            "select a <=-50",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::make_word("a", None),
+                Token::Whitespace(Whitespace::Space),
+                Token::LtEq,
+                Token::Minus,
+                Token::Number("50".to_string(), false),
+            ],
+        );
+        all_dialects().tokenizes_to(
+            "select a <=+50",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::make_word("a", None),
+                Token::Whitespace(Whitespace::Space),
+                Token::LtEq,
+                Token::Plus,
+                Token::Number("50".to_string(), false),
+            ],
+        );
+        all_dialects_where(|d| d.supports_geometric_types()).tokenizes_to(
+            "select a <->b",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::make_word("a", None),
+                Token::Whitespace(Whitespace::Space),
+                Token::TwoWayArrow,
+                Token::make_word("b", None),
+            ],
+        );
+
+        all_dialects().tokenizes_to(
+            "select a <-b",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::make_word("a", None),
+                Token::Whitespace(Whitespace::Space),
+                Token::Lt,
+                Token::Minus,
+                Token::make_word("b", None),
+            ],
+        );
+        all_dialects().tokenizes_to(
+            "select a <+b",
+            vec![
+                Token::make_keyword("select"),
+                Token::Whitespace(Whitespace::Space),
+                Token::make_word("a", None),
+                Token::Whitespace(Whitespace::Space),
+                Token::Lt,
+                Token::Plus,
+                Token::make_word("b", None),
+            ],
+        );
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to