(datafusion-sqlparser-rs) 01/01: refactor: use `to_ident()` instead of `clone().into_ident()` for borrowed Words (#2177)

github-bot Fri, 23 Jan 2026 11:28:48 -0800

This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch 
gh-readonly-queue/main/pr-2177-614ea06e31b415e012ac59afd8c90a723d8868b5
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git


commit 4305dd43f315968d98ec1df5d1f22a4fecf534a7
Author: Andy Grove <[email protected]>
AuthorDate: Fri Jan 23 12:27:59 2026 -0700

    refactor: use `to_ident()` instead of `clone().into_ident()` for borrowed Words (#2177)
    
    Co-authored-by: Claude Opus 4.5 <[email protected]>
---
 sqlparser_bench/benches/sqlparser_bench.rs | 72 +++++++++++++++++++++++++++++-
 src/parser/mod.rs                          | 35 +++++++++------
 2 files changed, 92 insertions(+), 15 deletions(-)

diff --git a/sqlparser_bench/benches/sqlparser_bench.rs 
b/sqlparser_bench/benches/sqlparser_bench.rs
index 9637a98f..b52683aa 100644
--- a/sqlparser_bench/benches/sqlparser_bench.rs
+++ b/sqlparser_bench/benches/sqlparser_bench.rs
@@ -17,7 +17,9 @@
 
 use criterion::{criterion_group, criterion_main, Criterion};
 use sqlparser::dialect::GenericDialect;
+use sqlparser::keywords::Keyword;
 use sqlparser::parser::Parser;
+use sqlparser::tokenizer::{Span, Word};
 
 fn basic_queries(c: &mut Criterion) {
     let mut group = c.benchmark_group("sqlparser-rs parsing benchmark");
@@ -82,5 +84,73 @@ fn basic_queries(c: &mut Criterion) {
     });
 }
 
-criterion_group!(benches, basic_queries);
+/// Benchmark comparing `to_ident(&self)` vs `clone().into_ident(self)`.
+///
+/// Both approaches have equivalent performance since the String clone dominates.
+/// `to_ident()` is preferred for clearer code (one method call vs two).
+fn word_to_ident(c: &mut Criterion) {
+    let mut group = c.benchmark_group("word_to_ident");
+
+    // Create Word instances with varying identifier lengths
+    let words: Vec<Word> = (0..100)
+        .map(|i| Word {
+            value: format!("identifier_name_with_number_{i}"),
+            quote_style: None,
+            keyword: Keyword::NoKeyword,
+        })
+        .collect();
+    let span = Span::empty();
+
+    // clone().into_ident(): clones entire Word struct, then moves the String value
+    group.bench_function("clone_into_ident_100x", |b| {
+        b.iter(|| {
+            for w in &words {
+                std::hint::black_box(w.clone().into_ident(span));
+            }
+        });
+    });
+
+    // to_ident(): clones only the String value directly into the Ident
+    group.bench_function("to_ident_100x", |b| {
+        b.iter(|| {
+            for w in &words {
+                std::hint::black_box(w.to_ident(span));
+            }
+        });
+    });
+
+    group.finish();
+}
+
+/// Benchmark parsing queries with many identifiers to show real-world impact
+fn parse_many_identifiers(c: &mut Criterion) {
+    let mut group = c.benchmark_group("parse_identifiers");
+    let dialect = GenericDialect {};
+
+    // Query with many column references (identifiers)
+    let many_columns = (0..100)
+        .map(|n| format!("column_{n}"))
+        .collect::<Vec<_>>()
+        .join(", ");
+    let query = format!("SELECT {many_columns} FROM my_table");
+
+    group.bench_function("select_100_columns", |b| {
+        b.iter(|| Parser::parse_sql(&dialect, std::hint::black_box(&query)));
+    });
+
+    // Query with many table.column references
+    let qualified_columns = (0..100)
+        .map(|n| format!("t{}.column_{n}", n % 5))
+        .collect::<Vec<_>>()
+        .join(", ");
+    let query_qualified = format!("SELECT {qualified_columns} FROM t0, t1, t2, 
t3, t4");
+
+    group.bench_function("select_100_qualified_columns", |b| {
+        b.iter(|| Parser::parse_sql(&dialect, 
std::hint::black_box(&query_qualified)));
+    });
+
+    group.finish();
+}
+
+criterion_group!(benches, basic_queries, word_to_ident, 
parse_many_identifiers);
 criterion_main!(benches);
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 6fb06c64..d021d163 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1460,7 +1460,7 @@ impl<'a> Parser<'a> {
             if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
                 {
                     Ok(Some(Expr::Function(Function {
-                        name: 
ObjectName::from(vec![w.clone().into_ident(w_span)]),
+                        name: ObjectName::from(vec![w.to_ident(w_span)]),
                         uses_odbc_syntax: false,
                         parameters: FunctionArguments::None,
                         args: FunctionArguments::None,
@@ -1475,7 +1475,7 @@ impl<'a> Parser<'a> {
             | Keyword::CURRENT_DATE
             | Keyword::LOCALTIME
             | Keyword::LOCALTIMESTAMP => {
-                
Ok(Some(self.parse_time_functions(ObjectName::from(vec![w.clone().into_ident(w_span)]))?))
+                
Ok(Some(self.parse_time_functions(ObjectName::from(vec![w.to_ident(w_span)]))?))
             }
             Keyword::CASE => Ok(Some(self.parse_case_expr()?)),
             Keyword::CONVERT => Ok(Some(self.parse_convert_expr(false)?)),
@@ -1500,7 +1500,7 @@ impl<'a> Parser<'a> {
             Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)),
             Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)),
             Keyword::POSITION if self.peek_token_ref().token == Token::LParen 
=> {
-                
Ok(Some(self.parse_position_expr(w.clone().into_ident(w_span))?))
+                Ok(Some(self.parse_position_expr(w.to_ident(w_span))?))
             }
             Keyword::SUBSTR | Keyword::SUBSTRING => {
                 self.prev_token();
@@ -1522,7 +1522,7 @@ impl<'a> Parser<'a> {
                     let query = self.parse_query()?;
                     self.expect_token(&Token::RParen)?;
                     Ok(Some(Expr::Function(Function {
-                        name: 
ObjectName::from(vec![w.clone().into_ident(w_span)]),
+                        name: ObjectName::from(vec![w.to_ident(w_span)]),
                         uses_odbc_syntax: false,
                         parameters: FunctionArguments::None,
                         args: FunctionArguments::Subquery(query),
@@ -1572,7 +1572,7 @@ impl<'a> Parser<'a> {
     ) -> Result<Expr, ParserError> {
         match self.peek_token().token {
             Token::LParen if !self.peek_outer_join_operator() => {
-                let id_parts = vec![w.clone().into_ident(w_span)];
+                let id_parts = vec![w.to_ident(w_span)];
                 self.parse_function(ObjectName::from(id_parts))
             }
             // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html
@@ -1582,7 +1582,7 @@ impl<'a> Parser<'a> {
                 if w.value.starts_with('_') =>
             {
                 Ok(Expr::Prefixed {
-                    prefix: w.clone().into_ident(w_span),
+                    prefix: w.to_ident(w_span),
                     value: self.parse_introduced_string_expr()?.into(),
                 })
             }
@@ -1593,19 +1593,19 @@ impl<'a> Parser<'a> {
                 if w.value.starts_with('_') =>
             {
                 Ok(Expr::Prefixed {
-                    prefix: w.clone().into_ident(w_span),
+                    prefix: w.to_ident(w_span),
                     value: self.parse_introduced_string_expr()?.into(),
                 })
             }
             Token::Arrow if self.dialect.supports_lambda_functions() => {
                 self.expect_token(&Token::Arrow)?;
                 Ok(Expr::Lambda(LambdaFunction {
-                    params: 
OneOrManyWithParens::One(w.clone().into_ident(w_span)),
+                    params: OneOrManyWithParens::One(w.to_ident(w_span)),
                     body: Box::new(self.parse_expr()?),
                     syntax: LambdaSyntax::Arrow,
                 }))
             }
-            _ => Ok(Expr::Identifier(w.clone().into_ident(w_span))),
+            _ => Ok(Expr::Identifier(w.to_ident(w_span))),
         }
     }
 
@@ -12401,9 +12401,10 @@ impl<'a> Parser<'a> {
     pub fn parse_identifiers(&mut self) -> Result<Vec<Ident>, ParserError> {
         let mut idents = vec![];
         loop {
-            match &self.peek_token_ref().token {
+            let token = self.peek_token_ref();
+            match &token.token {
                 Token::Word(w) => {
-                    
idents.push(w.clone().into_ident(self.peek_token_ref().span));
+                    idents.push(w.to_ident(token.span));
                 }
                 Token::EOF | Token::Eq | Token::SemiColon => break,
                 _ => {}
@@ -19203,8 +19204,11 @@ fn maybe_prefixed_expr(expr: Expr, prefix: 
Option<Ident>) -> Expr {
 }
 
 impl Word {
-    #[deprecated(since = "0.54.0", note = "please use `into_ident` instead")]
-    /// Convert this word into an [`Ident`] identifier
+    /// Convert a reference to this word into an [`Ident`] by cloning the value.
+    ///
+    /// Use this method when you need to keep the original `Word` around.
+    /// If you can consume the `Word`, prefer [`into_ident`](Self::into_ident) instead
+    /// to avoid cloning.
     pub fn to_ident(&self, span: Span) -> Ident {
         Ident {
             value: self.value.clone(),
@@ -19213,7 +19217,10 @@ impl Word {
         }
     }
 
-    /// Convert this word into an [`Ident`] identifier
+    /// Convert this word into an [`Ident`] identifier, consuming the `Word`.
+    ///
+    /// This avoids cloning the string value. If you need to keep the original
+    /// `Word`, use [`to_ident`](Self::to_ident) instead.
     pub fn into_ident(self, span: Span) -> Ident {
         Ident {
             value: self.value,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion-sqlparser-rs) 01/01: refactor: use `to_ident()` instead of `clone().into_ident()` for borrowed Words (#2177)

Reply via email to