iffyio commented on code in PR #1576:
URL:
https://github.com/apache/datafusion-sqlparser-rs/pull/1576#discussion_r1873272299
##########
src/tokenizer.rs:
##########
@@ -1075,25 +1075,56 @@ impl<'a> Tokenizer<'a> {
Ok(Some(Token::DoubleQuotedString(s)))
}
// delimited (quoted) identifier
+ quote_start if self.dialect.is_delimited_identifier_start(ch)
=> {
+ let word = self.tokenize_quoted_identifier(quote_start,
chars)?;
+ Ok(Some(Token::make_word(&word, Some(quote_start))))
+ }
+ // special (quoted) identifier
quote_start
- if self.dialect.is_delimited_identifier_start(ch)
+ if self
+ .dialect
+ .is_nested_delimited_identifier_start(quote_start)
&& self
.dialect
-
.is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
+
.nested_delimited_identifier(chars.peekable.clone())
+ .is_some() =>
{
- let error_loc = chars.location();
- chars.next(); // consume the opening quote
+ let (quote_start, nested_quote_start) = self
+ .dialect
+ .nested_delimited_identifier(chars.peekable.clone())
+ .unwrap();
Review Comment:
```suggestion
```
Can we return an error here instead of unwrapping?
##########
src/dialect/mod.rs:
##########
@@ -128,14 +128,23 @@ pub trait Dialect: Debug + Any {
ch == '"' || ch == '`'
}
- /// Return the character used to quote identifiers.
- fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
+ /// Determine if a character starts a potential nested quoted identifier.
+ /// RedShift support old way of quotation with `[` and it can cover even
nested quoted identifier.
Review Comment:
```suggestion
/// Example: RedShift supports the following quote styles, all of which mean
the same thing:
/// ```sql
/// SELECT 1 AS foo;
/// SELECT 1 AS "foo";
/// SELECT 1 AS [foo];
/// SELECT 1 AS ["foo"];
/// ```
```
Thinking of something like this to clarify the behavior?
##########
tests/sqlparser_redshift.rs:
##########
@@ -279,6 +279,31 @@ fn test_redshift_json_path() {
},
expr_from_projection(only(&select.projection))
);
+
+ let sql = r#"SELECT db1.sc1.tbl1.col1[0]."id" FROM
customer_orders_lineitem"#;
+ let select = dialects.verified_only_select(sql);
+ assert_eq!(
+ &Expr::JsonAccess {
+ value: Box::new(Expr::CompoundIdentifier(vec![
+ Ident::new("db1"),
+ Ident::new("sc1"),
+ Ident::new("tbl1"),
+ Ident::new("col1")
+ ])),
+ path: JsonPath {
+ path: vec![
+ JsonPathElem::Bracket {
+ key: Expr::Value(Value::Number("0".parse().unwrap(),
false))
Review Comment:
```suggestion
key: Expr::Value(number("0"))
```
##########
src/tokenizer.rs:
##########
@@ -1075,25 +1075,56 @@ impl<'a> Tokenizer<'a> {
Ok(Some(Token::DoubleQuotedString(s)))
}
// delimited (quoted) identifier
+ quote_start if self.dialect.is_delimited_identifier_start(ch)
=> {
+ let word = self.tokenize_quoted_identifier(quote_start,
chars)?;
+ Ok(Some(Token::make_word(&word, Some(quote_start))))
+ }
+ // special (quoted) identifier
quote_start
- if self.dialect.is_delimited_identifier_start(ch)
+ if self
+ .dialect
+ .is_nested_delimited_identifier_start(quote_start)
&& self
.dialect
-
.is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
+
.nested_delimited_identifier(chars.peekable.clone())
+ .is_some() =>
{
- let error_loc = chars.location();
- chars.next(); // consume the opening quote
+ let (quote_start, nested_quote_start) = self
+ .dialect
+ .nested_delimited_identifier(chars.peekable.clone())
+ .unwrap();
+
+ let Some(nested_quote_start) = nested_quote_start else {
+ let word =
self.tokenize_quoted_identifier(quote_start, chars)?;
+ return Ok(Some(Token::make_word(&word,
Some(quote_start))));
+ };
+
+ let mut word = vec![];
let quote_end = Word::matching_end_quote(quote_start);
- let (s, last_char) = self.parse_quoted_ident(chars,
quote_end);
+ let nested_quote_end =
Word::matching_end_quote(nested_quote_start);
+ let error_loc = chars.location();
- if last_char == Some(quote_end) {
- Ok(Some(Token::make_word(&s, Some(quote_start))))
- } else {
- self.tokenizer_error(
+ chars.next(); // skip the first delimiter
+ word.push(peeking_take_while(chars, |ch|
ch.is_whitespace()));
+ if chars.peek() != Some(&nested_quote_start) {
+ return self.tokenizer_error(
+ error_loc,
+ format!("Expected nested delimiter
'{nested_quote_start}' before EOF."),
+ );
+ }
+ word.push(format!("{nested_quote_start}"));
Review Comment:
```suggestion
word.push(nested_quote_start.into());
```
##########
src/dialect/mod.rs:
##########
@@ -128,14 +128,23 @@ pub trait Dialect: Debug + Any {
ch == '"' || ch == '`'
}
- /// Return the character used to quote identifiers.
- fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
+ /// Determine if a character starts a potential nested quoted identifier.
+ /// RedShift support old way of quotation with `[` and it can cover even
nested quoted identifier.
+ fn is_nested_delimited_identifier_start(&self, _ch: char) -> bool {
+ false
+ }
+
+ /// Determine if nested quoted characters are presented
+ fn nested_delimited_identifier(
Review Comment:
```suggestion
fn peek_nested_delimited_identifier_quotes(
```
##########
src/tokenizer.rs:
##########
@@ -1075,25 +1075,56 @@ impl<'a> Tokenizer<'a> {
Ok(Some(Token::DoubleQuotedString(s)))
}
// delimited (quoted) identifier
+ quote_start if self.dialect.is_delimited_identifier_start(ch)
=> {
+ let word = self.tokenize_quoted_identifier(quote_start,
chars)?;
+ Ok(Some(Token::make_word(&word, Some(quote_start))))
+ }
+ // special (quoted) identifier
Review Comment:
```suggestion
// Potentially nested delimited (quoted) identifier
```
##########
src/dialect/redshift.rs:
##########
@@ -32,21 +32,36 @@ pub struct RedshiftSqlDialect {}
// in the Postgres dialect, the query will be parsed as an array, while in the
Redshift dialect it will
// be a json path
impl Dialect for RedshiftSqlDialect {
- fn is_delimited_identifier_start(&self, ch: char) -> bool {
- ch == '"' || ch == '['
+ fn is_nested_delimited_identifier_start(&self, ch: char) -> bool {
+ ch == '['
}
- /// Determine if quoted characters are proper for identifier
+ /// Determine if quoted characters are looks like special case of
quotation begining with `[`.
Review Comment:
Maybe we can copy over the suggested dialect method comment here, since it's
already describing Redshift.
##########
tests/sqlparser_redshift.rs:
##########
@@ -353,3 +378,20 @@ fn test_parse_json_path_from() {
_ => panic!(),
}
}
+
+#[test]
+fn test_parse_select_numbered_columns() {
Review Comment:
Thinking of a couple more cases we could include?
```sql
SELECT 1 AS ["1];
SELECT 1 AS [ " 1 " ];
```
##########
src/dialect/mod.rs:
##########
@@ -128,14 +128,23 @@ pub trait Dialect: Debug + Any {
ch == '"' || ch == '`'
}
- /// Return the character used to quote identifiers.
- fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
+ /// Determine if a character starts a potential nested quoted identifier.
+ /// RedShift support old way of quotation with `[` and it can cover even
nested quoted identifier.
+ fn is_nested_delimited_identifier_start(&self, _ch: char) -> bool {
+ false
+ }
+
+ /// Determine if nested quoted characters are presented
Review Comment:
```suggestion
/// Only applicable whenever
[`Self::is_nested_delimited_identifier_start`] returns true.
/// If the next sequence of tokens potentially represents a nested
identifier, then this method
/// returns a tuple containing the outer quote style and, if present,
the inner (nested) quote style.
///
/// Example (Redshift):
/// ```text
/// `["foo"]` => Some(`[`, Some(`"`))
/// `[foo]` => Some(`[`, None)
/// `"foo"` => None
/// ```
```
Similarly, this is to clarify the behavior with an example, since it wouldn't be
intuitive otherwise.
##########
tests/sqlparser_redshift.rs:
##########
@@ -353,3 +378,20 @@ fn test_parse_json_path_from() {
_ => panic!(),
}
}
+
+#[test]
+fn test_parse_select_numbered_columns() {
+ redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1" FROM a"#);
+ // RedShift specific case - quoted identifier inside square bracket
Review Comment:
```suggestion
```
this one seems to be already covered by `test_parse_create_numbered_columns`?
##########
src/tokenizer.rs:
##########
@@ -1075,25 +1075,56 @@ impl<'a> Tokenizer<'a> {
Ok(Some(Token::DoubleQuotedString(s)))
}
// delimited (quoted) identifier
+ quote_start if self.dialect.is_delimited_identifier_start(ch)
=> {
+ let word = self.tokenize_quoted_identifier(quote_start,
chars)?;
+ Ok(Some(Token::make_word(&word, Some(quote_start))))
+ }
+ // special (quoted) identifier
quote_start
- if self.dialect.is_delimited_identifier_start(ch)
+ if self
+ .dialect
+ .is_nested_delimited_identifier_start(quote_start)
&& self
.dialect
-
.is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
+
.nested_delimited_identifier(chars.peekable.clone())
+ .is_some() =>
{
- let error_loc = chars.location();
- chars.next(); // consume the opening quote
+ let (quote_start, nested_quote_start) = self
+ .dialect
+ .nested_delimited_identifier(chars.peekable.clone())
+ .unwrap();
+
+ let Some(nested_quote_start) = nested_quote_start else {
+ let word =
self.tokenize_quoted_identifier(quote_start, chars)?;
+ return Ok(Some(Token::make_word(&word,
Some(quote_start))));
+ };
+
+ let mut word = vec![];
let quote_end = Word::matching_end_quote(quote_start);
- let (s, last_char) = self.parse_quoted_ident(chars,
quote_end);
+ let nested_quote_end =
Word::matching_end_quote(nested_quote_start);
+ let error_loc = chars.location();
- if last_char == Some(quote_end) {
- Ok(Some(Token::make_word(&s, Some(quote_start))))
- } else {
- self.tokenizer_error(
+ chars.next(); // skip the first delimiter
+ word.push(peeking_take_while(chars, |ch|
ch.is_whitespace()));
+ if chars.peek() != Some(&nested_quote_start) {
+ return self.tokenizer_error(
+ error_loc,
+ format!("Expected nested delimiter
'{nested_quote_start}' before EOF."),
+ );
+ }
+ word.push(format!("{nested_quote_start}"));
+
word.push(self.tokenize_quoted_identifier(nested_quote_end, chars)?);
+ word.push(format!("{nested_quote_end}"));
Review Comment:
```suggestion
word.push(nested_quote_end.into());
```
##########
tests/sqlparser_redshift.rs:
##########
@@ -353,3 +378,20 @@ fn test_parse_json_path_from() {
_ => panic!(),
}
}
+
+#[test]
+fn test_parse_select_numbered_columns() {
+ redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1" FROM a"#);
+ // RedShift specific case - quoted identifier inside square bracket
+ redshift().verified_stmt(r#"SELECT 1 AS ["1"] FROM a"#);
+ redshift().verified_stmt(r#"SELECT 1 AS ["[="] FROM a"#);
+ redshift().verified_stmt(r#"SELECT 1 AS ["=]"] FROM a"#);
+ redshift().verified_stmt(r#"SELECT 1 AS ["a[b]"] FROM a"#);
+}
+
+#[test]
+fn test_parse_create_numbered_columns() {
+ redshift_and_generic().verified_stmt(
+ r#"CREATE TABLE test_table_1 ("1" INT, "d" VARCHAR(155), "2" DOUBLE
PRECISION)"#,
+ );
+}
Review Comment:
Can we incorporate this as a test case under the
`parse_delimited_identifiers` test?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]