iffyio commented on code in PR #1707: URL: https://github.com/apache/datafusion-sqlparser-rs/pull/1707#discussion_r1970924952
########## src/parser/mod.rs: ########## @@ -7629,24 +7643,34 @@ impl<'a> Parser<'a> { } pub fn parse_index_type(&mut self) -> Result<IndexType, ParserError> { - if self.parse_keyword(Keyword::BTREE) { - Ok(IndexType::BTree) + Ok(if self.parse_keyword(Keyword::BTREE) { + IndexType::BTree } else if self.parse_keyword(Keyword::HASH) { - Ok(IndexType::Hash) - } else { - self.expected("index type {BTREE | HASH}", self.peek_token()) - } + IndexType::Hash + } else if self.parse_keyword(Keyword::GIN) { + IndexType::GIN + } else if self.parse_keyword(Keyword::GIST) { + IndexType::GiST + } else if self.parse_keyword(Keyword::SPGIST) { + IndexType::SPGiST + } else if self.parse_keyword(Keyword::BRIN) { + IndexType::BRIN + } else if self.parse_keyword(Keyword::BLOOM) { + IndexType::Bloom + } else { + IndexType::Custom(self.parse_identifier()?) + }) } - /// Parse [USING {BTREE | HASH}] + /// Parse [USING {BTREE | HASH | GIN | GIST | SPGIST | BRIN | BLOOM | identifier}] pub fn parse_optional_using_then_index_type( &mut self, ) -> Result<Option<IndexType>, ParserError> { - if self.parse_keyword(Keyword::USING) { - Ok(Some(self.parse_index_type()?)) + Ok(if self.parse_keyword(Keyword::USING) { + Some(self.parse_index_type()?) Review Comment: ```suggestion if self.parse_keyword(Keyword::USING) { Ok(Some(self.parse_index_type()?)) ``` Can we change this and `parse_index_type` to use the previous style of returning individual ok instead of wrapping the if statement within the OK? I think that style is more consistent with the codebase ########## src/parser/mod.rs: ########## @@ -263,7 +264,7 @@ impl ParserOptions { } } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] Review Comment: ```suggestion #[derive(Copy, Clone)] ``` ########## src/parser/mod.rs: ########## @@ -63,6 +63,7 @@ mod recursion { use super::ParserError; + #[derive(Debug)] Review Comment: ```suggestion ``` ########## src/parser/mod.rs: ########## @@ -7629,24 +7643,34 @@ impl<'a> Parser<'a> { } pub fn parse_index_type(&mut self) -> Result<IndexType, ParserError> { - if self.parse_keyword(Keyword::BTREE) { - Ok(IndexType::BTree) + Ok(if self.parse_keyword(Keyword::BTREE) { + IndexType::BTree } else if self.parse_keyword(Keyword::HASH) { - Ok(IndexType::Hash) - } else { - self.expected("index type {BTREE | HASH}", self.peek_token()) - } + IndexType::Hash + } else if self.parse_keyword(Keyword::GIN) { + IndexType::GIN + } else if self.parse_keyword(Keyword::GIST) { + IndexType::GiST + } else if self.parse_keyword(Keyword::SPGIST) { + IndexType::SPGiST + } else if self.parse_keyword(Keyword::BRIN) { + IndexType::BRIN + } else if self.parse_keyword(Keyword::BLOOM) { + IndexType::Bloom + } else { + IndexType::Custom(self.parse_identifier()?) + }) } - /// Parse [USING {BTREE | HASH}] + /// Parse [USING {BTREE | HASH | GIN | GIST | SPGIST | BRIN | BLOOM | identifier}] Review Comment: ```suggestion /// Optionally parse the `USING` keyword, followed by an [IndexType] /// Example: /// ```sql //// USING BTREE (name, age DESC) /// ``` ``` Can we say something like this? in order to avoid duplicating the list of supported index types ########## src/parser/mod.rs: ########## @@ -13595,10 +13619,33 @@ impl<'a> Parser<'a> { } } - /// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY) + /// Parse an OrderByExpr expression, optionally followed by ASC or DESC (used in ORDER BY) pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, ParserError> { + self.parse_create_index_expr::<false>() + .map(|index_column| index_column.column) + } + + /// Parse an IndexColumn expression (used in CREATE INDEX) + pub fn parse_create_index_expr<const PARSE_OPERATOR_CLASS: bool>( Review Comment: Can we move the replace the generic parameter with a normal function parameter? That would be consistent with the codebase. Also it would be good to document the parameter since this is being exposed as a public function, maybe with a SQL example to clarify? hmm actually I'm thinking this param shouldn't actually be required on this public api since an operator class should always be applicable when parsing a create index expr, can we do something like this to clarify the relationships ```rust pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, ParserError> { self.parse_index_expr_inner(false) .map(|(index_expr,)| index_expr) } pub fn parse_create_index_expr() { self.parse_index_expr_inner(false) .map(|(column, operator_class)| IndexColumn { column operator_class }) } fn parse_order_by_expr_inner(&self, with_operator_class: bool) -> Result<(IndexExpr, Option<Ident>)> { ... } ########## tests/sqlparser_postgres.rs: ########## @@ -2509,6 +2509,340 @@ fn parse_create_anonymous_index() { } } +#[test] +fn parse_create_users_name_trgm_index() { + let sql = "CREATE INDEX users_name_trgm_idx ON users USING GIN (concat_users_name(first_name, last_name) gin_trgm_ops)"; + match pg().verified_stmt(sql) { + Statement::CreateIndex(CreateIndex { + name: Some(ObjectName(name)), + table_name: ObjectName(table_name), + using: Some(using), + columns, + unique, + concurrently, + if_not_exists, + include, + nulls_distinct: None, + with, + predicate: None, + }) => { + assert_eq_vec(&["users_name_trgm_idx"], &name); + assert_eq_vec(&["users"], &table_name); + assert_eq!(IndexType::GIN, using); + assert_eq!( + IndexColumn { + column: OrderByExpr { + expr: Expr::Function(Function { + name: ObjectName(vec![ObjectNamePart::Identifier(Ident { + value: "concat_users_name".to_owned(), + quote_style: None, + span: Span::empty() + })]), + uses_odbc_syntax: false, + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + args: vec![ + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( + Ident { + value: "first_name".to_owned(), + quote_style: None, + span: Span::empty() + } + ))), + FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Identifier( + Ident { + value: "last_name".to_owned(), + quote_style: None, + span: Span::empty() + } + ))) + ], + clauses: vec![] + }), + filter: None, + null_treatment: None, + over: None, + within_group: vec![] + }), + options: OrderByOptions { + asc: None, + nulls_first: None, + }, + with_fill: None, + }, + operator_class: Some(Ident::new("gin_trgm_ops")), + }, + columns[0], + ); + assert!(!unique); + assert!(!concurrently); + assert!(!if_not_exists); + assert!(include.is_empty()); + assert!(with.is_empty()); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_create_projects_name_description_trgm_index() { + let sql = "CREATE INDEX projects_name_description_trgm_idx ON projects USING GIN (concat_projects_name_description(name, description) gin_trgm_ops)"; Review Comment: this test scenario looks identical to `parse_create_users_name_trgm_index` in terms of coverage? Im thinking generally for the tests we can group them into the same test function, it would be sufficient with the AST assertion on only one of the test scenarios then for the rest we can rely only on `verified_stmt()`, that would keep the tests code smaller and easier to track what part of the syntax is covered. Then could we add scenarios for all the introduced index types (i.e. BRIN, BLOOM etc)? As well as the custom type which probably would live in common I imagine? ########## src/parser/mod.rs: ########## @@ -13595,10 +13619,33 @@ impl<'a> Parser<'a> { } } - /// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY) + /// Parse an OrderByExpr expression, optionally followed by ASC or DESC (used in ORDER BY) Review Comment: ```suggestion /// Parse an [OrderByExpr] expression. ``` Since the ASC/DESC is part of the returned `OrderByExpr` ########## src/parser/mod.rs: ########## @@ -13595,10 +13619,33 @@ impl<'a> Parser<'a> { } } - /// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY) + /// Parse an OrderByExpr expression, optionally followed by ASC or DESC (used in ORDER BY) pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, ParserError> { + self.parse_create_index_expr::<false>() + .map(|index_column| index_column.column) + } + + /// Parse an IndexColumn expression (used in CREATE INDEX) Review Comment: ```suggestion /// Parse an [IndexColumn]. ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org