iffyio commented on code in PR #1580:
URL: 
https://github.com/apache/datafusion-sqlparser-rs/pull/1580#discussion_r1874992334


##########
src/parser/mod.rs:
##########
@@ -10584,6 +10598,116 @@ impl<'a> Parser<'a> {
         }
     }
 
+    fn maybe_parse_table_sample(&mut self) -> 
Result<Option<Box<TableSampleMethod>>, ParserError> {
+        if self
+            .parse_one_of_keywords(&[Keyword::SAMPLE, Keyword::TABLESAMPLE])
+            .is_none()
+        {
+            return Ok(None);
+        }
+
+        // Try to parse based on an explicit table sample method keyword
+        let sample = if self
+            .parse_one_of_keywords(&[Keyword::BERNOULLI, Keyword::ROW])
+            .is_some()
+        {
+            self.expect_token(&Token::LParen)?;
+            let expr = self.parse_expr()?;
+
+            let (probability, value, unit) = if 
self.parse_keyword(Keyword::ROWS) {
+                (None, Some(expr), Some(TableSampleUnit::Rows))
+            } else if self.parse_keyword(Keyword::PERCENT) {
+                (None, Some(expr), Some(TableSampleUnit::Percent))
+            } else {
+                (Some(expr), None, None)
+            };
+            self.expect_token(&Token::RParen)?;
+            TableSampleMethod::Bernoulli(TableSampleBernoulli {
+                probability,
+                value,
+                unit,
+            })
+        } else if self
+            .parse_one_of_keywords(&[Keyword::SYSTEM, Keyword::BLOCK])
+            .is_some()
+        {
+            self.expect_token(&Token::LParen)?;
+            let probability = self.parse_expr()?;
+            self.expect_token(&Token::RParen)?;
+            let seed = if self
+                .parse_one_of_keywords(&[Keyword::REPEATABLE, Keyword::SEED])
+                .is_some()
+            {
+                self.expect_token(&Token::LParen)?;
+                let seed = self.parse_expr()?;
+                self.expect_token(&Token::RParen)?;
+                Some(seed)
+            } else {
+                None
+            };
+            TableSampleMethod::System(TableSampleSystem {
+                probability,
+                repeatable: seed,
+            })
+        // Try to parse without an explicit table sample method keyword
+        } else if self.peek_token().token == Token::LParen {

Review Comment:
   ```suggestion
           } else if self.consume_token(Token::LParen) {
   ```



##########
tests/sqlparser_snowflake.rs:
##########
@@ -2952,3 +2950,33 @@ fn test_sf_double_dot_notation() {
 
 #[test]
 fn test_parse_double_dot_notation_wrong_position() {}
+
+#[test]
+fn test_table_sample() {
+    snowflake_and_generic()
+        .verified_stmt("SELECT * FROM testtable AS t TABLESAMPLE BERNOULLI 
(10)");
+
+    // In Snowflake we translate implicit table sample method to bernoulli
+    snowflake().one_statement_parses_to(
+        "SELECT * FROM testtable SAMPLE (10)",
+        "SELECT * FROM testtable TABLESAMPLE BERNOULLI (10)",

Review Comment:
   For the scenarios that currently rely on `one_statement_parses_to`, could we 
represent them faithfully when displaying? E.g. this one and the `ROW` vs. 
`BERNOULLI` variants, etc.



##########
src/parser/mod.rs:
##########
@@ -10584,6 +10598,116 @@ impl<'a> Parser<'a> {
         }
     }
 
+    fn maybe_parse_table_sample(&mut self) -> 
Result<Option<Box<TableSampleMethod>>, ParserError> {
+        if self
+            .parse_one_of_keywords(&[Keyword::SAMPLE, Keyword::TABLESAMPLE])
+            .is_none()
+        {
+            return Ok(None);
+        }
+
+        // Try to parse based on an explicit table sample method keyword
+        let sample = if self
+            .parse_one_of_keywords(&[Keyword::BERNOULLI, Keyword::ROW])
+            .is_some()
+        {
+            self.expect_token(&Token::LParen)?;
+            let expr = self.parse_expr()?;
+
+            let (probability, value, unit) = if 
self.parse_keyword(Keyword::ROWS) {
+                (None, Some(expr), Some(TableSampleUnit::Rows))
+            } else if self.parse_keyword(Keyword::PERCENT) {
+                (None, Some(expr), Some(TableSampleUnit::Percent))
+            } else {
+                (Some(expr), None, None)
+            };
+            self.expect_token(&Token::RParen)?;
+            TableSampleMethod::Bernoulli(TableSampleBernoulli {
+                probability,
+                value,
+                unit,
+            })
+        } else if self
+            .parse_one_of_keywords(&[Keyword::SYSTEM, Keyword::BLOCK])
+            .is_some()
+        {
+            self.expect_token(&Token::LParen)?;
+            let probability = self.parse_expr()?;
+            self.expect_token(&Token::RParen)?;
+            let seed = if self
+                .parse_one_of_keywords(&[Keyword::REPEATABLE, Keyword::SEED])
+                .is_some()
+            {
+                self.expect_token(&Token::LParen)?;
+                let seed = self.parse_expr()?;
+                self.expect_token(&Token::RParen)?;
+                Some(seed)
+            } else {
+                None
+            };
+            TableSampleMethod::System(TableSampleSystem {
+                probability,
+                repeatable: seed,
+            })
+        // Try to parse without an explicit table sample method keyword
+        } else if self.peek_token().token == Token::LParen {
+            self.expect_token(&Token::LParen)?;
+            if self.parse_keyword(Keyword::BUCKET) {
+                let bucket = self.parse_number_value()?;
+                self.expect_keywords(&[Keyword::OUT, Keyword::OF])?;
+                let total = self.parse_number_value()?;
+                let on = if self.parse_keyword(Keyword::ON) {
+                    Some(self.parse_expr()?)
+                } else {
+                    None
+                };
+                self.expect_token(&Token::RParen)?;
+                TableSampleMethod::Bucket(TableSampleBucket { bucket, total, 
on })
+            } else {
+                let value = match self.maybe_parse(|p| p.parse_number_value()) 
{
+                    Ok(Some(num)) => num,
+                    _ => {

Review Comment:
   ```suggestion
                   let value = match self.maybe_parse(|p| 
p.parse_number_value())? {
                       Some(num) => num,
                       None => {
   ```
   I think an `Err` here is usually the recursion limit or a similar fatal 
error, so we can propagate it.



##########
src/parser/mod.rs:
##########
@@ -10584,6 +10598,116 @@ impl<'a> Parser<'a> {
         }
     }
 
+    fn maybe_parse_table_sample(&mut self) -> 
Result<Option<Box<TableSampleMethod>>, ParserError> {
+        if self
+            .parse_one_of_keywords(&[Keyword::SAMPLE, Keyword::TABLESAMPLE])
+            .is_none()
+        {
+            return Ok(None);
+        }
+
+        // Try to parse based on an explicit table sample method keyword
+        let sample = if self
+            .parse_one_of_keywords(&[Keyword::BERNOULLI, Keyword::ROW])
+            .is_some()
+        {
+            self.expect_token(&Token::LParen)?;
+            let expr = self.parse_expr()?;
+
+            let (probability, value, unit) = if 
self.parse_keyword(Keyword::ROWS) {
+                (None, Some(expr), Some(TableSampleUnit::Rows))
+            } else if self.parse_keyword(Keyword::PERCENT) {
+                (None, Some(expr), Some(TableSampleUnit::Percent))
+            } else {
+                (Some(expr), None, None)
+            };
+            self.expect_token(&Token::RParen)?;
+            TableSampleMethod::Bernoulli(TableSampleBernoulli {
+                probability,
+                value,
+                unit,
+            })
+        } else if self
+            .parse_one_of_keywords(&[Keyword::SYSTEM, Keyword::BLOCK])
+            .is_some()
+        {
+            self.expect_token(&Token::LParen)?;
+            let probability = self.parse_expr()?;
+            self.expect_token(&Token::RParen)?;
+            let seed = if self
+                .parse_one_of_keywords(&[Keyword::REPEATABLE, Keyword::SEED])
+                .is_some()
+            {
+                self.expect_token(&Token::LParen)?;
+                let seed = self.parse_expr()?;
+                self.expect_token(&Token::RParen)?;
+                Some(seed)
+            } else {
+                None
+            };
+            TableSampleMethod::System(TableSampleSystem {
+                probability,
+                repeatable: seed,
+            })
+        // Try to parse without an explicit table sample method keyword
+        } else if self.peek_token().token == Token::LParen {
+            self.expect_token(&Token::LParen)?;
+            if self.parse_keyword(Keyword::BUCKET) {
+                let bucket = self.parse_number_value()?;
+                self.expect_keywords(&[Keyword::OUT, Keyword::OF])?;
+                let total = self.parse_number_value()?;
+                let on = if self.parse_keyword(Keyword::ON) {
+                    Some(self.parse_expr()?)
+                } else {
+                    None
+                };
+                self.expect_token(&Token::RParen)?;
+                TableSampleMethod::Bucket(TableSampleBucket { bucket, total, 
on })
+            } else {
+                let value = match self.maybe_parse(|p| p.parse_number_value()) 
{
+                    Ok(Some(num)) => num,
+                    _ => {
+                        if let Token::Word(w) = self.next_token().token {
+                            Value::Placeholder(w.value)
+                        } else {
+                            return parser_err!(
+                                "Expecting number or byte length e.g. 100M",
+                                self.peek_token().span.start
+                            );
+                        }
+                    }
+                };
+                if self.peek_token().token == Token::RParen
+                    && !self.dialect.supports_implicit_table_sample_method()

Review Comment:
   Maybe this could be simplified as `if 
self.dialect.supports_implicit_table_sample_method() && 
self.consume_token(Token::RParen)`? It would also let us skip the 
`expect_token` that follows.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to