(cassandra) branch trunk updated: Ignore repetitions of semicolon in CQLSH

smiklosovic Mon, 07 Apr 2025 00:54:38 -0700

This is an automated email from the ASF dual-hosted git repository.

smiklosovic pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra.git



The following commit(s) were added to refs/heads/trunk by this push:
     new c3089b564a Ignore repetitions of semicolon in CQLSH
c3089b564a is described below

commit c3089b564aad4d8df9a23560371d31060e45d312
Author: Pedro Gordo <pedro.gordo1...@gmail.com>
AuthorDate: Sun Jan 19 17:20:42 2025 +0000

    Ignore repetitions of semicolon in CQLSH
    
    When grouping the tokens, skip statements composed by a single endtoken.
    
    patch by Pedro Gordo; reviewed by Brad Schoening, Stefan Miklosovic for CASSANDRA-19956
---
 CHANGES.txt                             |  1 +
 pylib/cqlshlib/cqlhandling.py           | 29 +++++++++++++++++++++++++++--
 pylib/cqlshlib/test/test_cql_parsing.py | 11 +++++++++++
 pylib/cqlshlib/util.py                  | 21 ---------------------
 4 files changed, 39 insertions(+), 23 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index cd2619e4cb..199716afe6 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 5.1
+ * Ignore repetitions of semicolon in CQLSH (CASSANDRA-19956)
  * Avoid NPE during cms initialization abort (CASSANDRA-20527)
  * Avoid failing queries when epoch changes and replica goes up/down (CASSANDRA-20489)
  * Split out truncation record lock (CASSANDRA-20480)
diff --git a/pylib/cqlshlib/cqlhandling.py b/pylib/cqlshlib/cqlhandling.py
index 504371c16d..2cc49fbee4 100644
--- a/pylib/cqlshlib/cqlhandling.py
+++ b/pylib/cqlshlib/cqlhandling.py
@@ -113,7 +113,9 @@ class CqlParsingRuleSet(pylexotron.ParsingRuleSet):
             curstmt.append(t)
             if t[0] == 'endtoken':
                 term_on_nl = False
-                output.extend(curstmt)
+                # skip empty statements
+                if len(curstmt) > 1:
+                    output.extend(curstmt)
                 curstmt = []
             else:
                 if len(curstmt) == 1:
@@ -135,7 +137,7 @@ class CqlParsingRuleSet(pylexotron.ParsingRuleSet):
     def cql_split_statements(self, text):
         tokens = self.lex(text)
         tokens = self.cql_massage_tokens(tokens)
-        stmts = util.split_list(tokens, lambda t: t[0] == 'endtoken')
+        stmts = self.group_tokens(tokens)
         output = []
         in_batch = False
         in_pg_string = len([st for st in tokens if len(st) > 0 and st[0] == 'unclosedPgString']) == 1
@@ -151,6 +153,29 @@ class CqlParsingRuleSet(pylexotron.ParsingRuleSet):
                     in_batch = True
         return output, in_batch or in_pg_string
 
+    def group_tokens(self, items):
+        """
+        Split an iterable into sublists, using 'endtoken' to mark the end of each sublist.
+        Each sublist accumulates elements until an 'endtoken' is encountered. If the sublist
+        consists only of a single 'endtoken', it is excluded. An empty list is added to the
+        result after the last 'endtoken' for cases like autocompletion.
+
+        Parameters:
+        - items (iterable): An iterable of tokens, including 'endtoken' elements.
+
+        Returns:
+        - list: A list of sublists, with each sublist containing tokens split by 'endtoken'.
+        """
+
+        thisresult = []
+        results = [thisresult]
+        for i in items:
+            thisresult.append(i)
+            if i[0] == 'endtoken':
+                thisresult = []
+                results.append(thisresult)
+        return results
+
     def cql_complete_single(self, text, partial, init_bindings=None, ignore_case=True,
                             startsymbol='Start'):
         tokens = (self.cql_split_statements(text)[0] or [[]])[-1]
diff --git a/pylib/cqlshlib/test/test_cql_parsing.py b/pylib/cqlshlib/test/test_cql_parsing.py
index b9eb716a78..7a98c6d88f 100644
--- a/pylib/cqlshlib/test/test_cql_parsing.py
+++ b/pylib/cqlshlib/test/test_cql_parsing.py
@@ -804,6 +804,17 @@ class TestCqlParsing(TestCase):
                                ''')
         self.assertRaises(SyntaxError)
 
+    def test_skip_duplicate_endtokens(self):
+        parsed = parse_cqlsh_statements('SELECT * FROM my_table;;;;')
+        expected_output = [
+            ('SELECT', 'reserved_identifier'),
+            ('*', 'star'),
+            ('FROM', 'reserved_identifier'),
+            ('my_table', 'identifier'),
+            (';', 'endtoken')
+        ]
+        self.assertSequenceEqual(tokens_with_types(parsed), expected_output)
+
 
 def parse_cqlsh_statements(text):
     """
diff --git a/pylib/cqlshlib/util.py b/pylib/cqlshlib/util.py
index 8874be011e..96d9bd272e 100644
--- a/pylib/cqlshlib/util.py
+++ b/pylib/cqlshlib/util.py
@@ -31,27 +31,6 @@ except ImportError:
     HAS_LINE_PROFILER = False
 
 
-def split_list(items, pred):
-    """
-    Split up a list (or other iterable) on the elements which satisfy the
-    given predicate 'pred'. Elements for which 'pred' returns true start a new
-    sublist for subsequent elements, which will accumulate in the new sublist
-    until the next satisfying element.
-
-    >>> split_list([0, 1, 2, 5, 99, 8], lambda n: (n % 2) == 0)
-    [[0], [1, 2], [5, 99, 8], []]
-    """
-
-    thisresult = []
-    results = [thisresult]
-    for i in items:
-        thisresult.append(i)
-        if pred(i):
-            thisresult = []
-            results.append(thisresult)
-    return results
-
-
 def find_common_prefix(strs):
     """
     Given a list (iterable) of strings, return the longest common prefix.


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org
For additional commands, e-mail: commits-h...@cassandra.apache.org

(cassandra) branch trunk updated: Ignore repetitions of semicolon in CQLSH

Reply via email to