This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 6810368c109f3f130c3d99e614fa26e859fb4963
Author: Zoltan Borok-Nagy <[email protected]>
AuthorDate: Thu Nov 13 17:34:59 2025 +0100

    IMPALA-14552: test_show_create_table should be more strict with TBLPROPERTIES contents
    
    Currently we use this regex to parse the contents of TBLPROPERTIES:
    
      kv_regex = "'([^\']+)'\\s*=\\s*'([^\']+)'"
      kv_results = dict(re.findall(kv_regex, map_match.group(1)))
    
    This regex also accepts malformed strings such as:
     'X'='Y'='Z'
     'X'='Z'$'A'='B'
    
    This means it's easy to write strings in .test files that are not valid
    SQL. This patch adds a few extra checks to validate the TBLPROPERTIES
    contents.
    
    Change-Id: I94110f50720c01dc7807ee56c794d235f4990282
    Reviewed-on: http://gerrit.cloudera.org:8080/23671
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Riza Suminto <[email protected]>
---
 tests/metadata/test_show_create_table.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tests/metadata/test_show_create_table.py b/tests/metadata/test_show_create_table.py
index 027ff1287..c248c1794 100644
--- a/tests/metadata/test_show_create_table.py
+++ b/tests/metadata/test_show_create_table.py
@@ -41,8 +41,16 @@ def get_properties_map(sql, properties_map_name, exclusions=None):
   map_match = re.search(properties_map_regex(properties_map_name), sql)
   if map_match is None:
     return dict()
-  kv_regex = "'([^\']+)'\\s*=\\s*'([^\']+)'"
-  kv_results = dict(re.findall(kv_regex, map_match.group(1)))
+  tbl_props_contents = map_match.group(1).strip()
+  kv_regex = r"'([^']*)'\s*=\s*'([^']*)'\s*(?:,|$)"
+  kv_results = dict(re.findall(kv_regex, tbl_props_contents))
+
+  # Verify [TBL|SERDE]PROPERTIES contents
+  stripped_sql = re.sub(r'\s+', '', tbl_props_contents)
+  reconstructed = ",".join("'{}'='{}'".format(k, v) for k, v in kv_results.items())
+  stripped_reconstructed = re.sub(r'\s+', '', reconstructed.strip())
+  assert stripped_sql == stripped_reconstructed, \
+      "[TBL|SERDE]PROPERTIES contents are not valid SQL: " + tbl_props_contents
 
   if exclusions is not None:
     for filtered_key in exclusions:

Reply via email to