This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 7167f3b4f IMPALA-13336: Fix syntax error in creating Iceberg test table on Apache Hive 3
7167f3b4f is described below

commit 7167f3b4f0b6940f36705c95b8da17941557f721
Author: stiga-huang <[email protected]>
AuthorDate: Wed Aug 28 10:43:51 2024 +0800

    IMPALA-13336: Fix syntax error in creating Iceberg test table on Apache Hive 3
    
    Apache Hive 3 doesn't support the STORED BY ICEBERG STORED AS AVRO
    syntax. When loading test data on Apache Hive 3, we convert this clause to:
      STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
      TBLPROPERTIES('write.format.default'='AVRO')
    However, when the statement also contains a LOCATION clause, the
    TBLPROPERTIES clause is emitted before the LOCATION clause, which
    causes a syntax error.
    
    In a CREATE TABLE statement, the TBLPROPERTIES clause must come after
    the LOCATION clause. This patch fixes generate-schema-statements.py to
    handle this case.
    
    Tests:
     - Verified the SQL files generated by generate-schema-statements.py
    
    Change-Id: I5b47d6dc1a2ab63d4ecea476dbab67c1ae8ca490
    Reviewed-on: http://gerrit.cloudera.org:8080/21730
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 testdata/bin/generate-schema-statements.py | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/testdata/bin/generate-schema-statements.py 
b/testdata/bin/generate-schema-statements.py
index da96eaca9..2361fd2d6 100755
--- a/testdata/bin/generate-schema-statements.py
+++ b/testdata/bin/generate-schema-statements.py
@@ -269,11 +269,22 @@ def build_create_statement(table_template, table_name, 
db_name, db_suffix,
             ("Cannot convert STORED BY ICEBERG STORED AS file_format with 
TBLPROPERTIES "
              "also in the statement:\n" + stmt)
         iceberg_file_format = re.search(r"STORED AS (\w+)", stmt).group(1)
-        stmt = re.sub(r"STORED BY ICEBERG\s+STORED AS \w+",
-                      ("STORED BY 
'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'"
-                       " TBLPROPERTIES('write.format.default'='{}')").format(
-                            iceberg_file_format),
-                      stmt)
+        # TBLPROPERTIES should be put after LOCATION
+        if "LOCATION" not in stmt:
+          stmt = re.sub(
+              r"STORED BY ICEBERG\s+STORED AS \w+",
+              ("STORED BY 
'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'"
+               " 
TBLPROPERTIES('write.format.default'='{}')").format(iceberg_file_format),
+              stmt)
+        else:
+          stmt = re.sub(
+              r"STORED BY ICEBERG\s+STORED AS \w+",
+              "STORED BY 
'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'",
+              stmt)
+          loc_clause = re.search(r"LOCATION ['\"][^\s]+['\"]", stmt).group(0)
+          stmt = stmt.replace(loc_clause,
+                              loc_clause + " 
TBLPROPERTIES('write.format.default'='{}')"
+                              .format(iceberg_file_format))
   create_stmt += stmt
   return create_stmt
 

Reply via email to