This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 5fd4e6a11 IMPALA-11438: Add tests for CREATE TABLE LIKE PARQUET STORED 
AS ICEBERG
5fd4e6a11 is described below

commit 5fd4e6a11ff5e8e7203fc564c84e86b54d3c0c4f
Author: Gergely Fürnstáhl <[email protected]>
AuthorDate: Mon Sep 12 14:55:55 2022 +0200

    IMPALA-11438: Add tests for CREATE TABLE LIKE PARQUET STORED AS ICEBERG
    
    Impala already supports this statement; this change adds query tests for it.
    
    Testing:
     - Generated a Parquet table containing a complex type using Hive
     - Created both a Hive table and an Iceberg table from that file; the
    describe statement produces the same output for both
    
    Change-Id: Ia363b913e101fd49b62a280721680f0eb88761c0
    Reviewed-on: http://gerrit.cloudera.org:8080/18969
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 testdata/data/README                               |   7 ++-
 .../data/create_table_like_parquet_test.parquet    | Bin 0 -> 1961 bytes
 tests/query_test/test_iceberg.py                   |  57 ++++++++++++++++++++-
 3 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/testdata/data/README b/testdata/data/README
index 329ab0938..6ac0a363e 100644
--- a/testdata/data/README
+++ b/testdata/data/README
@@ -907,4 +907,9 @@ alter table iceberg_mixed_file_format_test set 
tblproperties("write.format.defau
 insert into iceberg_mixed_file_format_test values (3, "C", 2.5);
 alter table iceberg_mixed_file_format_test set 
tblproperties("write.format.default"="orc");
 insert into iceberg_mixed_file_format_test values (4, "D", 3.5);
-Converted similarly to iceberg_v2_no_deletes
\ No newline at end of file
+Converted similarly to iceberg_v2_no_deletes
+
+create_table_like_parquet_test.parquet:
+Generated by Hive
+create table iceberg_create_table_like_parquet_test (col_int int, col_float 
float, col_double double, col_string string, col_struct struct<col_int:int, 
col_float:float>, col_array array<string>, col_map map<string,array<int>>) 
stored as parquet;
+insert into iceberg_create_table_like_parquet_test values (0, 1.0, 2.0, "3", 
named_struct("col_int", 4, "col_float", cast(5.0 as float)), 
array("6","7","8"), map("A", array(11,12), "B", array(21,22)));
\ No newline at end of file
diff --git a/testdata/data/create_table_like_parquet_test.parquet 
b/testdata/data/create_table_like_parquet_test.parquet
new file mode 100644
index 000000000..a887e1066
Binary files /dev/null and 
b/testdata/data/create_table_like_parquet_test.parquet differ
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index b8ab0cef1..6ed0bd908 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -32,7 +32,9 @@ import json
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.iceberg_test_suite import IcebergTestSuite
 from tests.common.skip import SkipIf, SkipIfDockerizedCluster
-from tests.common.file_utils import create_iceberg_table_from_directory
+from tests.common.file_utils import (
+  create_iceberg_table_from_directory,
+  create_table_from_parquet)
 from tests.shell.util import run_impala_shell_cmd
 from tests.util.filesystem_utils import get_fs_path, IS_HDFS
 from tests.util.get_parquet_metadata import get_parquet_metadata
@@ -790,6 +792,59 @@ class TestIcebergTable(IcebergTestSuite):
     self.run_test_case('QueryTest/iceberg-mixed-file-format', vector,
                       unique_database)
 
+  def _create_table_like_parquet_helper(self, vector, unique_database, 
tbl_name,
+                                        expect_success):
+    create_table_from_parquet(self.client, unique_database, tbl_name)
+    args = ['-q', "show files in {0}.{1}".format(unique_database, tbl_name)]
+    results = run_impala_shell_cmd(vector, args)
+    result_rows = results.stdout.strip().split('\n')
+    hdfs_file = None
+    for row in result_rows:
+      if "hdfs://" in row:
+        hdfs_file = row.split('|')[1].lstrip()
+        break
+    assert hdfs_file
+
+    iceberg_tbl_name = "iceberg_{0}".format(tbl_name)
+    sql_stmt = "create table {0}.{1} like parquet '{2}' stored as 
iceberg".format(
+      unique_database, iceberg_tbl_name, hdfs_file
+    )
+    args = ['-q', sql_stmt]
+
+    return run_impala_shell_cmd(vector, args, expect_success=expect_success)
+
+  def test_create_table_like_parquet(self, vector, unique_database):
+    tbl_name = 'alltypes_tiny_pages'
+    # Not all types are supported by iceberg
+    self._create_table_like_parquet_helper(vector, unique_database, tbl_name, 
False)
+
+    tbl_name = "create_table_like_parquet_test"
+    results = self._create_table_like_parquet_helper(vector, unique_database, 
tbl_name,
+                                                     True)
+    result_rows = results.stdout.strip().split('\n')
+    assert result_rows[3].split('|')[1] == ' Table has been created. '
+
+    sql_stmt = "describe {0}.{1}".format(unique_database, tbl_name)
+    args = ['-q', sql_stmt]
+    parquet_results = run_impala_shell_cmd(vector, args)
+    parquet_result_rows = parquet_results.stdout.strip().split('\n')
+
+    parquet_column_name_type_list = []
+    for row in parquet_result_rows[1:-2]:
+      parquet_column_name_type_list.append(row.split('|')[1:3])
+
+    sql_stmt = "describe {0}.iceberg_{1}".format(unique_database, tbl_name)
+    args = ['-q', sql_stmt]
+    iceberg_results = run_impala_shell_cmd(vector, args)
+    iceberg_result_rows = iceberg_results.stdout.strip().split('\n')
+
+    iceberg_column_name_type_list = []
+    for row in iceberg_result_rows[1:-2]:
+      iceberg_column_name_type_list.append(row.split('|')[1:3])
+
+    assert parquet_column_name_type_list == iceberg_column_name_type_list
+
+
 class TestIcebergV2Table(IcebergTestSuite):
   """Tests related to Iceberg V2 tables."""
 

Reply via email to