This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 5fd4e6a11 IMPALA-11438: Add tests for CREATE TABLE LIKE PARQUET STORED
AS ICEBERG
5fd4e6a11 is described below
commit 5fd4e6a11ff5e8e7203fc564c84e86b54d3c0c4f
Author: Gergely Fürnstáhl <[email protected]>
AuthorDate: Mon Sep 12 14:55:55 2022 +0200
IMPALA-11438: Add tests for CREATE TABLE LIKE PARQUET STORED AS ICEBERG
Impala already supports this statement; this change adds query tests for it.
Testing:
- Generated parquet table with complex type by hive
- Created Hive and Iceberg tables from said file; both produce the same
output for the describe statement
Change-Id: Ia363b913e101fd49b62a280721680f0eb88761c0
Reviewed-on: http://gerrit.cloudera.org:8080/18969
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
testdata/data/README | 7 ++-
.../data/create_table_like_parquet_test.parquet | Bin 0 -> 1961 bytes
tests/query_test/test_iceberg.py | 57 ++++++++++++++++++++-
3 files changed, 62 insertions(+), 2 deletions(-)
diff --git a/testdata/data/README b/testdata/data/README
index 329ab0938..6ac0a363e 100644
--- a/testdata/data/README
+++ b/testdata/data/README
@@ -907,4 +907,9 @@ alter table iceberg_mixed_file_format_test set
tblproperties("write.format.defau
insert into iceberg_mixed_file_format_test values (3, "C", 2.5);
alter table iceberg_mixed_file_format_test set
tblproperties("write.format.default"="orc");
insert into iceberg_mixed_file_format_test values (4, "D", 3.5);
-Converted similarly to iceberg_v2_no_deletes
\ No newline at end of file
+Converted similarly to iceberg_v2_no_deletes
+
+create_table_like_parquet_test.parquet:
+Generated by Hive
+create table iceberg_create_table_like_parquet_test (col_int int, col_float
float, col_double double, col_string string, col_struct struct<col_int:int,
col_float:float>, col_array array<string>, col_map map<string,array<int>>)
stored as parquet;
+insert into iceberg_create_table_like_parquet_test values (0, 1.0, 2.0, "3",
named_struct("col_int", 4, "col_float", cast(5.0 as float)),
array("6","7","8"), map("A", array(11,12), "B", array(21,22)));
\ No newline at end of file
diff --git a/testdata/data/create_table_like_parquet_test.parquet
b/testdata/data/create_table_like_parquet_test.parquet
new file mode 100644
index 000000000..a887e1066
Binary files /dev/null and
b/testdata/data/create_table_like_parquet_test.parquet differ
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index b8ab0cef1..6ed0bd908 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -32,7 +32,9 @@ import json
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.iceberg_test_suite import IcebergTestSuite
from tests.common.skip import SkipIf, SkipIfDockerizedCluster
-from tests.common.file_utils import create_iceberg_table_from_directory
+from tests.common.file_utils import (
+ create_iceberg_table_from_directory,
+ create_table_from_parquet)
from tests.shell.util import run_impala_shell_cmd
from tests.util.filesystem_utils import get_fs_path, IS_HDFS
from tests.util.get_parquet_metadata import get_parquet_metadata
@@ -790,6 +792,59 @@ class TestIcebergTable(IcebergTestSuite):
self.run_test_case('QueryTest/iceberg-mixed-file-format', vector,
unique_database)
def _create_table_like_parquet_helper(self, vector, unique_database, tbl_name,
                                      expect_success):
  """Loads the given test Parquet file into a table, locates the table's data
  file via 'show files', then attempts CREATE TABLE ... LIKE PARQUET '<file>'
  STORED AS ICEBERG on that file. Returns the shell result of the create
  statement; 'expect_success' controls whether the shell invocation is
  expected to succeed."""
  create_table_from_parquet(self.client, unique_database, tbl_name)

  show_files_args = ['-q', "show files in {0}.{1}".format(unique_database, tbl_name)]
  show_files_result = run_impala_shell_cmd(vector, show_files_args)

  # Scan the pretty-printed output for the first row holding an HDFS path;
  # the path is in the first column between '|' separators.
  parquet_file_path = None
  for line in show_files_result.stdout.strip().split('\n'):
    if "hdfs://" in line:
      parquet_file_path = line.split('|')[1].lstrip()
      break
  assert parquet_file_path

  create_stmt = "create table {0}.{1} like parquet '{2}' stored as iceberg".format(
      unique_database, "iceberg_{0}".format(tbl_name), parquet_file_path)
  return run_impala_shell_cmd(vector, ['-q', create_stmt],
                              expect_success=expect_success)
def test_create_table_like_parquet(self, vector, unique_database):
  """Verifies CREATE TABLE LIKE PARQUET ... STORED AS ICEBERG: it must fail
  for a Parquet file containing types Iceberg does not support, and for a
  supported file it must produce an Iceberg table whose 'describe' schema
  matches that of the plain Parquet table."""
  def describe_columns(table_ref):
    # Run 'describe' through the shell and return the [name, type] column
    # pairs parsed from the pretty-printed output, skipping the header and
    # footer rows.
    result = run_impala_shell_cmd(
        vector, ['-q', "describe {0}.{1}".format(unique_database, table_ref)])
    rows = result.stdout.strip().split('\n')
    return [row.split('|')[1:3] for row in rows[1:-2]]

  # Not all types are supported by iceberg, so this create is expected to fail.
  self._create_table_like_parquet_helper(vector, unique_database,
                                         'alltypes_tiny_pages', False)

  tbl_name = "create_table_like_parquet_test"
  results = self._create_table_like_parquet_helper(vector, unique_database,
                                                   tbl_name, True)
  result_rows = results.stdout.strip().split('\n')
  assert result_rows[3].split('|')[1] == ' Table has been created. '

  # The Iceberg table created from the Parquet data file must report the same
  # column names and types as the source Parquet table.
  assert describe_columns(tbl_name) == \
      describe_columns("iceberg_{0}".format(tbl_name))
+
+
class TestIcebergV2Table(IcebergTestSuite):
"""Tests related to Iceberg V2 tables."""