This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new f617e3648 IMPALA-11711: Virtual columns should be skipped in
'FileMetadataUtils::AddIcebergColumns'
f617e3648 is described below
commit f617e3648734ffaff655382f911d256424bcda7b
Author: LPL <[email protected]>
AuthorDate: Tue Nov 8 17:40:55 2022 +0800
IMPALA-11711: Virtual columns should be skipped in
'FileMetadataUtils::AddIcebergColumns'
In the 'FileMetadataUtils::AddIcebergColumns' method, a slot that is a
virtual column should be skipped directly. Otherwise, when we query an
Iceberg v2 table whose first column is a partition column of bool type,
a wrong position-delete result may be returned.
Testing:
- Add e2e tests
- Locally tested the results of position-based Iceberg tables
Change-Id: I58faf3df6ae8a5bcabb1d2ac9f11a6fbcd74bc24
Reviewed-on: http://gerrit.cloudera.org:8080/19223
Reviewed-by: Impala Public Jenkins <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/src/exec/file-metadata-utils.cc | 2 ++
.../queries/QueryTest/iceberg-virtual-columns.test | 39 ++++++++++++++++++++++
tests/query_test/test_iceberg.py | 3 ++
3 files changed, 44 insertions(+)
diff --git a/be/src/exec/file-metadata-utils.cc
b/be/src/exec/file-metadata-utils.cc
index aeb2ecc57..b48b3605e 100644
--- a/be/src/exec/file-metadata-utils.cc
+++ b/be/src/exec/file-metadata-utils.cc
@@ -89,6 +89,8 @@ void FileMetadataUtils::AddIcebergColumns(MemPool* mem_pool,
Tuple** template_tu
*template_tuple = Tuple::Create(tuple_desc->byte_size(), mem_pool);
}
for (const SlotDescriptor* slot_desc : scan_node_->tuple_desc()->slots()) {
+ // The partition column of Iceberg tables must not be virtual column
+ if (slot_desc->IsVirtual()) continue;
const SchemaPath& path = slot_desc->col_path();
if (path.size() != 1) continue;
const ColumnDescriptor& col_desc =
diff --git
a/testdata/workloads/functional-query/queries/QueryTest/iceberg-virtual-columns.test
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-virtual-columns.test
new file mode 100644
index 000000000..a892093b5
--- /dev/null
+++
b/testdata/workloads/functional-query/queries/QueryTest/iceberg-virtual-columns.test
@@ -0,0 +1,39 @@
+====
+---- QUERY
+create table ice_tbl (
+ col_b boolean,
+ col_i int,
+ col_bi bigint,
+ col_str string,
+ col_ts timestamp,
+ col_dt date
+) partitioned by spec (col_b) stored as iceberg;
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+insert into
+ ice_tbl
+values
+ (true, 0, 12345678900, 'Abc', '1800-01-01 00:00:00', DATE'1800-01-01'),
+ (false, 1, 12345678902, 'aBc', '1800-01-01 02:02:02', DATE'1800-01-01'),
+ (false, 3, 12345678907, 'abC', '1900-01-01 01:01:01', DATE'1900-01-01'),
+ (false, 5, 12345678908, '', '1900-01-01 02:02:02', DATE'1900-01-01');
+select count(1) from ice_tbl;
+---- RESULTS
+4
+---- TYPES
+BIGINT
+====
+---- QUERY
+select input__file__name, file__position from ice_tbl order by 1,2 desc;
+---- LABELS
+input__file__name, file__position
+---- RESULTS
+row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_tbl/data/col_b=false/.*.0.parq',2
+row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_tbl/data/col_b=false/.*.0.parq',1
+row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_tbl/data/col_b=false/.*.0.parq',0
+row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_tbl/data/col_b=true/.*.0.parq',0
+---- TYPES
+STRING, BIGINT
+====
\ No newline at end of file
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index e0caccf63..f264fd5e5 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -851,6 +851,9 @@ class TestIcebergTable(IcebergTestSuite):
def test_compute_stats(self, vector, unique_database):
self.run_test_case('QueryTest/iceberg-compute-stats', vector,
unique_database)
+ def test_virtual_columns(self, vector, unique_database):
+ self.run_test_case('QueryTest/iceberg-virtual-columns', vector,
unique_database)
+
class TestIcebergV2Table(IcebergTestSuite):
"""Tests related to Iceberg V2 tables."""