This is an automated email from the ASF dual-hosted git repository. dbecker pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 580a477e6938f1c486540e3cb1408d20da1734d0 Author: Zoltan Borok-Nagy <[email protected]> AuthorDate: Thu Mar 21 14:36:38 2024 +0100 IMPALA-12879: Conjunct not referring to table field causes ERROR for Iceberg table The following query throws an error for Iceberg tables: select * from ice_tbl where rand() < 0.001; It's because the predicate 'rand() < 0.001' doesn't involve any table columns. Because of a bug in IcebergScanPlanner.hasPartitionTransformType() the method throws an IndexOutOfBoundsException. This patch fixes the method to handle such predicates. Testing: * added e2e tests Change-Id: Id43a6798df3f4cc3a0e00ac610e25aa3b5781342 Reviewed-on: http://gerrit.cloudera.org:8080/21179 Tested-by: Impala Public Jenkins <[email protected]> Reviewed-by: Gabor Kaszab <[email protected]> --- .../apache/impala/planner/IcebergScanPlanner.java | 2 +- .../queries/QueryTest/iceberg-query.test | 94 +++++++++++++++++++++- 2 files changed, 94 insertions(+), 2 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java b/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java index 221516f5d..ad383d9de 100644 --- a/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanPlanner.java @@ -809,7 +809,7 @@ public class IcebergScanPlanner { List<SlotId> slotIds = Lists.newArrayList(); expr.getIds(tupleIds, slotIds); - if (tupleIds.size() > 1) return false; + if (tupleIds.size() != 1) return false; if (!tupleIds.get(0).equals(tblRef_.getDesc().getId())) return false; for (SlotId sId : slotIds) { diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test index cd43496c2..c3f4081ea 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-query.test @@ -1185,4 +1185,96 @@ select p_int from functional_parquet.iceberg_alltypes_part where i = 1 and p_int 1 ---- TYPES INT -==== \ No newline at end of file +==== +---- QUERY +# Regression test for IMPALA-12879: Conjunct not referring to table field causes ERROR +# for Iceberg table +select * from iceberg_avro_format where rand(6) < 0.5; +---- RESULTS +1,'A',0.5,true +3,'C',2.5,false +---- TYPES +INT, STRING, DOUBLE, BOOLEAN +==== +---- QUERY +# Regression test for IMPALA-12879: Conjunct not referring to table field causes ERROR +# for Iceberg table +select * from iceberg_avro_format where rand(6) < 0.5 and string_col = 'C'; +---- RESULTS +3,'C',2.5,false +---- TYPES +INT, STRING, DOUBLE, BOOLEAN +==== +---- QUERY +# Regression test for IMPALA-12879: Conjunct not referring to table field causes ERROR +# for Iceberg table. +# Scanning iceberg_v2_positional_update_all_rows involves a UNION node which behaves +# differently if it is scheduled on 1 node or 2 nodes. Setting num_nodes is not +# allowed, so let's just check that not all rows are returned. +select count(*) < 6 from iceberg_v2_positional_update_all_rows where rand(6) < 0.5; +---- RESULTS +true +---- TYPES +BOOLEAN +==== +---- QUERY +# Regression test for IMPALA-12879: Conjunct not referring to table field causes ERROR +# for Iceberg table. Also time travel is involved +select * from iceberg_v2_positional_update_all_rows for system_version as of 5392552459484846077 where rand(6) < 0.5; +---- RESULTS +1,'a' +3,'c' +1,'A' +3,'C' +---- TYPES +INT, STRING +==== +---- QUERY +# Regression test for IMPALA-12879: Conjunct not referring to table field causes ERROR +# for Iceberg table. Also time travel is involved +select count(*) from iceberg_v2_positional_update_all_rows for system_version as of 5392552459484846077 where rand(6) < 0.5; +---- RESULTS +4 +---- TYPES +BIGINT +==== +---- QUERY +# Regression test for IMPALA-12879: Conjunct not referring to table field causes ERROR +# for Iceberg table +# iceberg_partition_evolution is written during data loading, so the results are not deterministic. +select count(*) < 100 from iceberg_partition_evolution where rand(3) < 0.1 and month = 1 and id < 100; +---- RESULTS +true +---- TYPES +BOOLEAN +==== +---- QUERY +# Regression test for IMPALA-12879: Conjunct not referring to table field causes ERROR +# for Iceberg table +# iceberg_partition_evolution is written during data loading, so the results are not deterministic. +select count(*) < 6000 from iceberg_partition_evolution where rand(3) < 0.1; +---- RESULTS +true +---- TYPES +BOOLEAN +==== +---- QUERY +# Regression test for IMPALA-12879: Conjunct not referring to table field causes ERROR +# for Iceberg table +# iceberg_partition_evolution is written during data loading, so the results are not deterministic. +select count(*) < 6000 from iceberg_partition_evolution where rand(4) < 0.1; +---- RESULTS +true +---- TYPES +BOOLEAN +==== +---- QUERY +# Regression test for IMPALA-12879: Conjunct not referring to table field causes ERROR +# for Iceberg table +# iceberg_partition_evolution is written during data loading, so the results are not deterministic. +select count(*) < 300 from iceberg_partition_evolution where rand(3) < 0.1 and month = 1; +---- RESULTS +true +---- TYPES +BOOLEAN +====
