This is an automated email from the ASF dual-hosted git repository. wzhou pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit ada4090e0989805ed884e135356c6b688e7ccc96 Author: Gabor Kaszab <[email protected]> AuthorDate: Tue Mar 12 17:35:39 2024 +0100 IMPALA-12894: (part 1) Turn off the count(*) optimisation for V2 Iceberg tables This is a part 1 change that turns off the count(*) optimisation for V2 tables as there is a correctness issue with it. The reason is that Spark compaction may leave some dangling delete files that mess up the logic in Impala. Change-Id: Ida9fb04fd076c987b6b5257ad801bf30f5900237 Reviewed-on: http://gerrit.cloudera.org:8080/21139 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../org/apache/impala/analysis/SelectStmt.java | 3 +- testdata/data/README | 17 ++ ...06ac2-9987-4514-8310-505eb02c528a-00001.parquet | Bin 0 -> 672 bytes ...b945045-7ba1864b00000000_1900113267_data.0.parq | Bin 0 -> 670 bytes ...ee10b145-141d9f6900000000_502574269_data.0.parq | Bin 0 -> 620 bytes ...e10b145-141d9f6900000000_1919298510_data.0.parq | Bin 0 -> 1656 bytes ...f889878-632c88f100000001_1119661503_data.0.parq | Bin 0 -> 1651 bytes .../52100098-3c71-4111-8d7e-1c02e8343a0e-m0.avro | Bin 0 -> 4068 bytes .../a69c2096-fc8b-4365-8b7b-3b561afdd7e2-m0.avro | Bin 0 -> 3719 bytes .../a69c2096-fc8b-4365-8b7b-3b561afdd7e2-m1.avro | Bin 0 -> 4071 bytes .../aa501eb1-924a-4460-a2a0-ad577de8aef5-m0.avro | Bin 0 -> 3724 bytes .../aa501eb1-924a-4460-a2a0-ad577de8aef5-m1.avro | Bin 0 -> 3721 bytes .../aa501eb1-924a-4460-a2a0-ad577de8aef5-m2.avro | Bin 0 -> 4073 bytes .../aa501eb1-924a-4460-a2a0-ad577de8aef5-m3.avro | Bin 0 -> 3727 bytes .../f6475cdb-128e-4438-ab63-2251736670ad-m0.avro | Bin 0 -> 3721 bytes ...579-1-52100098-3c71-4111-8d7e-1c02e8343a0e.avro | Bin 0 -> 2696 bytes ...883-1-f6475cdb-128e-4438-ab63-2251736670ad.avro | Bin 0 -> 2178 bytes ...232-1-aa501eb1-924a-4460-a2a0-ad577de8aef5.avro | Bin 0 -> 2870 bytes ...811-1-a69c2096-fc8b-4365-8b7b-3b561afdd7e2.avro | Bin 0 -> 2524 bytes .../metadata/v1.metadata.json | 53 
++++ .../metadata/v2.metadata.json | 83 ++++++ .../metadata/v3.metadata.json | 112 ++++++++ .../metadata/v4.metadata.json | 139 ++++++++++ .../metadata/v5.metadata.json | 171 ++++++++++++ .../metadata/version-hint.text | 1 + .../functional/functional_schema_template.sql | 15 + .../datasets/functional/schema_constraints.csv | 1 + .../PlannerTest/iceberg-v2-tables-hash-join.test | 256 ++++++++++------- .../queries/PlannerTest/iceberg-v2-tables.test | 304 +++++++++++++-------- .../iceberg-v2-read-position-deletes-orc.test | 4 +- .../iceberg-v2-read-position-deletes.test | 17 +- tests/query_test/test_iceberg.py | 1 + 32 files changed, 971 insertions(+), 206 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java index 48c123759..fb503ba97 100644 --- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java @@ -1469,7 +1469,8 @@ public class SelectStmt extends QueryStmt { analyzer_.checkStmtExprLimit(); Table iceTable = ((FeIcebergTable) table).getIcebergApiTable(); if (Utils.hasDeleteFiles(iceTable, tableRef.getTimeTravelSpec())) { - optimizePlainCountStarQueryV2(tableRef, (FeIcebergTable)table); + // IMPALA-12894 Part1: turn off the optimisation for count(*) queries. 
+ // optimizePlainCountStarQueryV2(tableRef, (FeIcebergTable)table); } else { optimizePlainCountStarQueryV1(tableRef, iceTable); } diff --git a/testdata/data/README b/testdata/data/README index 05a56a9b8..63c0066d3 100644 --- a/testdata/data/README +++ b/testdata/data/README @@ -1090,6 +1090,23 @@ And converted the HiveCatalog metadata to HadoopCatalog metadata via scripts at And rewrote metadata content to the correct lengths with testdata/bin/rewrite-iceberg-metadata.py "" testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_lineitem_multiblock/metadata/ +iceberg_spark_compaction_with_dangling_delete: +1) Create an Iceberg table with Impala and insert some rows. + create table functional_parquet.iceberg_spark_compaction_with_dangling_delete (id int, j bigint) + STORED AS ICEBERG + TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', + 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog', + 'iceberg.table_identifier'='ice.iceberg_spark_compaction_with_dangling_delete', + 'format-version'='2'); + insert into functional_parquet.iceberg_spark_compaction_with_dangling_delete values + (1, 10), (2, 20), (3, 30), (4, 40), (5, 50); +2) Update one field of a row with Impala. This adds a new data file and a new delete file to the table. + update functional_parquet.iceberg_spark_compaction_with_dangling_delete set j = -100 where id = 4; +3) With Impala, delete the same row that was updated in step 2). This adds another delete file. + delete from functional_parquet.iceberg_spark_compaction_with_dangling_delete where id = 4; +4) Run compaction on the table with Spark. 
+ spark.sql(s"CALL hadoop_catalog.system.rewrite_data_files(table => 'ice.iceberg_spark_compaction_with_dangling_delete', options => map('min-input-files','2') )") + arrays_big.parq: Generated with RandomNestedDataGenerator.java from the following schema: { diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/data/00000-8-7d506ac2-9987-4514-8310-505eb02c528a-00001.parquet b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/data/00000-8-7d506ac2-9987-4514-8310-505eb02c528a-00001.parquet new file mode 100644 index 000000000..5330e1a99 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/data/00000-8-7d506ac2-9987-4514-8310-505eb02c528a-00001.parquet differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/data/2b4453538b945045-7ba1864b00000000_1900113267_data.0.parq b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/data/2b4453538b945045-7ba1864b00000000_1900113267_data.0.parq new file mode 100644 index 000000000..f729bd7d0 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/data/2b4453538b945045-7ba1864b00000000_1900113267_data.0.parq differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/data/3549308fee10b145-141d9f6900000000_502574269_data.0.parq b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/data/3549308fee10b145-141d9f6900000000_502574269_data.0.parq new file mode 100644 index 000000000..3359e3f29 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/data/3549308fee10b145-141d9f6900000000_502574269_data.0.parq differ diff --git 
a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/data/delete-3549308fee10b145-141d9f6900000000_1919298510_data.0.parq b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/data/delete-3549308fee10b145-141d9f6900000000_1919298510_data.0.parq new file mode 100644 index 000000000..f774b571b Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/data/delete-3549308fee10b145-141d9f6900000000_1919298510_data.0.parq differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/data/delete-ca41ed5edf889878-632c88f100000001_1119661503_data.0.parq b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/data/delete-ca41ed5edf889878-632c88f100000001_1119661503_data.0.parq new file mode 100644 index 000000000..34b1b367a Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/data/delete-ca41ed5edf889878-632c88f100000001_1119661503_data.0.parq differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/52100098-3c71-4111-8d7e-1c02e8343a0e-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/52100098-3c71-4111-8d7e-1c02e8343a0e-m0.avro new file mode 100644 index 000000000..ead6319c0 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/52100098-3c71-4111-8d7e-1c02e8343a0e-m0.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/a69c2096-fc8b-4365-8b7b-3b561afdd7e2-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/a69c2096-fc8b-4365-8b7b-3b561afdd7e2-m0.avro 
new file mode 100644 index 000000000..f70e69cfe Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/a69c2096-fc8b-4365-8b7b-3b561afdd7e2-m0.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/a69c2096-fc8b-4365-8b7b-3b561afdd7e2-m1.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/a69c2096-fc8b-4365-8b7b-3b561afdd7e2-m1.avro new file mode 100644 index 000000000..fa28b96b0 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/a69c2096-fc8b-4365-8b7b-3b561afdd7e2-m1.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/aa501eb1-924a-4460-a2a0-ad577de8aef5-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/aa501eb1-924a-4460-a2a0-ad577de8aef5-m0.avro new file mode 100644 index 000000000..da9fc77a6 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/aa501eb1-924a-4460-a2a0-ad577de8aef5-m0.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/aa501eb1-924a-4460-a2a0-ad577de8aef5-m1.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/aa501eb1-924a-4460-a2a0-ad577de8aef5-m1.avro new file mode 100644 index 000000000..59eadb274 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/aa501eb1-924a-4460-a2a0-ad577de8aef5-m1.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/aa501eb1-924a-4460-a2a0-ad577de8aef5-m2.avro 
b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/aa501eb1-924a-4460-a2a0-ad577de8aef5-m2.avro new file mode 100644 index 000000000..714bc8f56 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/aa501eb1-924a-4460-a2a0-ad577de8aef5-m2.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/aa501eb1-924a-4460-a2a0-ad577de8aef5-m3.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/aa501eb1-924a-4460-a2a0-ad577de8aef5-m3.avro new file mode 100644 index 000000000..40dad44ee Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/aa501eb1-924a-4460-a2a0-ad577de8aef5-m3.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/f6475cdb-128e-4438-ab63-2251736670ad-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/f6475cdb-128e-4438-ab63-2251736670ad-m0.avro new file mode 100644 index 000000000..3694b4c3c Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/f6475cdb-128e-4438-ab63-2251736670ad-m0.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-1208327814823543579-1-52100098-3c71-4111-8d7e-1c02e8343a0e.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-1208327814823543579-1-52100098-3c71-4111-8d7e-1c02e8343a0e.avro new file mode 100644 index 000000000..0155db639 Binary files /dev/null and 
b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-1208327814823543579-1-52100098-3c71-4111-8d7e-1c02e8343a0e.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-37664836060851883-1-f6475cdb-128e-4438-ab63-2251736670ad.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-37664836060851883-1-f6475cdb-128e-4438-ab63-2251736670ad.avro new file mode 100644 index 000000000..42478849f Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-37664836060851883-1-f6475cdb-128e-4438-ab63-2251736670ad.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-5278394901353853232-1-aa501eb1-924a-4460-a2a0-ad577de8aef5.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-5278394901353853232-1-aa501eb1-924a-4460-a2a0-ad577de8aef5.avro new file mode 100644 index 000000000..4fd883f34 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-5278394901353853232-1-aa501eb1-924a-4460-a2a0-ad577de8aef5.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-6274599306850878811-1-a69c2096-fc8b-4365-8b7b-3b561afdd7e2.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-6274599306850878811-1-a69c2096-fc8b-4365-8b7b-3b561afdd7e2.avro new file mode 100644 index 000000000..540960c35 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-6274599306850878811-1-a69c2096-fc8b-4365-8b7b-3b561afdd7e2.avro differ 
diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v1.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v1.metadata.json new file mode 100644 index 000000000..f9c477aa5 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v1.metadata.json @@ -0,0 +1,53 @@ +{ + "format-version" : 2, + "table-uuid" : "015392dc-74ef-46f8-bd70-73ef036f5446", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete", + "last-sequence-number" : 0, + "last-updated-ms" : 1710320789072, + "last-column-id" : 2, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "id", + "required" : false, + "type" : "int" + }, { + "id" : 2, + "name" : "j", + "required" : false, + "type" : "long" + } ] + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ ] + } ], + "last-partition-id" : 999, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "engine.hive.enabled" : "true", + "write.merge.mode" : "merge-on-read", + "write.format.default" : "parquet", + "write.delete.mode" : "merge-on-read", + "iceberg.catalog_location" : "/test-warehouse/iceberg_test/hadoop_catalog", + "OBJCAPABILITIES" : "EXTREAD,EXTWRITE", + "write.update.mode" : "merge-on-read", + "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler", + "iceberg.catalog" : "hadoop.catalog", + "iceberg.table_identifier" : "ice.iceberg_spark_compaction_with_dangling_delete" + }, + "current-snapshot-id" : -1, + "refs" : { }, + "snapshots" : [ ], + "statistics" : [ ], + "snapshot-log" : [ ], + "metadata-log" : [ ] +} \ No newline at end of file diff --git 
a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v2.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v2.metadata.json new file mode 100644 index 000000000..1abf3a90a --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v2.metadata.json @@ -0,0 +1,83 @@ +{ + "format-version" : 2, + "table-uuid" : "015392dc-74ef-46f8-bd70-73ef036f5446", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete", + "last-sequence-number" : 1, + "last-updated-ms" : 1710320799636, + "last-column-id" : 2, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "id", + "required" : false, + "type" : "int" + }, { + "id" : 2, + "name" : "j", + "required" : false, + "type" : "long" + } ] + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ ] + } ], + "last-partition-id" : 999, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "engine.hive.enabled" : "true", + "write.merge.mode" : "merge-on-read", + "write.format.default" : "parquet", + "write.delete.mode" : "merge-on-read", + "iceberg.catalog_location" : "/test-warehouse/iceberg_test/hadoop_catalog", + "OBJCAPABILITIES" : "EXTREAD,EXTWRITE", + "write.update.mode" : "merge-on-read", + "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler", + "iceberg.catalog" : "hadoop.catalog", + "iceberg.table_identifier" : "ice.iceberg_spark_compaction_with_dangling_delete" + }, + "current-snapshot-id" : 37664836060851883, + "refs" : { + "main" : { + "snapshot-id" : 37664836060851883, + "type" : "branch" + } + }, + "snapshots" : [ { + "sequence-number" : 1, + "snapshot-id" : 37664836060851883, + "timestamp-ms" : 1710320799636, + 
"summary" : { + "operation" : "append", + "added-data-files" : "1", + "added-records" : "5", + "added-files-size" : "670", + "changed-partition-count" : "1", + "total-records" : "5", + "total-files-size" : "670", + "total-data-files" : "1", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-37664836060851883-1-f6475cdb-128e-4438-ab63-2251736670ad.avro", + "schema-id" : 0 + } ], + "statistics" : [ ], + "snapshot-log" : [ { + "timestamp-ms" : 1710320799636, + "snapshot-id" : 37664836060851883 + } ], + "metadata-log" : [ { + "timestamp-ms" : 1710320789072, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v1.metadata.json" + } ] +} diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v3.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v3.metadata.json new file mode 100644 index 000000000..2a8de9a59 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v3.metadata.json @@ -0,0 +1,112 @@ +{ + "format-version" : 2, + "table-uuid" : "015392dc-74ef-46f8-bd70-73ef036f5446", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete", + "last-sequence-number" : 2, + "last-updated-ms" : 1710320820255, + "last-column-id" : 2, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "id", + "required" : false, + "type" : "int" + }, { + "id" : 2, + "name" : "j", + "required" : false, + "type" : "long" + } ] + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ ] + } ], + 
"last-partition-id" : 999, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "engine.hive.enabled" : "true", + "write.merge.mode" : "merge-on-read", + "write.format.default" : "parquet", + "write.delete.mode" : "merge-on-read", + "iceberg.catalog_location" : "/test-warehouse/iceberg_test/hadoop_catalog", + "OBJCAPABILITIES" : "EXTREAD,EXTWRITE", + "write.update.mode" : "merge-on-read", + "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler", + "iceberg.catalog" : "hadoop.catalog", + "iceberg.table_identifier" : "ice.iceberg_spark_compaction_with_dangling_delete" + }, + "current-snapshot-id" : 6274599306850878811, + "refs" : { + "main" : { + "snapshot-id" : 6274599306850878811, + "type" : "branch" + } + }, + "snapshots" : [ { + "sequence-number" : 1, + "snapshot-id" : 37664836060851883, + "timestamp-ms" : 1710320799636, + "summary" : { + "operation" : "append", + "added-data-files" : "1", + "added-records" : "5", + "added-files-size" : "670", + "changed-partition-count" : "1", + "total-records" : "5", + "total-files-size" : "670", + "total-data-files" : "1", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-37664836060851883-1-f6475cdb-128e-4438-ab63-2251736670ad.avro", + "schema-id" : 0 + }, { + "sequence-number" : 2, + "snapshot-id" : 6274599306850878811, + "parent-snapshot-id" : 37664836060851883, + "timestamp-ms" : 1710320820255, + "summary" : { + "operation" : "overwrite", + "added-data-files" : "1", + "added-position-delete-files" : "1", + "added-delete-files" : "1", + "added-records" : "1", + "added-files-size" : "2276", + "added-position-deletes" : "1", + "changed-partition-count" : "1", + "total-records" : "6", + "total-files-size" : "2946", + "total-data-files" : "2", + "total-delete-files" : 
"1", + "total-position-deletes" : "1", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-6274599306850878811-1-a69c2096-fc8b-4365-8b7b-3b561afdd7e2.avro", + "schema-id" : 0 + } ], + "statistics" : [ ], + "snapshot-log" : [ { + "timestamp-ms" : 1710320799636, + "snapshot-id" : 37664836060851883 + }, { + "timestamp-ms" : 1710320820255, + "snapshot-id" : 6274599306850878811 + } ], + "metadata-log" : [ { + "timestamp-ms" : 1710320789072, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v1.metadata.json" + }, { + "timestamp-ms" : 1710320799636, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v2.metadata.json" + } ] +} diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v4.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v4.metadata.json new file mode 100644 index 000000000..ca3554477 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v4.metadata.json @@ -0,0 +1,139 @@ +{ + "format-version" : 2, + "table-uuid" : "015392dc-74ef-46f8-bd70-73ef036f5446", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete", + "last-sequence-number" : 3, + "last-updated-ms" : 1710320826211, + "last-column-id" : 2, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "id", + "required" : false, + "type" : "int" + }, { + "id" : 2, + "name" : "j", + "required" : false, + "type" : "long" + } ] + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ ] + } ], + 
"last-partition-id" : 999, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "engine.hive.enabled" : "true", + "write.merge.mode" : "merge-on-read", + "write.format.default" : "parquet", + "write.delete.mode" : "merge-on-read", + "iceberg.catalog_location" : "/test-warehouse/iceberg_test/hadoop_catalog", + "OBJCAPABILITIES" : "EXTREAD,EXTWRITE", + "write.update.mode" : "merge-on-read", + "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler", + "iceberg.catalog" : "hadoop.catalog", + "iceberg.table_identifier" : "ice.iceberg_spark_compaction_with_dangling_delete" + }, + "current-snapshot-id" : 1208327814823543579, + "refs" : { + "main" : { + "snapshot-id" : 1208327814823543579, + "type" : "branch" + } + }, + "snapshots" : [ { + "sequence-number" : 1, + "snapshot-id" : 37664836060851883, + "timestamp-ms" : 1710320799636, + "summary" : { + "operation" : "append", + "added-data-files" : "1", + "added-records" : "5", + "added-files-size" : "670", + "changed-partition-count" : "1", + "total-records" : "5", + "total-files-size" : "670", + "total-data-files" : "1", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-37664836060851883-1-f6475cdb-128e-4438-ab63-2251736670ad.avro", + "schema-id" : 0 + }, { + "sequence-number" : 2, + "snapshot-id" : 6274599306850878811, + "parent-snapshot-id" : 37664836060851883, + "timestamp-ms" : 1710320820255, + "summary" : { + "operation" : "overwrite", + "added-data-files" : "1", + "added-position-delete-files" : "1", + "added-delete-files" : "1", + "added-records" : "1", + "added-files-size" : "2276", + "added-position-deletes" : "1", + "changed-partition-count" : "1", + "total-records" : "6", + "total-files-size" : "2946", + "total-data-files" : "2", + "total-delete-files" : 
"1", + "total-position-deletes" : "1", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-6274599306850878811-1-a69c2096-fc8b-4365-8b7b-3b561afdd7e2.avro", + "schema-id" : 0 + }, { + "sequence-number" : 3, + "snapshot-id" : 1208327814823543579, + "parent-snapshot-id" : 6274599306850878811, + "timestamp-ms" : 1710320826211, + "summary" : { + "operation" : "overwrite", + "added-position-delete-files" : "1", + "added-delete-files" : "1", + "added-files-size" : "1651", + "added-position-deletes" : "1", + "changed-partition-count" : "1", + "total-records" : "6", + "total-files-size" : "4597", + "total-data-files" : "2", + "total-delete-files" : "2", + "total-position-deletes" : "2", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-1208327814823543579-1-52100098-3c71-4111-8d7e-1c02e8343a0e.avro", + "schema-id" : 0 + } ], + "statistics" : [ ], + "snapshot-log" : [ { + "timestamp-ms" : 1710320799636, + "snapshot-id" : 37664836060851883 + }, { + "timestamp-ms" : 1710320820255, + "snapshot-id" : 6274599306850878811 + }, { + "timestamp-ms" : 1710320826211, + "snapshot-id" : 1208327814823543579 + } ], + "metadata-log" : [ { + "timestamp-ms" : 1710320789072, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v1.metadata.json" + }, { + "timestamp-ms" : 1710320799636, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v2.metadata.json" + }, { + "timestamp-ms" : 1710320820255, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v3.metadata.json" + } ] +} diff --git 
a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v5.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v5.metadata.json new file mode 100644 index 000000000..2ba0be109 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v5.metadata.json @@ -0,0 +1,171 @@ +{ + "format-version" : 2, + "table-uuid" : "015392dc-74ef-46f8-bd70-73ef036f5446", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete", + "last-sequence-number" : 4, + "last-updated-ms" : 1710320834982, + "last-column-id" : 2, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "fields" : [ { + "id" : 1, + "name" : "id", + "required" : false, + "type" : "int" + }, { + "id" : 2, + "name" : "j", + "required" : false, + "type" : "long" + } ] + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ ] + } ], + "last-partition-id" : 999, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "engine.hive.enabled" : "true", + "write.merge.mode" : "merge-on-read", + "write.format.default" : "parquet", + "write.delete.mode" : "merge-on-read", + "iceberg.catalog_location" : "/test-warehouse/iceberg_test/hadoop_catalog", + "OBJCAPABILITIES" : "EXTREAD,EXTWRITE", + "write.update.mode" : "merge-on-read", + "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler", + "iceberg.catalog" : "hadoop.catalog", + "iceberg.table_identifier" : "ice.iceberg_spark_compaction_with_dangling_delete" + }, + "current-snapshot-id" : 5278394901353853232, + "refs" : { + "main" : { + "snapshot-id" : 5278394901353853232, + "type" : "branch" + } + }, + "snapshots" : [ { + "sequence-number" : 1, + "snapshot-id" : 37664836060851883, + "timestamp-ms" : 
1710320799636, + "summary" : { + "operation" : "append", + "added-data-files" : "1", + "added-records" : "5", + "added-files-size" : "670", + "changed-partition-count" : "1", + "total-records" : "5", + "total-files-size" : "670", + "total-data-files" : "1", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-37664836060851883-1-f6475cdb-128e-4438-ab63-2251736670ad.avro", + "schema-id" : 0 + }, { + "sequence-number" : 2, + "snapshot-id" : 6274599306850878811, + "parent-snapshot-id" : 37664836060851883, + "timestamp-ms" : 1710320820255, + "summary" : { + "operation" : "overwrite", + "added-data-files" : "1", + "added-position-delete-files" : "1", + "added-delete-files" : "1", + "added-records" : "1", + "added-files-size" : "2276", + "added-position-deletes" : "1", + "changed-partition-count" : "1", + "total-records" : "6", + "total-files-size" : "2946", + "total-data-files" : "2", + "total-delete-files" : "1", + "total-position-deletes" : "1", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-6274599306850878811-1-a69c2096-fc8b-4365-8b7b-3b561afdd7e2.avro", + "schema-id" : 0 + }, { + "sequence-number" : 3, + "snapshot-id" : 1208327814823543579, + "parent-snapshot-id" : 6274599306850878811, + "timestamp-ms" : 1710320826211, + "summary" : { + "operation" : "overwrite", + "added-position-delete-files" : "1", + "added-delete-files" : "1", + "added-files-size" : "1651", + "added-position-deletes" : "1", + "changed-partition-count" : "1", + "total-records" : "6", + "total-files-size" : "4597", + "total-data-files" : "2", + "total-delete-files" : "2", + "total-position-deletes" : "2", + "total-equality-deletes" : "0" + }, + "manifest-list" : 
"/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-1208327814823543579-1-52100098-3c71-4111-8d7e-1c02e8343a0e.avro", + "schema-id" : 0 + }, { + "sequence-number" : 4, + "snapshot-id" : 5278394901353853232, + "parent-snapshot-id" : 1208327814823543579, + "timestamp-ms" : 1710320834982, + "summary" : { + "operation" : "replace", + "added-data-files" : "1", + "deleted-data-files" : "2", + "removed-position-delete-files" : "1", + "removed-delete-files" : "1", + "added-records" : "4", + "deleted-records" : "6", + "added-files-size" : "672", + "removed-files-size" : "2946", + "removed-position-deletes" : "1", + "changed-partition-count" : "1", + "total-records" : "4", + "total-files-size" : "2323", + "total-data-files" : "1", + "total-delete-files" : "1", + "total-position-deletes" : "1", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/snap-5278394901353853232-1-aa501eb1-924a-4460-a2a0-ad577de8aef5.avro", + "schema-id" : 0 + } ], + "statistics" : [ ], + "snapshot-log" : [ { + "timestamp-ms" : 1710320799636, + "snapshot-id" : 37664836060851883 + }, { + "timestamp-ms" : 1710320820255, + "snapshot-id" : 6274599306850878811 + }, { + "timestamp-ms" : 1710320826211, + "snapshot-id" : 1208327814823543579 + }, { + "timestamp-ms" : 1710320834982, + "snapshot-id" : 5278394901353853232 + } ], + "metadata-log" : [ { + "timestamp-ms" : 1710320789072, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v1.metadata.json" + }, { + "timestamp-ms" : 1710320799636, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v2.metadata.json" + }, { + "timestamp-ms" : 1710320820255, + "metadata-file" : 
"/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v3.metadata.json" + }, { + "timestamp-ms" : 1710320826211, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/v4.metadata.json" + } ] +} diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/version-hint.text b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/version-hint.text new file mode 100644 index 000000000..7813681f5 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete/metadata/version-hint.text @@ -0,0 +1 @@ +5 \ No newline at end of file diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index 286e96c42..f4f9fa63d 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -3902,6 +3902,21 @@ hadoop fs -Ddfs.block.size=1048576 -put -f ${IMPALA_HOME}/testdata/LineItemMulti ---- DATASET functional ---- BASE_TABLE_NAME +iceberg_spark_compaction_with_dangling_delete +---- CREATE +CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} +STORED AS ICEBERG +TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', + 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog', + 'iceberg.table_identifier'='ice.iceberg_spark_compaction_with_dangling_delete', + 'format-version'='2'); +---- DEPENDENT_LOAD +`hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \ +hadoop fs -Ddfs.block.size=1048576 -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_spark_compaction_with_dangling_delete /test-warehouse/iceberg_test/hadoop_catalog/ice +==== +---- DATASET +functional +---- BASE_TABLE_NAME 
mv1_alltypes_jointbl ---- HIVE_MAJOR_VERSION 3 diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv index 6c1e82053..1de78011c 100644 --- a/testdata/datasets/functional/schema_constraints.csv +++ b/testdata/datasets/functional/schema_constraints.csv @@ -105,6 +105,7 @@ table_name:iceberg_mixed_file_format, constraint:restrict_to, table_format:parqu table_name:iceberg_test_metadata, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_lineitem_multiblock, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_lineitem_sixblocks, constraint:restrict_to, table_format:parquet/none/none +table_name:iceberg_spark_compaction_with_dangling_delete, constraint:restrict_to, table_format:parquet/none/none # TODO: Support Avro. Data loading currently fails for Avro because complex types # cannot be converted to the corresponding Avro types yet. diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables-hash-join.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables-hash-join.test index a43b98c5b..741373461 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables-hash-join.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables-hash-join.test @@ -357,99 +357,135 @@ SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files ---- PLAN PLAN-ROOT SINK | -03:AGGREGATE [FINALIZE] +05:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B cardinality=1 | -02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| row-size=20B cardinality=3 +04:UNION +| pass-through-operands: all +| row-size=20B cardinality=10 | -|--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| HDFS partitions=1/1 files=1 
size=2.63KB +|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] +| | row-size=20B cardinality=3 +| | +| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=1 size=2.63KB +| | Iceberg snapshot id: 752781918366351945 +| | row-size=267B cardinality=1 +| | +| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS partitions=1/1 files=1 size=625B | Iceberg snapshot id: 752781918366351945 -| row-size=267B cardinality=1 +| row-size=20B cardinality=3 | -00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] - HDFS partitions=1/1 files=1 size=625B +03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] + HDFS partitions=1/1 files=3 size=1.83KB Iceberg snapshot id: 752781918366351945 - row-size=20B cardinality=3 + row-size=20B cardinality=7 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -06:AGGREGATE [FINALIZE] +08:AGGREGATE [FINALIZE] | output: count:merge(*) | row-size=8B cardinality=1 | -05:EXCHANGE [UNPARTITIONED] +07:EXCHANGE [UNPARTITIONED] | -03:AGGREGATE +05:AGGREGATE | output: count(*) | row-size=8B cardinality=1 | -02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST] -| row-size=20B cardinality=3 +04:UNION +| pass-through-operands: all +| row-size=20B cardinality=10 | -|--04:EXCHANGE [BROADCAST] +|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, BROADCAST] +| | row-size=20B cardinality=3 +| | +| |--06:EXCHANGE [BROADCAST] +| | | +| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=1 size=2.63KB +| | Iceberg snapshot id: 752781918366351945 +| | row-size=267B cardinality=1 | | -| 01:SCAN 
HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| HDFS partitions=1/1 files=1 size=2.63KB +| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS partitions=1/1 files=1 size=625B | Iceberg snapshot id: 752781918366351945 -| row-size=267B cardinality=1 +| row-size=20B cardinality=3 | -00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] - HDFS partitions=1/1 files=1 size=625B +03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] + HDFS partitions=1/1 files=3 size=1.83KB Iceberg snapshot id: 752781918366351945 - row-size=20B cardinality=3 + row-size=20B cardinality=7 ==== SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files; ---- PLAN PLAN-ROOT SINK | -03:AGGREGATE [FINALIZE] +05:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B cardinality=1 | -02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| row-size=20B cardinality=6 +04:UNION +| pass-through-operands: all +| row-size=20B cardinality=10 | -|--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| HDFS partitions=1/1 files=2 size=5.33KB +|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] +| | row-size=20B cardinality=6 +| | +| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=2 size=5.33KB +| | Iceberg snapshot id: 1497619269847778439 +| | row-size=267B cardinality=4 +| | +| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS 
partitions=1/1 files=2 size=1.22KB | Iceberg snapshot id: 1497619269847778439 -| row-size=267B cardinality=4 +| row-size=20B cardinality=6 | -00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB Iceberg snapshot id: 1497619269847778439 - row-size=20B cardinality=6 + row-size=20B cardinality=4 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -07:AGGREGATE [FINALIZE] +09:AGGREGATE [FINALIZE] | output: count:merge(*) | row-size=8B cardinality=1 | -06:EXCHANGE [UNPARTITIONED] +08:EXCHANGE [UNPARTITIONED] | -03:AGGREGATE +05:AGGREGATE | output: count(*) | row-size=8B cardinality=1 | -02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] -| row-size=20B cardinality=6 +04:UNION +| pass-through-operands: all +| row-size=20B cardinality=10 | -|--05:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] +|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] +| | row-size=20B cardinality=6 | | -| 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| HDFS partitions=1/1 files=2 size=5.33KB +| |--07:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] +| | | +| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=2 size=5.33KB +| | Iceberg snapshot 
id: 1497619269847778439 +| | row-size=267B cardinality=4 +| | +| 06:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] +| | +| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS partitions=1/1 files=2 size=1.22KB | Iceberg snapshot id: 1497619269847778439 -| row-size=267B cardinality=4 -| -04:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] +| row-size=20B cardinality=6 | -00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB Iceberg snapshot id: 1497619269847778439 - row-size=20B cardinality=6 + row-size=20B cardinality=4 ==== SELECT * from iceberg_v2_positional_update_all_rows ---- PLAN @@ -1170,117 +1206,153 @@ u3 as (select count(*) from iceberg_v2_positional_not_all_data_files_have_delete ---- PLAN PLAN-ROOT SINK | -10:NESTED LOOP JOIN [CROSS JOIN] +14:NESTED LOOP JOIN [CROSS JOIN] | row-size=17B cardinality=1 | -|--08:AGGREGATE [FINALIZE] +|--12:AGGREGATE [FINALIZE] | | output: count(*) | | row-size=8B cardinality=1 | | -| 07:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| | row-size=20B cardinality=6 +| 11:UNION +| | pass-through-operands: all +| | row-size=20B cardinality=10 | | -| |--06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-06 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=2 size=5.33KB +| |--09:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] +| | | row-size=20B cardinality=6 +| | | +| | |--08:SCAN 
HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-08 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | | HDFS partitions=1/1 files=2 size=5.33KB +| | | Iceberg snapshot id: 1497619269847778439 +| | | row-size=267B cardinality=4 +| | | +| | 07:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| | HDFS partitions=1/1 files=2 size=1.22KB | | Iceberg snapshot id: 1497619269847778439 -| | row-size=267B cardinality=4 +| | row-size=20B cardinality=6 | | -| 05:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| 10:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB | Iceberg snapshot id: 1497619269847778439 -| row-size=20B cardinality=6 +| row-size=20B cardinality=4 | -09:NESTED LOOP JOIN [CROSS JOIN] +13:NESTED LOOP JOIN [CROSS JOIN] | row-size=9B cardinality=1 | -|--04:UNION +|--06:UNION | constant-operands=1 | row-size=1B cardinality=1 | -03:AGGREGATE [FINALIZE] +05:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B cardinality=1 | -02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] -| row-size=20B cardinality=6 +04:UNION +| pass-through-operands: all +| row-size=20B cardinality=10 | -|--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| HDFS partitions=1/1 files=2 size=5.33KB +|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN] +| | row-size=20B cardinality=6 +| | +| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=2 size=5.33KB +| | Iceberg snapshot id: 1497619269847778439 +| | 
row-size=267B cardinality=4 +| | +| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS partitions=1/1 files=2 size=1.22KB | Iceberg snapshot id: 1497619269847778439 -| row-size=267B cardinality=4 +| row-size=20B cardinality=6 | -00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB Iceberg snapshot id: 1497619269847778439 - row-size=20B cardinality=6 + row-size=20B cardinality=4 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -10:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +14:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] | row-size=17B cardinality=1 | -|--20:EXCHANGE [UNPARTITIONED] +|--24:EXCHANGE [UNPARTITIONED] | | -| 19:AGGREGATE [FINALIZE] +| 23:AGGREGATE [FINALIZE] | | output: count:merge(*) | | row-size=8B cardinality=1 | | -| 18:EXCHANGE [UNPARTITIONED] +| 22:EXCHANGE [UNPARTITIONED] | | -| 08:AGGREGATE +| 12:AGGREGATE | | output: count(*) | | row-size=8B cardinality=1 | | -| 07:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] -| | row-size=20B cardinality=6 +| 11:UNION +| | pass-through-operands: all +| | row-size=20B cardinality=10 | | -| |--17:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] +| |--09:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] +| | | row-size=20B cardinality=6 | | | -| | 06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-06 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=2 size=5.33KB +| | |--21:EXCHANGE 
[HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] +| | | | +| | | 08:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-08 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | | HDFS partitions=1/1 files=2 size=5.33KB +| | | Iceberg snapshot id: 1497619269847778439 +| | | row-size=267B cardinality=4 +| | | +| | 20:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] +| | | +| | 07:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| | HDFS partitions=1/1 files=2 size=1.22KB | | Iceberg snapshot id: 1497619269847778439 -| | row-size=267B cardinality=4 -| | -| 16:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] +| | row-size=20B cardinality=6 | | -| 05:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| 10:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB | Iceberg snapshot id: 1497619269847778439 -| row-size=20B cardinality=6 +| row-size=20B cardinality=4 | -09:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +13:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] | row-size=9B cardinality=1 | -|--15:EXCHANGE [UNPARTITIONED] +|--19:EXCHANGE [UNPARTITIONED] | | -| 04:UNION +| 06:UNION | constant-operands=1 | row-size=1B cardinality=1 | -14:AGGREGATE [FINALIZE] +18:AGGREGATE [FINALIZE] | output: count:merge(*) | row-size=8B cardinality=1 | -13:EXCHANGE 
[UNPARTITIONED] +17:EXCHANGE [UNPARTITIONED] | -03:AGGREGATE +05:AGGREGATE | output: count(*) | row-size=8B cardinality=1 | -02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] -| row-size=20B cardinality=6 +04:UNION +| pass-through-operands: all +| row-size=20B cardinality=10 | -|--12:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] +|--02:DELETE EVENTS HASH JOIN [LEFT ANTI JOIN, PARTITIONED] +| | row-size=20B cardinality=6 | | -| 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| HDFS partitions=1/1 files=2 size=5.33KB +| |--16:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.pos,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete.file_path)] +| | | +| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=2 size=5.33KB +| | Iceberg snapshot id: 1497619269847778439 +| | row-size=267B cardinality=4 +| | +| 15:EXCHANGE [HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] +| | +| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS partitions=1/1 files=2 size=1.22KB | Iceberg snapshot id: 1497619269847778439 -| row-size=267B cardinality=4 -| -11:EXCHANGE 
[HASH(functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.file__position,functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files.input__file__name)] +| row-size=20B cardinality=6 | -00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB Iceberg snapshot id: 1497619269847778439 - row-size=20B cardinality=6 + row-size=20B cardinality=4 ==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test index 549a9b7da..a463dcd97 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test @@ -353,145 +353,199 @@ SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files ---- PLAN PLAN-ROOT SINK | -03:AGGREGATE [FINALIZE] +05:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B cardinality=1 | -02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] -| row-size=20B cardinality=2 +04:UNION +| pass-through-operands: all +| row-size=20B cardinality=9 | -|--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| HDFS partitions=1/1 files=1 size=2.63KB +|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] +| | row-size=20B cardinality=2 +| | +| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=1 size=2.63KB +| | Iceberg snapshot id: 752781918366351945 +| | 
row-size=267B cardinality=1 +| | +| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS partitions=1/1 files=1 size=625B | Iceberg snapshot id: 752781918366351945 -| row-size=267B cardinality=1 +| row-size=20B cardinality=3 | -00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] - HDFS partitions=1/1 files=1 size=625B +03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] + HDFS partitions=1/1 files=3 size=1.83KB Iceberg snapshot id: 752781918366351945 - row-size=20B cardinality=3 + row-size=20B cardinality=7 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -06:AGGREGATE [FINALIZE] +08:AGGREGATE [FINALIZE] | output: count:merge(*) | row-size=8B cardinality=1 | -05:EXCHANGE [UNPARTITIONED] +07:EXCHANGE [UNPARTITIONED] | -03:AGGREGATE +05:AGGREGATE | output: count(*) | row-size=8B cardinality=1 | -02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] -| row-size=20B cardinality=2 +04:UNION +| pass-through-operands: all +| row-size=20B cardinality=9 | -|--04:EXCHANGE [DIRECTED] +|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] +| | row-size=20B cardinality=2 | | -| 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| HDFS partitions=1/1 files=1 size=2.63KB +| |--06:EXCHANGE [DIRECTED] +| | | +| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=1 size=2.63KB +| | Iceberg snapshot id: 752781918366351945 +| | row-size=267B cardinality=1 +| | +| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS partitions=1/1 files=1 size=625B | Iceberg 
snapshot id: 752781918366351945 -| row-size=267B cardinality=1 +| row-size=20B cardinality=3 | -00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] - HDFS partitions=1/1 files=1 size=625B +03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] + HDFS partitions=1/1 files=3 size=1.83KB Iceberg snapshot id: 752781918366351945 - row-size=20B cardinality=3 + row-size=20B cardinality=7 ==== SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files for system_time as of "2022-08-19 13:50:00"; ---- PLAN PLAN-ROOT SINK | -03:AGGREGATE [FINALIZE] +05:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B cardinality=1 | -02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] -| row-size=20B cardinality=2 +04:UNION +| pass-through-operands: all +| row-size=20B cardinality=9 | -|--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| HDFS partitions=1/1 files=1 size=2.63KB +|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] +| | row-size=20B cardinality=2 +| | +| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=1 size=2.63KB +| | Iceberg snapshot id: 752781918366351945 +| | row-size=267B cardinality=1 +| | +| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS partitions=1/1 files=1 size=625B | Iceberg snapshot id: 752781918366351945 -| row-size=267B cardinality=1 +| row-size=20B cardinality=3 | -00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] - HDFS partitions=1/1 files=1 size=625B +03:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] + HDFS partitions=1/1 files=3 size=1.83KB Iceberg snapshot id: 752781918366351945 - row-size=20B cardinality=3 + row-size=20B cardinality=7 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -06:AGGREGATE [FINALIZE] +08:AGGREGATE [FINALIZE] | output: count:merge(*) | row-size=8B cardinality=1 | -05:EXCHANGE [UNPARTITIONED] +07:EXCHANGE [UNPARTITIONED] | -03:AGGREGATE +05:AGGREGATE | output: count(*) | row-size=8B cardinality=1 | -02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] -| row-size=20B cardinality=2 +04:UNION +| pass-through-operands: all +| row-size=20B cardinality=9 | -|--04:EXCHANGE [DIRECTED] +|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] +| | row-size=20B cardinality=2 | | -| 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| HDFS partitions=1/1 files=1 size=2.63KB +| |--06:EXCHANGE [DIRECTED] +| | | +| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=1 size=2.63KB +| | Iceberg snapshot id: 752781918366351945 +| | row-size=267B cardinality=1 +| | +| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS partitions=1/1 files=1 size=625B | Iceberg snapshot id: 752781918366351945 -| row-size=267B cardinality=1 +| row-size=20B cardinality=3 | -00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] - HDFS partitions=1/1 files=1 size=625B +03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] + HDFS partitions=1/1 files=3 size=1.83KB Iceberg snapshot id: 752781918366351945 - row-size=20B 
cardinality=3 + row-size=20B cardinality=7 ==== SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files; ---- PLAN PLAN-ROOT SINK | -03:AGGREGATE [FINALIZE] +05:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B cardinality=1 | -02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] -| row-size=20B cardinality=2 +04:UNION +| pass-through-operands: all +| row-size=20B cardinality=6 | -|--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| HDFS partitions=1/1 files=2 size=5.33KB +|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] +| | row-size=20B cardinality=2 +| | +| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=2 size=5.33KB +| | Iceberg snapshot id: 1497619269847778439 +| | row-size=267B cardinality=4 +| | +| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS partitions=1/1 files=2 size=1.22KB | Iceberg snapshot id: 1497619269847778439 -| row-size=267B cardinality=4 +| row-size=20B cardinality=6 | -00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB Iceberg snapshot id: 1497619269847778439 - row-size=20B cardinality=6 + row-size=20B cardinality=4 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -06:AGGREGATE [FINALIZE] +08:AGGREGATE [FINALIZE] | output: count:merge(*) | row-size=8B cardinality=1 | -05:EXCHANGE [UNPARTITIONED] +07:EXCHANGE [UNPARTITIONED] | -03:AGGREGATE +05:AGGREGATE | output: count(*) | row-size=8B cardinality=1 | -02:DELETE EVENTS ICEBERG DELETE [ICEBERG 
DELETE JOIN, DIRECTED] -| row-size=20B cardinality=2 +04:UNION +| pass-through-operands: all +| row-size=20B cardinality=6 | -|--04:EXCHANGE [DIRECTED] +|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] +| | row-size=20B cardinality=2 | | -| 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| HDFS partitions=1/1 files=2 size=5.33KB +| |--06:EXCHANGE [DIRECTED] +| | | +| | 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=2 size=5.33KB +| | Iceberg snapshot id: 1497619269847778439 +| | row-size=267B cardinality=4 +| | +| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS partitions=1/1 files=2 size=1.22KB | Iceberg snapshot id: 1497619269847778439 -| row-size=267B cardinality=4 +| row-size=20B cardinality=6 | -00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB Iceberg snapshot id: 1497619269847778439 - row-size=20B cardinality=6 + row-size=20B cardinality=4 ==== SELECT * from iceberg_v2_positional_update_all_rows ---- PLAN @@ -1202,115 +1256,151 @@ u3 as (select count(*) from iceberg_v2_positional_not_all_data_files_have_delete ---- PLAN PLAN-ROOT SINK | -10:NESTED LOOP JOIN [CROSS JOIN] +14:NESTED LOOP JOIN [CROSS JOIN] | row-size=17B cardinality=1 | -|--08:AGGREGATE [FINALIZE] +|--12:AGGREGATE [FINALIZE] | | output: count(*) | | row-size=8B cardinality=1 | | -| 07:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] -| | row-size=20B cardinality=2 +| 11:UNION +| | pass-through-operands: all 
+| | row-size=20B cardinality=6 | | -| |--06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-06 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=2 size=5.33KB +| |--09:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] +| | | row-size=20B cardinality=2 +| | | +| | |--08:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-08 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | | HDFS partitions=1/1 files=2 size=5.33KB +| | | Iceberg snapshot id: 1497619269847778439 +| | | row-size=267B cardinality=4 +| | | +| | 07:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| | HDFS partitions=1/1 files=2 size=1.22KB | | Iceberg snapshot id: 1497619269847778439 -| | row-size=267B cardinality=4 +| | row-size=20B cardinality=6 | | -| 05:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| 10:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB | Iceberg snapshot id: 1497619269847778439 -| row-size=20B cardinality=6 +| row-size=20B cardinality=4 | -09:NESTED LOOP JOIN [CROSS JOIN] +13:NESTED LOOP JOIN [CROSS JOIN] | row-size=9B cardinality=1 | -|--04:UNION +|--06:UNION | constant-operands=1 | row-size=1B cardinality=1 | -03:AGGREGATE [FINALIZE] +05:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B cardinality=1 | -02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] -| row-size=20B cardinality=2 +04:UNION +| pass-through-operands: all +| row-size=20B cardinality=6 | -|--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| HDFS 
partitions=1/1 files=2 size=5.33KB +|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN] +| | row-size=20B cardinality=2 +| | +| |--01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=2 size=5.33KB +| | Iceberg snapshot id: 1497619269847778439 +| | row-size=267B cardinality=4 +| | +| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS partitions=1/1 files=2 size=1.22KB | Iceberg snapshot id: 1497619269847778439 -| row-size=267B cardinality=4 +| row-size=20B cardinality=6 | -00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB Iceberg snapshot id: 1497619269847778439 - row-size=20B cardinality=6 + row-size=20B cardinality=4 ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -10:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +14:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] | row-size=17B cardinality=1 | -|--18:EXCHANGE [UNPARTITIONED] +|--22:EXCHANGE [UNPARTITIONED] | | -| 17:AGGREGATE [FINALIZE] +| 21:AGGREGATE [FINALIZE] | | output: count:merge(*) | | row-size=8B cardinality=1 | | -| 16:EXCHANGE [UNPARTITIONED] +| 20:EXCHANGE [UNPARTITIONED] | | -| 08:AGGREGATE +| 12:AGGREGATE | | output: count(*) | | row-size=8B cardinality=1 | | -| 07:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] -| | row-size=20B cardinality=2 +| 11:UNION +| | pass-through-operands: all +| | row-size=20B cardinality=6 | | -| |--15:EXCHANGE [DIRECTED] +| |--09:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] +| | | row-size=20B cardinality=2 | | | -| | 06:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-06 
functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| | HDFS partitions=1/1 files=2 size=5.33KB +| | |--19:EXCHANGE [DIRECTED] +| | | | +| | | 08:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-08 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | | HDFS partitions=1/1 files=2 size=5.33KB +| | | Iceberg snapshot id: 1497619269847778439 +| | | row-size=267B cardinality=4 +| | | +| | 07:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| | HDFS partitions=1/1 files=2 size=1.22KB | | Iceberg snapshot id: 1497619269847778439 -| | row-size=267B cardinality=4 +| | row-size=20B cardinality=6 | | -| 05:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| 10:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] | HDFS partitions=1/1 files=2 size=1.22KB | Iceberg snapshot id: 1497619269847778439 -| row-size=20B cardinality=6 +| row-size=20B cardinality=4 | -09:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +13:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] | row-size=9B cardinality=1 | -|--14:EXCHANGE [UNPARTITIONED] +|--18:EXCHANGE [UNPARTITIONED] | | -| 04:UNION +| 06:UNION | constant-operands=1 | row-size=1B cardinality=1 | -13:AGGREGATE [FINALIZE] +17:AGGREGATE [FINALIZE] | output: count:merge(*) | row-size=8B cardinality=1 | -12:EXCHANGE [UNPARTITIONED] +16:EXCHANGE [UNPARTITIONED] | -03:AGGREGATE +05:AGGREGATE | output: count(*) | row-size=8B cardinality=1 | -02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] -| row-size=20B cardinality=2 +04:UNION +| pass-through-operands: all +| row-size=20B cardinality=6 | -|--11:EXCHANGE [DIRECTED] +|--02:DELETE EVENTS ICEBERG DELETE [ICEBERG DELETE JOIN, DIRECTED] +| | row-size=20B cardinality=2 +| | +| |--15:EXCHANGE [DIRECTED] +| | | +| | 01:SCAN HDFS 
[functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] +| | HDFS partitions=1/1 files=2 size=5.33KB +| | Iceberg snapshot id: 1497619269847778439 +| | row-size=267B cardinality=4 | | -| 01:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-POSITION-DELETE-01 functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files-position-delete] -| HDFS partitions=1/1 files=2 size=5.33KB +| 00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +| HDFS partitions=1/1 files=2 size=1.22KB | Iceberg snapshot id: 1497619269847778439 -| row-size=267B cardinality=4 +| row-size=20B cardinality=6 | -00:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] +03:SCAN HDFS [functional_parquet.iceberg_v2_positional_not_all_data_files_have_delete_files] HDFS partitions=1/1 files=2 size=1.22KB Iceberg snapshot id: 1497619269847778439 - row-size=20B cardinality=6 + row-size=20B cardinality=4 ==== select * from functional_parquet.iceberg_v2_delete_equality; ---- PLAN diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes-orc.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes-orc.test index a2af39918..e6b3021b0 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes-orc.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes-orc.test @@ -154,7 +154,7 @@ SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files_ ---- TYPES bigint ---- RUNTIME_PROFILE -aggregation(SUM, NumOrcStripes): 2 +aggregation(SUM, NumOrcStripes): 5 aggregation(SUM, NumFileMetadataRead): 0 ==== ---- QUERY @@ -164,7 +164,7 @@ SELECT count(*) from 
iceberg_v2_positional_not_all_data_files_have_delete_files_ ---- TYPES bigint ---- RUNTIME_PROFILE -aggregation(SUM, NumOrcStripes): 4 +aggregation(SUM, NumOrcStripes): 6 aggregation(SUM, NumFileMetadataRead): 0 ==== ---- QUERY diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test index 6a7cb599b..80e5cdad0 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-position-deletes.test @@ -206,7 +206,7 @@ SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files ---- TYPES bigint ---- RUNTIME_PROFILE -aggregation(SUM, NumRowGroups): 2 +aggregation(SUM, NumRowGroups): 5 aggregation(SUM, NumFileMetadataRead): 0 ==== ---- QUERY @@ -216,7 +216,7 @@ SELECT count(*) from iceberg_v2_positional_not_all_data_files_have_delete_files ---- TYPES bigint ---- RUNTIME_PROFILE -aggregation(SUM, NumRowGroups): 4 +aggregation(SUM, NumRowGroups): 6 aggregation(SUM, NumFileMetadataRead): 0 ==== ---- QUERY @@ -267,7 +267,7 @@ SELECT count(*) from iceberg_v2_positional_update_all_rows for system_version as ---- TYPES bigint ---- RUNTIME_PROFILE -aggregation(SUM, NumRowGroups): 2 +aggregation(SUM, NumRowGroups): 3 aggregation(SUM, NumFileMetadataRead): 0 ==== ---- QUERY @@ -277,7 +277,7 @@ SELECT count(*) from iceberg_v2_positional_update_all_rows ---- TYPES bigint ---- RUNTIME_PROFILE -aggregation(SUM, NumRowGroups): 2 +aggregation(SUM, NumRowGroups): 3 aggregation(SUM, NumFileMetadataRead): 0 ==== ---- QUERY @@ -742,3 +742,12 @@ INT, STRING, STRING, TIMESTAMP # Changing back in case we add new tests later SET BATCH_SIZE=0; ==== +---- QUERY +# IMPALA-12894: Wrong count(*) results when having dangling deletes after a Spark +# compaction. 
+select count(*) from functional_parquet.iceberg_spark_compaction_with_dangling_delete; +---- RESULTS +4 +---- TYPES +bigint +==== diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py index d97d0de64..a6cca7ffc 100644 --- a/tests/query_test/test_iceberg.py +++ b/tests/query_test/test_iceberg.py @@ -1419,6 +1419,7 @@ class TestIcebergV2Table(IcebergTestSuite): # The test uses pre-written Iceberg tables where the position delete files refer to # the data files via full URI, i.e. they start with 'hdfs://localhost:2050/...'. In the # dockerised environment the namenode is accessible on a different hostname/port. + @pytest.mark.skip(reason="IMPALA-12894") @SkipIfDockerizedCluster.internal_hostname @SkipIf.hardcoded_uris def test_plain_count_star_optimization(self, vector):
