This is an automated email from the ASF dual-hosted git repository. dbecker pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 18b9c08c5203901da57e738afcc47f1f7a53b3bc Author: Gabor Kaszab <[email protected]> AuthorDate: Thu Mar 21 14:56:01 2024 +0100 IMPALA-12600: Schema evolution with equality delete files This patch adds test coverage for a table that has equality delete files and also schema evolution, where the schema changes didn't affect the primary key columns. Note, partition evolution on tables with equality deletes is still not supported. Testing: - Added a new test table for this use-case and some E2E tests on that table. Change-Id: I125f72bade5b79bad5aaa6b676d6afaf3ca98395 Reviewed-on: http://gerrit.cloudera.org:8080/21210 Reviewed-by: Gabor Kaszab <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- testdata/data/README | 23 +++ ...47c78-9a7a-4d68-81d9-ab22e44a3630-00002.parquet | Bin 0 -> 671 bytes ...845c344-9698e59400000000_1309071497_data.0.parq | Bin 0 -> 836 bytes ...47c78-9a7a-4d68-81d9-ab22e44a3630-00004.parquet | Bin 0 -> 671 bytes ...caf13-6a91-4fd5-b509-54ec8b16864f-00001.parquet | Bin 0 -> 1118 bytes ...caf13-6a91-4fd5-b509-54ec8b16864f-00002.parquet | Bin 0 -> 671 bytes ...845c344-9698e59400000000_1656341410_data.0.parq | Bin 0 -> 836 bytes ...845c344-9698e59400000000_1634747934_data.0.parq | Bin 0 -> 824 bytes .../94efa501-9664-420d-a524-30535c11d363-m0.avro | Bin 0 -> 4386 bytes .../bf0f2c96-954e-4b3c-a686-8b06e9fd56e8-m0.avro | Bin 0 -> 4097 bytes .../fb7a4022-ee0a-4540-87d8-b8fa8e4c8596-m0.avro | Bin 0 -> 3865 bytes .../fb7a4022-ee0a-4540-87d8-b8fa8e4c8596-m1.avro | Bin 0 -> 3833 bytes ...669-1-bf0f2c96-954e-4b3c-a686-8b06e9fd56e8.avro | Bin 0 -> 2379 bytes ...884-1-fb7a4022-ee0a-4540-87d8-b8fa8e4c8596.avro | Bin 0 -> 2753 bytes ...1-fb7a4022-ee0a-4540-87d8-b8fa8e4c8596.avro_tmp | 116 ++++++++++++ ...192-1-94efa501-9664-420d-a524-30535c11d363.avro | Bin 0 -> 2193 bytes .../metadata/v1.metadata.json | 64 +++++++ .../metadata/v2.metadata.json | 94 ++++++++++ .../metadata/v3.metadata.json | 121 +++++++++++++ .../metadata/v4.metadata.json | 144 +++++++++++++++ .../metadata/v5.metadata.json | 172 ++++++++++++++++++ .../metadata/v6.metadata.json | 201 +++++++++++++++++++++ .../metadata/version-hint.text | 1 + .../functional/functional_schema_template.sql | 15 ++ .../datasets/functional/schema_constraints.csv | 1 + .../queries/PlannerTest/iceberg-v2-tables.test | 59 ++++++ .../iceberg-v2-read-equality-deletes.test | 23 +++ 27 files changed, 1034 insertions(+) diff --git a/testdata/data/README b/testdata/data/README index 63c0066d3..0eea4842f 100644 --- a/testdata/data/README +++ b/testdata/data/README @@ -1107,6 +1107,29 @@ iceberg_spark_compaction_with_dangling_delete: 4) Run compaction on the table with Spark. spark.sql(s"CALL hadoop_catalog.system.rewrite_data_files(table => 'ice.iceberg_spark_compaction_with_dangling_delete', options => map('min-input-files','2') )") +iceberg_v2_equality_delete_schema_evolution: +1: Create and populate an Iceberg table with primary keys with Impala: + create table functional_parquet.iceberg_v2_equality_delete_schema_evolution + (i int not null, d date not null, s string, primary key(i, d) not enforced) + PARTITIONED BY SPEC (d) + STORED AS ICEBERG + TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', + 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog', + 'iceberg.table_identifier'='ice.iceberg_v2_equality_delete_schema_evolution', + 'format-version'='2'); + insert into functional_parquet.iceberg_v2_equality_delete_schema_evolution values + (1, "2024-03-20", "str1"), + (2, "2024-03-20", "str2"), + (3, "2024-03-21", "str3"), + (4, "2024-03-21", "str4"), + (5, "2024-03-22", "str5"); +2: Delete some rows with Nifi where i=2, i=3 +3: Do some schema evolution on the table with Impala: + alter table functional_parquet.iceberg_v2_equality_delete_schema_evolution change s str string; + alter table functional_parquet.iceberg_v2_equality_delete_schema_evolution add column j int; +4: Update a row with Nifi where i=4 to the following: + (44, 2024-03-21, "str4", 4444) + arrays_big.parq: Generated with RandomNestedDataGenerator.java from the following schema: { diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-20/00000-10-e4b47c78-9a7a-4d68-81d9-ab22e44a3630-00002.parquet b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-20/00000-10-e4b47c78-9a7a-4d68-81d9-ab22e44a3630-00002.parquet new file mode 100644 index 000000000..c8029ba5c Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-20/00000-10-e4b47c78-9a7a-4d68-81d9-ab22e44a3630-00002.parquet differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-20/3645a3085845c344-9698e59400000000_1309071497_data.0.parq b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-20/3645a3085845c344-9698e59400000000_1309071497_data.0.parq new file mode 100644 index 000000000..1bc75eea8 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-20/3645a3085845c344-9698e59400000000_1309071497_data.0.parq differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-21/00000-10-e4b47c78-9a7a-4d68-81d9-ab22e44a3630-00004.parquet b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-21/00000-10-e4b47c78-9a7a-4d68-81d9-ab22e44a3630-00004.parquet new file mode 100644 index 000000000..483edd2c4 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-21/00000-10-e4b47c78-9a7a-4d68-81d9-ab22e44a3630-00004.parquet differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-21/00000-11-a72caf13-6a91-4fd5-b509-54ec8b16864f-00001.parquet b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-21/00000-11-a72caf13-6a91-4fd5-b509-54ec8b16864f-00001.parquet new file mode 100644 index 000000000..eedfa9dcc Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-21/00000-11-a72caf13-6a91-4fd5-b509-54ec8b16864f-00001.parquet differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-21/00000-11-a72caf13-6a91-4fd5-b509-54ec8b16864f-00002.parquet b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-21/00000-11-a72caf13-6a91-4fd5-b509-54ec8b16864f-00002.parquet new file mode 100644 index 000000000..ebd9bd6be Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-21/00000-11-a72caf13-6a91-4fd5-b509-54ec8b16864f-00002.parquet differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-21/3645a3085845c344-9698e59400000000_1656341410_data.0.parq b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-21/3645a3085845c344-9698e59400000000_1656341410_data.0.parq new file mode 100644 index 000000000..cc7f09378 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-21/3645a3085845c344-9698e59400000000_1656341410_data.0.parq differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-22/3645a3085845c344-9698e59400000000_1634747934_data.0.parq b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-22/3645a3085845c344-9698e59400000000_1634747934_data.0.parq new file mode 100644 index 000000000..e89d7f7c7 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/data/d=2024-03-22/3645a3085845c344-9698e59400000000_1634747934_data.0.parq differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/94efa501-9664-420d-a524-30535c11d363-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/94efa501-9664-420d-a524-30535c11d363-m0.avro new file mode 100644 index 000000000..dc4f9be5f Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/94efa501-9664-420d-a524-30535c11d363-m0.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/bf0f2c96-954e-4b3c-a686-8b06e9fd56e8-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/bf0f2c96-954e-4b3c-a686-8b06e9fd56e8-m0.avro new file mode 100644 index 000000000..ba40d992e Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/bf0f2c96-954e-4b3c-a686-8b06e9fd56e8-m0.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/fb7a4022-ee0a-4540-87d8-b8fa8e4c8596-m0.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/fb7a4022-ee0a-4540-87d8-b8fa8e4c8596-m0.avro new file mode 100644 index 000000000..a62a07fcc Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/fb7a4022-ee0a-4540-87d8-b8fa8e4c8596-m0.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/fb7a4022-ee0a-4540-87d8-b8fa8e4c8596-m1.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/fb7a4022-ee0a-4540-87d8-b8fa8e4c8596-m1.avro new file mode 100644 index 000000000..cc7866dcb Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/fb7a4022-ee0a-4540-87d8-b8fa8e4c8596-m1.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-3986738438831924669-1-bf0f2c96-954e-4b3c-a686-8b06e9fd56e8.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-3986738438831924669-1-bf0f2c96-954e-4b3c-a686-8b06e9fd56e8.avro new file mode 100644 index 000000000..2467d3278 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-3986738438831924669-1-bf0f2c96-954e-4b3c-a686-8b06e9fd56e8.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-5816823095034839884-1-fb7a4022-ee0a-4540-87d8-b8fa8e4c8596.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-5816823095034839884-1-fb7a4022-ee0a-4540-87d8-b8fa8e4c8596.avro new file mode 100644 index 000000000..29012addb Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-5816823095034839884-1-fb7a4022-ee0a-4540-87d8-b8fa8e4c8596.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-5816823095034839884-1-fb7a4022-ee0a-4540-87d8-b8fa8e4c8596.avro_tmp b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-5816823095034839884-1-fb7a4022-ee0a-4540-87d8-b8fa8e4c8596.avro_tmp new file mode 100644 index 000000000..e9a32c273 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-5816823095034839884-1-fb7a4022-ee0a-4540-87d8-b8fa8e4c8596.avro_tmp @@ -0,0 +1,116 @@ +{ + "manifest_path" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/fb7a4022-ee0a-4540-87d8-b8fa8e4c8596-m0.avro", + "manifest_length" : 3865, + "partition_spec_id" : 0, + "content" : 0, + "sequence_number" : 3, + "min_sequence_number" : 3, + "added_snapshot_id" : 5816823095034839884, + "added_data_files_count" : 1, + "existing_data_files_count" : 0, + "deleted_data_files_count" : 0, + "added_rows_count" : 1, + "existing_rows_count" : 0, + "deleted_rows_count" : 0, + "partitions" : { + "array" : [ { + "contains_null" : false, + "contains_nan" : { + "boolean" : false + }, + "lower_bound" : { + "bytes" : "[M\u0000\u0000" + }, + "upper_bound" : { + "bytes" : "[M\u0000\u0000" + } + } ] + } +} +{ + "manifest_path" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/94efa501-9664-420d-a524-30535c11d363-m0.avro", + "manifest_length" : 4386, + "partition_spec_id" : 0, + "content" : 0, + "sequence_number" : 1, + "min_sequence_number" : 1, + "added_snapshot_id" : 7131747670101362192, + "added_data_files_count" : 3, + "existing_data_files_count" : 0, + "deleted_data_files_count" : 0, + "added_rows_count" : 5, + "existing_rows_count" : 0, + "deleted_rows_count" : 0, + "partitions" : { + "array" : [ { + "contains_null" : false, + "contains_nan" : { + "boolean" : false + }, + "lower_bound" : { + "bytes" : "ZM\u0000\u0000" + }, + "upper_bound" : { + "bytes" : "\\M\u0000\u0000" + } + } ] + } +} +{ + "manifest_path" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/fb7a4022-ee0a-4540-87d8-b8fa8e4c8596-m1.avro", + "manifest_length" : 3833, + "partition_spec_id" : 0, + "content" : 1, + "sequence_number" : 3, + "min_sequence_number" : 3, + "added_snapshot_id" : 5816823095034839884, + "added_data_files_count" : 1, + "existing_data_files_count" : 0, + "deleted_data_files_count" : 0, + "added_rows_count" : 1, + "existing_rows_count" : 0, + "deleted_rows_count" : 0, + "partitions" : { + "array" : [ { + "contains_null" : false, + "contains_nan" : { + "boolean" : false + }, + "lower_bound" : { + "bytes" : "[M\u0000\u0000" + }, + "upper_bound" : { + "bytes" : "[M\u0000\u0000" + } + } ] + } +} +{ + "manifest_path" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/bf0f2c96-954e-4b3c-a686-8b06e9fd56e8-m0.avro", + "manifest_length" : 4097, + "partition_spec_id" : 0, + "content" : 1, + "sequence_number" : 2, + "min_sequence_number" : 2, + "added_snapshot_id" : 3986738438831924669, + "added_data_files_count" : 2, + "existing_data_files_count" : 0, + "deleted_data_files_count" : 0, + "added_rows_count" : 2, + "existing_rows_count" : 0, + "deleted_rows_count" : 0, + "partitions" : { + "array" : [ { + "contains_null" : false, + "contains_nan" : { + "boolean" : false + }, + "lower_bound" : { + "bytes" : "ZM\u0000\u0000" + }, + "upper_bound" : { + "bytes" : "[M\u0000\u0000" + } + } ] + } +} diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-7131747670101362192-1-94efa501-9664-420d-a524-30535c11d363.avro b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-7131747670101362192-1-94efa501-9664-420d-a524-30535c11d363.avro new file mode 100644 index 000000000..6f1cfe8b7 Binary files /dev/null and b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-7131747670101362192-1-94efa501-9664-420d-a524-30535c11d363.avro differ diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v1.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v1.metadata.json new file mode 100644 index 000000000..dae3080e9 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v1.metadata.json @@ -0,0 +1,64 @@ +{ + "format-version" : 2, + "table-uuid" : "4f077766-4ab0-4537-99f1-2059cc203ddf", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution", + "last-sequence-number" : 0, + "last-updated-ms" : 1710950805180, + "last-column-id" : 3, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "identifier-field-ids" : [ 1, 2 ], + "fields" : [ { + "id" : 1, + "name" : "i", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "d", + "required" : true, + "type" : "date" + }, { + "id" : 3, + "name" : "s", + "required" : false, + "type" : "string" + } ] + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ { + "name" : "d", + "transform" : "identity", + "source-id" : 2, + "field-id" : 1000 + } ] + } ], + "last-partition-id" : 1000, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "engine.hive.enabled" : "true", + "write.merge.mode" : "merge-on-read", + "write.format.default" : "parquet", + "write.delete.mode" : "merge-on-read", + "iceberg.catalog_location" : "/test-warehouse/iceberg_test/hadoop_catalog", + "OBJCAPABILITIES" : "EXTREAD,EXTWRITE", + "write.update.mode" : "merge-on-read", + "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler", + "iceberg.catalog" : "hadoop.catalog", + "iceberg.table_identifier" : "ice.iceberg_v2_equality_delete_schema_evolution" + }, + "current-snapshot-id" : -1, + "refs" : { }, + "snapshots" : [ ], + "statistics" : [ ], + "snapshot-log" : [ ], + "metadata-log" : [ ] +} \ No newline at end of file diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v2.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v2.metadata.json new file mode 100644 index 000000000..4a0c6e184 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v2.metadata.json @@ -0,0 +1,94 @@ +{ + "format-version" : 2, + "table-uuid" : "4f077766-4ab0-4537-99f1-2059cc203ddf", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution", + "last-sequence-number" : 1, + "last-updated-ms" : 1710950811050, + "last-column-id" : 3, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "identifier-field-ids" : [ 1, 2 ], + "fields" : [ { + "id" : 1, + "name" : "i", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "d", + "required" : true, + "type" : "date" + }, { + "id" : 3, + "name" : "s", + "required" : false, + "type" : "string" + } ] + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ { + "name" : "d", + "transform" : "identity", + "source-id" : 2, + "field-id" : 1000 + } ] + } ], + "last-partition-id" : 1000, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "engine.hive.enabled" : "true", + "write.merge.mode" : "merge-on-read", + "write.format.default" : "parquet", + "write.delete.mode" : "merge-on-read", + "iceberg.catalog_location" : "/test-warehouse/iceberg_test/hadoop_catalog", + "OBJCAPABILITIES" : "EXTREAD,EXTWRITE", + "write.update.mode" : "merge-on-read", + "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler", + "iceberg.catalog" : "hadoop.catalog", + "iceberg.table_identifier" : "ice.iceberg_v2_equality_delete_schema_evolution" + }, + "current-snapshot-id" : 7131747670101362192, + "refs" : { + "main" : { + "snapshot-id" : 7131747670101362192, + "type" : "branch" + } + }, + "snapshots" : [ { + "sequence-number" : 1, + "snapshot-id" : 7131747670101362192, + "timestamp-ms" : 1710950811050, + "summary" : { + "operation" : "append", + "added-data-files" : "3", + "added-records" : "5", + "added-files-size" : "2496", + "changed-partition-count" : "3", + "total-records" : "5", + "total-files-size" : "2496", + "total-data-files" : "3", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-7131747670101362192-1-94efa501-9664-420d-a524-30535c11d363.avro", + "schema-id" : 0 + } ], + "statistics" : [ ], + "snapshot-log" : [ { + "timestamp-ms" : 1710950811050, + "snapshot-id" : 7131747670101362192 + } ], + "metadata-log" : [ { + "timestamp-ms" : 1710950805180, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v1.metadata.json" + } ] +} diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v3.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v3.metadata.json new file mode 100644 index 000000000..37d181f53 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v3.metadata.json @@ -0,0 +1,121 @@ +{ + "format-version" : 2, + "table-uuid" : "4f077766-4ab0-4537-99f1-2059cc203ddf", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution", + "last-sequence-number" : 2, + "last-updated-ms" : 1710950888718, + "last-column-id" : 3, + "current-schema-id" : 0, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "identifier-field-ids" : [ 1, 2 ], + "fields" : [ { + "id" : 1, + "name" : "i", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "d", + "required" : true, + "type" : "date" + }, { + "id" : 3, + "name" : "s", + "required" : false, + "type" : "string" + } ] + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ { + "name" : "d", + "transform" : "identity", + "source-id" : 2, + "field-id" : 1000 + } ] + } ], + "last-partition-id" : 1000, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "engine.hive.enabled" : "true", + "write.merge.mode" : "merge-on-read", + "write.format.default" : "parquet", + "write.delete.mode" : "merge-on-read", + "iceberg.catalog_location" : "/test-warehouse/iceberg_test/hadoop_catalog", + "OBJCAPABILITIES" : "EXTREAD,EXTWRITE", + "write.update.mode" : "merge-on-read", + "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler", + "iceberg.catalog" : "hadoop.catalog", + "iceberg.table_identifier" : "ice.iceberg_v2_equality_delete_schema_evolution" + }, + "current-snapshot-id" : 3986738438831924669, + "refs" : { + "main" : { + "snapshot-id" : 3986738438831924669, + "type" : "branch" + } + }, + "snapshots" : [ { + "sequence-number" : 1, + "snapshot-id" : 7131747670101362192, + "timestamp-ms" : 1710950811050, + "summary" : { + "operation" : "append", + "added-data-files" : "3", + "added-records" : "5", + "added-files-size" : "2496", + "changed-partition-count" : "3", + "total-records" : "5", + "total-files-size" : "2496", + "total-data-files" : "3", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-7131747670101362192-1-94efa501-9664-420d-a524-30535c11d363.avro", + "schema-id" : 0 + }, { + "sequence-number" : 2, + "snapshot-id" : 3986738438831924669, + "parent-snapshot-id" : 7131747670101362192, + "timestamp-ms" : 1710950888718, + "summary" : { + "operation" : "overwrite", + "added-equality-delete-files" : "2", + "added-delete-files" : "2", + "added-files-size" : "1342", + "added-equality-deletes" : "2", + "changed-partition-count" : "2", + "total-records" : "5", + "total-files-size" : "3838", + "total-data-files" : "3", + "total-delete-files" : "2", + "total-position-deletes" : "0", + "total-equality-deletes" : "2" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-3986738438831924669-1-bf0f2c96-954e-4b3c-a686-8b06e9fd56e8.avro", + "schema-id" : 0 + } ], + "statistics" : [ ], + "snapshot-log" : [ { + "timestamp-ms" : 1710950811050, + "snapshot-id" : 7131747670101362192 + }, { + "timestamp-ms" : 1710950888718, + "snapshot-id" : 3986738438831924669 + } ], + "metadata-log" : [ { + "timestamp-ms" : 1710950805180, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v1.metadata.json" + }, { + "timestamp-ms" : 1710950811050, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v2.metadata.json" + } ] +} diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v4.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v4.metadata.json new file mode 100644 index 000000000..8edeccd63 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v4.metadata.json @@ -0,0 +1,144 @@ +{ + "format-version" : 2, + "table-uuid" : "4f077766-4ab0-4537-99f1-2059cc203ddf", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution", + "last-sequence-number" : 2, + "last-updated-ms" : 1710950922340, + "last-column-id" : 3, + "current-schema-id" : 1, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "identifier-field-ids" : [ 1, 2 ], + "fields" : [ { + "id" : 1, + "name" : "i", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "d", + "required" : true, + "type" : "date" + }, { + "id" : 3, + "name" : "s", + "required" : false, + "type" : "string" + } ] + }, { + "type" : "struct", + "schema-id" : 1, + "identifier-field-ids" : [ 1, 2 ], + "fields" : [ { + "id" : 1, + "name" : "i", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "d", + "required" : true, + "type" : "date" + }, { + "id" : 3, + "name" : "str", + "required" : false, + "type" : "string" + } ] + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ { + "name" : "d", + "transform" : "identity", + "source-id" : 2, + "field-id" : 1000 + } ] + } ], + "last-partition-id" : 1000, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "engine.hive.enabled" : "true", + "write.merge.mode" : "merge-on-read", + "write.format.default" : "parquet", + "write.delete.mode" : "merge-on-read", + "iceberg.catalog_location" : "/test-warehouse/iceberg_test/hadoop_catalog", + "OBJCAPABILITIES" : "EXTREAD,EXTWRITE", + "write.update.mode" : "merge-on-read", + "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler", + "iceberg.catalog" : "hadoop.catalog", + "iceberg.table_identifier" : "ice.iceberg_v2_equality_delete_schema_evolution" + }, + "current-snapshot-id" : 3986738438831924669, + "refs" : { + "main" : { + "snapshot-id" : 3986738438831924669, + "type" : "branch" + } + }, + "snapshots" : [ { + "sequence-number" : 1, + "snapshot-id" : 7131747670101362192, + "timestamp-ms" : 1710950811050, + "summary" : { + "operation" : "append", + "added-data-files" : "3", + "added-records" : "5", + "added-files-size" : "2496", + "changed-partition-count" : "3", + "total-records" : "5", + "total-files-size" : "2496", + "total-data-files" : "3", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-7131747670101362192-1-94efa501-9664-420d-a524-30535c11d363.avro", + "schema-id" : 0 + }, { + "sequence-number" : 2, + "snapshot-id" : 3986738438831924669, + "parent-snapshot-id" : 7131747670101362192, + "timestamp-ms" : 1710950888718, + "summary" : { + "operation" : "overwrite", + "added-equality-delete-files" : "2", + "added-delete-files" : "2", + "added-files-size" : "1342", + "added-equality-deletes" : "2", + "changed-partition-count" : "2", + "total-records" : "5", + "total-files-size" : "3838", + "total-data-files" : "3", + "total-delete-files" : "2", + "total-position-deletes" : "0", + "total-equality-deletes" : "2" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-3986738438831924669-1-bf0f2c96-954e-4b3c-a686-8b06e9fd56e8.avro", + "schema-id" : 0 + } ], + "statistics" : [ ], + "snapshot-log" : [ { + "timestamp-ms" : 1710950811050, + "snapshot-id" : 7131747670101362192 + }, { + "timestamp-ms" : 1710950888718, + "snapshot-id" : 3986738438831924669 + } ], + "metadata-log" : [ { + "timestamp-ms" : 1710950805180, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v1.metadata.json" + }, { + "timestamp-ms" : 1710950811050, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v2.metadata.json" + }, { + "timestamp-ms" : 1710950888718, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v3.metadata.json" + } ] +} diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v5.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v5.metadata.json new file mode 100644 index 000000000..791e8df58 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v5.metadata.json @@ -0,0 +1,172 @@ +{ + "format-version" : 2, + "table-uuid" : "4f077766-4ab0-4537-99f1-2059cc203ddf", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution", + "last-sequence-number" : 2, + "last-updated-ms" : 1710950924199, + "last-column-id" : 4, + "current-schema-id" : 2, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "identifier-field-ids" : [ 1, 2 ], + "fields" : [ { + "id" : 1, + "name" : "i", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "d", + "required" : true, + "type" : "date" + }, { + "id" : 3, + "name" : "s", + "required" : false, + "type" : "string" + } ] + }, { + "type" : "struct", + "schema-id" : 1, + "identifier-field-ids" : [ 1, 2 ], + "fields" : [ { + "id" : 1, + "name" : "i", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "d", + "required" : true, + "type" : "date" + }, { + "id" : 3, + "name" : "str", + "required" : false, + "type" : "string" + } ] + }, { + "type" : "struct", + "schema-id" : 2, + "identifier-field-ids" : [ 1, 2 ], + "fields" : [ { + "id" : 1, + "name" : "i", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "d", + "required" : true, + "type" : "date" + }, { + "id" : 3, + "name" : "str", + "required" : false, + "type" : "string" + }, { + "id" : 4, + "name" : "j", + "required" : false, + "type" : "int" + } ] + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ { + "name" : "d", + "transform" : "identity", + "source-id" : 2, + "field-id" : 1000 + } ] + } ], + "last-partition-id" : 1000, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "engine.hive.enabled" : "true", + "write.merge.mode" : "merge-on-read", + "write.format.default" : "parquet", + "write.delete.mode" : "merge-on-read", + "iceberg.catalog_location" : "/test-warehouse/iceberg_test/hadoop_catalog", + "OBJCAPABILITIES" : "EXTREAD,EXTWRITE", + "write.update.mode" : "merge-on-read", + "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler", + "iceberg.catalog" : "hadoop.catalog", + "iceberg.table_identifier" : "ice.iceberg_v2_equality_delete_schema_evolution" + }, + "current-snapshot-id" : 3986738438831924669, + "refs" : { + "main" : { + "snapshot-id" : 3986738438831924669, + "type" : "branch" + } + }, + "snapshots" : [ { + "sequence-number" : 1, + "snapshot-id" : 7131747670101362192, + "timestamp-ms" : 1710950811050, + "summary" : { + "operation" : "append", + "added-data-files" : "3", + "added-records" : "5", + "added-files-size" : "2496", + "changed-partition-count" : "3", + "total-records" : "5", + "total-files-size" : "2496", + "total-data-files" : "3", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-7131747670101362192-1-94efa501-9664-420d-a524-30535c11d363.avro", + "schema-id" : 0 + }, { + "sequence-number" : 2, + "snapshot-id" : 3986738438831924669, + "parent-snapshot-id" : 7131747670101362192, + "timestamp-ms" : 1710950888718, + "summary" : { + "operation" : "overwrite", + "added-equality-delete-files" : "2", + "added-delete-files" : "2", + "added-files-size" : "1342", + "added-equality-deletes" : "2", + "changed-partition-count" : "2", + "total-records" : "5", + "total-files-size" : "3838", + "total-data-files" : "3", + "total-delete-files" : "2", + "total-position-deletes" : "0", + "total-equality-deletes" : "2" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-3986738438831924669-1-bf0f2c96-954e-4b3c-a686-8b06e9fd56e8.avro", + "schema-id" : 0 + } ], + "statistics" : [ ], + "snapshot-log" : [ { + "timestamp-ms" : 1710950811050, + "snapshot-id" : 7131747670101362192 + }, { + "timestamp-ms" : 1710950888718, + "snapshot-id" : 3986738438831924669 + } ], + "metadata-log" : [ { + "timestamp-ms" : 1710950805180, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v1.metadata.json" + }, { + "timestamp-ms" : 1710950811050, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v2.metadata.json" + }, { + "timestamp-ms" : 1710950888718, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v3.metadata.json" + }, { + "timestamp-ms" : 1710950922340, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v4.metadata.json" + } ] +} diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v6.metadata.json b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v6.metadata.json new file mode 100644 index 000000000..858609cbe --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v6.metadata.json @@ -0,0 +1,201 @@ +{ + "format-version" : 2, + "table-uuid" : "4f077766-4ab0-4537-99f1-2059cc203ddf", + "location" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution", + "last-sequence-number" : 3, + "last-updated-ms" : 1710951029847, + "last-column-id" : 4, + "current-schema-id" : 2, + "schemas" : [ { + "type" : "struct", + "schema-id" : 0, + "identifier-field-ids" : [ 1, 2 ], + "fields" : [ { + "id" : 1, + "name" : "i", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "d", + "required" : true, + "type" : "date" + }, { + "id" : 3, + "name" : "s", + "required" : false, + "type" : "string" + } ] + }, { + "type" : "struct", + "schema-id" : 1, + "identifier-field-ids" : [ 1, 2 ], + "fields" : [ { + "id" : 1, + "name" : "i", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "d", + "required" : true, + "type" : "date" + }, { + "id" : 3, + "name" : "str", + "required" : false, + "type" : "string" + } ] + }, { + "type" : "struct", + "schema-id" : 2, + "identifier-field-ids" : [ 1, 2 ], + "fields" : [ { + "id" : 1, + "name" : "i", + "required" : true, + "type" : "int" + }, { + "id" : 2, + "name" : "d", + "required" : true, + "type" : "date" + }, { + "id" : 3, + "name" : "str", + "required" : false, + "type" : "string" + }, { + "id" : 4, + "name" : "j", + "required" : false, + "type" : "int" + } ] + } ], + "default-spec-id" : 0, + "partition-specs" : [ { + "spec-id" : 0, + "fields" : [ { + "name" : "d", + "transform" : "identity", + "source-id" : 2, + "field-id" : 1000 + } ] + } ], + "last-partition-id" : 1000, + "default-sort-order-id" : 0, + "sort-orders" : [ { + "order-id" : 0, + "fields" : [ ] + } ], + "properties" : { + "engine.hive.enabled" : "true", + "write.merge.mode" : "merge-on-read", + "write.format.default" : "parquet", + "write.delete.mode" : "merge-on-read", + "iceberg.catalog_location" : "/test-warehouse/iceberg_test/hadoop_catalog", + "OBJCAPABILITIES" : "EXTREAD,EXTWRITE", + "write.update.mode" : "merge-on-read", + "storage_handler" : "org.apache.iceberg.mr.hive.HiveIcebergStorageHandler", + "iceberg.catalog" : "hadoop.catalog", + "iceberg.table_identifier" : "ice.iceberg_v2_equality_delete_schema_evolution" + }, + "current-snapshot-id" : 5816823095034839884, + "refs" : { + "main" : { + "snapshot-id" : 5816823095034839884, + "type" : "branch" + } + }, + "snapshots" : [ { + "sequence-number" : 1, + "snapshot-id" : 7131747670101362192, + "timestamp-ms" : 1710950811050, + "summary" : { + "operation" : "append", + "added-data-files" : "3", + "added-records" : "5", + "added-files-size" : "2496", + "changed-partition-count" : "3", + "total-records" : "5", + "total-files-size" : "2496", + "total-data-files" : "3", + "total-delete-files" : "0", + "total-position-deletes" : "0", + "total-equality-deletes" : "0" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-7131747670101362192-1-94efa501-9664-420d-a524-30535c11d363.avro", + "schema-id" : 0 + }, { + "sequence-number" : 2, + "snapshot-id" : 3986738438831924669, + "parent-snapshot-id" : 7131747670101362192, + "timestamp-ms" : 1710950888718, + "summary" : { + "operation" : "overwrite", + "added-equality-delete-files" : "2", + "added-delete-files" : "2", + "added-files-size" : "1342", + "added-equality-deletes" : "2", + "changed-partition-count" : "2", + "total-records" : "5", + "total-files-size" : "3838", + "total-data-files" : "3", + "total-delete-files" : "2", + "total-position-deletes" : "0", + "total-equality-deletes" : "2" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-3986738438831924669-1-bf0f2c96-954e-4b3c-a686-8b06e9fd56e8.avro", + "schema-id" : 0 + }, { + "sequence-number" : 3, + "snapshot-id" : 5816823095034839884, + "parent-snapshot-id" : 3986738438831924669, + "timestamp-ms" : 1710951029847, + "summary" : { + "operation" : "overwrite", + "added-data-files" : "1", + "added-equality-delete-files" : "1", + "added-delete-files" : "1", + "added-records" : "1", + "added-files-size" : "1789", + "added-equality-deletes" : "1", + "changed-partition-count" : "1", + "total-records" : "6", + "total-files-size" : "5627", + "total-data-files" : "4", + "total-delete-files" : "3", + "total-position-deletes" : "0", + "total-equality-deletes" : "3" + }, + "manifest-list" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/snap-5816823095034839884-1-fb7a4022-ee0a-4540-87d8-b8fa8e4c8596.avro", + "schema-id" : 2 + } ], + "statistics" : [ ], + "snapshot-log" : [ { + "timestamp-ms" : 1710950811050, + "snapshot-id" : 7131747670101362192 + }, { + "timestamp-ms" : 1710950888718, + "snapshot-id" : 3986738438831924669 + }, { + "timestamp-ms" : 1710951029847, + "snapshot-id" : 5816823095034839884 + } ], + "metadata-log" : [ { + "timestamp-ms" : 1710950805180, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v1.metadata.json" + }, { + "timestamp-ms" : 1710950811050, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v2.metadata.json" + }, { + "timestamp-ms" : 1710950888718, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v3.metadata.json" + }, { + "timestamp-ms" : 1710950922340, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v4.metadata.json" + }, { + "timestamp-ms" : 1710950924199, + "metadata-file" : "/test-warehouse/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/v5.metadata.json" + } ] +} diff --git a/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/version-hint.text b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/version-hint.text new file mode 100644 index 000000000..62f945751 --- /dev/null +++ b/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution/metadata/version-hint.text @@ -0,0 +1 @@ +6 \ No newline at end of file diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index 73b8bfac4..abf8b44ad 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -3939,6 +3939,21 @@ hadoop fs -Ddfs.block.size=1048576 -put -f ${IMPALA_HOME}/testdata/data/iceberg_ ---- DATASET functional ---- BASE_TABLE_NAME +iceberg_v2_equality_delete_schema_evolution +---- CREATE +CREATE EXTERNAL TABLE IF NOT EXISTS {db_name}{db_suffix}.{table_name} +STORED AS ICEBERG +TBLPROPERTIES('iceberg.catalog'='hadoop.catalog', + 'iceberg.catalog_location'='/test-warehouse/iceberg_test/hadoop_catalog', + 'iceberg.table_identifier'='ice.iceberg_v2_equality_delete_schema_evolution', + 'format-version'='2'); +---- DEPENDENT_LOAD +`hadoop fs -mkdir -p /test-warehouse/iceberg_test/hadoop_catalog/ice && \ +hadoop fs -put -f ${IMPALA_HOME}/testdata/data/iceberg_test/hadoop_catalog/ice/iceberg_v2_equality_delete_schema_evolution /test-warehouse/iceberg_test/hadoop_catalog/ice +==== +---- DATASET +functional +---- BASE_TABLE_NAME mv1_alltypes_jointbl ---- HIVE_MAJOR_VERSION 3 diff --git a/testdata/datasets/functional/schema_constraints.csv b/testdata/datasets/functional/schema_constraints.csv index 1de78011c..7c78757ee 100644 --- a/testdata/datasets/functional/schema_constraints.csv +++ b/testdata/datasets/functional/schema_constraints.csv @@ -106,6 +106,7 @@ table_name:iceberg_test_metadata, constraint:restrict_to, table_format:parquet/n table_name:iceberg_lineitem_multiblock, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_lineitem_sixblocks, constraint:restrict_to, table_format:parquet/none/none table_name:iceberg_spark_compaction_with_dangling_delete, constraint:restrict_to, table_format:parquet/none/none +table_name:iceberg_v2_equality_delete_schema_evolution, constraint:restrict_to, table_format:parquet/none/none # TODO: Support Avro. Data loading currently fails for Avro because complex types # cannot be converted to the corresponding Avro types yet. diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test index 57750d661..f4bc7829a 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/iceberg-v2-tables.test @@ -1882,3 +1882,62 @@ PLAN-ROOT SINK constant-operands=1 row-size=8B cardinality=1 ==== +# Query a table that has schema evolution and equality deletes. +select * from functional_parquet.iceberg_v2_equality_delete_schema_evolution; +---- PLAN +PLAN-ROOT SINK +| +04:UNION +| pass-through-operands: all +| row-size=32B cardinality=6 +| +|--02:HASH JOIN [LEFT ANTI JOIN] +| | hash predicates: functional_parquet.iceberg_v2_equality_delete_schema_evolution.d IS NOT DISTINCT FROM functional_parquet.iceberg_v2_equality_delete_schema_evolution-equality-delete-01.d, functional_parquet.iceberg_v2_equality_delete_schema_evolution.i IS NOT DISTINCT FROM functional_parquet.iceberg_v2_equality_delete_schema_evolution-equality-delete-01.i +| | other join predicates: functional_parquet.iceberg_v2_equality_delete_schema_evolution.iceberg__data__sequence__number < functional_parquet.iceberg_v2_equality_delete_schema_evolution-equality-delete-01.iceberg__data__sequence__number +| | row-size=32B cardinality=4 +| | +| |--01:SCAN HDFS [functional_parquet.iceberg_v2_equality_delete_schema_evolution-EQUALITY-DELETE-01 functional_parquet.iceberg_v2_equality_delete_schema_evolution-equality-delete-01] +| | HDFS partitions=1/1 files=3 size=1.97KB +| | Iceberg snapshot id: 5816823095034839884 +| | row-size=16B cardinality=3 +| | +| 00:SCAN HDFS [functional_parquet.iceberg_v2_equality_delete_schema_evolution] +| HDFS partitions=1/1 files=2 size=1.63KB +| Iceberg snapshot id: 5816823095034839884 +| row-size=32B cardinality=4 +| +03:SCAN HDFS [functional_parquet.iceberg_v2_equality_delete_schema_evolution] + HDFS partitions=1/1 files=2 size=1.90KB + Iceberg snapshot id: 5816823095034839884 + row-size=32B cardinality=2 +---- DISTRIBUTEDPLAN +PLAN-ROOT SINK +| +06:EXCHANGE [UNPARTITIONED] +| +04:UNION +| pass-through-operands: all +| row-size=32B cardinality=6 +| +|--02:HASH JOIN [LEFT ANTI JOIN, BROADCAST] +| | hash predicates: functional_parquet.iceberg_v2_equality_delete_schema_evolution.d IS NOT DISTINCT FROM functional_parquet.iceberg_v2_equality_delete_schema_evolution-equality-delete-01.d, functional_parquet.iceberg_v2_equality_delete_schema_evolution.i IS NOT DISTINCT FROM functional_parquet.iceberg_v2_equality_delete_schema_evolution-equality-delete-01.i +| | other join predicates: functional_parquet.iceberg_v2_equality_delete_schema_evolution.iceberg__data__sequence__number < functional_parquet.iceberg_v2_equality_delete_schema_evolution-equality-delete-01.iceberg__data__sequence__number +| | row-size=32B cardinality=4 +| | +| |--05:EXCHANGE [BROADCAST] +| | | +| | 01:SCAN HDFS [functional_parquet.iceberg_v2_equality_delete_schema_evolution-EQUALITY-DELETE-01 functional_parquet.iceberg_v2_equality_delete_schema_evolution-equality-delete-01] +| | HDFS partitions=1/1 files=3 size=1.97KB +| | Iceberg snapshot id: 5816823095034839884 +| | row-size=16B cardinality=3 +| | +| 00:SCAN HDFS [functional_parquet.iceberg_v2_equality_delete_schema_evolution] +| HDFS partitions=1/1 files=2 size=1.63KB +| Iceberg snapshot id: 5816823095034839884 +| row-size=32B cardinality=4 +| +03:SCAN HDFS [functional_parquet.iceberg_v2_equality_delete_schema_evolution] + HDFS partitions=1/1 files=2 size=1.90KB + Iceberg snapshot id: 5816823095034839884 + row-size=32B cardinality=2 +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-equality-deletes.test b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-equality-deletes.test index 3182b7ab4..3a196b3de 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-equality-deletes.test +++ b/testdata/workloads/functional-query/queries/QueryTest/iceberg-v2-read-equality-deletes.test @@ -133,3 +133,26 @@ select * from functional_parquet.iceberg_v2_delete_pos_and_multi_eq_ids ---- TYPES INT,STRING,DATE ==== +---- QUERY +# Query an Iceberg table with primary keys that has schema evolution and equality +# deletes. Note, the schema evolution didn't touch the primary keys +# (identifier-field-ids). +select * from functional_parquet.iceberg_v2_equality_delete_schema_evolution; +---- RESULTS +1,2024-03-20,'str1',NULL +44,2024-03-21,'str4',4444 +5,2024-03-22,'str5',NULL +---- TYPES +INT,DATE,STRING,INT +==== +---- QUERY +# Time travel before the schema evolution was done on the table. +select * from functional_parquet.iceberg_v2_equality_delete_schema_evolution +for system_version as of 3986738438831924669; +---- RESULTS +1,2024-03-20,'str1' +4,2024-03-21,'str4' +5,2024-03-22,'str5' +---- TYPES +INT,DATE,STRING +====
