This is an automated email from the ASF dual-hosted git repository.
dbecker pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
from d2cb00cec IMPALA-13456: Calcite planner: fix issues with quotes
new ff7a55332 IMPALA-13462: Added support for functions used in tpcds
new 9ca3ccb34 IMPALA-13445: Ignore num partition for unpartitioned writes
new b05b408f1 IMPALA-13247: Support Reading Puffin files for the current snapshot
The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
be/src/catalog/catalog.cc | 2 +
be/src/service/query-state-record.cc | 1 +
be/src/util/backend-gflag-util.cc | 3 +
bin/impala-config.sh | 1 +
common/thrift/BackendGflags.thrift | 2 +
fe/pom.xml | 7 +
.../org/apache/impala/catalog/ColumnStats.java | 7 +
.../org/apache/impala/catalog/IcebergTable.java | 81 ++-
.../apache/impala/catalog/PuffinStatsLoader.java | 196 +++++
.../apache/impala/planner/DistributedPlanner.java | 56 +-
.../org/apache/impala/planner/HdfsTableSink.java | 37 +
.../org/apache/impala/planner/ProcessingCost.java | 4 +
.../org/apache/impala/service/BackendConfig.java | 4 +
.../org/apache/impala/planner/CardinalityTest.java | 48 ++
.../impala/planner/TpcdsCpuCostPlannerTest.java | 18 +
.../impala/calcite/functions/FunctionResolver.java | 65 +-
.../operators/ImpalaAdjustScaleFunction.java | 68 ++
.../operators/ImpalaCustomOperatorTable.java | 29 +
.../calcite/operators/ImpalaGroupingFunction.java | 53 ++
.../impala/calcite/operators/ImpalaOperator.java | 9 +-
.../impala/calcite/service/CalciteValidator.java | 3 +
java/pom.xml | 2 +
...b1ffe-ba85-4be4-a590-7c39428931e1.metadata.json | 229 ++++++
java/puffin-data-generator/pom.xml | 109 +++
.../puffindatagenerator/PuffinDataGenerator.java | 542 ++++++++++++++
...a9277bf-954aff1b00000000_1684663509_data.0.parq | Bin 0 -> 604 bytes
.../11cd04ec-55ea-40aa-a89b-197c3c275e7a-m0.avro | Bin 0 -> 3220 bytes
...sfgs-4d9242d5-bd79-4069-be8b-2cfced8e0647.stats | Bin 0 -> 534 bytes
...423-1-11cd04ec-55ea-40aa-a89b-197c3c275e7a.avro | Bin 0 -> 1997 bytes
.../metadata/v1.metadata.json | 64 ++
.../metadata/v2.metadata.json | 95 +++
.../metadata/v3.metadata.json | 121 ++++
.../metadata/version-hint.txt | 0
.../functional/functional_schema_template.sql | 13 +
.../datasets/functional/schema_constraints.csv | 1 +
...ade02-35ae-4a8f-a84f-784d1e0c0790.metadata.json | 172 +++++
testdata/ice_puffin/README | 31 +
.../generated/all_files_corrupt.metadata.json | 204 ++++++
testdata/ice_puffin/generated/all_stats.stats | Bin 0 -> 1769 bytes
.../generated/all_stats_in_1_file.metadata.json | 223 ++++++
testdata/ice_puffin/generated/corrupt_file.stats | Bin 0 -> 391 bytes
testdata/ice_puffin/generated/corrupt_file1.stats | Bin 0 -> 471 bytes
testdata/ice_puffin/generated/corrupt_file2.stats | Bin 0 -> 511 bytes
.../ice_puffin/generated/current_snapshot_id.stats | Bin 0 -> 479 bytes
.../duplicate_stats_in_1_file.metadata.json | 193 +++++
.../generated/duplicate_stats_in_1_file.stats | Bin 0 -> 690 bytes
.../duplicate_stats_in_2_files.metadata.json | 204 ++++++
.../generated/duplicate_stats_in_2_files1.stats | Bin 0 -> 479 bytes
.../generated/duplicate_stats_in_2_files2.stats | Bin 0 -> 447 bytes
testdata/ice_puffin/generated/existing_file.stats | Bin 0 -> 519 bytes
.../file_contains_invalid_field_id.metadata.json | 188 +++++
.../generated/file_contains_invalid_field_id.stats | Bin 0 -> 481 bytes
.../invalidAndCorruptSketches.metadata.json | 203 ++++++
.../generated/invalidAndCorruptSketches.stats | Bin 0 -> 1081 bytes
.../generated/missing_file.metadata.json | 204 ++++++
.../ice_puffin/generated/non_corrupt_file.stats | Bin 0 -> 439 bytes
.../generated/not_all_blobs_current.metadata.json | 198 +++++
.../generated/not_all_blobs_current.stats | Bin 0 -> 910 bytes
.../generated/not_current_snapshot_id.stats | Bin 0 -> 519 bytes
.../one_file_corrupt_one_not.metadata.json | 204 ++++++
.../one_file_current_one_not.metadata.json | 204 ++++++
.../generated/stats_divided.metadata.json | 229 ++++++
testdata/ice_puffin/generated/stats_divided1.stats | Bin 0 -> 749 bytes
testdata/ice_puffin/generated/stats_divided2.stats | Bin 0 -> 1308 bytes
.../stats_for_unsupported_type.metadata.json | 188 +++++
.../generated/stats_for_unsupported_type.stats | Bin 0 -> 496 bytes
...214-1-c9f94c00-2920-4a39-8e7f-c7faf7e71a7d.avro | Bin 0 -> 3624 bytes
.../queries/PlannerTest/tpcds_cpu_cost/ddl.test | 805 +++++++++++++++++++++
.../tpcds_cpu_cost/tpcds-ddl-iceberg.test | 741 +++++++++++++++++++
.../tpcds_cpu_cost/tpcds-ddl-parquet.test | 757 +++++++++++++++++++
tests/common/impala_test_suite.py | 20 +
tests/custom_cluster/test_executor_groups.py | 205 ++++--
tests/custom_cluster/test_iceberg_with_puffin.py | 256 +++++++
tests/metadata/test_ddl_base.py | 20 -
tests/query_test/test_ext_data_sources.py | 20 -
tests/query_test/test_iceberg.py | 1 -
76 files changed, 7162 insertions(+), 178 deletions(-)
create mode 100644
fe/src/main/java/org/apache/impala/catalog/PuffinStatsLoader.java
create mode 100644
java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaAdjustScaleFunction.java
create mode 100644
java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaGroupingFunction.java
create mode 100644
java/puffin-data-generator/00002-3c6b1ffe-ba85-4be4-a590-7c39428931e1.metadata.json
create mode 100644 java/puffin-data-generator/pom.xml
create mode 100644
java/puffin-data-generator/src/main/java/org/apache/impala/puffindatagenerator/PuffinDataGenerator.java
create mode 100644
testdata/data/iceberg_test/iceberg_with_puffin_stats/data/0747babcda9277bf-954aff1b00000000_1684663509_data.0.parq
create mode 100644
testdata/data/iceberg_test/iceberg_with_puffin_stats/metadata/11cd04ec-55ea-40aa-a89b-197c3c275e7a-m0.avro
create mode 100644
testdata/data/iceberg_test/iceberg_with_puffin_stats/metadata/20240906_085606_00006_wsfgs-4d9242d5-bd79-4069-be8b-2cfced8e0647.stats
create mode 100644
testdata/data/iceberg_test/iceberg_with_puffin_stats/metadata/snap-1880359224532128423-1-11cd04ec-55ea-40aa-a89b-197c3c275e7a.avro
create mode 100644
testdata/data/iceberg_test/iceberg_with_puffin_stats/metadata/v1.metadata.json
create mode 100644
testdata/data/iceberg_test/iceberg_with_puffin_stats/metadata/v2.metadata.json
create mode 100644
testdata/data/iceberg_test/iceberg_with_puffin_stats/metadata/v3.metadata.json
copy testdata/data/iceberg_test/{hadoop_catalog/ice/iceberg_lineitem_multiblock => iceberg_with_puffin_stats}/metadata/version-hint.txt (100%)
create mode 100644
testdata/ice_puffin/00001-2e1ade02-35ae-4a8f-a84f-784d1e0c0790.metadata.json
create mode 100644 testdata/ice_puffin/README
create mode 100644
testdata/ice_puffin/generated/all_files_corrupt.metadata.json
create mode 100644 testdata/ice_puffin/generated/all_stats.stats
create mode 100644
testdata/ice_puffin/generated/all_stats_in_1_file.metadata.json
create mode 100644 testdata/ice_puffin/generated/corrupt_file.stats
create mode 100644 testdata/ice_puffin/generated/corrupt_file1.stats
create mode 100644 testdata/ice_puffin/generated/corrupt_file2.stats
create mode 100644 testdata/ice_puffin/generated/current_snapshot_id.stats
create mode 100644
testdata/ice_puffin/generated/duplicate_stats_in_1_file.metadata.json
create mode 100644
testdata/ice_puffin/generated/duplicate_stats_in_1_file.stats
create mode 100644
testdata/ice_puffin/generated/duplicate_stats_in_2_files.metadata.json
create mode 100644
testdata/ice_puffin/generated/duplicate_stats_in_2_files1.stats
create mode 100644
testdata/ice_puffin/generated/duplicate_stats_in_2_files2.stats
create mode 100644 testdata/ice_puffin/generated/existing_file.stats
create mode 100644
testdata/ice_puffin/generated/file_contains_invalid_field_id.metadata.json
create mode 100644
testdata/ice_puffin/generated/file_contains_invalid_field_id.stats
create mode 100644
testdata/ice_puffin/generated/invalidAndCorruptSketches.metadata.json
create mode 100644
testdata/ice_puffin/generated/invalidAndCorruptSketches.stats
create mode 100644 testdata/ice_puffin/generated/missing_file.metadata.json
create mode 100644 testdata/ice_puffin/generated/non_corrupt_file.stats
create mode 100644
testdata/ice_puffin/generated/not_all_blobs_current.metadata.json
create mode 100644 testdata/ice_puffin/generated/not_all_blobs_current.stats
create mode 100644 testdata/ice_puffin/generated/not_current_snapshot_id.stats
create mode 100644
testdata/ice_puffin/generated/one_file_corrupt_one_not.metadata.json
create mode 100644
testdata/ice_puffin/generated/one_file_current_one_not.metadata.json
create mode 100644 testdata/ice_puffin/generated/stats_divided.metadata.json
create mode 100644 testdata/ice_puffin/generated/stats_divided1.stats
create mode 100644 testdata/ice_puffin/generated/stats_divided2.stats
create mode 100644
testdata/ice_puffin/generated/stats_for_unsupported_type.metadata.json
create mode 100644
testdata/ice_puffin/generated/stats_for_unsupported_type.stats
create mode 100644
testdata/ice_puffin/snap-2532372403033748214-1-c9f94c00-2920-4a39-8e7f-c7faf7e71a7d.avro
create mode 100644
testdata/workloads/functional-planner/queries/PlannerTest/tpcds_cpu_cost/ddl.test
create mode 100644
testdata/workloads/functional-planner/queries/PlannerTest/tpcds_cpu_cost/tpcds-ddl-iceberg.test
create mode 100644
testdata/workloads/functional-planner/queries/PlannerTest/tpcds_cpu_cost/tpcds-ddl-parquet.test
create mode 100644 tests/custom_cluster/test_iceberg_with_puffin.py