This is an automated email from the ASF dual-hosted git repository.

dbecker pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


    from d2cb00cec IMPALA-13456: Calcite planner: fix issues with quotes
     new ff7a55332 IMPALA-13462: Added support for functions used in tpcds
     new 9ca3ccb34 IMPALA-13445: Ignore num partition for unpartitioned writes
     new b05b408f1 IMPALA-13247: Support Reading Puffin files for the current snapshot

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/catalog/catalog.cc                          |   2 +
 be/src/service/query-state-record.cc               |   1 +
 be/src/util/backend-gflag-util.cc                  |   3 +
 bin/impala-config.sh                               |   1 +
 common/thrift/BackendGflags.thrift                 |   2 +
 fe/pom.xml                                         |   7 +
 .../org/apache/impala/catalog/ColumnStats.java     |   7 +
 .../org/apache/impala/catalog/IcebergTable.java    |  81 ++-
 .../apache/impala/catalog/PuffinStatsLoader.java   | 196 +++++
 .../apache/impala/planner/DistributedPlanner.java  |  56 +-
 .../org/apache/impala/planner/HdfsTableSink.java   |  37 +
 .../org/apache/impala/planner/ProcessingCost.java  |   4 +
 .../org/apache/impala/service/BackendConfig.java   |   4 +
 .../org/apache/impala/planner/CardinalityTest.java |  48 ++
 .../impala/planner/TpcdsCpuCostPlannerTest.java    |  18 +
 .../impala/calcite/functions/FunctionResolver.java |  65 +-
 .../operators/ImpalaAdjustScaleFunction.java       |  68 ++
 .../operators/ImpalaCustomOperatorTable.java       |  29 +
 .../calcite/operators/ImpalaGroupingFunction.java  |  53 ++
 .../impala/calcite/operators/ImpalaOperator.java   |   9 +-
 .../impala/calcite/service/CalciteValidator.java   |   3 +
 java/pom.xml                                       |   2 +
 ...b1ffe-ba85-4be4-a590-7c39428931e1.metadata.json | 229 ++++++
 java/puffin-data-generator/pom.xml                 | 109 +++
 .../puffindatagenerator/PuffinDataGenerator.java   | 542 ++++++++++++++
 ...a9277bf-954aff1b00000000_1684663509_data.0.parq | Bin 0 -> 604 bytes
 .../11cd04ec-55ea-40aa-a89b-197c3c275e7a-m0.avro   | Bin 0 -> 3220 bytes
 ...sfgs-4d9242d5-bd79-4069-be8b-2cfced8e0647.stats | Bin 0 -> 534 bytes
 ...423-1-11cd04ec-55ea-40aa-a89b-197c3c275e7a.avro | Bin 0 -> 1997 bytes
 .../metadata/v1.metadata.json                      |  64 ++
 .../metadata/v2.metadata.json                      |  95 +++
 .../metadata/v3.metadata.json                      | 121 ++++
 .../metadata/version-hint.txt                      |   0
 .../functional/functional_schema_template.sql      |  13 +
 .../datasets/functional/schema_constraints.csv     |   1 +
 ...ade02-35ae-4a8f-a84f-784d1e0c0790.metadata.json | 172 +++++
 testdata/ice_puffin/README                         |  31 +
 .../generated/all_files_corrupt.metadata.json      | 204 ++++++
 testdata/ice_puffin/generated/all_stats.stats      | Bin 0 -> 1769 bytes
 .../generated/all_stats_in_1_file.metadata.json    | 223 ++++++
 testdata/ice_puffin/generated/corrupt_file.stats   | Bin 0 -> 391 bytes
 testdata/ice_puffin/generated/corrupt_file1.stats  | Bin 0 -> 471 bytes
 testdata/ice_puffin/generated/corrupt_file2.stats  | Bin 0 -> 511 bytes
 .../ice_puffin/generated/current_snapshot_id.stats | Bin 0 -> 479 bytes
 .../duplicate_stats_in_1_file.metadata.json        | 193 +++++
 .../generated/duplicate_stats_in_1_file.stats      | Bin 0 -> 690 bytes
 .../duplicate_stats_in_2_files.metadata.json       | 204 ++++++
 .../generated/duplicate_stats_in_2_files1.stats    | Bin 0 -> 479 bytes
 .../generated/duplicate_stats_in_2_files2.stats    | Bin 0 -> 447 bytes
 testdata/ice_puffin/generated/existing_file.stats  | Bin 0 -> 519 bytes
 .../file_contains_invalid_field_id.metadata.json   | 188 +++++
 .../generated/file_contains_invalid_field_id.stats | Bin 0 -> 481 bytes
 .../invalidAndCorruptSketches.metadata.json        | 203 ++++++
 .../generated/invalidAndCorruptSketches.stats      | Bin 0 -> 1081 bytes
 .../generated/missing_file.metadata.json           | 204 ++++++
 .../ice_puffin/generated/non_corrupt_file.stats    | Bin 0 -> 439 bytes
 .../generated/not_all_blobs_current.metadata.json  | 198 +++++
 .../generated/not_all_blobs_current.stats          | Bin 0 -> 910 bytes
 .../generated/not_current_snapshot_id.stats        | Bin 0 -> 519 bytes
 .../one_file_corrupt_one_not.metadata.json         | 204 ++++++
 .../one_file_current_one_not.metadata.json         | 204 ++++++
 .../generated/stats_divided.metadata.json          | 229 ++++++
 testdata/ice_puffin/generated/stats_divided1.stats | Bin 0 -> 749 bytes
 testdata/ice_puffin/generated/stats_divided2.stats | Bin 0 -> 1308 bytes
 .../stats_for_unsupported_type.metadata.json       | 188 +++++
 .../generated/stats_for_unsupported_type.stats     | Bin 0 -> 496 bytes
 ...214-1-c9f94c00-2920-4a39-8e7f-c7faf7e71a7d.avro | Bin 0 -> 3624 bytes
 .../queries/PlannerTest/tpcds_cpu_cost/ddl.test    | 805 +++++++++++++++++++++
 .../tpcds_cpu_cost/tpcds-ddl-iceberg.test          | 741 +++++++++++++++++++
 .../tpcds_cpu_cost/tpcds-ddl-parquet.test          | 757 +++++++++++++++++++
 tests/common/impala_test_suite.py                  |  20 +
 tests/custom_cluster/test_executor_groups.py       | 205 ++++--
 tests/custom_cluster/test_iceberg_with_puffin.py   | 256 +++++++
 tests/metadata/test_ddl_base.py                    |  20 -
 tests/query_test/test_ext_data_sources.py          |  20 -
 tests/query_test/test_iceberg.py                   |   1 -
 76 files changed, 7162 insertions(+), 178 deletions(-)
 create mode 100644 
fe/src/main/java/org/apache/impala/catalog/PuffinStatsLoader.java
 create mode 100644 
java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaAdjustScaleFunction.java
 create mode 100644 
java/calcite-planner/src/main/java/org/apache/impala/calcite/operators/ImpalaGroupingFunction.java
 create mode 100644 
java/puffin-data-generator/00002-3c6b1ffe-ba85-4be4-a590-7c39428931e1.metadata.json
 create mode 100644 java/puffin-data-generator/pom.xml
 create mode 100644 
java/puffin-data-generator/src/main/java/org/apache/impala/puffindatagenerator/PuffinDataGenerator.java
 create mode 100644 
testdata/data/iceberg_test/iceberg_with_puffin_stats/data/0747babcda9277bf-954aff1b00000000_1684663509_data.0.parq
 create mode 100644 
testdata/data/iceberg_test/iceberg_with_puffin_stats/metadata/11cd04ec-55ea-40aa-a89b-197c3c275e7a-m0.avro
 create mode 100644 
testdata/data/iceberg_test/iceberg_with_puffin_stats/metadata/20240906_085606_00006_wsfgs-4d9242d5-bd79-4069-be8b-2cfced8e0647.stats
 create mode 100644 
testdata/data/iceberg_test/iceberg_with_puffin_stats/metadata/snap-1880359224532128423-1-11cd04ec-55ea-40aa-a89b-197c3c275e7a.avro
 create mode 100644 
testdata/data/iceberg_test/iceberg_with_puffin_stats/metadata/v1.metadata.json
 create mode 100644 
testdata/data/iceberg_test/iceberg_with_puffin_stats/metadata/v2.metadata.json
 create mode 100644 
testdata/data/iceberg_test/iceberg_with_puffin_stats/metadata/v3.metadata.json
 copy testdata/data/iceberg_test/{hadoop_catalog/ice/iceberg_lineitem_multiblock => iceberg_with_puffin_stats}/metadata/version-hint.txt (100%)
 create mode 100644 
testdata/ice_puffin/00001-2e1ade02-35ae-4a8f-a84f-784d1e0c0790.metadata.json
 create mode 100644 testdata/ice_puffin/README
 create mode 100644 
testdata/ice_puffin/generated/all_files_corrupt.metadata.json
 create mode 100644 testdata/ice_puffin/generated/all_stats.stats
 create mode 100644 
testdata/ice_puffin/generated/all_stats_in_1_file.metadata.json
 create mode 100644 testdata/ice_puffin/generated/corrupt_file.stats
 create mode 100644 testdata/ice_puffin/generated/corrupt_file1.stats
 create mode 100644 testdata/ice_puffin/generated/corrupt_file2.stats
 create mode 100644 testdata/ice_puffin/generated/current_snapshot_id.stats
 create mode 100644 
testdata/ice_puffin/generated/duplicate_stats_in_1_file.metadata.json
 create mode 100644 
testdata/ice_puffin/generated/duplicate_stats_in_1_file.stats
 create mode 100644 
testdata/ice_puffin/generated/duplicate_stats_in_2_files.metadata.json
 create mode 100644 
testdata/ice_puffin/generated/duplicate_stats_in_2_files1.stats
 create mode 100644 
testdata/ice_puffin/generated/duplicate_stats_in_2_files2.stats
 create mode 100644 testdata/ice_puffin/generated/existing_file.stats
 create mode 100644 
testdata/ice_puffin/generated/file_contains_invalid_field_id.metadata.json
 create mode 100644 
testdata/ice_puffin/generated/file_contains_invalid_field_id.stats
 create mode 100644 
testdata/ice_puffin/generated/invalidAndCorruptSketches.metadata.json
 create mode 100644 
testdata/ice_puffin/generated/invalidAndCorruptSketches.stats
 create mode 100644 testdata/ice_puffin/generated/missing_file.metadata.json
 create mode 100644 testdata/ice_puffin/generated/non_corrupt_file.stats
 create mode 100644 
testdata/ice_puffin/generated/not_all_blobs_current.metadata.json
 create mode 100644 testdata/ice_puffin/generated/not_all_blobs_current.stats
 create mode 100644 testdata/ice_puffin/generated/not_current_snapshot_id.stats
 create mode 100644 
testdata/ice_puffin/generated/one_file_corrupt_one_not.metadata.json
 create mode 100644 
testdata/ice_puffin/generated/one_file_current_one_not.metadata.json
 create mode 100644 testdata/ice_puffin/generated/stats_divided.metadata.json
 create mode 100644 testdata/ice_puffin/generated/stats_divided1.stats
 create mode 100644 testdata/ice_puffin/generated/stats_divided2.stats
 create mode 100644 
testdata/ice_puffin/generated/stats_for_unsupported_type.metadata.json
 create mode 100644 
testdata/ice_puffin/generated/stats_for_unsupported_type.stats
 create mode 100644 
testdata/ice_puffin/snap-2532372403033748214-1-c9f94c00-2920-4a39-8e7f-c7faf7e71a7d.avro
 create mode 100644 
testdata/workloads/functional-planner/queries/PlannerTest/tpcds_cpu_cost/ddl.test
 create mode 100644 
testdata/workloads/functional-planner/queries/PlannerTest/tpcds_cpu_cost/tpcds-ddl-iceberg.test
 create mode 100644 
testdata/workloads/functional-planner/queries/PlannerTest/tpcds_cpu_cost/tpcds-ddl-parquet.test
 create mode 100644 tests/custom_cluster/test_iceberg_with_puffin.py

Reply via email to