This is an automated email from the ASF dual-hosted git repository. jasonmfehr pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 2f5aef64a5a8cf5fff6248355a2cb27e551652d5 Author: Riza Suminto <[email protected]> AuthorDate: Mon Dec 16 15:53:42 2024 -0800 IMPALA-13617: Rename c_last_review_date to c_last_review_date_sk TPC-DS v2.11.0, section 2.4.7, renames column customer.c_last_review_date to customer.c_last_review_date_sk to align with other surrogate key columns. impala-tpcds-kit has been modified to reflect this column name change in https://github.com/cloudera/impala-tpcds-kit/commit/086d7113c8b4172247f83f60f4e274fe3326df11 However, the tpcds dataset schema in Impala test data was left unchanged. This patch applies the same rename there to align more closely with TPC-DS v2.11.0. This patch contains no data type adjustment because such an adjustment requires larger changes. customer_multiblock_page_index.parquet added by IMPALA-10310 is regenerated to follow the new schema of table customer. The SQL used to create the file now orders by both the c_current_cdemo_sk and c_customer_sk columns. The associated test assertion in parquet-page-index.test is also updated. The workaround in test_file_parser.py added by IMPALA-13543 is removed because it is no longer needed once this change is applied. Testing: - Passed core tests. 
Change-Id: Ie446b3c534cb8f6f54265cd9b2f705cad91dd4ac Reviewed-on: http://gerrit.cloudera.org:8080/22223 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- docker/quickstart_client/load_tpcds_kudu.sql | 2 +- docker/quickstart_client/load_tpcds_parquet.sql | 4 ++-- docs/topics/impala_show.xml | 4 ++-- docs/topics/impala_tutorial.xml | 4 ++-- testdata/data/README | 16 ++++++++-------- .../data/customer_multiblock_page_index.parquet | Bin 451607 -> 271263 bytes .../datasets/tpcds/tpcds_jdbc_schema_template.sql | 2 +- testdata/datasets/tpcds/tpcds_kudu_template.sql | 2 +- testdata/datasets/tpcds/tpcds_schema_template.sql | 2 +- .../tpcds_partitioned_schema_template.sql | 2 +- .../queries/PlannerTest/fk-pk-join-detection.test | 8 ++++---- .../queries/PlannerTest/tpcds/tpcds-q30.test | 20 ++++++++++---------- .../queries/QueryTest/parquet-page-index.test | 7 +++++-- .../tpcds-unmodified/queries/tpcds-q30.test | 4 ++-- .../workloads/tpcds/queries/raw/tpcds-query30.sql | 4 ++-- .../tpcds/queries/tpcds-decimal_v2-q30.test | 4 ++-- testdata/workloads/tpcds/queries/tpcds-q30.test | 4 ++-- tests/util/test_file_parser.py | 3 --- 18 files changed, 46 insertions(+), 46 deletions(-) diff --git a/docker/quickstart_client/load_tpcds_kudu.sql b/docker/quickstart_client/load_tpcds_kudu.sql index a7b8ad62d..e86f75ac0 100644 --- a/docker/quickstart_client/load_tpcds_kudu.sql +++ b/docker/quickstart_client/load_tpcds_kudu.sql @@ -462,7 +462,7 @@ CREATE TABLE tpcds_kudu.customer ( c_birth_country STRING, c_login STRING, c_email_address STRING, - c_last_review_date STRING + c_last_review_date_sk STRING ) PARTITION BY HASH (c_customer_sk) PARTITIONS 12 STORED AS KUDU; diff --git a/docker/quickstart_client/load_tpcds_parquet.sql b/docker/quickstart_client/load_tpcds_parquet.sql index d8cd90c3f..4fb826882 100644 --- a/docker/quickstart_client/load_tpcds_parquet.sql +++ b/docker/quickstart_client/load_tpcds_parquet.sql @@ -188,7 
+188,7 @@ CREATE EXTERNAL TABLE tpcds_raw.customer ( c_birth_country STRING, c_login STRING, c_email_address STRING, - c_last_review_date STRING + c_last_review_date_sk STRING ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|') @@ -808,7 +808,7 @@ CREATE TABLE tpcds_parquet.customer ( c_birth_country STRING, c_login STRING, c_email_address STRING, - c_last_review_date STRING + c_last_review_date_sk STRING ) STORED AS PARQUET; CREATE TABLE tpcds_parquet.customer_address ( diff --git a/docs/topics/impala_show.xml b/docs/topics/impala_show.xml index e8627599c..c332e0ecc 100644 --- a/docs/topics/impala_show.xml +++ b/docs/topics/impala_show.xml @@ -1121,7 +1121,7 @@ show table stats store_sales; | c_birth_country | STRING | -1 | -1 | -1 | -1 | | c_login | STRING | -1 | -1 | -1 | -1 | | c_email_address | STRING | -1 | -1 | -1 | -1 | -| c_last_review_date | STRING | -1 | -1 | -1 | -1 | +| c_last_review_date_sk | STRING | -1 | -1 | -1 | -1 | +------------------------+--------+------------------+--------+----------+----------+ show column stats store_sales; @@ -1196,7 +1196,7 @@ show column stats customer; | c_birth_country | STRING | 205 | -1 | 20 | 8.4001 | | c_login | STRING | 1 | -1 | 0 | 0 | | c_email_address | STRING | 94492 | -1 | 46 | 26.485 | -| c_last_review_date | STRING | 349 | -1 | 7 | 6.7561 | +| c_last_review_date_sk | STRING | 349 | -1 | 7 | 6.7561 | +------------------------+--------+------------------+--------+----------+--------+ show column stats store_sales; diff --git a/docs/topics/impala_tutorial.xml b/docs/topics/impala_tutorial.xml index 0cac7904e..3f40864d8 100644 --- a/docs/topics/impala_tutorial.xml +++ b/docs/topics/impala_tutorial.xml @@ -250,7 +250,7 @@ Welcome to the Impala shell. 
Press TAB twice to see a list of available commands | c_birth_country | string | | | c_login | string | | | c_email_address | string | | -| c_last_review_date | string | | +| c_last_review_date_sk | string | | +------------------------+--------+---------+ [localhost:21000] > select count(*) from customer; +----------+ @@ -724,7 +724,7 @@ create external table customer c_birth_country string, c_login string, c_email_address string, - c_last_review_date string + c_last_review_date_sk string ) row format delimited fields terminated by '|' location '/user/hive/tpcds/customer'; diff --git a/testdata/data/README b/testdata/data/README index e4a05f268..d20db0976 100644 --- a/testdata/data/README +++ b/testdata/data/README @@ -620,17 +620,17 @@ if (r ... ) location.offset = -1; customer_multiblock_page_index.parquet Parquet file that contains multiple blocks in a single file Needed to test IMPALA-10310. -In order to generate this file, execute the following instruments: -// use 1.11.0 to generate page index -1. export HIVE_AUX_JARS_PATH=/path/parquet-hadoop-1.11.0.jar -// in hive shell -2. SET parquet.block.size=8192; // use little block size -3. SET parquet.page.row.count.limit=10; // little page row count generate multi pages -4. CREATE TABLE customer_multiblock_page_index_6 +In order to generate this file, execute the following instruments in beeline +(Beeline version 3.1.3000.7.3.1.0-160 by Apache Hive): +1. SET parquet.block.size=8192; // use little block size +2. SET parquet.page.row.count.limit=10; // little page row count generate multi pages +3. CREATE TABLE customer_multiblock_page_index_6 STORED AS PARQUET TBLPROPERTIES('parquet.compression'='SNAPPY') AS SELECT * FROM tpcds.customer - WHERE c_current_cdemo_sk IS NOT NULL ORDER BY c_current_cdemo_sk LIMIT 2000; + WHERE c_current_cdemo_sk IS NOT NULL + ORDER BY c_current_cdemo_sk, c_customer_sk + LIMIT 2000; generated file will contains multi blocks, multi pages per block. 
customer_nested_multiblock_multipage.parquet diff --git a/testdata/data/customer_multiblock_page_index.parquet b/testdata/data/customer_multiblock_page_index.parquet index 21fa9a522..207890c2e 100644 Binary files a/testdata/data/customer_multiblock_page_index.parquet and b/testdata/data/customer_multiblock_page_index.parquet differ diff --git a/testdata/datasets/tpcds/tpcds_jdbc_schema_template.sql b/testdata/datasets/tpcds/tpcds_jdbc_schema_template.sql index bb4f9c62c..2c914daae 100644 --- a/testdata/datasets/tpcds/tpcds_jdbc_schema_template.sql +++ b/testdata/datasets/tpcds/tpcds_jdbc_schema_template.sql @@ -308,7 +308,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS {jdbc_db_name}.customer ( c_birth_country STRING, c_login STRING, c_email_address STRING, - c_last_review_date STRING + c_last_review_date_sk STRING ) STORED AS JDBC TBLPROPERTIES ( diff --git a/testdata/datasets/tpcds/tpcds_kudu_template.sql b/testdata/datasets/tpcds/tpcds_kudu_template.sql index a4c48ede3..9af458005 100644 --- a/testdata/datasets/tpcds/tpcds_kudu_template.sql +++ b/testdata/datasets/tpcds/tpcds_kudu_template.sql @@ -451,7 +451,7 @@ CREATE TABLE {target_db_name}.customer ( c_birth_country STRING, c_login STRING, c_email_address STRING, - c_last_review_date STRING + c_last_review_date_sk STRING ) PARTITION BY HASH (c_customer_sk) PARTITIONS {buckets} STORED AS KUDU diff --git a/testdata/datasets/tpcds/tpcds_schema_template.sql b/testdata/datasets/tpcds/tpcds_schema_template.sql index 42a573595..76cd66c4c 100644 --- a/testdata/datasets/tpcds/tpcds_schema_template.sql +++ b/testdata/datasets/tpcds/tpcds_schema_template.sql @@ -376,7 +376,7 @@ c_birth_year int c_birth_country string c_login string c_email_address string -c_last_review_date string +c_last_review_date_sk string primary key (c_customer_sk) DISABLE NOVALIDATE RELY foreign key (c_current_addr_sk) references {db_name}{db_suffix}.customer_address (ca_address_sk) DISABLE NOVALIDATE RELY foreign key (c_current_cdemo_sk) references 
{db_name}{db_suffix}.customer_demographics (cd_demo_sk) DISABLE NOVALIDATE RELY diff --git a/testdata/datasets/tpcds_partitioned/tpcds_partitioned_schema_template.sql b/testdata/datasets/tpcds_partitioned/tpcds_partitioned_schema_template.sql index 6c6b436ff..d0c4236e0 100644 --- a/testdata/datasets/tpcds_partitioned/tpcds_partitioned_schema_template.sql +++ b/testdata/datasets/tpcds_partitioned/tpcds_partitioned_schema_template.sql @@ -391,7 +391,7 @@ SELECT c_birth_country, c_login, c_email_address, - CAST(c_last_review_date AS INT) + CAST(c_last_review_date_sk AS INT) FROM tpcds{scale_factor}.{table_name}; ==== ---- DATASET diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test index b8f123e1d..9e27118d8 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test @@ -7,7 +7,7 @@ where c_salutation = 'Mrs.' F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 | Per-Host Resources: mem-estimate=181.75MB mem-reservation=21.75MB thread-reservation=3 runtime-filters-memory=1.00MB PLAN-ROOT SINK -| output exprs: tpcds.store_sales.ss_sold_time_sk, tpcds.store_sales.ss_item_sk, tpcds.store_sales.ss_customer_sk, tpcds.store_sales.ss_cdemo_sk, tpcds.store_sales.ss_hdemo_sk, tpcds.store_sales.ss_addr_sk, tpcds.store_sales.ss_store_sk, tpcds.store_sales.ss_promo_sk, tpcds.store_sales.ss_ticket_number, tpcds.store_sales.ss_quantity, tpcds.store_sales.ss_wholesale_cost, tpcds.store_sales.ss_list_price, tpcds.store_sales.ss_sales_price, tpcds.store_sales.ss_ext_discount_amt, tpcds.store_ [...] 
+| output exprs: tpcds.store_sales.ss_sold_time_sk, tpcds.store_sales.ss_item_sk, tpcds.store_sales.ss_customer_sk, tpcds.store_sales.ss_cdemo_sk, tpcds.store_sales.ss_hdemo_sk, tpcds.store_sales.ss_addr_sk, tpcds.store_sales.ss_store_sk, tpcds.store_sales.ss_promo_sk, tpcds.store_sales.ss_ticket_number, tpcds.store_sales.ss_quantity, tpcds.store_sales.ss_wholesale_cost, tpcds.store_sales.ss_list_price, tpcds.store_sales.ss_sales_price, tpcds.store_sales.ss_ext_discount_amt, tpcds.store_ [...] | mem-estimate=100.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 | 02:HASH JOIN [INNER JOIN] @@ -51,7 +51,7 @@ where c_salutation = 'Mrs.' F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 | Per-Host Resources: mem-estimate=180.75MB mem-reservation=20.75MB thread-reservation=3 PLAN-ROOT SINK -| output exprs: tpcds.store_sales.ss_sold_time_sk, tpcds.store_sales.ss_item_sk, tpcds.store_sales.ss_customer_sk, tpcds.store_sales.ss_cdemo_sk, tpcds.store_sales.ss_hdemo_sk, tpcds.store_sales.ss_addr_sk, tpcds.store_sales.ss_store_sk, tpcds.store_sales.ss_promo_sk, tpcds.store_sales.ss_ticket_number, tpcds.store_sales.ss_quantity, tpcds.store_sales.ss_wholesale_cost, tpcds.store_sales.ss_list_price, tpcds.store_sales.ss_sales_price, tpcds.store_sales.ss_ext_discount_amt, tpcds.store_ [...] +| output exprs: tpcds.store_sales.ss_sold_time_sk, tpcds.store_sales.ss_item_sk, tpcds.store_sales.ss_customer_sk, tpcds.store_sales.ss_cdemo_sk, tpcds.store_sales.ss_hdemo_sk, tpcds.store_sales.ss_addr_sk, tpcds.store_sales.ss_store_sk, tpcds.store_sales.ss_promo_sk, tpcds.store_sales.ss_ticket_number, tpcds.store_sales.ss_quantity, tpcds.store_sales.ss_wholesale_cost, tpcds.store_sales.ss_list_price, tpcds.store_sales.ss_sales_price, tpcds.store_sales.ss_ext_discount_amt, tpcds.store_ [...] | mem-estimate=100.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 | 02:HASH JOIN [LEFT OUTER JOIN] @@ -94,7 +94,7 @@ where c_salutation = 'Mrs.' 
F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 | Per-Host Resources: mem-estimate=181.75MB mem-reservation=21.75MB thread-reservation=3 runtime-filters-memory=1.00MB PLAN-ROOT SINK -| output exprs: tpcds.store_sales.ss_sold_time_sk, tpcds.store_sales.ss_item_sk, tpcds.store_sales.ss_customer_sk, tpcds.store_sales.ss_cdemo_sk, tpcds.store_sales.ss_hdemo_sk, tpcds.store_sales.ss_addr_sk, tpcds.store_sales.ss_store_sk, tpcds.store_sales.ss_promo_sk, tpcds.store_sales.ss_ticket_number, tpcds.store_sales.ss_quantity, tpcds.store_sales.ss_wholesale_cost, tpcds.store_sales.ss_list_price, tpcds.store_sales.ss_sales_price, tpcds.store_sales.ss_ext_discount_amt, tpcds.store_ [...] +| output exprs: tpcds.store_sales.ss_sold_time_sk, tpcds.store_sales.ss_item_sk, tpcds.store_sales.ss_customer_sk, tpcds.store_sales.ss_cdemo_sk, tpcds.store_sales.ss_hdemo_sk, tpcds.store_sales.ss_addr_sk, tpcds.store_sales.ss_store_sk, tpcds.store_sales.ss_promo_sk, tpcds.store_sales.ss_ticket_number, tpcds.store_sales.ss_quantity, tpcds.store_sales.ss_wholesale_cost, tpcds.store_sales.ss_list_price, tpcds.store_sales.ss_sales_price, tpcds.store_sales.ss_ext_discount_amt, tpcds.store_ [...] 
| mem-estimate=100.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 | 02:HASH JOIN [RIGHT OUTER JOIN] @@ -364,7 +364,7 @@ on ss_customer_sk % 10 = c_customer_sk / 100 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 | Per-Host Resources: mem-estimate=211.00MB mem-reservation=51.00MB thread-reservation=3 runtime-filters-memory=1.00MB PLAN-ROOT SINK -| output exprs: tpcds.store_sales.ss_sold_time_sk, tpcds.store_sales.ss_item_sk, tpcds.store_sales.ss_customer_sk, tpcds.store_sales.ss_cdemo_sk, tpcds.store_sales.ss_hdemo_sk, tpcds.store_sales.ss_addr_sk, tpcds.store_sales.ss_store_sk, tpcds.store_sales.ss_promo_sk, tpcds.store_sales.ss_ticket_number, tpcds.store_sales.ss_quantity, tpcds.store_sales.ss_wholesale_cost, tpcds.store_sales.ss_list_price, tpcds.store_sales.ss_sales_price, tpcds.store_sales.ss_ext_discount_amt, tpcds.store_ [...] +| output exprs: tpcds.store_sales.ss_sold_time_sk, tpcds.store_sales.ss_item_sk, tpcds.store_sales.ss_customer_sk, tpcds.store_sales.ss_cdemo_sk, tpcds.store_sales.ss_hdemo_sk, tpcds.store_sales.ss_addr_sk, tpcds.store_sales.ss_store_sk, tpcds.store_sales.ss_promo_sk, tpcds.store_sales.ss_ticket_number, tpcds.store_sales.ss_quantity, tpcds.store_sales.ss_wholesale_cost, tpcds.store_sales.ss_list_price, tpcds.store_sales.ss_sales_price, tpcds.store_sales.ss_ext_discount_amt, tpcds.store_ [...] 
| mem-estimate=100.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 | 02:HASH JOIN [INNER JOIN] diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds/tpcds-q30.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds/tpcds-q30.test index 89b1c2456..4e8f3cdef 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds/tpcds-q30.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds/tpcds-q30.test @@ -22,7 +22,7 @@ SELECT c_customer_id, c_birth_country, c_login, c_email_address, - c_last_review_date, + c_last_review_date_sk, ctr_total_return FROM customer_total_return ctr1, customer_address, @@ -45,7 +45,7 @@ ORDER BY c_customer_id, c_birth_country, c_login, c_email_address, - c_last_review_date, + c_last_review_date_sk, ctr_total_return LIMIT 100; ---- PLAN @@ -54,11 +54,11 @@ Per-Host Resource Estimates: Memory=429MB F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 | Per-Host Resources: mem-estimate=428.81MB mem-reservation=27.44MB thread-reservation=9 runtime-filters-memory=7.00MB PLAN-ROOT SINK -| output exprs: c_customer_id, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address, c_last_review_date, ctr_total_return +| output exprs: c_customer_id, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address, c_last_review_date_sk, ctr_total_return | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 | 18:TOP-N [LIMIT=100] -| order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, c_last_review_date ASC, ctr_total_return ASC +| order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, 
c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, c_last_review_date_sk ASC, ctr_total_return ASC | mem-estimate=20.64KB mem-reservation=0B thread-reservation=0 | tuple-ids=15 row-size=211B cardinality=100 | in pipelines: 18(GETNEXT), 07(OPEN) @@ -239,11 +239,11 @@ Per-Host Resource Estimates: Memory=504MB F11:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 | Per-Host Resources: mem-estimate=4.04MB mem-reservation=4.00MB thread-reservation=1 PLAN-ROOT SINK -| output exprs: c_customer_id, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address, c_last_review_date, ctr_total_return +| output exprs: c_customer_id, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address, c_last_review_date_sk, ctr_total_return | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 | 32:MERGING-EXCHANGE [UNPARTITIONED] -| order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, c_last_review_date ASC, ctr_total_return ASC +| order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, c_last_review_date_sk ASC, ctr_total_return ASC | limit: 100 | mem-estimate=41.67KB mem-reservation=0B thread-reservation=0 | tuple-ids=15 row-size=211B cardinality=100 @@ -252,7 +252,7 @@ PLAN-ROOT SINK F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 Per-Host Resources: mem-estimate=137.32MB mem-reservation=16.81MB thread-reservation=2 runtime-filters-memory=3.00MB 18:TOP-N 
[LIMIT=100] -| order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, c_last_review_date ASC, ctr_total_return ASC +| order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, c_last_review_date_sk ASC, ctr_total_return ASC | mem-estimate=20.64KB mem-reservation=0B thread-reservation=0 | tuple-ids=15 row-size=211B cardinality=100 | in pipelines: 18(GETNEXT), 07(OPEN) @@ -524,11 +524,11 @@ Per-Host Resource Estimates: Memory=236MB F11:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1 | Per-Instance Resources: mem-estimate=4.04MB mem-reservation=4.00MB thread-reservation=1 PLAN-ROOT SINK -| output exprs: c_customer_id, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address, c_last_review_date, ctr_total_return +| output exprs: c_customer_id, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address, c_last_review_date_sk, ctr_total_return | mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB thread-reservation=0 | 32:MERGING-EXCHANGE [UNPARTITIONED] -| order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, c_last_review_date ASC, ctr_total_return ASC +| order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, c_last_review_date_sk ASC, 
ctr_total_return ASC | limit: 100 | mem-estimate=41.67KB mem-reservation=0B thread-reservation=0 | tuple-ids=15 row-size=211B cardinality=100 @@ -538,7 +538,7 @@ F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1 Per-Host Shared Resources: mem-estimate=2.00MB mem-reservation=2.00MB thread-reservation=0 runtime-filters-memory=2.00MB Per-Instance Resources: mem-estimate=16.02MB mem-reservation=8.00MB thread-reservation=1 18:TOP-N [LIMIT=100] -| order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, c_last_review_date ASC, ctr_total_return ASC +| order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, c_last_review_date_sk ASC, ctr_total_return ASC | mem-estimate=20.64KB mem-reservation=0B thread-reservation=0 | tuple-ids=15 row-size=211B cardinality=100 | in pipelines: 18(GETNEXT), 07(OPEN) diff --git a/testdata/workloads/functional-query/queries/QueryTest/parquet-page-index.test b/testdata/workloads/functional-query/queries/QueryTest/parquet-page-index.test index 9bf7b5395..30ef29a66 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/parquet-page-index.test +++ b/testdata/workloads/functional-query/queries/QueryTest/parquet-page-index.test @@ -371,8 +371,11 @@ where c_current_cdemo_sk < 100 group by c_birth_country; ---- TYPES STRING, BIGINT ---- RUNTIME_PROFILE -aggregation(SUM, NumPages): 30 -aggregation(SUM, NumStatsFilteredPages): 27 +aggregation(SUM, NumPages): 6 +aggregation(SUM, NumRowGroups): 20 +aggregation(SUM, NumRowGroupsWithPageIndex): 20 +aggregation(SUM, NumStatsFilteredPages): 3 +aggregation(SUM, NumStatsFilteredRowGroups): 19 ==== ---- QUERY # IMPALA-10345: Impala hits DCHECK in parquet-column-stats.inline.h diff --git 
a/testdata/workloads/tpcds-unmodified/queries/tpcds-q30.test b/testdata/workloads/tpcds-unmodified/queries/tpcds-q30.test index 891968856..98564ae6e 100644 --- a/testdata/workloads/tpcds-unmodified/queries/tpcds-q30.test +++ b/testdata/workloads/tpcds-unmodified/queries/tpcds-q30.test @@ -15,7 +15,7 @@ with customer_total_return as ,ca_state) select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address - ,c_last_review_date,ctr_total_return + ,c_last_review_date_sk,ctr_total_return from customer_total_return ctr1 ,customer_address ,customer @@ -27,7 +27,7 @@ with customer_total_return as and ctr1.ctr_customer_sk = c_customer_sk order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address - ,c_last_review_date,ctr_total_return + ,c_last_review_date_sk,ctr_total_return limit 100; diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query30.sql b/testdata/workloads/tpcds/queries/raw/tpcds-query30.sql index 1057157ed..63d0cd8de 100644 --- a/testdata/workloads/tpcds/queries/raw/tpcds-query30.sql +++ b/testdata/workloads/tpcds/queries/raw/tpcds-query30.sql @@ -13,7 +13,7 @@ with customer_total_return as ,ca_state) select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address - ,c_last_review_date,ctr_total_return + ,c_last_review_date_sk,ctr_total_return from customer_total_return ctr1 ,customer_address ,customer @@ -25,7 +25,7 @@ with customer_total_return as and ctr1.ctr_customer_sk = c_customer_sk order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address - ,c_last_review_date,ctr_total_return + ,c_last_review_date_sk,ctr_total_return limit 100; -- end query 1 in 
stream 0 using template query30.tpl diff --git a/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q30.test b/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q30.test index 0d5f5eb97..877123bee 100644 --- a/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q30.test +++ b/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q30.test @@ -14,7 +14,7 @@ with customer_total_return as ,ca_state) select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address - ,c_last_review_date,ctr_total_return + ,c_last_review_date_sk,ctr_total_return from customer_total_return ctr1 ,customer_address ,customer @@ -26,7 +26,7 @@ with customer_total_return as and ctr1.ctr_customer_sk = c_customer_sk order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address - ,c_last_review_date,ctr_total_return + ,c_last_review_date_sk,ctr_total_return limit 100; ---- RESULTS 'AAAAAAAAACBFAAAA','Dr.','Clifton','Carr','N',4,4,1950,'INDIA','NULL','[email protected]','2452385',2232.42 diff --git a/testdata/workloads/tpcds/queries/tpcds-q30.test b/testdata/workloads/tpcds/queries/tpcds-q30.test index 0d5f5eb97..877123bee 100644 --- a/testdata/workloads/tpcds/queries/tpcds-q30.test +++ b/testdata/workloads/tpcds/queries/tpcds-q30.test @@ -14,7 +14,7 @@ with customer_total_return as ,ca_state) select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address - ,c_last_review_date,ctr_total_return + ,c_last_review_date_sk,ctr_total_return from customer_total_return ctr1 ,customer_address ,customer @@ -26,7 +26,7 @@ with customer_total_return as and ctr1.ctr_customer_sk = c_customer_sk order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag 
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address - ,c_last_review_date,ctr_total_return + ,c_last_review_date_sk,ctr_total_return limit 100; ---- RESULTS 'AAAAAAAAACBFAAAA','Dr.','Clifton','Carr','N',4,4,1950,'INDIA','NULL','[email protected]','2452385',2232.42 diff --git a/tests/util/test_file_parser.py b/tests/util/test_file_parser.py index 8ec30ca84..e0d31ca21 100644 --- a/tests/util/test_file_parser.py +++ b/tests/util/test_file_parser.py @@ -433,9 +433,6 @@ def load_tpc_queries(workload, include_stress_queries=False, query_name_filters= test_cases = parse_query_test_file(file_path) for test_case in test_cases: query_sql = remove_comments(test_case["QUERY"]) - if workload == "tpcds_partitioned": - # replace old columns names from old TPC-DS spec with a new one. - query_sql = query_sql.replace("c_last_review_date", "c_last_review_date_sk") if re.match(filter_regex, test_case["QUERY_NAME"]): query_name_match = query_name_pattern.search(test_case["QUERY_NAME"])
