This is an automated email from the ASF dual-hosted git repository.

jasonmfehr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 2f5aef64a5a8cf5fff6248355a2cb27e551652d5
Author: Riza Suminto <[email protected]>
AuthorDate: Mon Dec 16 15:53:42 2024 -0800

    IMPALA-13617: Rename c_last_review_date to c_last_review_date_sk
    
    TPC-DS v2.11.0, section 2.4.7, renames column customer.c_last_review_date
    to customer.c_last_review_date_sk to align with other surrogate key
    columns. impala-tpcds-kit has been modified to reflect this column name
    change in
    
https://github.com/cloudera/impala-tpcds-kit/commit/086d7113c8b4172247f83f60f4e274fe3326df11
    However, the tpcds dataset schema in Impala test data remains unchanged.
    
    This patch performs that rename to align more closely with TPC-DS
    v2.11.0. It contains no data type adjustment because such an adjustment
    requires larger changes.
    
    customer_multiblock_page_index.parquet added by IMPALA-10310 is
    regenerated to follow the new schema of table customer. The SQL used to
    create the file is ordered more specifically over both
    c_current_cdemo_sk and c_customer_sk columns. The associated test
    assertion in parquet-page-index.test is also updated.
    
    A workaround in test_file_parser.py added by IMPALA-13543 is now removed
    after this change is applied.
    
    Testing:
    - Pass core tests.
    
    Change-Id: Ie446b3c534cb8f6f54265cd9b2f705cad91dd4ac
    Reviewed-on: http://gerrit.cloudera.org:8080/22223
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 docker/quickstart_client/load_tpcds_kudu.sql       |   2 +-
 docker/quickstart_client/load_tpcds_parquet.sql    |   4 ++--
 docs/topics/impala_show.xml                        |   4 ++--
 docs/topics/impala_tutorial.xml                    |   4 ++--
 testdata/data/README                               |  16 ++++++++--------
 .../data/customer_multiblock_page_index.parquet    | Bin 451607 -> 271263 bytes
 .../datasets/tpcds/tpcds_jdbc_schema_template.sql  |   2 +-
 testdata/datasets/tpcds/tpcds_kudu_template.sql    |   2 +-
 testdata/datasets/tpcds/tpcds_schema_template.sql  |   2 +-
 .../tpcds_partitioned_schema_template.sql          |   2 +-
 .../queries/PlannerTest/fk-pk-join-detection.test  |   8 ++++----
 .../queries/PlannerTest/tpcds/tpcds-q30.test       |  20 ++++++++++----------
 .../queries/QueryTest/parquet-page-index.test      |   7 +++++--
 .../tpcds-unmodified/queries/tpcds-q30.test        |   4 ++--
 .../workloads/tpcds/queries/raw/tpcds-query30.sql  |   4 ++--
 .../tpcds/queries/tpcds-decimal_v2-q30.test        |   4 ++--
 testdata/workloads/tpcds/queries/tpcds-q30.test    |   4 ++--
 tests/util/test_file_parser.py                     |   3 ---
 18 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/docker/quickstart_client/load_tpcds_kudu.sql 
b/docker/quickstart_client/load_tpcds_kudu.sql
index a7b8ad62d..e86f75ac0 100644
--- a/docker/quickstart_client/load_tpcds_kudu.sql
+++ b/docker/quickstart_client/load_tpcds_kudu.sql
@@ -462,7 +462,7 @@ CREATE TABLE tpcds_kudu.customer (
   c_birth_country STRING,
   c_login STRING,
   c_email_address STRING,
-  c_last_review_date STRING
+  c_last_review_date_sk STRING
 )
 PARTITION BY HASH (c_customer_sk) PARTITIONS 12
 STORED AS KUDU;
diff --git a/docker/quickstart_client/load_tpcds_parquet.sql 
b/docker/quickstart_client/load_tpcds_parquet.sql
index d8cd90c3f..4fb826882 100644
--- a/docker/quickstart_client/load_tpcds_parquet.sql
+++ b/docker/quickstart_client/load_tpcds_parquet.sql
@@ -188,7 +188,7 @@ CREATE EXTERNAL TABLE tpcds_raw.customer (
   c_birth_country STRING,
   c_login STRING,
   c_email_address STRING,
-  c_last_review_date STRING
+  c_last_review_date_sk STRING
 )
 ROW FORMAT DELIMITED FIELDS TERMINATED BY '|'
 WITH SERDEPROPERTIES ('field.delim'='|', 'serialization.format'='|')
@@ -808,7 +808,7 @@ CREATE TABLE tpcds_parquet.customer (
   c_birth_country STRING,
   c_login STRING,
   c_email_address STRING,
-  c_last_review_date STRING
+  c_last_review_date_sk STRING
 )
 STORED AS PARQUET;
 CREATE TABLE tpcds_parquet.customer_address (
diff --git a/docs/topics/impala_show.xml b/docs/topics/impala_show.xml
index e8627599c..c332e0ecc 100644
--- a/docs/topics/impala_show.xml
+++ b/docs/topics/impala_show.xml
@@ -1121,7 +1121,7 @@ show table stats store_sales;
 | c_birth_country        | STRING | -1               | -1     | -1       | -1  
     |
 | c_login                | STRING | -1               | -1     | -1       | -1  
     |
 | c_email_address        | STRING | -1               | -1     | -1       | -1  
     |
-| c_last_review_date     | STRING | -1               | -1     | -1       | -1  
     |
+| c_last_review_date_sk  | STRING | -1               | -1     | -1       | -1  
     |
 
+------------------------+--------+------------------+--------+----------+----------+
 
 show column stats store_sales;
@@ -1196,7 +1196,7 @@ show column stats customer;
 | c_birth_country        | STRING | 205              | -1     | 20       | 
8.4001 |
 | c_login                | STRING | 1                | -1     | 0        | 0   
   |
 | c_email_address        | STRING | 94492            | -1     | 46       | 
26.485 |
-| c_last_review_date     | STRING | 349              | -1     | 7        | 
6.7561 |
+| c_last_review_date_sk  | STRING | 349              | -1     | 7        | 
6.7561 |
 
+------------------------+--------+------------------+--------+----------+--------+
 
 show column stats store_sales;
diff --git a/docs/topics/impala_tutorial.xml b/docs/topics/impala_tutorial.xml
index 0cac7904e..3f40864d8 100644
--- a/docs/topics/impala_tutorial.xml
+++ b/docs/topics/impala_tutorial.xml
@@ -250,7 +250,7 @@ Welcome to the Impala shell. Press TAB twice to see a list 
of available commands
 | c_birth_country        | string |         |
 | c_login                | string |         |
 | c_email_address        | string |         |
-| c_last_review_date     | string |         |
+| c_last_review_date_sk  | string |         |
 +------------------------+--------+---------+
 [localhost:21000] > select count(*) from customer;
 +----------+
@@ -724,7 +724,7 @@ create external table customer
     c_birth_country           string,
     c_login                   string,
     c_email_address           string,
-    c_last_review_date        string
+    c_last_review_date_sk     string
 )
 row format delimited fields terminated by '|'
 location '/user/hive/tpcds/customer';
diff --git a/testdata/data/README b/testdata/data/README
index e4a05f268..d20db0976 100644
--- a/testdata/data/README
+++ b/testdata/data/README
@@ -620,17 +620,17 @@ if (r ... ) location.offset = -1;
 
 customer_multiblock_page_index.parquet
 Parquet file that contains multiple blocks in a single file Needed to test 
IMPALA-10310.
-In order to generate this file, execute the following instruments:
-// use 1.11.0 to generate page index
-1. export HIVE_AUX_JARS_PATH=/path/parquet-hadoop-1.11.0.jar
-// in hive shell
-2. SET parquet.block.size=8192;         // use little block size
-3. SET parquet.page.row.count.limit=10; // little page row count generate 
multi pages
-4. CREATE TABLE customer_multiblock_page_index_6
+In order to generate this file, execute the following instruments in beeline
+(Beeline version 3.1.3000.7.3.1.0-160 by Apache Hive):
+1. SET parquet.block.size=8192;         // use little block size
+2. SET parquet.page.row.count.limit=10; // little page row count generate 
multi pages
+3. CREATE TABLE customer_multiblock_page_index_6
    STORED AS PARQUET
    TBLPROPERTIES('parquet.compression'='SNAPPY')
    AS SELECT * FROM tpcds.customer
-   WHERE c_current_cdemo_sk IS NOT NULL ORDER BY c_current_cdemo_sk LIMIT 2000;
+   WHERE c_current_cdemo_sk IS NOT NULL
+   ORDER BY c_current_cdemo_sk, c_customer_sk
+   LIMIT 2000;
 generated file will contains multi blocks, multi pages per block.
 
 customer_nested_multiblock_multipage.parquet
diff --git a/testdata/data/customer_multiblock_page_index.parquet 
b/testdata/data/customer_multiblock_page_index.parquet
index 21fa9a522..207890c2e 100644
Binary files a/testdata/data/customer_multiblock_page_index.parquet and 
b/testdata/data/customer_multiblock_page_index.parquet differ
diff --git a/testdata/datasets/tpcds/tpcds_jdbc_schema_template.sql 
b/testdata/datasets/tpcds/tpcds_jdbc_schema_template.sql
index bb4f9c62c..2c914daae 100644
--- a/testdata/datasets/tpcds/tpcds_jdbc_schema_template.sql
+++ b/testdata/datasets/tpcds/tpcds_jdbc_schema_template.sql
@@ -308,7 +308,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS {jdbc_db_name}.customer 
(
   c_birth_country STRING,
   c_login STRING,
   c_email_address STRING,
-  c_last_review_date STRING
+  c_last_review_date_sk STRING
 )
 STORED AS JDBC
 TBLPROPERTIES (
diff --git a/testdata/datasets/tpcds/tpcds_kudu_template.sql 
b/testdata/datasets/tpcds/tpcds_kudu_template.sql
index a4c48ede3..9af458005 100644
--- a/testdata/datasets/tpcds/tpcds_kudu_template.sql
+++ b/testdata/datasets/tpcds/tpcds_kudu_template.sql
@@ -451,7 +451,7 @@ CREATE TABLE {target_db_name}.customer (
   c_birth_country STRING,
   c_login STRING,
   c_email_address STRING,
-  c_last_review_date STRING
+  c_last_review_date_sk STRING
 )
 PARTITION BY HASH (c_customer_sk) PARTITIONS {buckets}
 STORED AS KUDU
diff --git a/testdata/datasets/tpcds/tpcds_schema_template.sql 
b/testdata/datasets/tpcds/tpcds_schema_template.sql
index 42a573595..76cd66c4c 100644
--- a/testdata/datasets/tpcds/tpcds_schema_template.sql
+++ b/testdata/datasets/tpcds/tpcds_schema_template.sql
@@ -376,7 +376,7 @@ c_birth_year              int
 c_birth_country           string
 c_login                   string
 c_email_address           string
-c_last_review_date        string
+c_last_review_date_sk     string
 primary key (c_customer_sk) DISABLE NOVALIDATE RELY
 foreign key (c_current_addr_sk) references 
{db_name}{db_suffix}.customer_address (ca_address_sk) DISABLE NOVALIDATE RELY
 foreign key (c_current_cdemo_sk) references 
{db_name}{db_suffix}.customer_demographics (cd_demo_sk) DISABLE NOVALIDATE RELY
diff --git 
a/testdata/datasets/tpcds_partitioned/tpcds_partitioned_schema_template.sql 
b/testdata/datasets/tpcds_partitioned/tpcds_partitioned_schema_template.sql
index 6c6b436ff..d0c4236e0 100644
--- a/testdata/datasets/tpcds_partitioned/tpcds_partitioned_schema_template.sql
+++ b/testdata/datasets/tpcds_partitioned/tpcds_partitioned_schema_template.sql
@@ -391,7 +391,7 @@ SELECT
   c_birth_country,
   c_login,
   c_email_address,
-  CAST(c_last_review_date AS INT)
+  CAST(c_last_review_date_sk AS INT)
 FROM tpcds{scale_factor}.{table_name};
 ====
 ---- DATASET
diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
 
b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
index b8f123e1d..9e27118d8 100644
--- 
a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
+++ 
b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
@@ -7,7 +7,7 @@ where c_salutation = 'Mrs.'
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=181.75MB mem-reservation=21.75MB 
thread-reservation=3 runtime-filters-memory=1.00MB
 PLAN-ROOT SINK
-|  output exprs: tpcds.store_sales.ss_sold_time_sk, 
tpcds.store_sales.ss_item_sk, tpcds.store_sales.ss_customer_sk, 
tpcds.store_sales.ss_cdemo_sk, tpcds.store_sales.ss_hdemo_sk, 
tpcds.store_sales.ss_addr_sk, tpcds.store_sales.ss_store_sk, 
tpcds.store_sales.ss_promo_sk, tpcds.store_sales.ss_ticket_number, 
tpcds.store_sales.ss_quantity, tpcds.store_sales.ss_wholesale_cost, 
tpcds.store_sales.ss_list_price, tpcds.store_sales.ss_sales_price, 
tpcds.store_sales.ss_ext_discount_amt, tpcds.store_ [...]
+|  output exprs: tpcds.store_sales.ss_sold_time_sk, 
tpcds.store_sales.ss_item_sk, tpcds.store_sales.ss_customer_sk, 
tpcds.store_sales.ss_cdemo_sk, tpcds.store_sales.ss_hdemo_sk, 
tpcds.store_sales.ss_addr_sk, tpcds.store_sales.ss_store_sk, 
tpcds.store_sales.ss_promo_sk, tpcds.store_sales.ss_ticket_number, 
tpcds.store_sales.ss_quantity, tpcds.store_sales.ss_wholesale_cost, 
tpcds.store_sales.ss_list_price, tpcds.store_sales.ss_sales_price, 
tpcds.store_sales.ss_ext_discount_amt, tpcds.store_ [...]
 |  mem-estimate=100.00MB mem-reservation=4.00MB spill-buffer=2.00MB 
thread-reservation=0
 |
 02:HASH JOIN [INNER JOIN]
@@ -51,7 +51,7 @@ where c_salutation = 'Mrs.'
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=180.75MB mem-reservation=20.75MB 
thread-reservation=3
 PLAN-ROOT SINK
-|  output exprs: tpcds.store_sales.ss_sold_time_sk, 
tpcds.store_sales.ss_item_sk, tpcds.store_sales.ss_customer_sk, 
tpcds.store_sales.ss_cdemo_sk, tpcds.store_sales.ss_hdemo_sk, 
tpcds.store_sales.ss_addr_sk, tpcds.store_sales.ss_store_sk, 
tpcds.store_sales.ss_promo_sk, tpcds.store_sales.ss_ticket_number, 
tpcds.store_sales.ss_quantity, tpcds.store_sales.ss_wholesale_cost, 
tpcds.store_sales.ss_list_price, tpcds.store_sales.ss_sales_price, 
tpcds.store_sales.ss_ext_discount_amt, tpcds.store_ [...]
+|  output exprs: tpcds.store_sales.ss_sold_time_sk, 
tpcds.store_sales.ss_item_sk, tpcds.store_sales.ss_customer_sk, 
tpcds.store_sales.ss_cdemo_sk, tpcds.store_sales.ss_hdemo_sk, 
tpcds.store_sales.ss_addr_sk, tpcds.store_sales.ss_store_sk, 
tpcds.store_sales.ss_promo_sk, tpcds.store_sales.ss_ticket_number, 
tpcds.store_sales.ss_quantity, tpcds.store_sales.ss_wholesale_cost, 
tpcds.store_sales.ss_list_price, tpcds.store_sales.ss_sales_price, 
tpcds.store_sales.ss_ext_discount_amt, tpcds.store_ [...]
 |  mem-estimate=100.00MB mem-reservation=4.00MB spill-buffer=2.00MB 
thread-reservation=0
 |
 02:HASH JOIN [LEFT OUTER JOIN]
@@ -94,7 +94,7 @@ where c_salutation = 'Mrs.'
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=181.75MB mem-reservation=21.75MB 
thread-reservation=3 runtime-filters-memory=1.00MB
 PLAN-ROOT SINK
-|  output exprs: tpcds.store_sales.ss_sold_time_sk, 
tpcds.store_sales.ss_item_sk, tpcds.store_sales.ss_customer_sk, 
tpcds.store_sales.ss_cdemo_sk, tpcds.store_sales.ss_hdemo_sk, 
tpcds.store_sales.ss_addr_sk, tpcds.store_sales.ss_store_sk, 
tpcds.store_sales.ss_promo_sk, tpcds.store_sales.ss_ticket_number, 
tpcds.store_sales.ss_quantity, tpcds.store_sales.ss_wholesale_cost, 
tpcds.store_sales.ss_list_price, tpcds.store_sales.ss_sales_price, 
tpcds.store_sales.ss_ext_discount_amt, tpcds.store_ [...]
+|  output exprs: tpcds.store_sales.ss_sold_time_sk, 
tpcds.store_sales.ss_item_sk, tpcds.store_sales.ss_customer_sk, 
tpcds.store_sales.ss_cdemo_sk, tpcds.store_sales.ss_hdemo_sk, 
tpcds.store_sales.ss_addr_sk, tpcds.store_sales.ss_store_sk, 
tpcds.store_sales.ss_promo_sk, tpcds.store_sales.ss_ticket_number, 
tpcds.store_sales.ss_quantity, tpcds.store_sales.ss_wholesale_cost, 
tpcds.store_sales.ss_list_price, tpcds.store_sales.ss_sales_price, 
tpcds.store_sales.ss_ext_discount_amt, tpcds.store_ [...]
 |  mem-estimate=100.00MB mem-reservation=4.00MB spill-buffer=2.00MB 
thread-reservation=0
 |
 02:HASH JOIN [RIGHT OUTER JOIN]
@@ -364,7 +364,7 @@ on ss_customer_sk % 10 = c_customer_sk / 100
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=211.00MB mem-reservation=51.00MB 
thread-reservation=3 runtime-filters-memory=1.00MB
 PLAN-ROOT SINK
-|  output exprs: tpcds.store_sales.ss_sold_time_sk, 
tpcds.store_sales.ss_item_sk, tpcds.store_sales.ss_customer_sk, 
tpcds.store_sales.ss_cdemo_sk, tpcds.store_sales.ss_hdemo_sk, 
tpcds.store_sales.ss_addr_sk, tpcds.store_sales.ss_store_sk, 
tpcds.store_sales.ss_promo_sk, tpcds.store_sales.ss_ticket_number, 
tpcds.store_sales.ss_quantity, tpcds.store_sales.ss_wholesale_cost, 
tpcds.store_sales.ss_list_price, tpcds.store_sales.ss_sales_price, 
tpcds.store_sales.ss_ext_discount_amt, tpcds.store_ [...]
+|  output exprs: tpcds.store_sales.ss_sold_time_sk, 
tpcds.store_sales.ss_item_sk, tpcds.store_sales.ss_customer_sk, 
tpcds.store_sales.ss_cdemo_sk, tpcds.store_sales.ss_hdemo_sk, 
tpcds.store_sales.ss_addr_sk, tpcds.store_sales.ss_store_sk, 
tpcds.store_sales.ss_promo_sk, tpcds.store_sales.ss_ticket_number, 
tpcds.store_sales.ss_quantity, tpcds.store_sales.ss_wholesale_cost, 
tpcds.store_sales.ss_list_price, tpcds.store_sales.ss_sales_price, 
tpcds.store_sales.ss_ext_discount_amt, tpcds.store_ [...]
 |  mem-estimate=100.00MB mem-reservation=4.00MB spill-buffer=2.00MB 
thread-reservation=0
 |
 02:HASH JOIN [INNER JOIN]
diff --git 
a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds/tpcds-q30.test
 
b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds/tpcds-q30.test
index 89b1c2456..4e8f3cdef 100644
--- 
a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds/tpcds-q30.test
+++ 
b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds/tpcds-q30.test
@@ -22,7 +22,7 @@ SELECT c_customer_id,
        c_birth_country,
        c_login,
        c_email_address,
-       c_last_review_date,
+       c_last_review_date_sk,
        ctr_total_return
 FROM customer_total_return ctr1,
      customer_address,
@@ -45,7 +45,7 @@ ORDER BY c_customer_id,
          c_birth_country,
          c_login,
          c_email_address,
-         c_last_review_date,
+         c_last_review_date_sk,
          ctr_total_return
 LIMIT 100;
 ---- PLAN
@@ -54,11 +54,11 @@ Per-Host Resource Estimates: Memory=429MB
 F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=428.81MB mem-reservation=27.44MB 
thread-reservation=9 runtime-filters-memory=7.00MB
 PLAN-ROOT SINK
-|  output exprs: c_customer_id, c_salutation, c_first_name, c_last_name, 
c_preferred_cust_flag, c_birth_day, c_birth_month, c_birth_year, 
c_birth_country, c_login, c_email_address, c_last_review_date, ctr_total_return
+|  output exprs: c_customer_id, c_salutation, c_first_name, c_last_name, 
c_preferred_cust_flag, c_birth_day, c_birth_month, c_birth_year, 
c_birth_country, c_login, c_email_address, c_last_review_date_sk, 
ctr_total_return
 |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB 
thread-reservation=0
 |
 18:TOP-N [LIMIT=100]
-|  order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, 
c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, 
c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, 
c_last_review_date ASC, ctr_total_return ASC
+|  order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, 
c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, 
c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, 
c_last_review_date_sk ASC, ctr_total_return ASC
 |  mem-estimate=20.64KB mem-reservation=0B thread-reservation=0
 |  tuple-ids=15 row-size=211B cardinality=100
 |  in pipelines: 18(GETNEXT), 07(OPEN)
@@ -239,11 +239,11 @@ Per-Host Resource Estimates: Memory=504MB
 F11:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Host Resources: mem-estimate=4.04MB mem-reservation=4.00MB 
thread-reservation=1
 PLAN-ROOT SINK
-|  output exprs: c_customer_id, c_salutation, c_first_name, c_last_name, 
c_preferred_cust_flag, c_birth_day, c_birth_month, c_birth_year, 
c_birth_country, c_login, c_email_address, c_last_review_date, ctr_total_return
+|  output exprs: c_customer_id, c_salutation, c_first_name, c_last_name, 
c_preferred_cust_flag, c_birth_day, c_birth_month, c_birth_year, 
c_birth_country, c_login, c_email_address, c_last_review_date_sk, 
ctr_total_return
 |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB 
thread-reservation=0
 |
 32:MERGING-EXCHANGE [UNPARTITIONED]
-|  order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, 
c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, 
c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, 
c_last_review_date ASC, ctr_total_return ASC
+|  order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, 
c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, 
c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, 
c_last_review_date_sk ASC, ctr_total_return ASC
 |  limit: 100
 |  mem-estimate=41.67KB mem-reservation=0B thread-reservation=0
 |  tuple-ids=15 row-size=211B cardinality=100
@@ -252,7 +252,7 @@ PLAN-ROOT SINK
 F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
 Per-Host Resources: mem-estimate=137.32MB mem-reservation=16.81MB 
thread-reservation=2 runtime-filters-memory=3.00MB
 18:TOP-N [LIMIT=100]
-|  order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, 
c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, 
c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, 
c_last_review_date ASC, ctr_total_return ASC
+|  order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, 
c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, 
c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, 
c_last_review_date_sk ASC, ctr_total_return ASC
 |  mem-estimate=20.64KB mem-reservation=0B thread-reservation=0
 |  tuple-ids=15 row-size=211B cardinality=100
 |  in pipelines: 18(GETNEXT), 07(OPEN)
@@ -524,11 +524,11 @@ Per-Host Resource Estimates: Memory=236MB
 F11:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
 |  Per-Instance Resources: mem-estimate=4.04MB mem-reservation=4.00MB 
thread-reservation=1
 PLAN-ROOT SINK
-|  output exprs: c_customer_id, c_salutation, c_first_name, c_last_name, 
c_preferred_cust_flag, c_birth_day, c_birth_month, c_birth_year, 
c_birth_country, c_login, c_email_address, c_last_review_date, ctr_total_return
+|  output exprs: c_customer_id, c_salutation, c_first_name, c_last_name, 
c_preferred_cust_flag, c_birth_day, c_birth_month, c_birth_year, 
c_birth_country, c_login, c_email_address, c_last_review_date_sk, 
ctr_total_return
 |  mem-estimate=4.00MB mem-reservation=4.00MB spill-buffer=2.00MB 
thread-reservation=0
 |
 32:MERGING-EXCHANGE [UNPARTITIONED]
-|  order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, 
c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, 
c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, 
c_last_review_date ASC, ctr_total_return ASC
+|  order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, 
c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, 
c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, 
c_last_review_date_sk ASC, ctr_total_return ASC
 |  limit: 100
 |  mem-estimate=41.67KB mem-reservation=0B thread-reservation=0
 |  tuple-ids=15 row-size=211B cardinality=100
@@ -538,7 +538,7 @@ F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
 Per-Host Shared Resources: mem-estimate=2.00MB mem-reservation=2.00MB 
thread-reservation=0 runtime-filters-memory=2.00MB
 Per-Instance Resources: mem-estimate=16.02MB mem-reservation=8.00MB 
thread-reservation=1
 18:TOP-N [LIMIT=100]
-|  order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, 
c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, 
c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, 
c_last_review_date ASC, ctr_total_return ASC
+|  order by: c_customer_id ASC, c_salutation ASC, c_first_name ASC, 
c_last_name ASC, c_preferred_cust_flag ASC, c_birth_day ASC, c_birth_month ASC, 
c_birth_year ASC, c_birth_country ASC, c_login ASC, c_email_address ASC, 
c_last_review_date_sk ASC, ctr_total_return ASC
 |  mem-estimate=20.64KB mem-reservation=0B thread-reservation=0
 |  tuple-ids=15 row-size=211B cardinality=100
 |  in pipelines: 18(GETNEXT), 07(OPEN)
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/parquet-page-index.test 
b/testdata/workloads/functional-query/queries/QueryTest/parquet-page-index.test
index 9bf7b5395..30ef29a66 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/parquet-page-index.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/parquet-page-index.test
@@ -371,8 +371,11 @@ where c_current_cdemo_sk < 100 group by c_birth_country;
 ---- TYPES
 STRING, BIGINT
 ---- RUNTIME_PROFILE
-aggregation(SUM, NumPages): 30
-aggregation(SUM, NumStatsFilteredPages): 27
+aggregation(SUM, NumPages): 6
+aggregation(SUM, NumRowGroups): 20
+aggregation(SUM, NumRowGroupsWithPageIndex): 20
+aggregation(SUM, NumStatsFilteredPages): 3
+aggregation(SUM, NumStatsFilteredRowGroups): 19
 ====
 ---- QUERY
 # IMPALA-10345: Impala hits DCHECK in parquet-column-stats.inline.h
diff --git a/testdata/workloads/tpcds-unmodified/queries/tpcds-q30.test 
b/testdata/workloads/tpcds-unmodified/queries/tpcds-q30.test
index 891968856..98564ae6e 100644
--- a/testdata/workloads/tpcds-unmodified/queries/tpcds-q30.test
+++ b/testdata/workloads/tpcds-unmodified/queries/tpcds-q30.test
@@ -15,7 +15,7 @@ with customer_total_return as
          ,ca_state)
   select  
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
        
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
-       ,c_last_review_date,ctr_total_return
+       ,c_last_review_date_sk,ctr_total_return
  from customer_total_return ctr1
      ,customer_address
      ,customer
@@ -27,7 +27,7 @@ with customer_total_return as
        and ctr1.ctr_customer_sk = c_customer_sk
  order by 
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
                   
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
-                  ,c_last_review_date,ctr_total_return
+                  ,c_last_review_date_sk,ctr_total_return
 limit 100;
 
 
diff --git a/testdata/workloads/tpcds/queries/raw/tpcds-query30.sql 
b/testdata/workloads/tpcds/queries/raw/tpcds-query30.sql
index 1057157ed..63d0cd8de 100644
--- a/testdata/workloads/tpcds/queries/raw/tpcds-query30.sql
+++ b/testdata/workloads/tpcds/queries/raw/tpcds-query30.sql
@@ -13,7 +13,7 @@ with customer_total_return as
          ,ca_state)
   select  
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
        
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
-       ,c_last_review_date,ctr_total_return
+       ,c_last_review_date_sk,ctr_total_return
  from customer_total_return ctr1
      ,customer_address
      ,customer
@@ -25,7 +25,7 @@ with customer_total_return as
        and ctr1.ctr_customer_sk = c_customer_sk
  order by 
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
                   
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
-                  ,c_last_review_date,ctr_total_return
+                  ,c_last_review_date_sk,ctr_total_return
 limit 100;
 
 -- end query 1 in stream 0 using template query30.tpl
diff --git a/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q30.test 
b/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q30.test
index 0d5f5eb97..877123bee 100644
--- a/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q30.test
+++ b/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q30.test
@@ -14,7 +14,7 @@ with customer_total_return as
          ,ca_state)
   select  
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
        
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
-       ,c_last_review_date,ctr_total_return
+       ,c_last_review_date_sk,ctr_total_return
  from customer_total_return ctr1
      ,customer_address
      ,customer
@@ -26,7 +26,7 @@ with customer_total_return as
        and ctr1.ctr_customer_sk = c_customer_sk
  order by 
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
                   
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
-                  ,c_last_review_date,ctr_total_return
+                  ,c_last_review_date_sk,ctr_total_return
 limit 100;
 ---- RESULTS
 
'AAAAAAAAACBFAAAA','Dr.','Clifton','Carr','N',4,4,1950,'INDIA','NULL','[email protected]','2452385',2232.42
diff --git a/testdata/workloads/tpcds/queries/tpcds-q30.test 
b/testdata/workloads/tpcds/queries/tpcds-q30.test
index 0d5f5eb97..877123bee 100644
--- a/testdata/workloads/tpcds/queries/tpcds-q30.test
+++ b/testdata/workloads/tpcds/queries/tpcds-q30.test
@@ -14,7 +14,7 @@ with customer_total_return as
          ,ca_state)
   select  
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
        
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
-       ,c_last_review_date,ctr_total_return
+       ,c_last_review_date_sk,ctr_total_return
  from customer_total_return ctr1
      ,customer_address
      ,customer
@@ -26,7 +26,7 @@ with customer_total_return as
        and ctr1.ctr_customer_sk = c_customer_sk
  order by 
c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag
                   
,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address
-                  ,c_last_review_date,ctr_total_return
+                  ,c_last_review_date_sk,ctr_total_return
 limit 100;
 ---- RESULTS
 
'AAAAAAAAACBFAAAA','Dr.','Clifton','Carr','N',4,4,1950,'INDIA','NULL','[email protected]','2452385',2232.42
diff --git a/tests/util/test_file_parser.py b/tests/util/test_file_parser.py
index 8ec30ca84..e0d31ca21 100644
--- a/tests/util/test_file_parser.py
+++ b/tests/util/test_file_parser.py
@@ -433,9 +433,6 @@ def load_tpc_queries(workload, 
include_stress_queries=False, query_name_filters=
     test_cases = parse_query_test_file(file_path)
     for test_case in test_cases:
       query_sql = remove_comments(test_case["QUERY"])
-      if workload == "tpcds_partitioned":
-        # replace old columns names from old TPC-DS spec with a new one.
-        query_sql = query_sql.replace("c_last_review_date", 
"c_last_review_date_sk")
 
       if re.match(filter_regex, test_case["QUERY_NAME"]):
         query_name_match = query_name_pattern.search(test_case["QUERY_NAME"])

Reply via email to