This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit f1de0c392f4c5fe1df653956595a6f6699dd100e Author: Riza Suminto <[email protected]> AuthorDate: Tue Dec 17 11:53:43 2024 -0800 IMPALA-13636: Fix target file_format in TestTpcdsInsert TestTpcdsInsert creates a temporary table to test insert functionality. It has three problems: 1. It does not use the unique_database parameter, so the temporary table is not cleaned up after the test finishes. 2. It ignores the file_format from the test vector, causing inconsistency in the temporary table's file format. str_insert is always in PARQUET format, while store_sales_insert is always in TEXTFILE format. 3. The text file_format dimension is never exercised, because --workload_exploration_strategy in run-all-tests.sh does not explicitly list the tpcds-insert workload. This patch fixes all three problems and a few flake8 warnings in test_tpcds_queries.py. Testing: - Run bin/run-all-tests.sh with EXPLORATION_STRATEGY=exhaustive EE_TEST=true EE_TEST_FILES="query_test/test_tpcds_queries.py::TestTpcdsInsert" Verified that the temporary table format follows the file_format dimension. Change-Id: Iea621ec1d6a53eba9558b0daa3a4cc97fbcc67ae Reviewed-on: http://gerrit.cloudera.org:8080/22291 Reviewed-by: Michael Smith <[email protected]> Reviewed-by: Csaba Ringhofer <[email protected]> Tested-by: Riza Suminto <[email protected]> --- bin/run-all-tests.sh | 3 +- buildall.sh | 2 +- .../tpcds-insert/queries/expr-insert.test | 7 ++--- .../tpcds-insert/queries/partitioned-insert.test | 14 ++++----- tests/query_test/test_tpcds_queries.py | 34 +++++++++++++--------- 5 files changed, 32 insertions(+), 28 deletions(-) diff --git a/bin/run-all-tests.sh b/bin/run-all-tests.sh index b90132132..9e53aef2c 100755 --- a/bin/run-all-tests.sh +++ b/bin/run-all-tests.sh @@ -193,7 +193,8 @@ TIMEOUT_PID=$! 
COMMON_PYTEST_ARGS="--maxfail=${MAX_PYTEST_FAILURES} --exploration_strategy=core"` `" --workload_exploration_strategy="` `"functional-query:${EXPLORATION_STRATEGY},"` - `"targeted-stress:${EXPLORATION_STRATEGY}" + `"targeted-stress:${EXPLORATION_STRATEGY},"` + `"tpcds-insert:${EXPLORATION_STRATEGY}" if [[ "${EXPLORATION_STRATEGY}" == "core" ]]; then # Skip the stress test in core - all stress tests are in exhaustive and # pytest startup takes a significant amount of time. diff --git a/buildall.sh b/buildall.sh index 2a96741de..bca291789 100755 --- a/buildall.sh +++ b/buildall.sh @@ -253,7 +253,7 @@ do "test execution time)" echo "[-testexhaustive] : Run tests in 'exhaustive' mode, which significantly"\ "increases test execution time. ONLY APPLIES to suites with workloads:"\ - "functional-query, targeted-stress" + "functional-query, targeted-stress, tpcds-insert" echo "[-testdata] : Loads test data. Implied as true if -snapshot_file is"\ "specified. If -snapshot_file is not specified, data will be regenerated." echo "[-snapshot_file <file name>] : Load test data from a snapshot file" diff --git a/testdata/workloads/tpcds-insert/queries/expr-insert.test b/testdata/workloads/tpcds-insert/queries/expr-insert.test index 10840290f..d38416cca 100644 --- a/testdata/workloads/tpcds-insert/queries/expr-insert.test +++ b/testdata/workloads/tpcds-insert/queries/expr-insert.test @@ -1,16 +1,13 @@ ==== ----- QUERY: TPDCS-STR-INSERT-DROP -DROP TABLE IF EXISTS str_insert -==== ---- QUERY: TPDCS-STR-INSERT-SETUP -CREATE TABLE str_insert (s string) STORED AS PARQUET +CREATE TABLE str_insert (s string) STORED AS $FILE_FORMAT ---- RESULTS 'Table has been created.' 
==== ---- QUERY: TPDCS-STR-INSERT-CASE INSERT INTO str_insert SELECT case when ss_promo_sk % 2 = 0 then 'even' else 'odd' end -FROM store_sales +FROM tpcds.store_sales ---- RESULTS : 2880404 ==== diff --git a/testdata/workloads/tpcds-insert/queries/partitioned-insert.test b/testdata/workloads/tpcds-insert/queries/partitioned-insert.test index a9571eb27..166a99c36 100644 --- a/testdata/workloads/tpcds-insert/queries/partitioned-insert.test +++ b/testdata/workloads/tpcds-insert/queries/partitioned-insert.test @@ -1,23 +1,21 @@ ==== ----- QUERY: TPCDS-SS-INSERT-SETUP1 -DROP TABLE IF EXISTS store_sales_insert -==== ---- QUERY: TPCDS-SS-INSERT-SETUP2 -CREATE TABLE store_sales_insert LIKE store_sales +CREATE TABLE store_sales_insert LIKE tpcds.store_sales +STORED AS $FILE_FORMAT ---- RESULTS 'Table has been created.' ==== ---- QUERY: TPCDS-SS-INSERT-DAY # Insert a day's worth of data INSERT OVERWRITE TABLE store_sales_insert PARTITION (ss_sold_date_sk) -SELECT * FROM store_sales +SELECT * FROM tpcds.store_sales WHERE ss_sold_date_sk = 2451239 ---- RESULTS ss_sold_date_sk=2451239: 847 ==== ---- QUERY: TPCDS-SS-INSERT-MONTH INSERT OVERWRITE TABLE store_sales_insert PARTITION (ss_sold_date_sk) -SELECT * FROM store_sales +SELECT * FROM tpcds.store_sales WHERE ss_sold_date_sk >= 2451270 and ss_sold_date_sk <= 2451299 ---- RESULTS ss_sold_date_sk=2451270: 822 @@ -53,7 +51,7 @@ ss_sold_date_sk=2451299: 703 ==== ---- QUERY: TPCDS-SS-INSERT-QUARTER INSERT OVERWRITE TABLE store_sales_insert PARTITION (ss_sold_date_sk) -SELECT * FROM store_sales +SELECT * FROM tpcds.store_sales WHERE ss_sold_date_sk >= 2451423 and ss_sold_date_sk <= 2451544 ---- RESULTS ss_sold_date_sk=2451423: 1881 @@ -181,7 +179,7 @@ ss_sold_date_sk=2451544: 3046 ==== ---- QUERY: TPCDS-SS-INSERT-YEAR INSERT OVERWRITE TABLE store_sales_insert PARTITION (ss_sold_date_sk) -SELECT * FROM store_sales +SELECT * FROM tpcds.store_sales WHERE ss_sold_date_sk >= 2451545 and ss_sold_date_sk < 2451911 ---- RESULTS 
ss_sold_date_sk=2451545: 2743 diff --git a/tests/query_test/test_tpcds_queries.py b/tests/query_test/test_tpcds_queries.py index 7e3e3e3fc..723d4f49f 100644 --- a/tests/query_test/test_tpcds_queries.py +++ b/tests/query_test/test_tpcds_queries.py @@ -27,10 +27,12 @@ from tests.common.skip import ( SkipIfBuildType, SkipIfDockerizedCluster) from tests.common.test_dimensions import ( + FILE_FORMAT_TO_STORED_AS_MAP, add_mandatory_exec_option, create_single_exec_option_dimension, is_supported_insert_format) + class TestTpcdsQuery(ImpalaTestSuite): @classmethod def get_workload(cls): @@ -40,9 +42,9 @@ class TestTpcdsQuery(ImpalaTestSuite): def add_test_dimensions(cls): super(TestTpcdsQuery, cls).add_test_dimensions() cls.ImpalaTestMatrix.add_constraint(lambda v: - v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] and - v.get_value('table_format').compression_codec in ['none', 'snap'] and - v.get_value('table_format').compression_type != 'record') + v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] + and v.get_value('table_format').compression_codec in ['none', 'snap'] + and v.get_value('table_format').compression_type != 'record') add_mandatory_exec_option(cls, 'decimal_v2', 0) if cls.exploration_strategy() != 'exhaustive': @@ -345,9 +347,9 @@ class TestTpcdsDecimalV2Query(ImpalaTestSuite): def add_test_dimensions(cls): super(TestTpcdsDecimalV2Query, cls).add_test_dimensions() cls.ImpalaTestMatrix.add_constraint(lambda v: - v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] and - v.get_value('table_format').compression_codec in ['none', 'snap'] and - v.get_value('table_format').compression_type != 'record') + v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] + and v.get_value('table_format').compression_codec in ['none', 'snap'] + and v.get_value('table_format').compression_type != 'record') if cls.exploration_strategy() != 'exhaustive': # Cut down on the execution time for these tests in 
core by running only @@ -704,11 +706,17 @@ class TestTpcdsInsert(ImpalaTestSuite): cls.ImpalaTestMatrix.add_constraint(lambda v: is_supported_insert_format(v.get_value('table_format'))) - def test_tpcds_partitioned_insert(self, vector): - self.run_test_case('partitioned-insert', vector) + def create_test_file_vars(self, vector): + stored_as = FILE_FORMAT_TO_STORED_AS_MAP[vector.get_value('table_format').file_format] + return {'$FILE_FORMAT': stored_as} + + def test_tpcds_partitioned_insert(self, vector, unique_database): + self.run_test_case('partitioned-insert', vector, unique_database, + test_file_vars=self.create_test_file_vars(vector)) - def test_expr_insert(self, vector): - self.run_test_case('expr-insert', vector) + def test_expr_insert(self, vector, unique_database): + self.run_test_case('expr-insert', vector, unique_database, + test_file_vars=self.create_test_file_vars(vector)) class TestTpcdsUnmodified(ImpalaTestSuite): @@ -720,9 +728,9 @@ class TestTpcdsUnmodified(ImpalaTestSuite): def add_test_dimensions(cls): super(TestTpcdsUnmodified, cls).add_test_dimensions() cls.ImpalaTestMatrix.add_constraint(lambda v: - v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] and - v.get_value('table_format').compression_codec in ['none', 'snap'] and - v.get_value('table_format').compression_type != 'record') + v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] + and v.get_value('table_format').compression_codec in ['none', 'snap'] + and v.get_value('table_format').compression_type != 'record') if cls.exploration_strategy() != 'exhaustive': # Cut down on the execution time for these tests in core by running only
