This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit be6f896d1066095c2fe21ec2d60758e99c5f9931 Author: Riza Suminto <[email protected]> AuthorDate: Wed Aug 21 16:34:12 2024 -0700 IMPALA-13319: Avoid duplicate exec option declaration in py.test Before this patch, add_mandatory_exec_option() replace existing query option values in 'exec_option' dimension and may cause unintended test vector duplication. For example, the following declaration will create two duplicate test vector, both with "disable_codegen=False": cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension( disable_codegen_options=[False, True])) add_mandatory_exec_option(cls, "disable_codegen", False) add_exec_option_dimension() will create new test dimension for a 'key', but does not insert it into 'exec_option' dimension until vector generation later. It also does not validate if 'key' already exist in 'exec_option' dimension. This can confuse test writer when they need to write constraint, because they might look for the value at vector.get_value('exec_option')['key'] instead of vector.get_value('key'), and vice versa. This patch add assertion to check that no duplicate query option name is declared through any helper function. It also assert that all query option names are declared in lowercase. Testing: - Manually verify test vector generation in test files containing the helper functions by running: impala-py.test --exploration=exhaustive --collect-only <test_file> - Adjust query option declaration that breaks after this change. Change-Id: I8143e47f19090e20707cfb0a05c779f4d289f33c Reviewed-on: http://gerrit.cloudera.org:8080/21707 Reviewed-by: Michael Smith <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- tests/common/test_dimensions.py | 18 +++++++++--- tests/common/test_vector.py | 37 +++++++++++++++++++----- tests/query_test/test_ext_data_sources.py | 5 ++-- tests/query_test/test_join_queries.py | 9 +++--- tests/query_test/test_kudu.py | 4 +-- tests/query_test/test_limit_pushdown_analytic.py | 10 ++++--- tests/query_test/test_nested_types.py | 4 +-- tests/query_test/test_queries.py | 21 +++++++++----- tests/query_test/test_runtime_filters.py | 2 +- tests/query_test/test_scanners.py | 3 +- 10 files changed, 77 insertions(+), 36 deletions(-) diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py index 69ef44182..3b8f8075c 100644 --- a/tests/common/test_dimensions.py +++ b/tests/common/test_dimensions.py @@ -23,7 +23,8 @@ import copy import os from itertools import product -from tests.common.test_vector import ImpalaTestDimension, ImpalaTestVector +from tests.common.test_vector import ( + ImpalaTestDimension, ImpalaTestVector, assert_exec_option_key) from tests.util.filesystem_utils import ( IS_HDFS) @@ -257,6 +258,8 @@ def create_exec_option_dimension_from_dict(exec_option_dimensions): # Generate the cross product (all combinations) of the exec options specified. Then # store them in exec_option dictionary format. keys = sorted(exec_option_dimensions) + for name in keys: + assert_exec_option_key(name) combinations = product(*(exec_option_dimensions[name] for name in keys)) exec_option_dimension_values = [dict(zip(keys, prod)) for prod in combinations] @@ -268,7 +271,11 @@ def add_exec_option_dimension(test_suite, key, values): """ Takes an ImpalaTestSuite object 'test_suite' and register new exec option dimension. 'key' must be a query option known to Impala, and 'values' must be a list of more than - one element. + one element. Exec option 'key' must not be declared before. + If writing constraint against 'key', the value should be looked up at: + vector.get_value('key') + instead of: + vector.get_value('exec_option')['key'] """ test_suite.ImpalaTestMatrix.add_exec_option_dimension( ImpalaTestDimension(key, *values)) @@ -277,7 +284,8 @@ def add_exec_option_dimension(test_suite, key, values): def add_mandatory_exec_option(test_suite, key, value): """ Takes an ImpalaTestSuite object 'test_suite' and adds 'key=value' to every exec option - test dimension, leaving the number of tests that will be run unchanged. + test dimension, leaving the number of tests that will be run unchanged. Exec option + 'key' must not be declared before. """ test_suite.ImpalaTestMatrix.add_mandatory_exec_option(key, value) @@ -286,8 +294,10 @@ def extend_exec_option_dimension(test_suite, key, value): """ Takes an ImpalaTestSuite object 'test_suite' and extends the exec option test dimension by creating a copy of each existing exec option value that has 'key' set to 'value', - doubling the number of tests that will be run. + doubling the number of tests that will be run. Exec option 'key' must not be declared + before. """ + test_suite.ImpalaTestMatrix.assert_unique_exec_option_key(key) dim = test_suite.ImpalaTestMatrix.dimensions["exec_option"] new_value = [] for v in dim: diff --git a/tests/common/test_vector.py b/tests/common/test_vector.py index 7919fd796..da21b688d 100644 --- a/tests/common/test_vector.py +++ b/tests/common/test_vector.py @@ -63,6 +63,10 @@ from copy import deepcopy EXEC_OPTION_KEY = 'exec_option' +def assert_exec_option_key(key): + assert key.islower(), "Exec option " + key + " is not in lower case" + + # A list of test dimension values. class ImpalaTestDimension(list): def __init__(self, name, *args): @@ -124,12 +128,31 @@ class ImpalaTestMatrix(object): def add_dimension(self, dimension): self.dimensions[dimension.name] = dimension + if dimension.name == EXEC_OPTION_KEY: + self.independent_exec_option_names.clear() - def add_mandatory_exec_option(self, exec_option_key, exec_option_value): + def assert_unique_exec_option_key(self, key): + """Assert that 'exec_option' dimension exist and 'key' is not exist yet + in self.dimension['exec_option'].""" + assert_exec_option_key(key) assert EXEC_OPTION_KEY in self.dimensions, ( - "Must have '" + EXEC_OPTION_KEY + "' dimension previously declared!") + "Must have '" + EXEC_OPTION_KEY + "' dimension previously declared!") + for vector in self.dimensions[EXEC_OPTION_KEY]: - vector.value[exec_option_key] = exec_option_value + assert key not in vector.value, ( + "'{}' is already exist in '{}' dimension!".format(key, EXEC_OPTION_KEY)) + + # 'key' must not previously declared with add_exec_option_dimension(). + assert key not in self.independent_exec_option_names, ( + "['{}']['{}'] was previously declared with non-unique value: {}".format( + EXEC_OPTION_KEY, key, [dim.value for dim in self.dimensions[key]])) + + def add_mandatory_exec_option(self, key, value): + """Append key=value pair into 'exec_option' dimension.""" + self.assert_unique_exec_option_key(key) + + for vector in self.dimensions[EXEC_OPTION_KEY]: + vector.value[key] = value def add_exec_option_dimension(self, dimension): """ @@ -139,11 +162,11 @@ class ImpalaTestMatrix(object): exhaustive combination correct while eliminating the need for individual test method to append the custom exec options into 'exec_option' dimension themself. """ - assert EXEC_OPTION_KEY in self.dimensions, ( - "Must have '" + EXEC_OPTION_KEY + "' dimension previously declared!") + self.assert_unique_exec_option_key(dimension.name) assert len(dimension) > 1, ( - "Dimension " + dimension.name + " must have more than 1 possible value! " - "Use add_mandatory_exec_option() instead.") + "Dimension " + dimension.name + " must have more than 1 possible value! " + "Use add_mandatory_exec_option() instead.") + self.add_dimension(dimension) self.independent_exec_option_names.add(dimension.name) diff --git a/tests/query_test/test_ext_data_sources.py b/tests/query_test/test_ext_data_sources.py index 554bb96fa..8d97e34f7 100644 --- a/tests/query_test/test_ext_data_sources.py +++ b/tests/query_test/test_ext_data_sources.py @@ -21,7 +21,7 @@ import re from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.test_dimensions import (create_uncompressed_text_dimension, - extend_exec_option_dimension) + create_exec_option_dimension) class TestExtDataSources(ImpalaTestSuite): @@ -34,9 +34,10 @@ class TestExtDataSources(ImpalaTestSuite): @classmethod def add_test_dimensions(cls): super(TestExtDataSources, cls).add_test_dimensions() + cls.ImpalaTestMatrix.add_dimension( + create_exec_option_dimension(exec_single_node_option=[0, 100])) cls.ImpalaTestMatrix.add_dimension( create_uncompressed_text_dimension(cls.get_workload())) - extend_exec_option_dimension(cls, "exec_single_node_rows_threshold", "100") def _get_tbl_properties(self, table_name): """Extracts the table properties mapping from the output of DESCRIBE FORMATTED""" diff --git a/tests/query_test/test_join_queries.py b/tests/query_test/test_join_queries.py index ddcf59f4d..8bfc7e7f0 100644 --- a/tests/query_test/test_join_queries.py +++ b/tests/query_test/test_join_queries.py @@ -26,7 +26,7 @@ from tests.common.skip import SkipIf, SkipIfFS from tests.common.test_vector import ImpalaTestDimension from tests.common.test_dimensions import ( add_mandatory_exec_option, - create_single_exec_option_dimension, + create_exec_option_dimension, create_table_format_dimension) @@ -246,14 +246,15 @@ class TestExprValueCache(ImpalaTestSuite): @classmethod def add_test_dimensions(cls): super(TestExprValueCache, cls).add_test_dimensions() - cls.ImpalaTestMatrix.add_dimension( - create_single_exec_option_dimension()) + # create_single_exec_option_dimension + batch_size=65536 + cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension( + cluster_sizes=[0], disable_codegen_options=[False], batch_sizes=[65536], + disable_codegen_rows_threshold_options=[5000])) cls.ImpalaTestMatrix.add_dimension( create_table_format_dimension(cls.get_workload(), 'parquet/snap/block')) add_mandatory_exec_option(cls, 'runtime_filter_mode', 'OFF') add_mandatory_exec_option(cls, 'mem_limit', '149mb') add_mandatory_exec_option(cls, 'mt_dop', 1) - add_mandatory_exec_option(cls, 'batch_size', 65536) def test_expr_value_cache_fits(self, vector): self.run_test_case('tpcds-q97', vector) diff --git a/tests/query_test/test_kudu.py b/tests/query_test/test_kudu.py index f3176ddc6..2183e7298 100644 --- a/tests/query_test/test_kudu.py +++ b/tests/query_test/test_kudu.py @@ -614,10 +614,8 @@ class TestKuduOperations(KuduTestSuite): class TestKuduPartitioning(KuduTestSuite): @classmethod def add_test_dimensions(cls): - super(TestKuduPartitioning, cls).add_test_dimensions() - # Test both the interpreted and the codegen'd path. - add_exec_option_dimension(cls, "disable_codegen", [0, 1]) + super(TestKuduPartitioning, cls).add_test_dimensions() def test_partitions_evenly_distributed(self, vector, cursor, kudu_client, unique_database): diff --git a/tests/query_test/test_limit_pushdown_analytic.py b/tests/query_test/test_limit_pushdown_analytic.py index d1295eea8..7864a0614 100644 --- a/tests/query_test/test_limit_pushdown_analytic.py +++ b/tests/query_test/test_limit_pushdown_analytic.py @@ -20,8 +20,7 @@ from __future__ import absolute_import, division, print_function from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.test_dimensions import (create_single_exec_option_dimension, - extend_exec_option_dimension) +from tests.common.test_dimensions import create_exec_option_dimension class TestLimitPushdownAnalyticTpch(ImpalaTestSuite): @@ -48,9 +47,12 @@ class TestLimitPushdownAnalyticFunctional(ImpalaTestSuite): @classmethod def add_test_dimensions(cls): super(TestLimitPushdownAnalyticFunctional, cls).add_test_dimensions() - cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension()) # Also run with num_nodes=1 because it's easier to reproduce IMPALA-10296. - extend_exec_option_dimension(cls, 'num_nodes', 1) + cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension( + cluster_sizes=[0, 1], + disable_codegen_options=[False], + disable_codegen_rows_threshold_options=[5000], + batch_sizes=[0])) cls.ImpalaTestMatrix.add_constraint(lambda v: v.get_value('table_format').file_format in ['parquet']) diff --git a/tests/query_test/test_nested_types.py b/tests/query_test/test_nested_types.py index a21224bcf..ea0a3259c 100644 --- a/tests/query_test/test_nested_types.py +++ b/tests/query_test/test_nested_types.py @@ -143,7 +143,7 @@ class TestNestedStructsInSelectList(ImpalaTestSuite): """Queries where a struct column is in the select list""" new_vector = deepcopy(vector) new_vector.get_value('exec_option')['convert_legacy_hive_parquet_utc_timestamps'] = 1 - new_vector.get_value('exec_option')['TIMEZONE'] = '"Europe/Budapest"' + new_vector.get_value('exec_option')['timezone'] = '"Europe/Budapest"' self.run_test_case('QueryTest/struct-in-select-list', new_vector) @SkipIfFS.hbase @@ -936,7 +936,7 @@ class TestNestedTypesStarExpansion(ImpalaTestSuite): 'disable_codegen': ['False', 'True']})) cls.ImpalaTestMatrix.add_mandatory_exec_option( 'convert_legacy_hive_parquet_utc_timestamps', 'true') - cls.ImpalaTestMatrix.add_mandatory_exec_option('TIMEZONE', '"Europe/Budapest"') + cls.ImpalaTestMatrix.add_mandatory_exec_option('timezone', '"Europe/Budapest"') def test_star_expansion(self, vector): # Queries with star (*) expression on tables with array, map diff --git a/tests/query_test/test_queries.py b/tests/query_test/test_queries.py index 8c33dbe18..3d2ef4d5d 100644 --- a/tests/query_test/test_queries.py +++ b/tests/query_test/test_queries.py @@ -29,7 +29,7 @@ from tests.common.test_dimensions import ( create_uncompressed_text_dimension, create_uncompressed_json_dimension, create_exec_option_dimension_from_dict, create_client_protocol_dimension, hs2_parquet_constraint, extend_exec_option_dimension, FILE_FORMAT_TO_STORED_AS_MAP, - add_exec_option_dimension) + add_exec_option_dimension, create_exec_option_dimension) from tests.util.filesystem_utils import get_fs_path from subprocess import check_call @@ -42,6 +42,11 @@ class TestQueries(ImpalaTestSuite): @classmethod def add_test_dimensions(cls): super(TestQueries, cls).add_test_dimensions() + single_node_option = ([0, 100] if cls.exploration_strategy() == 'exhaustive' + else [0]) + cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension( + exec_single_node_option=single_node_option)) + if cls.exploration_strategy() == 'core': cls.ImpalaTestMatrix.add_constraint(lambda v: v.get_value('table_format').file_format == 'parquet') @@ -53,8 +58,7 @@ class TestQueries(ImpalaTestSuite): # Adding a test dimension here to test the small query opt in exhaustive. if cls.exploration_strategy() == 'exhaustive': - extend_exec_option_dimension(cls, "exec_single_node_rows_threshold", "100") - extend_exec_option_dimension(cls, "ASYNC_CODEGEN", 1) + extend_exec_option_dimension(cls, "async_codegen", 1) extend_exec_option_dimension(cls, "debug_action", cls.debug_actions) cls.ImpalaTestMatrix.add_constraint(cls.debug_action_constraint) @@ -62,7 +66,7 @@ class TestQueries(ImpalaTestSuite): def debug_action_constraint(cls, vector): exec_option = vector.get_value("exec_option") - is_async = exec_option.get("ASYNC_CODEGEN") == 1 + is_async = exec_option.get("async_codegen") == 1 using_async_debug_actions = exec_option.get("debug_action") == cls.debug_actions codegen_enabled = exec_option["disable_codegen"] == 0 @@ -321,14 +325,15 @@ class TestHdfsQueries(ImpalaTestSuite): @classmethod def add_test_dimensions(cls): super(TestHdfsQueries, cls).add_test_dimensions() + # Adding a test dimension here to test the small query opt in exhaustive. + single_node_option = ([0, 100] if cls.exploration_strategy() == 'exhaustive' + else [0]) + cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension( + exec_single_node_option=single_node_option)) # Kudu doesn't support AllTypesAggMultiFilesNoPart (KUDU-1271, KUDU-1570). cls.ImpalaTestMatrix.add_constraint(lambda v: v.get_value('table_format').file_format != 'kudu') - # Adding a test dimension here to test the small query opt in exhaustive. - if cls.exploration_strategy() == 'exhaustive': - extend_exec_option_dimension(cls, "exec_single_node_rows_threshold", "100") - @classmethod def get_workload(cls): return 'functional-query' diff --git a/tests/query_test/test_runtime_filters.py b/tests/query_test/test_runtime_filters.py index cd76eba50..db2921f7b 100644 --- a/tests/query_test/test_runtime_filters.py +++ b/tests/query_test/test_runtime_filters.py @@ -262,7 +262,7 @@ class TestMinMaxFilters(ImpalaTestSuite): # IMPALA-10715. Enable only min/max since the bloom filters will return # rows only satisfying the join predicates. This test requires the return # of non-qualifying rows to succeed. - add_mandatory_exec_option(cls, "ENABLED_RUNTIME_FILTER_TYPES", "MIN_MAX") + add_mandatory_exec_option(cls, "enabled_runtime_filter_types", "MIN_MAX") def test_min_max_filters(self, vector): self.execute_query("SET MINMAX_FILTER_THRESHOLD=0.5") diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py index 30b84cb0a..c49c512f0 100644 --- a/tests/query_test/test_scanners.py +++ b/tests/query_test/test_scanners.py @@ -224,8 +224,9 @@ class TestScannersAllTableFormatsWithLimit(ImpalaTestSuite): @classmethod def add_test_dimensions(cls): super(TestScannersAllTableFormatsWithLimit, cls).add_test_dimensions() + cls.ImpalaTestMatrix.add_dimension( + create_exec_option_dimension(batch_sizes=[100])) add_exec_option_dimension(cls, 'mt_dop', MT_DOP_VALUES) - add_mandatory_exec_option(cls, 'batch_size', 100) def test_limit(self, vector): vector.get_value('exec_option')['abort_on_error'] = 1
