(impala) 01/02: IMPALA-13319: Avoid duplicate exec option declaration in py.test

stigahuang Fri, 23 Aug 2024 00:27:28 -0700

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


commit be6f896d1066095c2fe21ec2d60758e99c5f9931
Author: Riza Suminto <[email protected]>
AuthorDate: Wed Aug 21 16:34:12 2024 -0700

    IMPALA-13319: Avoid duplicate exec option declaration in py.test
    
    Before this patch, add_mandatory_exec_option() replace existing query
    option values in 'exec_option' dimension and may cause unintended test
    vector duplication. For example, the following declaration will create
    two duplicate test vector, both with "disable_codegen=False":
    
        cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
          disable_codegen_options=[False, True]))
        add_mandatory_exec_option(cls, "disable_codegen", False)
    
    add_exec_option_dimension() will create new test dimension for a 'key',
    but does not insert it into 'exec_option' dimension until vector
    generation later. It also does not validate if 'key' already exist in
    'exec_option' dimension. This can confuse test writer when they need to
    write constraint, because they might look for the value at
    vector.get_value('exec_option')['key'] instead of
    vector.get_value('key'), and vice versa.
    
    This patch add assertion to check that no duplicate query option name is
    declared through any helper function. It also assert that all query
    option names are declared in lowercase.
    
    Testing:
    - Manually verify test vector generation in test files containing the
      helper functions by running:
      impala-py.test --exploration=exhaustive --collect-only <test_file>
    - Adjust query option declaration that breaks after this change.
    
    Change-Id: I8143e47f19090e20707cfb0a05c779f4d289f33c
    Reviewed-on: http://gerrit.cloudera.org:8080/21707
    Reviewed-by: Michael Smith <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 tests/common/test_dimensions.py                  | 18 +++++++++---
 tests/common/test_vector.py                      | 37 +++++++++++++++++++-----
 tests/query_test/test_ext_data_sources.py        |  5 ++--
 tests/query_test/test_join_queries.py            |  9 +++---
 tests/query_test/test_kudu.py                    |  4 +--
 tests/query_test/test_limit_pushdown_analytic.py | 10 ++++---
 tests/query_test/test_nested_types.py            |  4 +--
 tests/query_test/test_queries.py                 | 21 +++++++++-----
 tests/query_test/test_runtime_filters.py         |  2 +-
 tests/query_test/test_scanners.py                |  3 +-
 10 files changed, 77 insertions(+), 36 deletions(-)

diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py
index 69ef44182..3b8f8075c 100644
--- a/tests/common/test_dimensions.py
+++ b/tests/common/test_dimensions.py
@@ -23,7 +23,8 @@ import copy
 import os
 from itertools import product
 
-from tests.common.test_vector import ImpalaTestDimension, ImpalaTestVector
+from tests.common.test_vector import (
+    ImpalaTestDimension, ImpalaTestVector, assert_exec_option_key)
 from tests.util.filesystem_utils import (
     IS_HDFS)
 
@@ -257,6 +258,8 @@ def 
create_exec_option_dimension_from_dict(exec_option_dimensions):
   # Generate the cross product (all combinations) of the exec options 
specified. Then
   # store them in exec_option dictionary format.
   keys = sorted(exec_option_dimensions)
+  for name in keys:
+    assert_exec_option_key(name)
   combinations = product(*(exec_option_dimensions[name] for name in keys))
   exec_option_dimension_values = [dict(zip(keys, prod)) for prod in 
combinations]
 
@@ -268,7 +271,11 @@ def add_exec_option_dimension(test_suite, key, values):
   """
   Takes an ImpalaTestSuite object 'test_suite' and register new exec option 
dimension.
   'key' must be a query option known to Impala, and 'values' must be a list of 
more than
-  one element.
+  one element. Exec option 'key' must not be declared before.
+  If writing constraint against 'key', the value should be looked up at:
+    vector.get_value('key')
+  instead of:
+    vector.get_value('exec_option')['key']
   """
   test_suite.ImpalaTestMatrix.add_exec_option_dimension(
     ImpalaTestDimension(key, *values))
@@ -277,7 +284,8 @@ def add_exec_option_dimension(test_suite, key, values):
 def add_mandatory_exec_option(test_suite, key, value):
   """
   Takes an ImpalaTestSuite object 'test_suite' and adds 'key=value' to every 
exec option
-  test dimension, leaving the number of tests that will be run unchanged.
+  test dimension, leaving the number of tests that will be run unchanged. Exec 
option
+  'key' must not be declared before.
   """
   test_suite.ImpalaTestMatrix.add_mandatory_exec_option(key, value)
 
@@ -286,8 +294,10 @@ def extend_exec_option_dimension(test_suite, key, value):
   """
   Takes an ImpalaTestSuite object 'test_suite' and extends the exec option 
test dimension
   by creating a copy of each existing exec option value that has 'key' set to 
'value',
-  doubling the number of tests that will be run.
+  doubling the number of tests that will be run. Exec option 'key' must not be 
declared
+  before.
   """
+  test_suite.ImpalaTestMatrix.assert_unique_exec_option_key(key)
   dim = test_suite.ImpalaTestMatrix.dimensions["exec_option"]
   new_value = []
   for v in dim:
diff --git a/tests/common/test_vector.py b/tests/common/test_vector.py
index 7919fd796..da21b688d 100644
--- a/tests/common/test_vector.py
+++ b/tests/common/test_vector.py
@@ -63,6 +63,10 @@ from copy import deepcopy
 EXEC_OPTION_KEY = 'exec_option'
 
 
+def assert_exec_option_key(key):
+  assert key.islower(), "Exec option " + key + " is not in lower case"
+
+
 # A list of test dimension values.
 class ImpalaTestDimension(list):
   def __init__(self, name, *args):
@@ -124,12 +128,31 @@ class ImpalaTestMatrix(object):
 
   def add_dimension(self, dimension):
     self.dimensions[dimension.name] = dimension
+    if dimension.name == EXEC_OPTION_KEY:
+      self.independent_exec_option_names.clear()
 
-  def add_mandatory_exec_option(self, exec_option_key, exec_option_value):
+  def assert_unique_exec_option_key(self, key):
+    """Assert that 'exec_option' dimension exist and 'key' is not exist yet
+    in self.dimension['exec_option']."""
+    assert_exec_option_key(key)
     assert EXEC_OPTION_KEY in self.dimensions, (
-      "Must have '" + EXEC_OPTION_KEY + "' dimension previously declared!")
+        "Must have '" + EXEC_OPTION_KEY + "' dimension previously declared!")
+
     for vector in self.dimensions[EXEC_OPTION_KEY]:
-      vector.value[exec_option_key] = exec_option_value
+      assert key not in vector.value, (
+          "'{}' is already exist in '{}' dimension!".format(key, 
EXEC_OPTION_KEY))
+
+    # 'key' must not previously declared with add_exec_option_dimension().
+    assert key not in self.independent_exec_option_names, (
+        "['{}']['{}'] was previously declared with non-unique value: 
{}".format(
+          EXEC_OPTION_KEY, key, [dim.value for dim in self.dimensions[key]]))
+
+  def add_mandatory_exec_option(self, key, value):
+    """Append key=value pair into 'exec_option' dimension."""
+    self.assert_unique_exec_option_key(key)
+
+    for vector in self.dimensions[EXEC_OPTION_KEY]:
+      vector.value[key] = value
 
   def add_exec_option_dimension(self, dimension):
     """
@@ -139,11 +162,11 @@ class ImpalaTestMatrix(object):
     exhaustive combination correct while eliminating the need for individual 
test method
     to append the custom exec options into 'exec_option' dimension themself.
     """
-    assert EXEC_OPTION_KEY in self.dimensions, (
-      "Must have '" + EXEC_OPTION_KEY + "' dimension previously declared!")
+    self.assert_unique_exec_option_key(dimension.name)
     assert len(dimension) > 1, (
-      "Dimension " + dimension.name + " must have more than 1 possible value! "
-      "Use add_mandatory_exec_option() instead.")
+        "Dimension " + dimension.name + " must have more than 1 possible 
value! "
+        "Use add_mandatory_exec_option() instead.")
+
     self.add_dimension(dimension)
     self.independent_exec_option_names.add(dimension.name)
 
diff --git a/tests/query_test/test_ext_data_sources.py 
b/tests/query_test/test_ext_data_sources.py
index 554bb96fa..8d97e34f7 100644
--- a/tests/query_test/test_ext_data_sources.py
+++ b/tests/query_test/test_ext_data_sources.py
@@ -21,7 +21,7 @@ import re
 
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.test_dimensions import (create_uncompressed_text_dimension,
-    extend_exec_option_dimension)
+    create_exec_option_dimension)
 
 
 class TestExtDataSources(ImpalaTestSuite):
@@ -34,9 +34,10 @@ class TestExtDataSources(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestExtDataSources, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_dimension(
+        create_exec_option_dimension(exec_single_node_option=[0, 100]))
     cls.ImpalaTestMatrix.add_dimension(
         create_uncompressed_text_dimension(cls.get_workload()))
-    extend_exec_option_dimension(cls, "exec_single_node_rows_threshold", "100")
 
   def _get_tbl_properties(self, table_name):
     """Extracts the table properties mapping from the output of DESCRIBE 
FORMATTED"""
diff --git a/tests/query_test/test_join_queries.py 
b/tests/query_test/test_join_queries.py
index ddcf59f4d..8bfc7e7f0 100644
--- a/tests/query_test/test_join_queries.py
+++ b/tests/query_test/test_join_queries.py
@@ -26,7 +26,7 @@ from tests.common.skip import SkipIf, SkipIfFS
 from tests.common.test_vector import ImpalaTestDimension
 from tests.common.test_dimensions import (
     add_mandatory_exec_option,
-    create_single_exec_option_dimension,
+    create_exec_option_dimension,
     create_table_format_dimension)
 
 
@@ -246,14 +246,15 @@ class TestExprValueCache(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestExprValueCache, cls).add_test_dimensions()
-    cls.ImpalaTestMatrix.add_dimension(
-        create_single_exec_option_dimension())
+    # create_single_exec_option_dimension + batch_size=65536
+    cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
+      cluster_sizes=[0], disable_codegen_options=[False], batch_sizes=[65536],
+      disable_codegen_rows_threshold_options=[5000]))
     cls.ImpalaTestMatrix.add_dimension(
         create_table_format_dimension(cls.get_workload(), 
'parquet/snap/block'))
     add_mandatory_exec_option(cls, 'runtime_filter_mode', 'OFF')
     add_mandatory_exec_option(cls, 'mem_limit', '149mb')
     add_mandatory_exec_option(cls, 'mt_dop', 1)
-    add_mandatory_exec_option(cls, 'batch_size', 65536)
 
   def test_expr_value_cache_fits(self, vector):
     self.run_test_case('tpcds-q97', vector)
diff --git a/tests/query_test/test_kudu.py b/tests/query_test/test_kudu.py
index f3176ddc6..2183e7298 100644
--- a/tests/query_test/test_kudu.py
+++ b/tests/query_test/test_kudu.py
@@ -614,10 +614,8 @@ class TestKuduOperations(KuduTestSuite):
 class TestKuduPartitioning(KuduTestSuite):
   @classmethod
   def add_test_dimensions(cls):
-    super(TestKuduPartitioning, cls).add_test_dimensions()
-
     # Test both the interpreted and the codegen'd path.
-    add_exec_option_dimension(cls, "disable_codegen", [0, 1])
+    super(TestKuduPartitioning, cls).add_test_dimensions()
 
   def test_partitions_evenly_distributed(self, vector, cursor,
       kudu_client, unique_database):
diff --git a/tests/query_test/test_limit_pushdown_analytic.py 
b/tests/query_test/test_limit_pushdown_analytic.py
index d1295eea8..7864a0614 100644
--- a/tests/query_test/test_limit_pushdown_analytic.py
+++ b/tests/query_test/test_limit_pushdown_analytic.py
@@ -20,8 +20,7 @@
 
 from __future__ import absolute_import, division, print_function
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.test_dimensions import (create_single_exec_option_dimension,
-    extend_exec_option_dimension)
+from tests.common.test_dimensions import create_exec_option_dimension
 
 
 class TestLimitPushdownAnalyticTpch(ImpalaTestSuite):
@@ -48,9 +47,12 @@ class TestLimitPushdownAnalyticFunctional(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestLimitPushdownAnalyticFunctional, cls).add_test_dimensions()
-    cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
     # Also run with num_nodes=1 because it's easier to reproduce IMPALA-10296.
-    extend_exec_option_dimension(cls, 'num_nodes', 1)
+    cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
+      cluster_sizes=[0, 1],
+      disable_codegen_options=[False],
+      disable_codegen_rows_threshold_options=[5000],
+      batch_sizes=[0]))
     cls.ImpalaTestMatrix.add_constraint(lambda v:
         v.get_value('table_format').file_format in ['parquet'])
 
diff --git a/tests/query_test/test_nested_types.py 
b/tests/query_test/test_nested_types.py
index a21224bcf..ea0a3259c 100644
--- a/tests/query_test/test_nested_types.py
+++ b/tests/query_test/test_nested_types.py
@@ -143,7 +143,7 @@ class TestNestedStructsInSelectList(ImpalaTestSuite):
     """Queries where a struct column is in the select list"""
     new_vector = deepcopy(vector)
     
new_vector.get_value('exec_option')['convert_legacy_hive_parquet_utc_timestamps']
 = 1
-    new_vector.get_value('exec_option')['TIMEZONE'] = '"Europe/Budapest"'
+    new_vector.get_value('exec_option')['timezone'] = '"Europe/Budapest"'
     self.run_test_case('QueryTest/struct-in-select-list', new_vector)
 
   @SkipIfFS.hbase
@@ -936,7 +936,7 @@ class TestNestedTypesStarExpansion(ImpalaTestSuite):
             'disable_codegen': ['False', 'True']}))
     cls.ImpalaTestMatrix.add_mandatory_exec_option(
             'convert_legacy_hive_parquet_utc_timestamps', 'true')
-    cls.ImpalaTestMatrix.add_mandatory_exec_option('TIMEZONE', 
'"Europe/Budapest"')
+    cls.ImpalaTestMatrix.add_mandatory_exec_option('timezone', 
'"Europe/Budapest"')
 
   def test_star_expansion(self, vector):
     # Queries with star (*) expression on tables with array, map
diff --git a/tests/query_test/test_queries.py b/tests/query_test/test_queries.py
index 8c33dbe18..3d2ef4d5d 100644
--- a/tests/query_test/test_queries.py
+++ b/tests/query_test/test_queries.py
@@ -29,7 +29,7 @@ from tests.common.test_dimensions import (
     create_uncompressed_text_dimension, create_uncompressed_json_dimension,
     create_exec_option_dimension_from_dict, create_client_protocol_dimension,
     hs2_parquet_constraint, extend_exec_option_dimension, 
FILE_FORMAT_TO_STORED_AS_MAP,
-    add_exec_option_dimension)
+    add_exec_option_dimension, create_exec_option_dimension)
 from tests.util.filesystem_utils import get_fs_path
 from subprocess import check_call
 
@@ -42,6 +42,11 @@ class TestQueries(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestQueries, cls).add_test_dimensions()
+    single_node_option = ([0, 100] if cls.exploration_strategy() == 
'exhaustive'
+        else [0])
+    cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
+      exec_single_node_option=single_node_option))
+
     if cls.exploration_strategy() == 'core':
       cls.ImpalaTestMatrix.add_constraint(lambda v:
           v.get_value('table_format').file_format == 'parquet')
@@ -53,8 +58,7 @@ class TestQueries(ImpalaTestSuite):
 
     # Adding a test dimension here to test the small query opt in exhaustive.
     if cls.exploration_strategy() == 'exhaustive':
-      extend_exec_option_dimension(cls, "exec_single_node_rows_threshold", 
"100")
-      extend_exec_option_dimension(cls, "ASYNC_CODEGEN", 1)
+      extend_exec_option_dimension(cls, "async_codegen", 1)
       extend_exec_option_dimension(cls, "debug_action", cls.debug_actions)
       cls.ImpalaTestMatrix.add_constraint(cls.debug_action_constraint)
 
@@ -62,7 +66,7 @@ class TestQueries(ImpalaTestSuite):
   def debug_action_constraint(cls, vector):
     exec_option = vector.get_value("exec_option")
 
-    is_async = exec_option.get("ASYNC_CODEGEN") == 1
+    is_async = exec_option.get("async_codegen") == 1
     using_async_debug_actions = exec_option.get("debug_action") == 
cls.debug_actions
     codegen_enabled = exec_option["disable_codegen"] == 0
 
@@ -321,14 +325,15 @@ class TestHdfsQueries(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestHdfsQueries, cls).add_test_dimensions()
+    # Adding a test dimension here to test the small query opt in exhaustive.
+    single_node_option = ([0, 100] if cls.exploration_strategy() == 
'exhaustive'
+        else [0])
+    cls.ImpalaTestMatrix.add_dimension(create_exec_option_dimension(
+      exec_single_node_option=single_node_option))
     # Kudu doesn't support AllTypesAggMultiFilesNoPart (KUDU-1271, KUDU-1570).
     cls.ImpalaTestMatrix.add_constraint(lambda v:
         v.get_value('table_format').file_format != 'kudu')
 
-    # Adding a test dimension here to test the small query opt in exhaustive.
-    if cls.exploration_strategy() == 'exhaustive':
-      extend_exec_option_dimension(cls, "exec_single_node_rows_threshold", 
"100")
-
   @classmethod
   def get_workload(cls):
     return 'functional-query'
diff --git a/tests/query_test/test_runtime_filters.py 
b/tests/query_test/test_runtime_filters.py
index cd76eba50..db2921f7b 100644
--- a/tests/query_test/test_runtime_filters.py
+++ b/tests/query_test/test_runtime_filters.py
@@ -262,7 +262,7 @@ class TestMinMaxFilters(ImpalaTestSuite):
     # IMPALA-10715. Enable only min/max since the bloom filters will return
     # rows only satisfying the join predicates. This test requires the return
     # of non-qualifying rows to succeed.
-    add_mandatory_exec_option(cls, "ENABLED_RUNTIME_FILTER_TYPES", "MIN_MAX")
+    add_mandatory_exec_option(cls, "enabled_runtime_filter_types", "MIN_MAX")
 
   def test_min_max_filters(self, vector):
     self.execute_query("SET MINMAX_FILTER_THRESHOLD=0.5")
diff --git a/tests/query_test/test_scanners.py 
b/tests/query_test/test_scanners.py
index 30b84cb0a..c49c512f0 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -224,8 +224,9 @@ class TestScannersAllTableFormatsWithLimit(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestScannersAllTableFormatsWithLimit, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_dimension(
+        create_exec_option_dimension(batch_sizes=[100]))
     add_exec_option_dimension(cls, 'mt_dop', MT_DOP_VALUES)
-    add_mandatory_exec_option(cls, 'batch_size', 100)
 
   def test_limit(self, vector):
     vector.get_value('exec_option')['abort_on_error'] = 1

(impala) 01/02: IMPALA-13319: Avoid duplicate exec option declaration in py.test

Reply via email to