This is an automated email from the ASF dual-hosted git repository. arawat pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 8675dbfe664251f875dbf67a2878068462993305 Author: Riza Suminto <[email protected]> AuthorDate: Fri Jan 10 13:48:48 2025 -0800 IMPALA-13665: Parallelize TestDecimalFuzz TestDecimalFuzz.test_decimal_ops and TestDecimalFuzz.test_width_bucket each run an execute_scalar query 10000 times. This patch speeds them up by breaking each into 10 parallel test runs, where each run executes the execute_scalar query 1000 times. This patch also makes execute_scalar and execute_scalar_expect_success run the query with long_polling_time_ms=100 if no query_options are specified. Adds an assertion in execute_scalar_expect_success that the result is indeed only a single row. Slightly changes exists_func to avoid an unused argument warning. Testing: - From tests/, ran and passed the following command: ./run-tests.py query_test/test_decimal_fuzz.py Change-Id: Ic12b51b50739deff7792e2640764bd75e8b8922d Reviewed-on: http://gerrit.cloudera.org:8080/22328 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- tests/common/impala_test_suite.py | 36 +++++++++++++++++++++++++---------- tests/query_test/test_decimal_fuzz.py | 32 +++++++++++++++++++------------ 2 files changed, 46 insertions(+), 22 deletions(-) diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py index c7f9be133..425fca5d4 100644 --- a/tests/common/impala_test_suite.py +++ b/tests/common/impala_test_suite.py @@ -1053,12 +1053,12 @@ class ImpalaTestSuite(BaseTestSuite): @execute_wrapper def execute_scalar(self, query, query_options=None): - """Executes a scalar query return the single row result. - Only validate that query return just one row. + """Executes a scalar query return the single row result. Only validate that + query return at most one row. Return None if query return an empty result. 
Remember to pass vector.get_value('exec_option') as 'query_options' argument - if the test has one.""" - result = self.__execute_query(self.client, query, query_options) - assert len(result.data) <= 1, 'Multiple values returned from scalar' + if the test has one. If query_options is not set, the query will be run with + long_polling_time_ms=100 to speed up response.""" + result = self.__execute_scalar(self.client, query, query_options) return result.data[0] if len(result.data) == 1 else None @classmethod @@ -1068,11 +1068,12 @@ class ImpalaTestSuite(BaseTestSuite): """Executes a scalar query return the single row result. Validate that query execution is indeed successful and return just one row. Remember to pass vector.get_value('exec_option') as 'query_options' argument - if the test has one.""" - result = cls.__execute_query(impalad_client, query, query_options, user) + if the test has one. If query_options is not set, the query will be run with + long_polling_time_ms=100 to speed up response.""" + result = cls.__execute_scalar(impalad_client, query, query_options, user) assert result.success - assert len(result.data) <= 1, 'Multiple values returned from scalar' - return result.data[0] if len(result.data) == 1 else None + assert len(result.data) == 1 + return result.data[0] def exec_and_compare_hive_and_impala_hs2(self, stmt, compare=lambda x, y: x == y): """Compare Hive and Impala results when executing the same statment over HS2""" @@ -1131,6 +1132,19 @@ class ImpalaTestSuite(BaseTestSuite): if query_options is not None: impalad_client.set_configuration(query_options) return impalad_client.execute(query, user=user) + @classmethod + def __execute_scalar(cls, impalad_client, query, query_options=None, user=None): + """Executes the given scalar query against the specified Impalad. 
+ If query_options is not set, the query will be run with long_polling_time_ms=100 + to speed up response.""" + if query_options is None: + impalad_client.set_configuration_option('long_polling_time_ms', 100) + else: + impalad_client.set_configuration(query_options) + result = impalad_client.execute(query, user=user) + assert len(result.data) <= 1, 'Multiple values returned from scalar' + return result + def clone_table(self, src_tbl, dst_tbl, recover_partitions, vector): src_loc = self._get_table_location(src_tbl, vector) self.client.execute("create external table {0} like {1} location '{2}'" @@ -1551,7 +1565,9 @@ class ImpalaTestSuite(BaseTestSuite): """ actual_log_path = self.__build_log_path(daemon, level) - def exists_func(_): + def exists_func(is_last_try): + if is_last_try: + LOG.info("Checking existence of {} for the last time.".format(actual_log_path)) return os.path.exists(actual_log_path) assert retry(exists_func, max_attempts, sleep_time_s, 1, True), "file '{}' did not " \ diff --git a/tests/query_test/test_decimal_fuzz.py b/tests/query_test/test_decimal_fuzz.py index cdb28d9a9..9bfe78581 100644 --- a/tests/query_test/test_decimal_fuzz.py +++ b/tests/query_test/test_decimal_fuzz.py @@ -22,14 +22,16 @@ from __future__ import absolute_import, division, print_function from builtins import range import decimal import math -import pytest import random from tests.beeswax.impala_beeswax import ImpalaBeeswaxException from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.test_dimensions import create_single_exec_option_dimension +from tests.common.test_dimensions import ( + add_mandatory_exec_option, + create_single_exec_option_dimension) from tests.common.test_vector import ImpalaTestDimension, ImpalaTestMatrix + class TestDecimalFuzz(ImpalaTestSuite): # Impala's max precision for decimals is 38, so we should have the same in the tests @@ -43,7 +45,13 @@ class TestDecimalFuzz(ImpalaTestSuite): def add_test_dimensions(cls): 
cls.ImpalaTestMatrix = ImpalaTestMatrix() cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension()) - cls.iterations = 10000 + + total_iterations = 10000 + batches = list(range(0, 10)) + cls.iterations = total_iterations / len(batches) + cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension("test_batch", *batches)) + add_mandatory_exec_option(cls, 'decimal_v2', 'true') + add_mandatory_exec_option(cls, 'long_polling_time_ms', 100) def weighted_choice(self, options): total_weight = sum(options.values()) @@ -202,7 +210,7 @@ class TestDecimalFuzz(ImpalaTestSuite): return True return False - def execute_one_decimal_op(self): + def execute_one_decimal_op(self, query_options): '''Executes a single query and compares the result to a result that we computed in Python.''' op = random.choice(['+', '-', '*', '/', '%']) @@ -215,8 +223,8 @@ class TestDecimalFuzz(ImpalaTestSuite): value2=value2, precision2=precision2, scale2=scale2) try: - result = self.execute_scalar(query, query_options={'decimal_v2': 'true'}) - except ImpalaBeeswaxException as e: + result = self.execute_scalar(query, query_options) + except ImpalaBeeswaxException: result = None if result is not None: result = decimal.Decimal(result) @@ -239,15 +247,15 @@ class TestDecimalFuzz(ImpalaTestSuite): expected_result = decimal.Decimal(value1) % decimal.Decimal(value2) else: assert False - except decimal.InvalidOperation as e: + except decimal.InvalidOperation: expected_result = None - except decimal.DivisionByZero as e: + except decimal.DivisionByZero: expected_result = None assert self.result_equals(expected_result, result) def test_decimal_ops(self, vector): for _ in range(self.iterations): - self.execute_one_decimal_op() + self.execute_one_decimal_op(vector.get_exec_option_dict()) def width_bucket(self, val, min_range, max_range, num_buckets): # Multiplying the values by 10**40 guarantees that the numbers can be converted @@ -267,7 +275,7 @@ class TestDecimalFuzz(ImpalaTestSuite): dist_from_min = 
val_int - min_range_int return (num_buckets * dist_from_min) // range_size + 1 - def execute_one_width_bucket(self): + def execute_one_width_bucket(self, query_options): val, val_prec, val_scale = self.get_decimal() min_range, min_range_prec, min_range_scale = self.get_decimal() max_range, max_range_prec, max_range_scale = self.get_decimal() @@ -291,7 +299,7 @@ class TestDecimalFuzz(ImpalaTestSuite): return try: - result = self.execute_scalar(query, query_options={'decimal_v2': 'true'}) + result = self.execute_scalar(query, query_options) assert int(result) == expected_result except ImpalaBeeswaxException as e: if "You need to wrap the arguments in a CAST" not in str(e): @@ -301,4 +309,4 @@ class TestDecimalFuzz(ImpalaTestSuite): def test_width_bucket(self, vector): for _ in range(self.iterations): - self.execute_one_width_bucket() + self.execute_one_width_bucket(vector.get_exec_option_dict())
