(impala) 02/05: IMPALA-13665: Parallelize TestDecimalFuzz

arawat Sat, 18 Jan 2025 11:44:50 -0800

This is an automated email from the ASF dual-hosted git repository.

arawat pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


commit 8675dbfe664251f875dbf67a2878068462993305
Author: Riza Suminto <[email protected]>
AuthorDate: Fri Jan 10 13:48:48 2025 -0800

    IMPALA-13665: Parallelize TestDecimalFuzz
    
    TestDecimalFuzz.test_decimal_ops and TestDecimalFuzz.test_width_bucket
    each run an execute_scalar query 10000 times. This patch speeds them up
    by splitting each test into 10 parallel test runs, where each run
    executes the execute_scalar query 1000 times.
    
    This patch also makes execute_scalar and execute_scalar_expect_success
    run the query with long_polling_time_ms=100 if no query_options are
    specified. It also adds an assertion in execute_scalar_expect_success
    that the result is exactly one row.
    
    Slightly changes exists_func to avoid an unused-argument warning.
    
    Testing:
    - From tests/, ran the following command and it passed:
      ./run-tests.py query_test/test_decimal_fuzz.py
    
    Change-Id: Ic12b51b50739deff7792e2640764bd75e8b8922d
    Reviewed-on: http://gerrit.cloudera.org:8080/22328
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 tests/common/impala_test_suite.py     | 36 +++++++++++++++++++++++++----------
 tests/query_test/test_decimal_fuzz.py | 32 +++++++++++++++++++------------
 2 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/tests/common/impala_test_suite.py 
b/tests/common/impala_test_suite.py
index c7f9be133..425fca5d4 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -1053,12 +1053,12 @@ class ImpalaTestSuite(BaseTestSuite):
 
   @execute_wrapper
   def execute_scalar(self, query, query_options=None):
-    """Executes a scalar query return the single row result.
-    Only validate that query return just one row.
+    """Executes a scalar query return the single row result. Only validate that
+    query return at most one row. Return None if query return an empty result.
     Remember to pass vector.get_value('exec_option') as 'query_options' 
argument
-    if the test has one."""
-    result = self.__execute_query(self.client, query, query_options)
-    assert len(result.data) <= 1, 'Multiple values returned from scalar'
+    if the test has one. If query_options is not set, the query will be run 
with
+    long_polling_time_ms=100 to speed up response."""
+    result = self.__execute_scalar(self.client, query, query_options)
     return result.data[0] if len(result.data) == 1 else None
 
   @classmethod
@@ -1068,11 +1068,12 @@ class ImpalaTestSuite(BaseTestSuite):
     """Executes a scalar query return the single row result.
     Validate that query execution is indeed successful and return just one row.
     Remember to pass vector.get_value('exec_option') as 'query_options' 
argument
-    if the test has one."""
-    result = cls.__execute_query(impalad_client, query, query_options, user)
+    if the test has one. If query_options is not set, the query will be run 
with
+    long_polling_time_ms=100 to speed up response."""
+    result = cls.__execute_scalar(impalad_client, query, query_options, user)
     assert result.success
-    assert len(result.data) <= 1, 'Multiple values returned from scalar'
-    return result.data[0] if len(result.data) == 1 else None
+    assert len(result.data) == 1
+    return result.data[0]
 
   def exec_and_compare_hive_and_impala_hs2(self, stmt, compare=lambda x, y: x 
== y):
     """Compare Hive and Impala results when executing the same statment over 
HS2"""
@@ -1131,6 +1132,19 @@ class ImpalaTestSuite(BaseTestSuite):
     if query_options is not None: 
impalad_client.set_configuration(query_options)
     return impalad_client.execute(query, user=user)
 
+  @classmethod
+  def __execute_scalar(cls, impalad_client, query, query_options=None, 
user=None):
+    """Executes the given scalar query against the specified Impalad.
+    If query_options is not set, the query will be run with 
long_polling_time_ms=100
+    to speed up response."""
+    if query_options is None:
+      impalad_client.set_configuration_option('long_polling_time_ms', 100)
+    else:
+      impalad_client.set_configuration(query_options)
+    result = impalad_client.execute(query, user=user)
+    assert len(result.data) <= 1, 'Multiple values returned from scalar'
+    return result
+
   def clone_table(self, src_tbl, dst_tbl, recover_partitions, vector):
     src_loc = self._get_table_location(src_tbl, vector)
     self.client.execute("create external table {0} like {1} location '{2}'"
@@ -1551,7 +1565,9 @@ class ImpalaTestSuite(BaseTestSuite):
     """
     actual_log_path = self.__build_log_path(daemon, level)
 
-    def exists_func(_):
+    def exists_func(is_last_try):
+      if is_last_try:
+        LOG.info("Checking existence of {} for the last 
time.".format(actual_log_path))
       return os.path.exists(actual_log_path)
 
     assert retry(exists_func, max_attempts, sleep_time_s, 1, True), "file '{}' 
did not " \
diff --git a/tests/query_test/test_decimal_fuzz.py 
b/tests/query_test/test_decimal_fuzz.py
index cdb28d9a9..9bfe78581 100644
--- a/tests/query_test/test_decimal_fuzz.py
+++ b/tests/query_test/test_decimal_fuzz.py
@@ -22,14 +22,16 @@ from __future__ import absolute_import, division, 
print_function
 from builtins import range
 import decimal
 import math
-import pytest
 import random
 
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.test_dimensions import create_single_exec_option_dimension
+from tests.common.test_dimensions import (
+  add_mandatory_exec_option,
+  create_single_exec_option_dimension)
 from tests.common.test_vector import ImpalaTestDimension, ImpalaTestMatrix
 
+
 class TestDecimalFuzz(ImpalaTestSuite):
 
   # Impala's max precision for decimals is 38, so we should have the same in 
the tests
@@ -43,7 +45,13 @@ class TestDecimalFuzz(ImpalaTestSuite):
   def add_test_dimensions(cls):
     cls.ImpalaTestMatrix = ImpalaTestMatrix()
     cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
-    cls.iterations = 10000
+
+    total_iterations = 10000
+    batches = list(range(0, 10))
+    cls.iterations = total_iterations / len(batches)
+    cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension("test_batch", 
*batches))
+    add_mandatory_exec_option(cls, 'decimal_v2', 'true')
+    add_mandatory_exec_option(cls, 'long_polling_time_ms', 100)
 
   def weighted_choice(self, options):
     total_weight = sum(options.values())
@@ -202,7 +210,7 @@ class TestDecimalFuzz(ImpalaTestSuite):
         return True
     return False
 
-  def execute_one_decimal_op(self):
+  def execute_one_decimal_op(self, query_options):
     '''Executes a single query and compares the result to a result that we 
computed in
     Python.'''
     op = random.choice(['+', '-', '*', '/', '%'])
@@ -215,8 +223,8 @@ class TestDecimalFuzz(ImpalaTestSuite):
         value2=value2, precision2=precision2, scale2=scale2)
 
     try:
-      result = self.execute_scalar(query, query_options={'decimal_v2': 'true'})
-    except ImpalaBeeswaxException as e:
+      result = self.execute_scalar(query, query_options)
+    except ImpalaBeeswaxException:
       result = None
     if result is not None:
       result = decimal.Decimal(result)
@@ -239,15 +247,15 @@ class TestDecimalFuzz(ImpalaTestSuite):
           expected_result = decimal.Decimal(value1) % decimal.Decimal(value2)
         else:
           assert False
-      except decimal.InvalidOperation as e:
+      except decimal.InvalidOperation:
         expected_result = None
-      except decimal.DivisionByZero as e:
+      except decimal.DivisionByZero:
         expected_result = None
       assert self.result_equals(expected_result, result)
 
   def test_decimal_ops(self, vector):
     for _ in range(self.iterations):
-      self.execute_one_decimal_op()
+      self.execute_one_decimal_op(vector.get_exec_option_dict())
 
   def width_bucket(self, val, min_range, max_range, num_buckets):
     # Multiplying the values by 10**40 guarantees that the numbers can be 
converted
@@ -267,7 +275,7 @@ class TestDecimalFuzz(ImpalaTestSuite):
     dist_from_min = val_int - min_range_int
     return (num_buckets * dist_from_min) // range_size + 1
 
-  def execute_one_width_bucket(self):
+  def execute_one_width_bucket(self, query_options):
     val, val_prec, val_scale = self.get_decimal()
     min_range, min_range_prec, min_range_scale = self.get_decimal()
     max_range, max_range_prec, max_range_scale = self.get_decimal()
@@ -291,7 +299,7 @@ class TestDecimalFuzz(ImpalaTestSuite):
       return
 
     try:
-      result = self.execute_scalar(query, query_options={'decimal_v2': 'true'})
+      result = self.execute_scalar(query, query_options)
       assert int(result) == expected_result
     except ImpalaBeeswaxException as e:
       if "You need to wrap the arguments in a CAST" not in str(e):
@@ -301,4 +309,4 @@ class TestDecimalFuzz(ImpalaTestSuite):
 
   def test_width_bucket(self, vector):
     for _ in range(self.iterations):
-      self.execute_one_width_bucket()
+      self.execute_one_width_bucket(vector.get_exec_option_dict())

(impala) 02/05: IMPALA-13665: Parallelize TestDecimalFuzz

Reply via email to