This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch branch-4.1.1
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 6357534926a24c1ec8e95c37e7b23f5ac29571e6
Author: stiga-huang <[email protected]>
AuthorDate: Tue May 17 20:20:55 2022 +0800

    IMPALA-11295: Deflake TestParquet.test_multiple_blocks_mt_dop
    
    TestParquet.test_multiple_blocks_mt_dop runs a query on 6 scan ranges
    using mt_dop=2. It then verifies the sum of ranges read on a backend is
    2 (6/3). The test assumes that counters of the 2 instances on the same
    host are printed consecutively. However, this is not always true: they
    can be interleaved with the counters of instances on other hosts.
    
    This patch makes the test more robust by grouping the counters based on
    the host.
    
    Test
     - I can't reproduce the issue locally. But I'm able to run the new test
       100 times without any error.
    
    Change-Id: I16c576c41a212f83dda82a83931ab336a78a41e4
    Reviewed-on: http://gerrit.cloudera.org:8080/18533
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-on: http://gerrit.cloudera.org:8080/18892
    Reviewed-by: Csaba Ringhofer <[email protected]>
    Tested-by: Quanlong Huang <[email protected]>
---
 tests/query_test/test_scanners.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index 9a8553349..53a15f0ae 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -688,6 +688,11 @@ class TestParquet(ImpalaTestSuite):
       assert len(num_rows_read_list) == 7
       assert len(ranges_complete_list) == 7
 
+      # Extract the host for each fragment instance. The first is the coordinator
+      # fragment instance.
+      host_list = re.findall(r'host=(\S+:[0-9]*)', result.runtime_profile)
+      assert len(host_list) == 7
+
       total_rows_read = 0
       # Skip the Averaged Fragment; it comes first in the runtime profile.
       for num_row_read in num_rows_read_list[1:]:
@@ -695,10 +700,15 @@ class TestParquet(ImpalaTestSuite):
       assert total_rows_read == TOTAL_ROWS
 
       # Again skip the Averaged Fragment; it comes first in the runtime profile.
-      # With mt_dop 2, every backend will have 2 instances which are printed consecutively
-      # in the profile.
-      for i in range(1, len(ranges_complete_list), 2):
-        assert int(ranges_complete_list[i]) + int(ranges_complete_list[i + 1]) == 2
+      # With mt_dop 2, every backend will have 2 instances.
+      ranges_per_host = {}
+      for i in range(1, 7):
+        host = host_list[i]
+        if host not in ranges_per_host:
+          ranges_per_host[host] = 0
+        ranges_per_host[host] += int(ranges_complete_list[i])
+      for host in ranges_per_host:
+        assert ranges_per_host[host] == 2
     finally:
       self.client.clear_configuration()
 

Reply via email to