(impala) branch master updated: IMPALA-13125: Fix pairwise test vector generation

csringhofer Thu, 28 Aug 2025 08:47:55 -0700

This is an automated email from the ASF dual-hosted git repository.

csringhofer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git



The following commit(s) were added to refs/heads/master by this push:
     new 843de4478 IMPALA-13125: Fix pairwise test vector generation
843de4478 is described below

commit 843de44788758ab00694bc456e0d92608a8a4c60
Author: Csaba Ringhofer <[email protected]>
AuthorDate: Sun Aug 24 17:20:38 2025 +0200

    IMPALA-13125: Fix pairwise test vector generation
    
    Replaced allpairspy with a homemade pair finder that
    seems to find a somewhat less optimal (larger) covering
    vector set but works reliably with filters. For details
    see tests/common/test_vector.py
    
    Also fixes a few test issues uncovered. Some fixes are
    copied from https://gerrit.cloudera.org/#/c/23319/
    
    Added the possibility of shuffling vectors to get a
    different test set (env var IMPALA_TEST_VECTOR_SEED).
    By default the algorithm is deterministic so the test
    set won't change between runs (similarly to allpairspy).
    
    Added a new constraint to test only a single compression
    per file format in some tests to reduce the number of
    new vectors.
    
    EE + custom_cluster test count in exhaustive runs:
    before patch:                   ~11000
    after patch:                    ~16000
    without compression constraint: ~17000
    
    Change-Id: I419c24659a08d8d6592fadbbd5b764ff73cbba3e
    Reviewed-on: http://gerrit.cloudera.org:8080/23342
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 testdata/bin/generate-test-vectors.py              |  3 +
 .../queries/QueryTest/inline-view.test             |  2 +-
 .../queries/QueryTest/sort-complex.test            |  8 +-
 .../queries/QueryTest/top-n-complex.test           |  6 +-
 tests/common/test_dimensions.py                    | 15 ++++
 tests/common/test_vector.py                        | 94 +++++++++++++++++++---
 tests/query_test/test_date_queries.py              |  8 +-
 tests/query_test/test_decimal_queries.py           |  8 +-
 tests/query_test/test_queries.py                   | 25 +++++-
 tests/util/workload_management.py                  |  2 +-
 10 files changed, 141 insertions(+), 30 deletions(-)

diff --git a/testdata/bin/generate-test-vectors.py 
b/testdata/bin/generate-test-vectors.py
index 9b5272c37..39bd7b0e3 100755
--- a/testdata/bin/generate-test-vectors.py
+++ b/testdata/bin/generate-test-vectors.py
@@ -51,6 +51,9 @@ from itertools import product
 from optparse import OptionParser
 from allpairspy import AllPairs as all_pairs
 
+# TODO: allpairspy turned out to be not completely reliable (IMPALA-13125) - this
+#      script may also miss some pairs
+
 parser = OptionParser()
 parser.add_option("-w", "--workload", dest="workload",
                   help="The workload to generate test vectors for")
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/inline-view.test 
b/testdata/workloads/functional-query/queries/QueryTest/inline-view.test
index 1ea226740..6488d7a14 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/inline-view.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/inline-view.test
@@ -170,7 +170,7 @@ from (
   ) t1
 join alltypes t2 on (t1.int_col = t2.id)
 where month = 1
----- RESULTS
+---- RESULTS: VERIFY_IS_EQUAL_SORTED
 0,3,0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00,2009,1
 1,3,1,false,1,1,1,10,1.100000023841858,10.1,'01/01/09','1',2009-01-01 
00:01:00,2009,1
 2,3,2,true,2,2,2,20,2.200000047683716,20.2,'01/01/09','2',2009-01-01 
00:02:00.100000000,2009,1
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/sort-complex.test 
b/testdata/workloads/functional-query/queries/QueryTest/sort-complex.test
index 5ecdbf820..ebf32c2cd 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/sort-complex.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/sort-complex.test
@@ -218,10 +218,10 @@ BIGINT,STRING, SMALLINT
 select id, str, alltypes, tiny_struct, small_struct from complextypes_structs 
order by str;
 ---- RESULTS
 5,'fifth item','NULL','{"b":false}','{"i":98765,"s":"abcde f"}'
-1,'first 
item','{"ti":100,"si":12348,"i":156789012,"bi":163234345342,"b":true,"f":1234.56005859375,"do":65323423.33,"da":"2021-05-30","ts":"2021-06-01
 08:19:04","s1":"some string","s2":"another 
str","c1":"x","c2":"xyz","vc":"somevarcha","de1":12345,"de2":null}','{"b":true}','NULL'
-4,'fourth 
item','{"ti":90,"si":30482,"i":1664336,"bi":23567459873,"b":true,"f":0.5600000023841858,"do":NaN,"da":"2000-12-31","ts":"2023-12-31
 23:00:00.123400000","s1":"random string","s2":"","c1":"c","c2":"d  
","vc":"addsdrr","de1":33357,"de2":null}','{"b":null}','{"i":null,"s":"str"}'
-2,'second 
item','{"ti":123,"si":4567,"i":1562322212,"bi":334333345342,"b":false,"f":NaN,"do":23233423.099,"da":null,"ts":"2020-06-11
 10:10:04","s1":null,"s2":"NULL","c1":"a","c2":"ab 
","vc":"varchar","de1":11223,"de2":null}','{"b":false}','{"i":19191,"s":"small_struct_str"}'
-6,'sixth 
item','{"ti":127,"si":100,"i":234732212,"bi":664233223342,"b":true,"f":34.56000137329102,"do":99523423.33,"da":"1985-11-19","ts":"2020-09-15
 01:11:22","s1":"string1","s2":"string2","c1":"z","c2":"   
","vc":"cv","de1":346,"de2":6235.600}','NULL','{"i":null,"s":null}'
+1,'first 
item','{"ti":100,"si":12348,"i":156789012,"bi":163234345342,"b":true,"f":1234.56005859375,"do":65323423.33,"da":"2021-05-30","ts":"2021-06-01
 10:19:04","s1":"some string","s2":"another 
str","c1":"x","c2":"xyz","vc":"somevarcha","de1":12345,"de2":null}','{"b":true}','NULL'
+4,'fourth 
item','{"ti":90,"si":30482,"i":1664336,"bi":23567459873,"b":true,"f":0.5600000023841858,"do":NaN,"da":"2000-12-31","ts":"2024-01-01
 00:00:00.123400000","s1":"random string","s2":"","c1":"c","c2":"d  
","vc":"addsdrr","de1":33357,"de2":null}','{"b":null}','{"i":null,"s":"str"}'
+2,'second 
item','{"ti":123,"si":4567,"i":1562322212,"bi":334333345342,"b":false,"f":NaN,"do":23233423.099,"da":null,"ts":"2020-06-11
 12:10:04","s1":null,"s2":"NULL","c1":"a","c2":"ab 
","vc":"varchar","de1":11223,"de2":null}','{"b":false}','{"i":19191,"s":"small_struct_str"}'
+6,'sixth 
item','{"ti":127,"si":100,"i":234732212,"bi":664233223342,"b":true,"f":34.56000137329102,"do":99523423.33,"da":"1985-11-19","ts":"2020-09-15
 03:11:22","s1":"string1","s2":"string2","c1":"z","c2":"   
","vc":"cv","de1":346,"de2":6235.600}','NULL','{"i":null,"s":null}'
 3,'third 
item','{"ti":null,"si":null,"i":null,"bi":null,"b":null,"f":null,"do":null,"da":null,"ts":null,"s1":null,"s2":null,"c1":null,"c2":null,"vc":null,"de1":null,"de2":null}','{"b":true}','{"i":98765,"s":null}'
 ---- TYPES
 INT,STRING,STRING,STRING,STRING
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/top-n-complex.test 
b/testdata/workloads/functional-query/queries/QueryTest/top-n-complex.test
index ccf615823..1c27943ac 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/top-n-complex.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/top-n-complex.test
@@ -210,9 +210,9 @@ BIGINT,STRING, SMALLINT
 select id, str, alltypes, tiny_struct, small_struct from complextypes_structs 
order by str limit 4;
 ---- RESULTS
 5,'fifth item','NULL','{"b":false}','{"i":98765,"s":"abcde f"}'
-1,'first 
item','{"ti":100,"si":12348,"i":156789012,"bi":163234345342,"b":true,"f":1234.56005859375,"do":65323423.33,"da":"2021-05-30","ts":"2021-06-01
 08:19:04","s1":"some string","s2":"another 
str","c1":"x","c2":"xyz","vc":"somevarcha","de1":12345,"de2":null}','{"b":true}','NULL'
-4,'fourth 
item','{"ti":90,"si":30482,"i":1664336,"bi":23567459873,"b":true,"f":0.5600000023841858,"do":NaN,"da":"2000-12-31","ts":"2023-12-31
 23:00:00.123400000","s1":"random string","s2":"","c1":"c","c2":"d  
","vc":"addsdrr","de1":33357,"de2":null}','{"b":null}','{"i":null,"s":"str"}'
-2,'second 
item','{"ti":123,"si":4567,"i":1562322212,"bi":334333345342,"b":false,"f":NaN,"do":23233423.099,"da":null,"ts":"2020-06-11
 10:10:04","s1":null,"s2":"NULL","c1":"a","c2":"ab 
","vc":"varchar","de1":11223,"de2":null}','{"b":false}','{"i":19191,"s":"small_struct_str"}'
+1,'first 
item','{"ti":100,"si":12348,"i":156789012,"bi":163234345342,"b":true,"f":1234.56005859375,"do":65323423.33,"da":"2021-05-30","ts":"2021-06-01
 10:19:04","s1":"some string","s2":"another 
str","c1":"x","c2":"xyz","vc":"somevarcha","de1":12345,"de2":null}','{"b":true}','NULL'
+4,'fourth 
item','{"ti":90,"si":30482,"i":1664336,"bi":23567459873,"b":true,"f":0.5600000023841858,"do":NaN,"da":"2000-12-31","ts":"2024-01-01
 00:00:00.123400000","s1":"random string","s2":"","c1":"c","c2":"d  
","vc":"addsdrr","de1":33357,"de2":null}','{"b":null}','{"i":null,"s":"str"}'
+2,'second 
item','{"ti":123,"si":4567,"i":1562322212,"bi":334333345342,"b":false,"f":NaN,"do":23233423.099,"da":null,"ts":"2020-06-11
 12:10:04","s1":null,"s2":"NULL","c1":"a","c2":"ab 
","vc":"varchar","de1":11223,"de2":null}','{"b":false}','{"i":19191,"s":"small_struct_str"}'
 ---- TYPES
 INT,STRING,STRING,STRING,STRING
 
diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py
index 959225a23..de65d10b5 100644
--- a/tests/common/test_dimensions.py
+++ b/tests/common/test_dimensions.py
@@ -65,6 +65,13 @@ class TableFormatInfo(object):
                         'kudu', 'iceberg', 'json']
   KNOWN_COMPRESSION_CODECS = ['none', 'snap', 'gzip', 'bzip', 'def', 'zstd', 
'lz4']
   KNOWN_COMPRESSION_TYPES = ['none', 'block', 'record']
+  DEFAULT_COMPRESSIONS_IN_TESTS = {
+    'avro': 'snap',
+    'json': 'none',  # some tables (e.g. date_tbl) were only created for none
+    'seq': 'gzip',
+    'rc': 'bzip',
+    'text': 'none'  # use none for tables that are not written by Impala
+  }
 
   def __init__(self, **kwargs):
     self.dataset = kwargs.get('dataset', 'UNKNOWN')
@@ -220,6 +227,14 @@ def orc_schema_resolution_constraint(v):
   return file_format == 'orc' or orc_schema_resolution == 0
 
 
+def single_compression_constraint(v):
+  """ Constraint to use a single compression in compressible file formats """
+  file_format = v.get_value('table_format').file_format
+  compression = v.get_value('table_format').compression_codec
+  if file_format not in TableFormatInfo.DEFAULT_COMPRESSIONS_IN_TESTS: return 
True
+  return TableFormatInfo.DEFAULT_COMPRESSIONS_IN_TESTS[file_format] == 
compression
+
+
 # Common sets of values for the exec option vectors
 ALL_BATCH_SIZES = [0]
 
diff --git a/tests/common/test_vector.py b/tests/common/test_vector.py
index 12f87bcb6..a0ba60737 100644
--- a/tests/common/test_vector.py
+++ b/tests/common/test_vector.py
@@ -57,9 +57,12 @@
 # Additional examples of usage can be found within the test suites.
 
 from __future__ import absolute_import, division, print_function
+from collections import OrderedDict
 from itertools import product
 from copy import deepcopy
+import random
 import logging
+import os
 
 
 LOG = logging.getLogger(__name__)
@@ -126,6 +129,9 @@ class ImpalaTestVector(object):
   def __str__(self):
       return ' | '.join(['%s' % vector_value for vector_value in 
self.vector_values])
 
+  def __repr__(self):
+    return str(self)
+
   # Each value in a test vector is wrapped in the Value object. This wrapping 
is
   # done internally so this object should never need to be created by the user.
   class Value(object):
@@ -134,14 +140,17 @@ class ImpalaTestVector(object):
       self.value = value
 
     def __str__(self):
-      return '%s: %s' % (self.name, self.value)
+      return '"%s: %s"' % (self.name, self.value)
+
+    def __repr__(self):
+      return str(self)
 
 
 # Matrix -> Collection of vectors
 # Vector -> Call to get specific values
 class ImpalaTestMatrix(object):
   def __init__(self, *args):
-    self.dimensions = dict((arg.name, arg) for arg in args)
+    self.dimensions = OrderedDict((arg.name, arg) for arg in args)
     self.constraint_list = list()
     self.independent_exec_option_names = set()
 
@@ -244,15 +253,49 @@ class ImpalaTestMatrix(object):
       for vec in product(*self.__extract_vector_values()) if 
self.is_valid(vec)]
 
   def __generate_pairwise_combinations(self):
-    from allpairspy import AllPairs
-    all_pairs = AllPairs
-
     # Pairwise fails if the number of inputs == 1. Use exhaustive in this case 
the
     # results will be the same.
     if len(self.dimensions) == 1:
       return self.__generate_exhaustive_combinations()
-    return [ImpalaTestVector(self.__deepcopy_vector_values(vec))
-      for vec in all_pairs(self.__extract_vector_values(), 
filter_func=self.is_valid)]
+    vals = self.__extract_vector_values()
+    all_vectors = [v for v in product(*vals) if self.is_valid(v)]
+    # Add possibility to shuffle vectors to get different covering vector set.
+    # This could exercise 3+ way combinations skipped by the pairwise algorithm.
+    seed = os.environ.get('IMPALA_TEST_VECTOR_SEED', None)
+    if seed:
+      rnd = random.Random(seed)
+      rnd.shuffle(all_vectors)
+    found = set()
+    result = []
+    # Originally allpairspy was used for vector set generation to cover all 
pairs,
+    # but it turned out to be unreliable when using constraints 
(IMPALA-13125). Below
+    # is a simple implementation that may use more vectors than needed but 
will always
+    # cover all parameter pairs if possible (constraints can lead to pairs 
that can't
+    # be covered). A caveat is that first the product of all dimensions is 
needed
+    # ('all_vectors'), which grows exponentially with the number of 
dimensions, but the
+    # vector counts in Impala tests are still manageable.
+    #
+    # Note that there is not much to optimize in the test suites I checked, 
because due
+    # to the constraints and the size differences between dimensions the 
vector set
+    # is dictated by a few dimensions (file_format+exec_options) and this set 
can easily
+    # cover pairs with other dimensions.
+    #
+    #  TODO: is there a nicer algorithm / is it needed?
+    #   pair creation could be modelled as a bipartite graph with dimension 
pairs A and
+    #   test vectors B as vertices, and a minimal set of B would be needed to 
have
+    #   adjacent node for each vertex in A - I am pretty sure that this is 
polynomial, but
+    #   didn't do the research
+    # The following steps look for "better vectors" first that cover more 
dimension pairs
+    # to avoid very suboptimal solutions - the example on allpairspy site was 
covered
+    # with 25 vectors instead of the 21 used by allpairspy, which seems acceptable.
+    remaining = self.__pairwise_step(all_vectors, result, found, 
len(self.dimensions))
+    remaining = self.__pairwise_step(remaining, result, found, 2)
+    remaining = self.__pairwise_step(remaining, result, found, 1)
+    assert len(remaining) == 0
+
+    res = [ImpalaTestVector(self.__deepcopy_vector_values(vec))
+      for vec in result]
+    return res
 
   def add_constraint(self, constraint_func):
     self.constraint_list.append(constraint_func)
@@ -266,11 +309,38 @@ class ImpalaTestMatrix(object):
     return [v[1] for v in self.dimensions.items()]
 
   def is_valid(self, vector):
+    assert isinstance(vector, tuple)
+    assert len(vector) == len(self.dimensions)
     for constraint in self.constraint_list:
-      if (isinstance(vector, list) or isinstance(vector, tuple)) and\
-          len(vector) == len(self.dimensions):
-        valid = constraint(ImpalaTestVector(vector))
-        if valid:
-          continue
+      if not constraint(ImpalaTestVector(vector)):
         return False
     return True
+
+  def __pairwise_step(self, vectors, results, pairs_covered, min_cover_count):
+    """ Simple algorithm to select a subset of 'vectors' (each with N items) 
to cover
+        every parameter pair.
+        With 'min_cover_count'=1 all pairs will be covered (if possible), for 
bigger
+        values only those vectors will be selected that cover at least 
'min_cover_count'
+        new pairs and the unused vectors will be returned in the result of the 
function.
+        'pairs_covered' is a set of the already covered pairs. The key is a 
tuple of 4
+        elements (i, v[i], j, v[j]) where i<j and both are < N. Each vector 
covers
+        N * (N - 1) / 2 pairs.
+    """
+    remaining = []
+    for v in vectors:
+      cnt = 0
+      for i, x in enumerate(v):
+        for j, y in enumerate(v[i + 1:], i + 1):
+          t = (i, x, j, y)
+          if t not in pairs_covered:
+            cnt += 1
+      if cnt == 0: continue
+      if cnt < min_cover_count:
+        remaining.append(v)
+        continue
+      results.append(v)
+      for i, x in enumerate(v):
+        for j, y in enumerate(v[i + 1:], i + 1):
+          t = (i, x, j, y)
+          pairs_covered.add(t)
+    return remaining
diff --git a/tests/query_test/test_date_queries.py 
b/tests/query_test/test_date_queries.py
index 8f60bfc04..3df5e17ca 100644
--- a/tests/query_test/test_date_queries.py
+++ b/tests/query_test/test_date_queries.py
@@ -27,6 +27,7 @@ from tests.common.test_dimensions import (
     create_exec_option_dimension_from_dict,
     create_uncompressed_text_dimension,
     default_protocol_or_parquet_constraint,
+    single_compression_constraint
 )
 from tests.shell.util import create_impala_shell_executable_dimension
 
@@ -44,9 +45,10 @@ class TestDateQueriesBase(ImpalaTestSuite):
     # DATE type is only supported for text, parquet, avro, orc and json 
fileformat on HDFS
     # and HBASE.
     cls.ImpalaTestMatrix.add_constraint(lambda v:
-        v.get_value('table_format').file_format in ('text', 'hbase', 
'parquet', 'json')
-        or (v.get_value('table_format').file_format == 'avro'
-            and v.get_value('table_format').compression_codec == 'snap'))
+        v.get_value('table_format').file_format in (
+            'text', 'hbase', 'parquet', 'json', 'avro'))
+
+    cls.ImpalaTestMatrix.add_constraint(single_compression_constraint)
 
     # Run these queries through both beeswax and HS2 to get coverage of date 
returned
     # via both protocols.
diff --git a/tests/query_test/test_decimal_queries.py 
b/tests/query_test/test_decimal_queries.py
index 18062ea20..2361220af 100644
--- a/tests/query_test/test_decimal_queries.py
+++ b/tests/query_test/test_decimal_queries.py
@@ -27,6 +27,7 @@ from tests.common.test_dimensions import (
     create_client_protocol_dimension,
     create_exec_option_dimension_from_dict,
     default_protocol_or_parquet_constraint,
+    single_compression_constraint,
 )
 from tests.util.filesystem_utils import IS_S3
 
@@ -44,10 +45,9 @@ class TestDecimalQueries(ImpalaTestSuite):
     # Hive < 0.11 does not support decimal so we can't run these tests against 
the other
     # file formats.
     # TODO: Enable them on Hive >= 0.11.
-    cls.ImpalaTestMatrix.add_constraint(lambda v:
-        v.get_value('table_format').file_format in ['parquet', 'orc', 'kudu', 
'json']
-        or (v.get_value('table_format').file_format == 'text'
-            and v.get_value('table_format').compression_codec == 'none'))
+    cls.ImpalaTestMatrix.add_constraint(lambda v: 
v.get_value('table_format').file_format
+                                        in ['text', 'parquet', 'orc', 'kudu', 
'json'])
+    cls.ImpalaTestMatrix.add_constraint(single_compression_constraint)
 
     # Run these queries through both beeswax and HS2 to get coverage of 
decimals returned
     # via both protocols.
diff --git a/tests/query_test/test_queries.py b/tests/query_test/test_queries.py
index f1efb6e91..a3c86e771 100644
--- a/tests/query_test/test_queries.py
+++ b/tests/query_test/test_queries.py
@@ -36,6 +36,7 @@ from tests.common.test_dimensions import (
     create_uncompressed_text_dimension,
     default_protocol_or_parquet_constraint,
     extend_exec_option_dimension,
+    single_compression_constraint,
     FILE_FORMAT_TO_STORED_AS_MAP,
 )
 from tests.util.filesystem_utils import get_fs_path
@@ -58,6 +59,9 @@ class TestQueries(ImpalaTestSuite):
     if cls.exploration_strategy() == 'core':
       cls.ImpalaTestMatrix.add_constraint(lambda v:
           v.get_value('table_format').file_format == 'parquet')
+
+    cls.ImpalaTestMatrix.add_constraint(single_compression_constraint)
+
     # Run these queries through both beeswax and HS2 to get coverage of both 
protocols.
     # Don't run all combinations of table format and protocol - the dimensions 
should
     # be orthogonal.
@@ -121,7 +125,12 @@ class TestQueries(ImpalaTestSuite):
     self.run_test_case('QueryTest/top-n', vector)
 
     if file_format in ['parquet', 'orc']:
-      self.run_test_case('QueryTest/top-n-complex', vector)
+      # set timestamp options to get consistent results for both formats.
+      new_vector = deepcopy(vector)
+      options = new_vector.get_value('exec_option')
+      options['convert_legacy_hive_parquet_utc_timestamps'] = 1
+      options['timezone'] = '"Europe/Budapest"'
+      self.run_test_case('QueryTest/top-n-complex', new_vector)
 
   def test_union(self, vector):
     self.run_test_case('QueryTest/union', vector)
@@ -143,6 +152,9 @@ class TestQueries(ImpalaTestSuite):
     self.run_test_case('QueryTest/intersect', vector)
 
   def test_except(self, vector):
+    if vector.get_value('table_format').file_format == "hbase":
+      pytest.xfail(reason="IMPALA-14333 - HBase does not return rows "
+                   "where tinyint_col is NULL")
     self.run_test_case('QueryTest/except', vector)
 
   def test_sort(self, vector):
@@ -156,10 +168,19 @@ class TestQueries(ImpalaTestSuite):
     self.run_test_case('QueryTest/top-n', vector)
 
     if file_format in ['parquet', 'orc']:
-      self.run_test_case('QueryTest/sort-complex', vector)
+      # set timestamp options to get consistent results for both formats.
+      new_vector = deepcopy(vector)
+      options = new_vector.get_value('exec_option')
+      options['convert_legacy_hive_parquet_utc_timestamps'] = 1
+      options['timezone'] = '"Europe/Budapest"'
+      self.run_test_case('QueryTest/sort-complex', new_vector)
+
 
   def test_partitioned_top_n(self, vector):
     """Test partitioned Top-N operator."""
+    if vector.get_value('table_format').file_format == "hbase":
+      pytest.xfail(reason="IMPALA-14333 - HBase does not return rows "
+                   "where tinyint_col is NULL")
     self.run_test_case('QueryTest/partitioned-top-n', vector)
     if vector.get_value('table_format').file_format in ['parquet', 'orc']:
       self.run_test_case('QueryTest/partitioned-top-n-complex', vector)
diff --git a/tests/util/workload_management.py 
b/tests/util/workload_management.py
index e98249efa..8f29bc873 100644
--- a/tests/util/workload_management.py
+++ b/tests/util/workload_management.py
@@ -247,7 +247,7 @@ def assert_query(query_tbl, client, expected_cluster_id="", 
raw_profile=None,
     query_opts = re.search(r'\n\s+Query Options \(set by 
configuration\):\s+(.*?)\n',
         profile_text)
     assert query_opts is not None
-    assert value == query_opts.group(1).replace("&apos;", "'"), \
+    assert value == query_opts.group(1).replace("&apos;", 
"'").replace("&quot;", '"'), \
         "query opts set by config incorrect"
 
   # Resource Pool

(impala) branch master updated: IMPALA-13125: Fix pairwise test vector generation

Reply via email to