This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 182aa5066e2eb2cf73c08a24932776bcfda279e7
Author: Riza Suminto <[email protected]>
AuthorDate: Sat Apr 12 19:54:01 2025 -0700

    IMPALA-13958: Revisit hs2_parquet_constraint and hs2_text_constraint
    
    hs2_parquet_constraint and hs2_text_constraint are meant to extend the
    test vector dimension to also test non-default test protocols (other
    than beeswax), but limit those protocols to only run against the
    'parquet/none' or 'text/none' format respectively.
    
    This patch changes these constraints to
    default_protocol_or_parquet_constraint and
    default_protocol_or_text_constraint respectively, such that full file
    format coverage happens for the default_test_protocol configuration and
    is limited for the other protocols. Drop hs2_parquet_constraint entirely
    from test_utf8_strings.py because that test is already constrained to
    the single 'parquet/none' file format.
    
    The modified-row-count validations in date-fileformat-support.test and
    date-partitioning.test are changed to check the NumModifiedRows counter
    from the runtime profile.
    
    Fix TestQueriesJsonTables to always run with the beeswax protocol because
    its assertions rely on beeswax-specific return values.
    
    Run impala-isort and fix a few flake8 issues in the modified test files.
    
    Testing:
    Ran and passed the affected test files using exhaustive exploration and
    env var DEFAULT_TEST_PROTOCOL=hs2. Confirmed that full file format
    coverage happens for the hs2 protocol. Note that
    DEFAULT_TEST_PROTOCOL=beeswax is still the default.
    
    Change-Id: I8be0a628842e29a8fcc036180654cd159f6a23c8
    Reviewed-on: http://gerrit.cloudera.org:8080/22775
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 .../queries/QueryTest/date-fileformat-support.test | 14 ++++----
 .../queries/QueryTest/date-partitioning.test       | 26 +++++++--------
 tests/common/test_dimensions.py                    | 37 +++++++++++---------
 tests/query_test/test_chars.py                     | 14 +++++---
 tests/query_test/test_date_queries.py              | 22 ++++++++----
 tests/query_test/test_decimal_queries.py           | 32 ++++++++++--------
 tests/query_test/test_queries.py                   | 39 ++++++++++++++++------
 tests/query_test/test_utf8_strings.py              | 13 +++++---
 8 files changed, 123 insertions(+), 74 deletions(-)

diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/date-fileformat-support.test
 
b/testdata/workloads/functional-query/queries/QueryTest/date-fileformat-support.test
index 2d71f5b0b..ac18ee4e1 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/date-fileformat-support.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/date-fileformat-support.test
@@ -45,18 +45,18 @@ NULL
 # Inserting text partitions to $DATABASE.date_tbl is OK.
 insert into $DATABASE.date_tbl partition (date_part)
 select date_col, date_part from functional.date_tbl;
----- RESULTS
-date_part=0001-01-01: 7
-date_part=1399-06-27: 3
-date_part=2017-11-27: 10
-date_part=9999-12-31: 2
+---- RUNTIME_PROFILE
+NumModifiedRows: 7
+NumModifiedRows: 3
+NumModifiedRows: 10
+NumModifiedRows: 2
 ====
 ---- QUERY
 # Inserting into parquet partition is supported.
 insert into $DATABASE.date_tbl partition(date_part='1899-12-31')
 select date_col from functional_parquet.date_tbl where date_part = 
'1399-06-27';
----- RESULTS
-date_part=1899-12-31: 3
+---- RUNTIME_PROFILE
+NumModifiedRows: 3
 ====
 ---- QUERY
 # Adding ORC partition works even though Impala cannot write ORC format.
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/date-partitioning.test 
b/testdata/workloads/functional-query/queries/QueryTest/date-partitioning.test
index c41de4fc0..9fd8b51a7 100644
--- 
a/testdata/workloads/functional-query/queries/QueryTest/date-partitioning.test
+++ 
b/testdata/workloads/functional-query/queries/QueryTest/date-partitioning.test
@@ -25,29 +25,29 @@ AnalysisException: Partition spec already exists: (p=DATE 
'1300-01-01').
 ---- QUERY
 # Date partition formatted differently in insert
 insert into $DATABASE.dtbl partition (p='1300-1-01') values ('1300-1-1');
----- RESULTS
-p=1300-01-01: 1
+---- RUNTIME_PROFILE
+NumModifiedRows: 1
 ====
 ---- QUERY
 insert into $DATABASE.dtbl partition (p='1300-01-1') values ('1300-1-02');
----- RESULTS
-p=1300-01-01: 1
+---- RUNTIME_PROFILE
+NumModifiedRows: 1
 ====
 ---- QUERY
 insert into $DATABASE.dtbl partition (p=DATE '1300-1-1') values ('1300-1-03');
----- RESULTS
-p=1300-01-01: 1
+---- RUNTIME_PROFILE
+NumModifiedRows: 1
 ====
 ---- QUERY
 # Insert into a new partition
 insert into $DATABASE.dtbl partition (p=DATE '1400-01-1') values ('1400-1-1');
----- RESULTS
-p=1400-01-01: 1
+---- RUNTIME_PROFILE
+NumModifiedRows: 1
 ====
 ---- QUERY
 insert into $DATABASE.dtbl partition (p='1400-1-01') values ('1400-1-2');
----- RESULTS
-p=1400-01-01: 1
+---- RUNTIME_PROFILE
+NumModifiedRows: 1
 ====
 ---- QUERY
 select p, c from $DATABASE.dtbl;
@@ -86,9 +86,9 @@ UDF ERROR: String to Date parse failed. Invalid string val: 
'1400-01-'
 # Test that STRING is implicitly cast to DATE.
 insert into $DATABASE.dtbl partition(p) select * from $DATABASE.stbl
 where p in ('1400-1-1', '1400-1-01', '1500-01-1');
----- RESULTS
-p=1400-01-01: 2
-p=1500-01-01: 1
+---- RUNTIME_PROFILE
+NumModifiedRows: 2
+NumModifiedRows: 1
 ====
 ---- QUERY
 select p, c from $DATABASE.dtbl;
diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py
index ba3179037..959225a23 100644
--- a/tests/common/test_dimensions.py
+++ b/tests/common/test_dimensions.py
@@ -18,18 +18,25 @@
 # Common test dimensions and associated utility functions.
 
 from __future__ import absolute_import, division, print_function
-from builtins import range
 import copy
+from itertools import product
 import os
+
+from builtins import range
 import pytest
-from itertools import product
 
 from tests.common.test_vector import (
-  EXEC_OPTION, PROTOCOL, TABLE_FORMAT,
-  BEESWAX, HS2, HS2_HTTP,
-  ImpalaTestDimension, ImpalaTestVector, assert_exec_option_key)
-from tests.util.filesystem_utils import (
-    IS_HDFS)
+    assert_exec_option_key,
+    BEESWAX,
+    EXEC_OPTION,
+    HS2,
+    HS2_HTTP,
+    ImpalaTestDimension,
+    ImpalaTestVector,
+    PROTOCOL,
+    TABLE_FORMAT,
+)
+from tests.util.filesystem_utils import IS_HDFS
 
 WORKLOAD_DIR = os.environ['IMPALA_WORKLOAD_DIR']
 
@@ -190,18 +197,18 @@ def create_client_protocol_no_strict_dimension():
   return ImpalaTestDimension('strict_hs2_protocol', False)
 
 
-def hs2_parquet_constraint(v):
-  """Constraint function, used to only run HS2 against Parquet format, because 
file format
-  and the client protocol are orthogonal."""
-  return (v.get_protocol() == BEESWAX
+def default_protocol_or_parquet_constraint(v):
+  """Constraint function, used to limit non-default test protocol against 
uncompressed
+  parquet format, because file format and the client protocol are 
orthogonal."""
+  return (v.get_protocol() == pytest.config.option.default_test_protocol
           or (v.get_table_format().file_format == 'parquet'
               and v.get_table_format().compression_codec == 'none'))
 
 
-def hs2_text_constraint(v):
-  """Constraint function, used to only run HS2 against uncompressed text, 
because file
-  format and the client protocol are orthogonal."""
-  return (v.get_protocol() == BEESWAX
+def default_protocol_or_text_constraint(v):
+  """Constraint function, used to limit non-default test protocol against 
uncompressed
+  text format, because file format and the client protocol are orthogonal."""
+  return (v.get_protocol() == pytest.config.option.default_test_protocol
           or (v.get_table_format().file_format == 'text'
               and v.get_table_format().compression_codec == 'none'))
 
diff --git a/tests/query_test/test_chars.py b/tests/query_test/test_chars.py
index 2405e4358..6592692fd 100644
--- a/tests/query_test/test_chars.py
+++ b/tests/query_test/test_chars.py
@@ -19,11 +19,16 @@ from __future__ import absolute_import, division, 
print_function
 from copy import deepcopy
 
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.test_dimensions import (create_exec_option_dimension,
-    create_client_protocol_dimension, hs2_parquet_constraint, 
hs2_text_constraint)
+from tests.common.test_dimensions import (
+    create_client_protocol_dimension,
+    create_exec_option_dimension,
+    default_protocol_or_parquet_constraint,
+    default_protocol_or_text_constraint,
+)
 
 
 class TestStringQueries(ImpalaTestSuite):
+
   @classmethod
   def add_test_dimensions(cls):
     super(TestStringQueries, cls).add_test_dimensions()
@@ -35,7 +40,7 @@ class TestStringQueries(ImpalaTestSuite):
     # Run these queries through both beeswax and HS2 to get coverage of 
CHAR/VARCHAR
     # returned via both protocols.
     cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
-    cls.ImpalaTestMatrix.add_constraint(hs2_text_constraint)
+    cls.ImpalaTestMatrix.add_constraint(default_protocol_or_text_constraint)
 
   def test_chars(self, vector):
     self.run_test_case('QueryTest/chars', vector)
@@ -57,6 +62,7 @@ class TestStringQueries(ImpalaTestSuite):
 
 
 class TestCharFormats(ImpalaTestSuite):
+
   @classmethod
   def add_test_dimensions(cls):
     super(TestCharFormats, cls).add_test_dimensions()
@@ -72,7 +78,7 @@ class TestCharFormats(ImpalaTestSuite):
     # Run these queries through both beeswax and HS2 to get coverage of 
CHAR/VARCHAR
     # returned via both protocols.
     cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
-    cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint)
+    cls.ImpalaTestMatrix.add_constraint(default_protocol_or_parquet_constraint)
 
   def test_char_format(self, vector):
     self.run_test_case('QueryTest/chars-formats', vector)
diff --git a/tests/query_test/test_date_queries.py 
b/tests/query_test/test_date_queries.py
index 310159203..8f60bfc04 100644
--- a/tests/query_test/test_date_queries.py
+++ b/tests/query_test/test_date_queries.py
@@ -18,11 +18,16 @@
 # Targeted tests for date type.
 
 from __future__ import absolute_import, division, print_function
+
 from tests.common.file_utils import create_table_and_copy_files
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.skip import SkipIfFS
-from tests.common.test_dimensions import 
(create_exec_option_dimension_from_dict,
-    create_client_protocol_dimension, hs2_parquet_constraint)
+from tests.common.test_dimensions import (
+    create_client_protocol_dimension,
+    create_exec_option_dimension_from_dict,
+    create_uncompressed_text_dimension,
+    default_protocol_or_parquet_constraint,
+)
 from tests.shell.util import create_impala_shell_executable_dimension
 
 
@@ -46,12 +51,17 @@ class TestDateQueriesBase(ImpalaTestSuite):
     # Run these queries through both beeswax and HS2 to get coverage of date 
returned
     # via both protocols.
     cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
-    cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint)
     
cls.ImpalaTestMatrix.add_dimension(create_impala_shell_executable_dimension())
 
 
 class TestDateQueriesAllFormat(TestDateQueriesBase):
 
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestDateQueriesAllFormat, cls).add_test_dimensions()
+    # Limit to 'parquet/none' for non-default test protocol.
+    cls.ImpalaTestMatrix.add_constraint(default_protocol_or_parquet_constraint)
+
   def test_queries(self, vector):
     if vector.get_value('table_format').file_format == 'avro':
       # Avro date test queries are in a separate test file.
@@ -69,9 +79,9 @@ class TestDateQueriesTextFormat(TestDateQueriesBase):
   @classmethod
   def add_test_dimensions(cls):
     super(TestDateQueriesTextFormat, cls).add_test_dimensions()
-    # Only run this test class with 'text' table_format.
-    cls.ImpalaTestMatrix.add_constraint(lambda v:
-        v.get_value('table_format').file_format == 'text')
+    # Only run this test class with 'text/none' table_format.
+    cls.ImpalaTestMatrix.add_dimension(
+        create_uncompressed_text_dimension(cls.get_workload()))
 
   def test_partitioning(self, vector, unique_database):
     """ Test partitioning by DATE. """
diff --git a/tests/query_test/test_decimal_queries.py 
b/tests/query_test/test_decimal_queries.py
index dd2c4ef15..18062ea20 100644
--- a/tests/query_test/test_decimal_queries.py
+++ b/tests/query_test/test_decimal_queries.py
@@ -18,12 +18,16 @@
 # Targeted tests for decimal type.
 
 from __future__ import absolute_import, division, print_function
+
 import pytest
 
 from tests.common.impala_connection import IMPALA_CONNECTION_EXCEPTION
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.test_dimensions import 
(create_exec_option_dimension_from_dict,
-    create_client_protocol_dimension, hs2_parquet_constraint)
+from tests.common.test_dimensions import (
+    create_client_protocol_dimension,
+    create_exec_option_dimension_from_dict,
+    default_protocol_or_parquet_constraint,
+)
 from tests.util.filesystem_utils import IS_S3
 
 
@@ -33,22 +37,22 @@ class TestDecimalQueries(ImpalaTestSuite):
     super(TestDecimalQueries, cls).add_test_dimensions()
     cls.ImpalaTestMatrix.add_dimension(
       create_exec_option_dimension_from_dict({
-        'decimal_v2' : ['false', 'true'],
-        'batch_size' : [0, 1],
-        'disable_codegen' : ['false', 'true'],
-        'disable_codegen_rows_threshold' : [0]}))
+        'decimal_v2': ['false', 'true'],
+        'batch_size': [0, 1],
+        'disable_codegen': ['false', 'true'],
+        'disable_codegen_rows_threshold': [0]}))
     # Hive < 0.11 does not support decimal so we can't run these tests against 
the other
     # file formats.
     # TODO: Enable them on Hive >= 0.11.
-    cls.ImpalaTestMatrix.add_constraint(lambda v:\
-        (v.get_value('table_format').file_format == 'text' and
-         v.get_value('table_format').compression_codec == 'none') or
-         v.get_value('table_format').file_format in ['parquet', 'orc', 'kudu', 
'json'])
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
+        v.get_value('table_format').file_format in ['parquet', 'orc', 'kudu', 
'json']
+        or (v.get_value('table_format').file_format == 'text'
+            and v.get_value('table_format').compression_codec == 'none'))
 
     # Run these queries through both beeswax and HS2 to get coverage of 
decimals returned
     # via both protocols.
     cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
-    cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint)
+    cls.ImpalaTestMatrix.add_constraint(default_protocol_or_parquet_constraint)
 
   def test_queries(self, vector):
     self.run_test_case('QueryTest/decimal', vector)
@@ -75,8 +79,8 @@ class TestAvroDecimalQueries(ImpalaTestSuite):
   def add_test_dimensions(cls):
     super(TestAvroDecimalQueries, cls).add_test_dimensions()
     cls.ImpalaTestMatrix.add_constraint(lambda v:
-        (v.get_value('table_format').file_format == 'avro' and
-         v.get_value('table_format').compression_codec == 'snap'))
+        v.get_value('table_format').file_format == 'avro'
+        and v.get_value('table_format').compression_codec == 'snap')
 
   def test_avro_queries(self, vector):
     self.run_test_case('QueryTest/decimal_avro', vector)
@@ -91,7 +95,7 @@ class TestDecimalOverflowExprs(ImpalaTestSuite):
   def add_test_dimensions(cls):
     super(TestDecimalOverflowExprs, cls).add_test_dimensions()
     cls.ImpalaTestMatrix.add_constraint(lambda v:
-        (v.get_value('table_format').file_format in ['kudu', 'parquet', 
'text']))
+        v.get_value('table_format').file_format in ['kudu', 'parquet', 'text'])
 
   def test_insert_select_exprs(self, vector, unique_database):
     TBL_NAME_1 = '`{0}`.`overflowed_decimal_tbl_1`'.format(unique_database)
diff --git a/tests/query_test/test_queries.py b/tests/query_test/test_queries.py
index b712bdeb8..c8fdb4353 100644
--- a/tests/query_test/test_queries.py
+++ b/tests/query_test/test_queries.py
@@ -18,20 +18,28 @@
 # General Impala query tests
 
 from __future__ import absolute_import, division, print_function
-import pytest
-import re
 from copy import deepcopy
+import re
+from subprocess import check_call
+
+import pytest
 
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.skip import (
-    SkipIfEC, SkipIfCatalogV2, SkipIfNotHdfsMinicluster, SkipIfFS)
+from tests.common.skip import SkipIfFS, SkipIfNotHdfsMinicluster
 from tests.common.test_dimensions import (
-    create_uncompressed_text_dimension, create_uncompressed_json_dimension,
-    create_exec_option_dimension_from_dict, create_client_protocol_dimension,
-    hs2_parquet_constraint, extend_exec_option_dimension, 
FILE_FORMAT_TO_STORED_AS_MAP,
-    add_exec_option_dimension, create_exec_option_dimension)
+    add_exec_option_dimension,
+    create_client_protocol_dimension,
+    create_exec_option_dimension,
+    create_exec_option_dimension_from_dict,
+    create_uncompressed_json_dimension,
+    create_uncompressed_text_dimension,
+    default_protocol_or_parquet_constraint,
+    extend_exec_option_dimension,
+    FILE_FORMAT_TO_STORED_AS_MAP,
+)
+from tests.common.test_vector import BEESWAX
 from tests.util.filesystem_utils import get_fs_path
-from subprocess import check_call
+
 
 class TestQueries(ImpalaTestSuite):
 
@@ -54,7 +62,7 @@ class TestQueries(ImpalaTestSuite):
     # Don't run all combinations of table format and protocol - the dimensions 
should
     # be orthogonal.
     cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
-    cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint)
+    cls.ImpalaTestMatrix.add_constraint(default_protocol_or_parquet_constraint)
 
     # Adding a test dimension here to test the small query opt in exhaustive.
     if cls.exploration_strategy() == 'exhaustive':
@@ -212,6 +220,7 @@ class TestQueries(ImpalaTestSuite):
       pytest.xfail("null data does not appear to work in hbase")
     self.run_test_case('QueryTest/null_data', vector)
 
+
 # Tests in this class are only run against text/none either because that's the 
only
 # format that is supported, or the tests don't exercise the file format.
 class TestQueriesTextTables(ImpalaTestSuite):
@@ -254,6 +263,13 @@ class TestQueriesTextTables(ImpalaTestSuite):
 # Tests in this class are only run against json/none either because that's the 
only
 # format that is supported, or the tests don't exercise the file format.
 class TestQueriesJsonTables(ImpalaTestSuite):
+
+  @classmethod
+  def default_test_protocol(cls):
+    # Some assertions in this test relies on beeswax-specific return values 
such as
+    # Infinity, NaN, false, and true. HS2 returns inf, nan, False, and True 
instead.
+    return BEESWAX
+
   @classmethod
   def add_test_dimensions(cls):
     super(TestQueriesJsonTables, cls).add_test_dimensions()
@@ -277,6 +293,7 @@ class TestQueriesJsonTables(ImpalaTestSuite):
     vector.get_value('exec_option')['abort_on_error'] = 0
     self.run_test_case('QueryTest/overflow_json', vector)
 
+
 # Tests in this class are only run against Parquet because the tests don't 
exercise the
 # file format.
 class TestQueriesParquetTables(ImpalaTestSuite):
@@ -304,6 +321,7 @@ class TestQueriesParquetTables(ImpalaTestSuite):
     vector.get_value('exec_option')['num_nodes'] = 1
     self.run_test_case('QueryTest/single-node-large-sorts', vector)
 
+
 # Tests for queries in HDFS-specific tables, e.g. AllTypesAggMultiFilesNoPart.
 class TestHdfsQueries(ImpalaTestSuite):
   @classmethod
@@ -387,6 +405,7 @@ class 
TestPartitionKeyScansWithMultipleBlocks(ImpalaTestSuite):
           "SELECT max(year) FROM %s.alltypes_multiblocks" % (unique_database))
     assert int(result.get_data()) == 2010
 
+
 class TestTopNReclaimQuery(ImpalaTestSuite):
   """Test class to validate that TopN periodically reclaims tuple pool memory
    and runs with a lower memory footprint."""
diff --git a/tests/query_test/test_utf8_strings.py 
b/tests/query_test/test_utf8_strings.py
index 768da848e..d104a77aa 100644
--- a/tests/query_test/test_utf8_strings.py
+++ b/tests/query_test/test_utf8_strings.py
@@ -16,24 +16,27 @@
 # under the License.
 
 from __future__ import absolute_import, division, print_function
+
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.test_dimensions import (create_exec_option_dimension,
-    create_client_protocol_dimension, hs2_parquet_constraint)
+from tests.common.test_dimensions import (
+    create_client_protocol_dimension,
+    create_exec_option_dimension,
+)
 
 
 class TestUtf8StringFunctions(ImpalaTestSuite):
+
   @classmethod
   def add_test_dimensions(cls):
     super(TestUtf8StringFunctions, cls).add_test_dimensions()
     cls.ImpalaTestMatrix.add_dimension(
       create_exec_option_dimension(disable_codegen_options=[False, True]))
     cls.ImpalaTestMatrix.add_constraint(lambda v:
-        v.get_value('table_format').file_format in ['parquet'] and
-        v.get_value('table_format').compression_codec in ['none'])
+        v.get_value('table_format').file_format in ['parquet']
+        and v.get_value('table_format').compression_codec in ['none'])
     # Run these queries through both beeswax and HS2 to get coverage of 
CHAR/VARCHAR
     # returned via both protocols.
     cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension())
-    cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint)
 
   def test_string_functions(self, vector):
     self.run_test_case('QueryTest/utf8-string-functions', vector)

Reply via email to