This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 182aa5066e2eb2cf73c08a24932776bcfda279e7 Author: Riza Suminto <[email protected]> AuthorDate: Sat Apr 12 19:54:01 2025 -0700 IMPALA-13958: Revisit hs2_parquet_constraint and hs2_text_constraint hs2_parquet_constraint and hs2_text_constraint are meant to extend the test vector dimension to also test non-default test protocols (other than beeswax), but limit them to only run against the 'parquet/none' or 'text/none' format, respectively. This patch modifies these constraints to default_protocol_or_parquet_constraint and default_protocol_or_text_constraint respectively, such that full file format coverage happens for the default_test_protocol configuration and is limited for the other protocols. Drop hs2_parquet_constraint entirely from test_utf8_strings.py because that test is already constrained to a single 'parquet/none' file format. Num modified rows validation in date-fileformat-support.test and date-partitioning.test is changed to check the NumModifiedRows counter from the profile. Fix TestQueriesJsonTables to always run with the beeswax protocol because its assertions rely on beeswax-specific return values. Run impala-isort and fix a few flake8 issues in the modified test files. Testing: Run and pass the affected test files using exhaustive exploration and env var DEFAULT_TEST_PROTOCOL=hs2. Confirmed that full file format coverage happens for the hs2 protocol. Note that DEFAULT_TEST_PROTOCOL=beeswax is still the default. 
Change-Id: I8be0a628842e29a8fcc036180654cd159f6a23c8 Reviewed-on: http://gerrit.cloudera.org:8080/22775 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- .../queries/QueryTest/date-fileformat-support.test | 14 ++++---- .../queries/QueryTest/date-partitioning.test | 26 +++++++-------- tests/common/test_dimensions.py | 37 +++++++++++--------- tests/query_test/test_chars.py | 14 +++++--- tests/query_test/test_date_queries.py | 22 ++++++++---- tests/query_test/test_decimal_queries.py | 32 ++++++++++-------- tests/query_test/test_queries.py | 39 ++++++++++++++++------ tests/query_test/test_utf8_strings.py | 13 +++++--- 8 files changed, 123 insertions(+), 74 deletions(-) diff --git a/testdata/workloads/functional-query/queries/QueryTest/date-fileformat-support.test b/testdata/workloads/functional-query/queries/QueryTest/date-fileformat-support.test index 2d71f5b0b..ac18ee4e1 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/date-fileformat-support.test +++ b/testdata/workloads/functional-query/queries/QueryTest/date-fileformat-support.test @@ -45,18 +45,18 @@ NULL # Inserting text partitions to $DATABASE.date_tbl is OK. insert into $DATABASE.date_tbl partition (date_part) select date_col, date_part from functional.date_tbl; ----- RESULTS -date_part=0001-01-01: 7 -date_part=1399-06-27: 3 -date_part=2017-11-27: 10 -date_part=9999-12-31: 2 +---- RUNTIME_PROFILE +NumModifiedRows: 7 +NumModifiedRows: 3 +NumModifiedRows: 10 +NumModifiedRows: 2 ==== ---- QUERY # Inserting into parquet partition is supported. insert into $DATABASE.date_tbl partition(date_part='1899-12-31') select date_col from functional_parquet.date_tbl where date_part = '1399-06-27'; ----- RESULTS -date_part=1899-12-31: 3 +---- RUNTIME_PROFILE +NumModifiedRows: 3 ==== ---- QUERY # Adding ORC partition works even though Impala cannot write ORC format. 
diff --git a/testdata/workloads/functional-query/queries/QueryTest/date-partitioning.test b/testdata/workloads/functional-query/queries/QueryTest/date-partitioning.test index c41de4fc0..9fd8b51a7 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/date-partitioning.test +++ b/testdata/workloads/functional-query/queries/QueryTest/date-partitioning.test @@ -25,29 +25,29 @@ AnalysisException: Partition spec already exists: (p=DATE '1300-01-01'). ---- QUERY # Date partition formatted differently in insert insert into $DATABASE.dtbl partition (p='1300-1-01') values ('1300-1-1'); ----- RESULTS -p=1300-01-01: 1 +---- RUNTIME_PROFILE +NumModifiedRows: 1 ==== ---- QUERY insert into $DATABASE.dtbl partition (p='1300-01-1') values ('1300-1-02'); ----- RESULTS -p=1300-01-01: 1 +---- RUNTIME_PROFILE +NumModifiedRows: 1 ==== ---- QUERY insert into $DATABASE.dtbl partition (p=DATE '1300-1-1') values ('1300-1-03'); ----- RESULTS -p=1300-01-01: 1 +---- RUNTIME_PROFILE +NumModifiedRows: 1 ==== ---- QUERY # Insert into a new partition insert into $DATABASE.dtbl partition (p=DATE '1400-01-1') values ('1400-1-1'); ----- RESULTS -p=1400-01-01: 1 +---- RUNTIME_PROFILE +NumModifiedRows: 1 ==== ---- QUERY insert into $DATABASE.dtbl partition (p='1400-1-01') values ('1400-1-2'); ----- RESULTS -p=1400-01-01: 1 +---- RUNTIME_PROFILE +NumModifiedRows: 1 ==== ---- QUERY select p, c from $DATABASE.dtbl; @@ -86,9 +86,9 @@ UDF ERROR: String to Date parse failed. Invalid string val: '1400-01-' # Test that STRING is implicitly cast to DATE. 
insert into $DATABASE.dtbl partition(p) select * from $DATABASE.stbl where p in ('1400-1-1', '1400-1-01', '1500-01-1'); ----- RESULTS -p=1400-01-01: 2 -p=1500-01-01: 1 +---- RUNTIME_PROFILE +NumModifiedRows: 2 +NumModifiedRows: 1 ==== ---- QUERY select p, c from $DATABASE.dtbl; diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py index ba3179037..959225a23 100644 --- a/tests/common/test_dimensions.py +++ b/tests/common/test_dimensions.py @@ -18,18 +18,25 @@ # Common test dimensions and associated utility functions. from __future__ import absolute_import, division, print_function -from builtins import range import copy +from itertools import product import os + +from builtins import range import pytest -from itertools import product from tests.common.test_vector import ( - EXEC_OPTION, PROTOCOL, TABLE_FORMAT, - BEESWAX, HS2, HS2_HTTP, - ImpalaTestDimension, ImpalaTestVector, assert_exec_option_key) -from tests.util.filesystem_utils import ( - IS_HDFS) + assert_exec_option_key, + BEESWAX, + EXEC_OPTION, + HS2, + HS2_HTTP, + ImpalaTestDimension, + ImpalaTestVector, + PROTOCOL, + TABLE_FORMAT, +) +from tests.util.filesystem_utils import IS_HDFS WORKLOAD_DIR = os.environ['IMPALA_WORKLOAD_DIR'] @@ -190,18 +197,18 @@ def create_client_protocol_no_strict_dimension(): return ImpalaTestDimension('strict_hs2_protocol', False) -def hs2_parquet_constraint(v): - """Constraint function, used to only run HS2 against Parquet format, because file format - and the client protocol are orthogonal.""" - return (v.get_protocol() == BEESWAX +def default_protocol_or_parquet_constraint(v): + """Constraint function, used to limit non-default test protocol against uncompressed + parquet format, because file format and the client protocol are orthogonal.""" + return (v.get_protocol() == pytest.config.option.default_test_protocol or (v.get_table_format().file_format == 'parquet' and v.get_table_format().compression_codec == 'none')) -def hs2_text_constraint(v): - 
"""Constraint function, used to only run HS2 against uncompressed text, because file - format and the client protocol are orthogonal.""" - return (v.get_protocol() == BEESWAX +def default_protocol_or_text_constraint(v): + """Constraint function, used to limit non-default test protocol against uncompressed + text format, because file format and the client protocol are orthogonal.""" + return (v.get_protocol() == pytest.config.option.default_test_protocol or (v.get_table_format().file_format == 'text' and v.get_table_format().compression_codec == 'none')) diff --git a/tests/query_test/test_chars.py b/tests/query_test/test_chars.py index 2405e4358..6592692fd 100644 --- a/tests/query_test/test_chars.py +++ b/tests/query_test/test_chars.py @@ -19,11 +19,16 @@ from __future__ import absolute_import, division, print_function from copy import deepcopy from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.test_dimensions import (create_exec_option_dimension, - create_client_protocol_dimension, hs2_parquet_constraint, hs2_text_constraint) +from tests.common.test_dimensions import ( + create_client_protocol_dimension, + create_exec_option_dimension, + default_protocol_or_parquet_constraint, + default_protocol_or_text_constraint, +) class TestStringQueries(ImpalaTestSuite): + @classmethod def add_test_dimensions(cls): super(TestStringQueries, cls).add_test_dimensions() @@ -35,7 +40,7 @@ class TestStringQueries(ImpalaTestSuite): # Run these queries through both beeswax and HS2 to get coverage of CHAR/VARCHAR # returned via both protocols. 
cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension()) - cls.ImpalaTestMatrix.add_constraint(hs2_text_constraint) + cls.ImpalaTestMatrix.add_constraint(default_protocol_or_text_constraint) def test_chars(self, vector): self.run_test_case('QueryTest/chars', vector) @@ -57,6 +62,7 @@ class TestStringQueries(ImpalaTestSuite): class TestCharFormats(ImpalaTestSuite): + @classmethod def add_test_dimensions(cls): super(TestCharFormats, cls).add_test_dimensions() @@ -72,7 +78,7 @@ class TestCharFormats(ImpalaTestSuite): # Run these queries through both beeswax and HS2 to get coverage of CHAR/VARCHAR # returned via both protocols. cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension()) - cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint) + cls.ImpalaTestMatrix.add_constraint(default_protocol_or_parquet_constraint) def test_char_format(self, vector): self.run_test_case('QueryTest/chars-formats', vector) diff --git a/tests/query_test/test_date_queries.py b/tests/query_test/test_date_queries.py index 310159203..8f60bfc04 100644 --- a/tests/query_test/test_date_queries.py +++ b/tests/query_test/test_date_queries.py @@ -18,11 +18,16 @@ # Targeted tests for date type. 
from __future__ import absolute_import, division, print_function + from tests.common.file_utils import create_table_and_copy_files from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.skip import SkipIfFS -from tests.common.test_dimensions import (create_exec_option_dimension_from_dict, - create_client_protocol_dimension, hs2_parquet_constraint) +from tests.common.test_dimensions import ( + create_client_protocol_dimension, + create_exec_option_dimension_from_dict, + create_uncompressed_text_dimension, + default_protocol_or_parquet_constraint, +) from tests.shell.util import create_impala_shell_executable_dimension @@ -46,12 +51,17 @@ class TestDateQueriesBase(ImpalaTestSuite): # Run these queries through both beeswax and HS2 to get coverage of date returned # via both protocols. cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension()) - cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint) cls.ImpalaTestMatrix.add_dimension(create_impala_shell_executable_dimension()) class TestDateQueriesAllFormat(TestDateQueriesBase): + @classmethod + def add_test_dimensions(cls): + super(TestDateQueriesAllFormat, cls).add_test_dimensions() + # Limit to 'parquet/none' for non-default test protocol. + cls.ImpalaTestMatrix.add_constraint(default_protocol_or_parquet_constraint) + def test_queries(self, vector): if vector.get_value('table_format').file_format == 'avro': # Avro date test queries are in a separate test file. @@ -69,9 +79,9 @@ class TestDateQueriesTextFormat(TestDateQueriesBase): @classmethod def add_test_dimensions(cls): super(TestDateQueriesTextFormat, cls).add_test_dimensions() - # Only run this test class with 'text' table_format. - cls.ImpalaTestMatrix.add_constraint(lambda v: - v.get_value('table_format').file_format == 'text') + # Only run this test class with 'text/none' table_format. 
+ cls.ImpalaTestMatrix.add_dimension( + create_uncompressed_text_dimension(cls.get_workload())) def test_partitioning(self, vector, unique_database): """ Test partitioning by DATE. """ diff --git a/tests/query_test/test_decimal_queries.py b/tests/query_test/test_decimal_queries.py index dd2c4ef15..18062ea20 100644 --- a/tests/query_test/test_decimal_queries.py +++ b/tests/query_test/test_decimal_queries.py @@ -18,12 +18,16 @@ # Targeted tests for decimal type. from __future__ import absolute_import, division, print_function + import pytest from tests.common.impala_connection import IMPALA_CONNECTION_EXCEPTION from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.test_dimensions import (create_exec_option_dimension_from_dict, - create_client_protocol_dimension, hs2_parquet_constraint) +from tests.common.test_dimensions import ( + create_client_protocol_dimension, + create_exec_option_dimension_from_dict, + default_protocol_or_parquet_constraint, +) from tests.util.filesystem_utils import IS_S3 @@ -33,22 +37,22 @@ class TestDecimalQueries(ImpalaTestSuite): super(TestDecimalQueries, cls).add_test_dimensions() cls.ImpalaTestMatrix.add_dimension( create_exec_option_dimension_from_dict({ - 'decimal_v2' : ['false', 'true'], - 'batch_size' : [0, 1], - 'disable_codegen' : ['false', 'true'], - 'disable_codegen_rows_threshold' : [0]})) + 'decimal_v2': ['false', 'true'], + 'batch_size': [0, 1], + 'disable_codegen': ['false', 'true'], + 'disable_codegen_rows_threshold': [0]})) # Hive < 0.11 does not support decimal so we can't run these tests against the other # file formats. # TODO: Enable them on Hive >= 0.11. 
- cls.ImpalaTestMatrix.add_constraint(lambda v:\ - (v.get_value('table_format').file_format == 'text' and - v.get_value('table_format').compression_codec == 'none') or - v.get_value('table_format').file_format in ['parquet', 'orc', 'kudu', 'json']) + cls.ImpalaTestMatrix.add_constraint(lambda v: + v.get_value('table_format').file_format in ['parquet', 'orc', 'kudu', 'json'] + or (v.get_value('table_format').file_format == 'text' + and v.get_value('table_format').compression_codec == 'none')) # Run these queries through both beeswax and HS2 to get coverage of decimals returned # via both protocols. cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension()) - cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint) + cls.ImpalaTestMatrix.add_constraint(default_protocol_or_parquet_constraint) def test_queries(self, vector): self.run_test_case('QueryTest/decimal', vector) @@ -75,8 +79,8 @@ class TestAvroDecimalQueries(ImpalaTestSuite): def add_test_dimensions(cls): super(TestAvroDecimalQueries, cls).add_test_dimensions() cls.ImpalaTestMatrix.add_constraint(lambda v: - (v.get_value('table_format').file_format == 'avro' and - v.get_value('table_format').compression_codec == 'snap')) + v.get_value('table_format').file_format == 'avro' + and v.get_value('table_format').compression_codec == 'snap') def test_avro_queries(self, vector): self.run_test_case('QueryTest/decimal_avro', vector) @@ -91,7 +95,7 @@ class TestDecimalOverflowExprs(ImpalaTestSuite): def add_test_dimensions(cls): super(TestDecimalOverflowExprs, cls).add_test_dimensions() cls.ImpalaTestMatrix.add_constraint(lambda v: - (v.get_value('table_format').file_format in ['kudu', 'parquet', 'text'])) + v.get_value('table_format').file_format in ['kudu', 'parquet', 'text']) def test_insert_select_exprs(self, vector, unique_database): TBL_NAME_1 = '`{0}`.`overflowed_decimal_tbl_1`'.format(unique_database) diff --git a/tests/query_test/test_queries.py b/tests/query_test/test_queries.py index 
b712bdeb8..c8fdb4353 100644 --- a/tests/query_test/test_queries.py +++ b/tests/query_test/test_queries.py @@ -18,20 +18,28 @@ # General Impala query tests from __future__ import absolute_import, division, print_function -import pytest -import re from copy import deepcopy +import re +from subprocess import check_call + +import pytest from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.skip import ( - SkipIfEC, SkipIfCatalogV2, SkipIfNotHdfsMinicluster, SkipIfFS) +from tests.common.skip import SkipIfFS, SkipIfNotHdfsMinicluster from tests.common.test_dimensions import ( - create_uncompressed_text_dimension, create_uncompressed_json_dimension, - create_exec_option_dimension_from_dict, create_client_protocol_dimension, - hs2_parquet_constraint, extend_exec_option_dimension, FILE_FORMAT_TO_STORED_AS_MAP, - add_exec_option_dimension, create_exec_option_dimension) + add_exec_option_dimension, + create_client_protocol_dimension, + create_exec_option_dimension, + create_exec_option_dimension_from_dict, + create_uncompressed_json_dimension, + create_uncompressed_text_dimension, + default_protocol_or_parquet_constraint, + extend_exec_option_dimension, + FILE_FORMAT_TO_STORED_AS_MAP, +) +from tests.common.test_vector import BEESWAX from tests.util.filesystem_utils import get_fs_path -from subprocess import check_call + class TestQueries(ImpalaTestSuite): @@ -54,7 +62,7 @@ class TestQueries(ImpalaTestSuite): # Don't run all combinations of table format and protocol - the dimensions should # be orthogonal. cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension()) - cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint) + cls.ImpalaTestMatrix.add_constraint(default_protocol_or_parquet_constraint) # Adding a test dimension here to test the small query opt in exhaustive. 
if cls.exploration_strategy() == 'exhaustive': @@ -212,6 +220,7 @@ class TestQueries(ImpalaTestSuite): pytest.xfail("null data does not appear to work in hbase") self.run_test_case('QueryTest/null_data', vector) + # Tests in this class are only run against text/none either because that's the only # format that is supported, or the tests don't exercise the file format. class TestQueriesTextTables(ImpalaTestSuite): @@ -254,6 +263,13 @@ class TestQueriesTextTables(ImpalaTestSuite): # Tests in this class are only run against json/none either because that's the only # format that is supported, or the tests don't exercise the file format. class TestQueriesJsonTables(ImpalaTestSuite): + + @classmethod + def default_test_protocol(cls): + # Some assertions in this test relies on beeswax-specific return values such as + # Infinity, NaN, false, and true. HS2 returns inf, nan, False, and True instead. + return BEESWAX + @classmethod def add_test_dimensions(cls): super(TestQueriesJsonTables, cls).add_test_dimensions() @@ -277,6 +293,7 @@ class TestQueriesJsonTables(ImpalaTestSuite): vector.get_value('exec_option')['abort_on_error'] = 0 self.run_test_case('QueryTest/overflow_json', vector) + # Tests in this class are only run against Parquet because the tests don't exercise the # file format. class TestQueriesParquetTables(ImpalaTestSuite): @@ -304,6 +321,7 @@ class TestQueriesParquetTables(ImpalaTestSuite): vector.get_value('exec_option')['num_nodes'] = 1 self.run_test_case('QueryTest/single-node-large-sorts', vector) + # Tests for queries in HDFS-specific tables, e.g. AllTypesAggMultiFilesNoPart. 
class TestHdfsQueries(ImpalaTestSuite): @classmethod @@ -387,6 +405,7 @@ class TestPartitionKeyScansWithMultipleBlocks(ImpalaTestSuite): "SELECT max(year) FROM %s.alltypes_multiblocks" % (unique_database)) assert int(result.get_data()) == 2010 + class TestTopNReclaimQuery(ImpalaTestSuite): """Test class to validate that TopN periodically reclaims tuple pool memory and runs with a lower memory footprint.""" diff --git a/tests/query_test/test_utf8_strings.py b/tests/query_test/test_utf8_strings.py index 768da848e..d104a77aa 100644 --- a/tests/query_test/test_utf8_strings.py +++ b/tests/query_test/test_utf8_strings.py @@ -16,24 +16,27 @@ # under the License. from __future__ import absolute_import, division, print_function + from tests.common.impala_test_suite import ImpalaTestSuite -from tests.common.test_dimensions import (create_exec_option_dimension, - create_client_protocol_dimension, hs2_parquet_constraint) +from tests.common.test_dimensions import ( + create_client_protocol_dimension, + create_exec_option_dimension, +) class TestUtf8StringFunctions(ImpalaTestSuite): + @classmethod def add_test_dimensions(cls): super(TestUtf8StringFunctions, cls).add_test_dimensions() cls.ImpalaTestMatrix.add_dimension( create_exec_option_dimension(disable_codegen_options=[False, True])) cls.ImpalaTestMatrix.add_constraint(lambda v: - v.get_value('table_format').file_format in ['parquet'] and - v.get_value('table_format').compression_codec in ['none']) + v.get_value('table_format').file_format in ['parquet'] + and v.get_value('table_format').compression_codec in ['none']) # Run these queries through both beeswax and HS2 to get coverage of CHAR/VARCHAR # returned via both protocols. cls.ImpalaTestMatrix.add_dimension(create_client_protocol_dimension()) - cls.ImpalaTestMatrix.add_constraint(hs2_parquet_constraint) def test_string_functions(self, vector): self.run_test_case('QueryTest/utf8-string-functions', vector)
