This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 41d7a2b449218209b8e7a58649f589fd2252113f Author: Steve Carlin <[email protected]> AuthorDate: Wed Jan 28 17:04:31 2026 -0800 IMPALA-14746: Calcite planner: Allow overrides in test framework This commit will allow overrides in the test framework for Calcite where the result set is different from the original planner. The overrides will be used if the USE_CALCITE_PLANNER environment variable is set to true. The allowable overridden sections are: RESULTS, CATCH, RUNTIME_PROFILE. The sections will be overridden if the environment variable is set to true and if the section has a CALCITE_PLANNER_ prefix (e.g. CALCITE_PLANNER_RESULTS). A sample is given in the subquery.test file. Change-Id: Id7e22c63b27232bffc442a75952b9942067f0e85 Reviewed-on: http://gerrit.cloudera.org:8080/23907 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- bin/impala-config.sh | 2 ++ bin/start-impala-cluster.py | 6 ++-- .../queries/QueryTest/subquery.test | 15 ++++++++- tests/common/environ.py | 1 + tests/common/impala_test_suite.py | 39 ++++++++++++++-------- tests/util/test_file_parser.py | 14 ++++---- 6 files changed, 55 insertions(+), 22 deletions(-) diff --git a/bin/impala-config.sh b/bin/impala-config.sh index 9f923e426..074e1c14d 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -333,6 +333,8 @@ export IMPALA_DOCKER_JAVA=${IMPALA_DOCKER_JAVA:-"17"} # to true due to the large performance benefits. 
export IMPALA_USE_CLOUDFLARE_ZLIB=${IMPALA_USE_CLOUDFLARE_ZLIB:-"true"} +export USE_CALCITE_PLANNER=${USE_CALCITE_PLANNER:-"false"} + # When IMPALA_(CDP_COMPONENT)_URL are overridden, they may contain '$(platform_label)' # which will be substituted for the CDP platform label in bootstrap_toolchain.py unset IMPALA_HADOOP_URL diff --git a/bin/start-impala-cluster.py b/bin/start-impala-cluster.py index 4dee99b8b..f452b0a1a 100755 --- a/bin/start-impala-cluster.py +++ b/bin/start-impala-cluster.py @@ -56,6 +56,8 @@ INTERNAL_LISTEN_HOST = os.getenv("INTERNAL_LISTEN_HOST", "localhost") TARGET_FILESYSTEM = os.getenv("TARGET_FILESYSTEM") or "hdfs" HOST_TZ = os.getenv("TZ", None) +BOOLEAN_STRINGS = ["true", "false"] + # Options parser = OptionParser() parser.add_option("-s", "--cluster_size", type="int", dest="cluster_size", default=3, @@ -201,8 +203,8 @@ parser.add_option("--tuple_cache_debug_dump_dir", dest="tuple_cache_debug_dump_d parser.add_option("--tuple_cache_eviction_policy", dest="tuple_cache_eviction_policy", default="LRU", help="This specifies the cache eviction policy to use " "for the tuple cache.") -parser.add_option("--use_calcite_planner", default="False", type="choice", - choices=["true", "True", "false", "False"], +parser.add_option("--use_calcite_planner", type="choice", choices=BOOLEAN_STRINGS, + default=os.environ.get("USE_CALCITE_PLANNER", "false"), help="If true, use the Calcite planner for query optimization " "instead of the Impala planner") parser.add_option("--enable_ranger_authz", dest="enable_ranger_authz", diff --git a/testdata/workloads/functional-query/queries/QueryTest/subquery.test b/testdata/workloads/functional-query/queries/QueryTest/subquery.test index ebe76dd1c..cab6a3e59 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/subquery.test +++ b/testdata/workloads/functional-query/queries/QueryTest/subquery.test @@ -1017,7 +1017,7 @@ SELECT id FROM alltypes WHERE id = (SELECT bigint_col FROM functional.alltypes_view) ---- 
RESULTS ---- CATCH -Subquery must not return more than one row: SELECT bigint_col FROM functional.alltypes_view +row_regex:.*Subquery must not return more than one row.* ==== ---- QUERY # Runtime scalar subquery with offset. @@ -1092,8 +1092,21 @@ select id, (select count(id) from alltypessmall where id=t.id) from alltypestiny t order by id ---- RESULTS +---- CALCITE_PLANNER_RESULTS +0,1 +1,1 +2,1 +3,1 +4,1 +5,1 +6,1 +7,1 +---- TYPES +INT, BIGINT ---- CATCH A correlated scalar subquery is not supported in the expression: +---- CALCITE_PLANNER_CATCH +__NO_ERROR__ ==== ---- QUERY # Uncorrelated Scalar Aggregate in select list combined with aggregation in parent query diff --git a/tests/common/environ.py b/tests/common/environ.py index bba7df8fc..53701d990 100644 --- a/tests/common/environ.py +++ b/tests/common/environ.py @@ -127,6 +127,7 @@ IS_TUPLE_CACHE_CORRECT_CHECK = ( os.getenv("TUPLE_CACHE_DEBUG_DUMP_DIR", "") != "" ) +IS_CALCITE_PLANNER = os.environ.get("USE_CALCITE_PLANNER", False) == 'true' class ImpalaBuildFlavors: """ diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py index fe59cd024..ab710a387 100644 --- a/tests/common/impala_test_suite.py +++ b/tests/common/impala_test_suite.py @@ -48,6 +48,7 @@ from tests.common.environ import ( MANAGED_WAREHOUSE_DIR, EXTERNAL_WAREHOUSE_DIR, ICEBERG_DEFAULT_FORMAT_VERSION, + IS_CALCITE_PLANNER, ImpalaTestClusterProperties) from tests.common.errors import Timeout from tests.common.impala_connection import create_connection @@ -723,14 +724,15 @@ class ImpalaTestSuite(BaseTestSuite): assert False, 'Unexpected exception string. Expected: %s\nNot found in actual: %s' % \ (expected_str, actual_str) - def __verify_results_and_errors(self, vector, test_section, result, use_db): + def __verify_results_and_errors(self, vector, test_section, result_section_name, + result, use_db): """Verifies that both results and error sections are as expected. 
Rewrites both by replacing $NAMENODE, $DATABASE and $IMPALA_HOME with their actual values, and optionally rewriting filenames with __HDFS_FILENAME__, to ensure that expected and actual values are easily compared. """ replace_filenames_with_placeholder = True - for section_name in ('RESULTS', 'ERRORS'): + for section_name in (result_section_name, 'ERRORS'): if section_name in test_section: if "$NAMENODE" in test_section[section_name]: replace_filenames_with_placeholder = False @@ -751,7 +753,7 @@ class ImpalaTestSuite(BaseTestSuite): if use_db: test_section[section_name] = test_section[section_name].replace( '$DATABASE', use_db) - result_section, type_section = 'RESULTS', 'TYPES' + result_section, type_section = result_section_name, 'TYPES' verify_raw_results(test_section, result, vector, result_section, type_section, self.pytest_config().option.update_results, @@ -894,18 +896,22 @@ class ImpalaTestSuite(BaseTestSuite): LOG.info('Query Name: \n%s\n' % test_section['QUERY_NAME']) result = None + catch_section_name = 'CALCITE_PLANNER_CATCH' \ + if IS_CALCITE_PLANNER and 'CALCITE_PLANNER_CATCH' in test_section \ + else 'CATCH' try: result = exec_fn(query, user=test_section.get('USER', '').strip() or None) except Exception as e: - if 'CATCH' in test_section: - self.__verify_exceptions(test_section['CATCH'], str(e), use_db) + if catch_section_name in test_section: + self.__verify_exceptions(test_section[catch_section_name], str(e), use_db) assert error_msg_startswith(str(e)) continue raise - if 'CATCH' in test_section and '__NO_ERROR__' not in test_section['CATCH']: + if catch_section_name in test_section \ + and '__NO_ERROR__' not in test_section[catch_section_name]: expected_str = self.__do_replacements( - " or ".join(test_section['CATCH']).strip(), + " or ".join(test_section[catch_section_name]).strip(), use_db=use_db, extra=test_file_vars) assert False, "Expected exception: {0}\n\nwhen running:\n\n{1}".format( @@ -918,15 +924,19 @@ class 
ImpalaTestSuite(BaseTestSuite): if encoding and result.data: result.data = [row.decode(encoding) for row in result.data] # Replace $NAMENODE in the expected results with the actual namenode URI. - if 'RESULTS' in test_section: + results_section_name = 'CALCITE_PLANNER_RESULTS' \ + if IS_CALCITE_PLANNER and 'CALCITE_PLANNER_RESULTS' in test_section \ + else 'RESULTS' + if results_section_name in test_section: # Combining 'RESULTS' with 'DML_RESULTS" is currently unsupported because # __verify_results_and_errors calls verify_raw_results which always checks # ERRORS, TYPES, LABELS, etc. which doesn't make sense if there are two # different result sets to consider (IMPALA-4471). assert 'DML_RESULTS' not in test_section - test_section['RESULTS'] = self.__do_replacements( - test_section['RESULTS'], use_db=use_db, extra=test_file_vars) - self.__verify_results_and_errors(vector, test_section, result, use_db) + test_section[results_section_name] = self.__do_replacements( + test_section[results_section_name], use_db=use_db, extra=test_file_vars) + self.__verify_results_and_errors(vector, test_section, results_section_name, + result, use_db) else: # TODO: Can't validate errors without expected results for now. assert 'ERRORS' not in test_section,\ @@ -934,8 +944,9 @@ class ImpalaTestSuite(BaseTestSuite): # If --update_results, then replace references to the namenode URI with $NAMENODE. # TODO(todd) consider running do_replacements in reverse, though that may cause # some false replacements for things like username. 
- if self.pytest_config().option.update_results and 'RESULTS' in test_section: - test_section['RESULTS'] = test_section['RESULTS'] \ + if self.pytest_config().option.update_results \ + and results_section_name in test_section: + test_section[results_section_name] = test_section[results_section_name] \ .replace(NAMENODE, '$NAMENODE') \ .replace(IMPALA_HOME, '$IMPALA_HOME') \ .replace(INTERNAL_LISTEN_HOST, '$INTERNAL_LISTEN_HOST') \ @@ -945,6 +956,8 @@ class ImpalaTestSuite(BaseTestSuite): # If this table format has a RUNTIME_PROFILE section specifically for it, # evaluate that section and ignore any general RUNTIME_PROFILE sections. rt_profile_info = 'RUNTIME_PROFILE_%s' % table_format_info.file_format + elif IS_CALCITE_PLANNER and 'CALCITE_PLANNER_RUNTIME_PROFILE' in test_section: + rt_profile_info = 'CALCITE_PLANNER_RUNTIME_PROFILE' elif 'RUNTIME_PROFILE' in test_section: rt_profile_info = 'RUNTIME_PROFILE' diff --git a/tests/util/test_file_parser.py b/tests/util/test_file_parser.py index b6e3601f8..5f80ff3d9 100644 --- a/tests/util/test_file_parser.py +++ b/tests/util/test_file_parser.py @@ -102,7 +102,9 @@ def parse_query_test_file(file_name, valid_section_names=None, encoding=None): if section_names is None: section_names = ['QUERY', 'HIVE_QUERY', 'RESULTS', 'TYPES', 'LABELS', 'SETUP', 'CATCH', 'ERRORS', 'USER', 'RUNTIME_PROFILE', 'SHELL', 'DML_RESULTS', - 'HS2_TYPES', 'HIVE_MAJOR_VERSION', 'LINEAGE', 'IS_HDFS_ONLY'] + 'HS2_TYPES', 'HIVE_MAJOR_VERSION', 'LINEAGE', 'IS_HDFS_ONLY', + 'CALCITE_PLANNER_RESULTS', 'CALCITE_PLANNER_CATCH', + 'CALCITE_PLANNER_RUNTIME_PROFILE'] return parse_test_file(file_name, section_names, encoding=encoding, skip_unknown_sections=False) @@ -266,15 +268,15 @@ def parse_test_file_text(text, valid_section_names, skip_unknown_sections=True): else: raise RuntimeError('Unknown subsection comment: %s' % comment) - if subsection_name == 'CATCH': - parsed_sections['CATCH'] = list() + if 'CATCH' in subsection_name: + 
parsed_sections[subsection_name] = list() if subsection_comment is None: - parsed_sections['CATCH'].append(subsection_str) + parsed_sections[subsection_name].append(subsection_str) elif subsection_comment == 'ANY_OF': - parsed_sections['CATCH'].extend(lines_content) + parsed_sections[subsection_name].extend(lines_content) else: raise RuntimeError('Unknown subsection comment: %s' % subsection_comment) - for exception_str in parsed_sections['CATCH']: + for exception_str in parsed_sections[subsection_name]: assert exception_str.strip(), "Empty exception string." continue
