(impala) 01/02: IMPALA-13729: Accept error messages not starting with prompt

joemcdonnell Wed, 26 Feb 2025 09:44:49 -0800

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


commit 242095ac8a2f8493d72bb47693ee1e0728b035c8
Author: Xuebin Su <[email protected]>
AuthorDate: Sat Feb 8 17:18:17 2025 +0800

    IMPALA-13729: Accept error messages not starting with prompt
    
    Previously, error_msg_expected() only accepted error messages starting
    with the following error prompt:
    ```
    Query <query_id> failed:\n
    ```
    However, for some tests using the Beeswax protocol, the error prompt may
    appear in the middle of the error message instead of at its beginning.
    
    Therefore, this patch adapts error_msg_expected() to accept error
    messages not starting with the error prompt.
    
    The error_msg_expected() function is renamed to error_msg_startswith()
    to better describe its behavior.
    
    Change-Id: Iac3e68bcc36776f7fd6cc9c838dd8da9c3ecf58b
    Reviewed-on: http://gerrit.cloudera.org:8080/22468
    Reviewed-by: Daniel Becker <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Riza Suminto <[email protected]>
---
 tests/common/impala_test_suite.py             |  4 +--
 tests/common/test_result_verifier.py          | 42 +++++++++++++++------------
 tests/custom_cluster/test_coordinators.py     |  4 +--
 tests/custom_cluster/test_executor_groups.py  |  4 +--
 tests/custom_cluster/test_kill_query.py       |  4 +--
 tests/custom_cluster/test_kudu.py             |  8 ++---
 tests/custom_cluster/test_process_failures.py |  4 +--
 tests/custom_cluster/test_rpc_timeout.py      |  6 ++--
 tests/custom_cluster/test_sys_db.py           |  4 +--
 tests/hs2/hs2_test_suite.py                   |  4 +--
 tests/query_test/test_iceberg.py              | 15 ++++++----
 tests/shell/test_shell_commandline.py         | 14 ++++-----
 tests/shell/test_shell_interactive.py         |  6 ++--
 tests/util/cancel_util.py                     |  8 ++---
 14 files changed, 67 insertions(+), 60 deletions(-)

diff --git a/tests/common/impala_test_suite.py 
b/tests/common/impala_test_suite.py
index 4addc0ecd..866d9639d 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -58,7 +58,7 @@ from tests.common.test_dimensions import (
     get_dataset_from_workload,
     load_table_info_dimension)
 from tests.common.test_result_verifier import (
-    error_msg_expected,
+    error_msg_startswith,
     try_compile_regex,
     verify_lineage,
     verify_raw_results,
@@ -877,7 +877,7 @@ class ImpalaTestSuite(BaseTestSuite):
         except Exception as e:
           if 'CATCH' in test_section:
             self.__verify_exceptions(test_section['CATCH'], str(e), use_db)
-            assert error_msg_expected(str(e))
+            assert error_msg_startswith(str(e))
             continue
           raise
 
diff --git a/tests/common/test_result_verifier.py 
b/tests/common/test_result_verifier.py
index 0ecc4042d..3a9045a81 100644
--- a/tests/common/test_result_verifier.py
+++ b/tests/common/test_result_verifier.py
@@ -823,34 +823,38 @@ def assert_codegen_cache_hit(profile_string, expect_hit):
 QUERY_ID_REGEX = r"(?!0{16})[0-9a-z]{16}:(?!0{16})[0-9a-z]{16}"
 
 
-def error_msg_expected(actual_msg, expected_msg="", query_id=None):
+def error_msg_startswith(actual_msg, expected_msg="", query_id=None):
   """
-  Check if the actual error message is expected.
-
   As defined in `ImpalaServer::QUERY_ERROR_FORMAT`, an error message is 
expected to
   has the following form:
 
     Query <query_id> failed:\n<error_detail>\n
 
-  - For `query_id`,
-      - If a query id is specified in the parameter, it checks if the actual 
error
-        message contains exactly the query id.
-      - Otherwise, it checks whether `query_id` match the format using a
-        regular expresssion.
-  - For `error_detail`, it checks whether this part starts with the 
`expected_msg` if it
-    is specified in the parameter. This is sufficient to distinguish one kind 
of error
-    from another.
+  This function returns True if
+  1. `actual_msg` contains the error prompt "Query <query_id> failed:\n", and
+  2. `expected_msg` follows right after the error prompt.
+
+  For `query_id`,
+  - If the query_id argument is not None, the function checks if the actual 
error
+    message contains exactly the query id.
+  - Otherwise, it checks if the `query_id` part in the actual error message 
matches the
+    format using the regular expression.
+
+  NOTE: Messages of errors such as "Invalid session id" do not contain a query 
id
+  since such an error may occur before a query id is generated.
   """
-  if query_id is None:
-    ERROR_REGEX = "^Query " + QUERY_ID_REGEX + " failed:\n"
-    m = re.search(ERROR_REGEX, actual_msg)
+  if query_id is not None:
+    ERROR_PROMPT = "Query " + query_id + " failed:\n"
+    start = actual_msg.find(ERROR_PROMPT)
+    if start == -1:
+      return False
+    return actual_msg.startswith(expected_msg, start + len(ERROR_PROMPT))
+  else:
+    ERROR_PROMPT = "Query " + QUERY_ID_REGEX + " failed:\n"
+    m = re.search(ERROR_PROMPT, actual_msg)
     if m is None:
       return False
-    return actual_msg.find(expected_msg, m.end()) != -1
-
-  # The beginning of `ImpalaServer::QUERY_ERROR_FORMAT`
-  ERROR_PROMPT = "Query {} failed:\n{}"
-  return actual_msg.startswith(ERROR_PROMPT.format(query_id, expected_msg))
+    return actual_msg.startswith(expected_msg, m.end())
 
 
 def error_msg_equal(msg1, msg2):
diff --git a/tests/custom_cluster/test_coordinators.py 
b/tests/custom_cluster/test_coordinators.py
index 0fc75e4dd..f9b882919 100644
--- a/tests/custom_cluster/test_coordinators.py
+++ b/tests/custom_cluster/test_coordinators.py
@@ -26,7 +26,7 @@ from subprocess import check_call
 from tests.util.filesystem_utils import get_fs_path
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.skip import SkipIf, SkipIfFS
-from tests.common.test_result_verifier import error_msg_expected
+from tests.common.test_result_verifier import error_msg_startswith
 
 LOG = logging.getLogger('test_coordinators')
 LOG.setLevel(level=logging.DEBUG)
@@ -321,7 +321,7 @@ class TestCoordinators(CustomClusterTestSuite):
     result = self.execute_query_expect_failure(self.client, query)
     expected_error = "Admission for query exceeded timeout 2000ms in pool " \
                      "default-pool. Queued reason: Waiting for executors to 
start."
-    assert error_msg_expected(str(result), expected_error)
+    assert error_msg_startswith(str(result), expected_error)
     # Now pick a coordinator only query.
     query = "select 1"
     self.execute_query_expect_success(self.client, query)
diff --git a/tests/custom_cluster/test_executor_groups.py 
b/tests/custom_cluster/test_executor_groups.py
index 923a4f688..3b4fe2768 100644
--- a/tests/custom_cluster/test_executor_groups.py
+++ b/tests/custom_cluster/test_executor_groups.py
@@ -21,7 +21,7 @@ from __future__ import absolute_import, division, 
print_function
 from builtins import range
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.parametrize import UniqueDatabase
-from tests.common.test_result_verifier import error_msg_expected
+from tests.common.test_result_verifier import error_msg_startswith
 from tests.util.concurrent_workload import ConcurrentWorkload
 
 import json
@@ -536,7 +536,7 @@ class TestExecutorGroups(CustomClusterTestSuite):
                      "start. Only DDL queries and queries scheduled only on 
the " \
                      "coordinator (either NUM_NODES set to 1 or when small 
query " \
                      "optimization is triggered) can currently run."
-    assert error_msg_expected(str(result), expected_error)
+    assert error_msg_startswith(str(result), expected_error)
     assert self._get_num_executor_groups(only_healthy=True) == 0
 
   @pytest.mark.execute_serially
diff --git a/tests/custom_cluster/test_kill_query.py 
b/tests/custom_cluster/test_kill_query.py
index 1e035a75a..c194be0d5 100644
--- a/tests/custom_cluster/test_kill_query.py
+++ b/tests/custom_cluster/test_kill_query.py
@@ -21,7 +21,7 @@ import pytest
 
 import tests.common.cluster_config as cluster_config
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
-from tests.common.test_result_verifier import error_msg_expected
+from tests.common.test_result_verifier import error_msg_startswith
 from tests.util.cancel_util import (
     QueryToKill,
     assert_kill_error,
@@ -107,7 +107,7 @@ class TestKillQuery(CustomClusterTestSuite):
             "Rejected query from pool default-pool: "
             "disabled by requests limit set to 0"
         )
-        assert error_msg_expected(str(e), expected_msg)
+        assert error_msg_startswith(str(e), expected_msg)
       assert_kill_error(
           client,
           "Could not find query on any coordinator",
diff --git a/tests/custom_cluster/test_kudu.py 
b/tests/custom_cluster/test_kudu.py
index aa380a951..b6d5006b8 100644
--- a/tests/custom_cluster/test_kudu.py
+++ b/tests/custom_cluster/test_kudu.py
@@ -27,7 +27,7 @@ from tests.common.custom_cluster_test_suite import 
CustomClusterTestSuite
 from tests.common.kudu_test_suite import KuduTestSuite
 from tests.common.skip import SkipIfKudu, SkipIfBuildType, SkipIf
 from tests.common.test_dimensions import BEESWAX, add_mandatory_exec_option
-from tests.common.test_result_verifier import error_msg_expected
+from tests.common.test_result_verifier import error_msg_startswith
 
 KUDU_MASTER_HOSTS = pytest.config.option.kudu_master_hosts
 LOG = logging.getLogger(__name__)
@@ -441,19 +441,19 @@ class TestKuduTransactionBase(CustomClusterTestSuite):
       self.execute_query(self._update_query.format(table_name))
       assert False, "query was expected to fail"
     except ImpalaBeeswaxException as e:
-      assert error_msg_expected(str(e), "Kudu reported error: Not implemented")
+      assert error_msg_startswith(str(e), "Kudu reported error: Not 
implemented")
 
     try:
       self.execute_query(self._upsert_query.format(table_name))
       assert False, "query was expected to fail"
     except ImpalaBeeswaxException as e:
-      assert error_msg_expected(str(e), "Kudu reported error: Not implemented")
+      assert error_msg_startswith(str(e), "Kudu reported error: Not 
implemented")
 
     try:
       self.execute_query(self._delete_query.format(table_name))
       assert False, "query was expected to fail"
     except ImpalaBeeswaxException as e:
-      assert error_msg_expected(str(e), "Kudu reported error: Not implemented")
+      assert error_msg_startswith(str(e), "Kudu reported error: Not 
implemented")
 
     # Verify that number of rows has not been changed.
     cursor.execute(self._row_num_query.format(table_name))
diff --git a/tests/custom_cluster/test_process_failures.py 
b/tests/custom_cluster/test_process_failures.py
index 011b481d0..39aa34ad3 100644
--- a/tests/custom_cluster/test_process_failures.py
+++ b/tests/custom_cluster/test_process_failures.py
@@ -23,7 +23,7 @@ from beeswaxd.BeeswaxService import QueryState
 from tests.common.custom_cluster_test_suite import (
     DEFAULT_CLUSTER_SIZE,
     CustomClusterTestSuite)
-from tests.common.test_result_verifier import error_msg_expected
+from tests.common.test_result_verifier import error_msg_startswith
 
 # The exact query doesn't matter much for these tests, just want a query that 
touches
 # data on all nodes.
@@ -154,7 +154,7 @@ class TestProcessFailures(CustomClusterTestSuite):
     query_id = handle.get_handle().id
     error_state = "Failed due to unreachable impalad"
     assert impalad.service.wait_for_query_status(client, query_id, error_state)
-    assert error_msg_expected(client.get_log(handle), error_state, query_id)
+    assert error_msg_startswith(client.get_log(handle), error_state, query_id)
 
     # Assert that the query status on the query profile web page contains the 
expected
     # failed hostport.
diff --git a/tests/custom_cluster/test_rpc_timeout.py 
b/tests/custom_cluster/test_rpc_timeout.py
index 811bb99a9..ac2e1def8 100644
--- a/tests/custom_cluster/test_rpc_timeout.py
+++ b/tests/custom_cluster/test_rpc_timeout.py
@@ -22,7 +22,7 @@ from tests.beeswax.impala_beeswax import 
ImpalaBeeswaxException
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.impala_cluster import ImpalaCluster
 from tests.common.skip import SkipIfBuildType, SkipIfFS
-from tests.common.test_result_verifier import error_msg_expected
+from tests.common.test_result_verifier import error_msg_startswith
 from tests.verifiers.metric_verifier import MetricVerifier
 
 # The BE krpc port of the impalad to simulate rpc errors in tests.
@@ -229,8 +229,8 @@ class TestRPCTimeout(CustomClusterTestSuite):
     debug_action = 'IMPALA_MISS_EXEC_COMPLETE_CB:[email protected]'
     ex = self.execute_query_expect_failure(self.client, query,
         query_options={'retry_failed_queries': 'false', 'debug_action': 
debug_action})
-    assert error_msg_expected(str(ex), "Remote error: Runtime error: Debug 
Action: "
-        "IMPALA_SERVICE_POOL:FAIL")
+    assert error_msg_startswith(str(ex), "Exec() rpc failed: Remote error: "
+                                "Runtime error: Debug Action: 
IMPALA_SERVICE_POOL:FAIL")
 
 class TestCatalogRPCTimeout(CustomClusterTestSuite):
   """"Tests RPC timeout and retry handling for catalogd operations."""
diff --git a/tests/custom_cluster/test_sys_db.py 
b/tests/custom_cluster/test_sys_db.py
index 633731266..597f668b8 100644
--- a/tests/custom_cluster/test_sys_db.py
+++ b/tests/custom_cluster/test_sys_db.py
@@ -20,7 +20,7 @@ from __future__ import absolute_import, division, 
print_function
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 from tests.common.test_dimensions import create_single_exec_option_dimension
-from tests.common.test_result_verifier import error_msg_expected
+from tests.common.test_result_verifier import error_msg_startswith
 
 
 class TestSysDb(CustomClusterTestSuite):
@@ -60,6 +60,6 @@ class TestSysDb(CustomClusterTestSuite):
     except ImpalaBeeswaxException as e:
       expected_error = "IllegalStateException: Can't create blacklisted table: 
{0}" \
           .format(table_name)
-      assert error_msg_expected(str(e), expected_error), \
+      assert error_msg_startswith(str(e), expected_error), \
           "table '{0}' failed to create but for the wrong reason:\n{1}\n" \
           .format(table_name, str(e))
diff --git a/tests/hs2/hs2_test_suite.py b/tests/hs2/hs2_test_suite.py
index 312a61060..a79743749 100644
--- a/tests/hs2/hs2_test_suite.py
+++ b/tests/hs2/hs2_test_suite.py
@@ -26,7 +26,7 @@ from thrift.transport.TSocket import TSocket
 from thrift.transport.TTransport import TBufferedTransport
 from thrift.protocol import TBinaryProtocol
 from tests.common.impala_test_suite import ImpalaTestSuite, 
IMPALAD_HS2_HOST_PORT
-from tests.common.test_result_verifier import error_msg_expected
+from tests.common.test_result_verifier import error_msg_startswith
 from time import sleep, time
 import sys
 
@@ -149,7 +149,7 @@ class HS2TestSuite(ImpalaTestSuite):
     if expected_status_code != TCLIService.TStatusCode.SUCCESS_STATUS\
        and expected_error_prefix is not None:
       assert response.status.errorMessage.startswith(expected_error_prefix) or 
\
-             error_msg_expected(response.status.errorMessage, 
expected_error_prefix)
+             error_msg_startswith(response.status.errorMessage, 
expected_error_prefix)
 
   @staticmethod
   def check_invalid_session(response):
diff --git a/tests/query_test/test_iceberg.py b/tests/query_test/test_iceberg.py
index 46321ef0f..1d286b045 100644
--- a/tests/query_test/test_iceberg.py
+++ b/tests/query_test/test_iceberg.py
@@ -39,7 +39,7 @@ from tests.beeswax.impala_beeswax import 
ImpalaBeeswaxException
 from tests.common.iceberg_test_suite import IcebergTestSuite
 from tests.common.skip import SkipIf, SkipIfFS, SkipIfDockerizedCluster
 from tests.common.test_dimensions import add_exec_option_dimension
-from tests.common.test_result_verifier import error_msg_expected
+from tests.common.test_result_verifier import error_msg_startswith
 from tests.common.file_utils import (
   create_iceberg_table_from_directory,
   create_table_from_parquet)
@@ -1442,7 +1442,9 @@ class TestIcebergTable(IcebergTestSuite):
         query_options=abort_ice_transaction_options)
     # Check that the error message looks reasonable.
     result = str(err)
-    assert error_msg_expected(result, "CommitFailedException: simulated commit 
failure")
+    assert error_msg_startswith(result,
+        "ImpalaRuntimeException: simulated commit failure\n"
+        "CAUSED BY: CommitFailedException: simulated commit failure")
     # Check that no data was inserted.
     data = self.execute_query_expect_success(self.client,
         "select * from {0}".format(tbl_name))
@@ -1457,8 +1459,8 @@ class TestIcebergTable(IcebergTestSuite):
         .format(tbl_name, "j"), query_options=abort_ice_transaction_options)
     ddl_result = str(ddl_err)
     # Check that the error message looks reasonable.
-    assert error_msg_expected(ddl_result,
-                              "CommitFailedException: simulated commit 
failure")
+    assert error_msg_startswith(ddl_result,
+                                "CommitFailedException: simulated commit 
failure")
     # Check that no column was added.
     data = self.execute_query_expect_success(self.client,
         "select * from {0}".format(tbl_name))
@@ -2033,8 +2035,9 @@ class TestIcebergV2Table(IcebergTestSuite):
           "update {0} set i=2 where i=1".format(fq_tbl_name),
           query_options=fail_ice_commit_options)
       # Check that we get the error message.
-      assert error_msg_expected(
-          str(err), "ValidationException: simulated validation check failure")
+      assert error_msg_startswith(str(err),
+          "ImpalaRuntimeException: simulated validation check failure\n"
+          "CAUSED BY: ValidationException: simulated validation check failure")
       # Check that the table content was not updated.
       data = self.execute_query_expect_success(self.client,
           "select * from {0}".format(fq_tbl_name))
diff --git a/tests/shell/test_shell_commandline.py 
b/tests/shell/test_shell_commandline.py
index 0da89ed42..79669421f 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -39,7 +39,7 @@ from tests.common.skip import SkipIf
 from tests.common.test_dimensions import (
   create_client_protocol_dimension, create_client_protocol_strict_dimension,
   create_uncompressed_text_dimension, create_single_exec_option_dimension)
-from tests.common.test_result_verifier import error_msg_expected
+from tests.common.test_result_verifier import error_msg_startswith
 from time import sleep, time
 from tests.shell.util import (get_impalad_host_port, assert_var_substitution,
   run_impala_shell_cmd, ImpalaShell, build_shell_env, wait_for_query_state,
@@ -289,9 +289,9 @@ class TestImpalaShell(ImpalaTestSuite):
     args = ['-q', 'set abort_on_error=true;'
             'select id from functional_parquet.bad_column_metadata t']
     result = run_impala_shell_cmd(vector, args, expect_success=False)
-    assert error_msg_expected(
-      stderr_get_first_error_msg(result.stderr),
-      "Column metadata states there are 11 values, but read 10 values from 
column id."
+    assert error_msg_startswith(
+        stderr_get_first_error_msg(result.stderr),
+        "Column metadata states there are 11 values, but read 10 values from 
column id."
     )
 
 
@@ -302,9 +302,9 @@ class TestImpalaShell(ImpalaTestSuite):
             'select id, cnt from functional_parquet.bad_column_metadata t, '
             '(select 1 cnt) u']
     result = run_impala_shell_cmd(vector, args, expect_success=False)
-    assert error_msg_expected(
-      stderr_get_first_error_msg(result.stderr),
-      "Column metadata states there are 11 values, but read 10 values from 
column id."
+    assert error_msg_startswith(
+        stderr_get_first_error_msg(result.stderr),
+        "Column metadata states there are 11 values, but read 10 values from 
column id."
     )
 
   def test_no_warnings_in_log_with_quiet_mode(self, vector):
diff --git a/tests/shell/test_shell_interactive.py 
b/tests/shell/test_shell_interactive.py
index d60463826..87649ae66 100755
--- a/tests/shell/test_shell_interactive.py
+++ b/tests/shell/test_shell_interactive.py
@@ -46,7 +46,7 @@ from tests.common.skip import SkipIfLocal
 from tests.common.test_dimensions import (
   create_client_protocol_dimension, create_client_protocol_strict_dimension,
   create_uncompressed_text_dimension, create_single_exec_option_dimension)
-from tests.common.test_result_verifier import error_msg_expected
+from tests.common.test_result_verifier import error_msg_startswith
 from tests.shell.util import (assert_var_substitution, ImpalaShell, 
get_impalad_port,
   get_shell_cmd, get_open_sessions_metric, spawn_shell, get_unused_port,
   create_impala_shell_executable_dimension, get_impala_shell_executable,
@@ -1163,7 +1163,7 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
       assert "ParseException" in result.stderr,\
              result.stderr
     else:
-      assert error_msg_expected(
+      assert error_msg_startswith(
           stderr_get_first_error_msg(result.stderr),
           "ParseException: Unmatched string literal"
       )
@@ -1176,7 +1176,7 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
     query = "select cast(now() as string format 'yyyy年MM月dd日')"
     shell.send_cmd(query)
     result = shell.get_result()
-    assert error_msg_expected(
+    assert error_msg_startswith(
         stderr_get_first_error_msg(result.stderr),
         "Bad date/time conversion format: yyyy年MM月dd日"
     )
diff --git a/tests/util/cancel_util.py b/tests/util/cancel_util.py
index fc1ed4e8b..741dd184e 100644
--- a/tests/util/cancel_util.py
+++ b/tests/util/cancel_util.py
@@ -21,7 +21,7 @@ from time import sleep
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.impala_connection import create_connection
 from tests.common.impala_test_suite import ImpalaTestSuite
-from tests.common.test_result_verifier import error_msg_expected
+from tests.common.test_result_verifier import error_msg_startswith
 
 
 class QueryToKill:
@@ -55,11 +55,11 @@ class QueryToKill:
     # If ImpalaServer::UnregisterQuery() happens before the last polling, the 
error
     # message will be "Invalid or unknown query handle". Otherwise, the error 
message
     # will be "Cancelled".
-    assert error_msg_expected(
+    assert error_msg_startswith(
         str(self.exc),
         "Invalid or unknown query handle",
         self.client.get_query_id(self.handle),
-    ) or error_msg_expected(
+    ) or error_msg_startswith(
         str(self.exc),
         "Cancelled",
         self.client.get_query_id(self.handle),
@@ -86,7 +86,7 @@ def assert_kill_error(client, error_msg, query_id=None, 
sql=None, user=None):
     client.execute(sql, user=user)
     assert False, "Failed to catch the exception."
   except Exception as exc:
-    assert error_msg_expected(str(exc), error_msg)
+    assert error_msg_startswith(str(exc), error_msg)
 
 
 def cancel_query_and_validate_state(client, query, exec_option, table_format,

(impala) 01/02: IMPALA-13729: Accept error messages not starting with prompt

Reply via email to