This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 474af29dc9af266a139830321314a4cc15ec3b47
Author: Balazs Hevele <[email protected]>
AuthorDate: Wed Jan 21 18:27:42 2026 +0100

    IMPALA-572 impala-shell: add option to write profiles to a file

    Added an argument to write runtime profiles to a given file,
    after running "profile;" or a query with -p flag set.

    Usage:
    impala-shell.sh --profile_output=path/to/file

    It is also available as a shell option:
    SET PROFILE_OUTPUT=path/to/file;

    If no file is provided, the profile will be written to standard output.

    Change-Id: Id8ce4ddcf013392b3c4d66941f07fb90f9c90c3c
    Reviewed-on: http://gerrit.cloudera.org:8080/23883
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 shell/impala_shell/impala_shell.py                 | 67 +++++++++++++++-------
 shell/impala_shell/impala_shell_config_defaults.py |  1 +
 shell/impala_shell/option_parser.py                |  4 ++
 tests/shell/test_shell_commandline.py              | 38 ++++++++++++
 tests/shell/test_shell_interactive.py              | 37 ++++++++++++
 5 files changed, 125 insertions(+), 22 deletions(-)

diff --git a/shell/impala_shell/impala_shell.py b/shell/impala_shell/impala_shell.py index 93d133f8d..7855fb794 100644 --- a/shell/impala_shell/impala_shell.py +++ b/shell/impala_shell/impala_shell.py @@ -202,6 +202,7 @@ class ImpalaShell(cmd.Cmd, object): 'VERBOSE': (lambda x: x in ImpalaShell.TRUE_STRINGS, "verbose"), 'DELIMITER': (lambda x: " " if x == '\\s' else x, "output_delimiter"), 'OUTPUT_FILE': (lambda x: None if x == '' else x, "output_file"), + 'PROFILE_OUTPUT': (lambda x: None if x == '' else x, "profile_output"), 'VERTICAL': (lambda x: x in ImpalaShell.TRUE_STRINGS, "vertical"), } @@ -263,6 +264,8 @@ class ImpalaShell(cmd.Cmd, object): self.cached_prompt = str() self.show_profiles = options.show_profiles + self.profile_output = options.profile_output + self.rpc_stdout = options.rpc_stdout self.rpc_file = options.rpc_file @@ -1225,26 +1228,37 @@ class ImpalaShell(cmd.Cmd, object): file_descriptor.flush() def 
print_runtime_profile(self, profile, failed_profile, - profile_display_mode=QueryAttemptDisplayModes.LATEST, status=False): - """Prints the given runtime profiles to the console. Optionally prints the failed + profile_display_mode=QueryAttemptDisplayModes.LATEST): + """Prints the given runtime profiles to the console, or to a file if --profile_output + or profile_output shell option was set. Optionally prints the failed profile if the query was retried. The format the profiles are printed is controlled by the option profile_display_mode, see QueryProfileDisplayModes docs above. """ - if self.show_profiles or status: - if profile: - query_profile_prefix = match_string_type("Query Runtime Profile:\n", profile) - if profile_display_mode == QueryAttemptDisplayModes.ALL: - print(query_profile_prefix + profile) - if failed_profile: - failed_profile_prefix = \ - match_string_type("Failed Query Runtime Profile(s):\n", failed_profile) - print(failed_profile_prefix + failed_profile) - elif profile_display_mode == QueryAttemptDisplayModes.LATEST: - print(query_profile_prefix + profile) - elif profile_display_mode == QueryAttemptDisplayModes.ORIGINAL: - print(query_profile_prefix + failed_profile if failed_profile else profile) - else: - raise FatalShellException("Invalid value for query profile display mode") + + if not profile: return + + try: + out_file = sys.stdout + if self.profile_output: + out_file = open(self.profile_output, 'a') + + query_profile_prefix = match_string_type("Query Runtime Profile:\n", profile) + if profile_display_mode == QueryAttemptDisplayModes.ALL: + print(query_profile_prefix + profile, file=out_file) + if failed_profile: + failed_profile_prefix = \ + match_string_type("Failed Query Runtime Profile(s):\n", failed_profile) + print(failed_profile_prefix + failed_profile, file=out_file) + elif profile_display_mode == QueryAttemptDisplayModes.LATEST: + print(query_profile_prefix + profile, file=out_file) + elif profile_display_mode == 
QueryAttemptDisplayModes.ORIGINAL: + query_profile = failed_profile if failed_profile else profile + print(query_profile_prefix + query_profile, file=out_file) + else: + raise FatalShellException("Invalid value for query profile display mode") + finally: + if self.profile_output: + out_file.close() def _parse_table_name_arg(self, arg): """ Parses an argument string and returns the result as a db name, table name combo. @@ -1304,8 +1318,7 @@ class ImpalaShell(cmd.Cmd, object): profile, failed_profile = self.imp_client.get_runtime_profile( self.last_query_handle) - return self.print_runtime_profile(profile, failed_profile, profile_display_mode, - True) + return self.print_runtime_profile(profile, failed_profile, profile_display_mode) def do_select(self, args): """Executes a SELECT... query, fetching all rows""" @@ -2440,15 +2453,25 @@ def impala_shell_main(): "BinaryProtocol will not be accelerated, which can reduce performance. " "Error was '{0}'".format(e), file=sys.stderr) + # If output files are given for query results/query profiles, + # make sure the given file(s) can be opened for writing. + # This will also clear the file(s) if successful. if options.output_file: try: - # Make sure the given file can be opened for writing. This will also clear the file - # if successful. 
- open(options.output_file, 'wb') + with open(options.output_file, 'wb'): + pass except IOError as e: print('Error opening output file for writing: %s' % e, file=sys.stderr) raise FatalShellException() + if options.profile_output: + try: + with open(options.profile_output, 'w'): + pass + except IOError as e: + print('Error opening profile output file for writing: %s' % e, file=sys.stderr) + raise FatalShellException() + if options.http_socket_timeout_s is not None: if (options.http_socket_timeout_s != 'None' and float(options.http_socket_timeout_s) < 0): diff --git a/shell/impala_shell/impala_shell_config_defaults.py b/shell/impala_shell/impala_shell_config_defaults.py index ca61912e8..c52e4cfbd 100644 --- a/shell/impala_shell/impala_shell_config_defaults.py +++ b/shell/impala_shell/impala_shell_config_defaults.py @@ -38,6 +38,7 @@ impala_shell_defaults = { 'kerberos_service_name': 'impala', 'output_delimiter': '\\t', 'output_file': None, + 'profile_output': None, 'print_header': False, 'vertical': False, 'live_progress': True, # The option only applies to interactive shell session diff --git a/shell/impala_shell/option_parser.py b/shell/impala_shell/option_parser.py index e8c918c8b..cb06bc502 100644 --- a/shell/impala_shell/option_parser.py +++ b/shell/impala_shell/option_parser.py @@ -205,6 +205,10 @@ def get_option_parser(defaults): parser.add_option("-p", "--show_profiles", dest="show_profiles", action="store_true", help="Always display query profiles after execution") + parser.add_option("--profile_output", dest="profile_output", + help="If set, query profiles will be written to the " + "given file. Profiles for multiple semicolon-terminated " + "queries will be appended to the same file") parser.add_option("--rpc_stdout", dest="rpc_stdout", action="store_true", help="Output hs2 rpc details to stdout. 
" diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py index f67239c46..50f7a88a7 100644 --- a/tests/shell/test_shell_commandline.py +++ b/tests/shell/test_shell_commandline.py @@ -417,6 +417,44 @@ class TestImpalaShell(ImpalaTestSuite): assert len(re.findall(regex, result_set.stdout)) == 2, \ "Could not detect two profiles, stdout: %s" % result_set.stdout + def test_runtime_profile_output_file(self, vector, tmp_file): + """Test that writing profile with --profile_output writes the profile + into the file.""" + + if vector.get_value('strict_hs2_protocol'): + pytest.skip("Runtime profile is not supported in strict hs2 mode.") + + # This regex helps us uniquely identify a profile. + regex = re.compile(r"Operator\s+#Hosts\s+#Inst\s+Avg\s+Time") + + # Test writing profile with "query; profile;" + args = ['-q', 'select 1; profile;', '--profile_output=%s' % tmp_file] + result_set = run_impala_shell_cmd(vector, args) + + # We expect no results in stdout + assert len(re.findall(regex, result_set.stdout)) == 0, \ + "Did not expect runtime profile in stdout, stdout: %s" % result_set.stdout + + # We expect the result in the file + with open(tmp_file, "r") as f: + lines = f.read() + assert len(re.findall(regex, lines)) == 1, \ + "Could not detect profile in the file, file content: %s" % lines + + # Test writing profile with "query;" and -p option + args = ['-p', '-q', 'select 1;', '--profile_output=%s' % tmp_file] + result_set = run_impala_shell_cmd(vector, args) + + # We expect no results in stdout + assert len(re.findall(regex, result_set.stdout)) == 0, \ + "Did not expect runtime profile in stdout, stdout: %s" % result_set.stdout + + # We expect the result in the file + with open(tmp_file, "r") as f: + lines = f.read() + assert len(re.findall(regex, lines)) == 1, \ + "Could not detect profile in the file, file content: %s" % lines + def test_runtime_profile_referenced_tables(self, vector, unique_database): if 
vector.get_value('strict_hs2_protocol'): pytest.skip("Runtime profile is not supported in strict hs2 mode.") diff --git a/tests/shell/test_shell_interactive.py b/tests/shell/test_shell_interactive.py index 01164b258..f6bd7c49f 100755 --- a/tests/shell/test_shell_interactive.py +++ b/tests/shell/test_shell_interactive.py @@ -234,6 +234,8 @@ class TestImpalaShellInteractive(ImpalaTestSuite): ("WRITE_DELIMITED: False", "VERBOSE: True")) self._expect_with_cmd(proc, "set", vector, ("DELIMITER: \\t", "OUTPUT_FILE: None")) + self._expect_with_cmd(proc, "set", vector, + ("DELIMITER: \\t", "PROFILE_OUTPUT: None")) self._expect_with_cmd(proc, "set write_delimited=true", vector) self._expect_with_cmd(proc, "set", vector, ("WRITE_DELIMITED: True", "VERBOSE: True")) self._expect_with_cmd(proc, "set DELIMITER=,", vector) @@ -241,6 +243,11 @@ class TestImpalaShellInteractive(ImpalaTestSuite): self._expect_with_cmd(proc, "set output_file=/tmp/clmn.txt", vector) self._expect_with_cmd(proc, "set", vector, ("DELIMITER: ,", "OUTPUT_FILE: /tmp/clmn.txt")) + self._expect_with_cmd(proc, "set", vector, ("DELIMITER: ,", "PROFILE_OUTPUT: None")) + self._expect_with_cmd(proc, "set profile_output=/tmp/profile.txt", vector) + self._expect_with_cmd(proc, "set", vector, + ("DELIMITER: ,", "PROFILE_OUTPUT: /tmp/profile.txt")) + proc.sendeof() proc.wait() @@ -303,6 +310,36 @@ class TestImpalaShellInteractive(ImpalaTestSuite): result = p2.get_result() assert "VIETNAM" in result.stdout + def test_print_runtime_profile_to_file(self, vector): + """Test print profile to file and unset""" + + if vector.get_value('strict_hs2_protocol'): + pytest.skip("Runtime profile is not supported in strict hs2 mode.") + + # Test writing profile to file + p1 = ImpalaShell(vector) + local_file = NamedTemporaryFile(delete=True) + p1.send_cmd("set profile_output=%s" % local_file.name) + p1.send_cmd("select 1") + p1.send_cmd("profile") + result = p1.get_result() + # Profile is not expected in stdout + assert "Query 
Runtime Profile" not in result.stdout + # Profile is expected in file + with open(local_file.name, "r") as f: + result = f.read() + assert "Query Runtime Profile" in result + + # Test unset profile + p2 = ImpalaShell(vector) + p2.send_cmd("set profile_output=%s" % local_file.name) + p2.send_cmd("unset profile_output") + p2.send_cmd("select 1") + p2.send_cmd("profile") + result = p2.get_result() + # Profile is expected in stdout + assert "Query Runtime Profile" in result.stdout + def test_live_progress_no_overlap(self, vector): if vector.get_value('strict_hs2_protocol'): pytest.skip("Live progress not supported in strict hs2 mode.")
