This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 474af29dc9af266a139830321314a4cc15ec3b47
Author: Balazs Hevele <[email protected]>
AuthorDate: Wed Jan 21 18:27:42 2026 +0100

    IMPALA-572 impala-shell: add option to write profiles to a file
    
    Added an argument that writes runtime profiles to a given file
    after running "profile;" or a query with the -p flag set.
    
    Usage:
        impala-shell.sh --profile_output=path/to/file
    
    It is also available as a shell option:
        SET PROFILE_OUTPUT=path/to/file;
    
    If no file is provided, the profile will be written to standard output.
    
    Change-Id: Id8ce4ddcf013392b3c4d66941f07fb90f9c90c3c
    Reviewed-on: http://gerrit.cloudera.org:8080/23883
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 shell/impala_shell/impala_shell.py                 | 67 +++++++++++++++-------
 shell/impala_shell/impala_shell_config_defaults.py |  1 +
 shell/impala_shell/option_parser.py                |  4 ++
 tests/shell/test_shell_commandline.py              | 38 ++++++++++++
 tests/shell/test_shell_interactive.py              | 37 ++++++++++++
 5 files changed, 125 insertions(+), 22 deletions(-)

diff --git a/shell/impala_shell/impala_shell.py 
b/shell/impala_shell/impala_shell.py
index 93d133f8d..7855fb794 100644
--- a/shell/impala_shell/impala_shell.py
+++ b/shell/impala_shell/impala_shell.py
@@ -202,6 +202,7 @@ class ImpalaShell(cmd.Cmd, object):
     'VERBOSE': (lambda x: x in ImpalaShell.TRUE_STRINGS, "verbose"),
     'DELIMITER': (lambda x: " " if x == '\\s' else x, "output_delimiter"),
     'OUTPUT_FILE': (lambda x: None if x == '' else x, "output_file"),
+    'PROFILE_OUTPUT': (lambda x: None if x == '' else x, "profile_output"),
     'VERTICAL': (lambda x: x in ImpalaShell.TRUE_STRINGS, "vertical"),
   }
 
@@ -263,6 +264,8 @@ class ImpalaShell(cmd.Cmd, object):
     self.cached_prompt = str()
 
     self.show_profiles = options.show_profiles
+    self.profile_output = options.profile_output
+
     self.rpc_stdout = options.rpc_stdout
     self.rpc_file = options.rpc_file
 
@@ -1225,26 +1228,37 @@ class ImpalaShell(cmd.Cmd, object):
       file_descriptor.flush()
 
   def print_runtime_profile(self, profile, failed_profile,
-        profile_display_mode=QueryAttemptDisplayModes.LATEST, status=False):
-    """Prints the given runtime profiles to the console. Optionally prints the 
failed
+        profile_display_mode=QueryAttemptDisplayModes.LATEST):
+    """Prints the given runtime profiles to the console, or to a file if 
--profile_output
+    or profile_output shell option was set. Optionally prints the failed
     profile if the query was retried. The format the profiles are printed is 
controlled
     by the option profile_display_mode, see QueryProfileDisplayModes docs 
above.
     """
-    if self.show_profiles or status:
-      if profile:
-        query_profile_prefix = match_string_type("Query Runtime Profile:\n", 
profile)
-        if profile_display_mode == QueryAttemptDisplayModes.ALL:
-          print(query_profile_prefix + profile)
-          if failed_profile:
-            failed_profile_prefix = \
-                match_string_type("Failed Query Runtime Profile(s):\n", 
failed_profile)
-            print(failed_profile_prefix + failed_profile)
-        elif profile_display_mode == QueryAttemptDisplayModes.LATEST:
-          print(query_profile_prefix + profile)
-        elif profile_display_mode == QueryAttemptDisplayModes.ORIGINAL:
-          print(query_profile_prefix + failed_profile if failed_profile else 
profile)
-        else:
-          raise FatalShellException("Invalid value for query profile display 
mode")
+
+    if not profile: return
+
+    try:
+      out_file = sys.stdout
+      if self.profile_output:
+        out_file = open(self.profile_output, 'a')
+
+      query_profile_prefix = match_string_type("Query Runtime Profile:\n", 
profile)
+      if profile_display_mode == QueryAttemptDisplayModes.ALL:
+        print(query_profile_prefix + profile, file=out_file)
+        if failed_profile:
+          failed_profile_prefix = \
+              match_string_type("Failed Query Runtime Profile(s):\n", 
failed_profile)
+          print(failed_profile_prefix + failed_profile, file=out_file)
+      elif profile_display_mode == QueryAttemptDisplayModes.LATEST:
+        print(query_profile_prefix + profile, file=out_file)
+      elif profile_display_mode == QueryAttemptDisplayModes.ORIGINAL:
+        query_profile = failed_profile if failed_profile else profile
+        print(query_profile_prefix + query_profile, file=out_file)
+      else:
+        raise FatalShellException("Invalid value for query profile display 
mode")
+    finally:
+      if self.profile_output:
+        out_file.close()
 
   def _parse_table_name_arg(self, arg):
     """ Parses an argument string and returns the result as a db name, table 
name combo.
@@ -1304,8 +1318,7 @@ class ImpalaShell(cmd.Cmd, object):
 
     profile, failed_profile = self.imp_client.get_runtime_profile(
         self.last_query_handle)
-    return self.print_runtime_profile(profile, failed_profile, 
profile_display_mode,
-            True)
+    return self.print_runtime_profile(profile, failed_profile, 
profile_display_mode)
 
   def do_select(self, args):
     """Executes a SELECT... query, fetching all rows"""
@@ -2440,15 +2453,25 @@ def impala_shell_main():
             "BinaryProtocol will not be accelerated, which can reduce 
performance. "
             "Error was '{0}'".format(e), file=sys.stderr)
 
+  # If output files are given for query results/query profiles,
+  # make sure the given file(s) can be opened for writing.
+  # This will also clear the file(s) if successful.
   if options.output_file:
     try:
-      # Make sure the given file can be opened for writing. This will also 
clear the file
-      # if successful.
-      open(options.output_file, 'wb')
+      with open(options.output_file, 'wb'):
+        pass
     except IOError as e:
       print('Error opening output file for writing: %s' % e, file=sys.stderr)
       raise FatalShellException()
 
+  if options.profile_output:
+    try:
+      with open(options.profile_output, 'w'):
+          pass
+    except IOError as e:
+      print('Error opening profile output file for writing: %s' % e, 
file=sys.stderr)
+      raise FatalShellException()
+
   if options.http_socket_timeout_s is not None:
     if (options.http_socket_timeout_s != 'None'
         and float(options.http_socket_timeout_s) < 0):
diff --git a/shell/impala_shell/impala_shell_config_defaults.py 
b/shell/impala_shell/impala_shell_config_defaults.py
index ca61912e8..c52e4cfbd 100644
--- a/shell/impala_shell/impala_shell_config_defaults.py
+++ b/shell/impala_shell/impala_shell_config_defaults.py
@@ -38,6 +38,7 @@ impala_shell_defaults = {
             'kerberos_service_name': 'impala',
             'output_delimiter': '\\t',
             'output_file': None,
+            'profile_output': None,
             'print_header': False,
             'vertical': False,
             'live_progress': True,  # The option only applies to interactive 
shell session
diff --git a/shell/impala_shell/option_parser.py 
b/shell/impala_shell/option_parser.py
index e8c918c8b..cb06bc502 100644
--- a/shell/impala_shell/option_parser.py
+++ b/shell/impala_shell/option_parser.py
@@ -205,6 +205,10 @@ def get_option_parser(defaults):
   parser.add_option("-p", "--show_profiles", dest="show_profiles",
                     action="store_true",
                     help="Always display query profiles after execution")
+  parser.add_option("--profile_output", dest="profile_output",
+                    help="If set, query profiles will be written to the "
+                         "given file. Profiles for multiple 
semicolon-terminated "
+                         "queries will be appended to the same file")
   parser.add_option("--rpc_stdout", dest="rpc_stdout",
                     action="store_true",
                     help="Output hs2 rpc details to stdout. "
diff --git a/tests/shell/test_shell_commandline.py 
b/tests/shell/test_shell_commandline.py
index f67239c46..50f7a88a7 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -417,6 +417,44 @@ class TestImpalaShell(ImpalaTestSuite):
     assert len(re.findall(regex, result_set.stdout)) == 2, \
         "Could not detect two profiles, stdout: %s" % result_set.stdout
 
+  def test_runtime_profile_output_file(self, vector, tmp_file):
+    """Test that writing profile with --profile_output writes the profile
+    into the file."""
+
+    if vector.get_value('strict_hs2_protocol'):
+      pytest.skip("Runtime profile is not supported in strict hs2 mode.")
+
+    # This regex helps us uniquely identify a profile.
+    regex = re.compile(r"Operator\s+#Hosts\s+#Inst\s+Avg\s+Time")
+
+    # Test writing profile with "query; profile;"
+    args = ['-q', 'select 1; profile;', '--profile_output=%s' % tmp_file]
+    result_set = run_impala_shell_cmd(vector, args)
+
+    # We expect no results in stdout
+    assert len(re.findall(regex, result_set.stdout)) == 0, \
+        "Did not expect runtime profile in stdout, stdout: %s" % 
result_set.stdout
+
+    # We expect the result in the file
+    with open(tmp_file, "r") as f:
+      lines = f.read()
+      assert len(re.findall(regex, lines)) == 1, \
+          "Could not detect profile in the file, file content: %s" % lines
+
+    # Test writing profile with "query;" and -p option
+    args = ['-p', '-q', 'select 1;', '--profile_output=%s' % tmp_file]
+    result_set = run_impala_shell_cmd(vector, args)
+
+    # We expect no results in stdout
+    assert len(re.findall(regex, result_set.stdout)) == 0, \
+        "Did not expect runtime profile in stdout, stdout: %s" % 
result_set.stdout
+
+    # We expect the result in the file
+    with open(tmp_file, "r") as f:
+      lines = f.read()
+      assert len(re.findall(regex, lines)) == 1, \
+          "Could not detect profile in the file, file content: %s" % lines
+
   def test_runtime_profile_referenced_tables(self, vector, unique_database):
     if vector.get_value('strict_hs2_protocol'):
       pytest.skip("Runtime profile is not supported in strict hs2 mode.")
diff --git a/tests/shell/test_shell_interactive.py 
b/tests/shell/test_shell_interactive.py
index 01164b258..f6bd7c49f 100755
--- a/tests/shell/test_shell_interactive.py
+++ b/tests/shell/test_shell_interactive.py
@@ -234,6 +234,8 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
         ("WRITE_DELIMITED: False", "VERBOSE: True"))
     self._expect_with_cmd(proc, "set", vector,
         ("DELIMITER: \\t", "OUTPUT_FILE: None"))
+    self._expect_with_cmd(proc, "set", vector,
+        ("DELIMITER: \\t", "PROFILE_OUTPUT: None"))
     self._expect_with_cmd(proc, "set write_delimited=true", vector)
     self._expect_with_cmd(proc, "set", vector, ("WRITE_DELIMITED: True", 
"VERBOSE: True"))
     self._expect_with_cmd(proc, "set DELIMITER=,", vector)
@@ -241,6 +243,11 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
     self._expect_with_cmd(proc, "set output_file=/tmp/clmn.txt", vector)
     self._expect_with_cmd(proc, "set", vector,
         ("DELIMITER: ,", "OUTPUT_FILE: /tmp/clmn.txt"))
+    self._expect_with_cmd(proc, "set", vector, ("DELIMITER: ,", 
"PROFILE_OUTPUT: None"))
+    self._expect_with_cmd(proc, "set profile_output=/tmp/profile.txt", vector)
+    self._expect_with_cmd(proc, "set", vector,
+        ("DELIMITER: ,", "PROFILE_OUTPUT: /tmp/profile.txt"))
+
     proc.sendeof()
     proc.wait()
 
@@ -303,6 +310,36 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
     result = p2.get_result()
     assert "VIETNAM" in result.stdout
 
+  def test_print_runtime_profile_to_file(self, vector):
+    """Test print profile to file and unset"""
+
+    if vector.get_value('strict_hs2_protocol'):
+      pytest.skip("Runtime profile is not supported in strict hs2 mode.")
+
+    # Test writing profile to file
+    p1 = ImpalaShell(vector)
+    local_file = NamedTemporaryFile(delete=True)
+    p1.send_cmd("set profile_output=%s" % local_file.name)
+    p1.send_cmd("select 1")
+    p1.send_cmd("profile")
+    result = p1.get_result()
+    # Profile is not expected in stdout
+    assert "Query Runtime Profile" not in result.stdout
+    # Profile is expected in file
+    with open(local_file.name, "r") as f:
+      result = f.read()
+      assert "Query Runtime Profile" in result
+
+    # Test unset profile
+    p2 = ImpalaShell(vector)
+    p2.send_cmd("set profile_output=%s" % local_file.name)
+    p2.send_cmd("unset profile_output")
+    p2.send_cmd("select 1")
+    p2.send_cmd("profile")
+    result = p2.get_result()
+    # Profile is expected in stdout
+    assert "Query Runtime Profile" in result.stdout
+
   def test_live_progress_no_overlap(self, vector):
     if vector.get_value('strict_hs2_protocol'):
       pytest.skip("Live progress not supported in strict hs2 mode.")

Reply via email to