This is an automated email from the ASF dual-hosted git repository.

csringhofer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 4e3c9d09d8a466d45be6bea02b8fea997f826ceb
Author: Balazs Hevele <[email protected]>
AuthorDate: Wed Jan 28 11:23:10 2026 +0100

    IMPALA-14673 impala-shell: profile format argument
    
    Selecting a profile format was already supported by Impala's HS2 interface,
    but so far impala-shell only used the default STRING format.
    
    Added a command line argument to set which format to use:
      impala-shell.sh --profile_format=Format
    
    It can also be changed at runtime as a shell option, in the shell:
      set profile_format=Format;
    
    Format can be string/base64/json.
    If not set, the default string format will be used.
    
    With the base64 format, the output can be parsed with impala-profile-tool.
    
    Change-Id: I032a5d0436f123d737d46d0b8cecbae09888a789
    Reviewed-on: http://gerrit.cloudera.org:8080/23884
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 shell/impala_shell/impala_client.py                | 27 +++++++++++++---
 shell/impala_shell/impala_shell.py                 | 12 ++++++--
 shell/impala_shell/impala_shell_config_defaults.py |  1 +
 shell/impala_shell/option_parser.py                |  4 +++
 tests/shell/test_shell_commandline.py              | 36 ++++++++++++++++++++++
 5 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/shell/impala_shell/impala_client.py b/shell/impala_shell/impala_client.py
index 61a3f227c..c56e88b15 100644
--- a/shell/impala_shell/impala_client.py
+++ b/shell/impala_shell/impala_client.py
@@ -78,6 +78,7 @@ from impala_thrift_gen.TCLIService.TCLIService import (
     TStatusCode,
     TTypeId,
 )
+from impala_thrift_gen.RuntimeProfile.ttypes import TRuntimeProfileFormat
 
 # Getters to extract HS2's representation of values to the display version.
 # An entry must be added to this map for each supported type. HS2's TColumn 
has many
@@ -360,7 +361,7 @@ class ImpalaClient(object):
     False otherwise."""
     raise NotImplementedError()
 
-  def get_runtime_profile(self, last_query_handle):
+  def get_runtime_profile(self, last_query_handle, profile_format="string"):  
# noqa: U100
     """Get the runtime profile string from the server. Returns None if
     an error was encountered. If the query was retried, returns the profile of 
the failed
     attempt as well; the tuple (profile, failed_profile) is returned where 
'profile' is
@@ -947,15 +948,27 @@ class ImpalaHS2Client(ImpalaClient):
     finally:
       self._clear_current_query_handle()
 
-  def get_runtime_profile(self, last_query_handle):
+  def get_runtime_profile(self, last_query_handle, profile_format="string"):
     try:
       self._set_current_query_handle(last_query_handle)
 
       def GetRuntimeProfile(req):
         return self.imp_service.GetRuntimeProfile(req)
+
+      # convert profile format from string to int (enum id)
+      profile_format_key = profile_format.upper()
+      if profile_format_key in TRuntimeProfileFormat._NAMES_TO_VALUES \
+          and profile_format_key != "THRIFT":
+        profile_format_id = 
TRuntimeProfileFormat._NAMES_TO_VALUES[profile_format_key]
+      else:
+        err_msg = "Invalid profile format value {0}."
+        print(err_msg.format(profile_format), file=sys.stderr)
+        return None, None
+
       # GetRuntimeProfile rpc is idempotent and so safe to retry.
       profile_req = TGetRuntimeProfileReq(last_query_handle,
                                           self.session_handle,
+                                          format=profile_format_id,
                                           include_query_attempts=True)
       resp = self._do_hs2_rpc(GetRuntimeProfile, profile_req, 
retry_on_error=True)
       self._check_hs2_rpc_status(resp.status)
@@ -1240,7 +1253,8 @@ class StrictHS2Client(ImpalaHS2Client):
   def get_error_log(self, last_query_handle):
     return ""
 
-  def get_runtime_profile(self, last_query_handle):
+  def get_runtime_profile(self, last_query_handle,  # noqa: U100
+      profile_format="string"):  # noqa: U100
     return None, None
 
   def _populate_query_options(self):
@@ -1389,7 +1403,12 @@ class ImpalaBeeswaxClient(ImpalaClient):
         lambda: self.imp_service.Cancel(last_query_handle), False)
     return rpc_status == RpcStatus.OK
 
-  def get_runtime_profile(self, last_query_handle):
+  def get_runtime_profile(self, last_query_handle, profile_format="string"):
+    if profile_format.upper() != "STRING":
+        err_msg = "Invalid profile format value {0}. Beeswax only supports 
string."
+        print(err_msg.format(profile_format), file=sys.stderr)
+        return None, None
+
     profile, rpc_status = self._do_beeswax_rpc(
         lambda: self.imp_service.GetRuntimeProfile(last_query_handle))
     if rpc_status == RpcStatus.OK and profile:
diff --git a/shell/impala_shell/impala_shell.py b/shell/impala_shell/impala_shell.py
index 7855fb794..12326b4e3 100644
--- a/shell/impala_shell/impala_shell.py
+++ b/shell/impala_shell/impala_shell.py
@@ -204,6 +204,7 @@ class ImpalaShell(cmd.Cmd, object):
     'OUTPUT_FILE': (lambda x: None if x == '' else x, "output_file"),
     'PROFILE_OUTPUT': (lambda x: None if x == '' else x, "profile_output"),
     'VERTICAL': (lambda x: x in ImpalaShell.TRUE_STRINGS, "vertical"),
+    'PROFILE_FORMAT': (lambda x: "string" if x == '' else x, "profile_format")
   }
 
   # Minimum time in seconds between two calls to get the exec summary.
@@ -265,6 +266,7 @@ class ImpalaShell(cmd.Cmd, object):
 
     self.show_profiles = options.show_profiles
     self.profile_output = options.profile_output
+    self.profile_format = options.profile_format
 
     self.rpc_stdout = options.rpc_stdout
     self.rpc_file = options.rpc_file
@@ -1243,6 +1245,12 @@ class ImpalaShell(cmd.Cmd, object):
         out_file = open(self.profile_output, 'a')
 
       query_profile_prefix = match_string_type("Query Runtime Profile:\n", 
profile)
+      if self.profile_format.upper() == "BASE64":
+        # Change prefix so that the output can be read by impala-profile-tool
+        timestamp = str(int(time.time()))
+        query_id = self.imp_client.get_query_id_str(self.last_query_handle)
+        query_profile_prefix = timestamp + " " + query_id + " "
+
       if profile_display_mode == QueryAttemptDisplayModes.ALL:
         print(query_profile_prefix + profile, file=out_file)
         if failed_profile:
@@ -1317,7 +1325,7 @@ class ImpalaShell(cmd.Cmd, object):
         return CmdStatus.ERROR
 
     profile, failed_profile = self.imp_client.get_runtime_profile(
-        self.last_query_handle)
+        self.last_query_handle, self.profile_format)
     return self.print_runtime_profile(profile, failed_profile, 
profile_display_mode)
 
   def do_select(self, args):
@@ -1552,7 +1560,7 @@ class ImpalaShell(cmd.Cmd, object):
         self.imp_client.close_query(self.last_query_handle)
       if self.show_profiles:
         profile, retried_profile = self.imp_client.get_runtime_profile(
-            self.last_query_handle)
+            self.last_query_handle, self.profile_format)
         self.print_runtime_profile(profile, retried_profile)
       return CmdStatus.SUCCESS
     except QueryCancelledByShellException as e:
diff --git a/shell/impala_shell/impala_shell_config_defaults.py b/shell/impala_shell/impala_shell_config_defaults.py
index c52e4cfbd..9e914e5c9 100644
--- a/shell/impala_shell/impala_shell_config_defaults.py
+++ b/shell/impala_shell/impala_shell_config_defaults.py
@@ -46,6 +46,7 @@ impala_shell_defaults = {
             'query': None,
             'query_file': None,
             'show_profiles': False,
+            'profile_format': 'string',
             'rpc_stdout': False,
             'rpc_file': None,
             'ssl': False,
diff --git a/shell/impala_shell/option_parser.py b/shell/impala_shell/option_parser.py
index cb06bc502..abc75b4ef 100644
--- a/shell/impala_shell/option_parser.py
+++ b/shell/impala_shell/option_parser.py
@@ -209,6 +209,10 @@ def get_option_parser(defaults):
                     help="If set, query profiles will be written to the "
                          "given file. Profiles for multiple 
semicolon-terminated "
                          "queries will be appended to the same file")
+  parser.add_option("--profile_format", dest="profile_format", 
default="STRING",
+                    help="Query profile format. Valid inputs are "
+                         "['string', 'base64', 'json']. Format base64 "
+                         "is compatible with impala-profile-tool.")
   parser.add_option("--rpc_stdout", dest="rpc_stdout",
                     action="store_true",
                     help="Output hs2 rpc details to stdout. "
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 50f7a88a7..a79b0260d 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -455,6 +455,42 @@ class TestImpalaShell(ImpalaTestSuite):
       assert len(re.findall(regex, lines)) == 1, \
           "Could not detect profile in the file, file content: %s" % lines
 
+  def test_runtime_profile_format(self, vector):
+    if vector.get_value('strict_hs2_protocol'):
+      pytest.skip("Runtime profile is not supported in strict hs2 mode.")
+
+    string_regex = re.compile(r"Query \(id=[a-z0-9:]+\):")
+    base64_regex = re.compile(r"\b[a-zA-Z0-9/+=]{64,}\b")
+    json_regex = re.compile(r"{\"contents\":.*}")
+
+    # test default is string
+    args = ['-q', 'select 1; profile;']
+    result_set = run_impala_shell_cmd(vector, args)
+    assert len(re.findall(string_regex, result_set.stdout)) == 1, \
+        "Could not detect string profile by default, stdout: %s" % 
result_set.stdout
+
+    # test string format explicitly
+    args = ['-q', 'select 1; profile;', '--profile_format=string']
+    result_set = run_impala_shell_cmd(vector, args)
+    assert len(re.findall(string_regex, result_set.stdout)) == 1, \
+        "Could not detect string profile by default, stdout: %s" % 
result_set.stdout
+
+    # test json format
+    args = ['-q', 'select 1; profile;', '--profile_format=json']
+    result_set = run_impala_shell_cmd(vector, args)
+    assert len(re.findall(string_regex, result_set.stdout)) == 0, \
+        "Did not expect string profile with json format, stdout: %s" % 
result_set.stdout
+    assert len(re.findall(json_regex, result_set.stdout)) == 1, \
+        "Could not detect json profile, stdout: %s" % result_set.stdout
+
+    # test base64 format
+    args = ['-q', 'select 1; profile;', '--profile_format=base64']
+    result_set = run_impala_shell_cmd(vector, args)
+    assert len(re.findall(string_regex, result_set.stdout)) == 0, \
+        "Did not expect string profile with base64 format, stdout: %s" % 
result_set.stdout
+    assert len(re.findall(base64_regex, result_set.stdout)) == 1, \
+        "Could not detect base64 profile, stdout: %s" % result_set.stdout
+
   def test_runtime_profile_referenced_tables(self, vector, unique_database):
     if vector.get_value('strict_hs2_protocol'):
       pytest.skip("Runtime profile is not supported in strict hs2 mode.")

Reply via email to