This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 14035065fac5b047fb8395b56d2c78000ba94dec
Author: Csaba Ringhofer <[email protected]>
AuthorDate: Tue May 16 16:18:46 2023 +0200

    IMPALA-12145: Fix profiles with non-ascii character in impala-shell (python2)
    
    As __future__.unicode_literals is imported in impala-shell,
    concatenating a str with a literal leads to decoding the
    str with the 'ascii' codec, which fails if it contains non-ascii
    characters. Converting the literal to str solves the issue.
    
    Testing:
    - added regression test + ran related EE tests
    
    Change-Id: I99b72dd262fc7c382e8baee1dce7592880c84de2
    Reviewed-on: http://gerrit.cloudera.org:8080/19893
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 shell/impala_shell.py                 |  9 ++++++---
 shell/shell_output.py                 | 20 ++++++++++++++++++++
 tests/shell/test_shell_commandline.py | 10 ++++++++++
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index e531d11b8..ddf2e2145 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -45,7 +45,8 @@ from impala_client import ImpalaHS2Client, StrictHS2Client, \
 from impala_shell_config_defaults import impala_shell_defaults
 from option_parser import get_option_parser, get_config_from_file
 from shell_output import (DelimitedOutputFormatter, OutputStream, PrettyOutputFormatter,
-                          OverwritingStdErrOutputStream, VerticalOutputFormatter)
+                          OverwritingStdErrOutputStream, VerticalOutputFormatter,
+                          match_string_type)
 from subprocess import call
 from shell_exceptions import (RPCException, DisconnectedException, QueryStateException,
     QueryCancelledByShellException, MissingThriftMethodException)
@@ -1138,11 +1139,13 @@ class ImpalaShell(cmd.Cmd, object):
     """
     if self.show_profiles or status:
       if profile:
-        query_profile_prefix = "Query Runtime Profile:\n"
+        query_profile_prefix = match_string_type("Query Runtime Profile:\n", profile)
         if profile_display_mode == QueryAttemptDisplayModes.ALL:
           print(query_profile_prefix + profile)
           if failed_profile:
-            print("Failed Query Runtime Profile(s):\n" + failed_profile)
+            failed_profile_prefix = \
+                match_string_type("Failed Query Runtime Profile(s):\n", failed_profile)
+            print(failed_profile_prefix + failed_profile)
         elif profile_display_mode == QueryAttemptDisplayModes.LATEST:
           print(query_profile_prefix + profile)
         elif profile_display_mode == QueryAttemptDisplayModes.ORIGINAL:
diff --git a/shell/shell_output.py b/shell/shell_output.py
index b417ceacc..070f66910 100644
--- a/shell/shell_output.py
+++ b/shell/shell_output.py
@@ -29,6 +29,26 @@ except ImportError:
   from io import StringIO  # python 3
 
 
+def match_string_type(str_to_convert, reference_str):
+  """ Returns 'str_to_convert' converted to the same type as 'reference_str'.
+      Can handle only str and unicode (converts via UTF-8). NOOP in Python 3.
+  """
+  if sys.version_info.major >= 3:
+    assert isinstance(reference_str, str)
+    assert isinstance(str_to_convert, str)
+    return str_to_convert
+
+  if type(str_to_convert) == type(reference_str):
+    return str_to_convert
+
+  if isinstance(reference_str, str):
+    assert isinstance(str_to_convert, unicode)
+    return str_to_convert.encode('UTF-8')
+  else:
+    assert isinstance(str_to_convert, str)  # reference_str is not str on this path
+    return str_to_convert.decode('UTF-8')
+
+
 class PrettyOutputFormatter(object):
   def __init__(self, prettytable):
     self.prettytable = prettytable
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index 9b209f86d..9bbbac41c 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -613,6 +613,16 @@ class TestImpalaShell(ImpalaTestSuite):
     assert 'UnicodeDecodeError' not in result.stderr
     assert RUSSIAN_CHARS.encode('utf-8') in result.stdout
 
+  def test_international_characters_profile(self, vector):
+    """IMPALA-12145: ensure we can handle international characters in the profile. """
+    if vector.get_value('strict_hs2_protocol'):
+      pytest.skip("Profile not supported in strict hs2 mode.")
+    text = RUSSIAN_CHARS.encode('utf-8')
+    args = ['-o', '/dev/null', '-p', '-q', "select '{0}'".format(text)]
+    result = run_impala_shell_cmd(vector, args)
+    assert 'UnicodeDecodeError' not in result.stderr
+    assert text in result.stdout
+
   def test_utf8_decoding_error_handling(self, vector):
     """IMPALA-10145,IMPALA-10299: Regression tests for elegantly handling malformed utf-8
     characters."""

Reply via email to