This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 0a42185d1 IMPALA-9627: Update utility scripts for Python 3 (part 2)
0a42185d1 is described below

commit 0a42185d17164af0c855647de25f1bc87f33ee71
Author: Michael Smith <[email protected]>
AuthorDate: Fri Mar 24 09:31:22 2023 -0700

    IMPALA-9627: Update utility scripts for Python 3 (part 2)
    
    We're starting to see environments where the system Python ('python') is
    Python 3. Updates utility and build scripts to work with Python 3, and
    updates check-pylint-py3k.sh to check scripts that use system python.
    
    Fixes other issues found during a full build and test run with Python
    3.8 as the default for 'python'.
    
    Fixes an impala-shell tip that was supposed to have been two tips (and
    had no space after the period when they were printed).
    
    Removes out-of-date deploy.py and various Python 2.6 workarounds.
    
    Testing:
    - Full build with /usr/bin/python pointed to python3
    - run-all-tests passed with python pointed to python3
    - ran push_to_asf.py
    
    Change-Id: Idff388aff33817b0629347f5843ec34c78f0d0cb
    Reviewed-on: http://gerrit.cloudera.org:8080/19697
    Reviewed-by: Michael Smith <[email protected]>
    Tested-by: Michael Smith <[email protected]>
---
 be/src/codegen/gen_ir_descriptions.py              |   2 +-
 bin/bootstrap_toolchain.py                         |  49 +--
 bin/check-pylint-py3k.sh                           |   9 +-
 bin/check-rat-report.py                            |   2 +-
 bin/collect_minidumps.py                           |   2 +-
 bin/compare_branches.py                            |   4 +-
 bin/create-test-configuration.sh                   |   4 +
 bin/diagnostics/collect_diagnostics.py             |   1 +
 bin/gen-backend-test-script.py                     |   1 +
 bin/gen_build_version.py                           |   2 +-
 bin/generate_xml_config.py                         |   7 +-
 bin/jenkins/critique-gerrit-review.py              |   7 +-
 bin/jenkins/dockerized-impala-preserve-vars.py     |   2 +-
 bin/jenkins/populate_m2_directory.py               |   2 +-
 bin/push_to_asf.py                                 |  56 +--
 bin/start-impala-cluster.py                        |   6 +-
 bin/validate-unified-backend-test-filters.py       |   4 +-
 common/function-registry/gen_builtins_catalog.py   |   1 +
 .../gen_geospatial_udf_wrappers.py                 |  11 +-
 common/thrift/generate_error_codes.py              |   2 +-
 common/thrift/generate_metrics.py                  |   2 +-
 docker/annotate.py                                 |   1 +
 docker/monitor.py                                  |  19 +-
 docker/test-with-docker.py                         |   3 +-
 fe/src/test/resources/hive-site.xml.py             |   1 +
 infra/deploy/deploy.py                             | 465 ---------------------
 infra/python/deps/download_requirements            |   3 +-
 infra/python/deps/find_py26.py                     |  47 ---
 infra/python/deps/pip_download.py                  |   4 +-
 .../impala_py_lib/jenkins/generate_junitxml.py     |  17 +-
 .../impala_py_lib/jenkins/junitxml_prune_notrun.py |   1 +
 shell/impala_shell.py                              |   2 +-
 testdata/bin/setup-ranger.sh                       |   4 +-
 testdata/cluster/admin                             |   2 +
 .../common/etc/hadoop/conf/core-site.xml.py        |   1 +
 .../common/etc/hadoop/conf/ozone-site.xml.py       |   1 +
 .../common/etc/hadoop/conf/yarn-site.xml.py        |   8 +-
 tests/common/impala_cluster.py                     |   6 +-
 tests/comparison/data_generator_mapper.py          |   4 +-
 39 files changed, 115 insertions(+), 650 deletions(-)

diff --git a/be/src/codegen/gen_ir_descriptions.py 
b/be/src/codegen/gen_ir_descriptions.py
index 18387af35..e76e5ef55 100755
--- a/be/src/codegen/gen_ir_descriptions.py
+++ b/be/src/codegen/gen_ir_descriptions.py
@@ -19,7 +19,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 from string import Template
 import os
 import shutil
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index eb95eaf4a..3817efb42 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -53,6 +53,8 @@
 #
 # The script is directly executable, and it takes no parameters:
 #     ./bootstrap_toolchain.py
+
+from __future__ import absolute_import, division, print_function
 import logging
 import glob
 import multiprocessing.pool
@@ -102,20 +104,6 @@ OS_MAPPING = [
 ]
 
 
-def check_output(cmd_args):
-  """Run the command and return the output. Raise an exception if the command 
returns
-     a non-zero return code. Similar to subprocess.check_output() which is 
only provided
-     in python 2.7.
-  """
-  process = subprocess.Popen(cmd_args, stdout=subprocess.PIPE, 
stderr=subprocess.STDOUT,
-      universal_newlines=True)
-  stdout, _ = process.communicate()
-  if process.wait() != 0:
-    raise Exception("Command with args '%s' failed with exit code %s:\n%s"
-        % (cmd_args, process.returncode, stdout))
-  return stdout
-
-
 def get_toolchain_compiler():
   """Return the <name>-<version> string for the compiler package to use for the
   toolchain."""
@@ -139,16 +127,16 @@ def wget_and_unpack_package(download_path, file_name, 
destination, wget_no_clobb
              "--output-document={0}/{1}".format(destination, file_name)]
       if wget_no_clobber:
         cmd.append("--no-clobber")
-      check_output(cmd)
+      subprocess.check_call(cmd)
       break
-    except Exception as e:
+    except subprocess.CalledProcessError as e:
       if attempt == NUM_ATTEMPTS:
         raise
       logging.error("Download failed; retrying after sleep: " + str(e))
       time.sleep(10 + random.random() * 5)  # Sleep between 10 and 15 seconds.
   logging.info("Extracting {0}".format(file_name))
-  check_output(["tar", "xzf", os.path.join(destination, file_name),
-                "--directory={0}".format(destination)])
+  subprocess.check_call(["tar", "xzf", os.path.join(destination, file_name),
+                         "--directory={0}".format(destination)])
   os.unlink(os.path.join(destination, file_name))
 
 
@@ -403,8 +391,9 @@ def get_platform_release_label(release=None):
     if lsb_release_cache:
       release = lsb_release_cache
     else:
-      lsb_release = check_output(["lsb_release", "-irs"])
-      release = "".join(map(lambda x: x.lower(), lsb_release.split()))
+      lsb_release = subprocess.check_output(
+          ["lsb_release", "-irs"], universal_newlines=True)
+      release = "".join([x.lower() for x in lsb_release.split()])
       # Only need to check against the major release if RHEL, CentOS or Suse
       for distro in ['centos', 'rocky', 'almalinux', 'redhatenterprise',
                      'redhatenterpriseserver', 'suse']:
@@ -418,20 +407,6 @@ def get_platform_release_label(release=None):
   raise Exception("Could not find package label for OS version: 
{0}.".format(release))
 
 
-def check_output(cmd_args):
-  """Run the command and return the output. Raise an exception if the command 
returns
-     a non-zero return code. Similar to subprocess.check_output() which is 
only provided
-     in python 2.7.
-  """
-  process = subprocess.Popen(cmd_args, stdout=subprocess.PIPE, 
stderr=subprocess.STDOUT,
-      universal_newlines=True)
-  stdout, _ = process.communicate()
-  if process.wait() != 0:
-    raise Exception("Command with args '%s' failed with exit code %s:\n%s"
-        % (cmd_args, process.returncode, stdout))
-  return stdout
-
-
 def check_custom_toolchain(toolchain_packages_home, packages):
   missing = []
   for p in packages:
@@ -470,7 +445,7 @@ def create_directory_from_env_var(env_var):
 
 
 def get_unique_toolchain_downloads(packages):
-  toolchain_packages = map(ToolchainPackage, packages)
+  toolchain_packages = [ToolchainPackage(p) for p in packages]
   unique_pkg_directories = set()
   unique_packages = []
   for p in toolchain_packages:
@@ -489,11 +464,11 @@ def get_toolchain_downloads():
       "llvm", explicit_version=os.environ.get("IMPALA_LLVM_DEBUG_VERSION"))
   gcc_package = ToolchainPackage("gcc")
   toolchain_packages += [llvm_package, llvm_package_asserts, gcc_package]
-  toolchain_packages += map(ToolchainPackage,
+  toolchain_packages += [ToolchainPackage(p) for p in
       ["avro", "binutils", "boost", "breakpad", "bzip2", "calloncehack", 
"cctz", "cmake",
        "crcutil", "curl", "flatbuffers", "gdb", "gflags", "glog", 
"gperftools", "gtest",
        "jwt-cpp", "libev", "libunwind", "lz4", "openldap", "orc", "protobuf",
-       "python", "rapidjson", "re2", "snappy", "tpc-h", "tpc-ds", "zlib", 
"zstd"])
+       "python", "rapidjson", "re2", "snappy", "tpc-h", "tpc-ds", "zlib", 
"zstd"]]
   python3_package = ToolchainPackage(
       "python", explicit_version=os.environ.get("IMPALA_PYTHON3_VERSION"))
   toolchain_packages += [python3_package]
diff --git a/bin/check-pylint-py3k.sh b/bin/check-pylint-py3k.sh
index 9dca19c87..305ab990a 100755
--- a/bin/check-pylint-py3k.sh
+++ b/bin/check-pylint-py3k.sh
@@ -68,18 +68,15 @@ for file in $(git ls-files '**/*.py'); do
     if [[ "${file}" =~ "shell/" && ! "${file}" =~ "tests/shell" ]]; then
         continue
     fi
-    # For the moment, the focus is on enforcing py3k checks on files that use 
the
-    # impala-python virtualenv. Ignore executable python files that do not
-    # use impala-python. In practice, this tends to be scripts used during the
-    # build or various scripts for developers in bin.
+    # Ignore files that are created to run with python3.
     FIRST_LINE=$(head -n1 ${file})
     if [[ "${file}: ${FIRST_LINE}" =~ "#!" ]]; then
         if [[ "${FIRST_LINE}" =~ "python3" ]]; then
             >&2 echo "SKIPPING: ${file} is already using python3: 
${FIRST_LINE}"
             continue
         fi
-        if [[ ! "${FIRST_LINE}" =~ "impala-python" ]]; then
-            >&2 echo "SKIPPING: ${file} is not using impala-python: 
${FIRST_LINE}"
+        if [[ "${FIRST_LINE}" =~ "/bin/bash" ]]; then
+            >&2 echo "SKIPPING: ${file} is a weird bash/python hybrid: 
${FIRST_LINE}"
             continue
         fi
     fi
diff --git a/bin/check-rat-report.py b/bin/check-rat-report.py
index 0346a16ee..b3781928f 100755
--- a/bin/check-rat-report.py
+++ b/bin/check-rat-report.py
@@ -33,7 +33,7 @@
 # time, and the RAT JAR is not included in the Impala repo; it must be 
downloaded
 # separately.
 
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 import fnmatch
 import re
 import sys
diff --git a/bin/collect_minidumps.py b/bin/collect_minidumps.py
index c4fc601ea..57bc50f3f 100755
--- a/bin/collect_minidumps.py
+++ b/bin/collect_minidumps.py
@@ -24,7 +24,7 @@
 #       --role_name=statestored --max_output_size=50000000 
--end_time=1463033495000 \
 #       --output_file_path=/tmp/minidump_package.tar.gz
 
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 import os
 import re
 import sys
diff --git a/bin/compare_branches.py b/bin/compare_branches.py
index 143fa7242..7ce51b3d7 100755
--- a/bin/compare_branches.py
+++ b/bin/compare_branches.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 # Future imports must happen at the beginning of the file
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 
 HELP = '''
 Compares two specified branches, using the Gerrit Change-Id as the
@@ -249,7 +249,7 @@ def main():
   jira_key_pat = re.compile(r'(IMPALA-\d+)')
   skip_commits_matching = options.skip_commits_matching.format(
       branch=options.target_branch)
-  for change_id, (commit_hash, msg, author, date, body) in 
source_commits.iteritems():
+  for change_id, (commit_hash, msg, author, date, body) in 
source_commits.items():
     change_in_target = change_id in target_commits
     ignore_by_config = commit_hash in ignored_commits[
         (options.source_branch, options.target_branch)]
diff --git a/bin/create-test-configuration.sh b/bin/create-test-configuration.sh
index 4ee8f5fa2..90861eeb1 100755
--- a/bin/create-test-configuration.sh
+++ b/bin/create-test-configuration.sh
@@ -154,6 +154,10 @@ ln -s "${CONFIG_DIR}/hive-site_events_cleanup.xml" 
hive-site-events-cleanup/hive
 export HIVE_VARIANT=ranger_auth
 HIVE_RANGER_CONF_DIR=hive-site-ranger-auth
 $IMPALA_HOME/bin/generate_xml_config.py hive-site.xml.py 
hive-site_ranger_auth.xml
+
+# Cleanup pycache if created
+rm -rf __pycache__
+
 rm -rf $HIVE_RANGER_CONF_DIR
 mkdir -p $HIVE_RANGER_CONF_DIR
 ln -s "${CONFIG_DIR}/hive-site_ranger_auth.xml" 
$HIVE_RANGER_CONF_DIR/hive-site.xml
diff --git a/bin/diagnostics/collect_diagnostics.py 
b/bin/diagnostics/collect_diagnostics.py
index a305a0d0a..2257d6ff5 100755
--- a/bin/diagnostics/collect_diagnostics.py
+++ b/bin/diagnostics/collect_diagnostics.py
@@ -17,6 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import absolute_import, division, print_function
 import argparse
 import datetime
 import errno
diff --git a/bin/gen-backend-test-script.py b/bin/gen-backend-test-script.py
index 2e3793b99..6e572a839 100755
--- a/bin/gen-backend-test-script.py
+++ b/bin/gen-backend-test-script.py
@@ -20,6 +20,7 @@
 # used by the build system (see be/CMakeLists.txt for usage) and is not 
intended to be
 # run directly.
 
+from __future__ import absolute_import, division, print_function
 import argparse
 import os
 import stat
diff --git a/bin/gen_build_version.py b/bin/gen_build_version.py
index c6468fa69..3f4f62b1c 100755
--- a/bin/gen_build_version.py
+++ b/bin/gen_build_version.py
@@ -22,7 +22,7 @@
 # This script generates be/src/common/version.h which contains the build 
version based
 # on the git hash.
 
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 import os
 from subprocess import call
 
diff --git a/bin/generate_xml_config.py b/bin/generate_xml_config.py
index e8f03dd11..d4e7e6561 100755
--- a/bin/generate_xml_config.py
+++ b/bin/generate_xml_config.py
@@ -39,8 +39,7 @@ REPL:
 
 """
 
-from __future__ import with_statement
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 import imp
 import os
 import re
@@ -79,7 +78,7 @@ def dump_config(d, source_path, out):
       -->
       <configuration>""".format(source_path=os.path.abspath(source_path))
   print(dedent(header), file=out)
-  for k, v in sorted(d.iteritems()):
+  for k, v in sorted(d.items()):
     try:
       k_new = _substitute_env_vars(k)
       if isinstance(v, int):
@@ -113,7 +112,7 @@ def main():
                     .format(path=in_path))
 
   tmp_path = out_path + ".tmp"
-  with file(tmp_path, "w") as out:
+  with open(tmp_path, "w") as out:
     try:
       dump_config(conf, in_path, out)
     except:  # noqa
diff --git a/bin/jenkins/critique-gerrit-review.py 
b/bin/jenkins/critique-gerrit-review.py
index 7f44f47bd..c165017c4 100755
--- a/bin/jenkins/critique-gerrit-review.py
+++ b/bin/jenkins/critique-gerrit-review.py
@@ -36,7 +36,7 @@
 # TODO: generalise to other warnings
 # * clang-tidy
 
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 from argparse import ArgumentParser
 from collections import defaultdict
 import json
@@ -143,7 +143,8 @@ def get_misc_comments(revision):
   #  @@ -128 +133,2 @@ if __name__ == "__main__":
   RANGE_RE = re.compile(r"^@@ -[0-9,]* \+([0-9]*).*$")
 
-  diff = check_output(["git", "diff", "-U0", "{0}^..{0}".format(revision)])
+  diff = check_output(["git", "diff", "-U0", "{0}^..{0}".format(revision)],
+      universal_newlines=True)
   curr_file = None
   check_source_file = False
   curr_line_num = 0
@@ -213,7 +214,7 @@ def post_review_to_gerrit(review_input):
 
 
 def merge_comments(a, b):
-  for k, v in b.iteritems():
+  for k, v in b.items():
     a[k].extend(v)
 
 
diff --git a/bin/jenkins/dockerized-impala-preserve-vars.py 
b/bin/jenkins/dockerized-impala-preserve-vars.py
index 24abea465..53bfe61dc 100755
--- a/bin/jenkins/dockerized-impala-preserve-vars.py
+++ b/bin/jenkins/dockerized-impala-preserve-vars.py
@@ -28,7 +28,7 @@
 # If an environment variable is not defined in the current environment,
 # it is omitted with a warning.
 
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 import sys
 import os
 
diff --git a/bin/jenkins/populate_m2_directory.py 
b/bin/jenkins/populate_m2_directory.py
index de56808df..13ddca148 100755
--- a/bin/jenkins/populate_m2_directory.py
+++ b/bin/jenkins/populate_m2_directory.py
@@ -16,7 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 import json
 import subprocess
 import os
diff --git a/bin/push_to_asf.py b/bin/push_to_asf.py
index fac612c4a..828ed6b14 100755
--- a/bin/push_to_asf.py
+++ b/bin/push_to_asf.py
@@ -35,7 +35,7 @@
 # TODO: Improve console output: replace 'print' with format strings
 #       and use sys.stderr/sys.stdout.
 
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 import logging
 import optparse
 import os
@@ -63,25 +63,6 @@ class Colors(object):
   RESET = __on_tty("\x1b[m")
 
 
-def check_output(*popenargs, **kwargs):
-  r"""Run command with arguments and return its output as a byte string.
-  Backported from Python 2.7 as it's implemented as pure python on stdlib.
-  >>> check_output(['/usr/bin/python', '--version'])
-  Python 2.6.2
-  """
-  process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
-  output, unused_err = process.communicate()
-  retcode = process.poll()
-  if retcode:
-    cmd = kwargs.get("args")
-    if cmd is None:
-      cmd = popenargs[0]
-    error = subprocess.CalledProcessError(retcode, cmd)
-    error.output = output
-    raise error
-  return output
-
-
 def confirm_prompt(prompt):
   """
   Issue the given prompt, and ask the user to confirm yes/no. Returns true
@@ -94,7 +75,7 @@ def confirm_prompt(prompt):
       print("Not running interactively. Assuming 'N'.")
       return False
 
-    r = raw_input().strip().lower()
+    r = input().strip().lower()
     if r in ['y', 'yes', '']:
       return True
     elif r in ['n', 'no']:
@@ -103,7 +84,8 @@ def confirm_prompt(prompt):
 
 def get_my_email():
   """ Return the email address in the user's git config. """
-  return check_output(['git', 'config', '--get', 'user.email']).strip()
+  return subprocess.check_output(['git', 'config', '--get', 'user.email'],
+      universal_newlines=True).strip()
 
 
 def check_apache_remote():
@@ -112,9 +94,8 @@ def check_apache_remote():
   Otherwise, exits with an error message.
   """
   try:
-    url = check_output(\
-        ['git', 'config', '--local', '--get',
-            'remote.' + OPTIONS.apache_remote + '.url']).strip()
+    url = subprocess.check_output(['git', 'config', '--local', '--get',
+        'remote.' + OPTIONS.apache_remote + '.url'], 
universal_newlines=True).strip()
   except subprocess.CalledProcessError:
     print("No remote named " + OPTIONS.apache_remote
         + ". Please set one up, for example with: ", file=sys.stderr)
@@ -133,8 +114,8 @@ def check_gerrit_remote():
   Otherwise, exits with an error message.
   """
   try:
-    url = check_output(['git', 'config', '--local', '--get',
-                        'remote.' + OPTIONS.gerrit_remote + '.url']).strip()
+    url = subprocess.check_output(['git', 'config', '--local', '--get',
+        'remote.' + OPTIONS.gerrit_remote + '.url'], 
universal_newlines=True).strip()
   except subprocess.CalledProcessError:
     print("No remote named " + OPTIONS.gerrit_remote
         + ". Please set one up following ", file=sys.stderr)
@@ -157,7 +138,8 @@ def fetch(remote):
 
 def get_branches(remote):
   """ Fetch a dictionary mapping branch name to SHA1 hash from the given 
remote. """
-  out = check_output(["git", "ls-remote", remote, "refs/heads/*"])
+  out = subprocess.check_output(["git", "ls-remote", remote, "refs/heads/*"],
+      universal_newlines=True)
   ret = {}
   for l in out.splitlines():
     sha, ref = l.split("\t")
@@ -169,20 +151,22 @@ def get_branches(remote):
 def rev_parse(rev):
   """Run git rev-parse, returning the sha1, or None if not found"""
   try:
-    return check_output(['git', 'rev-parse', rev], 
stderr=subprocess.STDOUT).strip()
+    return subprocess.check_output(['git', 'rev-parse', rev],
+        stderr=subprocess.STDOUT, universal_newlines=True).strip()
   except subprocess.CalledProcessError:
     return None
 
 
 def rev_list(arg):
   """Run git rev-list, returning an array of SHA1 commit hashes."""
-  return check_output(['git', 'rev-list', arg]).splitlines()
+  return subprocess.check_output(['git', 'rev-list', arg],
+      universal_newlines=True).splitlines()
 
 
 def describe_commit(rev):
   """ Return a one-line description of a commit. """
-  return subprocess.check_output(
-      ['git', 'log', '--color', '-n1', '--oneline', rev]).strip()
+  return subprocess.check_output(['git', 'log', '--color', '-n1', '--oneline', 
rev],
+      universal_newlines=True).strip()
 
 
 def is_fast_forward(ancestor, child):
@@ -191,7 +175,8 @@ def is_fast_forward(ancestor, child):
   could be fast-forward merged.
   """
   try:
-    merge_base = check_output(['git', 'merge-base', ancestor, child]).strip()
+    merge_base = subprocess.check_output(['git', 'merge-base', ancestor, 
child],
+        universal_newlines=True).strip()
   except:
     # If either of the commits is unknown, count this as a non-fast-forward.
     return False
@@ -200,7 +185,8 @@ def is_fast_forward(ancestor, child):
 
 def get_committer_email(rev):
   """ Return the email address of the committer of the given revision. """
-  return check_output(['git', 'log', '-n1', '--pretty=format:%ce', 
rev]).strip()
+  return subprocess.check_output(['git', 'log', '-n1', '--pretty=format:%ce', 
rev],
+      universal_newlines=True).strip()
 
 
 def do_update(branch, gerrit_sha, apache_sha):
@@ -294,7 +280,7 @@ def main():
   # Check the current state of branches on Apache.
   # For each branch, we try to update it if the revisions don't match.
   apache_branches = get_branches(OPTIONS.apache_remote)
-  for branch, apache_sha in sorted(apache_branches.iteritems()):
+  for branch, apache_sha in sorted(apache_branches.items()):
     gerrit_sha = rev_parse("remotes/" + OPTIONS.gerrit_remote + "/" + branch)
     print("Branch '%s':\t" % branch, end='')
     if gerrit_sha is None:
diff --git a/bin/start-impala-cluster.py b/bin/start-impala-cluster.py
index 5f93f8170..54c47b86d 100755
--- a/bin/start-impala-cluster.py
+++ b/bin/start-impala-cluster.py
@@ -34,7 +34,7 @@ from datetime import datetime
 from getpass import getuser
 from time import sleep, time
 from optparse import OptionParser, SUPPRESS_HELP
-from subprocess import call, check_call
+from subprocess import call, check_call, check_output
 from testdata.common import cgroups
 from tests.common.environ import build_flavor_timeout
 from tests.common.impala_cluster import (ImpalaCluster, DEFAULT_BEESWAX_PORT,
@@ -787,10 +787,6 @@ if __name__ == "__main__":
   if options.docker_network is None:
     cluster_ops = MiniClusterOperations()
   else:
-    if sys.version_info < (2, 7):
-      raise Exception("Docker minicluster only supported on Python 2.7+")
-    # We use some functions in the docker code that don't exist in Python 2.6.
-    from subprocess import check_output
     cluster_ops = DockerMiniClusterOperations(options.docker_network)
 
   # If core-site.xml is missing, it likely means that we are missing config
diff --git a/bin/validate-unified-backend-test-filters.py 
b/bin/validate-unified-backend-test-filters.py
index a978917b2..42db3bf36 100755
--- a/bin/validate-unified-backend-test-filters.py
+++ b/bin/validate-unified-backend-test-filters.py
@@ -17,7 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 import os
 import subprocess
 import sys
@@ -32,7 +32,7 @@ def get_set_of_tests(unified_binary, filters):
                "--gtest_list_tests"]
     if filters is not None:
         command.append("--gtest_filter={0}".format(filters))
-    p = subprocess.Popen(command, stdout=subprocess.PIPE)
+    p = subprocess.Popen(command, stdout=subprocess.PIPE, 
universal_newlines=True)
     out, err = p.communicate()
     if p.returncode != 0:
         print("FAILED: Unified backend test executable returned an error when 
trying\n"
diff --git a/common/function-registry/gen_builtins_catalog.py 
b/common/function-registry/gen_builtins_catalog.py
index 6d472f5e3..485c47365 100755
--- a/common/function-registry/gen_builtins_catalog.py
+++ b/common/function-registry/gen_builtins_catalog.py
@@ -20,6 +20,7 @@
 # This script generates the FE calls to populate the builtins.
 # To add a builtin, add an entry to impala_functions.py.
 
+from __future__ import absolute_import, division, print_function
 import os
 import impala_functions
 
diff --git a/common/function-registry/gen_geospatial_udf_wrappers.py 
b/common/function-registry/gen_geospatial_udf_wrappers.py
index 73f85d78f..19fa0191d 100755
--- a/common/function-registry/gen_geospatial_udf_wrappers.py
+++ b/common/function-registry/gen_geospatial_udf_wrappers.py
@@ -22,6 +22,7 @@
 # in Impala. A generated class is extending the original UDF and adding wrapper
 # 'evaluate' methods projecting the varargs method as an n parameter method.
 
+from __future__ import absolute_import, division, print_function
 import os
 from gen_builtins_catalog import FE_PATH
 
@@ -141,14 +142,14 @@ package {package};
     return "%s.java" % self.generate_wrapper_name()
 
 
-WRAPPERS = [Wrapper("%s.ST_ConvexHull" % UDF_PACKAGE, BYTE_TYPE, range(2, 9, 
1)),
-            Wrapper("%s.ST_LineString" % UDF_PACKAGE, DOUBLE_TYPE, range(2, 
15, 2),
+WRAPPERS = [Wrapper("%s.ST_ConvexHull" % UDF_PACKAGE, BYTE_TYPE, list(range(2, 
9, 1))),
+            Wrapper("%s.ST_LineString" % UDF_PACKAGE, DOUBLE_TYPE, 
list(range(2, 15, 2)),
               ARGUMENT_EXCEPTION),
-            Wrapper("%s.ST_MultiPoint" % UDF_PACKAGE, DOUBLE_TYPE, range(2, 
15, 2),
+            Wrapper("%s.ST_MultiPoint" % UDF_PACKAGE, DOUBLE_TYPE, 
list(range(2, 15, 2)),
               ARGUMENT_LENGTH_EXCEPTION),
-            Wrapper("%s.ST_Polygon" % UDF_PACKAGE, DOUBLE_TYPE, range(6, 15, 
2),
+            Wrapper("%s.ST_Polygon" % UDF_PACKAGE, DOUBLE_TYPE, list(range(6, 
15, 2)),
               ARGUMENT_LENGTH_EXCEPTION),
-            Wrapper("%s.ST_Union" % UDF_PACKAGE, BYTE_TYPE, range(2, 15, 1))]
+            Wrapper("%s.ST_Union" % UDF_PACKAGE, BYTE_TYPE, list(range(2, 15, 
1)))]
 
 if __name__ == "__main__":
   if not os.path.exists(FE_PATH):
diff --git a/common/thrift/generate_error_codes.py 
b/common/thrift/generate_error_codes.py
index 02b7029ef..e1091bd16 100755
--- a/common/thrift/generate_error_codes.py
+++ b/common/thrift/generate_error_codes.py
@@ -25,7 +25,7 @@
 #
 # TODO Add support for SQL Error Codes
 #      https://msdn.microsoft.com/en-us/library/ms714687%28v=vs.85%29.aspx
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 error_codes = (
   ("OK", 0, ""),
 
diff --git a/common/thrift/generate_metrics.py 
b/common/thrift/generate_metrics.py
index 8534d0e0e..c261213b2 100755
--- a/common/thrift/generate_metrics.py
+++ b/common/thrift/generate_metrics.py
@@ -17,7 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 import sys
 import os
 import re
diff --git a/docker/annotate.py b/docker/annotate.py
index f83854f18..c7f6b13fa 100755
--- a/docker/annotate.py
+++ b/docker/annotate.py
@@ -23,6 +23,7 @@
 #
 # Note that "python -u" disables buffering.
 
+from __future__ import absolute_import, division, print_function
 import sys
 import datetime
 
diff --git a/docker/monitor.py b/docker/monitor.py
index 530c63c7c..261dfb270 100644
--- a/docker/monitor.py
+++ b/docker/monitor.py
@@ -32,6 +32,7 @@
 #       re.compile(">>> "))
 #   timeline.create("output.html")
 
+from __future__ import absolute_import, division, print_function
 import datetime
 import json
 import logging
@@ -161,7 +162,7 @@ class ContainerMonitor(object):
       # Container may no longer exist.
       return None
     try:
-      statcontents = file(os.path.join(dirname, stat)).read()
+      statcontents = open(os.path.join(dirname, stat)).read()
       return statcontents.replace("\n", " ").strip()
     except IOError as e:
       # Ignore errors; cgroup can disappear on us.
@@ -186,7 +187,7 @@ class ContainerMonitor(object):
     self.min_memory_usage_gb = None
     self.max_memory_usage_gb = None
 
-    with file(self.output_path, "w") as output:
+    with open(self.output_path, "w") as output:
       while self.keep_monitoring:
         # Use a single timestamp for a given round of monitoring.
         now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
@@ -229,7 +230,7 @@ class Timeline(object):
     """
     interesting_lines = [
         line.strip()
-        for line in file(container.logfile)
+        for line in open(container.logfile)
         if self.interesting_re.search(line)]
     return [(container.name,) + split_timestamp(line) for line in 
interesting_lines]
 
@@ -273,15 +274,15 @@ class Timeline(object):
         dt = ts - prev_ts
         assert type(dt) == float
         if dt != 0:
-          yield ts, container, (user_cpu - prev_user)/dt/USER_HZ,\
-              (system_cpu - prev_system)/dt/USER_HZ
+          yield ts, container, (user_cpu - prev_user) // dt // USER_HZ,\
+              (system_cpu - prev_system) // dt // USER_HZ
       prev_by_container[container] = ts, user_cpu, system_cpu
 
     # Now update container totals
     for c in self.containers:
       if c.id in prev_by_container:
         _, u, s = prev_by_container[c.id]
-        c.total_user_cpu, c.total_system_cpu = u / USER_HZ, s / USER_HZ
+        c.total_user_cpu, c.total_system_cpu = u // USER_HZ, s // USER_HZ
       if c.id in peak_rss_by_container:
         c.peak_total_rss = peak_rss_by_container[c.id]
 
@@ -321,7 +322,7 @@ class Timeline(object):
     for c in self.containers:
       container_by_id[c.id] = c
 
-    for ts, container_id, user, system in 
self.parse_metrics(file(self.monitor_file)):
+    for ts, container_id, user, system in 
self.parse_metrics(open(self.monitor_file)):
       container = container_by_id.get(container_id)
       if not container:
         continue
@@ -337,9 +338,9 @@ class Timeline(object):
       metrics_by_container.setdefault(
           container.name, []).append((ts - min_ts, user, system))
 
-    with file(output, "w") as o:
+    with open(output, "w") as o:
       template_path = os.path.join(os.path.dirname(__file__), 
"timeline.html.template")
-      shutil.copyfileobj(file(template_path), o)
+      shutil.copyfileobj(open(template_path), o)
       o.write("\n<script>\nvar data = \n")
       json.dump(dict(buildname=self.buildname, timeline=timeline_json,
           metrics=metrics_by_container, max_ts=(max_metric_ts - min_ts)), o, 
indent=2)
diff --git a/docker/test-with-docker.py b/docker/test-with-docker.py
index 55ae96372..5d473f2a0 100755
--- a/docker/test-with-docker.py
+++ b/docker/test-with-docker.py
@@ -21,6 +21,7 @@
 # We do not use Impala's python environment here, nor do we depend on
 # non-standard python libraries to avoid needing extra build steps before
 # triggering this.
+from __future__ import absolute_import, division, print_function
 import argparse
 import datetime
 import itertools
@@ -781,7 +782,7 @@ class TestSuiteRunner(object):
     test_with_docker = self.test_with_docker
     suite = self.suite
     envs = ["-e", "NUM_CONCURRENT_TESTS=" + 
str(test_with_docker.parallel_test_concurrency)]
-    for k, v in sorted(suite.envs.iteritems()):
+    for k, v in sorted(suite.envs.items()):
       envs.append("-e")
       envs.append("%s=%s" % (k, v))
 
diff --git a/fe/src/test/resources/hive-site.xml.py 
b/fe/src/test/resources/hive-site.xml.py
index e7cf99d45..563d32fc8 100644
--- a/fe/src/test/resources/hive-site.xml.py
+++ b/fe/src/test/resources/hive-site.xml.py
@@ -17,6 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import absolute_import, division, print_function
 import os
 
 hive_major_version = int(os.environ['IMPALA_HIVE_VERSION'][0])
diff --git a/infra/deploy/deploy.py b/infra/deploy/deploy.py
deleted file mode 100644
index 58ebb69fa..000000000
--- a/infra/deploy/deploy.py
+++ /dev/null
@@ -1,465 +0,0 @@
-#!/usr/bin/python
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# Deploys a new Impala_Kudu service, either based on an existing Impala service
-# or from scratch.
-#
-# Prerequisites:
-# - A cluster running CDH 5.4.x and Cloudera Manager 5.4.x with x >= 7
-# - CM API Python client (http://cloudera.github.io/cm_api/docs/python-client).
-#
-# Sample usage:
-#
-#   ./deploy.py clone IMPALA_KUDU IMPALA-1
-#   Clones IMPALA-1 into a new Impala_Kudu service called "IMPALA_KUDU".
-#
-#   ./deploy.py create new_service /data/impala/
-#   Creates a new Impala_Kudu service called "new_service" using /data/impala/
-#   for its scratch directories.
-
-from __future__ import print_function
-import argparse
-import hashlib
-import os
-import re
-import time
-
-from cm_api.api_client import ApiResource
-
-IMPALA_KUDU_PARCEL_URL = os.getenv("IMPALA_KUDU_PARCEL_URL",
-                                   
"http://archive.cloudera.com/beta/impala-kudu/parcels/latest";)
-IMPALA_KUDU_PARCEL_PRODUCT = "IMPALA_KUDU"
-MAX_PARCEL_REPO_WAIT_SECS = 60
-MAX_PARCEL_WAIT_SECS = 60 * 30
-
-SERVICE_DEPENDENCIES = {
-    "HDFS" : True,
-    "HIVE" : True,
-    "YARN" : False,
-    "HBASE" : False,
-    "ZOOKEEPER" : False
-}
-
-def parse_args():
-    parser = 
argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument("--host", type=str,
-                        default="localhost",
-                        help="Hostname of the Cloudera Manager server.")
-    parser.add_argument("--user", type=str,
-                        default="admin",
-                        help="Username with which to log into Cloudera 
Manager.")
-    parser.add_argument("--password", type=str,
-                        default="admin",
-                        help="Password with which to log into Cloudera 
Manager.")
-    parser.add_argument("--cluster", type=str,
-                        help="Name of existing cluster where the Impala_Kudu 
service "
-                        "should be added. If not specified, uses the only 
cluster or "
-                        "raises an exception if multiple clusters are found.")
-    parents_parser = argparse.ArgumentParser(add_help=False)
-    parents_parser.add_argument("service_name", type=str,
-                                help="Name of Impala_Kudu service to create.")
-    subparsers = parser.add_subparsers(dest="subparsers_name")
-    clone_parser = subparsers.add_parser("clone",
-                                         parents=[parents_parser],
-                                         help="Use an existing Impala service 
as a template for "
-                                         "the new Impala_Kudu service. To be 
used when Impala_"
-                                         "Kudu is to run side-by-side with an 
existing Impala.")
-    clone_parser.add_argument("based_on", type=str,
-                              help="Name of existing Impala service to clone 
as the basis for the "
-                              "new service.")
-    create_parser = subparsers.add_parser("create",
-                                          parents=[parents_parser],
-                                          help="create a new Impala_Kudu 
service from scratch. To "
-                                          "be used when Impala_Kudu runs in 
its own cluster.")
-    create_parser.add_argument("--master_host", type=str,
-                               help="Hostname where new Impala_Kudu service's 
master roles should "
-                               "be placed. If not specified, uses the Cloudera 
Manager Server host "
-                               "or raises an exception if that host is not 
managed.")
-    for service_type, required in SERVICE_DEPENDENCIES.iteritems():
-        create_parser.add_argument("--%s_dependency" % (service_type.lower(),),
-                                   type=str,
-                                   help="Name of %s service that the new 
Impala_Kudu service "
-                                   "should depend on. If not specified, will 
use only service of "
-                                   "that type in the cluster. Will raise an 
exception if exactly "
-                                   "one instance of that service is not found 
in the cluster. %s" %
-                                   (service_type, "REQUIRED." if required else 
""))
-    create_parser.add_argument("scratch_dirs", type=str,
-                               help="Comma-separated list of scratch 
directories to use in the new "
-                               "Impala_Kudu service.")
-
-    return parser.parse_args()
-
-def find_cluster(api, cluster_name):
-    if cluster_name:
-        cluster = api.get_cluster(cluster_name)
-    else:
-        all_clusters = api.get_all_clusters()
-        if len(all_clusters) == 0:
-            raise Exception("No clusters found; create one before calling this 
script")
-        if len(all_clusters) > 1:
-            raise Exception("Cannot use implicit cluster; there is more than 
one available")
-        cluster = all_clusters[0]
-    print("Found cluster: %s" % (cluster.displayName, ))
-    return cluster
-
-def find_dependencies(args, cluster):
-    deps = []
-
-    # { service type : { service name : service }}
-    services_by_type = {}
-    for service in cluster.get_all_services():
-        service_dict = services_by_type.get(service.type, {})
-        service_dict[service.name] = service
-        services_by_type[service.type] = service_dict
-
-    for service_type, required in SERVICE_DEPENDENCIES.iteritems():
-        candidates = services_by_type.get(service_type, {})
-        arg = getattr(args, service_type.lower() + "_dependency")
-        if arg:
-            found = candidates.get(arg, None)
-            if not found:
-                raise Exception("Could not find dependency service (type %s, 
name %s)" %
-                                (service_type, arg))
-            print("Found explicit dependency service %s" % (found.name))
-            deps.append(found)
-        else:
-            if not required:
-                print("Skipping optional dependency of type %s" % 
(service_type,))
-                continue
-            if len(candidates) > 1:
-                raise Exception("Found %d possible implicit dependency 
services of type %s" %
-                                (len(candidates), service_type))
-            elif len(candidates) == 0:
-                raise Exception("Could not find implicit dependency service of 
type %s" %
-                                (service_type,))
-            else:
-                found = candidates.values()[0]
-                print("Found implicit dependency service %s" % (found.name,))
-                deps.append(found)
-    return deps
-
-def check_new_service_does_not_exist(api, cluster, new_name):
-    for service in cluster.get_all_services():
-        if service.displayName == new_name:
-            raise Exception("New service name %s already in use" % (new_name,))
-
-    print("New service name %s is not in use" % (new_name,))
-
-def find_template_service(api, cluster, based_on):
-    template_service = None
-    for service in cluster.get_all_services():
-        if based_on and service.displayName == based_on:
-            if service.type != "IMPALA":
-                raise Exception("Based-on service %s is of wrong type %s" %
-                                (based_on, service.type))
-            print("Found based-on service: %s" % (based_on,))
-            template_service = service
-
-    if based_on and not template_service:
-        raise Exception("Could not find based-on service: %s" % (based_on,))
-
-    return template_service
-
-def find_master_host(api, cm_hostname, master_hostname):
-    for h in api.get_all_hosts():
-        if master_hostname and h.hostname == master_hostname:
-            print("Found master host %s" % (master_hostname,))
-            return h
-        elif not master_hostname and h.hostname == cm_hostname:
-            print("Found implicit master host on CM host %s" % (cm_hostname,))
-            return h
-
-    if master_hostname:
-        raise Exception("Could not find master host with hostname %s" % 
(master_hostname,))
-    else:
-        raise Exception("Could not find implicit master host %s" % 
(cm_hostname,))
-
-def get_best_parcel(api, cluster):
-    parcels_available_remotely = []
-    parcels_downloaded = []
-    parcels_distributed = []
-    parcels_activated = []
-    for parcel in cluster.get_all_parcels():
-        if parcel.product == IMPALA_KUDU_PARCEL_PRODUCT:
-            if parcel.stage == "AVAILABLE_REMOTELY":
-                parcels_available_remotely.append(parcel)
-            elif parcel.stage == "DOWNLOADED":
-                parcels_downloaded.append(parcel)
-            elif parcel.stage == "DISTRIBUTED":
-                parcels_distributed.append(parcel)
-            elif parcel.stage == "ACTIVATED":
-                parcels_activated.append(parcel)
-
-    def parcel_cmp(p1, p2):
-        if p1.version < p2.version:
-            return -1
-        elif p1.version > p2.version:
-            return 1
-        else:
-            return 0
-
-    # Prefer the "closest" parcel, even if it's not the newest by version.
-    if len(parcels_activated) > 0:
-        parcel = sorted(parcels_activated, key=lambda parcel: 
parcel.version)[0]
-    elif len(parcels_distributed) > 0:
-        parcel = sorted(parcels_distributed, key=lambda parcel: 
parcel.version)[0]
-    elif len(parcels_downloaded) > 0:
-        parcel = sorted(parcels_downloaded, key=lambda parcel: 
parcel.version)[0]
-    elif len(parcels_available_remotely) > 0:
-        parcel = sorted(parcels_available_remotely, key=lambda parcel: 
parcel.version)[0]
-    else:
-        parcel = None
-
-    if parcel:
-        print("Chose best parcel %s-%s (stage %s)" % (parcel.product,
-                                                      parcel.version,
-                                                      parcel.stage))
-    else:
-        print("Found no candidate parcels")
-
-    return parcel
-
-def ensure_parcel_repo_added(api):
-    cm = api.get_cloudera_manager()
-    config = cm.get_config(view='summary')
-    parcel_urls = config.get("REMOTE_PARCEL_REPO_URLS", "").split(",")
-    if IMPALA_KUDU_PARCEL_URL in parcel_urls:
-        print("Impala_Kudu parcel URL already present")
-    else:
-        print("Adding Impala_Kudu parcel URL")
-        parcel_urls.append(IMPALA_KUDU_PARCEL_URL)
-        config["REMOTE_PARCEL_REPO_URLS"] = ",".join(parcel_urls)
-        cm.update_config(config)
-
-def wait_for_parcel_stage(cluster, parcel, stage):
-    for attempt in xrange(1, MAX_PARCEL_WAIT_SECS + 1):
-        new_parcel = cluster.get_parcel(parcel.product, parcel.version)
-        if new_parcel.stage == stage:
-            return
-        if new_parcel.state.errors:
-            raise Exception(str(new_parcel.state.errors))
-        print("progress: %s / %s" % (new_parcel.state.progress,
-                                     new_parcel.state.totalProgress))
-        time.sleep(1)
-    else:
-        raise Exception("Parcel %s-%s did not reach stage %s in %d seconds" %
-                        (parcel.product, parcel.version, stage, 
MAX_PARCEL_WAIT_SECS,))
-
-def ensure_parcel_activated(cluster, parcel):
-    parcel_stage = parcel.stage
-    if parcel_stage == "AVAILABLE_REMOTELY":
-        print("Downloading parcel: %s-%s " % (parcel.product, parcel.version))
-        parcel.start_download()
-        wait_for_parcel_stage(cluster, parcel, "DOWNLOADED")
-        print("Downloaded parcel: %s-%s " % (parcel.product, parcel.version))
-        parcel_stage = "DOWNLOADED"
-    if parcel_stage == "DOWNLOADED":
-        print("Distributing parcel: %s-%s " % (parcel.product, parcel.version))
-        parcel.start_distribution()
-        wait_for_parcel_stage(cluster, parcel, "DISTRIBUTED")
-        print("Distributed parcel: %s-%s " % (parcel.product, parcel.version))
-        parcel_stage = "DISTRIBUTED"
-    if parcel_stage == "DISTRIBUTED":
-        print("Activating parcel: %s-%s " % (parcel.product, parcel.version))
-        parcel.activate()
-        wait_for_parcel_stage(cluster, parcel, "ACTIVATED")
-        print("Activated parcel: %s-%s " % (parcel.product, parcel.version))
-        parcel_stage = "ACTIVATED"
-
-    print("Parcel %s-%s is activated" % (parcel.product, parcel.version))
-
-def print_configs(entity_name, config_dict):
-    for attr, value in config_dict.iteritems():
-        print("Set %s config %s=\'%s\'" % (entity_name, attr, value))
-
-def create_new_service(api, cluster, new_name, deps, scratch_dirs, 
master_host):
-    new_service = cluster.create_service(new_name, "IMPALA")
-    print("Created new service %s" % (new_name,))
-
-    service_config = {}
-    for d in deps:
-        service_config[d.type.lower() + "_service"] = d.name
-    service_config["impala_service_env_safety_valve"] = "IMPALA_KUDU=1"
-    new_service.update_config(service_config)
-    print_configs("service " + new_name, service_config)
-
-    for rcg in new_service.get_all_role_config_groups():
-        if rcg.roleType == "IMPALAD":
-            scratch_dirs_dict = { "scratch_dirs" : scratch_dirs }
-            rcg.update_config(scratch_dirs_dict)
-            print_configs("rcg " + rcg.displayName, scratch_dirs_dict)
-            for h in cluster.list_hosts():
-                if h.hostId == master_host.hostId:
-                    continue
-
-                # This formula is embedded within CM. If we don't strictly
-                # adhere to it, we can't use any %s-%s-%s naming scheme.
-                md5 = hashlib.md5()
-                md5.update(h.hostId)
-                new_role_name = "%s-%s-%s" % (new_name, rcg.roleType, 
md5.hexdigest())
-                new_service.create_role(new_role_name, rcg.roleType, h.hostId)
-                print("Created new role %s" % (new_role_name,))
-        else:
-            md5 = hashlib.md5()
-            md5.update(master_host.hostId)
-            new_role_name = "%s-%s-%s" % (new_name, rcg.roleType, 
md5.hexdigest())
-            new_service.create_role(new_role_name, rcg.roleType, 
master_host.hostId)
-            print("Created new role %s" % (new_role_name,))
-
-def transform_path(rcg_name, rcg_config_dict, rcg_config_name):
-    # TODO: Do a better job with paths where the role type is embedded.
-    #
-    # e.g. /var/log/impalad/lineage --> /var/log/impalad2/lineage
-    val = rcg_config_dict.get(rcg_config_name, None)
-    if not val:
-        raise Exception("Could not get %s config for rcg %s" %
-                        (rcg_config_name, rcg_name,))
-    new_val = re.sub(r"/(.*?)(/?)$", r"/\g<1>2\g<2>", val)
-    return {rcg_config_name : new_val}
-
-def transform_port(rcg_name, rcg_config_dict, rcg_config_name):
-    # TODO: Actually resolve all port conflicts.
-    val = rcg_config_dict.get(rcg_config_name, None)
-    if not val:
-        raise Exception("Could not get %s config for rcg %s" %
-                        (rcg_config_name, rcg_name,))
-    try:
-        val_int = int(val)
-    except ValueError as e:
-        raise Exception("Could not convert %s config (%s) for rcg %s into 
integer" %
-                        (rcg_config_name, val, rcg_name))
-
-    new_val = str(val_int + 7)
-    return {rcg_config_name : new_val}
-
-def transform_rcg_config(rcg):
-    summary = rcg.get_config()
-    full = {}
-    for name, config in rcg.get_config("full").iteritems():
-        full[name] = config.value if config.value else config.default
-
-    new_config = summary
-
-    if rcg.roleType == "IMPALAD":
-        new_config.update(transform_path(rcg.name, full, 
"audit_event_log_dir"))
-        new_config.update(transform_path(rcg.name, full, 
"lineage_event_log_dir"))
-        new_config.update(transform_path(rcg.name, full, "log_dir"))
-        new_config.update(transform_path(rcg.name, full, "scratch_dirs"))
-
-        new_config.update(transform_port(rcg.name, full, "beeswax_port"))
-        new_config.update(transform_port(rcg.name, full, "hs2_port"))
-        new_config.update(transform_port(rcg.name, full, 
"impalad_webserver_port"))
-        new_config.update(transform_port(rcg.name, full, 
"state_store_subscriber_port"))
-    elif rcg.roleType == "CATALOGSERVER":
-        new_config.update(transform_path(rcg.name, full, "log_dir"))
-
-        new_config.update(transform_port(rcg.name, full, 
"catalog_service_port"))
-        new_config.update(transform_port(rcg.name, full, 
"catalogserver_webserver_port"))
-    elif rcg.roleType == "STATESTORE":
-        new_config.update(transform_path(rcg.name, full, "log_dir"))
-
-        new_config.update(transform_port(rcg.name, full, "state_store_port"))
-        new_config.update(transform_port(rcg.name, full, 
"statestore_webserver_port"))
-
-    return new_config
-
-def clone_existing_service(cluster, new_name, template_service):
-    new_service = cluster.create_service(new_name, "IMPALA")
-    print("Created new service %s" % (new_name,))
-
-    service_config, _ = template_service.get_config()
-    service_config["impala_service_env_safety_valve"] = "IMPALA_KUDU=1"
-    new_service.update_config(service_config)
-    print_configs("service " + new_name, service_config)
-
-    saved_special_port = None
-    i = 0
-    for old_rcg in template_service.get_all_role_config_groups():
-        if old_rcg.name != ("%s-%s-BASE" % (template_service.name, 
old_rcg.roleType)):
-            new_rcg_name = "%s-%s-%d" % (new_name, old_rcg.roleType, i)
-            i += 1
-            new_rcg = new_service.create_role_config_group(new_rcg_name,
-                                                           new_rcg_name,
-                                                           old_rcg.roleType)
-            print("Created new rcg %s" % (new_rcg_name,))
-        else:
-            new_rcg = new_service.get_role_config_group("%s-%s-BASE" % 
(new_name,
-                                                                        
old_rcg.roleType))
-        new_rcg_config = transform_rcg_config(old_rcg)
-        new_rcg.update_config(new_rcg_config)
-        print_configs("rcg " + new_rcg.displayName, new_rcg_config)
-        special_port = new_rcg_config.get("state_store_subscriber_port", None)
-        if special_port:
-            saved_special_port = special_port
-
-        new_role_names = []
-        for old_role in old_rcg.get_all_roles():
-            md5 = hashlib.md5()
-            md5.update(old_role.hostRef.hostId)
-            new_role_name = "%s-%s-%s" % (new_name, new_rcg.roleType, 
md5.hexdigest())
-            new_role = new_service.create_role(new_role_name,
-                                               new_rcg.roleType,
-                                               old_role.hostRef.hostId)
-            print("Created new role %s" % (new_role_name,))
-            new_role_names.append(new_role.name)
-        new_rcg.move_roles(new_role_names)
-
-    for new_rcg in new_service.get_all_role_config_groups():
-        if new_rcg.roleType == "CATALOGSERVER":
-            special_port_config_dict = { "catalogd_cmd_args_safety_valve" :
-                                         "-state_store_subscriber_port=%s" % 
(saved_special_port,) }
-            new_rcg.update_config(special_port_config_dict)
-            print_configs("rcg " + new_rcg.displayName, 
special_port_config_dict)
-
-def main():
-    args = parse_args()
-    api = ApiResource(args.host,
-                      username=args.user,
-                      password=args.password,
-                      version=10)
-
-    cluster = find_cluster(api, args.cluster)
-    check_new_service_does_not_exist(api, cluster, args.service_name)
-    if args.subparsers_name == "clone":
-        template_service = find_template_service(api, cluster, args.based_on)
-    else:
-        master_host = find_master_host(api, args.host, args.master_host)
-        deps = find_dependencies(args, cluster)
-
-    parcel = get_best_parcel(api, cluster)
-    if not parcel:
-        ensure_parcel_repo_added(api)
-
-        for attempt in xrange(1, MAX_PARCEL_REPO_WAIT_SECS + 1):
-            parcel = get_best_parcel(api, cluster)
-            if parcel:
-                break
-            print("Could not find parcel in attempt %d, will sleep and retry" %
-                  (attempt,))
-            time.sleep(1)
-        else:
-            raise Exception("No parcel showed up in %d seconds" % 
(MAX_PARCEL_REPO_WAIT_SECS,))
-
-    ensure_parcel_activated(cluster, parcel)
-    if args.subparsers_name == "create":
-        create_new_service(api, cluster, args.service_name, deps, 
args.scratch_dirs, master_host)
-    else:
-        clone_existing_service(cluster, args.service_name, template_service)
-
-if __name__ == "__main__":
-    main()
diff --git a/infra/python/deps/download_requirements 
b/infra/python/deps/download_requirements
index a1035b346..02af1d113 100755
--- a/infra/python/deps/download_requirements
+++ b/infra/python/deps/download_requirements
@@ -22,7 +22,6 @@ set -euo pipefail
 DIR="$(dirname "$0")"
 
 pushd "$DIR"
-PY26="$(./find_py26.py)"
 # Directly download packages listed in *requirements.txt, but don't install 
them.
-"$PY26" pip_download.py
+./pip_download.py
 popd
diff --git a/infra/python/deps/find_py26.py b/infra/python/deps/find_py26.py
deleted file mode 100755
index dc628c759..000000000
--- a/infra/python/deps/find_py26.py
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/python
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# This script finds Python 2.6 or higher on the system and outputs the
-# system command to stdout. The script exits with a nonzero exit code if
-# Python 2.6+ is not present.
-
-from __future__ import print_function
-import os
-import subprocess
-import sys
-import textwrap
-
-def detect_python_cmd():
-  '''Returns the system command that provides python 2.6 or greater.'''
-  paths = os.getenv("PATH").split(os.path.pathsep)
-  for cmd in ("python", "python27", "python2.7", "python-27", "python-2.7", 
"python26",
-      "python2.6", "python-26", "python-2.6"):
-    for path in paths:
-      cmd_path = os.path.join(path, cmd)
-      if not os.path.exists(cmd_path) or not os.access(cmd_path, os.X_OK):
-        continue
-      exit = subprocess.call([cmd_path, "-c", textwrap.dedent("""
-          import sys
-          sys.exit(int(sys.version_info[:2] < (2, 6)))""")])
-      if exit == 0:
-        return cmd_path
-  raise Exception("Could not find minimum required python version 2.6")
-
-
-print(detect_python_cmd())
diff --git a/infra/python/deps/pip_download.py 
b/infra/python/deps/pip_download.py
index 03713f927..2d965ac33 100755
--- a/infra/python/deps/pip_download.py
+++ b/infra/python/deps/pip_download.py
@@ -19,9 +19,9 @@
 
 # Implement the basic 'pip download' functionality in a way that gives us more 
control
 # over which archive type is downloaded and what post-download steps are 
executed.
-# This script requires Python 2.6+.
+# This script requires Python 2.7+.
 
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 import hashlib
 import multiprocessing.pool
 import os
diff --git a/lib/python/impala_py_lib/jenkins/generate_junitxml.py 
b/lib/python/impala_py_lib/jenkins/generate_junitxml.py
index 09d6ab7b4..9382d8ac3 100755
--- a/lib/python/impala_py_lib/jenkins/generate_junitxml.py
+++ b/lib/python/impala_py_lib/jenkins/generate_junitxml.py
@@ -21,12 +21,13 @@ A script for generating arbitrary junit XML reports while 
building Impala.
 These files will be consumed by jenkins.impala.io to generate reports for
 easier triaging of build and setup errors.
 """
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 import argparse
 import codecs
 import errno
 import os
 import re
+import sys
 import textwrap
 from xml.dom import minidom
 from xml.etree import ElementTree as ET
@@ -167,7 +168,10 @@ class JunitReport(object):
     junit_log_file = os.path.join(junitxml_logdir, filename)
 
     with codecs.open(junit_log_file, encoding="UTF-8", mode='w') as f:
-      f.write(unicode(self))
+      if sys.version_info.major < 3:
+        f.write(unicode(self))
+      else:
+        f.write(str(self))
 
     return junit_log_file
 
@@ -212,7 +216,10 @@ class JunitReport(object):
     else:
       # This is a string passed in on the command line. Make sure to return it 
as
       # a unicode string.
-      content = unicode(file_or_string, encoding="UTF-8")
+      if sys.version_info.major < 3:
+        content = unicode(file_or_string, encoding="UTF-8")
+      else:
+        content = file_or_string
     return JunitReport.remove_ansi_escape_sequences(content)
 
   def __unicode__(self):
@@ -223,6 +230,10 @@ class JunitReport(object):
     root_node_dom = minidom.parseString(root_node_unicode)
     return root_node_dom.toprettyxml(indent=' ' * 4)
 
+  def __str__(self):
+    if sys.version_info.major < 3:
+      return unicode(self).encode('utf-8')
+    return self.__unicode__()
 
 def get_options():
   """Parse and return command line options."""
diff --git a/lib/python/impala_py_lib/jenkins/junitxml_prune_notrun.py 
b/lib/python/impala_py_lib/jenkins/junitxml_prune_notrun.py
index 98131a7fa..02bc04ec5 100755
--- a/lib/python/impala_py_lib/jenkins/junitxml_prune_notrun.py
+++ b/lib/python/impala_py_lib/jenkins/junitxml_prune_notrun.py
@@ -21,6 +21,7 @@
 Some tests that produce JUnitXML include tests that did not run (i.e. 
status="notrun").
 This script walks through the JUnitXML and removes these elements.
 """
+from __future__ import absolute_import, division, print_function
 from optparse import OptionParser
 from xml.etree import ElementTree as ET
 from xml.dom import minidom
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index 6e6d38c44..80757b4cb 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -1784,7 +1784,7 @@ warned, it can be very long!",
   "Want to know what version of Impala you're connected to? Run the VERSION 
command to \
 find out!",
   "You can change the Impala daemon that you're connected to by using the 
CONNECT \
-command."
+command.",
   "To see how Impala will plan to run your query without actually executing 
it, use the \
 EXPLAIN command. You can change the level of detail in the EXPLAIN output by 
setting the \
 EXPLAIN_LEVEL query option.",
diff --git a/testdata/bin/setup-ranger.sh b/testdata/bin/setup-ranger.sh
index e57f79c76..47309cc71 100755
--- a/testdata/bin/setup-ranger.sh
+++ b/testdata/bin/setup-ranger.sh
@@ -36,7 +36,7 @@ function setup-ranger {
     --header="accept:application/json" \
     --header="Content-Type:application/json" \
     http://localhost:6080/service/xusers/secure/groups |
-    python -c "import sys, json; print json.load(sys.stdin)['id']")
+    python -c "import sys, json; print(json.load(sys.stdin)['id'])")
   export GROUP_ID_OWNER
 
   GROUP_ID_NON_OWNER=$(wget -qO - --auth-no-challenge --user=admin \
@@ -44,7 +44,7 @@ function setup-ranger {
     --header="accept:application/json" \
     --header="Content-Type:application/json" \
     http://localhost:6080/service/xusers/secure/groups |
-    python -c "import sys, json; print json.load(sys.stdin)['id']")
+    python -c "import sys, json; print(json.load(sys.stdin)['id'])")
   export GROUP_ID_NON_OWNER
 
   perl -wpl -e 's/\$\{([^}]+)\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
diff --git a/testdata/cluster/admin b/testdata/cluster/admin
index 16ac8e670..10f9e56ea 100755
--- a/testdata/cluster/admin
+++ b/testdata/cluster/admin
@@ -294,6 +294,8 @@ function create_cluster {
       ACTUAL_PATH="${TEMPLATE_PATH%$PY_TEMPLATE_SUFFIX}".xml
       $IMPALA_HOME/bin/generate_xml_config.py $TEMPLATE_PATH $ACTUAL_PATH
       rm $TEMPLATE_PATH
+      # Remove pycache if created
+      rm -rf $(dirname $TEMPLATE_PATH)/__pycache__
     done
   done
 }
diff --git 
a/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py 
b/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py
index 0ebc97372..a80d088ab 100644
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py
@@ -17,6 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import absolute_import, division, print_function
 import os
 import sys
 
diff --git 
a/testdata/cluster/node_templates/common/etc/hadoop/conf/ozone-site.xml.py 
b/testdata/cluster/node_templates/common/etc/hadoop/conf/ozone-site.xml.py
index 71bff9a3b..45598187e 100644
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/ozone-site.xml.py
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/ozone-site.xml.py
@@ -17,6 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import absolute_import, division, print_function
 import os
 
 CONFIG = {
diff --git 
a/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py 
b/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
index 00443a71a..f9372bf5a 100644
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
@@ -17,7 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 import os
 import sys
 
@@ -26,15 +26,15 @@ hive_major_version = int(os.environ['IMPALA_HIVE_VERSION'][0])
 
 
 def _get_system_ram_mb():
-  lines = file("/proc/meminfo").readlines()
+  lines = open("/proc/meminfo").readlines()
   memtotal_line = [l for l in lines if l.startswith('MemTotal')][0]
   mem_kb = int(memtotal_line.split()[1])
-  return mem_kb / 1024
+  return mem_kb // 1024
 
 
 def _get_yarn_nm_ram_mb():
   sys_ram = _get_system_ram_mb()
-  available_ram_gb = int(os.getenv("IMPALA_CLUSTER_MAX_MEM_GB", str(sys_ram / 1024)))
+  available_ram_gb = int(os.getenv("IMPALA_CLUSTER_MAX_MEM_GB", str(sys_ram // 1024)))
   # Fit into the following envelope:
   # - need 4GB at a bare minimum
   # - leave at least 20G for other services
diff --git a/tests/common/impala_cluster.py b/tests/common/impala_cluster.py
index e8a9aff7f..a9bc5431a 100644
--- a/tests/common/impala_cluster.py
+++ b/tests/common/impala_cluster.py
@@ -30,7 +30,7 @@ import time
 from getpass import getuser
 from random import choice
 from signal import SIGKILL
-from subprocess import check_call
+from subprocess import check_call, check_output
 from time import sleep
 
 import tests.common.environ
@@ -41,10 +41,6 @@ from tests.common.impala_service import (
     StateStoredService)
 from tests.util.shell_util import exec_process, exec_process_async
 
-if sys.version_info >= (2, 7):
-  # We use some functions in the docker code that don't exist in Python 2.6.
-  from subprocess import check_output
-
 LOG = logging.getLogger('impala_cluster')
 LOG.setLevel(level=logging.DEBUG)
 
diff --git a/tests/comparison/data_generator_mapper.py b/tests/comparison/data_generator_mapper.py
index 12135f563..35d412701 100755
--- a/tests/comparison/data_generator_mapper.py
+++ b/tests/comparison/data_generator_mapper.py
@@ -20,7 +20,7 @@
 '''This is a mapper for use with hadoop streaming. See data_generator.DatabasePopulator
    for more information on how this file is used.
 '''
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
 
 import os
 import random
@@ -45,7 +45,7 @@ for line in sys.stdin:
   batch_idx = 0
   while row_count > 0:
     table_data_generator.row_count = min(row_count, rows_per_batch)
-    table_data_generator.randomization_seed = int(random.random() * sys.maxint)
+    table_data_generator.randomization_seed = int(random.random() * sys.maxsize)
 
     # Generate input for the reducers.
     print("%s\t%s\t%s" % (table_data_generator.table.name, batch_idx,

Reply via email to