This is an automated email from the ASF dual-hosted git repository.
michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 0a42185d1 IMPALA-9627: Update utility scripts for Python 3 (part 2)
0a42185d1 is described below
commit 0a42185d17164af0c855647de25f1bc87f33ee71
Author: Michael Smith <[email protected]>
AuthorDate: Fri Mar 24 09:31:22 2023 -0700
IMPALA-9627: Update utility scripts for Python 3 (part 2)
We're starting to see environments where the system Python ('python') is
Python 3. Updates utility and build scripts to work with Python 3, and
updates check-pylint-py3k.sh to check scripts that use system python.
Fixes other issues found during a full build and test run with Python
3.8 as the default for 'python'.
Fixes an impala-shell tip that was supposed to have been two tips (and
had no space after the period when they were printed).
Removes out-of-date deploy.py and various Python 2.6 workarounds.
Testing:
- Full build with /usr/bin/python pointed to python3
- run-all-tests passed with python pointed to python3
- ran push_to_asf.py
Change-Id: Idff388aff33817b0629347f5843ec34c78f0d0cb
Reviewed-on: http://gerrit.cloudera.org:8080/19697
Reviewed-by: Michael Smith <[email protected]>
Tested-by: Michael Smith <[email protected]>
---
be/src/codegen/gen_ir_descriptions.py | 2 +-
bin/bootstrap_toolchain.py | 49 +--
bin/check-pylint-py3k.sh | 9 +-
bin/check-rat-report.py | 2 +-
bin/collect_minidumps.py | 2 +-
bin/compare_branches.py | 4 +-
bin/create-test-configuration.sh | 4 +
bin/diagnostics/collect_diagnostics.py | 1 +
bin/gen-backend-test-script.py | 1 +
bin/gen_build_version.py | 2 +-
bin/generate_xml_config.py | 7 +-
bin/jenkins/critique-gerrit-review.py | 7 +-
bin/jenkins/dockerized-impala-preserve-vars.py | 2 +-
bin/jenkins/populate_m2_directory.py | 2 +-
bin/push_to_asf.py | 56 +--
bin/start-impala-cluster.py | 6 +-
bin/validate-unified-backend-test-filters.py | 4 +-
common/function-registry/gen_builtins_catalog.py | 1 +
.../gen_geospatial_udf_wrappers.py | 11 +-
common/thrift/generate_error_codes.py | 2 +-
common/thrift/generate_metrics.py | 2 +-
docker/annotate.py | 1 +
docker/monitor.py | 19 +-
docker/test-with-docker.py | 3 +-
fe/src/test/resources/hive-site.xml.py | 1 +
infra/deploy/deploy.py | 465 ---------------------
infra/python/deps/download_requirements | 3 +-
infra/python/deps/find_py26.py | 47 ---
infra/python/deps/pip_download.py | 4 +-
.../impala_py_lib/jenkins/generate_junitxml.py | 17 +-
.../impala_py_lib/jenkins/junitxml_prune_notrun.py | 1 +
shell/impala_shell.py | 2 +-
testdata/bin/setup-ranger.sh | 4 +-
testdata/cluster/admin | 2 +
.../common/etc/hadoop/conf/core-site.xml.py | 1 +
.../common/etc/hadoop/conf/ozone-site.xml.py | 1 +
.../common/etc/hadoop/conf/yarn-site.xml.py | 8 +-
tests/common/impala_cluster.py | 6 +-
tests/comparison/data_generator_mapper.py | 4 +-
39 files changed, 115 insertions(+), 650 deletions(-)
diff --git a/be/src/codegen/gen_ir_descriptions.py
b/be/src/codegen/gen_ir_descriptions.py
index 18387af35..e76e5ef55 100755
--- a/be/src/codegen/gen_ir_descriptions.py
+++ b/be/src/codegen/gen_ir_descriptions.py
@@ -19,7 +19,7 @@
# specific language governing permissions and limitations
# under the License.
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
from string import Template
import os
import shutil
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index eb95eaf4a..3817efb42 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -53,6 +53,8 @@
#
# The script is directly executable, and it takes no parameters:
# ./bootstrap_toolchain.py
+
+from __future__ import absolute_import, division, print_function
import logging
import glob
import multiprocessing.pool
@@ -102,20 +104,6 @@ OS_MAPPING = [
]
-def check_output(cmd_args):
- """Run the command and return the output. Raise an exception if the command
returns
- a non-zero return code. Similar to subprocess.check_output() which is
only provided
- in python 2.7.
- """
- process = subprocess.Popen(cmd_args, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
- universal_newlines=True)
- stdout, _ = process.communicate()
- if process.wait() != 0:
- raise Exception("Command with args '%s' failed with exit code %s:\n%s"
- % (cmd_args, process.returncode, stdout))
- return stdout
-
-
def get_toolchain_compiler():
"""Return the <name>-<version> string for the compiler package to use for the
toolchain."""
@@ -139,16 +127,16 @@ def wget_and_unpack_package(download_path, file_name,
destination, wget_no_clobb
"--output-document={0}/{1}".format(destination, file_name)]
if wget_no_clobber:
cmd.append("--no-clobber")
- check_output(cmd)
+ subprocess.check_call(cmd)
break
- except Exception as e:
+ except subprocess.CalledProcessError as e:
if attempt == NUM_ATTEMPTS:
raise
logging.error("Download failed; retrying after sleep: " + str(e))
time.sleep(10 + random.random() * 5) # Sleep between 10 and 15 seconds.
logging.info("Extracting {0}".format(file_name))
- check_output(["tar", "xzf", os.path.join(destination, file_name),
- "--directory={0}".format(destination)])
+ subprocess.check_call(["tar", "xzf", os.path.join(destination, file_name),
+ "--directory={0}".format(destination)])
os.unlink(os.path.join(destination, file_name))
@@ -403,8 +391,9 @@ def get_platform_release_label(release=None):
if lsb_release_cache:
release = lsb_release_cache
else:
- lsb_release = check_output(["lsb_release", "-irs"])
- release = "".join(map(lambda x: x.lower(), lsb_release.split()))
+ lsb_release = subprocess.check_output(
+ ["lsb_release", "-irs"], universal_newlines=True)
+ release = "".join([x.lower() for x in lsb_release.split()])
# Only need to check against the major release if RHEL, CentOS or Suse
for distro in ['centos', 'rocky', 'almalinux', 'redhatenterprise',
'redhatenterpriseserver', 'suse']:
@@ -418,20 +407,6 @@ def get_platform_release_label(release=None):
raise Exception("Could not find package label for OS version:
{0}.".format(release))
-def check_output(cmd_args):
- """Run the command and return the output. Raise an exception if the command
returns
- a non-zero return code. Similar to subprocess.check_output() which is
only provided
- in python 2.7.
- """
- process = subprocess.Popen(cmd_args, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
- universal_newlines=True)
- stdout, _ = process.communicate()
- if process.wait() != 0:
- raise Exception("Command with args '%s' failed with exit code %s:\n%s"
- % (cmd_args, process.returncode, stdout))
- return stdout
-
-
def check_custom_toolchain(toolchain_packages_home, packages):
missing = []
for p in packages:
@@ -470,7 +445,7 @@ def create_directory_from_env_var(env_var):
def get_unique_toolchain_downloads(packages):
- toolchain_packages = map(ToolchainPackage, packages)
+ toolchain_packages = [ToolchainPackage(p) for p in packages]
unique_pkg_directories = set()
unique_packages = []
for p in toolchain_packages:
@@ -489,11 +464,11 @@ def get_toolchain_downloads():
"llvm", explicit_version=os.environ.get("IMPALA_LLVM_DEBUG_VERSION"))
gcc_package = ToolchainPackage("gcc")
toolchain_packages += [llvm_package, llvm_package_asserts, gcc_package]
- toolchain_packages += map(ToolchainPackage,
+ toolchain_packages += [ToolchainPackage(p) for p in
["avro", "binutils", "boost", "breakpad", "bzip2", "calloncehack",
"cctz", "cmake",
"crcutil", "curl", "flatbuffers", "gdb", "gflags", "glog",
"gperftools", "gtest",
"jwt-cpp", "libev", "libunwind", "lz4", "openldap", "orc", "protobuf",
- "python", "rapidjson", "re2", "snappy", "tpc-h", "tpc-ds", "zlib",
"zstd"])
+ "python", "rapidjson", "re2", "snappy", "tpc-h", "tpc-ds", "zlib",
"zstd"]]
python3_package = ToolchainPackage(
"python", explicit_version=os.environ.get("IMPALA_PYTHON3_VERSION"))
toolchain_packages += [python3_package]
diff --git a/bin/check-pylint-py3k.sh b/bin/check-pylint-py3k.sh
index 9dca19c87..305ab990a 100755
--- a/bin/check-pylint-py3k.sh
+++ b/bin/check-pylint-py3k.sh
@@ -68,18 +68,15 @@ for file in $(git ls-files '**/*.py'); do
if [[ "${file}" =~ "shell/" && ! "${file}" =~ "tests/shell" ]]; then
continue
fi
- # For the moment, the focus is on enforcing py3k checks on files that use
the
- # impala-python virtualenv. Ignore executable python files that do not
- # use impala-python. In practice, this tends to be scripts used during the
- # build or various scripts for developers in bin.
+ # Ignore files that are created to run with python3.
FIRST_LINE=$(head -n1 ${file})
if [[ "${file}: ${FIRST_LINE}" =~ "#!" ]]; then
if [[ "${FIRST_LINE}" =~ "python3" ]]; then
>&2 echo "SKIPPING: ${file} is already using python3:
${FIRST_LINE}"
continue
fi
- if [[ ! "${FIRST_LINE}" =~ "impala-python" ]]; then
- >&2 echo "SKIPPING: ${file} is not using impala-python:
${FIRST_LINE}"
+ if [[ "${FIRST_LINE}" =~ "/bin/bash" ]]; then
+ >&2 echo "SKIPPING: ${file} is a weird bash/python hybrid:
${FIRST_LINE}"
continue
fi
fi
diff --git a/bin/check-rat-report.py b/bin/check-rat-report.py
index 0346a16ee..b3781928f 100755
--- a/bin/check-rat-report.py
+++ b/bin/check-rat-report.py
@@ -33,7 +33,7 @@
# time, and the RAT JAR is not included in the Impala repo; it must be
downloaded
# separately.
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
import fnmatch
import re
import sys
diff --git a/bin/collect_minidumps.py b/bin/collect_minidumps.py
index c4fc601ea..57bc50f3f 100755
--- a/bin/collect_minidumps.py
+++ b/bin/collect_minidumps.py
@@ -24,7 +24,7 @@
# --role_name=statestored --max_output_size=50000000
--end_time=1463033495000 \
# --output_file_path=/tmp/minidump_package.tar.gz
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
import os
import re
import sys
diff --git a/bin/compare_branches.py b/bin/compare_branches.py
index 143fa7242..7ce51b3d7 100755
--- a/bin/compare_branches.py
+++ b/bin/compare_branches.py
@@ -13,7 +13,7 @@
# limitations under the License.
# Future imports must happen at the beginning of the file
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
HELP = '''
Compares two specified branches, using the Gerrit Change-Id as the
@@ -249,7 +249,7 @@ def main():
jira_key_pat = re.compile(r'(IMPALA-\d+)')
skip_commits_matching = options.skip_commits_matching.format(
branch=options.target_branch)
- for change_id, (commit_hash, msg, author, date, body) in
source_commits.iteritems():
+ for change_id, (commit_hash, msg, author, date, body) in
source_commits.items():
change_in_target = change_id in target_commits
ignore_by_config = commit_hash in ignored_commits[
(options.source_branch, options.target_branch)]
diff --git a/bin/create-test-configuration.sh b/bin/create-test-configuration.sh
index 4ee8f5fa2..90861eeb1 100755
--- a/bin/create-test-configuration.sh
+++ b/bin/create-test-configuration.sh
@@ -154,6 +154,10 @@ ln -s "${CONFIG_DIR}/hive-site_events_cleanup.xml"
hive-site-events-cleanup/hive
export HIVE_VARIANT=ranger_auth
HIVE_RANGER_CONF_DIR=hive-site-ranger-auth
$IMPALA_HOME/bin/generate_xml_config.py hive-site.xml.py
hive-site_ranger_auth.xml
+
+# Cleanup pycache if created
+rm -rf __pycache__
+
rm -rf $HIVE_RANGER_CONF_DIR
mkdir -p $HIVE_RANGER_CONF_DIR
ln -s "${CONFIG_DIR}/hive-site_ranger_auth.xml"
$HIVE_RANGER_CONF_DIR/hive-site.xml
diff --git a/bin/diagnostics/collect_diagnostics.py
b/bin/diagnostics/collect_diagnostics.py
index a305a0d0a..2257d6ff5 100755
--- a/bin/diagnostics/collect_diagnostics.py
+++ b/bin/diagnostics/collect_diagnostics.py
@@ -17,6 +17,7 @@
# specific language governing permissions and limitations
# under the License.
+from __future__ import absolute_import, division, print_function
import argparse
import datetime
import errno
diff --git a/bin/gen-backend-test-script.py b/bin/gen-backend-test-script.py
index 2e3793b99..6e572a839 100755
--- a/bin/gen-backend-test-script.py
+++ b/bin/gen-backend-test-script.py
@@ -20,6 +20,7 @@
# used by the build system (see be/CMakeLists.txt for usage) and is not
intended to be
# run directly.
+from __future__ import absolute_import, division, print_function
import argparse
import os
import stat
diff --git a/bin/gen_build_version.py b/bin/gen_build_version.py
index c6468fa69..3f4f62b1c 100755
--- a/bin/gen_build_version.py
+++ b/bin/gen_build_version.py
@@ -22,7 +22,7 @@
# This script generates be/src/common/version.h which contains the build
version based
# on the git hash.
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
import os
from subprocess import call
diff --git a/bin/generate_xml_config.py b/bin/generate_xml_config.py
index e8f03dd11..d4e7e6561 100755
--- a/bin/generate_xml_config.py
+++ b/bin/generate_xml_config.py
@@ -39,8 +39,7 @@ REPL:
"""
-from __future__ import with_statement
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
import imp
import os
import re
@@ -79,7 +78,7 @@ def dump_config(d, source_path, out):
-->
<configuration>""".format(source_path=os.path.abspath(source_path))
print(dedent(header), file=out)
- for k, v in sorted(d.iteritems()):
+ for k, v in sorted(d.items()):
try:
k_new = _substitute_env_vars(k)
if isinstance(v, int):
@@ -113,7 +112,7 @@ def main():
.format(path=in_path))
tmp_path = out_path + ".tmp"
- with file(tmp_path, "w") as out:
+ with open(tmp_path, "w") as out:
try:
dump_config(conf, in_path, out)
except: # noqa
diff --git a/bin/jenkins/critique-gerrit-review.py
b/bin/jenkins/critique-gerrit-review.py
index 7f44f47bd..c165017c4 100755
--- a/bin/jenkins/critique-gerrit-review.py
+++ b/bin/jenkins/critique-gerrit-review.py
@@ -36,7 +36,7 @@
# TODO: generalise to other warnings
# * clang-tidy
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
from argparse import ArgumentParser
from collections import defaultdict
import json
@@ -143,7 +143,8 @@ def get_misc_comments(revision):
# @@ -128 +133,2 @@ if __name__ == "__main__":
RANGE_RE = re.compile(r"^@@ -[0-9,]* \+([0-9]*).*$")
- diff = check_output(["git", "diff", "-U0", "{0}^..{0}".format(revision)])
+ diff = check_output(["git", "diff", "-U0", "{0}^..{0}".format(revision)],
+ universal_newlines=True)
curr_file = None
check_source_file = False
curr_line_num = 0
@@ -213,7 +214,7 @@ def post_review_to_gerrit(review_input):
def merge_comments(a, b):
- for k, v in b.iteritems():
+ for k, v in b.items():
a[k].extend(v)
diff --git a/bin/jenkins/dockerized-impala-preserve-vars.py
b/bin/jenkins/dockerized-impala-preserve-vars.py
index 24abea465..53bfe61dc 100755
--- a/bin/jenkins/dockerized-impala-preserve-vars.py
+++ b/bin/jenkins/dockerized-impala-preserve-vars.py
@@ -28,7 +28,7 @@
# If an environment variable is not defined in the current environment,
# it is omitted with a warning.
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
import sys
import os
diff --git a/bin/jenkins/populate_m2_directory.py
b/bin/jenkins/populate_m2_directory.py
index de56808df..13ddca148 100755
--- a/bin/jenkins/populate_m2_directory.py
+++ b/bin/jenkins/populate_m2_directory.py
@@ -16,7 +16,7 @@
# specific language governing permissions and limitations
# under the License.
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
import json
import subprocess
import os
diff --git a/bin/push_to_asf.py b/bin/push_to_asf.py
index fac612c4a..828ed6b14 100755
--- a/bin/push_to_asf.py
+++ b/bin/push_to_asf.py
@@ -35,7 +35,7 @@
# TODO: Improve console output: replace 'print' with format strings
# and use sys.stderr/sys.stdout.
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
import logging
import optparse
import os
@@ -63,25 +63,6 @@ class Colors(object):
RESET = __on_tty("\x1b[m")
-def check_output(*popenargs, **kwargs):
- r"""Run command with arguments and return its output as a byte string.
- Backported from Python 2.7 as it's implemented as pure python on stdlib.
- >>> check_output(['/usr/bin/python', '--version'])
- Python 2.6.2
- """
- process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
- output, unused_err = process.communicate()
- retcode = process.poll()
- if retcode:
- cmd = kwargs.get("args")
- if cmd is None:
- cmd = popenargs[0]
- error = subprocess.CalledProcessError(retcode, cmd)
- error.output = output
- raise error
- return output
-
-
def confirm_prompt(prompt):
"""
Issue the given prompt, and ask the user to confirm yes/no. Returns true
@@ -94,7 +75,7 @@ def confirm_prompt(prompt):
print("Not running interactively. Assuming 'N'.")
return False
- r = raw_input().strip().lower()
+ r = input().strip().lower()
if r in ['y', 'yes', '']:
return True
elif r in ['n', 'no']:
@@ -103,7 +84,8 @@ def confirm_prompt(prompt):
def get_my_email():
""" Return the email address in the user's git config. """
- return check_output(['git', 'config', '--get', 'user.email']).strip()
+ return subprocess.check_output(['git', 'config', '--get', 'user.email'],
+ universal_newlines=True).strip()
def check_apache_remote():
@@ -112,9 +94,8 @@ def check_apache_remote():
Otherwise, exits with an error message.
"""
try:
- url = check_output(\
- ['git', 'config', '--local', '--get',
- 'remote.' + OPTIONS.apache_remote + '.url']).strip()
+ url = subprocess.check_output(['git', 'config', '--local', '--get',
+ 'remote.' + OPTIONS.apache_remote + '.url'],
universal_newlines=True).strip()
except subprocess.CalledProcessError:
print("No remote named " + OPTIONS.apache_remote
+ ". Please set one up, for example with: ", file=sys.stderr)
@@ -133,8 +114,8 @@ def check_gerrit_remote():
Otherwise, exits with an error message.
"""
try:
- url = check_output(['git', 'config', '--local', '--get',
- 'remote.' + OPTIONS.gerrit_remote + '.url']).strip()
+ url = subprocess.check_output(['git', 'config', '--local', '--get',
+ 'remote.' + OPTIONS.gerrit_remote + '.url'],
universal_newlines=True).strip()
except subprocess.CalledProcessError:
print("No remote named " + OPTIONS.gerrit_remote
+ ". Please set one up following ", file=sys.stderr)
@@ -157,7 +138,8 @@ def fetch(remote):
def get_branches(remote):
""" Fetch a dictionary mapping branch name to SHA1 hash from the given
remote. """
- out = check_output(["git", "ls-remote", remote, "refs/heads/*"])
+ out = subprocess.check_output(["git", "ls-remote", remote, "refs/heads/*"],
+ universal_newlines=True)
ret = {}
for l in out.splitlines():
sha, ref = l.split("\t")
@@ -169,20 +151,22 @@ def get_branches(remote):
def rev_parse(rev):
"""Run git rev-parse, returning the sha1, or None if not found"""
try:
- return check_output(['git', 'rev-parse', rev],
stderr=subprocess.STDOUT).strip()
+ return subprocess.check_output(['git', 'rev-parse', rev],
+ stderr=subprocess.STDOUT, universal_newlines=True).strip()
except subprocess.CalledProcessError:
return None
def rev_list(arg):
"""Run git rev-list, returning an array of SHA1 commit hashes."""
- return check_output(['git', 'rev-list', arg]).splitlines()
+ return subprocess.check_output(['git', 'rev-list', arg],
+ universal_newlines=True).splitlines()
def describe_commit(rev):
""" Return a one-line description of a commit. """
- return subprocess.check_output(
- ['git', 'log', '--color', '-n1', '--oneline', rev]).strip()
+ return subprocess.check_output(['git', 'log', '--color', '-n1', '--oneline',
rev],
+ universal_newlines=True).strip()
def is_fast_forward(ancestor, child):
@@ -191,7 +175,8 @@ def is_fast_forward(ancestor, child):
could be fast-forward merged.
"""
try:
- merge_base = check_output(['git', 'merge-base', ancestor, child]).strip()
+ merge_base = subprocess.check_output(['git', 'merge-base', ancestor,
child],
+ universal_newlines=True).strip()
except:
# If either of the commits is unknown, count this as a non-fast-forward.
return False
@@ -200,7 +185,8 @@ def is_fast_forward(ancestor, child):
def get_committer_email(rev):
""" Return the email address of the committer of the given revision. """
- return check_output(['git', 'log', '-n1', '--pretty=format:%ce',
rev]).strip()
+ return subprocess.check_output(['git', 'log', '-n1', '--pretty=format:%ce',
rev],
+ universal_newlines=True).strip()
def do_update(branch, gerrit_sha, apache_sha):
@@ -294,7 +280,7 @@ def main():
# Check the current state of branches on Apache.
# For each branch, we try to update it if the revisions don't match.
apache_branches = get_branches(OPTIONS.apache_remote)
- for branch, apache_sha in sorted(apache_branches.iteritems()):
+ for branch, apache_sha in sorted(apache_branches.items()):
gerrit_sha = rev_parse("remotes/" + OPTIONS.gerrit_remote + "/" + branch)
print("Branch '%s':\t" % branch, end='')
if gerrit_sha is None:
diff --git a/bin/start-impala-cluster.py b/bin/start-impala-cluster.py
index 5f93f8170..54c47b86d 100755
--- a/bin/start-impala-cluster.py
+++ b/bin/start-impala-cluster.py
@@ -34,7 +34,7 @@ from datetime import datetime
from getpass import getuser
from time import sleep, time
from optparse import OptionParser, SUPPRESS_HELP
-from subprocess import call, check_call
+from subprocess import call, check_call, check_output
from testdata.common import cgroups
from tests.common.environ import build_flavor_timeout
from tests.common.impala_cluster import (ImpalaCluster, DEFAULT_BEESWAX_PORT,
@@ -787,10 +787,6 @@ if __name__ == "__main__":
if options.docker_network is None:
cluster_ops = MiniClusterOperations()
else:
- if sys.version_info < (2, 7):
- raise Exception("Docker minicluster only supported on Python 2.7+")
- # We use some functions in the docker code that don't exist in Python 2.6.
- from subprocess import check_output
cluster_ops = DockerMiniClusterOperations(options.docker_network)
# If core-site.xml is missing, it likely means that we are missing config
diff --git a/bin/validate-unified-backend-test-filters.py
b/bin/validate-unified-backend-test-filters.py
index a978917b2..42db3bf36 100755
--- a/bin/validate-unified-backend-test-filters.py
+++ b/bin/validate-unified-backend-test-filters.py
@@ -17,7 +17,7 @@
# specific language governing permissions and limitations
# under the License.
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
import os
import subprocess
import sys
@@ -32,7 +32,7 @@ def get_set_of_tests(unified_binary, filters):
"--gtest_list_tests"]
if filters is not None:
command.append("--gtest_filter={0}".format(filters))
- p = subprocess.Popen(command, stdout=subprocess.PIPE)
+ p = subprocess.Popen(command, stdout=subprocess.PIPE,
universal_newlines=True)
out, err = p.communicate()
if p.returncode != 0:
print("FAILED: Unified backend test executable returned an error when
trying\n"
diff --git a/common/function-registry/gen_builtins_catalog.py
b/common/function-registry/gen_builtins_catalog.py
index 6d472f5e3..485c47365 100755
--- a/common/function-registry/gen_builtins_catalog.py
+++ b/common/function-registry/gen_builtins_catalog.py
@@ -20,6 +20,7 @@
# This script generates the FE calls to populate the builtins.
# To add a builtin, add an entry to impala_functions.py.
+from __future__ import absolute_import, division, print_function
import os
import impala_functions
diff --git a/common/function-registry/gen_geospatial_udf_wrappers.py
b/common/function-registry/gen_geospatial_udf_wrappers.py
index 73f85d78f..19fa0191d 100755
--- a/common/function-registry/gen_geospatial_udf_wrappers.py
+++ b/common/function-registry/gen_geospatial_udf_wrappers.py
@@ -22,6 +22,7 @@
# in Impala. A generated class is extending the original UDF and adding wrapper
# 'evaluate' methods projecting the varargs method as an n parameter method.
+from __future__ import absolute_import, division, print_function
import os
from gen_builtins_catalog import FE_PATH
@@ -141,14 +142,14 @@ package {package};
return "%s.java" % self.generate_wrapper_name()
-WRAPPERS = [Wrapper("%s.ST_ConvexHull" % UDF_PACKAGE, BYTE_TYPE, range(2, 9,
1)),
- Wrapper("%s.ST_LineString" % UDF_PACKAGE, DOUBLE_TYPE, range(2,
15, 2),
+WRAPPERS = [Wrapper("%s.ST_ConvexHull" % UDF_PACKAGE, BYTE_TYPE, list(range(2,
9, 1))),
+ Wrapper("%s.ST_LineString" % UDF_PACKAGE, DOUBLE_TYPE,
list(range(2, 15, 2)),
ARGUMENT_EXCEPTION),
- Wrapper("%s.ST_MultiPoint" % UDF_PACKAGE, DOUBLE_TYPE, range(2,
15, 2),
+ Wrapper("%s.ST_MultiPoint" % UDF_PACKAGE, DOUBLE_TYPE,
list(range(2, 15, 2)),
ARGUMENT_LENGTH_EXCEPTION),
- Wrapper("%s.ST_Polygon" % UDF_PACKAGE, DOUBLE_TYPE, range(6, 15,
2),
+ Wrapper("%s.ST_Polygon" % UDF_PACKAGE, DOUBLE_TYPE, list(range(6,
15, 2)),
ARGUMENT_LENGTH_EXCEPTION),
- Wrapper("%s.ST_Union" % UDF_PACKAGE, BYTE_TYPE, range(2, 15, 1))]
+ Wrapper("%s.ST_Union" % UDF_PACKAGE, BYTE_TYPE, list(range(2, 15,
1)))]
if __name__ == "__main__":
if not os.path.exists(FE_PATH):
diff --git a/common/thrift/generate_error_codes.py
b/common/thrift/generate_error_codes.py
index 02b7029ef..e1091bd16 100755
--- a/common/thrift/generate_error_codes.py
+++ b/common/thrift/generate_error_codes.py
@@ -25,7 +25,7 @@
#
# TODO Add support for SQL Error Codes
# https://msdn.microsoft.com/en-us/library/ms714687%28v=vs.85%29.aspx
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
error_codes = (
("OK", 0, ""),
diff --git a/common/thrift/generate_metrics.py
b/common/thrift/generate_metrics.py
index 8534d0e0e..c261213b2 100755
--- a/common/thrift/generate_metrics.py
+++ b/common/thrift/generate_metrics.py
@@ -17,7 +17,7 @@
# specific language governing permissions and limitations
# under the License.
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
import sys
import os
import re
diff --git a/docker/annotate.py b/docker/annotate.py
index f83854f18..c7f6b13fa 100755
--- a/docker/annotate.py
+++ b/docker/annotate.py
@@ -23,6 +23,7 @@
#
# Note that "python -u" disables buffering.
+from __future__ import absolute_import, division, print_function
import sys
import datetime
diff --git a/docker/monitor.py b/docker/monitor.py
index 530c63c7c..261dfb270 100644
--- a/docker/monitor.py
+++ b/docker/monitor.py
@@ -32,6 +32,7 @@
# re.compile(">>> "))
# timeline.create("output.html")
+from __future__ import absolute_import, division, print_function
import datetime
import json
import logging
@@ -161,7 +162,7 @@ class ContainerMonitor(object):
# Container may no longer exist.
return None
try:
- statcontents = file(os.path.join(dirname, stat)).read()
+ statcontents = open(os.path.join(dirname, stat)).read()
return statcontents.replace("\n", " ").strip()
except IOError as e:
# Ignore errors; cgroup can disappear on us.
@@ -186,7 +187,7 @@ class ContainerMonitor(object):
self.min_memory_usage_gb = None
self.max_memory_usage_gb = None
- with file(self.output_path, "w") as output:
+ with open(self.output_path, "w") as output:
while self.keep_monitoring:
# Use a single timestamp for a given round of monitoring.
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f")
@@ -229,7 +230,7 @@ class Timeline(object):
"""
interesting_lines = [
line.strip()
- for line in file(container.logfile)
+ for line in open(container.logfile)
if self.interesting_re.search(line)]
return [(container.name,) + split_timestamp(line) for line in
interesting_lines]
@@ -273,15 +274,15 @@ class Timeline(object):
dt = ts - prev_ts
assert type(dt) == float
if dt != 0:
- yield ts, container, (user_cpu - prev_user)/dt/USER_HZ,\
- (system_cpu - prev_system)/dt/USER_HZ
+ yield ts, container, (user_cpu - prev_user) // dt // USER_HZ,\
+ (system_cpu - prev_system) // dt // USER_HZ
prev_by_container[container] = ts, user_cpu, system_cpu
# Now update container totals
for c in self.containers:
if c.id in prev_by_container:
_, u, s = prev_by_container[c.id]
- c.total_user_cpu, c.total_system_cpu = u / USER_HZ, s / USER_HZ
+ c.total_user_cpu, c.total_system_cpu = u // USER_HZ, s // USER_HZ
if c.id in peak_rss_by_container:
c.peak_total_rss = peak_rss_by_container[c.id]
@@ -321,7 +322,7 @@ class Timeline(object):
for c in self.containers:
container_by_id[c.id] = c
- for ts, container_id, user, system in
self.parse_metrics(file(self.monitor_file)):
+ for ts, container_id, user, system in
self.parse_metrics(open(self.monitor_file)):
container = container_by_id.get(container_id)
if not container:
continue
@@ -337,9 +338,9 @@ class Timeline(object):
metrics_by_container.setdefault(
container.name, []).append((ts - min_ts, user, system))
- with file(output, "w") as o:
+ with open(output, "w") as o:
template_path = os.path.join(os.path.dirname(__file__),
"timeline.html.template")
- shutil.copyfileobj(file(template_path), o)
+ shutil.copyfileobj(open(template_path), o)
o.write("\n<script>\nvar data = \n")
json.dump(dict(buildname=self.buildname, timeline=timeline_json,
metrics=metrics_by_container, max_ts=(max_metric_ts - min_ts)), o,
indent=2)
diff --git a/docker/test-with-docker.py b/docker/test-with-docker.py
index 55ae96372..5d473f2a0 100755
--- a/docker/test-with-docker.py
+++ b/docker/test-with-docker.py
@@ -21,6 +21,7 @@
# We do not use Impala's python environment here, nor do we depend on
# non-standard python libraries to avoid needing extra build steps before
# triggering this.
+from __future__ import absolute_import, division, print_function
import argparse
import datetime
import itertools
@@ -781,7 +782,7 @@ class TestSuiteRunner(object):
test_with_docker = self.test_with_docker
suite = self.suite
envs = ["-e", "NUM_CONCURRENT_TESTS=" +
str(test_with_docker.parallel_test_concurrency)]
- for k, v in sorted(suite.envs.iteritems()):
+ for k, v in sorted(suite.envs.items()):
envs.append("-e")
envs.append("%s=%s" % (k, v))
diff --git a/fe/src/test/resources/hive-site.xml.py
b/fe/src/test/resources/hive-site.xml.py
index e7cf99d45..563d32fc8 100644
--- a/fe/src/test/resources/hive-site.xml.py
+++ b/fe/src/test/resources/hive-site.xml.py
@@ -17,6 +17,7 @@
# specific language governing permissions and limitations
# under the License.
+from __future__ import absolute_import, division, print_function
import os
hive_major_version = int(os.environ['IMPALA_HIVE_VERSION'][0])
diff --git a/infra/deploy/deploy.py b/infra/deploy/deploy.py
deleted file mode 100644
index 58ebb69fa..000000000
--- a/infra/deploy/deploy.py
+++ /dev/null
@@ -1,465 +0,0 @@
-#!/usr/bin/python
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-# Deploys a new Impala_Kudu service, either based on an existing Impala service
-# or from scratch.
-#
-# Prerequisites:
-# - A cluster running CDH 5.4.x and Cloudera Manager 5.4.x with x >= 7
-# - CM API Python client (http://cloudera.github.io/cm_api/docs/python-client).
-#
-# Sample usage:
-#
-# ./deploy.py clone IMPALA_KUDU IMPALA-1
-# Clones IMPALA-1 into a new Impala_Kudu service called "IMPALA_KUDU".
-#
-# ./deploy.py create new_service /data/impala/
-# Creates a new Impala_Kudu service called "new_service" using /data/impala/
-# for its scratch directories.
-
-from __future__ import print_function
-import argparse
-import hashlib
-import os
-import re
-import time
-
-from cm_api.api_client import ApiResource
-
-IMPALA_KUDU_PARCEL_URL = os.getenv("IMPALA_KUDU_PARCEL_URL",
-
"http://archive.cloudera.com/beta/impala-kudu/parcels/latest")
-IMPALA_KUDU_PARCEL_PRODUCT = "IMPALA_KUDU"
-MAX_PARCEL_REPO_WAIT_SECS = 60
-MAX_PARCEL_WAIT_SECS = 60 * 30
-
-SERVICE_DEPENDENCIES = {
- "HDFS" : True,
- "HIVE" : True,
- "YARN" : False,
- "HBASE" : False,
- "ZOOKEEPER" : False
-}
-
-def parse_args():
- parser =
argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
- parser.add_argument("--host", type=str,
- default="localhost",
- help="Hostname of the Cloudera Manager server.")
- parser.add_argument("--user", type=str,
- default="admin",
- help="Username with which to log into Cloudera
Manager.")
- parser.add_argument("--password", type=str,
- default="admin",
- help="Password with which to log into Cloudera
Manager.")
- parser.add_argument("--cluster", type=str,
- help="Name of existing cluster where the Impala_Kudu
service "
- "should be added. If not specified, uses the only
cluster or "
- "raises an exception if multiple clusters are found.")
- parents_parser = argparse.ArgumentParser(add_help=False)
- parents_parser.add_argument("service_name", type=str,
- help="Name of Impala_Kudu service to create.")
- subparsers = parser.add_subparsers(dest="subparsers_name")
- clone_parser = subparsers.add_parser("clone",
- parents=[parents_parser],
- help="Use an existing Impala service
as a template for "
- "the new Impala_Kudu service. To be
used when Impala_"
- "Kudu is to run side-by-side with an
existing Impala.")
- clone_parser.add_argument("based_on", type=str,
- help="Name of existing Impala service to clone
as the basis for the "
- "new service.")
- create_parser = subparsers.add_parser("create",
- parents=[parents_parser],
- help="create a new Impala_Kudu
service from scratch. To "
- "be used when Impala_Kudu runs in
its own cluster.")
- create_parser.add_argument("--master_host", type=str,
- help="Hostname where new Impala_Kudu service's
master roles should "
- "be placed. If not specified, uses the Cloudera
Manager Server host "
- "or raises an exception if that host is not
managed.")
- for service_type, required in SERVICE_DEPENDENCIES.iteritems():
- create_parser.add_argument("--%s_dependency" % (service_type.lower(),),
- type=str,
- help="Name of %s service that the new
Impala_Kudu service "
- "should depend on. If not specified, will
use only service of "
- "that type in the cluster. Will raise an
exception if exactly "
- "one instance of that service is not found
in the cluster. %s" %
- (service_type, "REQUIRED." if required else
""))
- create_parser.add_argument("scratch_dirs", type=str,
- help="Comma-separated list of scratch
directories to use in the new "
- "Impala_Kudu service.")
-
- return parser.parse_args()
-
-def find_cluster(api, cluster_name):
- if cluster_name:
- cluster = api.get_cluster(cluster_name)
- else:
- all_clusters = api.get_all_clusters()
- if len(all_clusters) == 0:
- raise Exception("No clusters found; create one before calling this
script")
- if len(all_clusters) > 1:
- raise Exception("Cannot use implicit cluster; there is more than
one available")
- cluster = all_clusters[0]
- print("Found cluster: %s" % (cluster.displayName, ))
- return cluster
-
-def find_dependencies(args, cluster):
- deps = []
-
- # { service type : { service name : service }}
- services_by_type = {}
- for service in cluster.get_all_services():
- service_dict = services_by_type.get(service.type, {})
- service_dict[service.name] = service
- services_by_type[service.type] = service_dict
-
- for service_type, required in SERVICE_DEPENDENCIES.iteritems():
- candidates = services_by_type.get(service_type, {})
- arg = getattr(args, service_type.lower() + "_dependency")
- if arg:
- found = candidates.get(arg, None)
- if not found:
- raise Exception("Could not find dependency service (type %s,
name %s)" %
- (service_type, arg))
- print("Found explicit dependency service %s" % (found.name))
- deps.append(found)
- else:
- if not required:
- print("Skipping optional dependency of type %s" %
(service_type,))
- continue
- if len(candidates) > 1:
- raise Exception("Found %d possible implicit dependency
services of type %s" %
- (len(candidates), service_type))
- elif len(candidates) == 0:
- raise Exception("Could not find implicit dependency service of
type %s" %
- (service_type,))
- else:
- found = candidates.values()[0]
- print("Found implicit dependency service %s" % (found.name,))
- deps.append(found)
- return deps
-
-def check_new_service_does_not_exist(api, cluster, new_name):
- for service in cluster.get_all_services():
- if service.displayName == new_name:
- raise Exception("New service name %s already in use" % (new_name,))
-
- print("New service name %s is not in use" % (new_name,))
-
-def find_template_service(api, cluster, based_on):
- template_service = None
- for service in cluster.get_all_services():
- if based_on and service.displayName == based_on:
- if service.type != "IMPALA":
- raise Exception("Based-on service %s is of wrong type %s" %
- (based_on, service.type))
- print("Found based-on service: %s" % (based_on,))
- template_service = service
-
- if based_on and not template_service:
- raise Exception("Could not find based-on service: %s" % (based_on,))
-
- return template_service
-
-def find_master_host(api, cm_hostname, master_hostname):
- for h in api.get_all_hosts():
- if master_hostname and h.hostname == master_hostname:
- print("Found master host %s" % (master_hostname,))
- return h
- elif not master_hostname and h.hostname == cm_hostname:
- print("Found implicit master host on CM host %s" % (cm_hostname,))
- return h
-
- if master_hostname:
- raise Exception("Could not find master host with hostname %s" %
(master_hostname,))
- else:
- raise Exception("Could not find implicit master host %s" %
(cm_hostname,))
-
-def get_best_parcel(api, cluster):
- parcels_available_remotely = []
- parcels_downloaded = []
- parcels_distributed = []
- parcels_activated = []
- for parcel in cluster.get_all_parcels():
- if parcel.product == IMPALA_KUDU_PARCEL_PRODUCT:
- if parcel.stage == "AVAILABLE_REMOTELY":
- parcels_available_remotely.append(parcel)
- elif parcel.stage == "DOWNLOADED":
- parcels_downloaded.append(parcel)
- elif parcel.stage == "DISTRIBUTED":
- parcels_distributed.append(parcel)
- elif parcel.stage == "ACTIVATED":
- parcels_activated.append(parcel)
-
- def parcel_cmp(p1, p2):
- if p1.version < p2.version:
- return -1
- elif p1.version > p2.version:
- return 1
- else:
- return 0
-
- # Prefer the "closest" parcel, even if it's not the newest by version.
- if len(parcels_activated) > 0:
- parcel = sorted(parcels_activated, key=lambda parcel:
parcel.version)[0]
- elif len(parcels_distributed) > 0:
- parcel = sorted(parcels_distributed, key=lambda parcel:
parcel.version)[0]
- elif len(parcels_downloaded) > 0:
- parcel = sorted(parcels_downloaded, key=lambda parcel:
parcel.version)[0]
- elif len(parcels_available_remotely) > 0:
- parcel = sorted(parcels_available_remotely, key=lambda parcel:
parcel.version)[0]
- else:
- parcel = None
-
- if parcel:
- print("Chose best parcel %s-%s (stage %s)" % (parcel.product,
- parcel.version,
- parcel.stage))
- else:
- print("Found no candidate parcels")
-
- return parcel
-
-def ensure_parcel_repo_added(api):
- cm = api.get_cloudera_manager()
- config = cm.get_config(view='summary')
- parcel_urls = config.get("REMOTE_PARCEL_REPO_URLS", "").split(",")
- if IMPALA_KUDU_PARCEL_URL in parcel_urls:
- print("Impala_Kudu parcel URL already present")
- else:
- print("Adding Impala_Kudu parcel URL")
- parcel_urls.append(IMPALA_KUDU_PARCEL_URL)
- config["REMOTE_PARCEL_REPO_URLS"] = ",".join(parcel_urls)
- cm.update_config(config)
-
-def wait_for_parcel_stage(cluster, parcel, stage):
- for attempt in xrange(1, MAX_PARCEL_WAIT_SECS + 1):
- new_parcel = cluster.get_parcel(parcel.product, parcel.version)
- if new_parcel.stage == stage:
- return
- if new_parcel.state.errors:
- raise Exception(str(new_parcel.state.errors))
- print("progress: %s / %s" % (new_parcel.state.progress,
- new_parcel.state.totalProgress))
- time.sleep(1)
- else:
- raise Exception("Parcel %s-%s did not reach stage %s in %d seconds" %
- (parcel.product, parcel.version, stage,
MAX_PARCEL_WAIT_SECS,))
-
-def ensure_parcel_activated(cluster, parcel):
- parcel_stage = parcel.stage
- if parcel_stage == "AVAILABLE_REMOTELY":
- print("Downloading parcel: %s-%s " % (parcel.product, parcel.version))
- parcel.start_download()
- wait_for_parcel_stage(cluster, parcel, "DOWNLOADED")
- print("Downloaded parcel: %s-%s " % (parcel.product, parcel.version))
- parcel_stage = "DOWNLOADED"
- if parcel_stage == "DOWNLOADED":
- print("Distributing parcel: %s-%s " % (parcel.product, parcel.version))
- parcel.start_distribution()
- wait_for_parcel_stage(cluster, parcel, "DISTRIBUTED")
- print("Distributed parcel: %s-%s " % (parcel.product, parcel.version))
- parcel_stage = "DISTRIBUTED"
- if parcel_stage == "DISTRIBUTED":
- print("Activating parcel: %s-%s " % (parcel.product, parcel.version))
- parcel.activate()
- wait_for_parcel_stage(cluster, parcel, "ACTIVATED")
- print("Activated parcel: %s-%s " % (parcel.product, parcel.version))
- parcel_stage = "ACTIVATED"
-
- print("Parcel %s-%s is activated" % (parcel.product, parcel.version))
-
-def print_configs(entity_name, config_dict):
- for attr, value in config_dict.iteritems():
- print("Set %s config %s=\'%s\'" % (entity_name, attr, value))
-
-def create_new_service(api, cluster, new_name, deps, scratch_dirs,
master_host):
- new_service = cluster.create_service(new_name, "IMPALA")
- print("Created new service %s" % (new_name,))
-
- service_config = {}
- for d in deps:
- service_config[d.type.lower() + "_service"] = d.name
- service_config["impala_service_env_safety_valve"] = "IMPALA_KUDU=1"
- new_service.update_config(service_config)
- print_configs("service " + new_name, service_config)
-
- for rcg in new_service.get_all_role_config_groups():
- if rcg.roleType == "IMPALAD":
- scratch_dirs_dict = { "scratch_dirs" : scratch_dirs }
- rcg.update_config(scratch_dirs_dict)
- print_configs("rcg " + rcg.displayName, scratch_dirs_dict)
- for h in cluster.list_hosts():
- if h.hostId == master_host.hostId:
- continue
-
- # This formula is embedded within CM. If we don't strictly
- # adhere to it, we can't use any %s-%s-%s naming scheme.
- md5 = hashlib.md5()
- md5.update(h.hostId)
- new_role_name = "%s-%s-%s" % (new_name, rcg.roleType,
md5.hexdigest())
- new_service.create_role(new_role_name, rcg.roleType, h.hostId)
- print("Created new role %s" % (new_role_name,))
- else:
- md5 = hashlib.md5()
- md5.update(master_host.hostId)
- new_role_name = "%s-%s-%s" % (new_name, rcg.roleType,
md5.hexdigest())
- new_service.create_role(new_role_name, rcg.roleType,
master_host.hostId)
- print("Created new role %s" % (new_role_name,))
-
-def transform_path(rcg_name, rcg_config_dict, rcg_config_name):
- # TODO: Do a better job with paths where the role type is embedded.
- #
- # e.g. /var/log/impalad/lineage --> /var/log/impalad2/lineage
- val = rcg_config_dict.get(rcg_config_name, None)
- if not val:
- raise Exception("Could not get %s config for rcg %s" %
- (rcg_config_name, rcg_name,))
- new_val = re.sub(r"/(.*?)(/?)$", r"/\g<1>2\g<2>", val)
- return {rcg_config_name : new_val}
-
-def transform_port(rcg_name, rcg_config_dict, rcg_config_name):
- # TODO: Actually resolve all port conflicts.
- val = rcg_config_dict.get(rcg_config_name, None)
- if not val:
- raise Exception("Could not get %s config for rcg %s" %
- (rcg_config_name, rcg_name,))
- try:
- val_int = int(val)
- except ValueError as e:
- raise Exception("Could not convert %s config (%s) for rcg %s into
integer" %
- (rcg_config_name, val, rcg_name))
-
- new_val = str(val_int + 7)
- return {rcg_config_name : new_val}
-
-def transform_rcg_config(rcg):
- summary = rcg.get_config()
- full = {}
- for name, config in rcg.get_config("full").iteritems():
- full[name] = config.value if config.value else config.default
-
- new_config = summary
-
- if rcg.roleType == "IMPALAD":
- new_config.update(transform_path(rcg.name, full,
"audit_event_log_dir"))
- new_config.update(transform_path(rcg.name, full,
"lineage_event_log_dir"))
- new_config.update(transform_path(rcg.name, full, "log_dir"))
- new_config.update(transform_path(rcg.name, full, "scratch_dirs"))
-
- new_config.update(transform_port(rcg.name, full, "beeswax_port"))
- new_config.update(transform_port(rcg.name, full, "hs2_port"))
- new_config.update(transform_port(rcg.name, full,
"impalad_webserver_port"))
- new_config.update(transform_port(rcg.name, full,
"state_store_subscriber_port"))
- elif rcg.roleType == "CATALOGSERVER":
- new_config.update(transform_path(rcg.name, full, "log_dir"))
-
- new_config.update(transform_port(rcg.name, full,
"catalog_service_port"))
- new_config.update(transform_port(rcg.name, full,
"catalogserver_webserver_port"))
- elif rcg.roleType == "STATESTORE":
- new_config.update(transform_path(rcg.name, full, "log_dir"))
-
- new_config.update(transform_port(rcg.name, full, "state_store_port"))
- new_config.update(transform_port(rcg.name, full,
"statestore_webserver_port"))
-
- return new_config
-
-def clone_existing_service(cluster, new_name, template_service):
- new_service = cluster.create_service(new_name, "IMPALA")
- print("Created new service %s" % (new_name,))
-
- service_config, _ = template_service.get_config()
- service_config["impala_service_env_safety_valve"] = "IMPALA_KUDU=1"
- new_service.update_config(service_config)
- print_configs("service " + new_name, service_config)
-
- saved_special_port = None
- i = 0
- for old_rcg in template_service.get_all_role_config_groups():
- if old_rcg.name != ("%s-%s-BASE" % (template_service.name,
old_rcg.roleType)):
- new_rcg_name = "%s-%s-%d" % (new_name, old_rcg.roleType, i)
- i += 1
- new_rcg = new_service.create_role_config_group(new_rcg_name,
- new_rcg_name,
- old_rcg.roleType)
- print("Created new rcg %s" % (new_rcg_name,))
- else:
- new_rcg = new_service.get_role_config_group("%s-%s-BASE" %
(new_name,
-
old_rcg.roleType))
- new_rcg_config = transform_rcg_config(old_rcg)
- new_rcg.update_config(new_rcg_config)
- print_configs("rcg " + new_rcg.displayName, new_rcg_config)
- special_port = new_rcg_config.get("state_store_subscriber_port", None)
- if special_port:
- saved_special_port = special_port
-
- new_role_names = []
- for old_role in old_rcg.get_all_roles():
- md5 = hashlib.md5()
- md5.update(old_role.hostRef.hostId)
- new_role_name = "%s-%s-%s" % (new_name, new_rcg.roleType,
md5.hexdigest())
- new_role = new_service.create_role(new_role_name,
- new_rcg.roleType,
- old_role.hostRef.hostId)
- print("Created new role %s" % (new_role_name,))
- new_role_names.append(new_role.name)
- new_rcg.move_roles(new_role_names)
-
- for new_rcg in new_service.get_all_role_config_groups():
- if new_rcg.roleType == "CATALOGSERVER":
- special_port_config_dict = { "catalogd_cmd_args_safety_valve" :
- "-state_store_subscriber_port=%s" %
(saved_special_port,) }
- new_rcg.update_config(special_port_config_dict)
- print_configs("rcg " + new_rcg.displayName,
special_port_config_dict)
-
-def main():
- args = parse_args()
- api = ApiResource(args.host,
- username=args.user,
- password=args.password,
- version=10)
-
- cluster = find_cluster(api, args.cluster)
- check_new_service_does_not_exist(api, cluster, args.service_name)
- if args.subparsers_name == "clone":
- template_service = find_template_service(api, cluster, args.based_on)
- else:
- master_host = find_master_host(api, args.host, args.master_host)
- deps = find_dependencies(args, cluster)
-
- parcel = get_best_parcel(api, cluster)
- if not parcel:
- ensure_parcel_repo_added(api)
-
- for attempt in xrange(1, MAX_PARCEL_REPO_WAIT_SECS + 1):
- parcel = get_best_parcel(api, cluster)
- if parcel:
- break
- print("Could not find parcel in attempt %d, will sleep and retry" %
- (attempt,))
- time.sleep(1)
- else:
- raise Exception("No parcel showed up in %d seconds" %
(MAX_PARCEL_REPO_WAIT_SECS,))
-
- ensure_parcel_activated(cluster, parcel)
- if args.subparsers_name == "create":
- create_new_service(api, cluster, args.service_name, deps,
args.scratch_dirs, master_host)
- else:
- clone_existing_service(cluster, args.service_name, template_service)
-
-if __name__ == "__main__":
- main()
diff --git a/infra/python/deps/download_requirements
b/infra/python/deps/download_requirements
index a1035b346..02af1d113 100755
--- a/infra/python/deps/download_requirements
+++ b/infra/python/deps/download_requirements
@@ -22,7 +22,6 @@ set -euo pipefail
DIR="$(dirname "$0")"
pushd "$DIR"
-PY26="$(./find_py26.py)"
# Directly download packages listed in *requirements.txt, but don't install
them.
-"$PY26" pip_download.py
+./pip_download.py
popd
diff --git a/infra/python/deps/find_py26.py b/infra/python/deps/find_py26.py
deleted file mode 100755
index dc628c759..000000000
--- a/infra/python/deps/find_py26.py
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/usr/bin/python
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# This script finds Python 2.6 or higher on the system and outputs the
-# system command to stdout. The script exits with a nonzero exit code if
-# Python 2.6+ is not present.
-
-from __future__ import print_function
-import os
-import subprocess
-import sys
-import textwrap
-
-def detect_python_cmd():
- '''Returns the system command that provides python 2.6 or greater.'''
- paths = os.getenv("PATH").split(os.path.pathsep)
- for cmd in ("python", "python27", "python2.7", "python-27", "python-2.7",
"python26",
- "python2.6", "python-26", "python-2.6"):
- for path in paths:
- cmd_path = os.path.join(path, cmd)
- if not os.path.exists(cmd_path) or not os.access(cmd_path, os.X_OK):
- continue
- exit = subprocess.call([cmd_path, "-c", textwrap.dedent("""
- import sys
- sys.exit(int(sys.version_info[:2] < (2, 6)))""")])
- if exit == 0:
- return cmd_path
- raise Exception("Could not find minimum required python version 2.6")
-
-
-print(detect_python_cmd())
diff --git a/infra/python/deps/pip_download.py
b/infra/python/deps/pip_download.py
index 03713f927..2d965ac33 100755
--- a/infra/python/deps/pip_download.py
+++ b/infra/python/deps/pip_download.py
@@ -19,9 +19,9 @@
# Implement the basic 'pip download' functionality in a way that gives us more
control
# over which archive type is downloaded and what post-download steps are
executed.
-# This script requires Python 2.6+.
+# This script requires Python 2.7+.
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
import hashlib
import multiprocessing.pool
import os
diff --git a/lib/python/impala_py_lib/jenkins/generate_junitxml.py
b/lib/python/impala_py_lib/jenkins/generate_junitxml.py
index 09d6ab7b4..9382d8ac3 100755
--- a/lib/python/impala_py_lib/jenkins/generate_junitxml.py
+++ b/lib/python/impala_py_lib/jenkins/generate_junitxml.py
@@ -21,12 +21,13 @@ A script for generating arbitrary junit XML reports while
building Impala.
These files will be consumed by jenkins.impala.io to generate reports for
easier triaging of build and setup errors.
"""
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
import argparse
import codecs
import errno
import os
import re
+import sys
import textwrap
from xml.dom import minidom
from xml.etree import ElementTree as ET
@@ -167,7 +168,10 @@ class JunitReport(object):
junit_log_file = os.path.join(junitxml_logdir, filename)
with codecs.open(junit_log_file, encoding="UTF-8", mode='w') as f:
- f.write(unicode(self))
+ if sys.version_info.major < 3:
+ f.write(unicode(self))
+ else:
+ f.write(str(self))
return junit_log_file
@@ -212,7 +216,10 @@ class JunitReport(object):
else:
# This is a string passed in on the command line. Make sure to return it
as
# a unicode string.
- content = unicode(file_or_string, encoding="UTF-8")
+ if sys.version_info.major < 3:
+ content = unicode(file_or_string, encoding="UTF-8")
+ else:
+ content = file_or_string
return JunitReport.remove_ansi_escape_sequences(content)
def __unicode__(self):
@@ -223,6 +230,10 @@ class JunitReport(object):
root_node_dom = minidom.parseString(root_node_unicode)
return root_node_dom.toprettyxml(indent=' ' * 4)
+ def __str__(self):
+ if sys.version_info.major < 3:
+ return unicode(self).encode('utf-8')
+ return self.__unicode__()
def get_options():
"""Parse and return command line options."""
diff --git a/lib/python/impala_py_lib/jenkins/junitxml_prune_notrun.py
b/lib/python/impala_py_lib/jenkins/junitxml_prune_notrun.py
index 98131a7fa..02bc04ec5 100755
--- a/lib/python/impala_py_lib/jenkins/junitxml_prune_notrun.py
+++ b/lib/python/impala_py_lib/jenkins/junitxml_prune_notrun.py
@@ -21,6 +21,7 @@
Some tests that produce JUnitXML include tests that did not run (i.e.
status="notrun").
This script walks through the JUnitXML and removes these elements.
"""
+from __future__ import absolute_import, division, print_function
from optparse import OptionParser
from xml.etree import ElementTree as ET
from xml.dom import minidom
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index 6e6d38c44..80757b4cb 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -1784,7 +1784,7 @@ warned, it can be very long!",
"Want to know what version of Impala you're connected to? Run the VERSION
command to \
find out!",
"You can change the Impala daemon that you're connected to by using the
CONNECT \
-command."
+command.",
"To see how Impala will plan to run your query without actually executing
it, use the \
EXPLAIN command. You can change the level of detail in the EXPLAIN output by
setting the \
EXPLAIN_LEVEL query option.",
diff --git a/testdata/bin/setup-ranger.sh b/testdata/bin/setup-ranger.sh
index e57f79c76..47309cc71 100755
--- a/testdata/bin/setup-ranger.sh
+++ b/testdata/bin/setup-ranger.sh
@@ -36,7 +36,7 @@ function setup-ranger {
--header="accept:application/json" \
--header="Content-Type:application/json" \
http://localhost:6080/service/xusers/secure/groups |
- python -c "import sys, json; print json.load(sys.stdin)['id']")
+ python -c "import sys, json; print(json.load(sys.stdin)['id'])")
export GROUP_ID_OWNER
GROUP_ID_NON_OWNER=$(wget -qO - --auth-no-challenge --user=admin \
@@ -44,7 +44,7 @@ function setup-ranger {
--header="accept:application/json" \
--header="Content-Type:application/json" \
http://localhost:6080/service/xusers/secure/groups |
- python -c "import sys, json; print json.load(sys.stdin)['id']")
+ python -c "import sys, json; print(json.load(sys.stdin)['id'])")
export GROUP_ID_NON_OWNER
perl -wpl -e 's/\$\{([^}]+)\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
diff --git a/testdata/cluster/admin b/testdata/cluster/admin
index 16ac8e670..10f9e56ea 100755
--- a/testdata/cluster/admin
+++ b/testdata/cluster/admin
@@ -294,6 +294,8 @@ function create_cluster {
ACTUAL_PATH="${TEMPLATE_PATH%$PY_TEMPLATE_SUFFIX}".xml
$IMPALA_HOME/bin/generate_xml_config.py $TEMPLATE_PATH $ACTUAL_PATH
rm $TEMPLATE_PATH
+ # Remove pycache if created
+ rm -rf $(dirname $TEMPLATE_PATH)/__pycache__
done
done
}
diff --git
a/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py
b/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py
index 0ebc97372..a80d088ab 100644
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/core-site.xml.py
@@ -17,6 +17,7 @@
# specific language governing permissions and limitations
# under the License.
+from __future__ import absolute_import, division, print_function
import os
import sys
diff --git
a/testdata/cluster/node_templates/common/etc/hadoop/conf/ozone-site.xml.py
b/testdata/cluster/node_templates/common/etc/hadoop/conf/ozone-site.xml.py
index 71bff9a3b..45598187e 100644
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/ozone-site.xml.py
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/ozone-site.xml.py
@@ -17,6 +17,7 @@
# specific language governing permissions and limitations
# under the License.
+from __future__ import absolute_import, division, print_function
import os
CONFIG = {
diff --git
a/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
b/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
index 00443a71a..f9372bf5a 100644
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
@@ -17,7 +17,7 @@
# specific language governing permissions and limitations
# under the License.
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
import os
import sys
@@ -26,15 +26,15 @@ hive_major_version =
int(os.environ['IMPALA_HIVE_VERSION'][0])
def _get_system_ram_mb():
- lines = file("/proc/meminfo").readlines()
+ lines = open("/proc/meminfo").readlines()
memtotal_line = [l for l in lines if l.startswith('MemTotal')][0]
mem_kb = int(memtotal_line.split()[1])
- return mem_kb / 1024
+ return mem_kb // 1024
def _get_yarn_nm_ram_mb():
sys_ram = _get_system_ram_mb()
- available_ram_gb = int(os.getenv("IMPALA_CLUSTER_MAX_MEM_GB", str(sys_ram /
1024)))
+ available_ram_gb = int(os.getenv("IMPALA_CLUSTER_MAX_MEM_GB", str(sys_ram //
1024)))
# Fit into the following envelope:
# - need 4GB at a bare minimum
# - leave at least 20G for other services
diff --git a/tests/common/impala_cluster.py b/tests/common/impala_cluster.py
index e8a9aff7f..a9bc5431a 100644
--- a/tests/common/impala_cluster.py
+++ b/tests/common/impala_cluster.py
@@ -30,7 +30,7 @@ import time
from getpass import getuser
from random import choice
from signal import SIGKILL
-from subprocess import check_call
+from subprocess import check_call, check_output
from time import sleep
import tests.common.environ
@@ -41,10 +41,6 @@ from tests.common.impala_service import (
StateStoredService)
from tests.util.shell_util import exec_process, exec_process_async
-if sys.version_info >= (2, 7):
- # We use some functions in the docker code that don't exist in Python 2.6.
- from subprocess import check_output
-
LOG = logging.getLogger('impala_cluster')
LOG.setLevel(level=logging.DEBUG)
diff --git a/tests/comparison/data_generator_mapper.py
b/tests/comparison/data_generator_mapper.py
index 12135f563..35d412701 100755
--- a/tests/comparison/data_generator_mapper.py
+++ b/tests/comparison/data_generator_mapper.py
@@ -20,7 +20,7 @@
'''This is a mapper for use with hadoop streaming. See
data_generator.DatabasePopulator
for more information on how this file is used.
'''
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function
import os
import random
@@ -45,7 +45,7 @@ for line in sys.stdin:
batch_idx = 0
while row_count > 0:
table_data_generator.row_count = min(row_count, rows_per_batch)
- table_data_generator.randomization_seed = int(random.random() * sys.maxint)
+ table_data_generator.randomization_seed = int(random.random() *
sys.maxsize)
# Generate input for the reducers.
print("%s\t%s\t%s" % (table_data_generator.table.name, batch_idx,