This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 566df808913aef6ff5eecc3849e14df8370bd651 Author: Joe McDonnell <[email protected]> AuthorDate: Tue Apr 6 21:36:07 2021 -0700 IMPALA-11959: Add Python 3 virtualenv This adds a Python 3 equivalent to the impala-python virtualenv based on the toolchain Python 3.7.16. This modifies bootstrap_virtualenv.py to support the two different modes. This adds py2-requirements.txt and py3-requirements.txt to allow some differences between the Python 2 and Python 3 virtualenvs. Here are some specific package changes: - allpairs is replaced with allpairspy, as allpairs did not support Python 3. - requests is upgraded slightly, because otherwise it has issues with idna==2.8. - pylint is limited to Python 3, because we are adding it and don't need it on both - flake8 is limited to Python 2, because it will take some work to switch to a version that works on Python 3 - cm_api is limited to Python 2, because it doesn't support Python 3 - pytest-random does not support Python 3 and it is unused, so it is removed - Bump the version of setuptools-scm to support Python 3 This adds impala-pylint, which can be used to do further Python 3 checks via --py3k. This also adds a bin/check-pylint-py3k.sh script to enforce specific py3k checks. The banned py3k warnings are specified in the bin/banned_py3k_warnings.txt. This is currently empty, but this can ratchet up the py3k strictness over time to avoid regressions. This pulls in a new toolchain with the fix for IMPALA-11956 to get Python 3.7.16. 
Testing: - Hand tested that the allpairs libraries produce the same results - The python3 virtualenv has no influence on regular tests yet Change-Id: Ica4853f440c9a46a79bd5fb8e0a66730b0b4efc0 Reviewed-on: http://gerrit.cloudera.org:8080/19567 Reviewed-by: Joe McDonnell <[email protected]> Tested-by: Joe McDonnell <[email protected]> --- CMakeLists.txt | 6 +- bin/banned_py3k_warnings.txt | 0 bin/bootstrap_toolchain.py | 3 + bin/check-pylint-py3k.sh | 140 +++++++++++++++++++ bin/impala-config.sh | 3 +- .../setuptools-requirements.txt => bin/impala-pip3 | 8 +- .../impala-pylint | 8 +- .../impala-python3 | 10 +- ...t-impala-python.sh => impala-python3-common.sh} | 27 ++-- bin/init-impala-python.sh | 26 +++- bin/rat_exclude_files.txt | 1 + infra/python/bootstrap_virtualenv.py | 150 ++++++++++++++------- infra/python/deps/pip_download.py | 3 +- ...tools-requirements.txt => py2-requirements.txt} | 21 ++- ...tools-requirements.txt => py3-requirements.txt} | 18 ++- infra/python/deps/requirements.txt | 20 +-- infra/python/deps/setuptools-requirements.txt | 2 +- testdata/bin/generate-test-vectors.py | 3 +- tests/common/test_vector.py | 4 +- tests/custom_cluster/test_hs2_fault_injection.py | 2 - tests/query_test/test_decimal_casting.py | 2 +- 21 files changed, 349 insertions(+), 108 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cb35b393a..2bfc50f5d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -514,6 +514,10 @@ add_custom_target(impala_python ALL COMMAND "${CMAKE_SOURCE_DIR}/bin/init-impala-python.sh" ) +add_custom_target(impala_python3 ALL + COMMAND "${CMAKE_SOURCE_DIR}/bin/init-impala-python.sh" "-python3" +) + set(IMPALA_PYTHON_INSTALLS "") if (NOT $ENV{IMPALA_SYSTEM_PYTHON2} EQUAL "") list(APPEND IMPALA_PYTHON_INSTALLS shell_python2_install) @@ -524,7 +528,7 @@ endif() add_custom_target(impala_shell_pypi ALL DEPENDS ${IMPALA_PYTHON_INSTALLS}) add_custom_target(notests_independent_targets DEPENDS - java cscope tarballs impala_python impala_shell_pypi 
+ java cscope tarballs impala_python impala_python3 impala_shell_pypi ) add_custom_target(notests_regular_targets DEPENDS impalad statestored catalogd admissiond fesupport loggingsupport ImpalaUdf udasample udfsample impala-profile-tool diff --git a/bin/banned_py3k_warnings.txt b/bin/banned_py3k_warnings.txt new file mode 100644 index 000000000..e69de29bb diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py index 98ec9d7d6..097484822 100755 --- a/bin/bootstrap_toolchain.py +++ b/bin/bootstrap_toolchain.py @@ -494,6 +494,9 @@ def get_toolchain_downloads(): "crcutil", "curl", "flatbuffers", "gdb", "gflags", "glog", "gperftools", "gtest", "jwt-cpp", "libev", "libunwind", "lz4", "openldap", "openssl", "orc", "protobuf", "python", "rapidjson", "re2", "snappy", "tpc-h", "tpc-ds", "zlib", "zstd"]) + python3_package = ToolchainPackage( + "python", explicit_version=os.environ.get("IMPALA_PYTHON3_VERSION")) + toolchain_packages += [python3_package] toolchain_packages += get_unique_toolchain_downloads( ["thrift:cpp", "thrift:java", "thrift:py"]) protobuf_package_clang = ToolchainPackage( diff --git a/bin/check-pylint-py3k.sh b/bin/check-pylint-py3k.sh new file mode 100755 index 000000000..9dca19c87 --- /dev/null +++ b/bin/check-pylint-py3k.sh @@ -0,0 +1,140 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +set -euo pipefail + +BINDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +# To allow incrementally banning individual pylint checks, this uses grep +# expressions to match banned pylint warnings. The grep expressions are stored +# in the bin/banned_py3k_warnings.txt file. +BANNED_PY3K_WARNINGS="${BINDIR}/banned_py3k_warnings.txt" + +function print_usage { + echo "check-pylink-py3k.sh : Checks eligible python files for pylint py3k compliance." + echo "Fails if the python files have py3k warnings that match the patterns in " + echo "bin/banned_py3k_warnings.txt." + echo "[--error_output_file] : (optional) Also output the errors to a file" + echo "[--warning_output_file] : (optional) Also output the warnings to a file" +} + +ERROR_OUTPUT_FILE="" +WARNING_OUTPUT_FILE="" +while [ -n "$*" ] +do + case "$1" in + --error_output_file) + ERROR_OUTPUT_FILE="${2-}" + shift; + ;; + --warning_output_file) + WARNING_OUTPUT_FILE="${2-}" + shift; + ;; + --help|*) + print_usage + exit 1 + ;; + esac + shift +done + +pushd ${IMPALA_HOME} > /dev/null 2>&1 + +OUTPUT_TMP_DIR=$(mktemp -d) +PYLINT_OUTPUT_FILE="${OUTPUT_TMP_DIR}/pylint_output.txt" +ERROR_OUTPUT_TMP_FILE="${OUTPUT_TMP_DIR}/error_output_tmp.txt" +WARNING_OUTPUT_TMP_FILE="${OUTPUT_TMP_DIR}/warning_output_tmp.txt" + +RETCODE=0 +for file in $(git ls-files '**/*.py'); do + # Skip the shell entirely (but cover tests/shell) + if [[ "${file}" =~ "shell/" && ! "${file}" =~ "tests/shell" ]]; then + continue + fi + # For the moment, the focus is on enforcing py3k checks on files that use the + # impala-python virtualenv. Ignore executable python files that do not + # use impala-python. In practice, this tends to be scripts used during the + # build or various scripts for developers in bin. + FIRST_LINE=$(head -n1 ${file}) + if [[ "${file}: ${FIRST_LINE}" =~ "#!" 
]]; then + if [[ "${FIRST_LINE}" =~ "python3" ]]; then + >&2 echo "SKIPPING: ${file} is already using python3: ${FIRST_LINE}" + continue + fi + if [[ ! "${FIRST_LINE}" =~ "impala-python" ]]; then + >&2 echo "SKIPPING: ${file} is not using impala-python: ${FIRST_LINE}" + continue + fi + fi + + >&2 echo "PROCESSING: ${file}" + + # -s n (skip score for each file) + # --exit-zero: don't fail + impala-pylint -s n --exit-zero --py3k ${file} >> ${PYLINT_OUTPUT_FILE} +done + +touch "${ERROR_OUTPUT_TMP_FILE}" +touch "${WARNING_OUTPUT_TMP_FILE}" + +# Hitting a banned py3k warning will cause this to return an error +echo "" +echo "" +if grep -f "${BANNED_PY3K_WARNINGS}" "${PYLINT_OUTPUT_FILE}" > /dev/null 2>&1 ; then + echo "ERROR: Some python files contain these banned pylint warnings:" | \ + tee "${ERROR_OUTPUT_TMP_FILE}" + grep -f "${BANNED_PY3K_WARNINGS}" "${PYLINT_OUTPUT_FILE}" | \ + tee -a "${ERROR_OUTPUT_TMP_FILE}" + RETCODE=1 +else + echo "No errors found" | tee "${ERROR_OUTPUT_TMP_FILE}" +fi + +if [[ -n "${ERROR_OUTPUT_FILE}" ]]; then + cp "${ERROR_OUTPUT_TMP_FILE}" "${ERROR_OUTPUT_FILE}" +fi + +# The remaining py3k warnings are interesting, but they are not yet enforced. 
+# Pylint produces annoying lines like "************* Module X", so try to filter those out +echo "" +echo "" +if grep -v -e '\*\*\*\*' -f "${BANNED_PY3K_WARNINGS}" \ + "${PYLINT_OUTPUT_FILE}" > /dev/null 2>&1 ; then + echo "WARNING: Some python files contain these unenforced pylint warnings:" | \ + tee "${WARNING_OUTPUT_TMP_FILE}" + grep -v -e '\*\*\*\*' -f "${BANNED_PY3K_WARNINGS}" "${PYLINT_OUTPUT_FILE}" | \ + tee -a "${WARNING_OUTPUT_TMP_FILE}" + + echo "WARNING SUMMARY table:" + cat "${WARNING_OUTPUT_TMP_FILE}" | grep -v "WARNING" | cut -d: -f4- | \ + sed 's#^ ##' | sort | uniq -c +else + echo "No warnings found" | tee "${WARNING_OUTPUT_TMP_FILE}" +fi + +if [[ -n "${WARNING_OUTPUT_FILE}" ]]; then + cp "${WARNING_OUTPUT_TMP_FILE}" "${WARNING_OUTPUT_FILE}" +fi + +rm -rf "${OUTPUT_TMP_DIR}" + +popd > /dev/null 2>&1 + +exit ${RETCODE} diff --git a/bin/impala-config.sh b/bin/impala-config.sh index ae9a00a42..1e9f9ff17 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -81,7 +81,7 @@ export USE_APACHE_HIVE=${USE_APACHE_HIVE-false} # moving to a different build of the toolchain, e.g. when a version is bumped or a # compile option is changed. The build id can be found in the output of the toolchain # build jobs, it is constructed from the build number and toolchain git hash prefix. -export IMPALA_TOOLCHAIN_BUILD_ID=252-b144ba77b5 +export IMPALA_TOOLCHAIN_BUILD_ID=258-821f1d91bd # Versions of toolchain dependencies. 
# ----------------------------------- export IMPALA_AVRO_VERSION=1.7.4-p5 @@ -159,6 +159,7 @@ export IMPALA_POSTGRES_JDBC_DRIVER_VERSION=42.5.1 unset IMPALA_POSTGRES_JDBC_DRIVER_URL export IMPALA_PYTHON_VERSION=2.7.16 unset IMPALA_PYTHON_URL +export IMPALA_PYTHON3_VERSION=3.7.16 export IMPALA_RAPIDJSON_VERSION=1.1.0 unset IMPALA_RAPIDJSON_URL export IMPALA_RE2_VERSION=20190301 diff --git a/infra/python/deps/setuptools-requirements.txt b/bin/impala-pip3 old mode 100644 new mode 100755 similarity index 86% copy from infra/python/deps/setuptools-requirements.txt copy to bin/impala-pip3 index 071f9fc54..273555feb --- a/infra/python/deps/setuptools-requirements.txt +++ b/bin/impala-pip3 @@ -1,3 +1,5 @@ +#!/bin/bash +# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -15,7 +17,5 @@ # specific language governing permissions and limitations # under the License. -# Newer versions of setuptools don't support Python 2.7 -setuptools == 44.1.1 - wheel == 0.35.1 -setuptools-scm == 4.1.2 +source "$(dirname "$0")/impala-python3-common.sh" +exec "$PY_ENV_DIR/bin/python3" "$PY_ENV_DIR/bin/pip3" "$@" diff --git a/infra/python/deps/setuptools-requirements.txt b/bin/impala-pylint old mode 100644 new mode 100755 similarity index 86% copy from infra/python/deps/setuptools-requirements.txt copy to bin/impala-pylint index 071f9fc54..012f08bc9 --- a/infra/python/deps/setuptools-requirements.txt +++ b/bin/impala-pylint @@ -1,3 +1,5 @@ +#!/bin/bash +# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -15,7 +17,5 @@ # specific language governing permissions and limitations # under the License. 
-# Newer versions of setuptools don't support Python 2.7 -setuptools == 44.1.1 - wheel == 0.35.1 -setuptools-scm == 4.1.2 +source "$(dirname "$0")/impala-python3-common.sh" +exec "$PY_ENV_DIR/bin/pylint" "$@" diff --git a/infra/python/deps/setuptools-requirements.txt b/bin/impala-python3 old mode 100644 new mode 100755 similarity index 75% copy from infra/python/deps/setuptools-requirements.txt copy to bin/impala-python3 index 071f9fc54..aec831d12 --- a/infra/python/deps/setuptools-requirements.txt +++ b/bin/impala-python3 @@ -1,3 +1,6 @@ +#!/bin/bash +# +############################################################################## # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -14,8 +17,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +############################################################################## -# Newer versions of setuptools don't support Python 2.7 -setuptools == 44.1.1 - wheel == 0.35.1 -setuptools-scm == 4.1.2 +source "$(dirname "$0")/impala-python3-common.sh" +exec "$PY_ENV_DIR/bin/python3" "$@" diff --git a/bin/init-impala-python.sh b/bin/impala-python3-common.sh old mode 100755 new mode 100644 similarity index 60% copy from bin/init-impala-python.sh copy to bin/impala-python3-common.sh index e1e20f4a4..06bf3a87a --- a/bin/init-impala-python.sh +++ b/bin/impala-python3-common.sh @@ -1,5 +1,3 @@ -#!/usr/bin/env bash -# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -16,16 +14,19 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-# -# This is called during the build to initialize the impala-python -# virtualenv (which involves installing various packages and -# compiling things). This is not directly in CMake, because -# this depends on knowing IMPALA_HOME and other environment -# variables. -bin=`dirname "$0"` -bin=`cd "$bin"; pwd` -. "$bin"/impala-config.sh +# This file is intended to be sourced to perform common setup for +# the Python 3 $IMPALA_HOME/bin/impala-py* executables. + +set -euo pipefail +. $IMPALA_HOME/bin/report_build_error.sh +setup_report_build_error + +. $IMPALA_HOME/bin/set-pythonpath.sh + +export LD_LIBRARY_PATH="$(python "$IMPALA_HOME/infra/python/bootstrap_virtualenv.py" \ + --print-ld-library-path)" -cd $IMPALA_HOME -bin/impala-python -c 'print("Initialized impala-python")' +PY_DIR="$(dirname "$0")/../infra/python" +PY_ENV_DIR="${PY_DIR}/env-gcc${IMPALA_GCC_VERSION}-py3" +python "$PY_DIR/bootstrap_virtualenv.py" --python3 diff --git a/bin/init-impala-python.sh b/bin/init-impala-python.sh index e1e20f4a4..360d2df89 100755 --- a/bin/init-impala-python.sh +++ b/bin/init-impala-python.sh @@ -27,5 +27,29 @@ bin=`dirname "$0"` bin=`cd "$bin"; pwd` . 
"$bin"/impala-config.sh +function print_usage { + echo "init-impala-python.sh - Script called from CMake to init python venvs" + echo "[-python3] : Init the python3 virtualenv (default is python2)" +} + +IS_PYTHON3=false +while [ -n "$*" ] +do + case "$1" in + -python3) + IS_PYTHON3=true + ;; + -help|*) + print_usage + exit 1 + ;; + esac + shift +done + cd $IMPALA_HOME -bin/impala-python -c 'print("Initialized impala-python")' +if $IS_PYTHON3 ; then + bin/impala-python3 -c 'print("Initialized impala-python3")' +else + bin/impala-python -c 'print("Initialized impala-python")' +fi diff --git a/bin/rat_exclude_files.txt b/bin/rat_exclude_files.txt index 19d4ecbf2..825eef9b5 100644 --- a/bin/rat_exclude_files.txt +++ b/bin/rat_exclude_files.txt @@ -27,6 +27,7 @@ shell/packaging/MANIFEST.in shell/packaging/requirements.txt testdata/cluster/node_templates/cdh7/etc/init.d/kms testdata/authentication/* +bin/banned_py3k_warnings.txt # See $IMPALA_HOME/LICENSE.txt be/src/gutil/* diff --git a/infra/python/bootstrap_virtualenv.py b/infra/python/bootstrap_virtualenv.py index c3bc59932..bd9c08144 100644 --- a/infra/python/bootstrap_virtualenv.py +++ b/infra/python/bootstrap_virtualenv.py @@ -55,7 +55,10 @@ SKIP_TOOLCHAIN_BOOTSTRAP = "SKIP_TOOLCHAIN_BOOTSTRAP" GCC_VERSION = os.environ["IMPALA_GCC_VERSION"] DEPS_DIR = os.path.join(os.path.dirname(__file__), "deps") -ENV_DIR = os.path.join(os.path.dirname(__file__), "env-gcc{0}".format(GCC_VERSION)) +ENV_DIR_PY2 = os.path.join(os.path.dirname(__file__), + "env-gcc{0}".format(GCC_VERSION)) +ENV_DIR_PY3 = os.path.join(os.path.dirname(__file__), + "env-gcc{0}-py3".format(GCC_VERSION)) # Setuptools requirements file. Setuptools is required during pip install for # some packages. Newer setuptools dropped python 2 support, and some python @@ -77,10 +80,16 @@ KUDU_REQS_PATH = os.path.join(DEPS_DIR, "kudu-requirements.txt") # Interface) being installed by the requirements step. 
ADLS_REQS_PATH = os.path.join(DEPS_DIR, "adls-requirements.txt") +# Extra packages specific to python 3 +PY3_REQS_PATH = os.path.join(DEPS_DIR, "py3-requirements.txt") -def delete_virtualenv_if_exist(): - if os.path.exists(ENV_DIR): - shutil.rmtree(ENV_DIR) +# Extra packages specific to python 2 +PY2_REQS_PATH = os.path.join(DEPS_DIR, "py2-requirements.txt") + + +def delete_virtualenv_if_exist(venv_dir): + if os.path.exists(venv_dir): + shutil.rmtree(venv_dir) def detect_virtualenv_version(): @@ -99,8 +108,16 @@ def detect_virtualenv_version(): return None -def create_virtualenv(): - LOG.info("Creating python virtualenv") +def create_virtualenv(venv_dir, is_py3): + if is_py3: + # Python 3 is much simpler, because there is a builtin venv command + LOG.info("Creating python3 virtualenv") + python_cmd = download_toolchain_python(is_py3) + exec_cmd([python_cmd, "-m" "venv", venv_dir]) + return + + # Python 2 + LOG.info("Creating python2 virtualenv") build_dir = tempfile.mkdtemp() # Try to find the virtualenv version by parsing the requirements file # Default to "*" if we can't figure it out. @@ -114,9 +131,9 @@ def create_virtualenv(): for member in file.getmembers(): file.extract(member, build_dir) file.close() - python_cmd = download_toolchain_python() + python_cmd = download_toolchain_python(is_py3) exec_cmd([python_cmd, find_file(build_dir, "virtualenv*", "virtualenv.py"), "--quiet", - "--python", python_cmd, ENV_DIR]) + "--python", python_cmd, venv_dir]) shutil.rmtree(build_dir) @@ -147,7 +164,7 @@ def select_cc(): return cc -def exec_pip_install(args, cc="no-cc-available", env=None): +def exec_pip_install(venv_dir, is_py3, args, cc="no-cc-available", env=None): '''Executes "pip install" with the provided command line arguments. If 'cc' is set, it is used as the C compiler. Otherwise compilation of C/C++ code is disabled by setting the CC environment variable to a bogus value. 
@@ -169,8 +186,12 @@ def exec_pip_install(args, cc="no-cc-available", env=None): # Don't call the virtualenv pip directly, it uses a hashbang to to call the python # virtualenv using an absolute path. If the path to the virtualenv is very long, the # hashbang won't work. - impala_pip_base_cmd = [os.path.join(ENV_DIR, "bin", "python"), - os.path.join(ENV_DIR, "bin", "pip"), "install", "-v"] + if is_py3: + impala_pip_base_cmd = [os.path.join(venv_dir, "bin", "python3"), + os.path.join(venv_dir, "bin", "pip3"), "install", "-v"] + else: + impala_pip_base_cmd = [os.path.join(venv_dir, "bin", "python"), + os.path.join(venv_dir, "bin", "pip"), "install", "-v"] # Passes --no-binary for IMPALA-3767: without this, Cython (and # several other packages) fail download. @@ -181,7 +202,9 @@ def exec_pip_install(args, cc="no-cc-available", env=None): impala_pip_base_cmd[:] + ["--no-binary", ":all:", "--no-cache-dir"] # When using a custom mirror, we also must use the index of that mirror. - if "PYPI_MIRROR" in os.environ: + # The python 3 virtualenv has trouble with using --index-url with PYPI_MIRROR, + # so it falls back to --no-index, which works fine. + if "PYPI_MIRROR" in os.environ and not is_py3: third_party_pkg_install_cmd.extend(["--index-url", "%s/simple" % os.environ["PYPI_MIRROR"]]) else: @@ -217,7 +240,7 @@ def find_file(*paths): return files[0] -def download_toolchain_python(): +def download_toolchain_python(is_py3): '''Grabs the Python implementation from the Impala toolchain, using the machinery from bin/bootstrap_toolchain.py. Skip the download if SKIP_TOOLCHAIN_BOOTSTRAP=true in the environment. 
In that case @@ -229,27 +252,35 @@ def download_toolchain_python(): raise Exception("Impala environment not set up correctly, make sure " "$IMPALA_TOOLCHAIN_PACKAGES_HOME is set.") - package = ToolchainPackage("python") + if is_py3: + package = ToolchainPackage("python", + explicit_version=os.environ["IMPALA_PYTHON3_VERSION"]) + else: + package = ToolchainPackage("python") if package.needs_download() and \ not (os.environ.get(SKIP_TOOLCHAIN_BOOTSTRAP) == 'true'): package.download() - python_cmd = os.path.join(package.pkg_directory(), "bin/python") + if is_py3: + python_cmd = os.path.join(package.pkg_directory(), "bin/python3") + else: + python_cmd = os.path.join(package.pkg_directory(), "bin/python") if not os.path.exists(python_cmd): raise Exception("Unexpected error bootstrapping python from toolchain: {0} does not " "exist".format(python_cmd)) return python_cmd -def install_deps(): - LOG.info("Installing setuptools into the virtualenv") - exec_pip_install(["-r", SETUPTOOLS_REQS_PATH]) +def install_deps(venv_dir, is_py3): + py_str = "3" if is_py3 else "2" + LOG.info("Installing setuptools into the python{0} virtualenv".format(py_str)) + exec_pip_install(venv_dir, is_py3, ["-r", SETUPTOOLS_REQS_PATH]) cc = select_cc() if cc is None: raise Exception("CC not available") env = dict(os.environ) - LOG.info("Installing packages into the virtualenv") - exec_pip_install(["-r", REQS_PATH], cc=cc, env=env) - mark_reqs_installed(REQS_PATH) + LOG.info("Installing packages into the python{0} virtualenv".format(py_str)) + exec_pip_install(venv_dir, is_py3, ["-r", REQS_PATH], cc=cc, env=env) + mark_reqs_installed(venv_dir, REQS_PATH) def have_toolchain(): @@ -264,26 +295,44 @@ def toolchain_pkg_dir(pkg_name): pkg_name + "-" + pkg_version) -def install_adls_deps(): +def install_adls_deps(venv_dir, is_py3): # The ADLS dependencies require that the OS is at least CentOS 6.7 or above, # which is why we break this into a seperate step. 
If the target filesystem is # ADLS, the expectation is that the dev environment is running at least CentOS 6.7. if os.environ.get('TARGET_FILESYSTEM') == "adls": - if reqs_are_installed(ADLS_REQS_PATH): + if reqs_are_installed(venv_dir, ADLS_REQS_PATH): LOG.debug("Skipping ADLS deps: matching adls-installed-requirements.txt found") return True cc = select_cc() assert cc is not None - LOG.info("Installing ADLS packages into the virtualenv") - exec_pip_install(["-r", ADLS_REQS_PATH], cc=cc) - mark_reqs_installed(ADLS_REQS_PATH) + py_str = "3" if is_py3 else "2" + LOG.info("Installing ADLS packages into the python{0} virtualenv".format(py_str)) + exec_pip_install(venv_dir, is_py3, ["-r", ADLS_REQS_PATH], cc=cc) + mark_reqs_installed(venv_dir, ADLS_REQS_PATH) -def install_kudu_client_if_possible(): +def install_py_version_deps(venv_dir, is_py3): + cc = select_cc() + assert cc is not None + if not is_py3: + if not reqs_are_installed(venv_dir, PY2_REQS_PATH): + # These are extra python2-only packages + LOG.info("Installing python2 packages into the virtualenv") + exec_pip_install(venv_dir, is_py3, ["-r", PY2_REQS_PATH], cc=cc) + mark_reqs_installed(venv_dir, PY2_REQS_PATH) + else: + if not reqs_are_installed(venv_dir, PY3_REQS_PATH): + # These are extra python3-only packages + LOG.info("Installing python3 packages into the virtualenv") + exec_pip_install(venv_dir, is_py3, ["-r", PY3_REQS_PATH], cc=cc) + mark_reqs_installed(venv_dir, PY3_REQS_PATH) + + +def install_kudu_client_if_possible(venv_dir, is_py3): '''Installs the Kudu python module if possible, which depends on the toolchain and the compiled requirements in requirements.txt. 
If the toolchain isn't available, nothing will be done.''' - if reqs_are_installed(KUDU_REQS_PATH): + if reqs_are_installed(venv_dir, KUDU_REQS_PATH): LOG.debug("Skipping Kudu: matching kudu-installed-requirements.txt found") return kudu_base_dir = os.environ["IMPALA_KUDU_HOME"] @@ -291,11 +340,13 @@ def install_kudu_client_if_possible(): LOG.debug("Skipping Kudu: %s doesn't exist" % kudu_base_dir) return - LOG.info("Installing Kudu into the virtualenv") + py_str = "3" if is_py3 else "2" + LOG.info("Installing Kudu into the python{0} virtualenv".format(py_str)) # The installation requires that KUDU_HOME/build/latest exists. An empty directory # structure will be made to satisfy that. The Kudu client headers and lib will be made # available through GCC environment variables. - fake_kudu_build_dir = os.path.join(tempfile.gettempdir(), "virtualenv-kudu") + fake_kudu_build_dir = os.path.join(tempfile.gettempdir(), + "virtualenv-kudu{0}".format(py_str)) try: artifact_dir = os.path.join(fake_kudu_build_dir, "build", "latest") if not os.path.exists(artifact_dir): @@ -312,8 +363,8 @@ def install_kudu_client_if_possible(): env["CPLUS_INCLUDE_PATH"] = os.path.join(kudu_client_dir, "include") env["LIBRARY_PATH"] = os.path.pathsep.join([os.path.join(kudu_client_dir, 'lib'), os.path.join(kudu_client_dir, 'lib64')]) - exec_pip_install(["-r", KUDU_REQS_PATH], cc=cc, env=env) - mark_reqs_installed(KUDU_REQS_PATH) + exec_pip_install(venv_dir, is_py3, ["-r", KUDU_REQS_PATH], cc=cc, env=env) + mark_reqs_installed(venv_dir, KUDU_REQS_PATH) finally: try: shutil.rmtree(fake_kudu_build_dir) @@ -353,17 +404,17 @@ def error_if_kudu_client_not_found(install_dir): raise Exception("%s not found at %s" % (kudu_client_lib, lib_dir)) -def mark_reqs_installed(reqs_path): +def mark_reqs_installed(venv_dir, reqs_path): '''Mark that the requirements from the given file are installed by copying it into the root directory of the virtualenv.''' - installed_reqs_path = os.path.join(ENV_DIR, 
os.path.basename(reqs_path)) + installed_reqs_path = os.path.join(venv_dir, os.path.basename(reqs_path)) shutil.copyfile(reqs_path, installed_reqs_path) -def reqs_are_installed(reqs_path): +def reqs_are_installed(venv_dir, reqs_path): '''Check if the requirements from the given file are installed in the virtualenv by looking for a matching requirements file in the root directory of the virtualenv.''' - installed_reqs_path = os.path.join(ENV_DIR, os.path.basename(reqs_path)) + installed_reqs_path = os.path.join(venv_dir, os.path.basename(reqs_path)) if not os.path.exists(installed_reqs_path): return False installed_reqs_file = open(installed_reqs_path) @@ -381,11 +432,11 @@ def reqs_are_installed(reqs_path): installed_reqs_file.close() -def setup_virtualenv_if_not_exists(): - if not (reqs_are_installed(REQS_PATH)): - delete_virtualenv_if_exist() - create_virtualenv() - install_deps() +def setup_virtualenv_if_not_exists(venv_dir, is_py3): + if not (reqs_are_installed(venv_dir, REQS_PATH)): + delete_virtualenv_if_exist(venv_dir) + create_virtualenv(venv_dir, is_py3) + install_deps(venv_dir, is_py3) LOG.debug("Virtualenv setup complete") @@ -397,6 +448,8 @@ if __name__ == "__main__": " the virtualenv even if it exists and appears to be completely up-to-date.") parser.add_option("--print-ld-library-path", action="store_true", help="Print the" " LD_LIBRARY_PATH that should be used when running python from the virtualenv.") + parser.add_option("--python3", action="store_true", help="Generate the python3" + " virtualenv") options, args = parser.parse_args() if options.print_ld_library_path: @@ -411,10 +464,17 @@ if __name__ == "__main__": sys.exit() logging.basicConfig(level=getattr(logging, options.log_level)) + + if options.python3: + venv_dir = ENV_DIR_PY3 + else: + venv_dir = ENV_DIR_PY2 + if options.rebuild: - delete_virtualenv_if_exist() + delete_virtualenv_if_exist(venv_dir) # Complete as many bootstrap steps as possible (see file comment for the steps). 
- setup_virtualenv_if_not_exists() - install_kudu_client_if_possible() - install_adls_deps() + setup_virtualenv_if_not_exists(venv_dir, options.python3) + install_kudu_client_if_possible(venv_dir, options.python3) + install_adls_deps(venv_dir, options.python3) + install_py_version_deps(venv_dir, options.python3) diff --git a/infra/python/deps/pip_download.py b/infra/python/deps/pip_download.py index 9c41135d8..03713f927 100755 --- a/infra/python/deps/pip_download.py +++ b/infra/python/deps/pip_download.py @@ -38,7 +38,8 @@ PYPI_MIRROR = os.environ.get('PYPI_MIRROR', 'https://pypi.python.org') # The requirement files that list all of the required packages and versions. REQUIREMENTS_FILES = ['requirements.txt', 'setuptools-requirements.txt', - 'kudu-requirements.txt', 'adls-requirements.txt'] + 'kudu-requirements.txt', 'adls-requirements.txt', + 'py2-requirements.txt', 'py3-requirements.txt'] def check_digest(filename, algorithm, expected_digest): diff --git a/infra/python/deps/setuptools-requirements.txt b/infra/python/deps/py2-requirements.txt similarity index 65% copy from infra/python/deps/setuptools-requirements.txt copy to infra/python/deps/py2-requirements.txt index 071f9fc54..122b5ab68 100644 --- a/infra/python/deps/setuptools-requirements.txt +++ b/infra/python/deps/py2-requirements.txt @@ -15,7 +15,20 @@ # specific language governing permissions and limitations # under the License. -# Newer versions of setuptools don't support Python 2.7 -setuptools == 44.1.1 - wheel == 0.35.1 -setuptools-scm == 4.1.2 +# Python2-only requirements + +cm-api == 10.0.0 + # Already available as part of python on Linux. 
+ readline == 6.2.4.1; sys_platform == 'darwin' +flake8 == 3.9.2 + mccabe == 0.6.1 + pycodestyle == 2.7.0 + pyflakes == 2.3.1 + enum34 == 1.1.10 + typing == 3.10.0.0 + configparser == 4.0.2 + functools32 == 3.2.3-2 + importlib-metadata == 2.1.3 + contextlib2 == 0.6.0 + pathlib2 == 2.3.7.post1 + zipp == 1.2.0 diff --git a/infra/python/deps/setuptools-requirements.txt b/infra/python/deps/py3-requirements.txt similarity index 70% copy from infra/python/deps/setuptools-requirements.txt copy to infra/python/deps/py3-requirements.txt index 071f9fc54..d6195a1e8 100644 --- a/infra/python/deps/setuptools-requirements.txt +++ b/infra/python/deps/py3-requirements.txt @@ -15,7 +15,17 @@ # specific language governing permissions and limitations # under the License. -# Newer versions of setuptools don't support Python 2.7 -setuptools == 44.1.1 - wheel == 0.35.1 -setuptools-scm == 4.1.2 +# Python3-only requirements + +pylint == 2.10.2 + astroid == 2.7.3 + lazy-object-proxy == 1.6.0 + wrapt == 1.12.1 + typed-ast == 1.4.3 + configparser == 4.0.2 + isort == 4.3.21 + futures == 3.3.0; python_version == "2.7" + singledispatch == 3.6.1 + toml == 0.10.2 + platformdirs == 2.4.1 + typing-extensions == 3.10.0.2 diff --git a/infra/python/deps/requirements.txt b/infra/python/deps/requirements.txt index 271ddeadc..fe61de14b 100644 --- a/infra/python/deps/requirements.txt +++ b/infra/python/deps/requirements.txt @@ -20,23 +20,8 @@ # Dependents are indented. Dependents that have multiple parents are not listed # multiple times (though maybe they could be). -allpairs == 2.0.1 +allpairspy == 2.5.0 argparse == 1.4.0 -cm-api == 10.0.0 - # Already available as part of python on Linux. 
- readline == 6.2.4.1; sys_platform == 'darwin' -flake8 == 3.9.2 - mccabe == 0.6.1 - pycodestyle == 2.7.0 - pyflakes == 2.3.1 - enum34 == 1.1.10 - typing == 3.10.0.0 - configparser == 4.0.2 - functools32 == 3.2.3-2 - importlib-metadata == 2.1.3 - contextlib2 == 0.6.0 - pathlib2 == 2.3.7.post1 - zipp == 1.2.0 future == 0.18.3 gcovr == 4.2 Jinja2 == 2.11.3 @@ -61,14 +46,13 @@ pyparsing == 2.0.3 pytest == 2.9.2 py == 1.4.32 pytest-forked == 0.2 - pytest-random == 0.02 pytest-runner == 4.2 pytest-xdist == 1.17.1 pytest-timeout == 1.2.1 python-magic == 0.4.11 pywebhdfs == 0.3.2 pbr == 3.1.1 -requests == 2.20.0 +requests == 2.21.0 chardet == 3.0.4 idna == 2.8 urllib3 == 1.24.2 diff --git a/infra/python/deps/setuptools-requirements.txt b/infra/python/deps/setuptools-requirements.txt index 071f9fc54..713bfa0af 100644 --- a/infra/python/deps/setuptools-requirements.txt +++ b/infra/python/deps/setuptools-requirements.txt @@ -18,4 +18,4 @@ # Newer versions of setuptools don't support Python 2.7 setuptools == 44.1.1 wheel == 0.35.1 -setuptools-scm == 4.1.2 +setuptools-scm == 5.0.2 diff --git a/testdata/bin/generate-test-vectors.py b/testdata/bin/generate-test-vectors.py index 4998a8caa..c7d288bd8 100755 --- a/testdata/bin/generate-test-vectors.py +++ b/testdata/bin/generate-test-vectors.py @@ -48,8 +48,7 @@ import os import sys from itertools import product from optparse import OptionParser -import metacomm.combinatorics.all_pairs2 -all_pairs = metacomm.combinatorics.all_pairs2.all_pairs2 +from allpairspy import AllPairs as all_pairs parser = OptionParser() parser.add_option("-w", "--workload", dest="workload", diff --git a/tests/common/test_vector.py b/tests/common/test_vector.py index 005c35adb..8fcac5a79 100644 --- a/tests/common/test_vector.py +++ b/tests/common/test_vector.py @@ -136,8 +136,8 @@ class ImpalaTestMatrix(object): if self.is_valid(vec)] def __generate_pairwise_combinations(self): - import metacomm.combinatorics.all_pairs2 - all_pairs = 
metacomm.combinatorics.all_pairs2.all_pairs2 + from allpairspy import AllPairs + all_pairs = AllPairs # Pairwise fails if the number of inputs == 1. Use exhaustive in this case the # results will be the same. diff --git a/tests/custom_cluster/test_hs2_fault_injection.py b/tests/custom_cluster/test_hs2_fault_injection.py index 0b3f8e7b4..27e536fdd 100644 --- a/tests/custom_cluster/test_hs2_fault_injection.py +++ b/tests/custom_cluster/test_hs2_fault_injection.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python -# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information diff --git a/tests/query_test/test_decimal_casting.py b/tests/query_test/test_decimal_casting.py index 752e3ad64..f487e8bc3 100644 --- a/tests/query_test/test_decimal_casting.py +++ b/tests/query_test/test_decimal_casting.py @@ -19,7 +19,7 @@ # import pytest from decimal import Decimal, getcontext, ROUND_DOWN, ROUND_HALF_UP -from metacomm.combinatorics.all_pairs2 import all_pairs2 as all_pairs +from allpairspy import AllPairs as all_pairs from random import randint from tests.common.impala_test_suite import ImpalaTestSuite
