This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 12325eb7ecda3754895ca825003ae3215bc060a1 Author: Michael Smith <[email protected]> AuthorDate: Mon Oct 9 16:41:24 2023 -0700 IMPALA-12515: Build modules for extra pythons Adds IMPALA_EXTRA_PACKAGE_PYTHONS to build impala-shell tarball dependencies for additional Python targets. That can be used to build a tarball that supports multiple Python 3 minor versions at once. Updates the impala-shell script to provide a clear error message when attempting to use the tarball with a Python version that it hasn't been built for. Change-Id: I13720a9e3c50f348bef41f5e91f810204e416f13 Reviewed-on: http://gerrit.cloudera.org:8080/20617 Reviewed-by: Joe McDonnell <[email protected]> Tested-by: Michael Smith <[email protected]> --- ...eate_py3_virtualenv.sh => create_virtualenv.sh} | 25 +++---- bin/impala-config.sh | 5 ++ shell/CMakeLists.txt | 83 +++++++++++++--------- shell/impala-shell | 15 +++- shell/make_shell_tarball.sh | 58 ++++++--------- tests/shell/util.py | 4 +- 6 files changed, 103 insertions(+), 87 deletions(-) diff --git a/bin/cmake_aux/create_py3_virtualenv.sh b/bin/cmake_aux/create_virtualenv.sh similarity index 71% rename from bin/cmake_aux/create_py3_virtualenv.sh rename to bin/cmake_aux/create_virtualenv.sh index d632fc952..2afb4dc69 100755 --- a/bin/cmake_aux/create_py3_virtualenv.sh +++ b/bin/cmake_aux/create_virtualenv.sh @@ -16,29 +16,27 @@ # specific language governing permissions and limitations # under the License. # -# Create a python3 virtualenv. When system python is 3.6 or higher, +# Create a python virtualenv. When system python is 3.6 or higher, # we can just use the built-in venv module to create the virtualenv. # If system python is older or the built-in venv module fails, then # this falls back to impala-virtualenv, which uses python2 to -# initialize a virtualenv using python3. +# initialize a virtualenv using the specified interpreter. 
# -# This takes a single argument, which is the destination directory -# for the virtualenv: -# create_py3_virtualenv.sh venv_dir +# This takes two arguments, which are the interpreter to use and the +# destination directory for the virtualenv: +# create_virtualenv.sh pythonX venv_dir set -euo pipefail -# We should only be calling this when system python3 is available -[[ -n ${IMPALA_SYSTEM_PYTHON3} ]] +PYTHON_EXE=$1 +VENV_DIR=$2 -VENV_DIR=$1 - -IS_PY36_OR_HIGHER=$(${IMPALA_SYSTEM_PYTHON3} -c \ - "import sys; print('true' if sys.version_info.minor >= 6 else 'false')") +IS_PY36_OR_HIGHER=$(${PYTHON_EXE} -c "import sys; print(\ + 'true' if sys.version_info.major >= 3 and sys.version_info.minor >= 6 else 'false')") # If using Python >= 3.6, try to use the builtin venv package. if $IS_PY36_OR_HIGHER ; then - if ${IMPALA_SYSTEM_PYTHON3} -m venv ${VENV_DIR} ; then + if ${PYTHON_EXE} -m venv ${VENV_DIR} ; then # Success exit 0 fi @@ -51,5 +49,4 @@ if $IS_PY36_OR_HIGHER ; then fi # Fall back to using the old impala-virtualenv method -impala-virtualenv --python ${IMPALA_SYSTEM_PYTHON3} ${VENV_DIR} - +impala-virtualenv --python ${PYTHON_EXE} ${VENV_DIR} diff --git a/bin/impala-config.sh b/bin/impala-config.sh index 74c24c558..8c6cea8fe 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -225,6 +225,11 @@ unset IMPALA_THRIFT_PY_URL export IMPALA_SYSTEM_PYTHON2="${IMPALA_SYSTEM_PYTHON2_OVERRIDE-$(command -v python2)}" export IMPALA_SYSTEM_PYTHON3="${IMPALA_SYSTEM_PYTHON3_OVERRIDE-$(command -v python3)}" +# Additional Python versions to use when building the impala-shell prebuilt tarball +# via make_shell_tarball.sh. That tarball includes precompiled packages, so it can be +# used without additional system dependencies needed for pip install. 
+# export IMPALA_EXTRA_PACKAGE_PYTHONS=python3.6;python3.10 + if [[ $OSTYPE == "darwin"* ]]; then IMPALA_CYRUS_SASL_VERSION=2.1.26 unset IMPALA_CYRUS_SASL_URL diff --git a/shell/CMakeLists.txt b/shell/CMakeLists.txt index 3d97b7e28..513400e5a 100644 --- a/shell/CMakeLists.txt +++ b/shell/CMakeLists.txt @@ -15,44 +15,58 @@ # specific language governing permissions and limitations # under the License. -# These virtualenvs serve two purposes: -# 1. They have system python with wheel installed, and they can be used to produce -# wheels for external dependencies for the shell tarball build. -# 2. We pip install impala-shell into them for use in tests. -# The initial virtualenv creation includes the "pip install wheel" command to -# satisfy #1. #2 is a separate step and has no interaction with #1. -set(PYTHON2_VENV "${CMAKE_SOURCE_DIR}/shell/build/py2_venv") -set(PYTHON3_VENV "${CMAKE_SOURCE_DIR}/shell/build/py3_venv") -# IMPALA-12117: Use separate pip cache directories to avoid concurrency -# issues. The standard location is in ~/.cache/pip, so this uses directories -# inside ~/.cache. These typical consume a couple MB each. -set(PYTHON2_PIP_CACHE "~/.cache/impala_py2_pip") -set(PYTHON3_PIP_CACHE "~/.cache/impala_py3_pip") - -add_custom_command(OUTPUT "${PYTHON2_VENV}" DEPENDS impala_python - COMMAND impala-virtualenv --python "$ENV{IMPALA_SYSTEM_PYTHON2}" "${PYTHON2_VENV}" - COMMAND "${PYTHON2_VENV}/bin/pip" install --cache-dir "${PYTHON2_PIP_CACHE}" wheel -) - -# In cases where system python3 is old, this can use impala-virtualenv, so it -# needs to depend on impala_python. -add_custom_command(OUTPUT "${PYTHON3_VENV}" DEPENDS impala_python - COMMAND "${CMAKE_SOURCE_DIR}/bin/cmake_aux/create_py3_virtualenv.sh" "${PYTHON3_VENV}" - COMMAND "${PYTHON3_VENV}/bin/pip" install --cache-dir "${PYTHON3_PIP_CACHE}" wheel -) - # The shell tarball build only needs the build virtualenvs for the system # pythons that are installed. 
set(IMPALA_PYTHON_BUILD_VENVS "") -if (NOT $ENV{IMPALA_SYSTEM_PYTHON2} EQUAL "") - list(APPEND IMPALA_PYTHON_BUILD_VENVS "${PYTHON2_VENV}") + +set(VENV_LOC "${CMAKE_SOURCE_DIR}/shell/build") +set(PIP_LOC "~/.cache/impala_pip") + +# Tests depend on installing system pythons to specific locations, so we error if they +# won't match what's expected in this config and make_shell_tarball.sh. +set(PYTHON_EXES $ENV{IMPALA_EXTRA_PACKAGE_PYTHONS}) +if (NOT $ENV{IMPALA_SYSTEM_PYTHON2} STREQUAL "") + get_filename_component(PYTHON_NAME $ENV{IMPALA_SYSTEM_PYTHON2} NAME) + if (NOT ${PYTHON_NAME} STREQUAL "python2") + message(FATAL_ERROR "IMPALA_SYSTEM_PYTHON2 must be a binary named python2") + endif() + list(APPEND PYTHON_EXES $ENV{IMPALA_SYSTEM_PYTHON2}) endif() -if (NOT $ENV{IMPALA_SYSTEM_PYTHON3} EQUAL "") - list(APPEND IMPALA_PYTHON_BUILD_VENVS "${PYTHON3_VENV}") +if (NOT $ENV{IMPALA_SYSTEM_PYTHON3} STREQUAL "") + get_filename_component(PYTHON_NAME $ENV{IMPALA_SYSTEM_PYTHON3} NAME) + if (NOT ${PYTHON_NAME} STREQUAL "python3") + message(FATAL_ERROR "IMPALA_SYSTEM_PYTHON3 must be a binary named python3") + endif() + list(APPEND PYTHON_EXES $ENV{IMPALA_SYSTEM_PYTHON3}) endif() +message(STATUS "Packaging for ${PYTHON_EXES}") + +foreach(PYTHON_EXE IN LISTS PYTHON_EXES) + get_filename_component(PYTHON_NAME "${PYTHON_EXE}" NAME) + # These virtualenvs serve two purposes: + # 1. They have system python with wheel installed, and they can be used to produce + # wheels for external dependencies for the shell tarball build. + # 2. We pip install impala-shell into them for use in tests. + # The initial virtualenv creation includes the "pip install wheel" command to + # satisfy #1. #2 is a separate step and has no interaction with #1. + set(VENV "${VENV_LOC}/${PYTHON_NAME}_venv") + # IMPALA-12117: Use separate pip cache directories to avoid concurrency + # issues. The standard location is in ~/.cache/pip, so this uses directories + # inside ~/.cache. These typically consume a couple MB each. 
+ set(PIP_CACHE "${PIP_LOC}/${PYTHON_NAME}") + + # Supports fallback to impala-virtualenv for older Python versions. + add_custom_command(OUTPUT "${VENV}" DEPENDS impala_python + COMMAND "${CMAKE_SOURCE_DIR}/bin/cmake_aux/create_virtualenv.sh" + "${PYTHON_EXE}" "${VENV}" + COMMAND "${VENV}/bin/pip" install --cache-dir "${PIP_CACHE}" wheel + ) + + list(APPEND IMPALA_PYTHON_BUILD_VENVS "${VENV}") +endforeach() add_custom_target(shell_tarball DEPENDS gen-deps "${IMPALA_PYTHON_BUILD_VENVS}" - COMMAND "${CMAKE_SOURCE_DIR}/shell/make_shell_tarball.sh" + COMMAND "${CMAKE_SOURCE_DIR}/shell/make_shell_tarball.sh" ${PYTHON_EXES} ) add_custom_target(shell_pypi_package DEPENDS shell_tarball impala_python @@ -73,10 +87,13 @@ add_custom_target(shell_pypi_test_package DEPENDS shell_tarball impala_python "${CMAKE_SOURCE_DIR}/shell/packaging/make_python_package.sh" ) +# Tests expect to find venvs at 'python2_venv' and 'python3_venv' in tests/shell/util.py. +set(PYTHON2_VENV "${VENV_LOC}/python2_venv") add_custom_target(shell_python2_install DEPENDS "${PYTHON2_VENV}" shell_pypi_test_package - COMMAND "${PYTHON2_VENV}/bin/pip" install --cache-dir "${PYTHON2_PIP_CACHE}" "${SHELL_TEST_PKG}" + COMMAND "${PYTHON2_VENV}/bin/pip" install --cache-dir "${PIP_LOC}/python2" "${SHELL_TEST_PKG}" ) +set(PYTHON3_VENV "${VENV_LOC}/python3_venv") add_custom_target(shell_python3_install DEPENDS "${PYTHON3_VENV}" shell_pypi_test_package - COMMAND "${PYTHON3_VENV}/bin/pip" install --cache-dir "${PYTHON3_PIP_CACHE}" "${SHELL_TEST_PKG}" + COMMAND "${PYTHON3_VENV}/bin/pip" install --cache-dir "${PIP_LOC}/python3" "${SHELL_TEST_PKG}" ) diff --git a/shell/impala-shell b/shell/impala-shell index 5f5dd93b7..7f8e385f9 100755 --- a/shell/impala-shell +++ b/shell/impala-shell @@ -45,10 +45,21 @@ fi PYTHONPATH="${SHELL_HOME}/gen-py:${SHELL_HOME}/lib:${PYTHONPATH}" # External dependencies are installed in /ext-py${PYTHON_VERSION} -PYTHON_VERSION=$("${PYTHON_EXE}" -c 'import sys; print(sys.version_info[0])') 
+PYTHON_VERSION=$("${PYTHON_EXE}" -c 'import sys; \ + print("{}.{}".format(sys.version_info.major, sys.version_info.minor))') +if [ ! -d "${SHELL_HOME}/ext-py${PYTHON_VERSION}" ]; then + # List all ext-py* dirs, remove ext-py prefix, and join into a comma-separated string. + dirs=( $(cd ${SHELL_HOME} && echo ext-py*) ) + vers="${dirs[@]#ext-py}" + pretty="$(printf "%s, " ${vers[@]})" + echo "This impala-shell package was not built to support Python ${PYTHON_VERSION}." \ + "Supported Python versions are: ${pretty%, }." + exit 1 +fi + PYTHONPATH="${SHELL_HOME}/ext-py${PYTHON_VERSION}:${PYTHONPATH}" -if [ ${PYTHON_VERSION} -eq 2 ]; then +if [ "${PYTHON_VERSION}" = "2.7" ]; then # Python 2 requires the legacy pkg_resources.py code PYTHONPATH="${PYTHONPATH}:${SHELL_HOME}/legacy" fi diff --git a/shell/make_shell_tarball.sh b/shell/make_shell_tarball.sh index 20704e86a..09123549b 100755 --- a/shell/make_shell_tarball.sh +++ b/shell/make_shell_tarball.sh @@ -31,6 +31,11 @@ if [ "x${IMPALA_HOME}" == "x" ]; then exit 1 fi +if [ $# -eq 0 ]; then + echo "Must specify at least one python interpreter" + exit 1 +fi + # Detect whether IMPALA_HOME is a git repository. This is used below to allow extra # checks when building ext-py. pushd ${IMPALA_HOME} @@ -66,11 +71,9 @@ THRIFT_GEN_PY_DIR="${SHELL_HOME}/gen-py" echo "Deleting all files in ${TARBALL_ROOT}/{gen-py,lib,ext-py*,legacy}" rm -rf ${TARBALL_ROOT}/lib/* 2>&1 > /dev/null rm -rf ${TARBALL_ROOT}/gen-py/* 2>&1 > /dev/null -rm -rf ${TARBALL_ROOT}/ext-py*/* 2>&1 > /dev/null +rm -rf ${TARBALL_ROOT}/ext-py* 2>&1 > /dev/null rm -rf ${TARBALL_ROOT}/legacy/* 2>&1 > /dev/null mkdir -p ${TARBALL_ROOT}/lib -mkdir -p ${TARBALL_ROOT}/ext-py2 -mkdir -p ${TARBALL_ROOT}/ext-py3 mkdir -p ${TARBALL_ROOT}/legacy rm -f ${THRIFT_GEN_PY_DIR}/impala_build_version.py @@ -126,46 +129,29 @@ for MODULE in ${SHELL_HOME}/ext-py/*; do continue; fi pushd ${MODULE} > /dev/null 2>&1 - if [ ! 
-z "${IMPALA_SYSTEM_PYTHON2:-}" ]; then - echo "Cleaning up old build artifacts." - rm -rf dist 2>&1 > /dev/null - rm -rf build 2>&1 > /dev/null - echo "Building ${MODULE} with Python 2" - # Use the py2_venv to get the wheel package needed for bdist_wheel below. - # python2 is now the virtualenv's python2, which is $IMPALA_SYSTEM_PYTHON2 - source ${IMPALA_HOME}/shell/build/py2_venv/bin/activate - if [[ "$MODULE" == *"/bitarray"* ]]; then - # Need to use setuptools to build wheel for bitarray module - python2 -c "import setuptools; exec(open('setup.py').read())" \ - -q bdist_wheel - else - python2 setup.py -q bdist_wheel clean - fi - # pip install the wheel into the python 2 external dependencies directory - PYTHON2_PIP_CACHE="~/.cache/impala_py2_pip" - pip install --no-deps --cache "${PYTHON2_PIP_CACHE}" \ - --target ${TARBALL_ROOT}/ext-py2 dist/*.whl - fi - if [ ! -z "${IMPALA_SYSTEM_PYTHON3:-}" ]; then + for PYTHON_EXE in $*; do echo "Cleaning up old build artifacts." rm -rf dist 2>&1 > /dev/null rm -rf build 2>&1 > /dev/null - echo "Building ${MODULE} with Python 3" - # Use the py3_venv to get the wheel package needed for bdist_wheel below. - # python3 is now the virtualenv's python3, which is $IMPALA_SYSTEM_PYTHON3 - source ${IMPALA_HOME}/shell/build/py3_venv/bin/activate + echo "Building ${MODULE} with ${PYTHON_EXE}" + # Use the venv to get the wheel package needed for bdist_wheel below. 
+ PYTHON_NAME=$(basename ${PYTHON_EXE}) + source ${IMPALA_HOME}/shell/build/${PYTHON_NAME}_venv/bin/activate if [[ "$MODULE" == *"/bitarray"* ]]; then # Need to use setuptools to build wheel for bitarray module - python3 -c "import setuptools; exec(open('setup.py').read())" \ - -q bdist_wheel + python -c "import setuptools; exec(open('setup.py').read())" -q bdist_wheel else - python3 setup.py -q bdist_wheel clean + python setup.py -q bdist_wheel clean fi - # pip install the wheel into the python 2 external dependencies directory - PYTHON3_PIP_CACHE="~/.cache/impala_py3_pip" - pip install --no-deps --cache "${PYTHON3_PIP_CACHE}" \ - --target ${TARBALL_ROOT}/ext-py3 dist/*.whl - fi + # pip install the wheel into the external dependencies directory + PIP_CACHE="~/.cache/impala_pip/${PYTHON_NAME}" + PYTHON_VERSION=$(${PYTHON_EXE} -c 'import sys; \ + print("{}.{}".format(sys.version_info.major, sys.version_info.minor))') + # Use --upgrade to suppress warnings about replacement when $* includes duplicate + # Python minor versions. + pip install --upgrade --no-deps --cache "${PIP_CACHE}" \ + --target ${TARBALL_ROOT}/ext-py${PYTHON_VERSION} dist/*.whl + done popd 2>&1 > /dev/null done diff --git a/tests/shell/util.py b/tests/shell/util.py index 06f62842d..892ef22f1 100755 --- a/tests/shell/util.py +++ b/tests/shell/util.py @@ -394,6 +394,6 @@ def get_impala_shell_executable(vector): return { 'dev': [impala_shell_executable], 'dev3': ['env', 'IMPALA_PYTHON_EXECUTABLE=python3', impala_shell_executable], - 'python2': [os.path.join(IMPALA_HOME, 'shell/build/py2_venv/bin/impala-shell')], - 'python3': [os.path.join(IMPALA_HOME, 'shell/build/py3_venv/bin/impala-shell')] + 'python2': [os.path.join(IMPALA_HOME, 'shell/build/python2_venv/bin/impala-shell')], + 'python3': [os.path.join(IMPALA_HOME, 'shell/build/python3_venv/bin/impala-shell')] }[vector.get_value_with_default('impala_shell', 'dev')]
