This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 07d5a93de642b8c10ff4893ffd1bfa8ef0a92147
Author: Joe McDonnell <[email protected]>
AuthorDate: Mon Jun 19 12:27:40 2023 -0700

    IMPALA-12220: pip install ext-py dependencies in the shell tarball
    
    The impala-shell tarball ships its external dependencies
    by building eggs and including them in the ext-py* directories.
    On Redhat 9 and Ubuntu 22, the impala-shell tarball encountered
    a regression where the sasl package could not access its
    Client class:
    Error connecting: AttributeError, module 'sasl' has no attribute 'Client'
    
    This only occurs when using eggs (which are zip files). The virtualenv
    installs worked fine. Unpacking the eggs and using the content directly
    also avoids the problem.
    
    This reworks the shell tarball to instead build wheels and install
    them with 'pip install'. This means that the external dependencies
    are not packaged in eggs, and this avoids the issue with sasl. This
    is a minimal change to avoid the issue until the shell tarball build
    can be reworked more extensively.
    
    Testing:
     - Ran shell tests on Redhat 9
    
    Change-Id: I49403979c559b7f8bbe038865c06db6024468d72
    Reviewed-on: http://gerrit.cloudera.org:8080/20095
    Reviewed-by: Michael Smith <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 shell/CMakeLists.txt        | 56 ++++++++++++++++++++++++++++++---------------
 shell/impala-shell          | 29 ++++++++---------------
 shell/make_shell_tarball.sh | 48 ++++++++++++++++++++++++++------------
 3 files changed, 80 insertions(+), 53 deletions(-)

diff --git a/shell/CMakeLists.txt b/shell/CMakeLists.txt
index 3c670a9c7..3d97b7e28 100644
--- a/shell/CMakeLists.txt
+++ b/shell/CMakeLists.txt
@@ -15,7 +15,43 @@
 # specific language governing permissions and limitations
 # under the License.
 
-add_custom_target(shell_tarball DEPENDS gen-deps
+# These virtualenvs serve two purposes:
+# 1. They have system python with wheel installed, and they can be used to produce
+#    wheels for external dependencies for the shell tarball build.
+# 2. We pip install impala-shell into them for use in tests.
+# The initial virtualenv creation includes the "pip install wheel" command to
+# satisfy #1. #2 is a separate step and has no interaction with #1.
+set(PYTHON2_VENV "${CMAKE_SOURCE_DIR}/shell/build/py2_venv")
+set(PYTHON3_VENV "${CMAKE_SOURCE_DIR}/shell/build/py3_venv")
+# IMPALA-12117: Use separate pip cache directories to avoid concurrency
+# issues. The standard location is in ~/.cache/pip, so this uses directories
+# inside ~/.cache. These typically consume a couple MB each.
+set(PYTHON2_PIP_CACHE "~/.cache/impala_py2_pip")
+set(PYTHON3_PIP_CACHE "~/.cache/impala_py3_pip")
+
+add_custom_command(OUTPUT "${PYTHON2_VENV}" DEPENDS impala_python
+  COMMAND impala-virtualenv --python "$ENV{IMPALA_SYSTEM_PYTHON2}" "${PYTHON2_VENV}"
+  COMMAND "${PYTHON2_VENV}/bin/pip" install --cache-dir "${PYTHON2_PIP_CACHE}" wheel
+)
+
+# In cases where system python3 is old, this can use impala-virtualenv, so it
+# needs to depend on impala_python.
+add_custom_command(OUTPUT "${PYTHON3_VENV}" DEPENDS impala_python
+  COMMAND "${CMAKE_SOURCE_DIR}/bin/cmake_aux/create_py3_virtualenv.sh" "${PYTHON3_VENV}"
+  COMMAND "${PYTHON3_VENV}/bin/pip" install --cache-dir "${PYTHON3_PIP_CACHE}" wheel
+)
+
+# The shell tarball build only needs the build virtualenvs for the system
+# pythons that are installed.
+set(IMPALA_PYTHON_BUILD_VENVS "")
+if (NOT $ENV{IMPALA_SYSTEM_PYTHON2} EQUAL "")
+  list(APPEND IMPALA_PYTHON_BUILD_VENVS "${PYTHON2_VENV}")
+endif()
+if (NOT $ENV{IMPALA_SYSTEM_PYTHON3} EQUAL "")
+  list(APPEND IMPALA_PYTHON_BUILD_VENVS "${PYTHON3_VENV}")
+endif()
+
+add_custom_target(shell_tarball DEPENDS gen-deps "${IMPALA_PYTHON_BUILD_VENVS}"
   COMMAND "${CMAKE_SOURCE_DIR}/shell/make_shell_tarball.sh"
 )
 
@@ -37,28 +73,10 @@ add_custom_target(shell_pypi_test_package DEPENDS shell_tarball impala_python
     "${CMAKE_SOURCE_DIR}/shell/packaging/make_python_package.sh"
 )
 
-set(PYTHON2_VENV "${CMAKE_SOURCE_DIR}/shell/build/py2_venv")
-set(PYTHON3_VENV "${CMAKE_SOURCE_DIR}/shell/build/py3_venv")
-# IMPALA-12117: Use separate pip cache directories to avoid concurrency
-# issues. The standard location is in ~/.cache/pip, so this uses directories
-# inside ~/.cache. These typical consume a couple MB each.
-set(PYTHON2_PIP_CACHE "~/.cache/impala_py2_pip")
-set(PYTHON3_PIP_CACHE "~/.cache/impala_py3_pip")
-
-add_custom_command(OUTPUT "${PYTHON2_VENV}" DEPENDS impala_python
-  COMMAND impala-virtualenv --python "$ENV{IMPALA_SYSTEM_PYTHON2}" "${PYTHON2_VENV}"
-)
-
 add_custom_target(shell_python2_install DEPENDS "${PYTHON2_VENV}" shell_pypi_test_package
   COMMAND "${PYTHON2_VENV}/bin/pip" install --cache-dir "${PYTHON2_PIP_CACHE}" "${SHELL_TEST_PKG}"
 )
 
-# In cases where system python3 is old, this can use impala-virtualenv, so it
-# needs to depend on impala_python.
-add_custom_command(OUTPUT "${PYTHON3_VENV}" DEPENDS impala_python
-  COMMAND "${CMAKE_SOURCE_DIR}/bin/cmake_aux/create_py3_virtualenv.sh" "${PYTHON3_VENV}"
-)
-
 add_custom_target(shell_python3_install DEPENDS "${PYTHON3_VENV}" shell_pypi_test_package
   COMMAND "${PYTHON3_VENV}/bin/pip" install --cache-dir "${PYTHON3_PIP_CACHE}" "${SHELL_TEST_PKG}"
 )
diff --git a/shell/impala-shell b/shell/impala-shell
index 651db863d..5f5dd93b7 100755
--- a/shell/impala-shell
+++ b/shell/impala-shell
@@ -28,7 +28,7 @@
 # gen-py/ -- containing the Python thrift stubs for connecting to Impalad
 # lib/ -- containing the Hive Beeswax libraries; note this directory exists only in the
 #         shell tarball, not the Impala repository, see make_shell_tarball.sh for details
-# ext-py${VERSION}/ -- containing all the eggs for the modules packaged with the shell.
+# ext-py${VERSION}/ -- containing the external dependencies for the shell.
 
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 SHELL_HOME=${IMPALA_SHELL_HOME:-${SCRIPT_DIR}}
@@ -36,32 +36,21 @@ SHELL_HOME=${IMPALA_SHELL_HOME:-${SCRIPT_DIR}}
 # Set the envrionment's locale settings to allow for utf-8 compatibility
 export LC_CTYPE=${LC_CTYPE:-en_US.UTF-8}
 
-# User can configure EGG_CACHE by setting PYTHON_EGG_CACHE.
-# By default it is set to a per-user temporary location,
-# which follows what hue does.
-PYTHON_EGG_CACHE=${PYTHON_EGG_CACHE:-/tmp/impala-shell-python-egg-cache-${USER}}
-
-if [ ! -d ${PYTHON_EGG_CACHE} ]; then
-  mkdir ${PYTHON_EGG_CACHE}
-fi
-
 # Select python version; prefer 2, use 3 if 2's absent. Allow override with envvar
 PYTHON_EXE="${IMPALA_PYTHON_EXECUTABLE:-python}"
 if ! command -v "${PYTHON_EXE}" > /dev/null; then
   PYTHON_EXE=python3
 fi
 
-# External module eggs are located in /ext-py, append them to the PYTHONPATH
-# Loads eggs based on Python version
+PYTHONPATH="${SHELL_HOME}/gen-py:${SHELL_HOME}/lib:${PYTHONPATH}"
+
+# External dependencies are installed in /ext-py${PYTHON_VERSION}
 PYTHON_VERSION=$("${PYTHON_EXE}" -c 'import sys; print(sys.version_info[0])')
-EGG_PATH=''
-for EGG in $(ls ${SHELL_HOME}/ext-py${PYTHON_VERSION}/*.egg); do
-   EGG_PATH="${EGG}:${EGG_PATH}"
-done
+PYTHONPATH="${SHELL_HOME}/ext-py${PYTHON_VERSION}:${PYTHONPATH}"
 
-LEGACY=
 if [ ${PYTHON_VERSION} -eq 2 ]; then
-  LEGACY=":${SHELL_HOME}/legacy"
+  # Python 2 requires the legacy pkg_resources.py code
+  PYTHONPATH="${PYTHONPATH}:${SHELL_HOME}/legacy"
 fi
-PYTHONPATH="${EGG_PATH}${SHELL_HOME}/gen-py:${SHELL_HOME}/lib:${PYTHONPATH}${LEGACY}" \
-  PYTHONIOENCODING='utf-8' exec ${PYTHON_EXE} ${SHELL_HOME}/impala_shell.py "$@"
+PYTHONPATH="${PYTHONPATH}" PYTHONIOENCODING='utf-8' exec ${PYTHON_EXE} \
+  ${SHELL_HOME}/impala_shell.py "$@"
diff --git a/shell/make_shell_tarball.sh b/shell/make_shell_tarball.sh
index 775a63c48..8e0658f41 100755
--- a/shell/make_shell_tarball.sh
+++ b/shell/make_shell_tarball.sh
@@ -104,8 +104,16 @@ def get_build_date():
   return "${BUILD_DATE}"
 EOF
 
-# Building all eggs.
-echo "Building all external modules into eggs"
+# Building all external dependencies
+#
+# This builds each package to a wheel, then pip installs that wheel into the external
+# dependencies directory for that Python version. The result directories are similar
+# to the lib/python${version}/site-packages directory for a virtualenv with impala-shell
+# installed.
+#
+# These use the same pip caches as the virtualenvs to avoid extra downloads. This
+# script is a prerequisite for the pypi packaging, so there is no concurrency issue.
+echo "Building all external dependencies"
 for MODULE in ${SHELL_HOME}/ext-py/*; do
   # Sometimes there are leftover module directories from version changes. If IMPALA_HOME
   # is a git repository, then we can check if the module directory is tracked by git.
@@ -122,29 +130,41 @@ for MODULE in ${SHELL_HOME}/ext-py/*; do
     echo "Cleaning up old build artifacts."
     rm -rf dist 2>&1 > /dev/null
     rm -rf build 2>&1 > /dev/null
-    echo "Creating a Python 2 egg for ${MODULE}"
+    echo "Building ${MODULE} with Python 2"
+    # Use the py2_venv to get the wheel package needed for bdist_wheel below.
+    # python2 is now the virtualenv's python2, which is $IMPALA_SYSTEM_PYTHON2
+    source ${IMPALA_HOME}/shell/build/py2_venv/bin/activate
     if [[ "$MODULE" == *"/bitarray"* ]]; then
-      # Need to use setuptools to build egg for bitarray module
-      ${IMPALA_SYSTEM_PYTHON2} -c "import setuptools; exec(open('setup.py').read())" \
-          -q bdist_egg
+      # Need to use setuptools to build wheel for bitarray module
+      python2 -c "import setuptools; exec(open('setup.py').read())" \
+          -q bdist_wheel
     else
-      ${IMPALA_SYSTEM_PYTHON2} setup.py -q bdist_egg clean
+      python2 setup.py -q bdist_wheel clean
     fi
-    cp dist/*.egg ${TARBALL_ROOT}/ext-py2
+    # pip install the wheel into the python 2 external dependencies directory
+    PYTHON2_PIP_CACHE="~/.cache/impala_py2_pip"
+    pip install --no-deps --cache "${PYTHON2_PIP_CACHE}" \
+      --target ${TARBALL_ROOT}/ext-py2 dist/*.whl
   fi
   if [ ! -z "${IMPALA_SYSTEM_PYTHON3:-}" ]; then
     echo "Cleaning up old build artifacts."
     rm -rf dist 2>&1 > /dev/null
     rm -rf build 2>&1 > /dev/null
-    echo "Creating a Python 3 egg for ${MODULE}"
+    echo "Building ${MODULE} with Python 3"
+    # Use the py3_venv to get the wheel package needed for bdist_wheel below.
+    # python3 is now the virtualenv's python3, which is $IMPALA_SYSTEM_PYTHON3
+    source ${IMPALA_HOME}/shell/build/py3_venv/bin/activate
     if [[ "$MODULE" == *"/bitarray"* ]]; then
-      # Need to use setuptools to build egg for bitarray module
-      ${IMPALA_SYSTEM_PYTHON3} -c "import setuptools; exec(open('setup.py').read())" \
-          -q bdist_egg
+      # Need to use setuptools to build wheel for bitarray module
+      python3 -c "import setuptools; exec(open('setup.py').read())" \
+          -q bdist_wheel
     else
-      ${IMPALA_SYSTEM_PYTHON3} setup.py -q bdist_egg clean
+      python3 setup.py -q bdist_wheel clean
     fi
-    cp dist/*.egg ${TARBALL_ROOT}/ext-py3
+    # pip install the wheel into the python 3 external dependencies directory
+    PYTHON3_PIP_CACHE="~/.cache/impala_py3_pip"
+    pip install --no-deps --cache "${PYTHON3_PIP_CACHE}" \
+      --target ${TARBALL_ROOT}/ext-py3 dist/*.whl
   fi
   popd 2>&1 > /dev/null
 done

Reply via email to