This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit c3dc7f9667564a4ffb9644e6f3d0103c007cdc10
Author: Michael Smith <[email protected]>
AuthorDate: Wed Oct 22 13:23:22 2025 -0700

    IMPALA-13147: Limit concurrency of link jobs
    
    Configure separate compile and link pools for ninja. Configures link
    parallelism based on expected memory use, which can be reduced by
    setting IMPALA_MINIMAL_DEBUG_INFO=true or IMPALA_SPLIT_DEBUG_INFO=true.
    
    Adds IMPALA_MAKE_CMD to simplify using the ninja build tool for all make
    operations in scripts. Install ninja on Ubuntu. Adds a '-make' option to
    buildall.sh to force using 'make'.
    
    Adds MOLD_JOBS=1 to avoid overloading the system when trying 'mold' and
    linking test binaries. However 'mold' is not selected as the default
    due to test failures around SASL/GSSAPI (see IMPALA-14527).
    
    Switches bin/jenkins/all-tests.sh to use ninja and removes the guard in
    bootstrap_development.sh limiting IMPALA_BUILD_THREADS as it's no longer
    needed with ninja.
    
    SKIP_BE_TEST_PATTERN in run-backend-tests is unused (only used with
    TARGET_FILESYSTEM=local) so I don't attempt to make it work with ninja.
    
    Tested with local 'IMPALA_SPLIT_DEBUG_INFO=true buildall.sh -skiptests'
    with default (make) and IMPALA_MAKE_CMD=ninja.
    
    Change-Id: I0952dc19ace5c9c42bed0d2ffb61499656c0a2db
    Reviewed-on: http://gerrit.cloudera.org:8080/23572
    Reviewed-by: Joe McDonnell <[email protected]>
    Reviewed-by: Pranav Lodha <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 CMakeLists.txt                 |  6 ++++++
 README-build.md                |  6 ++++--
 bin/bootstrap_build.sh         |  2 +-
 bin/bootstrap_development.sh   | 10 ----------
 bin/clean.sh                   |  2 +-
 bin/impala-config.sh           | 33 ++++++++++++++++++++++++++++-----
 bin/jenkins/all-tests.sh       |  3 +++
 bin/run-backend-tests.sh       | 13 +++++++------
 buildall.sh                    |  5 ++++-
 testdata/bin/copy-udfs-udas.sh |  2 +-
 10 files changed, 55 insertions(+), 27 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f19d443fc..9aaa132dc 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,6 +36,12 @@ set(IMPALA_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
 # Build compile commands database
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
+# Configure ninja build pools
+set(CMAKE_JOB_POOLS compilation_pool=$ENV{IMPALA_BUILD_THREADS}
+                    link_pool=$ENV{IMPALA_LINK_THREADS})
+set(CMAKE_JOB_POOL_COMPILE compilation_pool)
+set(CMAKE_JOB_POOL_LINK link_pool)
+
 # Codegen-dependent executables need to be linked with -rdynamic; otherwise LLVM
 # can't find dependent symbols at runtime.
 #
diff --git a/README-build.md b/README-build.md
index 93d2dc181..7341d5c31 100644
--- a/README-build.md
+++ b/README-build.md
@@ -57,11 +57,13 @@ can do so through the environment variables and scripts listed below.
 
 | Environment variable | Default value | Description |
 |----------------------|---------------|-------------|
-| IMPALA_BUILD_THREADS | "8" or set to number of processors by default. | Used for make -j and distcc -j settings. |
+| IMPALA_BUILD_THREADS | Number of processors. | Used for make -j and distcc -j settings. |
+| IMPALA_LINK_THREADS  | Bounded based on available memory. | Used for ninja. |
+| IMPALA_MAKE_CMD      | "make" | Make tool to use by default, options are make or ninja. |
 | IMPALA_MAKE_FLAGS    | "" | Any extra settings to pass to make.  Also used when copying udfs / udas into HDFS. |
 | USE_SYSTEM_GCC       | "0" | If set to any other value, directs cmake to not set GCC_ROOT, CMAKE_C_COMPILER, CMAKE_CXX_COMPILER, as well as setting TOOLCHAIN_LINK_FLAGS |
 | IMPALA_CXX_COMPILER  | "default" | Used by cmake (cmake_modules/toolchain and clang_toolchain.cmake) to select gcc / clang |
-| IMPALA_LINKER.       | "gold"  | Specifies the linker to use. |
+| IMPALA_LINKER        | "gold"  | Specifies the linker to use; options are "gold", "mold", or "ld". |
 | IS_OSX               | "false" | (Experimental) currently only used to disable Kudu. |
 
 ## Dependencies
diff --git a/bin/bootstrap_build.sh b/bin/bootstrap_build.sh
index 6ef7d6ce1..2dde710bd 100755
--- a/bin/bootstrap_build.sh
+++ b/bin/bootstrap_build.sh
@@ -36,7 +36,7 @@ sudo -E apt-get --quiet update
 # unversioned python-dev and python-setuptools are not available on newer releases
 # that don't support Python 2. Add them only when they exist for the platform,
 # otherwise set Python 3 to be the default Python version.
-PACKAGES='g++ gcc git libsasl2-dev libssl-dev make
+PACKAGES='g++ gcc git libsasl2-dev libssl-dev make ninja-build
      python3-dev python3-setuptools python3-venv libffi-dev language-pack-en
      libkrb5-dev krb5-admin-server krb5-kdc krb5-user libxml2-dev libxslt-dev 
wget'
 
diff --git a/bin/bootstrap_development.sh b/bin/bootstrap_development.sh
index 1b1009f83..f4b0e5e9a 100755
--- a/bin/bootstrap_development.sh
+++ b/bin/bootstrap_development.sh
@@ -54,16 +54,6 @@ source "${BINDIR}/bootstrap_system.sh"
 export MAX_PYTEST_FAILURES=0
 source bin/impala-config.sh > /dev/null 2>&1
 
-BOUNDED_CONCURRENCY=$((AVAILABLE_MEM / 4))
-if [[ $AVAILABLE_MEM -lt 4 ]]; then
-  echo "Insufficient memory ($AVAILABLE_MEM GB) to link Impala test binaries"
-  echo "Increase memory, or run buildall.sh -format -testdata -notests"
-  exit 1
-elif [[ $BOUNDED_CONCURRENCY -lt $IMPALA_BUILD_THREADS ]]; then
-  echo "Bounding concurrency to $BOUNDED_CONCURRENCY for link phase"
-  IMPALA_BUILD_THREADS=$BOUNDED_CONCURRENCY
-fi
-
 time -p ./buildall.sh -format -testdata -skiptests
 
 # To then run the tests:
diff --git a/bin/clean.sh b/bin/clean.sh
index 8a4f45371..e95f5308d 100755
--- a/bin/clean.sh
+++ b/bin/clean.sh
@@ -27,7 +27,7 @@ setup_report_build_error
 
 # If the project was never build, no Makefile will exist and thus make clean will fail.
 # Combine the make command with the bash noop to always return true.
-"${MAKE_CMD:-make}" clean || :
+"${MAKE_CMD:-${IMPALA_MAKE_CMD}}" clean || :
 
 # clean Java projects
 pushd "${IMPALA_HOME}/java"
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 62a42142f..532735b8a 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -580,6 +580,9 @@ export LIB_JAVA=$(find "${JAVA_HOME}/" -name libjava.so | 
head -1)
 export LIB_JSIG=$(find "${JAVA_HOME}/" -name libjsig.so | head -1)
 export LIB_JVM=$(find "${JAVA_HOME}/" -name libjvm.so  | head -1)
 
+# Default to make, but allow overriding to e.g. ninja.
+export IMPALA_MAKE_CMD=${IMPALA_MAKE_CMD:-make}
+
 #########################################################################################
 # Below here are variables that can be overridden by impala-config-*.sh and environment #
 # vars, variables computed based on other variables, and variables that cannot be       #
@@ -608,6 +611,10 @@ export USE_SYSTEM_GCC=${USE_SYSTEM_GCC-0}
 # TODO: Add support for lld as well
 export IMPALA_LINKER=${IMPALA_LINKER-gold}
 
+# Limit mold to a single job to avoid excessive memory consumption while fully utilizing
+# available CPUs.
+export MOLD_JOBS=${IMPALA_MOLD_JOBS-1}
+
 # Override the default compiler by setting a path to the new compiler. The default
 # compiler depends on USE_SYSTEM_GCC and IMPALA_GCC_VERSION. The intended use case
 # is to set the compiler to distcc, in that case the user would also set
@@ -1023,7 +1030,7 @@ export IMPALA_DATASET_DIR="$IMPALA_HOME/testdata/datasets"
 export IMPALA_AUX_DATASET_DIR="$IMPALA_AUX_TEST_HOME/testdata/datasets"
 export IMPALA_COMMON_DIR="$IMPALA_HOME/common"
 export PATH="$IMPALA_TOOLCHAIN_PACKAGES_HOME/gdb-$IMPALA_GDB_VERSION/bin:$PATH"
-export 
PATH="$IMPALA_TOOLCHAIN_PACKAGES_HOME/cmake-$IMPALA_CMAKE_VERSION/bin/:$PATH"
+export 
PATH="$IMPALA_TOOLCHAIN_PACKAGES_HOME/cmake-$IMPALA_CMAKE_VERSION/bin:$PATH"
 export PATH="$IMPALA_HOME/bin:$PATH"
 
 export HADOOP_CONF_DIR="$IMPALA_FE_DIR/src/test/resources"
@@ -1165,16 +1172,31 @@ else
   CGROUP_MEM_LIMIT=8589934591 # max int64 bytes in GB
 fi
 AVAILABLE_MEM=$((AVAILABLE_MEM > $CGROUP_MEM_LIMIT ? $CGROUP_MEM_LIMIT : 
$AVAILABLE_MEM))
-BOUNDED_CONCURRENCY=$((AVAILABLE_MEM / 2))
-if [[ $AVAILABLE_MEM -lt 2 ]]; then
+if [[ $AVAILABLE_MEM -lt 5 ]]; then
   echo "Insufficient memory ($AVAILABLE_MEM GB) to build Impala"
   exit 1
-elif [[ $BOUNDED_CONCURRENCY -lt $CORES ]]; then
+fi
+BOUNDED_CONCURRENCY=$((AVAILABLE_MEM / 2))
+if [[ $BOUNDED_CONCURRENCY -lt $CORES ]]; then
   echo "Bounding concurrency for available memory ($AVAILABLE_MEM GB)"
 else
   BOUNDED_CONCURRENCY=$CORES
 fi
-export IMPALA_BUILD_THREADS=${IMPALA_BUILD_THREADS-"${BOUNDED_CONCURRENCY}"}
+export IMPALA_BUILD_THREADS=${IMPALA_BUILD_THREADS:-"${BOUNDED_CONCURRENCY}"}
+# Limit number of links; only works with ninja builds.
+# Determines number of concurrent links based on expected memory use.
+if [[ "$IMPALA_MINIMAL_DEBUG_INFO" == "true" ||
+      "$IMPALA_SPLIT_DEBUG_INFO"   == "true" ]]; then
+  MEM_PER_LINK=2
+else
+  MEM_PER_LINK=5
+fi
+BOUNDED_LINKS=$((AVAILABLE_MEM / MEM_PER_LINK))
+if [[ $BOUNDED_LINKS -gt $IMPALA_BUILD_THREADS ]]; then
+  # Avoid regressing behavior if IMPALA_BUILD_THREADS is already set to a low value.
+  BOUNDED_LINKS=${IMPALA_BUILD_THREADS}
+fi
+export IMPALA_LINK_THREADS=${IMPALA_LINK_THREADS:-"${BOUNDED_LINKS}"}
 
 # Additional flags to pass to make or ninja.
 export IMPALA_MAKE_FLAGS=${IMPALA_MAKE_FLAGS-}
@@ -1258,6 +1280,7 @@ echo "IMPALA_OBS_VERSION      = $IMPALA_OBS_VERSION"
 echo "IMPALA_SYSTEM_PYTHON2   = $IMPALA_SYSTEM_PYTHON2"
 echo "IMPALA_SYSTEM_PYTHON3   = $IMPALA_SYSTEM_PYTHON3"
 echo "IMPALA_BUILD_THREADS    = $IMPALA_BUILD_THREADS"
+echo "IMPALA_LINK_THREADS     = $IMPALA_LINK_THREADS"
 echo "NUM_CONCURRENT_TESTS    = $NUM_CONCURRENT_TESTS"
 echo "USE_CUSTOM_IMPALA_BASE_IMAGE = $USE_CUSTOM_IMPALA_BASE_IMAGE"
 echo "IMPALA_CUSTOM_DOCKER_BASE    = $IMPALA_CUSTOM_DOCKER_BASE"
diff --git a/bin/jenkins/all-tests.sh b/bin/jenkins/all-tests.sh
index 6624a7dae..b74da5545 100644
--- a/bin/jenkins/all-tests.sh
+++ b/bin/jenkins/all-tests.sh
@@ -31,6 +31,9 @@ export IMPALA_MAVEN_OPTIONS="-U"
 # Allow unlimited pytest failures
 export MAX_PYTEST_FAILURES=0
 
+# Use ninja for better link concurrency.
+export IMPALA_MAKE_CMD=ninja
+
 # When UBSAN_FAIL is "death", the logs are monitored for UBSAN errors. Any errors will
 # then cause this script to exit.
 #
diff --git a/bin/run-backend-tests.sh b/bin/run-backend-tests.sh
index 9952496f1..ba2a7d981 100755
--- a/bin/run-backend-tests.sh
+++ b/bin/run-backend-tests.sh
@@ -28,11 +28,6 @@ export GTEST_OUTPUT="xml:$IMPALA_BE_TEST_LOGS_DIR/"
 # The backend unit tests currently do not work when HEAPCHECK is enabled.
 export HEAPCHECK=
 
-BE_TEST_ARGS=""
-if [[ -n "$SKIP_BE_TEST_PATTERN" ]]; then
-  BE_TEST_ARGS="-E ${SKIP_BE_TEST_PATTERN}"
-fi
-
 cd ${IMPALA_BE_DIR}
 . ${IMPALA_HOME}/bin/set-classpath.sh
 cd ..
@@ -44,4 +39,10 @@ export 
ASAN_OPTIONS="disable_coredump=0:unmap_shadow_on_exit=1"
 export UBSAN_OPTIONS="disable_coredump=0:unmap_shadow_on_exit=1"
 
 export 
PATH="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/llvm-${IMPALA_LLVM_VERSION}/bin:${PATH}"
-"${MAKE_CMD:-make}" test ARGS="${BE_TEST_ARGS}"
\ No newline at end of file
+if [[ -n "$SKIP_BE_TEST_PATTERN" ]]; then
+  # Requires make, will fail with ninja.
+  "${MAKE_CMD:-${IMPALA_MAKE_CMD}}" test ARGS="-E ${SKIP_BE_TEST_PATTERN}"
+else
+  # Ninja doesn't accept additional parameters, so omit ARGS.
+  "${MAKE_CMD:-${IMPALA_MAKE_CMD}}" test
+fi
diff --git a/buildall.sh b/buildall.sh
index f95b291ec..70f905c51 100755
--- a/buildall.sh
+++ b/buildall.sh
@@ -80,7 +80,7 @@ BUILD_DEBUG_NOOPT=0
 BUILD_SHARED_LIBS=0
 UDF_DEVEL=0
 # Export MAKE_CMD so it is visible in scripts that invoke make, e.g. copy-udfs-udas.sh
-export MAKE_CMD=make
+export MAKE_CMD=${IMPALA_MAKE_CMD:-make}
 
 # Defaults that can be picked up from the environment, but are overridable through the
 # commandline.
@@ -203,6 +203,9 @@ do
     -ninja)
       MAKE_CMD=ninja
       ;;
+    -make)
+      MAKE_CMD=make
+      ;;
     -cmake_only)
       GEN_CMAKE_ONLY=1
       ;;
diff --git a/testdata/bin/copy-udfs-udas.sh b/testdata/bin/copy-udfs-udas.sh
index e73f38d1e..581df979a 100755
--- a/testdata/bin/copy-udfs-udas.sh
+++ b/testdata/bin/copy-udfs-udas.sh
@@ -49,7 +49,7 @@ done
 if [ $BUILD -eq 1 ]
 then
   pushd "${IMPALA_HOME}"
-  "${MAKE_CMD:-make}" ${IMPALA_MAKE_FLAGS} "-j${IMPALA_BUILD_THREADS:-4}" \
+  "${MAKE_CMD:-${IMPALA_MAKE_CMD}}" ${IMPALA_MAKE_FLAGS} 
"-j${IMPALA_BUILD_THREADS:-4}" \
       TestUdas TestUdfs test-udfs-ir udfsample udasample udf-sample-ir 
uda-sample-ir
   cd "${IMPALA_HOME}/java/test-corrupt-hive-udfs"
   "${IMPALA_HOME}/bin/mvn-quiet.sh" package

Reply via email to