This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 88dcdfd46 IMPALA-12807: Add support for mold linker
88dcdfd46 is described below
commit 88dcdfd4662d0ec4a6e0d81d643d207f6e5ed900
Author: Joe McDonnell <[email protected]>
AuthorDate: Wed Feb 14 21:57:54 2024 -0800
IMPALA-12807: Add support for mold linker
This adds support for using the mold linker. It changes
the existing USE_GOLD_LINKER environment variable to
IMPALA_LINKER, which accepts ld, gold, or mold as
values. It defaults to 'gold' to match current behavior.
Developers can override it in bin/impala-config-local.sh.
Clang does not implement -gz properly until version 12:
earlier versions do not enable compressed debuginfo in the
final binary. Because of this, IMPALA_LINKER=mold doesn't
work with IMPALA_COMPRESSED_DEBUG_INFO=true on Clang < 12.
This change detects Clang < 12 and skips -gz, since the flag
is ineffective there.
Mold behaves similarly to LLD and requires
--exclude-libs to use the full library name (e.g.
liblz4.a rather than liblz4). Gold will happily
accept the full library name, so this change switches
to the full library name.
Mold is much faster for incremental builds on my system:
(e.g. touch be/src/scheduling/scheduler.cc && make -j8 impalad)
gold: 15.8s
mold: 2.6s
Testing:
- Ran builds with IMPALA_LINKER=mold on Centos 7, Redhat 8,
and Ubuntu 20.
Change-Id: Ia9e9accd06b6ecd182d200d81afaae09a885c241
Reviewed-on: http://gerrit.cloudera.org:8080/21121
Reviewed-by: Michael Smith <[email protected]>
Reviewed-by: Andrew Sherman <[email protected]>
Tested-by: Impala Public Jenkins <[email protected]>
---
be/CMakeLists.txt | 28 +++++++++++++++++++++++-----
be/src/service/CMakeLists.txt | 12 +++++++-----
bin/bootstrap_toolchain.py | 6 +++---
bin/impala-config.sh | 9 +++++++--
4 files changed, 40 insertions(+), 15 deletions(-)
diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 9accfa8b1..789f0fa18 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -27,6 +27,13 @@ PROJECT(ASSEMBLER)
option(BUILD_WITH_NO_TESTS "Do not generate test and benchmark targets" OFF)
+# Validate the IMPALA_LINKER environment variable
+if (NOT "$ENV{IMPALA_LINKER}" STREQUAL "ld" AND
+ NOT "$ENV{IMPALA_LINKER}" STREQUAL "gold" AND
+ NOT "$ENV{IMPALA_LINKER}" STREQUAL "mold")
+ message(FATAL_ERROR "Invalid IMPALA_LINKER: $ENV{IMPALA_LINKER} (expected:
ld, gold, or mold)")
+endif()
+
# compiler flags that are common across debug/release builds
# -Wall: Enable all warnings.
# -Wno-sign-compare: suppress warnings for comparison between signed and
unsigned
@@ -58,6 +65,14 @@ SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS}
-DBOOST_ALLOW_DEPRECATED_HEADERS")
# built at OS where getrandom(2) is available at OSes where getrandom(2)
# isn't supported (e.g., that might happen in containerized deployments).
SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS}
-DBOOST_UUID_RANDOM_PROVIDER_FORCE_POSIX")
+IF($ENV{IMPALA_LINKER} STREQUAL "mold")
+ # Only very recent GCC 12+ has support for -fuse-ld=mold, so we override
"ld" by
+ # putting Mold's libexec/mold directory (which has a "ld" symlink) on the
path.
+ SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -B
$ENV{IMPALA_TOOLCHAIN_PACKAGES_HOME}/mold-$ENV{IMPALA_MOLD_VERSION}/libexec/mold")
+ENDIF()
+# Note: apart from gold linker, binutils provides an up-to-date "as" utility.
Older
+# distributions will have an "as" utility too old to process the output from
+# modern GCC.
SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -B
$ENV{IMPALA_TOOLCHAIN_PACKAGES_HOME}/binutils-$ENV{IMPALA_BINUTILS_VERSION}/bin/")
# -Wno-deprecated-declarations: OpenSSL3 deprecated various APIs currently
used by
# Impala, so this disables those warnings when using OpenSSL3 until they
can be
@@ -65,7 +80,7 @@ SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -B
$ENV{IMPALA_TOOLCHAIN_PACKAGES_HOME
if (OPENSSL_VERSION VERSION_GREATER_EQUAL 3)
SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-deprecated-declarations")
endif()
-IF($ENV{USE_GOLD_LINKER} STREQUAL "true")
+IF($ENV{IMPALA_LINKER} STREQUAL "gold")
SET(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -fuse-ld=gold")
ENDIF()
@@ -245,11 +260,14 @@ endif()
# debug info in the executable. This can reduce the size of binaries by >50%
# without changing the amount of debug information. gdb is known to work
# with compressed debug info, but other tools may not know how to use it.
-# TODO: The current version of Clang does not handles this flag correctly and
-# simply produces binaries with uncompressed debug info. This needs further
-# debugging.
if ($ENV{IMPALA_COMPRESSED_DEBUG_INFO} STREQUAL "true")
- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -gz")
+ # Clang doesn't handle -gz properly until version 12, so there is no reason
to keep it.
+ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang"
+ AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12.0)
+ message(STATUS "Detected Clang < 12: -gz is ineffective on this version,
skipping.")
+ else()
+ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -gz")
+ endif()
endif()
# Use ccache when found and not explicitly disabled by setting the
DISABLE_CCACHE envvar.
diff --git a/be/src/service/CMakeLists.txt b/be/src/service/CMakeLists.txt
index a6b51395e..dbe2d181c 100644
--- a/be/src/service/CMakeLists.txt
+++ b/be/src/service/CMakeLists.txt
@@ -58,11 +58,13 @@ add_library(fesupport SHARED
# Hide all symbols from compression libraries so that users of this
libfesupport.so
# don't have conflicts with libhadoop or other libraries.
# TODO: This should also hide most other symbols as a precaution.
-set(LINK_FLAGS "-Wl,--exclude-libs=libbz2")
-set(LINK_FLAGS "${LINK_FLAGS} -Wl,--exclude-libs=liblz4")
-set(LINK_FLAGS "${LINK_FLAGS} -Wl,--exclude-libs=libsnappy")
-set(LINK_FLAGS "${LINK_FLAGS} -Wl,--exclude-libs=libz")
-set(LINK_FLAGS "${LINK_FLAGS} -Wl,--exclude-libs=libzstd")
+# Note: Using the full library name with ".a" is needed for lld/mold linkers.
+# It has no impact on gold, which supports with or without the .a.
+set(LINK_FLAGS "-Wl,--exclude-libs=libbz2.a")
+set(LINK_FLAGS "${LINK_FLAGS} -Wl,--exclude-libs=liblz4.a")
+set(LINK_FLAGS "${LINK_FLAGS} -Wl,--exclude-libs=libsnappy.a")
+set(LINK_FLAGS "${LINK_FLAGS} -Wl,--exclude-libs=libz.a")
+set(LINK_FLAGS "${LINK_FLAGS} -Wl,--exclude-libs=libzstd.a")
set_target_properties(fesupport
PROPERTIES
LINK_FLAGS "${LINK_FLAGS}")
diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index 2162bc1db..171aec497 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -474,9 +474,9 @@ def get_toolchain_downloads():
toolchain_packages += [ToolchainPackage(p) for p in
["avro", "binutils", "boost", "breakpad", "bzip2", "calloncehack",
"cctz",
"cloudflarezlib", "cmake", "crcutil", "curl", "flatbuffers", "gdb",
"gflags",
- "glog", "gperftools", "gtest", "jwt-cpp", "libev", "libunwind", "lz4",
"openldap",
- "orc", "protobuf", "python", "rapidjson", "re2", "snappy", "tpc-h",
"tpc-ds",
- "zlib", "zstd"]]
+ "glog", "gperftools", "gtest", "jwt-cpp", "libev", "libunwind", "lz4",
"mold",
+ "openldap", "orc", "protobuf", "python", "rapidjson", "re2", "snappy",
"tpc-h",
+ "tpc-ds", "zlib", "zstd"]]
python3_package = ToolchainPackage(
"python", explicit_version=os.environ.get("IMPALA_PYTHON3_VERSION"))
toolchain_packages += [python3_package]
diff --git a/bin/impala-config.sh b/bin/impala-config.sh
index 98c9dda46..30ceb6d1a 100755
--- a/bin/impala-config.sh
+++ b/bin/impala-config.sh
@@ -207,6 +207,8 @@ if [[ $ARCH_NAME == 'aarch64' ]]; then
export IMPALA_HADOOP_CLIENT_VERSION=3.3.6
unset IMPALA_HADOOP_CLIENT_URL
fi
+export IMPALA_MOLD_VERSION=2.4.1
+unset IMPALA_MOLD_URL
# Impala JDBC driver for testing.
export IMPALA_SIMBA_JDBC_DRIVER_VERSION=42-2.6.32.1041
@@ -531,8 +533,11 @@ chmod 755 "${PYTHON_EGG_CACHE}"
# If it's 0, Impala will be built with the compiler in the toolchain directory.
export USE_SYSTEM_GCC=${USE_SYSTEM_GCC-0}
-# Use ld.gold instead of ld by default to speed up builds.
-export USE_GOLD_LINKER=${USE_GOLD_LINKER-true}
+# Allow the linker to be set to gold, mold, or regular ld. Gold is the default
+# as it has been for a long time. Mold is a new linker that is faster than
gold.
+# Note: This is validated in the CMake code.
+# TODO: Add support for lld as well
+export IMPALA_LINKER=${IMPALA_LINKER-gold}
# Override the default compiler by setting a path to the new compiler. The
default
# compiler depends on USE_SYSTEM_GCC and IMPALA_GCC_VERSION. The intended use
case