This is an automated email from the ASF dual-hosted git repository. michaelsmith pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 89698426f6c57a06761003434fd6dc7fbabe15fa Author: Joe McDonnell <[email protected]> AuthorDate: Thu May 25 10:38:29 2023 -0700 IMPALA-12165: Add option for split debug information (-gsplit-dwarf) This adds the IMPALA_SPLIT_DEBUG_INFO environment variable, which controls whether the build uses -gsplit-dwarf. This option puts debug information in a separate .dwo file for each C++ file. Executables contain metadata pointing to those .dwo files and don't need to include the debug information themselves. This reduces link time and disk space usage. The default for IMPALA_SPLIT_DEBUG_INFO is off as this is intended to be an opt-in option for developers. For a debug build with compressed debug information, it cuts disk space usage roughly in half: Without backend tests (measuring "du -sh be"): Regular: 5.6GB Split debuginfo: 2.7GB With backend tests: Regular: 22GB Split debuginfo: 12GB This only works for the debug information from Impala itself. Debug information from toolchain dependencies is still included in each executable. Split debug information has been around for a long time, so tools like GDB work with it. Resolving minidumps works properly. 
Testing: - Ran builds locally (with GCC and Clang) - Attached to Impala with GDB and verified that symbols worked - Resolved a minidump and checked the output Change-Id: I3bbe700279a5dc3fde338fdfa0ea355d3570c9d0 Reviewed-on: http://gerrit.cloudera.org:8080/21720 Reviewed-by: Jason Fehr <[email protected]> Reviewed-by: Michael Smith <[email protected]> Tested-by: Michael Smith <[email protected]> --- be/CMakeLists.txt | 9 +++++++++ bin/impala-config.sh | 10 +++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 789f0fa18..76ebf3ace 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -270,6 +270,15 @@ if ($ENV{IMPALA_COMPRESSED_DEBUG_INFO} STREQUAL "true") endif() endif() +# The IMPALA_SPLIT_DEBUG_INFO option stores debug info in a .dwo file for each C++ file. +# This debug info can be referenced by executables without being incorporated into the +# executable itself. Multiple executables can share a single copy of the debug info. This +# reduces link time and disk space usage. Most tools (including gdb) know how to access +# and read the .dwo files to get debug info. +if ($ENV{IMPALA_SPLIT_DEBUG_INFO} STREQUAL "true") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -gsplit-dwarf") +endif() + # Use ccache when found and not explicitly disabled by setting the DISABLE_CCACHE envvar. find_program(CCACHE ccache) set(RULE_LAUNCH_PREFIX) diff --git a/bin/impala-config.sh b/bin/impala-config.sh index 98deae819..f53b28c95 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -594,7 +594,7 @@ export ENABLE_IMPALA_IR_DEBUG_INFO=${ENABLE_IMPALA_IR_DEBUG_INFO-false} # disk space for a developer environment. A large amount of the binary # size is due to debug information. # -# These are two options for reducing the binary size and disk space +# These are a few options for reducing the binary size and disk space # usage. 
# - IMPALA_MINIMAL_DEBUG_INFO=true changes the build to produce only # minimal debuginfo (i.e. -g1). This has line tables and can do backtraces, @@ -607,11 +607,19 @@ export ENABLE_IMPALA_IR_DEBUG_INFO=${ENABLE_IMPALA_IR_DEBUG_INFO-false} # the Breakpad scripts have been modified to handle it, but there may # be other tools that do not know how to use it. This reduces the size # of binaries by 50+%. +# - IMPALA_SPLIT_DEBUG_INFO=true changes the build to put debug info in +# separate .dwo files for each C++ file. Executables contain metadata +# pointing to these .dwo files without needing to incorporate the debug +# information. This allows executables to share a single copy of +# the debug information. It also reduces link time, as the linker does +# not need to process the debug info. Tools (including gdb) mostly know +# how to handle this split debug information. # # Due to the major reduction in binary size and broad support in debugging # tools, compressed debug information is enabled by default. export IMPALA_MINIMAL_DEBUG_INFO=${IMPALA_MINIMAL_DEBUG_INFO-false} export IMPALA_COMPRESSED_DEBUG_INFO=${IMPALA_COMPRESSED_DEBUG_INFO-true} +export IMPALA_SPLIT_DEBUG_INFO=${IMPALA_SPLIT_DEBUG_INFO-false} # Download and use the CDH components from S3. It can be useful to set this to false if # building against a custom local build using HIVE_SRC_DIR_OVERRIDE,
