Author: Amir Ayupov
Date: 2023-05-13T10:36:29-07:00
New Revision: 76b2915fdbbba18693c9aabda419768f41106f31
URL: https://github.com/llvm/llvm-project/commit/76b2915fdbbba18693c9aabda419768f41106f31
DIFF: https://github.com/llvm/llvm-project/commit/76b2915fdbbba18693c9aabda419768f41106f31.diff

LOG: [Clang][CMake] Use perf-training for Clang-BOLT

Leverage perf-training flow for BOLT profile collection, enabling reproducible BOLT optimization. Remove the use of bootstrapped build for profile collection.

Test Plan:
- Regular (single-stage) build
```
$ cmake ... -C .../clang/cmake/caches/BOLT.cmake
$ ninja clang-bolt
...
[21/24] Instrumenting clang binary with BOLT
[21/24] Generating BOLT profile for Clang
[23/24] Merging BOLT fdata
Profile from 2 files merged.
[24/24] Optimizing Clang with BOLT
...
1291202496 : executed instructions (-1.1%)
27005133 : taken branches (-71.5%)
...
```
- Two stage build (ThinLTO+InstPGO)
```
$ cmake ... -C .../clang/cmake/caches/BOLT.cmake -C .../clang/cmake/caches/BOLT-PGO.cmake
$ ninja clang-bolt
$ ninja stage2-clang-bolt
...
[2756/2759] Instrumenting clang binary with BOLT
[2756/2759] Generating BOLT profile for Clang
[2758/2759] Merging BOLT fdata
[2759/2759] Optimizing Clang with BOLT
...
BOLT-INFO: 7092 out of 184104 functions in the binary (3.9%) have non-empty execution profile
756531927 : executed instructions (-0.5%)
15399400 : taken branches (-40.3%)
...
``` Reviewed By: beanz Differential Revision: https://reviews.llvm.org/D143553 Added: clang/utils/perf-training/bolt.lit.cfg clang/utils/perf-training/bolt.lit.site.cfg.in Modified: clang/CMakeLists.txt clang/cmake/caches/BOLT.cmake clang/utils/perf-training/CMakeLists.txt Removed: ################################################################################ diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index ef69a68e460a0..bd2ac69c1455e 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -851,9 +851,8 @@ endif() if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED) set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang) - set(CLANGXX_PATH ${CLANG_PATH}++) set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst) - set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst) + set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata) # Instrument clang with BOLT add_custom_target(clang-instrumented @@ -863,73 +862,11 @@ if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED) DEPENDS clang llvm-bolt COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED} -instrument --instrumentation-file-append-pid - --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata + --instrumentation-file=${BOLT_FDATA} COMMENT "Instrumenting clang binary with BOLT" VERBATIM ) - # Make a symlink from clang-bolt.inst to clang++-bolt.inst - add_custom_target(clang++-instrumented - DEPENDS ${CLANGXX_INSTRUMENTED} - ) - add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED} - DEPENDS clang-instrumented - COMMAND ${CMAKE_COMMAND} -E create_symlink - ${CLANG_INSTRUMENTED} - ${CLANGXX_INSTRUMENTED} - COMMENT "Creating symlink from BOLT instrumented clang to clang++" - VERBATIM - ) - - # Build specified targets with instrumented Clang to collect the profile - set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/) - set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/) - set(build_configuration "$<CONFIG>") - 
include(ExternalProject) - ExternalProject_Add(bolt-instrumentation-profile - DEPENDS clang++-instrumented - PREFIX bolt-instrumentation-profile - SOURCE_DIR ${CMAKE_SOURCE_DIR} - STAMP_DIR ${STAMP_DIR} - BINARY_DIR ${BINARY_DIR} - EXCLUDE_FROM_ALL 1 - CMAKE_ARGS - ${CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS} - # We shouldn't need to set this here, but INSTALL_DIR doesn't - # seem to work, so instead I'm passing this through - -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX} - -DCMAKE_C_COMPILER=${CLANG_INSTRUMENTED} - -DCMAKE_CXX_COMPILER=${CLANGXX_INSTRUMENTED} - -DCMAKE_ASM_COMPILER=${CLANG_INSTRUMENTED} - -DCMAKE_ASM_COMPILER_ID=Clang - -DCMAKE_BUILD_TYPE=Release - -DLLVM_ENABLE_PROJECTS=${CLANG_BOLT_INSTRUMENT_PROJECTS} - -DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD} - BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR} - --config ${build_configuration} - --target ${CLANG_BOLT_INSTRUMENT_TARGETS} - INSTALL_COMMAND "" - STEP_TARGETS configure build - USES_TERMINAL_CONFIGURE 1 - USES_TERMINAL_BUILD 1 - USES_TERMINAL_INSTALL 1 - ) - - # Merge profiles into one using merge-fdata - add_custom_target(clang-bolt-profile - DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata - ) - add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata - DEPENDS merge-fdata bolt-instrumentation-profile-build - WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} - COMMAND ${Python3_EXECUTABLE} - ${CMAKE_CURRENT_SOURCE_DIR}/utils/perf-training/perf-helper.py merge-fdata - $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata - ${CMAKE_CURRENT_BINARY_DIR} - COMMENT "Preparing BOLT profile" - VERBATIM - ) - # Optimize original (pre-bolt) Clang using the collected profile set(CLANG_OPTIMIZED ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt) add_custom_target(clang-bolt @@ -939,7 +876,7 @@ if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED) DEPENDS clang-bolt-profile COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_OPTIMIZED} - -data ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata + -data 
${BOLT_FDATA} -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions -split-all-cold -split-eh -dyno-stats -icf=1 -use-gnu-stack COMMAND ${CMAKE_COMMAND} -E rename ${CLANG_OPTIMIZED} $<TARGET_FILE:clang> diff --git a/clang/cmake/caches/BOLT.cmake b/clang/cmake/caches/BOLT.cmake index 78ab3b64386a4..7c75d60320f7e 100644 --- a/clang/cmake/caches/BOLT.cmake +++ b/clang/cmake/caches/BOLT.cmake @@ -1,9 +1,6 @@ set(CMAKE_BUILD_TYPE Release CACHE STRING "") set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "") -set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "") -set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "") set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "") -set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "") set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "") set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "") diff --git a/clang/utils/perf-training/CMakeLists.txt b/clang/utils/perf-training/CMakeLists.txt index 0d551baba2ccf..c6d51863fb1b5 100644 --- a/clang/utils/perf-training/CMakeLists.txt +++ b/clang/utils/perf-training/CMakeLists.txt @@ -61,3 +61,26 @@ if(APPLE AND DTRACE AND NOT LLVM_TOOL_LLVM_DRIVER_BUILD) COMMENT "Generating order file" DEPENDS generate-dtrace-logs) endif() + +if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED) + configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in + ${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg + ) + + add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang" + ${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/ + EXCLUDE_FROM_CHECK_ALL + DEPENDS clang-instrumented clear-bolt-fdata + ) + + add_custom_target(clear-bolt-fdata + COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata + COMMENT "Clearing old BOLT fdata") + + # Merge profiles into one using merge-fdata + add_custom_target(clang-bolt-profile + COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py 
merge-fdata $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Merging BOLT fdata" + DEPENDS merge-fdata generate-bolt-fdata) +endif() diff --git a/clang/utils/perf-training/bolt.lit.cfg b/clang/utils/perf-training/bolt.lit.cfg new file mode 100644 index 0000000000000..234ac855bd67c --- /dev/null +++ b/clang/utils/perf-training/bolt.lit.cfg @@ -0,0 +1,20 @@ +# -*- Python -*- + +from lit import Test +import lit.formats +import lit.util +import os +import subprocess + +config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/') + +config.name = 'Clang Perf Training' +config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test'] + +use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL") +config.test_format = lit.formats.ShTest(use_lit_shell == "0") +config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ ' % (config.clang))) +config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ ' % (config.clang))) +config.substitutions.append( ('%clang_skip_driver', ' %s ' % (config.clang))) +config.substitutions.append( ('%clang', ' %s ' % (config.clang) ) ) +config.substitutions.append( ('%test_root', config.test_exec_root ) ) diff --git a/clang/utils/perf-training/bolt.lit.site.cfg.in b/clang/utils/perf-training/bolt.lit.site.cfg.in new file mode 100644 index 0000000000000..3029319673fc2 --- /dev/null +++ b/clang/utils/perf-training/bolt.lit.site.cfg.in @@ -0,0 +1,14 @@ +@LIT_SITE_CFG_IN_HEADER@ + +import sys + +config.clang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@") +config.perf_helper_dir = "@CMAKE_CURRENT_SOURCE_DIR@" +config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@" +config.test_source_root = "@CLANG_PGO_TRAINING_DATA@" +config.target_triple = "@LLVM_TARGET_TRIPLE@" +config.python_exe = "@Python3_EXECUTABLE@" +config.clang_obj_root = path(r"@CLANG_BINARY_DIR@") + +# Let the 
main config do the real work.
+lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits