https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/79942
>From 63904124dc6c6cd9b855046fac3ad1a5e72d60b7 Mon Sep 17 00:00:00 2001 From: Amir Ayupov <aau...@fb.com> Date: Wed, 19 Jul 2023 20:30:29 -0700 Subject: [PATCH 1/4] [Clang][CMake] Add CSSPGO support to LLVM_BUILD_INSTRUMENTED Build on Clang-BOLT infrastructure to collect sample profiles for CSSPGO. Add clang/cmake/caches/CSSPGO.cmake to automate CSSPGO Clang build. Differential Revision: https://reviews.llvm.org/D155419 --- clang/CMakeLists.txt | 12 ++++- clang/cmake/caches/CSSPGO.cmake | 3 ++ clang/utils/perf-training/CMakeLists.txt | 32 ++++++++++++- clang/utils/perf-training/lit.cfg | 6 +++ clang/utils/perf-training/lit.site.cfg.in | 1 + clang/utils/perf-training/perf-helper.py | 55 ++++++++++++++++++---- llvm/CMakeLists.txt | 3 ++ llvm/cmake/modules/HandleLLVMOptions.cmake | 26 +++++++++- 8 files changed, 124 insertions(+), 14 deletions(-) create mode 100644 clang/cmake/caches/CSSPGO.cmake diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 47fc2e4886cfc..5d16442ac7bc3 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -741,11 +741,21 @@ if (CLANG_ENABLE_BOOTSTRAP) if(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED) add_dependencies(clang-bootstrap-deps llvm-profdata) set(PGO_OPT -DLLVM_PROFDATA=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-profdata) + string(TOUPPER "${BOOTSTRAP_LLVM_BUILD_INSTRUMENTED}" BOOTSTRAP_LLVM_BUILD_INSTRUMENTED) + if (BOOTSTRAP_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO") + add_dependencies(clang-bootstrap-deps llvm-profgen) + list(APPEND PGO_OPT -DLLVM_PROFGEN=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-profgen) + endif() endif() if(LLVM_BUILD_INSTRUMENTED) + string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" LLVM_BUILD_INSTRUMENTED) add_dependencies(clang-bootstrap-deps generate-profdata) - set(PGO_OPT -DLLVM_PROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata) + if (LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO") + set(PGO_OPT -DLLVM_SPROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata) + else() + set(PGO_OPT -DLLVM_PROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata) + endif() # Use the current tools for LTO instead of the instrumented ones list(APPEND _BOOTSTRAP_DEFAULT_PASSTHROUGH CMAKE_CXX_COMPILER diff --git a/clang/cmake/caches/CSSPGO.cmake b/clang/cmake/caches/CSSPGO.cmake new file mode 100644 index 0000000000000..34159068d5ea3 --- /dev/null +++ b/clang/cmake/caches/CSSPGO.cmake @@ -0,0 +1,3 @@ +set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "") +set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED "CSSPGO" CACHE STRING "") +include(${CMAKE_CURRENT_LIST_DIR}/PGO.cmake) diff --git a/clang/utils/perf-training/CMakeLists.txt b/clang/utils/perf-training/CMakeLists.txt index 61df987da7139..b8da611ad4506 100644 --- a/clang/utils/perf-training/CMakeLists.txt +++ b/clang/utils/perf-training/CMakeLists.txt @@ -18,8 +18,33 @@ if(LLVM_BUILD_INSTRUMENTED) DEPENDS clang clear-profraw ${CLANG_PGO_TRAINING_DEPS} ) + add_custom_target(generate-profdata-deps) + string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" uppercase_LLVM_BUILD_INSTRUMENTED) + if (uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO") + set(PROFDATA_SAMPLE "--sample") + if(NOT LLVM_PROFGEN) + find_program(LLVM_PROFGEN llvm-profgen) + endif() + + if(NOT LLVM_PROFGEN) + message(STATUS "To enable converting CSSPGO samples LLVM_PROFGEN has to point to llvm-profgen") + endif() + + # Convert perf profiles into profraw + add_custom_target(convert-perf-profraw + COMMAND "${Python3_EXECUTABLE}" + ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py perf2prof ${LLVM_PROFGEN} + $<TARGET_FILE:clang> ${CMAKE_CURRENT_BINARY_DIR} + COMMENT "Converting perf profiles into profraw" + DEPENDS generate-profraw) + add_dependencies(generate-profdata-deps convert-perf-profraw) + else() + add_dependencies(generate-profdata-deps generate-profraw) + endif() + add_custom_target(clear-profraw COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_BINARY_DIR}/profiles/ profraw + COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} perf.data COMMENT "Clearing old profraw data") if(NOT LLVM_PROFDATA) @@ -30,9 +55,12 @@ if(LLVM_BUILD_INSTRUMENTED) message(STATUS "To enable merging PGO data LLVM_PROFDATA has to point to llvm-profdata") else() add_custom_target(generate-profdata - COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge ${LLVM_PROFDATA} ${CMAKE_CURRENT_BINARY_DIR}/clang.profdata ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_BINARY_DIR}/profiles/ + COMMAND "${Python3_EXECUTABLE}" + ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge ${LLVM_PROFDATA} + ${CMAKE_CURRENT_BINARY_DIR}/clang.profdata ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_BINARY_DIR}/profiles/ ${PROFDATA_SAMPLE} COMMENT "Merging profdata" - DEPENDS generate-profraw) + DEPENDS generate-profdata-deps) if (CLANG_PGO_TRAINING_DATA_SOURCE_DIR) llvm_ExternalProject_Add(generate-profraw-external ${CLANG_PGO_TRAINING_DATA_SOURCE_DIR} USE_TOOLCHAIN EXLUDE_FROM_ALL NO_INSTALL DEPENDS generate-profraw) diff --git a/clang/utils/perf-training/lit.cfg b/clang/utils/perf-training/lit.cfg index 0bd06c0d44f65..c6b54f461f1e2 100644 --- a/clang/utils/perf-training/lit.cfg +++ b/clang/utils/perf-training/lit.cfg @@ -28,6 +28,12 @@ config.name = 'Clang Perf Training' config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test'] cc1_wrapper = '%s %s/perf-helper.py cc1' % (config.python_exe, config.perf_helper_dir) +if config.llvm_build_instrumented.upper() == "CSSPGO": + perf_wrapper = "%s %s/perf-helper.py perf --lbr --call-graph --event=br_inst_retired.near_taken:uppp -- " % ( + config.python_exe, + config.perf_helper_dir, + ) + cc1_wrapper = perf_wrapper use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL") config.test_format = lit.formats.ShTest(use_lit_shell == "0") diff --git a/clang/utils/perf-training/lit.site.cfg.in b/clang/utils/perf-training/lit.site.cfg.in index fae93065a4edf..0ae05c10b6d88 100644 --- a/clang/utils/perf-training/lit.site.cfg.in +++ b/clang/utils/perf-training/lit.site.cfg.in @@ -8,6 +8,7 @@ config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@" config.test_source_root = "@CLANG_PGO_TRAINING_DATA@" config.target_triple = "@LLVM_TARGET_TRIPLE@" config.python_exe = "@Python3_EXECUTABLE@" +config.llvm_build_instrumented = "@LLVM_BUILD_INSTRUMENTED@" # Let the main config do the real work. lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/lit.cfg") diff --git a/clang/utils/perf-training/perf-helper.py b/clang/utils/perf-training/perf-helper.py index 3ed42a187fd80..a6360e46e7447 100644 --- a/clang/utils/perf-training/perf-helper.py +++ b/clang/utils/perf-training/perf-helper.py @@ -43,14 +43,21 @@ def clean(args): def merge(args): - if len(args) < 3: - print( - "Usage: %s merge <llvm-profdata> <output> <paths>\n" % __file__ - + "\tMerges all profraw files from path into output." - ) - return 1 - cmd = [args[0], "merge", "-o", args[1]] - for path in args[2:]: + parser = argparse.ArgumentParser( + prog="perf-helper merge", + description="Merges all profraw files from path(s) into output" + ) + parser.add_argument("profdata", help="Path to llvm-profdata tool") + parser.add_argument("output", help="Output filename") + parser.add_argument("paths", nargs='+', + help="Folder(s) containing input profraw files") + parser.add_argument("--sample", action="store_true", help="Sample profile") + opts = parser.parse_args(args) + + cmd = [opts.profdata, "merge", "-o", opts.output] + if opts.sample: + cmd += ["--sample"] + for path in opts.paths: cmd.extend(findFilesWithExtension(path, "profraw")) subprocess.check_call(cmd) return 0 @@ -71,11 +78,19 @@ def merge_fdata(args): def perf(args): parser = argparse.ArgumentParser( - prog="perf-helper perf", description="perf wrapper for BOLT profile collection" + prog="perf-helper perf", + description="perf wrapper for BOLT/CSSPGO profile collection" ) parser.add_argument( "--lbr", action="store_true", help="Use perf with branch stacks" ) + parser.add_argument( + "--call-graph", action="store_true", help="Collect call graph" + ) + parser.add_argument( + "--event", help="PMU event name, defaults to cycles:u", + default="cycles:u" + ) parser.add_argument("cmd", nargs=argparse.REMAINDER, help="") opts = parser.parse_args(args) @@ -84,12 +99,14 @@ def perf(args): perf_args = [ "perf", "record", - "--event=cycles:u", + f"--event={opts.event}", "--freq=max", "--output=%d.perf.data" % os.getpid(), ] if opts.lbr: perf_args += ["--branch-filter=any,u"] + if opts.call_graph: + perf_args += ["-g", "--call-graph=fp"] perf_args.extend(cmd) start_time = time.time() @@ -125,6 +142,23 @@ def perf2bolt(args): return 0 +def perf2prof(args): + parser = argparse.ArgumentParser( + prog="perf-helper perf2prof", + description="perf to CSSPGO prof conversion wrapper", + ) + parser.add_argument("profgen", help="Path to llvm-profgen binary") + parser.add_argument("binary", help="Input binary") + parser.add_argument("path", help="Path containing perf.data files") + opts = parser.parse_args(args) + + profgen_args = [opts.profgen, f"--binary={opts.binary}"] + for filename in findFilesWithExtension(opts.path, "perf.data"): + subprocess.check_call(profgen_args + [f"--perfdata={filename}", + f"--output={filename}.profraw"]) + return 0 + + def dtrace(args): parser = argparse.ArgumentParser( prog="perf-helper dtrace", @@ -567,6 +601,7 @@ def genOrderFile(args): "merge-fdata": merge_fdata, "perf": perf, "perf2bolt": perf2bolt, + "perf2prof": perf2prof, } diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 485c76b8bb936..4421ce7e59b92 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -879,6 +879,9 @@ set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ${LLVM_ENABLE_PER_TARGET_RUNTIME_DIR_defa set(LLVM_PROFDATA_FILE "" CACHE FILEPATH "Profiling data file to use when compiling in order to improve runtime performance.") +set(LLVM_SPROFDATA_FILE "" CACHE FILEPATH + "Sampling profiling data file to use when compiling in order to improve runtime performance.") + if(LLVM_INCLUDE_TESTS) # Lit test suite requires at least python 3.6 set(LLVM_MINIMUM_PYTHON_VERSION 3.6) diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake index 0699a8586fcc7..3754dff598451 100644 --- a/llvm/cmake/modules/HandleLLVMOptions.cmake +++ b/llvm/cmake/modules/HandleLLVMOptions.cmake @@ -1116,7 +1116,7 @@ endif() option(LLVM_ENABLE_IR_PGO "Build LLVM and tools with IR PGO instrumentation (deprecated)" Off) mark_as_advanced(LLVM_ENABLE_IR_PGO) -set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR or Frontend") +set(LLVM_BUILD_INSTRUMENTED OFF CACHE STRING "Build LLVM and tools with PGO instrumentation. May be specified as IR, Frontend, CSIR, CSSPGO") set(LLVM_VP_COUNTERS_PER_SITE "1.5" CACHE STRING "Value profile counters to use per site for IR PGO with Clang") mark_as_advanced(LLVM_BUILD_INSTRUMENTED LLVM_VP_COUNTERS_PER_SITE) string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" uppercase_LLVM_BUILD_INSTRUMENTED) @@ -1149,6 +1149,15 @@ if (LLVM_BUILD_INSTRUMENTED) CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) endif() + elseif(uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO") + append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling" + CMAKE_CXX_FLAGS + CMAKE_C_FLAGS) + if(NOT LINKER_IS_LLD_LINK) + append("-fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -fno-optimize-sibling-calls -fpseudo-probe-for-profiling" + CMAKE_EXE_LINKER_FLAGS + CMAKE_SHARED_LINKER_FLAGS) + endif() else() append("-fprofile-instr-generate=\"${LLVM_PROFILE_FILE_PATTERN}\"" CMAKE_CXX_FLAGS @@ -1199,6 +1208,21 @@ if(LLVM_PROFDATA_FILE AND EXISTS ${LLVM_PROFDATA_FILE}) endif() endif() +if(LLVM_SPROFDATA_FILE AND EXISTS ${LLVM_SPROFDATA_FILE}) + if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" ) + append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\"" + CMAKE_CXX_FLAGS + CMAKE_C_FLAGS) + if(NOT LINKER_IS_LLD_LINK) + append("-fpseudo-probe-for-profiling -fprofile-sample-use=\"${LLVM_SPROFDATA_FILE}\"" + CMAKE_EXE_LINKER_FLAGS + CMAKE_SHARED_LINKER_FLAGS) + endif() + else() + message(FATAL_ERROR "LLVM_SPROFDATA_FILE can only be specified when compiling with clang") + endif() +endif() + option(LLVM_BUILD_INSTRUMENTED_COVERAGE "Build LLVM and tools with Code Coverage instrumentation" Off) option(LLVM_INDIVIDUAL_TEST_COVERAGE "Emit individual coverage file for each test case." OFF) mark_as_advanced(LLVM_BUILD_INSTRUMENTED_COVERAGE) >From 56c5dfbb0ed353893f9f69ac782d9761f525eb46 Mon Sep 17 00:00:00 2001 From: Amir Ayupov <aau...@fb.com> Date: Wed, 19 Jul 2023 20:30:29 -0700 Subject: [PATCH 2/4] format --- clang/utils/perf-training/perf-helper.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/clang/utils/perf-training/perf-helper.py b/clang/utils/perf-training/perf-helper.py index a6360e46e7447..e94fc13c773b4 100644 --- a/clang/utils/perf-training/perf-helper.py +++ b/clang/utils/perf-training/perf-helper.py @@ -45,7 +45,7 @@ def clean(args): def merge(args): parser = argparse.ArgumentParser( prog="perf-helper merge", - description="Merges all profraw files from path(s) into output" + description="Merges all profraw files from path(s) into output", ) parser.add_argument("profdata", help="Path to llvm-profdata tool") parser.add_argument("output", help="Output filename") @@ -79,17 +79,14 @@ def merge_fdata(args): def perf(args): parser = argparse.ArgumentParser( prog="perf-helper perf", - description="perf wrapper for BOLT/CSSPGO profile collection" + description="perf wrapper for BOLT/CSSPGO profile collection", ) parser.add_argument( "--lbr", action="store_true", help="Use perf with branch stacks" ) + parser.add_argument("--call-graph", action="store_true", help="Collect call graph") parser.add_argument( - "--call-graph", action="store_true", help="Collect call graph" - ) - parser.add_argument( - "--event", help="PMU event name, defaults to cycles:u", - default="cycles:u" + "--event", help="PMU event name, defaults to cycles:u", default="cycles:u" ) parser.add_argument("cmd", nargs=argparse.REMAINDER, help="") @@ -144,9 +141,9 @@ def perf2bolt(args): def perf2prof(args): parser = argparse.ArgumentParser( - prog="perf-helper perf2prof", - description="perf to CSSPGO prof conversion wrapper", - ) + prog="perf-helper perf2prof", + description="perf to CSSPGO prof conversion wrapper", + ) parser.add_argument("profgen", help="Path to llvm-profgen binary") parser.add_argument("binary", help="Input binary") parser.add_argument("path", help="Path containing perf.data files") @@ -154,8 +151,9 @@ def perf2prof(args): profgen_args = [opts.profgen, f"--binary={opts.binary}"] for filename in findFilesWithExtension(opts.path, "perf.data"): - subprocess.check_call(profgen_args + [f"--perfdata={filename}", - f"--output={filename}.profraw"]) + subprocess.check_call( + profgen_args + [f"--perfdata={filename}", f"--output={filename}.profraw"] + ) return 0 >From 9cb38cf84025389177ca8fc61f34646019ace589 Mon Sep 17 00:00:00 2001 From: Amir Ayupov <aau...@fb.com> Date: Mon, 29 Jan 2024 21:15:10 -0800 Subject: [PATCH 3/4] drop uppercase_ --- clang/utils/perf-training/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/utils/perf-training/CMakeLists.txt b/clang/utils/perf-training/CMakeLists.txt index b8da611ad4506..cb293e7259ab6 100644 --- a/clang/utils/perf-training/CMakeLists.txt +++ b/clang/utils/perf-training/CMakeLists.txt @@ -19,8 +19,8 @@ if(LLVM_BUILD_INSTRUMENTED) ) add_custom_target(generate-profdata-deps) - string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" uppercase_LLVM_BUILD_INSTRUMENTED) - if (uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO") + string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" LLVM_BUILD_INSTRUMENTED) + if (LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO") set(PROFDATA_SAMPLE "--sample") if(NOT LLVM_PROFGEN) find_program(LLVM_PROFGEN llvm-profgen) >From 53e8dc8ff9e68f769ddcca5644aa2d350d883a51 Mon Sep 17 00:00:00 2001 From: Amir Ayupov <aau...@fb.com> Date: Fri, 2 Feb 2024 12:01:26 -0800 Subject: [PATCH 4/4] format --- clang/utils/perf-training/perf-helper.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/utils/perf-training/perf-helper.py b/clang/utils/perf-training/perf-helper.py index e94fc13c773b4..cfa449d542861 100644 --- a/clang/utils/perf-training/perf-helper.py +++ b/clang/utils/perf-training/perf-helper.py @@ -49,8 +49,9 @@ def merge(args): ) parser.add_argument("profdata", help="Path to llvm-profdata tool") parser.add_argument("output", help="Output filename") - parser.add_argument("paths", nargs='+', - help="Folder(s) containing input profraw files") + parser.add_argument( + "paths", nargs="+", help="Folder(s) containing input profraw files" + ) parser.add_argument("--sample", action="store_true", help="Sample profile") opts = parser.parse_args(args) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits