https://github.com/tstellar updated https://github.com/llvm/llvm-project/pull/119117
>From 5d13b69039fab7c5960288cead18dc76f5d01f4f Mon Sep 17 00:00:00 2001 From: Tom Stellard <tstel...@redhat.com> Date: Thu, 5 Dec 2024 15:01:27 +0000 Subject: [PATCH 1/4] [clang][perf-training] Fix profiling with -DCLANG_BOLT=perf This fixes the llvm-support build that generates the profile data. However, I'm wondering if maybe we should disable llvm-support and only run hello-world with -DCLANG_BOLT=perf. The bolt optimizations with perf only give about a 3% performance increase (although maybe with hw counters this would be better) and it takes a very long time to convert all the perf profiles to the fdata format. --- clang/utils/perf-training/bolt.lit.cfg | 22 +++++++++++++++---- .../perf-training/llvm-support/build.test | 4 ++-- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/clang/utils/perf-training/bolt.lit.cfg b/clang/utils/perf-training/bolt.lit.cfg index 1d0cf9a8a17a8e..04b18975275495 100644 --- a/clang/utils/perf-training/bolt.lit.cfg +++ b/clang/utils/perf-training/bolt.lit.cfg @@ -8,21 +8,32 @@ import subprocess clang_bolt_mode = config.clang_bolt_mode.lower() clang_binary = "clang" -perf_wrapper = f"{config.python_exe} {config.perf_helper_dir}/perf-helper.py perf " +perf_wrapper = f"{config.python_exe} {config.perf_helper_dir}/perf-helper.py perf" if clang_bolt_mode == "instrument": perf_wrapper = "" clang_binary = config.clang_bolt_name elif clang_bolt_mode == "lbr": - perf_wrapper += " --lbr -- " + perf_wrapper += " --lbr --" elif clang_bolt_mode == "perf": - perf_wrapper += " -- " + perf_wrapper += " --" else: assert 0, "Unsupported CLANG_BOLT_MODE variable" -config.clang = perf_wrapper + os.path.realpath( +clang_nowrapper = os.path.realpath( lit.util.which(clang_binary, config.clang_tools_dir) ).replace("\\", "/") +config.clang = f'{perf_wrapper} {clang_nowrapper}' + +# We need to limit the number of build jobs with perf in order to avoid this +# error: +# +# | Permission error mapping pages. 
+# | Consider increasing /proc/sys/kernel/perf_event_mlock_kb, +# | or try again with a smaller value of -m/--mmap_pages. +ninja_args = "" +if clang_bolt_mode != "instrument": + ninja_args = "-j1" config.name = "Clang Perf Training" config.suffixes = [ @@ -52,3 +63,6 @@ config.substitutions.append(("%test_root", config.test_exec_root)) config.substitutions.append(('%cmake_generator', config.cmake_generator)) config.substitutions.append(('%cmake', config.cmake_exe)) config.substitutions.append(('%llvm_src_dir', config.llvm_src_dir)) +config.substitutions.append(('%perf_cmake_compiler_launcher', perf_wrapper.replace(' ', ';'))) +config.substitutions.append(('%nowrapper_clang', clang_nowrapper)) +config.substitutions.append(('%ninja_args', ninja_args)) diff --git a/clang/utils/perf-training/llvm-support/build.test b/clang/utils/perf-training/llvm-support/build.test index f29a594c846869..1f4d76502a3757 100644 --- a/clang/utils/perf-training/llvm-support/build.test +++ b/clang/utils/perf-training/llvm-support/build.test @@ -1,2 +1,2 @@ -RUN: %cmake -G %cmake_generator -B %t -S %llvm_src_dir -DCMAKE_C_COMPILER=%clang -DCMAKE_CXX_COMPILER=%clang -DCMAKE_CXX_FLAGS="--driver-mode=g++" -DCMAKE_BUILD_TYPE=Release -RUN: %cmake --build %t -v --target LLVMSupport +RUN: %cmake -G %cmake_generator -B %t -S %llvm_src_dir -DCMAKE_CXX_COMPILER_LAUNCHER="%perf_cmake_compiler_launcher" -DCMAKE_C_COMPILER="%nowrapper_clang" -DCMAKE_CXX_COMPILER="%nowrapper_clang" -DCMAKE_CXX_FLAGS="--driver-mode=g++" -DCMAKE_BUILD_TYPE=Release +RUN: %cmake --build %t %ninja_args -v --target LLVMSupport >From 9aa48ac20e931d8192cecfec6ef789ea936fa6ff Mon Sep 17 00:00:00 2001 From: Tom Stellard <tstel...@redhat.com> Date: Mon, 9 Dec 2024 19:23:40 +0000 Subject: [PATCH 2/4] Revert "[clang][perf-training] Fix profiling with -DCLANG_BOLT=perf" This reverts commit 5d13b69039fab7c5960288cead18dc76f5d01f4f. 
--- clang/utils/perf-training/bolt.lit.cfg | 22 ++++--------------- .../perf-training/llvm-support/build.test | 4 ++-- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/clang/utils/perf-training/bolt.lit.cfg b/clang/utils/perf-training/bolt.lit.cfg index 04b18975275495..1d0cf9a8a17a8e 100644 --- a/clang/utils/perf-training/bolt.lit.cfg +++ b/clang/utils/perf-training/bolt.lit.cfg @@ -8,32 +8,21 @@ import subprocess clang_bolt_mode = config.clang_bolt_mode.lower() clang_binary = "clang" -perf_wrapper = f"{config.python_exe} {config.perf_helper_dir}/perf-helper.py perf" +perf_wrapper = f"{config.python_exe} {config.perf_helper_dir}/perf-helper.py perf " if clang_bolt_mode == "instrument": perf_wrapper = "" clang_binary = config.clang_bolt_name elif clang_bolt_mode == "lbr": - perf_wrapper += " --lbr --" + perf_wrapper += " --lbr -- " elif clang_bolt_mode == "perf": - perf_wrapper += " --" + perf_wrapper += " -- " else: assert 0, "Unsupported CLANG_BOLT_MODE variable" -clang_nowrapper = os.path.realpath( +config.clang = perf_wrapper + os.path.realpath( lit.util.which(clang_binary, config.clang_tools_dir) ).replace("\\", "/") -config.clang = f'{perf_wrapper} {clang_nowrapper}' - -# We need to limit the number of build jobs with perf in order to avoid this -# error: -# -# | Permission error mapping pages. -# | Consider increasing /proc/sys/kernel/perf_event_mlock_kb, -# | or try again with a smaller value of -m/--mmap_pages. 
-ninja_args = "" -if clang_bolt_mode != "instrument": - ninja_args = "-j1" config.name = "Clang Perf Training" config.suffixes = [ @@ -63,6 +52,3 @@ config.substitutions.append(("%test_root", config.test_exec_root)) config.substitutions.append(('%cmake_generator', config.cmake_generator)) config.substitutions.append(('%cmake', config.cmake_exe)) config.substitutions.append(('%llvm_src_dir', config.llvm_src_dir)) -config.substitutions.append(('%perf_cmake_compiler_launcher', perf_wrapper.replace(' ', ';'))) -config.substitutions.append(('%nowrapper_clang', clang_nowrapper)) -config.substitutions.append(('%ninja_args', ninja_args)) diff --git a/clang/utils/perf-training/llvm-support/build.test b/clang/utils/perf-training/llvm-support/build.test index 1f4d76502a3757..f29a594c846869 100644 --- a/clang/utils/perf-training/llvm-support/build.test +++ b/clang/utils/perf-training/llvm-support/build.test @@ -1,2 +1,2 @@ -RUN: %cmake -G %cmake_generator -B %t -S %llvm_src_dir -DCMAKE_CXX_COMPILER_LAUNCHER="%perf_cmake_compiler_launcher" -DCMAKE_C_COMPILER="%nowrapper_clang" -DCMAKE_CXX_COMPILER="%nowrapper_clang" -DCMAKE_CXX_FLAGS="--driver-mode=g++" -DCMAKE_BUILD_TYPE=Release -RUN: %cmake --build %t %ninja_args -v --target LLVMSupport +RUN: %cmake -G %cmake_generator -B %t -S %llvm_src_dir -DCMAKE_C_COMPILER=%clang -DCMAKE_CXX_COMPILER=%clang -DCMAKE_CXX_FLAGS="--driver-mode=g++" -DCMAKE_BUILD_TYPE=Release +RUN: %cmake --build %t -v --target LLVMSupport >From d4480da2205be8e68a9139885ed2c4a008514920 Mon Sep 17 00:00:00 2001 From: Tom Stellard <tstel...@redhat.com> Date: Tue, 10 Dec 2024 01:44:46 +0000 Subject: [PATCH 3/4] Fixes for PGO --- clang/utils/perf-training/bolt.lit.cfg | 20 ++++++++++++++++++- clang/utils/perf-training/lit.cfg | 5 +++++ .../perf-training/llvm-support/build.test | 4 ++-- 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/clang/utils/perf-training/bolt.lit.cfg b/clang/utils/perf-training/bolt.lit.cfg index 1d0cf9a8a17a8e..3188f6357dc3c5 
100644 --- a/clang/utils/perf-training/bolt.lit.cfg +++ b/clang/utils/perf-training/bolt.lit.cfg @@ -4,6 +4,7 @@ from lit import Test import lit.formats import lit.util import os +import re import subprocess clang_bolt_mode = config.clang_bolt_mode.lower() @@ -20,9 +21,24 @@ elif clang_bolt_mode == "perf": else: assert 0, "Unsupported CLANG_BOLT_MODE variable" -config.clang = perf_wrapper + os.path.realpath( +clang_nowrapper = os.path.realpath( lit.util.which(clang_binary, config.clang_tools_dir) ).replace("\\", "/") +config.clang = perf_wrapper + clang_nowrapper +config.cmake_compiler_args = "-DCMAKE_CXX_COMPILER_LAUNCHER='{0}' -DCMAKE_C_COMPILER_LAUNCHER='{0}' -DCMAKE_C_COMPILER='{1}' -DCMAKE_CXX_COMPILER='{1};--driver-mode=g++'".format( + re.sub(r"\s+", ";", perf_wrapper.rstrip()), + re.sub(r"\s+", ";", clang_nowrapper) +) + +# We need to limit the number of build jobs with perf in order to avoid this +# error: +# +# | Permission error mapping pages. +# | Consider increasing /proc/sys/kernel/perf_event_mlock_kb, +# | or try again with a smaller value of -m/--mmap_pages. 
+ninja_args = "" +if clang_bolt_mode != "instrument": + ninja_args = "-j1" config.name = "Clang Perf Training" config.suffixes = [ @@ -49,6 +65,8 @@ config.substitutions.append(("%clang_cpp", f" {config.clang} --driver-mode=g++ " config.substitutions.append(("%clang_skip_driver", config.clang)) config.substitutions.append(("%clang", config.clang)) config.substitutions.append(("%test_root", config.test_exec_root)) +config.substitutions.append(("%cmake_compiler_args", config.cmake_compiler_args)) config.substitutions.append(('%cmake_generator', config.cmake_generator)) config.substitutions.append(('%cmake', config.cmake_exe)) config.substitutions.append(('%llvm_src_dir', config.llvm_src_dir)) +config.substitutions.append(('%ninja_args', ninja_args)) diff --git a/clang/utils/perf-training/lit.cfg b/clang/utils/perf-training/lit.cfg index 654961e215da68..e788a7a01ef976 100644 --- a/clang/utils/perf-training/lit.cfg +++ b/clang/utils/perf-training/lit.cfg @@ -31,14 +31,19 @@ cc1_wrapper = '%s %s/perf-helper.py cc1' % (config.python_exe, config.perf_helpe use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL") config.test_format = lit.formats.ShTest(use_lit_shell == "0") +config.cmake_compiler_args = '-DCMAKE_C_COMPILER="{0}" -DCMAKE_CXX_COMPILER="{0};--driver-mode=g++"'.format( + config.clang.replace(' ', ';') +) config.substitutions.append( ('%clang_cpp_skip_driver', ' %s %s %s ' % (cc1_wrapper, config.clang, sysroot_flags))) config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags))) config.substitutions.append( ('%clang_skip_driver', ' %s %s %s ' % (cc1_wrapper, config.clang, sysroot_flags))) config.substitutions.append( ('%clang', '%s %s ' % (config.clang, sysroot_flags) ) ) config.substitutions.append( ('%test_root', config.test_exec_root ) ) +config.substitutions.append( ('%cmake_compiler_args', config.cmake_compiler_args)) config.substitutions.append( ('%cmake_generator', config.cmake_generator ) ) 
config.substitutions.append( ('%cmake', config.cmake_exe ) ) config.substitutions.append( ('%llvm_src_dir', config.llvm_src_dir ) ) +config.substitutions.append( ('%ninja_args', '' ) ) config.environment['LLVM_PROFILE_FILE'] = 'perf-training-%4m.profraw' diff --git a/clang/utils/perf-training/llvm-support/build.test b/clang/utils/perf-training/llvm-support/build.test index f29a594c846869..402a5b9e27ff88 100644 --- a/clang/utils/perf-training/llvm-support/build.test +++ b/clang/utils/perf-training/llvm-support/build.test @@ -1,2 +1,2 @@ -RUN: %cmake -G %cmake_generator -B %t -S %llvm_src_dir -DCMAKE_C_COMPILER=%clang -DCMAKE_CXX_COMPILER=%clang -DCMAKE_CXX_FLAGS="--driver-mode=g++" -DCMAKE_BUILD_TYPE=Release -RUN: %cmake --build %t -v --target LLVMSupport +RUN: %cmake -G %cmake_generator -B %t -S %llvm_src_dir %cmake_compiler_args -DCMAKE_BUILD_TYPE=Release +RUN: %cmake --build %t %ninja_args -v --target LLVMSupport >From 5298942c5e7d39e33d7c76b6a0199857ce42f628 Mon Sep 17 00:00:00 2001 From: Tom Stellard <tstel...@redhat.com> Date: Tue, 10 Dec 2024 02:49:31 +0000 Subject: [PATCH 4/4] Wrap cmake command with perf instead of clang to save time on perf2bolt conversions --- clang/utils/perf-training/bolt.lit.cfg | 4 ++-- clang/utils/perf-training/lit.cfg | 1 + clang/utils/perf-training/llvm-support/build.test | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/clang/utils/perf-training/bolt.lit.cfg b/clang/utils/perf-training/bolt.lit.cfg index 3188f6357dc3c5..9ca04bf71ac454 100644 --- a/clang/utils/perf-training/bolt.lit.cfg +++ b/clang/utils/perf-training/bolt.lit.cfg @@ -25,8 +25,7 @@ clang_nowrapper = os.path.realpath( lit.util.which(clang_binary, config.clang_tools_dir) ).replace("\\", "/") config.clang = perf_wrapper + clang_nowrapper -config.cmake_compiler_args = "-DCMAKE_CXX_COMPILER_LAUNCHER='{0}' -DCMAKE_C_COMPILER_LAUNCHER='{0}' -DCMAKE_C_COMPILER='{1}' -DCMAKE_CXX_COMPILER='{1};--driver-mode=g++'".format( - re.sub(r"\s+", ";", 
perf_wrapper.rstrip()), +config.cmake_compiler_args = "-DCMAKE_C_COMPILER='{0}' -DCMAKE_CXX_COMPILER='{0};--driver-mode=g++'".format( re.sub(r"\s+", ";", clang_nowrapper) ) @@ -70,3 +69,4 @@ config.substitutions.append(('%cmake_generator', config.cmake_generator)) config.substitutions.append(('%cmake', config.cmake_exe)) config.substitutions.append(('%llvm_src_dir', config.llvm_src_dir)) config.substitutions.append(('%ninja_args', ninja_args)) +config.substitutions.append(('%perf_wrapper', perf_wrapper)) diff --git a/clang/utils/perf-training/lit.cfg b/clang/utils/perf-training/lit.cfg index e788a7a01ef976..93ce5513ec46af 100644 --- a/clang/utils/perf-training/lit.cfg +++ b/clang/utils/perf-training/lit.cfg @@ -44,6 +44,7 @@ config.substitutions.append( ('%cmake_generator', config.cmake_generator ) ) config.substitutions.append( ('%cmake', config.cmake_exe ) ) config.substitutions.append( ('%llvm_src_dir', config.llvm_src_dir ) ) config.substitutions.append( ('%ninja_args', '' ) ) +config.substitutions.append( ('%perf_wrapper', '' ) ) config.environment['LLVM_PROFILE_FILE'] = 'perf-training-%4m.profraw' diff --git a/clang/utils/perf-training/llvm-support/build.test b/clang/utils/perf-training/llvm-support/build.test index 402a5b9e27ff88..32ce9a870b91df 100644 --- a/clang/utils/perf-training/llvm-support/build.test +++ b/clang/utils/perf-training/llvm-support/build.test @@ -1,2 +1,2 @@ RUN: %cmake -G %cmake_generator -B %t -S %llvm_src_dir %cmake_compiler_args -DCMAKE_BUILD_TYPE=Release -RUN: %cmake --build %t %ninja_args -v --target LLVMSupport +RUN: %perf_wrapper %cmake --build %t -v --target LLVMSupport _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits