Amir updated this revision to Diff 496271.
Amir added a comment.
Reduce changes
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D143617/new/
https://reviews.llvm.org/D143617
Files:
clang/CMakeLists.txt
clang/cmake/caches/BOLT.cmake
clang/utils/perf-training/CMakeLists.txt
clang/utils/perf-training/bolt.lit.cfg
clang/utils/perf-training/bolt.lit.site.cfg.in
clang/utils/perf-training/perf-helper.py
Index: clang/utils/perf-training/perf-helper.py
===================================================================
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -56,6 +56,57 @@
subprocess.check_call(cmd)
return 0
+def perf(args):
+ parser = argparse.ArgumentParser(prog='perf-helper perf',
+ description='perf wrapper for BOLT profile collection')
+ parser.add_argument('--lbr', required=False, action='store_true',
+ help='Use perf with branch stacks')
+ parser.add_argument('cmd', nargs='*', help='')
+
+ # Use python's arg parser to handle all leading option arguments, but pass
+ # everything else through to perf
+ first_cmd = next(arg for arg in args if not arg.startswith("--"))
+ last_arg_idx = args.index(first_cmd)
+
+ opts = parser.parse_args(args[:last_arg_idx])
+ #cmd = shlex.split(args[last_arg_idx:])
+ cmd = args[last_arg_idx:]
+
+ perf_args = []
+ perf_args.extend((
+ 'perf', 'record', '--event=cycles:u', '--freq=max',
+ '--output=%d.perf.data' % os.getpid()))
+ if opts.lbr:
+ perf_args += ['--branch-filter=any,u']
+ perf_args.extend(cmd)
+
+ start_time = time.time()
+ subprocess.check_call(perf_args)
+
+ elapsed = time.time() - start_time
+ print("... data collection took %.4fs" % elapsed)
+ return 0
+
+def perf2bolt(args):
+ parser = argparse.ArgumentParser(prog='perf-helper perf2bolt',
+ description='perf2bolt conversion wrapper for perf.data files')
+ parser.add_argument('p2b_path', help='Path to llvm-bolt')
+ parser.add_argument('path', help='Path containing perf.data files')
+ parser.add_argument('binary', help='Input binary')
+ parser.add_argument('--nolbr', required=False, action='store_true',
+ help='Use -nl perf2bolt mode')
+ opts = parser.parse_args(args)
+
+ p2b_args = []
+ p2b_args.extend((opts.p2b_path, opts.binary, '--aggregate-only',
+ '--profile-format=yaml'))
+ if opts.nolbr:
+ p2b_args += ['-nl']
+ p2b_args += ['-p']
+ for filename in findFilesWithExtension(opts.path, 'perf.data'):
+ subprocess.check_call(p2b_args + [filename, '-o', filename+'.fdata'])
+ return 0
+
def dtrace(args):
parser = argparse.ArgumentParser(prog='perf-helper dtrace',
description='dtrace wrapper for order file generation')
@@ -410,6 +461,8 @@
'cc1' : cc1,
'gen-order-file' : genOrderFile,
'merge-fdata' : merge_fdata,
+ 'perf' : perf,
+ 'perf2bolt' : perf2bolt,
}
def main():
Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===================================================================
--- clang/utils/perf-training/bolt.lit.site.cfg.in
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -9,6 +9,7 @@
config.target_triple = "@LLVM_TARGET_TRIPLE@"
config.python_exe = "@Python3_EXECUTABLE@"
config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+config.clang_bolt_mode = "@CLANG_BOLT@"
# Let the main config do the real work.
lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===================================================================
--- clang/utils/perf-training/bolt.lit.cfg
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -6,16 +6,25 @@
import os
import subprocess
-config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
-sysroot_flags = ''
+clang_binary = 'clang'
+perf_wrapper = ''
+if config.clang_bolt_mode.lower() == "instrument":
+ clang_binary = 'clang-bolt.inst'
+else: # perf or LBR
+ perf_wrapper = '%s %s/perf-helper.py perf' % (config.python_exe, config.perf_helper_dir)
+ if config.clang_bolt_mode.lower() == "lbr":
+ perf_wrapper += " --lbr"
+ perf_wrapper += " -- "
+
+config.clang = os.path.realpath(lit.util.which(clang_binary, config.clang_tools_dir)).replace('\\', '/')
config.name = 'Clang Perf Training'
config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
config.test_format = lit.formats.ShTest(use_lit_shell == "0")
-config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang_skip_driver', ' %s %s ' % (config.clang, sysroot_flags)))
-config.substitutions.append( ('%clang', ' %s %s ' % (config.clang, sysroot_flags) ) )
+config.substitutions.append( ('%clang_cpp_skip_driver', ' %s %s --driver-mode=g++ ' % (perf_wrapper, config.clang)))
+config.substitutions.append( ('%clang_cpp', ' %s %s --driver-mode=g++ ' % (perf_wrapper, config.clang)))
+config.substitutions.append( ('%clang_skip_driver', ' %s %s ' % (perf_wrapper, config.clang)))
+config.substitutions.append( ('%clang', ' %s %s ' % (perf_wrapper, config.clang) ) )
config.substitutions.append( ('%test_root', config.test_exec_root ) )
Index: clang/utils/perf-training/CMakeLists.txt
===================================================================
--- clang/utils/perf-training/CMakeLists.txt
+++ clang/utils/perf-training/CMakeLists.txt
@@ -60,7 +60,7 @@
DEPENDS generate-dtrace-logs)
endif()
-if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+if(CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
configure_lit_site_cfg(
${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in
${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg
@@ -69,16 +69,38 @@
add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang"
${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/
EXCLUDE_FROM_CHECK_ALL
- DEPENDS clang-instrumented clear-bolt-fdata
+ DEPENDS clang-bolt-training-deps clear-bolt-fdata clear-perf-data
)
add_custom_target(clear-bolt-fdata
COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
COMMENT "Clearing old BOLT fdata")
+ add_custom_target(clear-perf-data
+ COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} perf.data
+ COMMENT "Clearing old perf data")
+
+  string(TOUPPER "${CLANG_BOLT}" uppercase_CLANG_BOLT)
+  # PERF mode collects no branch stacks, so request perf-helper's --nolbr conversion
+  if (uppercase_CLANG_BOLT STREQUAL "PERF")
+    set(BOLT_NO_LBR "--nolbr")
+  endif()
+  add_custom_target(merge-fdata-deps)
+  if (uppercase_CLANG_BOLT STREQUAL "INSTRUMENT")
+    add_dependencies(merge-fdata-deps generate-bolt-fdata)
+  else()
+    # Convert perf profiles into fdata before they are merged
+    add_custom_target(convert-perf-fdata
+      COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py perf2bolt $<TARGET_FILE:llvm-bolt> ${CMAKE_CURRENT_BINARY_DIR} $<TARGET_FILE:clang> ${BOLT_NO_LBR}
+      COMMENT "Converting perf files to BOLT fdata"
+      DEPENDS llvm-bolt generate-bolt-fdata
+      VERBATIM)
+    add_dependencies(merge-fdata-deps convert-perf-fdata)
+  endif()
+
# Merge profiles into one using merge-fdata
add_custom_target(merge-bolt-fdata
COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge-fdata $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
COMMENT "Merging BOLT fdata"
- DEPENDS merge-fdata generate-bolt-fdata)
+ DEPENDS merge-fdata merge-fdata-deps)
endif()
Index: clang/cmake/caches/BOLT.cmake
===================================================================
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,5 +1,5 @@
set(CMAKE_BUILD_TYPE Release CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CLANG_BOLT "INSTRUMENT" CACHE STRING "")
set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
Index: clang/CMakeLists.txt
===================================================================
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -851,25 +851,38 @@
endforeach()
endif()
-if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+# OFF by default so the BOLT targets are only created when a profiling mode is requested.
+set(CLANG_BOLT OFF CACHE STRING "Apply BOLT optimization to Clang. \
+  May be specified as Instrument or Perf or LBR to use a particular profiling mechanism.")
+string(TOUPPER "${CLANG_BOLT}" uppercase_CLANG_BOLT)
+
+if (CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
+  # prof.fdata is passed to clang-bolt via -data in every mode, so define it before branching
+  set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)
+  if (uppercase_CLANG_BOLT STREQUAL "INSTRUMENT")
+    # Instrument clang with BOLT
+    add_custom_target(clang-instrumented DEPENDS ${CLANG_INSTRUMENTED})
+    add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
+      DEPENDS clang llvm-bolt
+      COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
+        -instrument --instrumentation-file-append-pid
+        --instrumentation-file=${BOLT_FDATA}
+      COMMENT "Instrumenting clang binary with BOLT"
+      VERBATIM
+    )
+    add_custom_target(clang-bolt-training-deps DEPENDS clang-instrumented)
+  else() # perf or LBR
+    # Perf-based modes train on the plain clang binary; no instrumented copy is built
+    add_custom_target(clang-bolt-training-deps DEPENDS clang)
+  endif()
- # Instrument clang with BOLT
- add_custom_target(clang-instrumented
- DEPENDS ${CLANG_INSTRUMENTED}
- )
- set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)
- add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
- DEPENDS clang llvm-bolt
- COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
- -instrument --instrumentation-file-append-pid
- --instrumentation-file=${BOLT_FDATA}
- COMMENT "Instrumenting clang binary with BOLT"
- VERBATIM
- )
-
- # Optimize original (pre-bolt) Clang using the collected profile
+ # Pass extra flag in no-LBR mode
+ if (uppercase_CLANG_BOLT STREQUAL "PERF")
+ set(BOLT_NO_LBR "-nl")
+ endif()
+ # Optimize original Clang using the collected profile
set(CLANG_OPTIMIZED ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt)
add_custom_target(clang-bolt
DEPENDS merge-bolt-fdata
@@ -878,6 +891,7 @@
-data ${BOLT_FDATA}
-reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions
-split-all-cold -split-eh -dyno-stats -icf=1 -use-gnu-stack
+ ${BOLT_NO_LBR}
COMMAND ${CMAKE_COMMAND} -E rename ${CLANG_OPTIMIZED} $<TARGET_FILE:clang>
COMMENT "Optimizing Clang with BOLT"
VERBATIM
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits