ikudrin created this revision.
ikudrin added reviewers: john.brawn, daltenty, jsji, simon_tatham, tmatheson,
mstorsjo, phosek.
ikudrin added projects: LLVM, clang.
Herald added subscribers: ekilmer, inglorion.
Herald added a project: All.
ikudrin requested review of this revision.
Currently, `extract_symbols.py` uses a predefined set of tools, none of which
can read bitcode files. This patch makes it possible to override the path used
to run a tool and, for multi-stage LTO builds, passes a freshly built `llvm-nm`
as that override. This fixes building plugins in LTO builds and, as a result,
makes the `clang/test/Frontend/plugin-*` tests pass.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D149119
Files:
clang/CMakeLists.txt
llvm/utils/extract_symbols.py
Index: llvm/utils/extract_symbols.py
===================================================================
--- llvm/utils/extract_symbols.py
+++ llvm/utils/extract_symbols.py
@@ -29,8 +29,8 @@
# as, especially on Windows, waiting for the entire output to be ready can take
# a significant amount of time.
-def dumpbin_get_symbols(lib):
- process = subprocess.Popen(['dumpbin','/symbols',lib], bufsize=1,
+def dumpbin_get_symbols(tool, lib):
+ process = subprocess.Popen([tool,'/symbols',lib], bufsize=1,
stdout=subprocess.PIPE, stdin=subprocess.PIPE,
universal_newlines=True)
process.stdin.close()
@@ -41,10 +41,10 @@
yield (match.group(2), match.group(1) != "UNDEF")
process.wait()
-def nm_get_symbols(lib):
+def nm_get_symbols(tool, lib):
# -P means the output is in portable format, and -g means we only get global
# symbols.
- cmd = ['nm','-P','-g']
+ cmd = [tool,'-P','-g']
if sys.platform.startswith('aix'):
cmd += ['-Xany','-C','-p']
process = subprocess.Popen(cmd+[lib], bufsize=1,
@@ -68,8 +68,8 @@
yield (match.group(1), False)
process.wait()
-def readobj_get_symbols(lib):
- process = subprocess.Popen(['llvm-readobj','--symbols',lib], bufsize=1,
+def readobj_get_symbols(tool, lib):
+ process = subprocess.Popen([tool,'--symbols',lib], bufsize=1,
stdout=subprocess.PIPE, stdin=subprocess.PIPE,
universal_newlines=True)
process.stdin.close()
@@ -95,10 +95,10 @@
# Define functions which determine if the target is 32-bit Windows (as that's
# where calling convention name decoration happens).
-def dumpbin_is_32bit_windows(lib):
+def dumpbin_is_32bit_windows(tool, lib):
# dumpbin /headers can output a huge amount of data (>100MB in a debug
# build) so we read only up to the 'machine' line then close the output.
- process = subprocess.Popen(['dumpbin','/headers',lib], bufsize=1,
+ process = subprocess.Popen([tool,'/headers',lib], bufsize=1,
stdout=subprocess.PIPE, stdin=subprocess.PIPE,
universal_newlines=True)
process.stdin.close()
@@ -112,8 +112,8 @@
process.wait()
return retval
-def objdump_is_32bit_windows(lib):
- output = subprocess.check_output(['objdump','-f',lib],
+def objdump_is_32bit_windows(tool, lib):
+ output = subprocess.check_output([tool,'-f',lib],
universal_newlines=True)
for line in output.splitlines():
match = re.match('.+file format (\S+)', line)
@@ -121,8 +121,8 @@
return (match.group(1) == 'pe-i386')
return False
-def readobj_is_32bit_windows(lib):
- output = subprocess.check_output(['llvm-readobj','--file-header',lib],
+def readobj_is_32bit_windows(tool, lib):
+ output = subprocess.check_output([tool,'--file-header',lib],
universal_newlines=True)
for line in output.splitlines():
match = re.match('Format: (\S+)', line)
@@ -132,7 +132,7 @@
# On AIX, there isn't an easy way to detect 32-bit windows objects with the system toolchain,
# so just assume false.
-def aix_is_32bit_windows(lib):
+def aix_is_32bit_windows(tool, lib):
return False
# MSVC mangles names to ?<identifier_mangling>@<type_mangling>. By examining the
@@ -355,10 +355,10 @@
return components
def extract_symbols(arg):
- get_symbols, should_keep_symbol, calling_convention_decoration, lib = arg
+ get_symbols, get_symbols_tool, should_keep_symbol, calling_convention_decoration, lib = arg
symbol_defs = dict()
symbol_refs = set()
- for (symbol, is_def) in get_symbols(lib):
+ for (symbol, is_def) in get_symbols(get_symbols_tool, lib):
symbol = should_keep_symbol(symbol, calling_convention_decoration)
if symbol:
if is_def:
@@ -392,8 +392,20 @@
# Not a template
return None
+def parse_arg_override(parser, val):
+ tool, _, path = val.partition('=')
+ if not tool in known_tools:
+ parser.error(f'Unknown tool: {tool}')
+ if not path or not os.path.isfile(path):
+ parser.error(f'Override path for tool {tool} does not exist')
+ return (tool, path)
+
if __name__ == '__main__':
- tool_exes = ['dumpbin','nm','objdump','llvm-readobj']
+ known_tools = {'dumpbin': 'dumpbin',
+ 'nm': 'nm',
+ 'objdump': 'objdump',
+ 'llvm-readobj': 'llvm-readobj'}
+ tool_exes = list(known_tools)
parser = argparse.ArgumentParser(
description='Extract symbols to export from libraries')
parser.add_argument('--mangling', choices=['itanium','microsoft'],
@@ -401,11 +413,18 @@
parser.add_argument('--tools', choices=tool_exes, nargs='*',
help='tools to use to extract symbols and determine the'
' target')
+ parser.add_argument('--override', action='append', metavar='<tool>=<path>',
+ type=lambda x: parse_arg_override(parser, x),
+ help='explicitly specify <path> to run <tool>')
parser.add_argument('libs', metavar='lib', type=str, nargs='+',
help='libraries to extract symbols from')
parser.add_argument('-o', metavar='file', type=str, help='output to file')
args = parser.parse_args()
+ if args.override:
+ for (tool, path) in args.override:
+ known_tools[tool] = path
+
# Determine the function to use to get the list of symbols from the inputs,
# and the function to use to determine if the target is 32-bit windows.
tools = { 'dumpbin' : (dumpbin_get_symbols, dumpbin_is_32bit_windows),
@@ -413,18 +432,20 @@
'objdump' : (None, objdump_is_32bit_windows),
'llvm-readobj' : (readobj_get_symbols, readobj_is_32bit_windows) }
get_symbols = None
+ get_symbols_tool = None
is_32bit_windows = aix_is_32bit_windows if sys.platform.startswith('aix') else None
+ is_32bit_windows_tool = None
# If we have a tools argument then use that for the list of tools to check
if args.tools:
tool_exes = args.tools
# Find a tool to use by trying each in turn until we find one that exists
# (subprocess.call will throw OSError when the program does not exist)
- get_symbols = None
for exe in tool_exes:
try:
+ tool = known_tools[exe]
# Close std streams as we don't want any output and we don't
# want the process to wait for something on stdin.
- p = subprocess.Popen([exe], stdout=subprocess.PIPE,
+ p = subprocess.Popen([tool], stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
stdin=subprocess.PIPE,
universal_newlines=True)
@@ -436,8 +457,10 @@
# is_32bit_windows
if not get_symbols:
get_symbols = tools[exe][0]
+ get_symbols_tool = tool
if not is_32bit_windows:
is_32bit_windows = tools[exe][1]
+ is_32bit_windows_tool = tool
if get_symbols and is_32bit_windows:
break
except OSError:
@@ -478,7 +501,7 @@
# Check if calling convention decoration is used by inspecting the first
# library in the list
- calling_convention_decoration = is_32bit_windows(libs[0])
+ calling_convention_decoration = is_32bit_windows(is_32bit_windows_tool, libs[0])
# Extract symbols from libraries in parallel. This is a huge time saver when
# doing a debug build, as there are hundreds of thousands of symbols in each
@@ -489,7 +512,8 @@
# use a lambda or local function definition as that doesn't work on
# windows, so create a list of tuples which duplicates the arguments
# that are the same in all calls.
- vals = [(get_symbols, should_keep_symbol, calling_convention_decoration, x) for x in libs]
+ vals = [(get_symbols, get_symbols_tool, should_keep_symbol,
+ calling_convention_decoration, x) for x in libs]
# Do an async map then wait for the result to make sure that
# KeyboardInterrupt gets caught correctly (see
# http://bugs.python.org/issue8296)
Index: clang/CMakeLists.txt
===================================================================
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -616,6 +616,12 @@
elseif(MSVC)
add_dependencies(clang-bootstrap-deps llvm-lib)
set(${CLANG_STAGE}_AR -DCMAKE_AR=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-lib)
+ # 'extract_symbols.py' uses utilities like 'dumpbin' and 'llvm-readobj'
+ # which can be found via PATH. They cannot read bitcode files, resulting
+ # in missing symbols for plugins if the compiler is built with LTO. To fix
+ # this, build 'llvm-nm' and instruct 'extract_symbols.py' to use it.
+ add_dependencies(clang-bootstrap-deps llvm-nm)
+ set(BOOTSTRAP_LLVM_EXTRACT_SYMBOLS_FLAGS --tools nm dumpbin --override nm=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-nm.exe)
elseif(NOT WIN32)
add_dependencies(clang-bootstrap-deps llvm-ar llvm-ranlib)
if(NOT BOOTSTRAP_LLVM_ENABLE_LLD AND LLVM_BINUTILS_INCDIR)
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits