Author: Dave Lee Date: 2026-02-28T14:31:26-08:00 New Revision: a0fb4f67084839d0209a303b223217a04532fd48
URL: https://github.com/llvm/llvm-project/commit/a0fb4f67084839d0209a303b223217a04532fd48 DIFF: https://github.com/llvm/llvm-project/commit/a0fb4f67084839d0209a303b223217a04532fd48.diff LOG: [lldb] Add BytecodeSection class to formatter_bytecode.py (#183876) Changes `formatter_bytecode.compile_file` to return a `BytecodeSection` value. The `BytecodeSection` holds the data that needs to be emitted to an `__lldbformatters` section. The `BytecodeSection` currently provides `write_binary`, but will be updated in a follow-up commit to include `write_source`, which will allow the data to be emitted as C or Swift source code. This will make it easier to integrate into build systems, as it's easier to get data into a binary via source code than as a raw binary file. Added: Modified: lldb/examples/python/formatter_bytecode.py Removed: ################################################################################ diff --git a/lldb/examples/python/formatter_bytecode.py b/lldb/examples/python/formatter_bytecode.py index abbd85d7a77b8..8f2f09834ff4f 100644 --- a/lldb/examples/python/formatter_bytecode.py +++ b/lldb/examples/python/formatter_bytecode.py @@ -17,6 +17,7 @@ import re import io +from dataclasses import dataclass from typing import BinaryIO, TextIO, Tuple, Union BINARY_VERSION = 1 @@ -191,35 +192,52 @@ def conclude_segment(): return segments -def compile_file(type_name: str, input: TextIO, output: BinaryIO) -> None: - input_tokens = _tokenize(input.read()) +@dataclass +class BytecodeSection: + """Abstraction of the data serialized to __lldbformatters sections.""" + + type_name: str + flags: int + signatures: list[Tuple[str, bytes]] + + def validate(self): + seen = set() + for sig, _ in self.signatures: + if sig in seen: + raise ValueError(f"duplicate signature: {sig}") + seen.add(sig) + + def write_binary(self, output: BinaryIO) -> None: + self.validate() + + bin = bytearray() + bin.extend(_to_uleb(len(self.type_name))) + 
bin.extend(bytes(self.type_name, encoding="utf-8")) + bin.extend(_to_byte(self.flags)) + for sig, bc in self.signatures: + bin.extend(_to_byte(SIGNATURES[sig])) + bin.extend(_to_uleb(len(bc))) + bin.extend(bc) + + output.write(_to_byte(BINARY_VERSION)) + output.write(_to_uleb(len(bin))) + output.write(bin) - signatures = {} + +def compile_file(type_name: str, input: TextIO) -> BytecodeSection: + input_tokens = _tokenize(input.read()) + signatures = [] for sig, tokens in _segment_by_signature(input_tokens): - if sig in signatures: - raise ValueError(f"duplicate signature: {sig}") - signatures[sig] = compile_tokens(tokens) - - bin = bytearray() - bin.extend(_to_uleb(len(type_name))) - bin.extend(bytes(type_name, encoding="utf-8")) - flags = 0 - bin.extend(_to_byte(flags)) - for sig, bc in signatures.items(): - bin.extend(_to_byte(SIGNATURES[sig])) - bin.extend(_to_uleb(len(bc))) - bin.extend(bc) - - output.write(_to_byte(BINARY_VERSION)) - output.write(_to_uleb(len(bin))) - output.write(bin) - - -def compile(assembler: str) -> bytearray: + signatures.append((sig, compile_tokens(tokens))) + + return BytecodeSection(type_name, flags=0, signatures=signatures) + + +def compile(assembler: str) -> bytes: return compile_tokens(_tokenize(assembler)) -def compile_tokens(tokens: list[str]) -> bytearray: +def compile_tokens(tokens: list[str]) -> bytes: """Compile assembler into bytecode""" # This is a stack of all in-flight/unterminated blocks. 
bytecode = [bytearray()] @@ -258,7 +276,7 @@ def emit(byte): else: emit(opcode[tok]) assert len(bytecode) == 1 # unterminated { - return bytecode[0] + return bytes(bytecode[0]) ################################################################################ @@ -291,7 +309,7 @@ def disassemble_file(input: BinaryIO, output: TextIO) -> None: print(f"@{sig_name}: {asm}", file=output) -def disassemble(bytecode: Union[bytes, bytearray]) -> Tuple[str, list[int]]: +def disassemble(bytecode: bytes) -> Tuple[str, list[int]]: """Disassemble bytecode into (assembler, token starts)""" asm = "" all_bytes = list(bytecode) @@ -365,7 +383,7 @@ def count_fmt_params(fmt: str) -> int: return n -def interpret(bytecode: bytearray, control: list, data: list, tracing: bool = False): +def interpret(bytecode: bytes, control: list, data: list, tracing: bool = False): """Interpret bytecode""" frame = [] frame.append((0, len(bytecode))) @@ -657,7 +675,8 @@ def _main(): open(args.input) as input, open(args.output, "wb") as output, ): - compile_file(args.type_name, input, output) + section = compile_file(args.type_name, input) + section.write_binary(output) elif args.disassemble: if args.output: with ( @@ -709,7 +728,8 @@ def roundtrip(asm): def test_compile_file(self): def run_compile(type_name, asm): out = io.BytesIO() - compile_file(type_name, io.StringIO(asm), out) + section = compile_file(type_name, io.StringIO(asm)) + section.write_binary(out) out.seek(0) return out _______________________________________________ lldb-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits
