Author: Dave Lee Date: 2026-03-03T15:48:30-08:00 New Revision: ece4b759327c03fa09edc90b802db3a74fa19e33
URL: https://github.com/llvm/llvm-project/commit/ece4b759327c03fa09edc90b802db3a74fa19e33 DIFF: https://github.com/llvm/llvm-project/commit/ece4b759327c03fa09edc90b802db3a74fa19e33.diff LOG: [lldb] Add C source output mode to formatter_bytecode.py (#184242) Add the ability to generate a C source file, which is in addition to the existing functionality of generating binary. An example of the generated source: ```c #ifdef __APPLE__ #define FORMATTER_SECTION "__DATA_CONST,__lldbformatters" #else #define FORMATTER_SECTION ".lldbformatters" #endif __attribute__((used, section(FORMATTER_SECTION))) unsigned char _Account_synthetic[] = // version "\x01" // remaining record size "\x15" // type name size "\x07" // type name "Account" // flags "\x00" // sig_get_num_children "\x02" // program size "\x02" // program "\x20\x01" // sig_get_child_at_index "\x04" // program size "\x06" // program "\x02\x20\x00\x23\x11\x60" ; ``` Added: Modified: lldb/examples/python/formatter_bytecode.py Removed: ################################################################################ diff --git a/lldb/examples/python/formatter_bytecode.py b/lldb/examples/python/formatter_bytecode.py index 8f2f09834ff4f..1ad10d60fb974 100644 --- a/lldb/examples/python/formatter_bytecode.py +++ b/lldb/examples/python/formatter_bytecode.py @@ -17,6 +17,7 @@ import re import io +import textwrap from dataclasses import dataclass from typing import BinaryIO, TextIO, Tuple, Union @@ -207,9 +208,7 @@ def validate(self): raise ValueError(f"duplicate signature: {sig}") seen.add(sig) - def write_binary(self, output: BinaryIO) -> None: - self.validate() - + def _to_binary(self) -> bytes: bin = bytearray() bin.extend(_to_uleb(len(self.type_name))) bin.extend(bytes(self.type_name, encoding="utf-8")) @@ -219,9 +218,77 @@ def write_binary(self, output: BinaryIO) -> None: bin.extend(_to_uleb(len(bc))) bin.extend(bc) + return bytes(bin) + + def write_binary(self, output: BinaryIO) -> None: + self.validate() + + bin = 
self._to_binary() output.write(_to_byte(BINARY_VERSION)) output.write(_to_uleb(len(bin))) - output.write(bin) + output.write(self._to_binary()) + + class _CBuilder: + """Helper class for emitting binary data as a C-string literal.""" + + entries: list[Tuple[str, str]] + + def __init__(self) -> None: + self.entries = [] + + def add_byte(self, x: int, comment: str) -> None: + self.add_bytes(_to_byte(x), comment) + + def add_uleb(self, x: int, comment: str) -> None: + self.add_bytes(_to_uleb(x), comment) + + def add_bytes(self, x: bytes, comment: str) -> None: + # Construct zero padded hex values with length two. + string = "".join(f"\\x{b:02x}" for b in x) + self.add_string(string, comment) + + def add_string(self, string: str, comment: str) -> None: + self.entries.append((f'"{string}"', comment)) + + def write_source(self, output: TextIO) -> None: + self.validate() + + size = len(self._to_binary()) + + b = self._CBuilder() + b.add_byte(BINARY_VERSION, "version") + b.add_uleb(size, "remaining record size") + b.add_uleb(len(self.type_name), "type name size") + b.add_string(self.type_name, "type name") + b.add_byte(self.flags, "flags") + for sig, bc in self.signatures: + b.add_byte(SIGNATURES[sig], f"sig_{sig}") + b.add_uleb(len(bc), "program size") + b.add_bytes(bc, "program") + + print( + textwrap.dedent( + """ + #ifdef __APPLE__ + #define FORMATTER_SECTION "__DATA_CONST,__lldbformatters" + #else + #define FORMATTER_SECTION ".lldbformatters" + #endif + """ + ), + file=output, + ) + var_name = re.sub(r"\W", "_", self.type_name) + print( + "__attribute__((used, section(FORMATTER_SECTION)))", + file=output, + ) + print(f"unsigned char _{var_name}_synthetic[] =", file=output) + indent = " " + for string, comment in b.entries: + print(f"{indent}// {comment}", file=output) + print(f"{indent}{string}", file=output) + print(";", file=output) def compile_file(type_name: str, input: TextIO) -> BytecodeSection: @@ -601,7 +668,7 @@ def next_byte(): 
################################################################################ -def _to_uleb(value: int) -> bytearray: +def _to_uleb(value: int) -> bytes: """Encode an integer to ULEB128 bytes.""" if value < 0: raise ValueError(f"negative number cannot be encoded to ULEB128: {value}") @@ -616,7 +683,7 @@ def _to_uleb(value: int) -> bytearray: if value == 0: break - return result + return bytes(result) def _from_uleb(stream: BinaryIO) -> int: @@ -665,18 +732,27 @@ def _main(): "--output", help="output file (required for --compile)", ) + parser.add_argument( + "-f", + "--format", + choices=("binary", "c"), + default="binary", + help="output file format", + ) parser.add_argument("-t", "--test", action="store_true", help="run unit tests") args = parser.parse_args() if args.compile: if not args.output: parser.error("--output is required with --compile") - with ( - open(args.input) as input, - open(args.output, "wb") as output, - ): + with open(args.input) as input: section = compile_file(args.type_name, input) - section.write_binary(output) + if args.format == "binary": + with open(args.output, "wb") as output: + section.write_binary(output) + else: # args.format == "c" + with open(args.output, "w") as output: + section.write_source(output) elif args.disassemble: if args.output: with ( @@ -758,4 +834,42 @@ def run_disassemble(binary): with self.assertRaises(ValueError): run_compile("MyType", "@summary: 1u return\n@summary: 2u return") + def test_write_source(self): + # Use the Account example from main.cpp as a reference, whose + # exact byte values are known. 
+ section = BytecodeSection( + type_name="Account", + flags=0, + signatures=[ + ("get_num_children", bytes([0x20, 0x01])), + ("get_child_at_index", bytes([0x02, 0x20, 0x00, 0x23, 0x11, 0x60])), + ], + ) + out = io.StringIO() + section.write_source(out) + src = out.getvalue() + + self.assertIn("__attribute__((used, section(FORMATTER_SECTION)))", src) + self.assertIn("unsigned char _Account_synthetic[] =", src) + self.assertIn('"\\x01"', src) # version + self.assertIn('"\\x15"', src) # record size (21) + self.assertIn('"\\x07"', src) # type name size (7) + self.assertIn('"Account"', src) # type name + self.assertIn('"\\x00"', src) # flags + self.assertIn('"\\x02"', src) # sig_get_num_children + self.assertIn('"\\x20\\x01"', src) # program + self.assertIn('"\\x04"', src) # sig_get_child_at_index + self.assertIn('"\\x06"', src) # program size + self.assertIn('"\\x02\\x20\\x00\\x23\\x11\\x60"', src) # program + self.assertIn("// version", src) + self.assertIn("// type name", src) + self.assertIn("// program", src) + # Semicolon terminates the array initializer. + self.assertEqual(src.count(";"), 1) + + # Non-identifier characters in the type name are replaced with '_'. + out2 = io.StringIO() + BytecodeSection("std::vector<int>", 0, []).write_source(out2) + self.assertIn("_std__vector_int__synthetic[] =", out2.getvalue()) + unittest.main(argv=[__file__]) _______________________________________________ lldb-commits mailing list lldb-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits
