Author: Dave Lee
Date: 2026-03-03T15:48:30-08:00
New Revision: ece4b759327c03fa09edc90b802db3a74fa19e33

URL: https://github.com/llvm/llvm-project/commit/ece4b759327c03fa09edc90b802db3a74fa19e33
DIFF: https://github.com/llvm/llvm-project/commit/ece4b759327c03fa09edc90b802db3a74fa19e33.diff

LOG: [lldb] Add C source output mode to formatter_bytecode.py (#184242)

Add the ability to generate a C source file, in addition to the
existing ability to generate a binary.

An example of the generated source:

```c
#ifdef __APPLE__
#define FORMATTER_SECTION "__DATA_CONST,__lldbformatters"
#else
#define FORMATTER_SECTION ".lldbformatters"
#endif

__attribute__((used, section(FORMATTER_SECTION)))
unsigned char _Account_synthetic[] =
    // version
    "\x01"
    // remaining record size
    "\x15"
    // type name size
    "\x07"
    // type name
    "Account"
    // flags
    "\x00"
    // sig_get_num_children
    "\x02"
    // program size
    "\x02"
    // program
    "\x20\x01"
    // sig_get_child_at_index
    "\x04"
    // program size
    "\x06"
    // program
    "\x02\x20\x00\x23\x11\x60"
;
```

Added: 
    

Modified: 
    lldb/examples/python/formatter_bytecode.py

Removed: 
    


################################################################################
diff --git a/lldb/examples/python/formatter_bytecode.py b/lldb/examples/python/formatter_bytecode.py
index 8f2f09834ff4f..1ad10d60fb974 100644
--- a/lldb/examples/python/formatter_bytecode.py
+++ b/lldb/examples/python/formatter_bytecode.py
@@ -17,6 +17,7 @@
 
 import re
 import io
+import textwrap
 from dataclasses import dataclass
 from typing import BinaryIO, TextIO, Tuple, Union
 
@@ -207,9 +208,7 @@ def validate(self):
                 raise ValueError(f"duplicate signature: {sig}")
             seen.add(sig)
 
-    def write_binary(self, output: BinaryIO) -> None:
-        self.validate()
-
+    def _to_binary(self) -> bytes:
         bin = bytearray()
         bin.extend(_to_uleb(len(self.type_name)))
         bin.extend(bytes(self.type_name, encoding="utf-8"))
@@ -219,9 +218,77 @@ def write_binary(self, output: BinaryIO) -> None:
             bin.extend(_to_uleb(len(bc)))
             bin.extend(bc)
 
+        return bytes(bin)
+
+    def write_binary(self, output: BinaryIO) -> None:
+        self.validate()
+
+        bin = self._to_binary()
         output.write(_to_byte(BINARY_VERSION))
         output.write(_to_uleb(len(bin)))
-        output.write(bin)
+        output.write(self._to_binary())
+
+    class _CBuilder:
+        """Helper class for emitting binary data as a C-string literal."""
+
+        entries: list[Tuple[str, str]]
+
+        def __init__(self) -> None:
+            self.entries = []
+
+        def add_byte(self, x: int, comment: str) -> None:
+            self.add_bytes(_to_byte(x), comment)
+
+        def add_uleb(self, x: int, comment: str) -> None:
+            self.add_bytes(_to_uleb(x), comment)
+
+        def add_bytes(self, x: bytes, comment: str) -> None:
+            # Construct zero padded hex values with length two.
+            string = "".join(f"\\x{b:02x}" for b in x)
+            self.add_string(string, comment)
+
+        def add_string(self, string: str, comment: str) -> None:
+            self.entries.append((f'"{string}"', comment))
+
+    def write_source(self, output: TextIO) -> None:
+        self.validate()
+
+        size = len(self._to_binary())
+
+        b = self._CBuilder()
+        b.add_byte(BINARY_VERSION, "version")
+        b.add_uleb(size, "remaining record size")
+        b.add_uleb(len(self.type_name), "type name size")
+        b.add_string(self.type_name, "type name")
+        b.add_byte(self.flags, "flags")
+        for sig, bc in self.signatures:
+            b.add_byte(SIGNATURES[sig], f"sig_{sig}")
+            b.add_uleb(len(bc), "program size")
+            b.add_bytes(bc, "program")
+
+        print(
+            textwrap.dedent(
+                """
+                #ifdef __APPLE__
+                #define FORMATTER_SECTION "__DATA_CONST,__lldbformatters"
+                #else
+                #define FORMATTER_SECTION ".lldbformatters"
+                #endif
+                """
+            ),
+            file=output,
+        )
+        var_name = re.sub(r"\W", "_", self.type_name)
+        print(
+            "__attribute__((used, section(FORMATTER_SECTION)))",
+            file=output,
+        )
+        print(f"unsigned char _{var_name}_synthetic[] =", file=output)
+        indent = "    "
+        for string, comment in b.entries:
+            print(f"{indent}// {comment}", file=output)
+            print(f"{indent}{string}", file=output)
+        print(";", file=output)
 
 
 def compile_file(type_name: str, input: TextIO) -> BytecodeSection:
@@ -601,7 +668,7 @@ def next_byte():
 
################################################################################
 
 
-def _to_uleb(value: int) -> bytearray:
+def _to_uleb(value: int) -> bytes:
     """Encode an integer to ULEB128 bytes."""
     if value < 0:
         raise ValueError(f"negative number cannot be encoded to ULEB128: {value}")
@@ -616,7 +683,7 @@ def _to_uleb(value: int) -> bytearray:
         if value == 0:
             break
 
-    return result
+    return bytes(result)
 
 
 def _from_uleb(stream: BinaryIO) -> int:
@@ -665,18 +732,27 @@ def _main():
         "--output",
         help="output file (required for --compile)",
     )
+    parser.add_argument(
+        "-f",
+        "--format",
+        choices=("binary", "c"),
+        default="binary",
+        help="output file format",
+    )
     parser.add_argument("-t", "--test", action="store_true", help="run unit tests")
 
     args = parser.parse_args()
     if args.compile:
         if not args.output:
             parser.error("--output is required with --compile")
-        with (
-            open(args.input) as input,
-            open(args.output, "wb") as output,
-        ):
+        with open(args.input) as input:
             section = compile_file(args.type_name, input)
-            section.write_binary(output)
+        if args.format == "binary":
+            with open(args.output, "wb") as output:
+                section.write_binary(output)
+        else:  # args.format == "c"
+            with open(args.output, "w") as output:
+                section.write_source(output)
     elif args.disassemble:
         if args.output:
             with (
@@ -758,4 +834,42 @@ def run_disassemble(binary):
             with self.assertRaises(ValueError):
                 run_compile("MyType", "@summary: 1u return\n@summary: 2u return")
 
+        def test_write_source(self):
+            # Use the Account example from main.cpp as a reference, whose
+            # exact byte values are known.
+            section = BytecodeSection(
+                type_name="Account",
+                flags=0,
+                signatures=[
+                    ("get_num_children", bytes([0x20, 0x01])),
+                    ("get_child_at_index", bytes([0x02, 0x20, 0x00, 0x23, 0x11, 0x60])),
+                ],
+            )
+            out = io.StringIO()
+            section.write_source(out)
+            src = out.getvalue()
+
+            self.assertIn("__attribute__((used, section(FORMATTER_SECTION)))", src)
+            self.assertIn("unsigned char _Account_synthetic[] =", src)
+            self.assertIn('"\\x01"', src)  # version
+            self.assertIn('"\\x15"', src)  # record size (21)
+            self.assertIn('"\\x07"', src)  # type name size (7)
+            self.assertIn('"Account"', src)  # type name
+            self.assertIn('"\\x00"', src)  # flags
+            self.assertIn('"\\x02"', src)  # sig_get_num_children
+            self.assertIn('"\\x20\\x01"', src)  # program
+            self.assertIn('"\\x04"', src)  # sig_get_child_at_index
+            self.assertIn('"\\x06"', src)  # program size
+            self.assertIn('"\\x02\\x20\\x00\\x23\\x11\\x60"', src)  # program
+            self.assertIn("// version", src)
+            self.assertIn("// type name", src)
+            self.assertIn("// program", src)
+            # Semicolon terminates the array initializer.
+            self.assertEqual(src.count(";"), 1)
+
+            # Non-identifier characters in the type name are replaced with '_'.
+            out2 = io.StringIO()
+            BytecodeSection("std::vector<int>", 0, []).write_source(out2)
+            self.assertIn("_std__vector_int__synthetic[] =", out2.getvalue())
+
     unittest.main(argv=[__file__])


        
_______________________________________________
lldb-commits mailing list
lldb-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits

Reply via email to