https://github.com/kastiglione updated https://github.com/llvm/llvm-project/pull/113734
>From 0f1c5ff8b0556d8e7e69f3ec9c6a71784304a2b1 Mon Sep 17 00:00:00 2001 From: Dave Lee <[email protected]> Date: Fri, 25 Oct 2024 12:56:00 -0700 Subject: [PATCH 1/7] [lldb] Proof of concept data formatter compiler for Python --- .../formatter-bytecode/optional_summary.py | 14 ++ .../formatter-bytecode/python_to_assembly.py | 145 ++++++++++++++++++ 2 files changed, 159 insertions(+) create mode 100644 lldb/examples/formatter-bytecode/optional_summary.py create mode 100755 lldb/examples/formatter-bytecode/python_to_assembly.py diff --git a/lldb/examples/formatter-bytecode/optional_summary.py b/lldb/examples/formatter-bytecode/optional_summary.py new file mode 100644 index 0000000000000..68e672d86613d --- /dev/null +++ b/lldb/examples/formatter-bytecode/optional_summary.py @@ -0,0 +1,14 @@ +def OptionalSummaryProvider(valobj, _): + failure = 2 + storage = valobj.GetChildMemberWithName("Storage") + hasVal = storage.GetChildMemberWithName("hasVal").GetValueAsUnsigned(failure) + if hasVal == failure: + return "<could not read Optional>" + + if hasVal == 0: + return "None" + + underlying_type = storage.GetType().GetTemplateArgumentType(0) + value = storage.GetChildMemberWithName("value") + value = value.Cast(underlying_type) + return value.GetSummary() diff --git a/lldb/examples/formatter-bytecode/python_to_assembly.py b/lldb/examples/formatter-bytecode/python_to_assembly.py new file mode 100755 index 0000000000000..6e2adbe093fda --- /dev/null +++ b/lldb/examples/formatter-bytecode/python_to_assembly.py @@ -0,0 +1,145 @@ +#!/usr/bin/python3 + +import ast +import io +import sys +from typing import Any + +BUILTINS = { + "Cast": "@cast", + "GetChildMemberWithName": "@get_child_with_name", + "GetSummary": "@get_summary", + "GetTemplateArgumentType": "@get_template_argument_type", + "GetType": "@get_type", + "GetValueAsUnsigned": "@get_value_as_unsigned", +} + +COMPS = { + ast.Eq: "=", + ast.NotEq: "!=", + ast.Lt: "<", + ast.LtE: "=<", + ast.Gt: ">", + ast.GtE: "=>", +} + +class Compiler(ast.NodeVisitor): + # Track the stack index of locals variables. + # + # This is essentially an ordered dictionary, where the key is an index on + # the stack, and the value is the name of the variable whose value is at + # that index. + # + # Ex: `locals[0]` is the name of the first value pushed on the stack, etc. + locals: list[str] + + buffer: io.StringIO + final_buffer: io.StringIO + + def __init__(self) -> None: + self.locals = [] + self.buffer = io.StringIO() + self.final_buffer = io.StringIO() + + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + # Initialize `locals` with the (positional) arguments. + self.locals = [arg.arg for arg in node.args.args] + self.generic_visit(node) + self.locals.clear() + + def visit_Compare(self, node: ast.Compare) -> None: + self.visit(node.left) + # XXX: Does not handle multiple comparisons, ex: `0 < x < 10` + self.visit(node.comparators[0]) + self._output(COMPS[type(node.ops[0])]) + + def visit_If(self, node: ast.If) -> None: + self.visit(node.test) + + # Does the body `return`? + has_return = any(isinstance(x, ast.Return) for x in node.body) + + self._output("{") + self._visit_each(node.body) + if not node.orelse and not has_return: + # No else, and no early exit: a simple `if` + self._output("} if") + return + + self._output("}") + if node.orelse: + # Handle else. + self._output("{") + self._visit_each(node.orelse) + self._output("} ifelse") + elif has_return: + # Convert early exit into an `ifelse`. + self._output("{") + self._output("} ifelse", final=True) + + def visit_Constant(self, node: ast.Constant) -> None: + if isinstance(node.value, str): + self._output(f'"{node.value}"') + elif isinstance(node.value, bool): + self._output(int(node.value)) + else: + self._output(node.value) + + def visit_Call(self, node: ast.Call) -> None: + if isinstance(node.func, ast.Attribute): + # The receiver is the left hande side of the dot. + receiver = node.func.value + method = node.func.attr + if selector := BUILTINS.get(method): + # Visit the method's receiver to have its value on the stack. + self.visit(receiver) + # Visit the args to position them on the stack. + self._visit_each(node.args) + self._output(f"{selector} call") + else: + # TODO: fail + print(f"error: unsupported method {node.func.attr}", file=sys.stderr) + + def visit_Assign(self, node: ast.Assign) -> None: + # Visit RHS first, putting values on the stack. + self.visit(node.value) + # Determine the name(s). Either a single Name, or a Tuple of Names. + target = node.targets[0] + if isinstance(target, ast.Name): + names = [target.id] + elif isinstance(target, ast.Tuple): + # These tuple elements are Name nodes. + names = [x.id for x in target.elts] + + # Forget any previous bindings of these names. + # Their values are orphaned on the stack. + for local in self.locals: + if local in names: + old_idx = self.locals.index(local) + self.locals[old_idx] = "" + + self.locals.extend(names) + + def visit_Name(self, node: ast.Name) -> None: + idx = self.locals.index(node.id) + self._output(f"{idx} pick # {node.id}") + + def _visit_each(self, nodes: list[ast.AST]) -> None: + for child in nodes: + self.visit(child) + + def _output(self, x: Any, final: bool = False) -> None: + dest = self.final_buffer if final else self.buffer + print(x, file=dest) + + @property + def output(self) -> str: + return compiler.buffer.getvalue() + compiler.final_buffer.getvalue() + + +if __name__ == "__main__": + with open(sys.argv[1]) as f: + root = ast.parse(f.read()) + compiler = Compiler() + compiler.visit(root) + print(compiler.output) >From c8525de8f369b99af869ce22170111ba0fea9b70 Mon Sep 17 00:00:00 2001 From: Dave Lee <[email protected]> Date: Fri, 3 Jan 2025 14:20:48 -0800 Subject: [PATCH 2/7] Support the return operation --- .../formatter-bytecode/python_to_assembly.py | 32 +++++++------------ 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/lldb/examples/formatter-bytecode/python_to_assembly.py b/lldb/examples/formatter-bytecode/python_to_assembly.py index 6e2adbe093fda..98c03832227cc 100755 --- a/lldb/examples/formatter-bytecode/python_to_assembly.py +++ b/lldb/examples/formatter-bytecode/python_to_assembly.py @@ -34,12 +34,10 @@ class Compiler(ast.NodeVisitor): locals: list[str] buffer: io.StringIO - final_buffer: io.StringIO def __init__(self) -> None: self.locals = [] self.buffer = io.StringIO() - self.final_buffer = io.StringIO() def visit_FunctionDef(self, node: ast.FunctionDef) -> None: # Initialize `locals` with the (positional) arguments. @@ -56,26 +54,19 @@ def visit_Compare(self, node: ast.Compare) -> None: def visit_If(self, node: ast.If) -> None: self.visit(node.test) - # Does the body `return`? - has_return = any(isinstance(x, ast.Return) for x in node.body) - self._output("{") self._visit_each(node.body) - if not node.orelse and not has_return: - # No else, and no early exit: a simple `if` - self._output("} if") - return - - self._output("}") if node.orelse: - # Handle else. - self._output("{") + self._output("} {") self._visit_each(node.orelse) self._output("} ifelse") - elif has_return: - # Convert early exit into an `ifelse`. - self._output("{") - self._output("} ifelse", final=True) + else: + self._output("} if") + + def visit_Return(self, node: ast.Return) -> None: + if node.value: + self.visit(node.value) + self._output("return") def visit_Constant(self, node: ast.Constant) -> None: if isinstance(node.value, str): @@ -128,13 +119,12 @@ def _visit_each(self, nodes: list[ast.AST]) -> None: for child in nodes: self.visit(child) - def _output(self, x: Any, final: bool = False) -> None: - dest = self.final_buffer if final else self.buffer - print(x, file=dest) + def _output(self, x: Any) -> None: + print(x, file=self.buffer) @property def output(self) -> str: - return compiler.buffer.getvalue() + compiler.final_buffer.getvalue() + return compiler.buffer.getvalue() if __name__ == "__main__": >From c3314558a831ec7c906a3f63616a3cad4fd1ecad Mon Sep 17 00:00:00 2001 From: Dave Lee <[email protected]> Date: Tue, 6 Jan 2026 10:49:21 -0800 Subject: [PATCH 3/7] Add Python bytecode translator --- .../bytecode_to_bytecode.py | 141 ++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100755 lldb/examples/formatter-bytecode/bytecode_to_bytecode.py diff --git a/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py b/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py new file mode 100755 index 0000000000000..89227b094957c --- /dev/null +++ b/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py @@ -0,0 +1,141 @@ +#!/usr/bin/python3 + +import dis +import sys +from types import CodeType +from typing import Iterable, Iterator, cast + + +# TODO: strlen, fmt +_SELECTORS = { + "Cast": "@cast", + "GetChildAtIndex": "@get_child_at_index", + "GetChildIndex": "@get_child_index", + "GetChildMemberWithName": "@get_child_with_name", + "GetNumChildren": "@get_num_children", + "GetSummary": "@summary", + "GetTemplateArgumentType": "@get_template_argument_type", + "GetType": "@get_type", + "GetValue": "@get_value", + "GetValueAsAddress": "@get_value_as_address", + "GetValueAsSigned": "@get_value_as_signed", + "GetValueAsUnsigned": "@get_value_as_unsigned", +} + + +def _main(source_file): + with open(source_file) as f: + source_code = f.read() + bytecode = dis.Bytecode(source_code) + for func_body in _function_bodies(bytecode): + instructions = dis.get_instructions(func_body) + for op in _translate(instructions): + print(op) + + +def _function_bodies(bytecode: dis.Bytecode) -> Iterable[CodeType]: + """ + Iterate the function bodies (code object children) of the given Bytecode. + """ + for const in bytecode.codeobj.co_consts: + if hasattr(const, "co_code"): + yield const + + +def _translate(instructions: Iterator[dis.Instruction]) -> list[str]: + """ + Convert Python instructions to LLDB data formatter bytecode operations. + """ + result = [] + _translate_list(list(instructions), result) + return result + + +def _translate_list(instructions: list[dis.Instruction], result: list[str]): + """ + Convert sequences of Python bytecode to sequences of LLDB data formatter + bytecode. + + This function performs course grained translations - sequences of input to + sequences of output. For translations of individual instructions, see + `_translate_instruction`. + """ + while instructions: + inst = instructions.pop(0) + op = inst.opname + if op == "LOAD_METHOD": + # Method call sequences begin with a LOAD_METHOD instruction, then + # load the arguments on to the stack, and end with the CALL_METHOD + # instruction. + if selector := _SELECTORS.get(inst.argval): + while instructions: + if instructions[0] == "LOAD_METHOD": + # Begin a nested method call. + _translate_list(instructions, result) + else: + # TODO: Can LOAD_METHOD, ..., CALL_METHOD sequences + # contain flow control? If so this needs to gather + # instructions and call `_translate_list`, instead of + # handling each instruction individually. + x = instructions.pop(0) + if x.opname != "CALL_METHOD": + result.append(_translate_instruction(x)) + else: + result.append(f"{selector} call") + break + elif op == "POP_JUMP_IF_FALSE": + # Convert to an `{ ... } if` sequence. + result.append("{") + offset = cast(int, inst.arg) + idx = _index_of_offset(instructions, offset) + # Split the condional block prefix from the remaining instructions. + block = instructions[:idx] + del instructions[:idx] + _translate_list(block, result) + result.append("} if") + else: + result.append(_translate_instruction(inst)) + + +def _translate_instruction(inst: dis.Instruction) -> str: + """ + Convert a single Python bytecode instruction to an LLDB data formatter + bytecode operation. + + This function performs one-to-one translations. For translations of + sequences of instructions, see `_translate_list`. + """ + op = inst.opname + if op == "COMPARE_OP": + if inst.argval == "==": + return "=" + elif op == "LOAD_CONST": + if isinstance(inst.argval, str): + # TODO: Handle strings with inner double quotes ("). Alternatively, + # use `repr()` and allow the bytecode assembly to use single quotes. + return f'"{inst.argval}"' + elif isinstance(inst.argval, bool): + num = int(inst.argval) + return f"{num}" + else: + return inst.argrepr + elif op == "LOAD_FAST": + return f"{inst.arg} pick # {inst.argval}" + elif op == "RETURN_VALUE": + return "return" + elif op in ("STORE_FAST", "STORE_NAME"): + # This is fake. There is no `put` operation (yet?). + return f"{inst.arg} put # {inst.argval}" + return op + + +def _index_of_offset(instructions: list[dis.Instruction], offset) -> int: + """Find the index of the instruction having the given offset.""" + for i, inst in enumerate(instructions): + if inst.offset == offset: + return i + raise ValueError(f"invalid offset: {offset}") + + +if __name__ == "__main__": + _main(sys.argv[1]) >From 4df27154518ab326306d09a2de14abb7e85cb7e8 Mon Sep 17 00:00:00 2001 From: Dave Lee <[email protected]> Date: Tue, 3 Mar 2026 10:55:06 -0800 Subject: [PATCH 4/7] Add initial support for compiling synthetic formatter classes --- .../formatter-bytecode/python_to_assembly.py | 266 +++++++++++++++--- 1 file changed, 225 insertions(+), 41 deletions(-) diff --git a/lldb/examples/formatter-bytecode/python_to_assembly.py b/lldb/examples/formatter-bytecode/python_to_assembly.py index 98c03832227cc..693a3dabb6348 100755 --- a/lldb/examples/formatter-bytecode/python_to_assembly.py +++ b/lldb/examples/formatter-bytecode/python_to_assembly.py @@ -3,12 +3,15 @@ import ast import io import sys -from typing import Any +from copy import copy +from typing import Any, Optional, Sequence, Union, cast BUILTINS = { "Cast": "@cast", + "GetChildAtIndex": "@get_child_at_index", "GetChildMemberWithName": "@get_child_with_name", - "GetSummary": "@get_summary", + "GetSummary": "@summary", + "GetSyntheticValue": "@get_synthetic_value", "GetTemplateArgumentType": "@get_template_argument_type", "GetType": "@get_type", "GetValueAsUnsigned": "@get_value_as_unsigned", @@ -23,26 +26,115 @@ ast.GtE: "=>", } +# Maps Python method names in a formatter class to their bytecode signatures. +METHOD_SIGS = { + "__init__": "@init", + "update": "@update", + "num_children": "@get_num_children", + "get_child_index": "@get_child_index", + "get_child_at_index": "@get_child_at_index", + "get_value": "@get_value", +} + + +class CompilerError(Exception): + lineno: int + + def __init__(self, message, node: Union[ast.expr, ast.stmt]) -> None: + super().__init__(message) + self.lineno = node.lineno + + class Compiler(ast.NodeVisitor): - # Track the stack index of locals variables. - # - # This is essentially an ordered dictionary, where the key is an index on - # the stack, and the value is the name of the variable whose value is at - # that index. - # - # Ex: `locals[0]` is the name of the first value pushed on the stack, etc. + # Names of locals in bottom-to-top stack order. locals[0] is the + # oldest/deepest; locals[-1] is the most recently pushed. locals: list[str] + # Names of visible attrs in bottom-to-top stack order. Always holds the + # full combined frame for the method being compiled: grows incrementally + # during __init__/update, and is set to the combined list before getter + # methods are compiled. + attrs: list[str] + + # Temporaries currently on the stack above the locals/attrs frame. + # Always 0 at statement boundaries. + num_temps: int + + # Bytecode signature of the method being compiled, or None for top-level + # functions. + current_sig: Optional[str] + buffer: io.StringIO def __init__(self) -> None: self.locals = [] + self.attrs = [] + self.num_temps = 0 + self.current_sig = None self.buffer = io.StringIO() + def compile(self, source_file: str) -> str: + with open(source_file) as f: + root = ast.parse(f.read()) + self.visit(root) + return self.buffer.getvalue() + + def visit_ClassDef(self, node: ast.ClassDef) -> None: + # Compile methods in a fixed order so that attrs is fully populated + # before getter methods are compiled. + methods = {} + for item in node.body: + if isinstance(item, ast.FunctionDef): + if item.name not in METHOD_SIGS: + raise CompilerError(f"unsupported method: {item.name}", item) + methods[item.name] = item + + self.attrs = [] + if method := methods.get("__init__"): + self._compile_method(method) + # self.attrs now holds init's attrs. update's attrs are appended above + # them, so after update self.attrs is the combined init+update list. + if method := methods.get("update"): + self._compile_method(method) + + for method_name, method in methods.items(): + if method_name not in ("__init__", "update"): + self._compile_method(method) + + def _compile_method(self, node: ast.FunctionDef) -> None: + self.current_sig = METHOD_SIGS[node.name] + self.num_temps = 0 + + # Strip 'self' (and 'internal_dict' for __init__) from the arg list; + # the remaining args become the initial locals. + args = copy(node.args.args) + args.pop(0) # drop 'self' + if node.name == "__init__": + args.pop() # drop trailing 'internal_dict' + + self.locals = [arg.arg for arg in args] + + # Compile into a temporary buffer so the signature line can be + # emitted first. + saved_buffer = self.buffer + self.buffer = io.StringIO() + + self._visit_each(node.body) + + method_output = self.buffer.getvalue() + self.buffer = saved_buffer + self._output(f"@{self.current_sig}:") + self._output(method_output) + + self.locals.clear() + self.current_sig = None + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: - # Initialize `locals` with the (positional) arguments. + # Top-level function (not inside a class). + self.current_sig = None + self.attrs = [] self.locals = [arg.arg for arg in node.args.args] - self.generic_visit(node) + self._visit_each(node.body) self.locals.clear() def visit_Compare(self, node: ast.Compare) -> None: @@ -50,13 +142,18 @@ def visit_Compare(self, node: ast.Compare) -> None: # XXX: Does not handle multiple comparisons, ex: `0 < x < 10` self.visit(node.comparators[0]) self._output(COMPS[type(node.ops[0])]) + # The comparison consumes two values and produces one. + self.num_temps -= 1 def visit_If(self, node: ast.If) -> None: self.visit(node.test) + # `if`/`ifelse` consumes the condition. + self.num_temps = 0 self._output("{") self._visit_each(node.body) if node.orelse: + self.num_temps = 0 self._output("} {") self._visit_each(node.orelse) self._output("} ifelse") @@ -64,6 +161,7 @@ def visit_If(self, node: ast.If) -> None: self._output("} if") def visit_Return(self, node: ast.Return) -> None: + self.num_temps = 0 if node.value: self.visit(node.value) self._output("return") @@ -75,61 +173,147 @@ def visit_Constant(self, node: ast.Constant) -> None: self._output(int(node.value)) else: self._output(node.value) + self.num_temps += 1 def visit_Call(self, node: ast.Call) -> None: - if isinstance(node.func, ast.Attribute): - # The receiver is the left hande side of the dot. - receiver = node.func.value - method = node.func.attr + func = node.func + if isinstance(func, ast.Attribute): + receiver = func.value + method = func.attr + # self is not a valid call receiver. + if isinstance(receiver, ast.Name) and receiver.id == "self": + raise CompilerError( + "self is not a valid call receiver; use self.attr to read an attribute", + node, + ) if selector := BUILTINS.get(method): - # Visit the method's receiver to have its value on the stack. self.visit(receiver) - # Visit the args to position them on the stack. self._visit_each(node.args) self._output(f"{selector} call") - else: - # TODO: fail - print(f"error: unsupported method {node.func.attr}", file=sys.stderr) + # `call` pops the receiver and all args, and pushes one result. + self.num_temps -= len(node.args) + return + raise CompilerError(f"unsupported method: {method}", node) + + if isinstance(func, ast.Name): + raise CompilerError(f"unsupported function: {func.id}", node) + + raise CompilerError("unsupported function call expression", node) def visit_Assign(self, node: ast.Assign) -> None: - # Visit RHS first, putting values on the stack. - self.visit(node.value) - # Determine the name(s). Either a single Name, or a Tuple of Names. + self.num_temps = 0 + target = node.targets[0] + + # Handle self.attr = expr (attribute assignment). + if ( + isinstance(target, ast.Attribute) + and isinstance(target.value, ast.Name) + and target.value.id == "self" + ): + if self.current_sig not in ("@init", "@update"): + raise CompilerError( + "attribute assignment is only allowed in __init__ and update", + node, + ) + + attr = target.attr + if attr in self.attrs: + raise CompilerError(f"attribute '{attr}' is already assigned", node) + + # If the RHS is an argument (the only kind of local permitted in + # __init__) - then it is already on the stack in place, and no + # evaluation is needed. + is_arg = ( + isinstance(node.value, ast.Name) + and self._local_index(node.value) is not None + ) + if not is_arg: + # Evaluate the RHS, leaving its value on the stack. + self.visit(node.value) + + # Record the attr. + self.attrs.append(attr) + return + + # Handle local variable assignment. + if self.current_sig in ("@init", "@update"): + raise CompilerError( + "local variable assignment is not allowed in __init__ or update; " + "use attribute assignment (self.attr = ...) instead", + node, + ) + + # Visit RHS, leaving its value on the stack. + self.visit(node.value) if isinstance(target, ast.Name): - names = [target.id] + names = [target] elif isinstance(target, ast.Tuple): - # These tuple elements are Name nodes. - names = [x.id for x in target.elts] + names = cast(list[ast.Name], target.elts) + else: + names = [] # Forget any previous bindings of these names. # Their values are orphaned on the stack. - for local in self.locals: - if local in names: - old_idx = self.locals.index(local) - self.locals[old_idx] = "" + for name in names: + idx = self._local_index(name) + if idx is not None: + self.locals[idx] = "" - self.locals.extend(names) + self.locals.extend(x.id for x in names) + + def visit_Attribute(self, node: ast.Attribute) -> None: + # Only self.attr reads are supported here. + if not (isinstance(node.value, ast.Name) and node.value.id == "self"): + raise CompilerError( + "unsupported attribute access (only self.attr is supported)", node + ) + attr_idx = self._attr_index(node.attr, node) + pick_idx = self.num_temps + attr_idx + self._output(f"{pick_idx} pick # self.{node.attr}") + self.num_temps += 1 def visit_Name(self, node: ast.Name) -> None: - idx = self.locals.index(node.id) + idx = self._stack_index(node) + if idx is None: + raise CompilerError(f"unknown local variable: {node.id}", node) self._output(f"{idx} pick # {node.id}") + self.num_temps += 1 - def _visit_each(self, nodes: list[ast.AST]) -> None: + def _visit_each(self, nodes: Sequence[ast.AST]) -> None: for child in nodes: self.visit(child) + def _attr_index(self, name: str, node: ast.expr) -> int: + # self.attrs is always the full visible attr frame, so the index is + # the direct pick offset with no further adjustment. + try: + return self.attrs.index(name) + except ValueError: + raise CompilerError(f"unknown attribute: {name}", node) + + def _stack_index(self, name: ast.Name) -> Optional[int]: + # Offset past all attrs and any in-flight temporaries. + idx = self._local_index(name) + if idx is None: + return None + return len(self.attrs) + idx + self.num_temps + + def _local_index(self, name: ast.Name) -> Optional[int]: + try: + return self.locals.index(name.id) + except ValueError: + return None + def _output(self, x: Any) -> None: print(x, file=self.buffer) - @property - def output(self) -> str: - return compiler.buffer.getvalue() - if __name__ == "__main__": - with open(sys.argv[1]) as f: - root = ast.parse(f.read()) + source_file = sys.argv[1] compiler = Compiler() - compiler.visit(root) - print(compiler.output) + try: + output = compiler.compile(source_file) + print(output) + except CompilerError as e: + print(f"{source_file}:{e.lineno}: {e}", file=sys.stderr) >From 08646420e365c811ca060bf7139c40398a024b74 Mon Sep 17 00:00:00 2001 From: Dave Lee <[email protected]> Date: Tue, 3 Mar 2026 14:51:34 -0800 Subject: [PATCH 5/7] Fix unhandled cases in visit_Assign --- lldb/examples/formatter-bytecode/python_to_assembly.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lldb/examples/formatter-bytecode/python_to_assembly.py b/lldb/examples/formatter-bytecode/python_to_assembly.py index 693a3dabb6348..2695c6ea075f1 100755 --- a/lldb/examples/formatter-bytecode/python_to_assembly.py +++ b/lldb/examples/formatter-bytecode/python_to_assembly.py @@ -244,14 +244,15 @@ def visit_Assign(self, node: ast.Assign) -> None: node, ) - # Visit RHS, leaving its value on the stack. - self.visit(node.value) if isinstance(target, ast.Name): names = [target] elif isinstance(target, ast.Tuple): names = cast(list[ast.Name], target.elts) else: - names = [] + raise CompilerError("unsupported assignment target", node) + + # Visit RHS, leaving its value on the stack. + self.visit(node.value) # Forget any previous bindings of these names. # Their values are orphaned on the stack. >From 081ac068fde264c21802fe25270d8b124fa6e5e4 Mon Sep 17 00:00:00 2001 From: Dave Lee <[email protected]> Date: Fri, 6 Mar 2026 12:22:52 -0800 Subject: [PATCH 6/7] Move Python compiler into formatter_bytecode --- lldb/examples/python/formatter_bytecode.py | 354 ++++++++++++++++++++- 1 file changed, 344 insertions(+), 10 deletions(-) diff --git a/lldb/examples/python/formatter_bytecode.py b/lldb/examples/python/formatter_bytecode.py index 471cb2c5b38ff..a188ee1819d24 100644 --- a/lldb/examples/python/formatter_bytecode.py +++ b/lldb/examples/python/formatter_bytecode.py @@ -17,9 +17,12 @@ import re import io +import ast +import enum import textwrap +from copy import copy from dataclasses import dataclass -from typing import BinaryIO, TextIO, Tuple, Union +from typing import Any, BinaryIO, Optional, Sequence, TextIO, Tuple, Union, cast BINARY_VERSION = 1 @@ -173,22 +176,17 @@ def _segment_by_signature(input: list[str]) -> list[Tuple[str, list[str]]]: signature = None tokens = [] - def conclude_segment(): - if not tokens: - raise ValueError(f"empty signature: {signature}") - segments.append((signature, tokens)) - for token in input: if _SIGNATURE_LABEL.match(token): if signature: - conclude_segment() + segments.append((signature, tokens)) signature = token[1:-1] # strip leading @, trailing : tokens = [] else: tokens.append(token) if signature: - conclude_segment() + segments.append((signature, tokens)) return segments @@ -295,7 +293,8 @@ def assemble_file(type_name: str, input: TextIO) -> BytecodeSection: input_tokens = _tokenize(input.read()) signatures = [] for sig, tokens in _segment_by_signature(input_tokens): - signatures.append((sig, assemble_tokens(tokens))) + if tokens: + signatures.append((sig, assemble_tokens(tokens))) return BytecodeSection(type_name, flags=0, signatures=signatures) @@ -662,6 +661,313 @@ def next_byte(): assert False return data[-1] +################################################################################ +# Python Compiler +################################################################################ + +_BUILTINS = { + "Cast": "@cast", + "GetChildAtIndex": "@get_child_at_index", + "GetChildMemberWithName": "@get_child_with_name", + "GetSummary": "@summary", + "GetSyntheticValue": "@get_synthetic_value", + "GetTemplateArgumentType": "@get_template_argument_type", + "GetType": "@get_type", + "GetValueAsUnsigned": "@get_value_as_unsigned", +} + +_COMPS = { + ast.Eq: "=", + ast.NotEq: "!=", + ast.Lt: "<", + ast.LtE: "=<", + ast.Gt: ">", + ast.GtE: "=>", +} + +# Maps Python method names in a formatter class to their bytecode signatures. +_METHOD_SIGS = { + "__init__": "@init", + "update": "@update", + "num_children": "@get_num_children", + "get_child_index": "@get_child_index", + "get_child_at_index": "@get_child_at_index", + "get_value": "@get_value", +} + + +class CompilerError(Exception): + lineno: int + + def __init__(self, message, node: Union[ast.expr, ast.stmt]) -> None: + super().__init__(message) + self.lineno = node.lineno + + +class Compiler(ast.NodeVisitor): + # Names of locals in bottom-to-top stack order. locals[0] is the + # oldest/deepest; locals[-1] is the most recently pushed. + locals: list[str] + + # Names of visible attrs in bottom-to-top stack order. Always holds the + # full combined frame for the method being compiled: grows incrementally + # during __init__/update, and is set to the combined list before getter + # methods are compiled. + attrs: list[str] + + # Temporaries currently on the stack above the locals/attrs frame. + # Always 0 at statement boundaries. + num_temps: int + + # Bytecode signature of the method being compiled, or None for top-level + # functions. + current_sig: Optional[str] + + buffer: io.StringIO + + def __init__(self) -> None: + self.locals = [] + self.attrs = [] + self.num_temps = 0 + self.current_sig = None + self.buffer = io.StringIO() + + def compile(self, source_file: str) -> str: + with open(source_file) as f: + root = ast.parse(f.read()) + self.visit(root) + return self.buffer.getvalue() + + def visit_ClassDef(self, node: ast.ClassDef) -> None: + # Compile methods in a fixed order so that attrs is fully populated + # before getter methods are compiled. + methods = {} + for item in node.body: + if isinstance(item, ast.FunctionDef): + if item.name not in _METHOD_SIGS: + raise CompilerError(f"unsupported method: {item.name}", item) + methods[item.name] = item + + self.attrs = [] + if method := methods.get("__init__"): + self._compile_method(method) + # self.attrs now holds init's attrs. update's attrs are appended above + # them, so after update self.attrs is the combined init+update list. + if method := methods.get("update"): + self._compile_method(method) + + for method_name, method in methods.items(): + if method_name not in ("__init__", "update"): + self._compile_method(method) + + def _compile_method(self, node: ast.FunctionDef) -> None: + self.current_sig = _METHOD_SIGS[node.name] + self.num_temps = 0 + + # Strip 'self' (and 'internal_dict' for __init__) from the arg list; + # the remaining args become the initial locals. + args = copy(node.args.args) + args.pop(0) # drop 'self' + if node.name == "__init__": + args.pop() # drop trailing 'internal_dict' + + self.locals = [arg.arg for arg in args] + + # Compile into a temporary buffer so the signature line can be + # emitted first. + saved_buffer = self.buffer + self.buffer = io.StringIO() + + self._visit_each(node.body) + + method_output = self.buffer.getvalue() + self.buffer = saved_buffer + self._output(f"{self.current_sig}:") + self._output(method_output) + + self.locals.clear() + self.current_sig = None + + def visit_FunctionDef(self, node: ast.FunctionDef) -> None: + # Top-level function (not inside a class). + self.current_sig = None + self.attrs = [] + self.locals = [arg.arg for arg in node.args.args] + self._visit_each(node.body) + self.locals.clear() + + def visit_Compare(self, node: ast.Compare) -> None: + self.visit(node.left) + # XXX: Does not handle multiple comparisons, ex: `0 < x < 10` + self.visit(node.comparators[0]) + self._output(_COMPS[type(node.ops[0])]) + # The comparison consumes two values and produces one. + self.num_temps -= 1 + + def visit_If(self, node: ast.If) -> None: + self.visit(node.test) + # `if`/`ifelse` consumes the condition. + self.num_temps = 0 + + self._output("{") + self._visit_each(node.body) + if node.orelse: + self.num_temps = 0 + self._output("} {") + self._visit_each(node.orelse) + self._output("} ifelse") + else: + self._output("} if") + + def visit_Return(self, node: ast.Return) -> None: + self.num_temps = 0 + if node.value: + self.visit(node.value) + self._output("return") + + def visit_Constant(self, node: ast.Constant) -> None: + if isinstance(node.value, str): + self._output(f'"{node.value}"') + elif isinstance(node.value, bool): + self._output(int(node.value)) + else: + self._output(node.value) + self.num_temps += 1 + + def visit_Call(self, node: ast.Call) -> None: + func = node.func + if isinstance(func, ast.Attribute): + receiver = func.value + method = func.attr + # self is not a valid call receiver. + if isinstance(receiver, ast.Name) and receiver.id == "self": + raise CompilerError( + "self is not a valid call receiver; use self.attr to read an attribute", + node, + ) + if selector := _BUILTINS.get(method): + self.visit(receiver) + self._visit_each(node.args) + self._output(f"{selector} call") + # `call` pops the receiver and all args, and pushes one result. + self.num_temps -= len(node.args) + return + raise CompilerError(f"unsupported method: {method}", node) + + if isinstance(func, ast.Name): + raise CompilerError(f"unsupported function: {func.id}", node) + + raise CompilerError("unsupported function call expression", node) + + def visit_Assign(self, node: ast.Assign) -> None: + self.num_temps = 0 + + target = node.targets[0] + + # Handle self.attr = expr (attribute assignment). + if ( + isinstance(target, ast.Attribute) + and isinstance(target.value, ast.Name) + and target.value.id == "self" + ): + if self.current_sig not in ("@init", "@update"): + raise CompilerError( + "attribute assignment is only allowed in __init__ and update", + node, + ) + + attr = target.attr + if attr in self.attrs: + raise CompilerError(f"attribute '{attr}' is already assigned", node) + + # If the RHS is an argument (the only kind of local permitted in + # __init__) - then it is already on the stack in place, and no + # evaluation is needed. + is_arg = ( + isinstance(node.value, ast.Name) + and self._local_index(node.value) is not None + ) + if not is_arg: + # Evaluate the RHS, leaving its value on the stack. + self.visit(node.value) + + # Record the attr. + self.attrs.append(attr) + return + + # Handle local variable assignment. + if self.current_sig in ("@init", "@update"): + raise CompilerError( + "local variable assignment is not allowed in __init__ or update; " + "use attribute assignment (self.attr = ...) instead", + node, + ) + + if isinstance(target, ast.Name): + names = [target] + elif isinstance(target, ast.Tuple): + names = cast(list[ast.Name], target.elts) + else: + raise CompilerError("unsupported assignment target", node) + + # Visit RHS, leaving its value on the stack. + self.visit(node.value) + + # Forget any previous bindings of these names. + # Their values are orphaned on the stack. + for name in names: + idx = self._local_index(name) + if idx is not None: + self.locals[idx] = "" + + self.locals.extend(x.id for x in names) + + def visit_Attribute(self, node: ast.Attribute) -> None: + # Only self.attr reads are supported here. + if not (isinstance(node.value, ast.Name) and node.value.id == "self"): + raise CompilerError( + "unsupported attribute access (only self.attr is supported)", node + ) + attr_idx = self._attr_index(node.attr, node) + pick_idx = self.num_temps + attr_idx + self._output(f"{pick_idx} pick") # "# self.{node.attr}" + self.num_temps += 1 + + def visit_Name(self, node: ast.Name) -> None: + idx = self._stack_index(node) + if idx is None: + raise CompilerError(f"unknown local variable: {node.id}", node) + self._output(f"{idx} pick") # "# {node.id}" + self.num_temps += 1 + + def _visit_each(self, nodes: Sequence[ast.AST]) -> None: + for child in nodes: + self.visit(child) + + def _attr_index(self, name: str, node: ast.expr) -> int: + # self.attrs is always the full visible attr frame, so the index is + # the direct pick offset with no further adjustment. + try: + return self.attrs.index(name) + except ValueError: + raise CompilerError(f"unknown attribute: {name}", node) + + def _stack_index(self, name: ast.Name) -> Optional[int]: + # Offset past all attrs and any in-flight temporaries. + idx = self._local_index(name) + if idx is None: + return None + return len(self.attrs) + idx + self.num_temps + + def _local_index(self, name: ast.Name) -> Optional[int]: + try: + return self.locals.index(name.id) + except ValueError: + return None + + def _output(self, x: Any) -> None: + print(x, file=self.buffer) + ################################################################################ # Helper functions. @@ -717,6 +1023,12 @@ def _main(): mode = parser.add_mutually_exclusive_group() mode.add_argument( "-c", + "--compile", + action="store_true", + help="compile Python into bytecode", + ) + mode.add_argument( + "-a", "--assemble", action="store_true", help="assemble assembly into bytecode", @@ -727,6 +1039,7 @@ def _main(): action="store_true", help="disassemble bytecode", ) + parser.add_argument("-n", "--type-name", help="source type of formatter") parser.add_argument( "-o", "--output", @@ -742,7 +1055,28 @@ def _main(): parser.add_argument("-t", "--test", action="store_true", help="run unit tests") args = parser.parse_args() - if args.assemble: + if args.compile: + if not args.type_name: + parser.error("--type-name is required with --compile") + if not args.output: + parser.error("--output is required with --compile") + compiler = Compiler() + try: + assembly = compiler.compile(args.input) + except CompilerError as e: + print(f"{args.input}:{e.lineno}: {e}", file=sys.stderr) + return + + section = assemble_file(args.type_name, io.StringIO(assembly)) + if args.format == "binary": + with open(args.output, "wb") as output: + section.write_binary(output) + else: # args.format == "c" + with open(args.output, "w") as output: + section.write_source(output) + elif args.assemble: + if not args.type_name: + parser.error("--type-name is required with --assemble") if not args.output: parser.error("--output is required with --assemble") with open(args.input) as input: >From e3cf78cd351304a0da41bd294592e47546123845 Mon Sep 17 00:00:00 2001 From: Dave Lee <[email protected]> Date: Fri, 6 Mar 2026 13:05:48 -0800 Subject: [PATCH 7/7] Delete old files --- .../bytecode_to_bytecode.py | 141 -------- .../formatter-bytecode/optional_summary.py | 14 - .../formatter-bytecode/python_to_assembly.py | 320 ------------------ 3 files changed, 475 deletions(-) delete mode 100755 lldb/examples/formatter-bytecode/bytecode_to_bytecode.py delete mode 100644 lldb/examples/formatter-bytecode/optional_summary.py delete mode 100755 lldb/examples/formatter-bytecode/python_to_assembly.py diff --git a/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py b/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py deleted file mode 100755 index 89227b094957c..0000000000000 --- a/lldb/examples/formatter-bytecode/bytecode_to_bytecode.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/python3 - -import dis -import sys -from types import CodeType -from typing import Iterable, Iterator, cast - - -# TODO: strlen, fmt -_SELECTORS = { - "Cast": "@cast", - "GetChildAtIndex": "@get_child_at_index", - "GetChildIndex": "@get_child_index", - "GetChildMemberWithName": "@get_child_with_name", - "GetNumChildren": "@get_num_children", - "GetSummary": "@summary", - "GetTemplateArgumentType": "@get_template_argument_type", - "GetType": "@get_type", - "GetValue": "@get_value", - "GetValueAsAddress": "@get_value_as_address", - "GetValueAsSigned": "@get_value_as_signed", - "GetValueAsUnsigned": "@get_value_as_unsigned", -} - - -def _main(source_file): - with open(source_file) as f: - source_code = f.read() - bytecode = dis.Bytecode(source_code) - for func_body in _function_bodies(bytecode): - instructions = dis.get_instructions(func_body) - for op in _translate(instructions): - print(op) - - -def _function_bodies(bytecode: dis.Bytecode) -> Iterable[CodeType]: - """ - Iterate the function bodies (code object children) of the given Bytecode. - """ - for const in bytecode.codeobj.co_consts: - if hasattr(const, "co_code"): - yield const - - -def _translate(instructions: Iterator[dis.Instruction]) -> list[str]: - """ - Convert Python instructions to LLDB data formatter bytecode operations. - """ - result = [] - _translate_list(list(instructions), result) - return result - - -def _translate_list(instructions: list[dis.Instruction], result: list[str]): - """ - Convert sequences of Python bytecode to sequences of LLDB data formatter - bytecode. - - This function performs course grained translations - sequences of input to - sequences of output. For translations of individual instructions, see - `_translate_instruction`. - """ - while instructions: - inst = instructions.pop(0) - op = inst.opname - if op == "LOAD_METHOD": - # Method call sequences begin with a LOAD_METHOD instruction, then - # load the arguments on to the stack, and end with the CALL_METHOD - # instruction. - if selector := _SELECTORS.get(inst.argval): - while instructions: - if instructions[0] == "LOAD_METHOD": - # Begin a nested method call. - _translate_list(instructions, result) - else: - # TODO: Can LOAD_METHOD, ..., CALL_METHOD sequences - # contain flow control? If so this needs to gather - # instructions and call `_translate_list`, instead of - # handling each instruction individually. - x = instructions.pop(0) - if x.opname != "CALL_METHOD": - result.append(_translate_instruction(x)) - else: - result.append(f"{selector} call") - break - elif op == "POP_JUMP_IF_FALSE": - # Convert to an `{ ... } if` sequence. - result.append("{") - offset = cast(int, inst.arg) - idx = _index_of_offset(instructions, offset) - # Split the condional block prefix from the remaining instructions. - block = instructions[:idx] - del instructions[:idx] - _translate_list(block, result) - result.append("} if") - else: - result.append(_translate_instruction(inst)) - - -def _translate_instruction(inst: dis.Instruction) -> str: - """ - Convert a single Python bytecode instruction to an LLDB data formatter - bytecode operation. - - This function performs one-to-one translations. For translations of - sequences of instructions, see `_translate_list`. - """ - op = inst.opname - if op == "COMPARE_OP": - if inst.argval == "==": - return "=" - elif op == "LOAD_CONST": - if isinstance(inst.argval, str): - # TODO: Handle strings with inner double quotes ("). Alternatively, - # use `repr()` and allow the bytecode assembly to use single quotes. - return f'"{inst.argval}"' - elif isinstance(inst.argval, bool): - num = int(inst.argval) - return f"{num}" - else: - return inst.argrepr - elif op == "LOAD_FAST": - return f"{inst.arg} pick # {inst.argval}" - elif op == "RETURN_VALUE": - return "return" - elif op in ("STORE_FAST", "STORE_NAME"): - # This is fake. There is no `put` operation (yet?). - return f"{inst.arg} put # {inst.argval}" - return op - - -def _index_of_offset(instructions: list[dis.Instruction], offset) -> int: - """Find the index of the instruction having the given offset.""" - for i, inst in enumerate(instructions): - if inst.offset == offset: - return i - raise ValueError(f"invalid offset: {offset}") - - -if __name__ == "__main__": - _main(sys.argv[1]) diff --git a/lldb/examples/formatter-bytecode/optional_summary.py b/lldb/examples/formatter-bytecode/optional_summary.py deleted file mode 100644 index 68e672d86613d..0000000000000 --- a/lldb/examples/formatter-bytecode/optional_summary.py +++ /dev/null @@ -1,14 +0,0 @@ -def OptionalSummaryProvider(valobj, _): - failure = 2 - storage = valobj.GetChildMemberWithName("Storage") - hasVal = storage.GetChildMemberWithName("hasVal").GetValueAsUnsigned(failure) - if hasVal == failure: - return "<could not read Optional>" - - if hasVal == 0: - return "None" - - underlying_type = storage.GetType().GetTemplateArgumentType(0) - value = storage.GetChildMemberWithName("value") - value = value.Cast(underlying_type) - return value.GetSummary() diff --git a/lldb/examples/formatter-bytecode/python_to_assembly.py b/lldb/examples/formatter-bytecode/python_to_assembly.py deleted file mode 100755 index 2695c6ea075f1..0000000000000 --- a/lldb/examples/formatter-bytecode/python_to_assembly.py +++ /dev/null @@ -1,320 +0,0 @@ -#!/usr/bin/python3 - -import ast -import io -import sys -from copy import copy -from typing import Any, Optional, Sequence, Union, cast - -BUILTINS = { - "Cast": "@cast", - "GetChildAtIndex": "@get_child_at_index", - "GetChildMemberWithName": "@get_child_with_name", - "GetSummary": "@summary", - "GetSyntheticValue": "@get_synthetic_value", - "GetTemplateArgumentType": "@get_template_argument_type", - "GetType": "@get_type", - "GetValueAsUnsigned": "@get_value_as_unsigned", -} - -COMPS = { - ast.Eq: "=", - ast.NotEq: "!=", - ast.Lt: "<", - ast.LtE: "=<", - ast.Gt: ">", - ast.GtE: "=>", -} - -# Maps Python method names in a formatter class to their bytecode signatures. -METHOD_SIGS = { - "__init__": "@init", - "update": "@update", - "num_children": "@get_num_children", - "get_child_index": "@get_child_index", - "get_child_at_index": "@get_child_at_index", - "get_value": "@get_value", -} - - -class CompilerError(Exception): - lineno: int - - def __init__(self, message, node: Union[ast.expr, ast.stmt]) -> None: - super().__init__(message) - self.lineno = node.lineno - - -class Compiler(ast.NodeVisitor): - # Names of locals in bottom-to-top stack order. locals[0] is the - # oldest/deepest; locals[-1] is the most recently pushed. - locals: list[str] - - # Names of visible attrs in bottom-to-top stack order. Always holds the - # full combined frame for the method being compiled: grows incrementally - # during __init__/update, and is set to the combined list before getter - # methods are compiled. - attrs: list[str] - - # Temporaries currently on the stack above the locals/attrs frame. - # Always 0 at statement boundaries. - num_temps: int - - # Bytecode signature of the method being compiled, or None for top-level - # functions. - current_sig: Optional[str] - - buffer: io.StringIO - - def __init__(self) -> None: - self.locals = [] - self.attrs = [] - self.num_temps = 0 - self.current_sig = None - self.buffer = io.StringIO() - - def compile(self, source_file: str) -> str: - with open(source_file) as f: - root = ast.parse(f.read()) - self.visit(root) - return self.buffer.getvalue() - - def visit_ClassDef(self, node: ast.ClassDef) -> None: - # Compile methods in a fixed order so that attrs is fully populated - # before getter methods are compiled. - methods = {} - for item in node.body: - if isinstance(item, ast.FunctionDef): - if item.name not in METHOD_SIGS: - raise CompilerError(f"unsupported method: {item.name}", item) - methods[item.name] = item - - self.attrs = [] - if method := methods.get("__init__"): - self._compile_method(method) - # self.attrs now holds init's attrs. update's attrs are appended above - # them, so after update self.attrs is the combined init+update list. - if method := methods.get("update"): - self._compile_method(method) - - for method_name, method in methods.items(): - if method_name not in ("__init__", "update"): - self._compile_method(method) - - def _compile_method(self, node: ast.FunctionDef) -> None: - self.current_sig = METHOD_SIGS[node.name] - self.num_temps = 0 - - # Strip 'self' (and 'internal_dict' for __init__) from the arg list; - # the remaining args become the initial locals. - args = copy(node.args.args) - args.pop(0) # drop 'self' - if node.name == "__init__": - args.pop() # drop trailing 'internal_dict' - - self.locals = [arg.arg for arg in args] - - # Compile into a temporary buffer so the signature line can be - # emitted first. - saved_buffer = self.buffer - self.buffer = io.StringIO() - - self._visit_each(node.body) - - method_output = self.buffer.getvalue() - self.buffer = saved_buffer - self._output(f"@{self.current_sig}:") - self._output(method_output) - - self.locals.clear() - self.current_sig = None - - def visit_FunctionDef(self, node: ast.FunctionDef) -> None: - # Top-level function (not inside a class). - self.current_sig = None - self.attrs = [] - self.locals = [arg.arg for arg in node.args.args] - self._visit_each(node.body) - self.locals.clear() - - def visit_Compare(self, node: ast.Compare) -> None: - self.visit(node.left) - # XXX: Does not handle multiple comparisons, ex: `0 < x < 10` - self.visit(node.comparators[0]) - self._output(COMPS[type(node.ops[0])]) - # The comparison consumes two values and produces one. - self.num_temps -= 1 - - def visit_If(self, node: ast.If) -> None: - self.visit(node.test) - # `if`/`ifelse` consumes the condition. - self.num_temps = 0 - - self._output("{") - self._visit_each(node.body) - if node.orelse: - self.num_temps = 0 - self._output("} {") - self._visit_each(node.orelse) - self._output("} ifelse") - else: - self._output("} if") - - def visit_Return(self, node: ast.Return) -> None: - self.num_temps = 0 - if node.value: - self.visit(node.value) - self._output("return") - - def visit_Constant(self, node: ast.Constant) -> None: - if isinstance(node.value, str): - self._output(f'"{node.value}"') - elif isinstance(node.value, bool): - self._output(int(node.value)) - else: - self._output(node.value) - self.num_temps += 1 - - def visit_Call(self, node: ast.Call) -> None: - func = node.func - if isinstance(func, ast.Attribute): - receiver = func.value - method = func.attr - # self is not a valid call receiver. - if isinstance(receiver, ast.Name) and receiver.id == "self": - raise CompilerError( - "self is not a valid call receiver; use self.attr to read an attribute", - node, - ) - if selector := BUILTINS.get(method): - self.visit(receiver) - self._visit_each(node.args) - self._output(f"{selector} call") - # `call` pops the receiver and all args, and pushes one result. - self.num_temps -= len(node.args) - return - raise CompilerError(f"unsupported method: {method}", node) - - if isinstance(func, ast.Name): - raise CompilerError(f"unsupported function: {func.id}", node) - - raise CompilerError("unsupported function call expression", node) - - def visit_Assign(self, node: ast.Assign) -> None: - self.num_temps = 0 - - target = node.targets[0] - - # Handle self.attr = expr (attribute assignment). - if ( - isinstance(target, ast.Attribute) - and isinstance(target.value, ast.Name) - and target.value.id == "self" - ): - if self.current_sig not in ("@init", "@update"): - raise CompilerError( - "attribute assignment is only allowed in __init__ and update", - node, - ) - - attr = target.attr - if attr in self.attrs: - raise CompilerError(f"attribute '{attr}' is already assigned", node) - - # If the RHS is an argument (the only kind of local permitted in - # __init__) - then it is already on the stack in place, and no - # evaluation is needed. - is_arg = ( - isinstance(node.value, ast.Name) - and self._local_index(node.value) is not None - ) - if not is_arg: - # Evaluate the RHS, leaving its value on the stack. - self.visit(node.value) - - # Record the attr. - self.attrs.append(attr) - return - - # Handle local variable assignment. - if self.current_sig in ("@init", "@update"): - raise CompilerError( - "local variable assignment is not allowed in __init__ or update; " - "use attribute assignment (self.attr = ...) instead", - node, - ) - - if isinstance(target, ast.Name): - names = [target] - elif isinstance(target, ast.Tuple): - names = cast(list[ast.Name], target.elts) - else: - raise CompilerError("unsupported assignment target", node) - - # Visit RHS, leaving its value on the stack. - self.visit(node.value) - - # Forget any previous bindings of these names. - # Their values are orphaned on the stack. - for name in names: - idx = self._local_index(name) - if idx is not None: - self.locals[idx] = "" - - self.locals.extend(x.id for x in names) - - def visit_Attribute(self, node: ast.Attribute) -> None: - # Only self.attr reads are supported here. - if not (isinstance(node.value, ast.Name) and node.value.id == "self"): - raise CompilerError( - "unsupported attribute access (only self.attr is supported)", node - ) - attr_idx = self._attr_index(node.attr, node) - pick_idx = self.num_temps + attr_idx - self._output(f"{pick_idx} pick # self.{node.attr}") - self.num_temps += 1 - - def visit_Name(self, node: ast.Name) -> None: - idx = self._stack_index(node) - if idx is None: - raise CompilerError(f"unknown local variable: {node.id}", node) - self._output(f"{idx} pick # {node.id}") - self.num_temps += 1 - - def _visit_each(self, nodes: Sequence[ast.AST]) -> None: - for child in nodes: - self.visit(child) - - def _attr_index(self, name: str, node: ast.expr) -> int: - # self.attrs is always the full visible attr frame, so the index is - # the direct pick offset with no further adjustment. - try: - return self.attrs.index(name) - except ValueError: - raise CompilerError(f"unknown attribute: {name}", node) - - def _stack_index(self, name: ast.Name) -> Optional[int]: - # Offset past all attrs and any in-flight temporaries. - idx = self._local_index(name) - if idx is None: - return None - return len(self.attrs) + idx + self.num_temps - - def _local_index(self, name: ast.Name) -> Optional[int]: - try: - return self.locals.index(name.id) - except ValueError: - return None - - def _output(self, x: Any) -> None: - print(x, file=self.buffer) - - -if __name__ == "__main__": - source_file = sys.argv[1] - compiler = Compiler() - try: - output = compiler.compile(source_file) - print(output) - except CompilerError as e: - print(f"{source_file}:{e.lineno}: {e}", file=sys.stderr) _______________________________________________ lldb-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits
