================
@@ -119,16 +136,89 @@ def define_selector(n, name):
# Compiler.
################################################################################
+_SIGNATURE_LABEL = re.compile(f"@(?:{SIGNATURE_NAMES}):$")  # a label token: "@", a name from SIGNATURE_NAMES, then ":" (presumably SIGNATURE_NAMES is a "|"-joined alternation - confirm)
+
+
+def _tokenize(assembler: str) -> list[str]:
+ """Convert string of assembly into tokens."""
+ # With one exception, tokens are simply sequences of non-space characters.
+ # The one exception is string literals, which may have spaces.
+
+ # To parse strings, which can contain escaped contents, use a "Friedl
+ # unrolled loop". The high level of such a regex is:
+ # open normal* ( special normal* )* close
+ # which for string literals is:
+ string_literal = r'" [^"\\]* (?: \\. [^"\\]* )* "'
+
+ return re.findall(rf"{string_literal} | \S+", assembler, re.VERBOSE)
+
+
def _segment_by_signature(input: list[str]) -> list[tuple[str, list[str]]]:
    """Segment the input tokens along signature labels.

    A signature label is any token matched by ``_SIGNATURE_LABEL``. Each token
    that follows a label, up to the next label or the end of the input,
    belongs to that label's segment.

    Args:
        input: The tokens of the assembly source, in order.

    Returns:
        ``(signature, tokens)`` pairs in order of appearance, where
        ``signature`` is the label with its leading ``@`` and trailing ``:``
        removed. Empty input yields an empty list.

    Raises:
        ValueError: If a token appears before the first signature label, or
            if a signature label is not followed by at least one token.
    """
    segments: list[tuple[str, list[str]]] = []

    def check_last_segment() -> None:
        # A label immediately followed by another label (or by end of input)
        # has no body to compile.
        if segments and not segments[-1][1]:
            raise ValueError(f"empty signature: {segments[-1][0]}")

    for token in input:
        if _SIGNATURE_LABEL.match(token):
            check_last_segment()
            # Strip leading @, trailing :
            segments.append((token[1:-1], []))
        elif segments:
            segments[-1][1].append(token)
        else:
            # Tokens that belong to no signature used to be dropped silently;
            # report them instead of losing input.
            raise ValueError(f"token before first signature label: {token}")

    check_last_segment()
    return segments
+
+
+def compile_file(type_name: str, input: TextIO, output: BinaryIO) -> None:
+ input_tokens = _tokenize(input.read())
+
+ signatures = {}
+ for sig, tokens in _segment_by_signature(input_tokens):
+ if sig in signatures:
+ raise ValueError(f"duplicate signature: {sig}")
+ signatures[sig] = compile_tokens(tokens)
+
+ # FIXME: review use of ints below, check if any are in fact uleb.
+ bin = bytearray()
+ bin.extend(_to_uleb(len(type_name)))
+ bin.extend(bytes(type_name, encoding="utf-8"))
+ flags = 0
+ bin.extend(_to_byte(flags))
+ for sig, bc in signatures.items():
+ bin.extend(_to_byte(SIGNATURES[sig]))
+ bin.extend(_to_uleb(len(bc)))
+ bin.extend(bc)
+
+ # FIXME: is version a uleb?
+ output.write(_to_byte(BINARY_VERSION))
----------------
kastiglione wrote:
Good point; the ints in this function (flags, signature, version) are all
guaranteed to be less than 0x7f, so each one encodes to the same single byte
whether it is written as a plain byte or as a uleb.
https://github.com/llvm/llvm-project/pull/183804
_______________________________________________
lldb-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/lldb-commits