Tested-by: Nicholas Pratte <npra...@iol.unh.edu>
Reviewed-by: Nicholas Pratte <npra...@iol.unh.edu>
On Thu, Jun 6, 2024 at 5:34 PM Luca Vizzarro <luca.vizza...@arm.com> wrote: > > Adds parsing text into a custom dataclass. It provides a new > `TextParser` dataclass to be inherited. This implements the `parse` > method, which combined with the parser functions, it can automatically > parse the value for each field. > > This new utility will facilitate and simplify the parsing of complex > command outputs, while ensuring that the codebase does not get bloated > and stays flexible. > > Signed-off-by: Luca Vizzarro <luca.vizza...@arm.com> > Reviewed-by: Paul Szczepanek <paul.szczepa...@arm.com> > --- > dts/framework/exception.py | 9 ++ > dts/framework/parser.py | 229 +++++++++++++++++++++++++++++++++++++ > 2 files changed, 238 insertions(+) > create mode 100644 dts/framework/parser.py > > diff --git a/dts/framework/exception.py b/dts/framework/exception.py > index cce1e0231a..d9d690037d 100644 > --- a/dts/framework/exception.py > +++ b/dts/framework/exception.py > @@ -31,6 +31,8 @@ class ErrorSeverity(IntEnum): > #: > SSH_ERR = 4 > #: > + INTERNAL_ERR = 5 > + #: > DPDK_BUILD_ERR = 10 > #: > TESTCASE_VERIFY_ERR = 20 > @@ -192,3 +194,10 @@ def __init__(self, suite_name: str) -> None: > def __str__(self) -> str: > """Add some context to the string representation.""" > return f"Blocking suite {self._suite_name} failed." > + > + > +class InternalError(DTSError): > + """An internal error or bug has occurred in DTS.""" > + > + #: > + severity: ClassVar[ErrorSeverity] = ErrorSeverity.INTERNAL_ERR > diff --git a/dts/framework/parser.py b/dts/framework/parser.py > new file mode 100644 > index 0000000000..741dfff821 > --- /dev/null > +++ b/dts/framework/parser.py > @@ -0,0 +1,229 @@ > +# SPDX-License-Identifier: BSD-3-Clause > +# Copyright(c) 2024 Arm Limited > + > +"""Parsing utility module. > + > +This module provides :class:`~TextParser` which can be used to model any > dataclass to a block of > +text. 
> +""" > + > +import re > +from abc import ABC > +from dataclasses import MISSING, dataclass, fields > +from functools import partial > +from typing import Any, Callable, TypedDict, cast > + > +from typing_extensions import Self > + > +from framework.exception import InternalError > + > + > +class ParserFn(TypedDict): > + """Parser function in a dict compatible with the > :func:`dataclasses.field` metadata param.""" > + > + #: > + TextParser_fn: Callable[[str], Any] > + > + > +@dataclass > +class TextParser(ABC): > + r"""Helper abstract dataclass that parses a text according to the > fields' rules. > + > + In order to enable text parsing in a dataclass, subclass it with > :class:`TextParser`. > + > + The provided `parse` method is a factory which parses the supplied text > and creates an instance > + with populated dataclass fields. This takes text as an argument and for > each field in the > + dataclass, the field's parser function is run against the whole text. > The returned value is then > + assigned to the field of the new instance. If the field does not have a > parser function its > + default value or factory is used instead. If no default is available > either, an exception is > + raised. > + > + This class provides a selection of parser functions and a function to > wrap parser functions with > + generic functions. Parser functions are designed to be passed to the > fields' metadata param. The > + most commonly used parser function is expected to be the `find` method, > which runs a regular > + expression against the text to find matches. 
> + > + Example: > + The following example makes use of and demonstrates every parser > function available: > + > + .. code:: python > + > + from dataclasses import dataclass, field > + from enum import Enum > + from framework.parser import TextParser > + > + class Colour(Enum): > + BLACK = 1 > + WHITE = 2 > + > + @classmethod > + def from_str(cls, text: str): > + match text: > + case "black": > + return cls.BLACK > + case "white": > + return cls.WHITE > + case _: > + return None # unsupported colour > + > + @classmethod > + def make_parser(cls): > + # make a parser function that finds a match and > + # then makes it a Colour object through Colour.from_str > + return TextParser.wrap(TextParser.find(r"is a (\w+)"), > cls.from_str) > + > + @dataclass > + class Animal(TextParser): > + kind: str = field(metadata=TextParser.find(r"is a \w+ > (\w+)")) > + name: str = field(metadata=TextParser.find(r"^(\w+)")) > + colour: Colour = field(metadata=Colour.make_parser()) > + age: int = field(metadata=TextParser.find_int(r"aged (\d+)")) > + > + steph = Animal.parse("Stephanie is a white cat aged 10") > + print(steph) # Animal(kind='cat', name='Stephanie', > colour=<Colour.WHITE: 2>, age=10) > + """ > + > + """============ BEGIN PARSER FUNCTIONS ============""" > + > + @staticmethod > + def wrap(parser_fn: ParserFn, wrapper_fn: Callable) -> ParserFn: > + """Makes a wrapped parser function. > + > + `parser_fn` is called and if a non-None value is returned, > `wrapper_fn` is called with > + it. Otherwise the function returns early with None. In pseudo-code: > + > + intermediate_value := parser_fn(input) > + if intermediate_value is None then > + output := None > + else > + output := wrapper_fn(intermediate_value) > + > + Args: > + parser_fn: The dictionary storing the parser function to be > wrapped. > + wrapper_fn: The function that wraps `parser_fn`. 
> + > + Returns: > + ParserFn: A dictionary for the `dataclasses.field` metadata > argument containing the > + newly wrapped parser function. > + """ > + inner_fn = parser_fn["TextParser_fn"] > + > + def _composite_parser_fn(text: str) -> Any: > + intermediate_value = inner_fn(text) > + if intermediate_value is None: > + return None > + return wrapper_fn(intermediate_value) > + > + return ParserFn(TextParser_fn=_composite_parser_fn) > + > + @staticmethod > + def find( > + pattern: str | re.Pattern[str], > + flags: re.RegexFlag = re.RegexFlag(0), > + named: bool = False, > + ) -> ParserFn: > + """Makes a parser function that finds a regular expression match in > the text. > + > + If the pattern has any capturing groups, it returns None if no match > was found, otherwise a > + tuple containing the values per each group is returned. If the > pattern has only one > + capturing group and a match was found, its value is returned. If the > pattern has no > + capturing groups then either True or False is returned if the > pattern had a match or not. > + > + Args: > + pattern: The regular expression pattern. > + flags: The regular expression flags. Ignored if the given > pattern is already compiled. > + named: If set to True only the named capturing groups will be > returned, as a dictionary. > + > + Returns: > + ParserFn: A dictionary for the `dataclasses.field` metadata > argument containing the find > + parser function. 
> + """ > + if isinstance(pattern, str): > + pattern = re.compile(pattern, flags) > + > + def _find(text: str) -> Any: > + m = pattern.search(text) > + if m is None: > + return None if pattern.groups > 0 else False > + > + if pattern.groups == 0: > + return True > + > + if named: > + return m.groupdict() > + > + matches = m.groups() > + if len(matches) == 1: > + return matches[0] > + > + return matches > + > + return ParserFn(TextParser_fn=_find) > + > + @staticmethod > + def find_int( > + pattern: str | re.Pattern[str], > + flags: re.RegexFlag = re.RegexFlag(0), > + int_base: int = 0, > + ) -> ParserFn: > + """Makes a parser function that converts the match of :meth:`~find` > to int. > + > + This function is compatible only with a pattern containing one > capturing group. > + > + Args: > + pattern: The regular expression pattern. > + flags: The regular expression flags. Ignored if the given > pattern is already compiled. > + int_base: The base of the number to convert from. > + > + Raises: > + InternalError: If the pattern does not have exactly one > capturing group. > + > + Returns: > + ParserFn: A dictionary for the `dataclasses.field` metadata > argument containing the > + :meth:`~find` parser function wrapped by the int built-in. > + """ > + if isinstance(pattern, str): > + pattern = re.compile(pattern, flags) > + > + if pattern.groups != 1: > + raise InternalError("only one capturing group is allowed with > this parser function") > + > + return TextParser.wrap(TextParser.find(pattern), partial(int, > base=int_base)) > + > + """============ END PARSER FUNCTIONS ============""" > + > + @classmethod > + def parse(cls, text: str) -> Self: > + """Creates a new instance of the class from the given text. > + > + A new class instance is created with all the fields that have a > parser function in their > + metadata. Fields without one are ignored and are expected to have a > default value, otherwise > + the class initialization will fail. 
> + > + A field is populated with the value returned by its corresponding > parser function. > + > + Args: > + text: the text to parse > + > + Raises: > + InternalError: if the parser did not find a match and the field > does not have a default > + value or default factory. > + > + Returns: > + A new instance of the class. > + """ > + fields_values = {} > + for field in fields(cls): > + parse = cast(ParserFn, field.metadata).get("TextParser_fn") > + if parse is None: > + continue > + > + value = parse(text) > + if value is not None: > + fields_values[field.name] = value > + elif field.default is MISSING and field.default_factory is > MISSING: > + raise InternalError( > + f"parser for field {field.name} returned None, but the > field has no default" > + ) > + > + return cls(**fields_values) > -- > 2.34.1 >