Tested-by: Nicholas Pratte <npra...@iol.unh.edu>
Reviewed-by: Nicholas Pratte <npra...@iol.unh.edu>

On Thu, Jun 6, 2024 at 5:34 PM Luca Vizzarro <luca.vizza...@arm.com> wrote:
>
> Adds parsing of text into a custom dataclass. It provides a new
> `TextParser` dataclass to be inherited. This implements the `parse`
> method which, combined with the parser functions, can automatically
> parse the value for each field.
>
> This new utility will facilitate and simplify the parsing of complex
> command outputs, while ensuring that the codebase does not get bloated
> and stays flexible.
>
> Signed-off-by: Luca Vizzarro <luca.vizza...@arm.com>
> Reviewed-by: Paul Szczepanek <paul.szczepa...@arm.com>
> ---
>  dts/framework/exception.py |   9 ++
>  dts/framework/parser.py    | 229 +++++++++++++++++++++++++++++++++++++
>  2 files changed, 238 insertions(+)
>  create mode 100644 dts/framework/parser.py
>
> diff --git a/dts/framework/exception.py b/dts/framework/exception.py
> index cce1e0231a..d9d690037d 100644
> --- a/dts/framework/exception.py
> +++ b/dts/framework/exception.py
> @@ -31,6 +31,8 @@ class ErrorSeverity(IntEnum):
>      #:
>      SSH_ERR = 4
>      #:
> +    INTERNAL_ERR = 5
> +    #:
>      DPDK_BUILD_ERR = 10
>      #:
>      TESTCASE_VERIFY_ERR = 20
> @@ -192,3 +194,10 @@ def __init__(self, suite_name: str) -> None:
>      def __str__(self) -> str:
>          """Add some context to the string representation."""
>          return f"Blocking suite {self._suite_name} failed."
> +
> +
> +class InternalError(DTSError):
> +    """An internal error or bug has occurred in DTS."""
> +
> +    #:
> +    severity: ClassVar[ErrorSeverity] = ErrorSeverity.INTERNAL_ERR
> diff --git a/dts/framework/parser.py b/dts/framework/parser.py
> new file mode 100644
> index 0000000000..741dfff821
> --- /dev/null
> +++ b/dts/framework/parser.py
> @@ -0,0 +1,229 @@
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright(c) 2024 Arm Limited
> +
> +"""Parsing utility module.
> +
> +This module provides :class:`~TextParser` which can be used to model any 
> dataclass to a block of
> +text.
> +"""
> +
> +import re
> +from abc import ABC
> +from dataclasses import MISSING, dataclass, fields
> +from functools import partial
> +from typing import Any, Callable, TypedDict, cast
> +
> +from typing_extensions import Self
> +
> +from framework.exception import InternalError
> +
> +
> +class ParserFn(TypedDict):
> +    """Parser function in a dict compatible with the 
> :func:`dataclasses.field` metadata param."""
> +
> +    #:
> +    TextParser_fn: Callable[[str], Any]
> +
> +
> +@dataclass
> +class TextParser(ABC):
> +    r"""Helper abstract dataclass that parses a text according to the 
> fields' rules.
> +
> +    In order to enable text parsing in a dataclass, subclass it with 
> :class:`TextParser`.
> +
> +    The provided `parse` method is a factory which parses the supplied text 
> and creates an instance
> +    with populated dataclass fields. This takes text as an argument and for 
> each field in the
> +    dataclass, the field's parser function is run against the whole text. 
> The returned value is then
> +    assigned to the field of the new instance. If the field does not have a 
> parser function its
> +    default value or factory is used instead. If no default is available 
> either, an exception is
> +    raised.
> +
> +    This class provides a selection of parser functions and a function to 
> wrap parser functions with
> +    generic functions. Parser functions are designed to be passed to the 
> fields' metadata param. The
> +    most commonly used parser function is expected to be the `find` method, 
> which runs a regular
> +    expression against the text to find matches.
> +
> +    Example:
> +        The following example makes use of and demonstrates every parser 
> function available:
> +
> +        .. code:: python
> +
> +            from dataclasses import dataclass, field
> +            from enum import Enum
> +            from framework.parser import TextParser
> +
> +            class Colour(Enum):
> +                BLACK = 1
> +                WHITE = 2
> +
> +                @classmethod
> +                def from_str(cls, text: str):
> +                    match text:
> +                        case "black":
> +                            return cls.BLACK
> +                        case "white":
> +                            return cls.WHITE
> +                        case _:
> +                            return None # unsupported colour
> +
> +                @classmethod
> +                def make_parser(cls):
> +                    # make a parser function that finds a match and
> +                    # then makes it a Colour object through Colour.from_str
> +                    return TextParser.wrap(TextParser.find(r"is a (\w+)"), 
> cls.from_str)
> +
> +            @dataclass
> +            class Animal(TextParser):
> +                kind: str = field(metadata=TextParser.find(r"is a \w+ 
> (\w+)"))
> +                name: str = field(metadata=TextParser.find(r"^(\w+)"))
> +                colour: Colour = field(metadata=Colour.make_parser())
> +                age: int = field(metadata=TextParser.find_int(r"aged (\d+)"))
> +
> +            steph = Animal.parse("Stephanie is a white cat aged 10")
> +            print(steph) # Animal(kind='cat', name='Stephanie', 
> colour=<Colour.WHITE: 2>, age=10)
> +    """
> +
> +    """============ BEGIN PARSER FUNCTIONS ============"""
> +
> +    @staticmethod
> +    def wrap(parser_fn: ParserFn, wrapper_fn: Callable) -> ParserFn:
> +        """Makes a wrapped parser function.
> +
> +        `parser_fn` is called and if a non-None value is returned, 
> `wrapper_fn` is called with
> +        it. Otherwise the function returns early with None. In pseudo-code:
> +
> +            intermediate_value := parser_fn(input)
> +            if intermediate_value is None then
> +                output := None
> +            else
> +                output := wrapper_fn(intermediate_value)
> +
> +        Args:
> +            parser_fn: The dictionary storing the parser function to be 
> wrapped.
> +            wrapper_fn: The function that wraps `parser_fn`.
> +
> +        Returns:
> +            ParserFn: A dictionary for the `dataclasses.field` metadata 
> argument containing the
> +                newly wrapped parser function.
> +        """
> +        inner_fn = parser_fn["TextParser_fn"]
> +
> +        def _composite_parser_fn(text: str) -> Any:
> +            intermediate_value = inner_fn(text)
> +            if intermediate_value is None:
> +                return None
> +            return wrapper_fn(intermediate_value)
> +
> +        return ParserFn(TextParser_fn=_composite_parser_fn)
> +
> +    @staticmethod
> +    def find(
> +        pattern: str | re.Pattern[str],
> +        flags: re.RegexFlag = re.RegexFlag(0),
> +        named: bool = False,
> +    ) -> ParserFn:
> +        """Makes a parser function that finds a regular expression match in 
> the text.
> +
> +        If the pattern has any capturing groups, it returns None if no match 
> was found, otherwise a
> +        tuple containing the values per each group is returned. If the 
> pattern has only one
> +        capturing group and a match was found, its value is returned. If the 
> pattern has no
> +        capturing groups then either True or False is returned if the 
> pattern had a match or not.
> +
> +        Args:
> +            pattern: The regular expression pattern.
> +            flags: The regular expression flags. Ignored if the given 
> pattern is already compiled.
> +            named: If set to True only the named capturing groups will be 
> returned, as a dictionary.
> +
> +        Returns:
> +            ParserFn: A dictionary for the `dataclasses.field` metadata 
> argument containing the find
> +                parser function.
> +        """
> +        if isinstance(pattern, str):
> +            pattern = re.compile(pattern, flags)
> +
> +        def _find(text: str) -> Any:
> +            m = pattern.search(text)
> +            if m is None:
> +                return None if pattern.groups > 0 else False
> +
> +            if pattern.groups == 0:
> +                return True
> +
> +            if named:
> +                return m.groupdict()
> +
> +            matches = m.groups()
> +            if len(matches) == 1:
> +                return matches[0]
> +
> +            return matches
> +
> +        return ParserFn(TextParser_fn=_find)
> +
> +    @staticmethod
> +    def find_int(
> +        pattern: str | re.Pattern[str],
> +        flags: re.RegexFlag = re.RegexFlag(0),
> +        int_base: int = 0,
> +    ) -> ParserFn:
> +        """Makes a parser function that converts the match of :meth:`~find` 
> to int.
> +
> +        This function is compatible only with a pattern containing one 
> capturing group.
> +
> +        Args:
> +            pattern: The regular expression pattern.
> +            flags: The regular expression flags. Ignored if the given 
> pattern is already compiled.
> +            int_base: The base of the number to convert from.
> +
> +        Raises:
> +            InternalError: If the pattern does not have exactly one 
> capturing group.
> +
> +        Returns:
> +            ParserFn: A dictionary for the `dataclasses.field` metadata 
> argument containing the
> +                :meth:`~find` parser function wrapped by the int built-in.
> +        """
> +        if isinstance(pattern, str):
> +            pattern = re.compile(pattern, flags)
> +
> +        if pattern.groups != 1:
> +            raise InternalError("only one capturing group is allowed with 
> this parser function")
> +
> +        return TextParser.wrap(TextParser.find(pattern), partial(int, 
> base=int_base))
> +
> +    """============ END PARSER FUNCTIONS ============"""
> +
> +    @classmethod
> +    def parse(cls, text: str) -> Self:
> +        """Creates a new instance of the class from the given text.
> +
> +        A new class instance is created with all the fields that have a 
> parser function in their
> +        metadata. Fields without one are ignored and are expected to have a 
> default value, otherwise
> +        the class initialization will fail.
> +
> +        A field is populated with the value returned by its corresponding 
> parser function.
> +
> +        Args:
> +            text: the text to parse
> +
> +        Raises:
> +            InternalError: if the parser did not find a match and the field 
> does not have a default
> +                value or default factory.
> +
> +        Returns:
> +            A new instance of the class.
> +        """
> +        fields_values = {}
> +        for field in fields(cls):
> +            parse = cast(ParserFn, field.metadata).get("TextParser_fn")
> +            if parse is None:
> +                continue
> +
> +            value = parse(text)
> +            if value is not None:
> +                fields_values[field.name] = value
> +            elif field.default is MISSING and field.default_factory is 
> MISSING:
> +                raise InternalError(
> +                    f"parser for field {field.name} returned None, but the 
> field has no default"
> +                )
> +
> +        return cls(**fields_values)
> --
> 2.34.1
>

Reply via email to