Richard Cooper has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/70257?usp=email )
Change subject: scons: Fix lex.py for Python3.11 build.
......................................................................
scons: Fix lex.py for Python3.11 build.
The code generation in gem5's build system requires the use of regular
expression (RE) flags when defining the regular expressions used for
tokenization. However, the Python Lex-Yacc (PLY) [1] library used by
gem5 does not give the user sufficient control over the flags used for
RE compilation.
Previously, gem5 used inline RE flags to control RE compilation.
However, from Python 3.11, inline RE flags must be at the start of the
RE string. Because PLY wraps the user-supplied RE strings before
compilation, there is no way for the user to supply an RE string with
the inline flag at the start. This makes gem5 incompatible with Python
3.11 when using PLY.
This patch modifies gem5's copy of PLY, replacing its calls to
`re.compile` with a wrapper that strips inline RE flags from the
user-supplied RE string and passes them to `re.compile` as explicit
flags instead.
This patch is intended as a temporary stop-gap until PLY can be fixed
upstream.
See the gem5 Issue Tracker [2] for more details.
[1] https://github.com/dabeaz/ply
[2] https://gem5.atlassian.net/browse/GEM5-1321
Change-Id: I1b89416bc080e94e333ef6ef94c124293f384e01
---
M ext/ply/ply/lex.py
1 file changed, 34 insertions(+), 4 deletions(-)
diff --git a/ext/ply/ply/lex.py b/ext/ply/ply/lex.py
index f95bcdb..82eebc7 100644
--- a/ext/ply/ply/lex.py
+++ b/ext/ply/ply/lex.py
@@ -52,6 +52,36 @@
# This regular expression is used to match valid token names
_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
+
+def _inline_flag_aware_re_compile(re_str, flags=0x0):
+ """Provide an alternative implementation of `re.compile` that allows
+ inline flags that are not at the start of the regular expression
+ string.
+
+ From Python 3.11, the `re` module only supports inline flags at
+ the start of the RE string. This makes it impossible to modify the
+ Lexer strings when using PLY, because PLY embeds the user supplied
+ token REs, and does not provide sufficient control of the `flags`
+ argument.
+
+ """
+ _flags_map = {
+ ("(?a)", b"(?a)"): re.ASCII,
+ ("(?i)", b"(?i)"): re.IGNORECASE,
+ ("(?L)", b"(?L)"): re.LOCALE,
+ ("(?m)", b"(?m)"): re.MULTILINE,
+ ("(?s)", b"(?s)"): re.DOTALL,
+ ("(?x)", b"(?x)"): re.VERBOSE,
+ }
+ for (pattern_s, pattern_b), flag in _flags_map.items():
+ pattern = pattern_b if isinstance(re_str, bytes) else pattern_s
+ replacement = b"" if isinstance(re_str, bytes) else ""
+ if pattern in re_str:
+ flags |= flag
+ re_str = re_str.replace(pattern, replacement)
+ return re.compile(re_str, flags)
+
+
# Exception thrown when invalid token encountered and no default error
# handler is defined.
class LexError(Exception):
@@ -230,7 +260,7 @@
titem = []
txtitem = []
for pat, func_name in lre:
- titem.append((re.compile(pat, lextab._lexreflags),
_names_to_funcs(func_name, fdict)))
+ titem.append((_inline_flag_aware_re_compile(pat,
lextab._lexreflags), _names_to_funcs(func_name, fdict)))
self.lexstatere[statename] = titem
self.lexstateretext[statename] = txtitem
@@ -495,7 +525,7 @@
return []
regex = '|'.join(relist)
try:
- lexre = re.compile(regex, reflags)
+ lexre = _inline_flag_aware_re_compile(regex, reflags)
# Build the index to function map for the matching engine
lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1)
@@ -757,7 +787,7 @@
continue
try:
- c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)),
self.reflags)
+ c = _inline_flag_aware_re_compile('(?P<%s>%s)' %
(fname, _get_regex(f)), self.reflags)
if c.match(''):
self.log.error("%s:%d: Regular expression for
rule '%s' matches empty string", file, line, f.__name__)
self.error = True
@@ -781,7 +811,7 @@
continue
try:
- c = re.compile('(?P<%s>%s)' % (name, r), self.reflags)
+ c = _inline_flag_aware_re_compile('(?P<%s>%s)' %
(name, r), self.reflags)
if (c.match('')):
self.log.error("Regular expression for rule '%s'
matches empty string", name)
self.error = True
--
To view, visit
https://gem5-review.googlesource.com/c/public/gem5/+/70257?usp=email
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-MessageType: newchange
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I1b89416bc080e94e333ef6ef94c124293f384e01
Gerrit-Change-Number: 70257
Gerrit-PatchSet: 1
Gerrit-Owner: Richard Cooper <richard.coo...@arm.com>
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org