Author: Carl Friedrich Bolz-Tereick <[email protected]>
Branch: fix-sre-problems
Changeset: r94133:c93d31a2fabd
Date: 2018-03-26 14:41 +0200
http://bitbucket.org/pypy/pypy/changeset/c93d31a2fabd/
Log: stop using greenfields in sre, instead pass the pattern around
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -77,15 +77,15 @@
w_import = space.getattr(w_builtin, space.newtext("__import__"))
return space.call_function(w_import, space.newtext("re"))
-def matchcontext(space, ctx):
+def matchcontext(space, ctx, pattern):
try:
- return rsre_core.match_context(ctx)
+ return rsre_core.match_context(ctx, pattern)
except rsre_core.Error as e:
raise OperationError(space.w_RuntimeError, space.newtext(e.msg))
-def searchcontext(space, ctx):
+def searchcontext(space, ctx, pattern):
try:
- return rsre_core.search_context(ctx)
+ return rsre_core.search_context(ctx, pattern)
except rsre_core.Error as e:
raise OperationError(space.w_RuntimeError, space.newtext(e.msg))
@@ -144,12 +144,12 @@
@unwrap_spec(pos=int, endpos=int)
def match_w(self, w_string, pos=0, endpos=sys.maxint):
ctx = self.make_ctx(w_string, pos, endpos)
- return self.getmatch(ctx, matchcontext(self.space, ctx))
+ return self.getmatch(ctx, matchcontext(self.space, ctx, self.code))
@unwrap_spec(pos=int, endpos=int)
def search_w(self, w_string, pos=0, endpos=sys.maxint):
ctx = self.make_ctx(w_string, pos, endpos)
- return self.getmatch(ctx, searchcontext(self.space, ctx))
+ return self.getmatch(ctx, searchcontext(self.space, ctx, self.code))
@unwrap_spec(pos=int, endpos=int)
def findall_w(self, w_string, pos=0, endpos=sys.maxint):
@@ -157,7 +157,7 @@
matchlist_w = []
ctx = self.make_ctx(w_string, pos, endpos)
while ctx.match_start <= ctx.end:
- if not searchcontext(space, ctx):
+ if not searchcontext(space, ctx, self.code):
break
num_groups = self.num_groups
w_emptystr = space.newtext("")
@@ -182,7 +182,7 @@
# this also works as the implementation of the undocumented
# scanner() method.
ctx = self.make_ctx(w_string, pos, endpos)
- scanner = W_SRE_Scanner(self, ctx)
+ scanner = W_SRE_Scanner(self, ctx, self.code)
return scanner
@unwrap_spec(maxsplit=int)
@@ -193,7 +193,7 @@
last = 0
ctx = self.make_ctx(w_string)
while not maxsplit or n < maxsplit:
- if not searchcontext(space, ctx):
+ if not searchcontext(space, ctx, self.code):
break
if ctx.match_start == ctx.match_end: # zero-width match
if ctx.match_start == ctx.end: # or end of string
@@ -274,8 +274,8 @@
else:
sublist_w = []
n = last_pos = 0
+ pattern = self.code
while not count or n < count:
- pattern = ctx.pattern
sub_jitdriver.jit_merge_point(
self=self,
use_builder=use_builder,
@@ -292,7 +292,7 @@
n=n, last_pos=last_pos, sublist_w=sublist_w
)
space = self.space
- if not searchcontext(space, ctx):
+ if not searchcontext(space, ctx, pattern):
break
if last_pos < ctx.match_start:
_sub_append_slice(
@@ -388,7 +388,7 @@
srepat.space = space
srepat.w_pattern = w_pattern # the original uncompiled pattern
srepat.flags = flags
- srepat.code = code
+ srepat.code = rsre_core.CompiledPattern(code)
srepat.num_groups = groups
srepat.w_groupindex = w_groupindex
srepat.w_indexgroup = w_indexgroup
@@ -611,10 +611,11 @@
# Our version is also directly iterable, to make finditer() easier.
class W_SRE_Scanner(W_Root):
- def __init__(self, pattern, ctx):
+ def __init__(self, pattern, ctx, code):
self.space = pattern.space
self.srepat = pattern
self.ctx = ctx
+ self.code = code
# 'self.ctx' is always a fresh context in which no searching
# or matching succeeded so far.
@@ -624,19 +625,19 @@
def next_w(self):
if self.ctx.match_start > self.ctx.end:
raise OperationError(self.space.w_StopIteration, self.space.w_None)
- if not searchcontext(self.space, self.ctx):
+ if not searchcontext(self.space, self.ctx, self.code):
raise OperationError(self.space.w_StopIteration, self.space.w_None)
return self.getmatch(True)
def match_w(self):
if self.ctx.match_start > self.ctx.end:
return self.space.w_None
- return self.getmatch(matchcontext(self.space, self.ctx))
+ return self.getmatch(matchcontext(self.space, self.ctx, self.code))
def search_w(self):
if self.ctx.match_start > self.ctx.end:
return self.space.w_None
- return self.getmatch(searchcontext(self.space, self.ctx))
+ return self.getmatch(searchcontext(self.space, self.ctx, self.code))
def getmatch(self, found):
if found:
diff --git a/rpython/rlib/rsre/rpy/_sre.py b/rpython/rlib/rsre/rpy/_sre.py
--- a/rpython/rlib/rsre/rpy/_sre.py
+++ b/rpython/rlib/rsre/rpy/_sre.py
@@ -1,4 +1,4 @@
-from rpython.rlib.rsre import rsre_char
+from rpython.rlib.rsre import rsre_char, rsre_core
from rpython.rlib.rarithmetic import intmask
VERSION = "2.7.6"
@@ -12,7 +12,7 @@
pass
def compile(pattern, flags, code, *args):
- raise GotIt([intmask(i) for i in code], flags, args)
+ raise GotIt(rsre_core.CompiledPattern([intmask(i) for i in code]), flags, args)
def get_code(regexp, flags=0, allargs=False):
diff --git a/rpython/rlib/rsre/rsre_char.py b/rpython/rlib/rsre/rsre_char.py
--- a/rpython/rlib/rsre/rsre_char.py
+++ b/rpython/rlib/rsre/rsre_char.py
@@ -152,17 +152,16 @@
##### Charset evaluation
@jit.unroll_safe
-def check_charset(ctx, ppos, char_code):
+def check_charset(ctx, pattern, ppos, char_code):
"""Checks whether a character matches set of arbitrary length.
The set starts at pattern[ppos]."""
negated = False
result = False
- pattern = ctx.pattern
while True:
- opcode = pattern[ppos]
+ opcode = pattern.pattern[ppos]
for i, function in set_dispatch_unroll:
if opcode == i:
- newresult, ppos = function(ctx, ppos, char_code)
+ newresult, ppos = function(ctx, pattern, ppos, char_code)
result |= newresult
break
else:
@@ -177,50 +176,44 @@
return not result
return result
-def set_literal(ctx, index, char_code):
+def set_literal(ctx, pattern, index, char_code):
# <LITERAL> <code>
- pat = ctx.pattern
- match = pat[index+1] == char_code
+ match = pattern.pattern[index+1] == char_code
return match, index + 2
-def set_category(ctx, index, char_code):
+def set_category(ctx, pattern, index, char_code):
# <CATEGORY> <code>
- pat = ctx.pattern
- match = category_dispatch(pat[index+1], char_code)
+ match = category_dispatch(pattern.pattern[index+1], char_code)
return match, index + 2
-def set_charset(ctx, index, char_code):
+def set_charset(ctx, pattern, index, char_code):
# <CHARSET> <bitmap> (16 bits per code word)
- pat = ctx.pattern
if CODESIZE == 2:
match = char_code < 256 and \
- (pat[index+1+(char_code >> 4)] & (1 << (char_code & 15)))
+ (pattern.pattern[index+1+(char_code >> 4)] & (1 << (char_code & 15)))
return match, index + 17 # skip bitmap
else:
match = char_code < 256 and \
- (pat[index+1+(char_code >> 5)] & (1 << (char_code & 31)))
+ (pattern.pattern[index+1+(char_code >> 5)] & (1 << (char_code & 31)))
return match, index + 9 # skip bitmap
-def set_range(ctx, index, char_code):
+def set_range(ctx, pattern, index, char_code):
# <RANGE> <lower> <upper>
- pat = ctx.pattern
- match = int_between(pat[index+1], char_code, pat[index+2] + 1)
+ match = int_between(pattern.pattern[index+1], char_code, pattern.pattern[index+2] + 1)
return match, index + 3
-def set_range_ignore(ctx, index, char_code):
+def set_range_ignore(ctx, pattern, index, char_code):
# <RANGE_IGNORE> <lower> <upper>
# the char_code is already lower cased
- pat = ctx.pattern
- lower = pat[index + 1]
- upper = pat[index + 2]
+ lower = pattern.pattern[index + 1]
+ upper = pattern.pattern[index + 2]
match1 = int_between(lower, char_code, upper + 1)
match2 = int_between(lower, getupper(char_code, ctx.flags), upper + 1)
return match1 | match2, index + 3
-def set_bigcharset(ctx, index, char_code):
+def set_bigcharset(ctx, pattern, index, char_code):
# <BIGCHARSET> <blockcount> <256 blockindices> <blocks>
- pat = ctx.pattern
- count = pat[index+1]
+ count = pattern.pattern[index+1]
index += 2
if CODESIZE == 2:
@@ -238,7 +231,7 @@
return False, index
shift = 5
- block = pat[index + (char_code >> (shift + 5))]
+ block = pattern.pattern[index + (char_code >> (shift + 5))]
block_shift = char_code >> 5
if BIG_ENDIAN:
@@ -247,23 +240,22 @@
block = (block >> block_shift) & 0xFF
index += 256 / CODESIZE
- block_value = pat[index+(block * (32 / CODESIZE)
+ block_value = pattern.pattern[index+(block * (32 / CODESIZE)
+ ((char_code & 255) >> shift))]
match = (block_value & (1 << (char_code & ((8 * CODESIZE) - 1))))
index += count * (32 / CODESIZE) # skip blocks
return match, index
-def set_unicode_general_category(ctx, index, char_code):
+def set_unicode_general_category(ctx, pattern, index, char_code):
# Unicode "General category property code" (not used by Python).
- # A general category is two letters. 'pat[index+1]' contains both
+ # A general category is two letters. 'pattern.pattern[index+1]' contains both
# the first character, and the second character shifted by 8.
# http://en.wikipedia.org/wiki/Unicode_character_property#General_Category
# Also supports single-character categories, if the second character is 0.
# Negative matches are triggered by bit number 7.
assert unicodedb is not None
cat = unicodedb.category(char_code)
- pat = ctx.pattern
- category_code = pat[index + 1]
+ category_code = pattern.pattern[index + 1]
first_character = category_code & 0x7F
second_character = (category_code >> 8) & 0x7F
negative_match = category_code & 0x80
diff --git a/rpython/rlib/rsre/rsre_core.py b/rpython/rlib/rsre/rsre_core.py
--- a/rpython/rlib/rsre/rsre_core.py
+++ b/rpython/rlib/rsre/rsre_core.py
@@ -83,9 +83,31 @@
def __init__(self, msg):
self.msg = msg
+
+class CompiledPattern(object):
+ _immutable_fields_ = ['pattern[*]']
+
+ def __init__(self, pattern):
+ self.pattern = pattern
+ # check we don't get the old value of MAXREPEAT
+ # during the untranslated tests
+ if not we_are_translated():
+ assert 65535 not in pattern
+
+ def pat(self, index):
+ jit.promote(self)
+ check_nonneg(index)
+ result = self.pattern[index]
+ # Check that we only return non-negative integers from this helper.
+ # It is possible that self.pattern contains negative integers
+ # (see set_charset() and set_bigcharset() in rsre_char.py)
+ # but they should not be fetched via this helper here.
+ assert result >= 0
+ return result
+
class AbstractMatchContext(object):
"""Abstract base class"""
- _immutable_fields_ = ['pattern[*]', 'flags', 'end']
+ _immutable_fields_ = ['pattern', 'flags', 'end']
match_start = 0
match_end = 0
match_marks = None
@@ -97,30 +119,17 @@
# and they must not be more than len(string).
check_nonneg(match_start)
check_nonneg(end)
+ assert isinstance(pattern, CompiledPattern)
self.pattern = pattern
self.match_start = match_start
self.end = end
self.flags = flags
- # check we don't get the old value of MAXREPEAT
- # during the untranslated tests
- if not we_are_translated():
- assert 65535 not in pattern
def reset(self, start):
self.match_start = start
self.match_marks = None
self.match_marks_flat = None
- def pat(self, index):
- check_nonneg(index)
- result = self.pattern[index]
- # Check that we only return non-negative integers from this helper.
- # It is possible that self.pattern contains negative integers
- # (see set_charset() and set_bigcharset() in rsre_char.py)
- # but they should not be fetched via this helper here.
- assert result >= 0
- return result
-
@not_rpython
def str(self, index):
"""Must be overridden in a concrete subclass.
@@ -265,16 +274,16 @@
class MatchResult(object):
subresult = None
- def move_to_next_result(self, ctx):
+ def move_to_next_result(self, ctx, pattern):
# returns either 'self' or None
result = self.subresult
if result is None:
return
- if result.move_to_next_result(ctx):
+ if result.move_to_next_result(ctx, pattern):
return self
- return self.find_next_result(ctx)
+ return self.find_next_result(ctx, pattern)
- def find_next_result(self, ctx):
+ def find_next_result(self, ctx, pattern):
raise NotImplementedError
MATCHED_OK = MatchResult()
@@ -287,11 +296,11 @@
self.start_marks = marks
@jit.unroll_safe
- def find_first_result(self, ctx):
+ def find_first_result(self, ctx, pattern):
ppos = jit.hint(self.ppos, promote=True)
- while ctx.pat(ppos):
- result = sre_match(ctx, ppos + 1, self.start_ptr, self.start_marks)
- ppos += ctx.pat(ppos)
+ while pattern.pat(ppos):
+ result = sre_match(ctx, pattern, ppos + 1, self.start_ptr, self.start_marks)
+ ppos += pattern.pat(ppos)
if result is not None:
self.subresult = result
self.ppos = ppos
@@ -300,7 +309,7 @@
class RepeatOneMatchResult(MatchResult):
install_jitdriver('RepeatOne',
- greens=['nextppos', 'ctx.pattern'],
+ greens=['nextppos', 'pattern'],
reds=['ptr', 'self', 'ctx'],
debugprint=(1, 0)) # indices in 'greens'
@@ -310,13 +319,14 @@
self.start_ptr = ptr
self.start_marks = marks
- def find_first_result(self, ctx):
+ def find_first_result(self, ctx, pattern):
ptr = self.start_ptr
nextppos = self.nextppos
while ptr >= self.minptr:
ctx.jitdriver_RepeatOne.jit_merge_point(
- self=self, ptr=ptr, ctx=ctx, nextppos=nextppos)
- result = sre_match(ctx, nextppos, ptr, self.start_marks)
+ self=self, ptr=ptr, ctx=ctx, nextppos=nextppos,
+ pattern=pattern)
+ result = sre_match(ctx, pattern, nextppos, ptr, self.start_marks)
ptr -= 1
if result is not None:
self.subresult = result
@@ -327,7 +337,7 @@
class MinRepeatOneMatchResult(MatchResult):
install_jitdriver('MinRepeatOne',
- greens=['nextppos', 'ppos3', 'ctx.pattern'],
+ greens=['nextppos', 'ppos3', 'pattern'],
reds=['ptr', 'self', 'ctx'],
debugprint=(2, 0)) # indices in 'greens'
@@ -338,39 +348,40 @@
self.start_ptr = ptr
self.start_marks = marks
- def find_first_result(self, ctx):
+ def find_first_result(self, ctx, pattern):
ptr = self.start_ptr
nextppos = self.nextppos
ppos3 = self.ppos3
while ptr <= self.maxptr:
ctx.jitdriver_MinRepeatOne.jit_merge_point(
- self=self, ptr=ptr, ctx=ctx, nextppos=nextppos, ppos3=ppos3)
- result = sre_match(ctx, nextppos, ptr, self.start_marks)
+ self=self, ptr=ptr, ctx=ctx, nextppos=nextppos, ppos3=ppos3,
+ pattern=pattern)
+ result = sre_match(ctx, pattern, nextppos, ptr, self.start_marks)
if result is not None:
self.subresult = result
self.start_ptr = ptr
return self
- if not self.next_char_ok(ctx, ptr, ppos3):
+ if not self.next_char_ok(ctx, pattern, ptr, ppos3):
break
ptr += 1
- def find_next_result(self, ctx):
+ def find_next_result(self, ctx, pattern):
ptr = self.start_ptr
- if not self.next_char_ok(ctx, ptr, self.ppos3):
+ if not self.next_char_ok(ctx, pattern, ptr, self.ppos3):
return
self.start_ptr = ptr + 1
- return self.find_first_result(ctx)
+ return self.find_first_result(ctx, pattern)
- def next_char_ok(self, ctx, ptr, ppos):
+ def next_char_ok(self, ctx, pattern, ptr, ppos):
if ptr == ctx.end:
return False
- op = ctx.pat(ppos)
+ op = pattern.pat(ppos)
for op1, checkerfn in unroll_char_checker:
if op1 == op:
- return checkerfn(ctx, ptr, ppos)
+ return checkerfn(ctx, pattern, ptr, ppos)
# obscure case: it should be a single char pattern, but isn't
# one of the opcodes in unroll_char_checker (see test_ext_opcode)
- return sre_match(ctx, ppos, ptr, self.start_marks) is not None
+ return sre_match(ctx, pattern, ppos, ptr, self.start_marks) is not None
class AbstractUntilMatchResult(MatchResult):
@@ -391,17 +402,17 @@
class MaxUntilMatchResult(AbstractUntilMatchResult):
install_jitdriver('MaxUntil',
- greens=['ppos', 'tailppos', 'match_more', 'ctx.pattern'],
+ greens=['ppos', 'tailppos', 'match_more', 'pattern'],
reds=['ptr', 'marks', 'self', 'ctx'],
debugprint=(3, 0, 2))
- def find_first_result(self, ctx):
- return self.search_next(ctx, match_more=True)
+ def find_first_result(self, ctx, pattern):
+ return self.search_next(ctx, pattern, match_more=True)
- def find_next_result(self, ctx):
- return self.search_next(ctx, match_more=False)
+ def find_next_result(self, ctx, pattern):
+ return self.search_next(ctx, pattern, match_more=False)
- def search_next(self, ctx, match_more):
+ def search_next(self, ctx, pattern, match_more):
ppos = self.ppos
tailppos = self.tailppos
ptr = self.cur_ptr
@@ -409,12 +420,13 @@
while True:
ctx.jitdriver_MaxUntil.jit_merge_point(
ppos=ppos, tailppos=tailppos, match_more=match_more,
- ptr=ptr, marks=marks, self=self, ctx=ctx)
+ ptr=ptr, marks=marks, self=self, ctx=ctx,
+ pattern=pattern)
if match_more:
- max = ctx.pat(ppos+2)
+ max = pattern.pat(ppos+2)
if max == rsre_char.MAXREPEAT or self.num_pending < max:
# try to match one more 'item'
- enum = sre_match(ctx, ppos + 3, ptr, marks)
+ enum = sre_match(ctx, pattern, ppos + 3, ptr, marks)
else:
enum = None # 'max' reached, no more matches
else:
@@ -425,9 +437,9 @@
self.num_pending -= 1
ptr = p.ptr
marks = p.marks
- enum = p.enum.move_to_next_result(ctx)
+ enum = p.enum.move_to_next_result(ctx, pattern)
#
- min = ctx.pat(ppos+1)
+ min = pattern.pat(ppos+1)
if enum is not None:
# matched one more 'item'. record it and continue.
last_match_length = ctx.match_end - ptr
@@ -447,7 +459,7 @@
# 'item' no longer matches.
if self.num_pending >= min:
# try to match 'tail' if we have enough 'item'
- result = sre_match(ctx, tailppos, ptr, marks)
+ result = sre_match(ctx, pattern, tailppos, ptr, marks)
if result is not None:
self.subresult = result
self.cur_ptr = ptr
@@ -457,23 +469,23 @@
class MinUntilMatchResult(AbstractUntilMatchResult):
- def find_first_result(self, ctx):
- return self.search_next(ctx, resume=False)
+ def find_first_result(self, ctx, pattern):
+ return self.search_next(ctx, pattern, resume=False)
- def find_next_result(self, ctx):
- return self.search_next(ctx, resume=True)
+ def find_next_result(self, ctx, pattern):
+ return self.search_next(ctx, pattern, resume=True)
- def search_next(self, ctx, resume):
+ def search_next(self, ctx, pattern, resume):
# XXX missing jit support here
ppos = self.ppos
- min = ctx.pat(ppos+1)
- max = ctx.pat(ppos+2)
+ min = pattern.pat(ppos+1)
+ max = pattern.pat(ppos+2)
ptr = self.cur_ptr
marks = self.cur_marks
while True:
# try to match 'tail' if we have enough 'item'
if not resume and self.num_pending >= min:
- result = sre_match(ctx, self.tailppos, ptr, marks)
+ result = sre_match(ctx, pattern, self.tailppos, ptr, marks)
if result is not None:
self.subresult = result
self.cur_ptr = ptr
@@ -483,12 +495,12 @@
if max == rsre_char.MAXREPEAT or self.num_pending < max:
# try to match one more 'item'
- enum = sre_match(ctx, ppos + 3, ptr, marks)
+ enum = sre_match(ctx, pattern, ppos + 3, ptr, marks)
#
# zero-width match protection
if self.num_pending >= min:
while enum is not None and ptr == ctx.match_end:
- enum = enum.move_to_next_result(ctx)
+ enum = enum.move_to_next_result(ctx, pattern)
else:
enum = None # 'max' reached, no more matches
@@ -502,7 +514,7 @@
self.num_pending -= 1
ptr = p.ptr
marks = p.marks
- enum = p.enum.move_to_next_result(ctx)
+ enum = p.enum.move_to_next_result(ctx, pattern)
# matched one more 'item'. record it and continue
self.pending = Pending(ptr, marks, enum, self.pending)
@@ -514,13 +526,13 @@
@specializectx
@jit.unroll_safe
-def sre_match(ctx, ppos, ptr, marks):
+def sre_match(ctx, pattern, ppos, ptr, marks):
"""Returns either None or a MatchResult object. Usually we only need
the first result, but there is the case of REPEAT...UNTIL where we
need all results; in that case we use the method move_to_next_result()
of the MatchResult."""
while True:
- op = ctx.pat(ppos)
+ op = pattern.pat(ppos)
ppos += 1
#jit.jit_debug("sre_match", op, ppos, ptr)
@@ -563,33 +575,33 @@
elif op == OPCODE_ASSERT:
# assert subpattern
# <ASSERT> <0=skip> <1=back> <pattern>
- ptr1 = ptr - ctx.pat(ppos+1)
+ ptr1 = ptr - pattern.pat(ppos+1)
saved = ctx.fullmatch_only
ctx.fullmatch_only = False
- stop = ptr1 < 0 or sre_match(ctx, ppos + 2, ptr1, marks) is None
+ stop = ptr1 < 0 or sre_match(ctx, pattern, ppos + 2, ptr1, marks) is None
ctx.fullmatch_only = saved
if stop:
return
marks = ctx.match_marks
- ppos += ctx.pat(ppos)
+ ppos += pattern.pat(ppos)
elif op == OPCODE_ASSERT_NOT:
# assert not subpattern
# <ASSERT_NOT> <0=skip> <1=back> <pattern>
- ptr1 = ptr - ctx.pat(ppos+1)
+ ptr1 = ptr - pattern.pat(ppos+1)
saved = ctx.fullmatch_only
ctx.fullmatch_only = False
- stop = (ptr1 >= 0 and sre_match(ctx, ppos + 2, ptr1, marks)
+ stop = (ptr1 >= 0 and sre_match(ctx, pattern, ppos + 2, ptr1, marks)
is not None)
ctx.fullmatch_only = saved
if stop:
return
- ppos += ctx.pat(ppos)
+ ppos += pattern.pat(ppos)
elif op == OPCODE_AT:
# match at given position (e.g. at beginning, at boundary, etc.)
# <AT> <code>
- if not sre_at(ctx, ctx.pat(ppos), ptr):
+ if not sre_at(ctx, pattern.pat(ppos), ptr):
return
ppos += 1
@@ -597,14 +609,14 @@
# alternation
# <BRANCH> <0=skip> code <JUMP> ... <NULL>
result = BranchMatchResult(ppos, ptr, marks)
- return result.find_first_result(ctx)
+ return result.find_first_result(ctx, pattern)
elif op == OPCODE_CATEGORY:
# seems to be never produced, but used by some tests from
# pypy/module/_sre/test
# <CATEGORY> <category>
if (ptr == ctx.end or
- not rsre_char.category_dispatch(ctx.pat(ppos), ctx.str(ptr))):
+ not rsre_char.category_dispatch(pattern.pat(ppos), ctx.str(ptr))):
return
ptr += 1
ppos += 1
@@ -612,7 +624,7 @@
elif op == OPCODE_GROUPREF:
# match backreference
# <GROUPREF> <groupnum>
- startptr, length = get_group_ref(marks, ctx.pat(ppos))
+ startptr, length = get_group_ref(marks, pattern.pat(ppos))
if length < 0:
return # group was not previously defined
if not match_repeated(ctx, ptr, startptr, length):
@@ -623,7 +635,7 @@
elif op == OPCODE_GROUPREF_IGNORE:
# match backreference
# <GROUPREF> <groupnum>
- startptr, length = get_group_ref(marks, ctx.pat(ppos))
+ startptr, length = get_group_ref(marks, pattern.pat(ppos))
if length < 0:
return # group was not previously defined
if not match_repeated_ignore(ctx, ptr, startptr, length):
@@ -634,44 +646,44 @@
elif op == OPCODE_GROUPREF_EXISTS:
# conditional match depending on the existence of a group
# <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ...
- _, length = get_group_ref(marks, ctx.pat(ppos))
+ _, length = get_group_ref(marks, pattern.pat(ppos))
if length >= 0:
ppos += 2 # jump to 'codeyes'
else:
- ppos += ctx.pat(ppos+1) # jump to 'codeno'
+ ppos += pattern.pat(ppos+1) # jump to 'codeno'
elif op == OPCODE_IN:
# match set member (or non_member)
# <IN> <skip> <set>
- if ptr >= ctx.end or not rsre_char.check_charset(ctx, ppos+1,
+ if ptr >= ctx.end or not rsre_char.check_charset(ctx, pattern, ppos+1,
ctx.str(ptr)):
return
- ppos += ctx.pat(ppos)
+ ppos += pattern.pat(ppos)
ptr += 1
elif op == OPCODE_IN_IGNORE:
# match set member (or non_member), ignoring case
# <IN> <skip> <set>
- if ptr >= ctx.end or not rsre_char.check_charset(ctx, ppos+1,
+ if ptr >= ctx.end or not rsre_char.check_charset(ctx, pattern, ppos+1,
ctx.lowstr(ptr)):
return
- ppos += ctx.pat(ppos)
+ ppos += pattern.pat(ppos)
ptr += 1
elif op == OPCODE_INFO:
# optimization info block
# <INFO> <0=skip> <1=flags> <2=min> ...
- if (ctx.end - ptr) < ctx.pat(ppos+2):
+ if (ctx.end - ptr) < pattern.pat(ppos+2):
return
- ppos += ctx.pat(ppos)
+ ppos += pattern.pat(ppos)
elif op == OPCODE_JUMP:
- ppos += ctx.pat(ppos)
+ ppos += pattern.pat(ppos)
elif op == OPCODE_LITERAL:
# match literal string
# <LITERAL> <code>
- if ptr >= ctx.end or ctx.str(ptr) != ctx.pat(ppos):
+ if ptr >= ctx.end or ctx.str(ptr) != pattern.pat(ppos):
return
ppos += 1
ptr += 1
@@ -679,7 +691,7 @@
elif op == OPCODE_LITERAL_IGNORE:
# match literal string, ignoring case
# <LITERAL_IGNORE> <code>
- if ptr >= ctx.end or ctx.lowstr(ptr) != ctx.pat(ppos):
+ if ptr >= ctx.end or ctx.lowstr(ptr) != pattern.pat(ppos):
return
ppos += 1
ptr += 1
@@ -687,14 +699,14 @@
elif op == OPCODE_MARK:
# set mark
# <MARK> <gid>
- gid = ctx.pat(ppos)
+ gid = pattern.pat(ppos)
marks = Mark(gid, ptr, marks)
ppos += 1
elif op == OPCODE_NOT_LITERAL:
# match if it's not a literal string
# <NOT_LITERAL> <code>
- if ptr >= ctx.end or ctx.str(ptr) == ctx.pat(ppos):
+ if ptr >= ctx.end or ctx.str(ptr) == pattern.pat(ppos):
return
ppos += 1
ptr += 1
@@ -702,7 +714,7 @@
elif op == OPCODE_NOT_LITERAL_IGNORE:
# match if it's not a literal string, ignoring case
# <NOT_LITERAL> <code>
- if ptr >= ctx.end or ctx.lowstr(ptr) == ctx.pat(ppos):
+ if ptr >= ctx.end or ctx.lowstr(ptr) == pattern.pat(ppos):
return
ppos += 1
ptr += 1
@@ -715,22 +727,22 @@
# decode the later UNTIL operator to see if it is actually
# a MAX_UNTIL or MIN_UNTIL
- untilppos = ppos + ctx.pat(ppos)
+ untilppos = ppos + pattern.pat(ppos)
tailppos = untilppos + 1
- op = ctx.pat(untilppos)
+ op = pattern.pat(untilppos)
if op == OPCODE_MAX_UNTIL:
# the hard case: we have to match as many repetitions as
# possible, followed by the 'tail'. we do this by
# remembering each state for each possible number of
# 'item' matching.
result = MaxUntilMatchResult(ppos, tailppos, ptr, marks)
- return result.find_first_result(ctx)
+ return result.find_first_result(ctx, pattern)
elif op == OPCODE_MIN_UNTIL:
# first try to match the 'tail', and if it fails, try
# to match one more 'item' and try again
result = MinUntilMatchResult(ppos, tailppos, ptr, marks)
- return result.find_first_result(ctx)
+ return result.find_first_result(ctx, pattern)
else:
raise Error("missing UNTIL after REPEAT")
@@ -743,17 +755,18 @@
# use the MAX_REPEAT operator.
# <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail
start = ptr
- minptr = start + ctx.pat(ppos+1)
+ minptr = start + pattern.pat(ppos+1)
if minptr > ctx.end:
return # cannot match
- ptr = find_repetition_end(ctx, ppos+3, start, ctx.pat(ppos+2),
+ ptr = find_repetition_end(ctx, pattern, ppos+3, start,
+ pattern.pat(ppos+2),
marks)
# when we arrive here, ptr points to the tail of the target
# string. check if the rest of the pattern matches,
# and backtrack if not.
- nextppos = ppos + ctx.pat(ppos)
+ nextppos = ppos + pattern.pat(ppos)
result = RepeatOneMatchResult(nextppos, minptr, ptr, marks)
- return result.find_first_result(ctx)
+ return result.find_first_result(ctx, pattern)
elif op == OPCODE_MIN_REPEAT_ONE:
# match repeated sequence (minimizing regexp).
@@ -763,26 +776,26 @@
# use the MIN_REPEAT operator.
# <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail
start = ptr
- min = ctx.pat(ppos+1)
+ min = pattern.pat(ppos+1)
if min > 0:
minptr = ptr + min
if minptr > ctx.end:
return # cannot match
# count using pattern min as the maximum
- ptr = find_repetition_end(ctx, ppos+3, ptr, min, marks)
+ ptr = find_repetition_end(ctx, pattern, ppos+3, ptr, min, marks)
if ptr < minptr:
return # did not match minimum number of times
maxptr = ctx.end
- max = ctx.pat(ppos+2)
+ max = pattern.pat(ppos+2)
if max != rsre_char.MAXREPEAT:
maxptr1 = start + max
if maxptr1 <= maxptr:
maxptr = maxptr1
- nextppos = ppos + ctx.pat(ppos)
+ nextppos = ppos + pattern.pat(ppos)
result = MinRepeatOneMatchResult(nextppos, ppos+3, maxptr,
ptr, marks)
- return result.find_first_result(ctx)
+ return result.find_first_result(ctx, pattern)
else:
raise Error("bad pattern code %d" % op)
@@ -816,7 +829,7 @@
return True
@specializectx
-def find_repetition_end(ctx, ppos, ptr, maxcount, marks):
+def find_repetition_end(ctx, pattern, ppos, ptr, maxcount, marks):
end = ctx.end
ptrp1 = ptr + 1
# First get rid of the cases where we don't have room for any match.
@@ -826,16 +839,16 @@
# The idea is to be fast for cases like re.search("b+"), where we expect
# the common case to be a non-match. It's much faster with the JIT to
# have the non-match inlined here rather than detect it in the fre() call.
- op = ctx.pat(ppos)
+ op = pattern.pat(ppos)
for op1, checkerfn in unroll_char_checker:
if op1 == op:
- if checkerfn(ctx, ptr, ppos):
+ if checkerfn(ctx, pattern, ptr, ppos):
break
return ptr
else:
# obscure case: it should be a single char pattern, but isn't
# one of the opcodes in unroll_char_checker (see test_ext_opcode)
- return general_find_repetition_end(ctx, ppos, ptr, maxcount, marks)
+ return general_find_repetition_end(ctx, pattern, ppos, ptr, maxcount, marks)
# It matches at least once. If maxcount == 1 (relatively common),
# then we are done.
if maxcount == 1:
@@ -846,14 +859,14 @@
end1 = ptr + maxcount
if end1 <= end:
end = end1
- op = ctx.pat(ppos)
+ op = pattern.pat(ppos)
for op1, fre in unroll_fre_checker:
if op1 == op:
- return fre(ctx, ptrp1, end, ppos)
+ return fre(ctx, pattern, ptrp1, end, ppos)
raise Error("rsre.find_repetition_end[%d]" % op)
@specializectx
-def general_find_repetition_end(ctx, ppos, ptr, maxcount, marks):
+def general_find_repetition_end(ctx, patern, ppos, ptr, maxcount, marks):
# moved into its own JIT-opaque function
end = ctx.end
if maxcount != rsre_char.MAXREPEAT:
@@ -861,63 +874,65 @@
end1 = ptr + maxcount
if end1 <= end:
end = end1
- while ptr < end and sre_match(ctx, ppos, ptr, marks) is not None:
+ while ptr < end and sre_match(ctx, patern, ppos, ptr, marks) is not None:
ptr += 1
return ptr
@specializectx
-def match_ANY(ctx, ptr, ppos): # dot wildcard.
+def match_ANY(ctx, pattern, ptr, ppos): # dot wildcard.
return not rsre_char.is_linebreak(ctx.str(ptr))
-def match_ANY_ALL(ctx, ptr, ppos):
+def match_ANY_ALL(ctx, pattern, ptr, ppos):
return True # match anything (including a newline)
@specializectx
-def match_IN(ctx, ptr, ppos):
- return rsre_char.check_charset(ctx, ppos+2, ctx.str(ptr))
+def match_IN(ctx, pattern, ptr, ppos):
+ return rsre_char.check_charset(ctx, pattern, ppos+2, ctx.str(ptr))
@specializectx
-def match_IN_IGNORE(ctx, ptr, ppos):
- return rsre_char.check_charset(ctx, ppos+2, ctx.lowstr(ptr))
+def match_IN_IGNORE(ctx, pattern, ptr, ppos):
+ return rsre_char.check_charset(ctx, pattern, ppos+2, ctx.lowstr(ptr))
@specializectx
-def match_LITERAL(ctx, ptr, ppos):
- return ctx.str(ptr) == ctx.pat(ppos+1)
+def match_LITERAL(ctx, pattern, ptr, ppos):
+ return ctx.str(ptr) == pattern.pat(ppos+1)
@specializectx
-def match_LITERAL_IGNORE(ctx, ptr, ppos):
- return ctx.lowstr(ptr) == ctx.pat(ppos+1)
+def match_LITERAL_IGNORE(ctx, pattern, ptr, ppos):
+ return ctx.lowstr(ptr) == pattern.pat(ppos+1)
@specializectx
-def match_NOT_LITERAL(ctx, ptr, ppos):
- return ctx.str(ptr) != ctx.pat(ppos+1)
+def match_NOT_LITERAL(ctx, pattern, ptr, ppos):
+ return ctx.str(ptr) != pattern.pat(ppos+1)
@specializectx
-def match_NOT_LITERAL_IGNORE(ctx, ptr, ppos):
- return ctx.lowstr(ptr) != ctx.pat(ppos+1)
+def match_NOT_LITERAL_IGNORE(ctx, pattern, ptr, ppos):
+ return ctx.lowstr(ptr) != pattern.pat(ppos+1)
def _make_fre(checkerfn):
if checkerfn == match_ANY_ALL:
- def fre(ctx, ptr, end, ppos):
+ def fre(ctx, pattern, ptr, end, ppos):
return end
elif checkerfn == match_IN:
install_jitdriver_spec('MatchIn',
- greens=['ppos', 'ctx.pattern'],
+ greens=['ppos', 'pattern'],
reds=['ptr', 'end', 'ctx'],
debugprint=(1, 0))
@specializectx
- def fre(ctx, ptr, end, ppos):
+ def fre(ctx, pattern, ptr, end, ppos):
while True:
ctx.jitdriver_MatchIn.jit_merge_point(ctx=ctx, ptr=ptr,
- end=end, ppos=ppos)
- if ptr < end and checkerfn(ctx, ptr, ppos):
+ end=end, ppos=ppos,
+ pattern=pattern)
+ if ptr < end and checkerfn(ctx, pattern, ptr, ppos):
ptr += 1
else:
return ptr
elif checkerfn == match_IN_IGNORE:
install_jitdriver_spec('MatchInIgnore',
- greens=['ppos', 'ctx.pattern'],
+ greens=['ppos', 'pattern'],
reds=['ptr', 'end', 'ctx'],
debugprint=(1, 0))
@specializectx
- def fre(ctx, ptr, end, ppos):
+ def fre(ctx, pattern, ptr, end, ppos):
while True:
ctx.jitdriver_MatchInIgnore.jit_merge_point(ctx=ctx, ptr=ptr,
- end=end, ppos=ppos)
- if ptr < end and checkerfn(ctx, ptr, ppos):
+ end=end, ppos=ppos,
+ pattern=pattern)
+ if ptr < end and checkerfn(ctx, pattern, ptr, ppos):
ptr += 1
else:
return ptr
@@ -925,8 +940,8 @@
# in the other cases, the fre() function is not JITted at all
# and is present as a residual call.
@specializectx
- def fre(ctx, ptr, end, ppos):
- while ptr < end and checkerfn(ctx, ptr, ppos):
+ def fre(ctx, pattern, ptr, end, ppos):
+ while ptr < end and checkerfn(ctx, pattern, ptr, ppos):
ptr += 1
return ptr
fre = func_with_new_name(fre, 'fre_' + checkerfn.__name__)
@@ -1037,10 +1052,11 @@
return start, end
def match(pattern, string, start=0, end=sys.maxint, flags=0, fullmatch=False):
+ assert isinstance(pattern, CompiledPattern)
start, end = _adjust(start, end, len(string))
ctx = StrMatchContext(pattern, string, start, end, flags)
ctx.fullmatch_only = fullmatch
- if match_context(ctx):
+ if match_context(ctx, pattern):
return ctx
else:
return None
@@ -1049,105 +1065,106 @@
return match(pattern, string, start, end, flags, fullmatch=True)
def search(pattern, string, start=0, end=sys.maxint, flags=0):
+ assert isinstance(pattern, CompiledPattern)
start, end = _adjust(start, end, len(string))
ctx = StrMatchContext(pattern, string, start, end, flags)
- if search_context(ctx):
+ if search_context(ctx, pattern):
return ctx
else:
return None
install_jitdriver('Match',
- greens=['ctx.pattern'], reds=['ctx'],
+ greens=['pattern'], reds=['ctx'],
debugprint=(0,))
-def match_context(ctx):
+def match_context(ctx, pattern):
ctx.original_pos = ctx.match_start
if ctx.end < ctx.match_start:
return False
- ctx.jitdriver_Match.jit_merge_point(ctx=ctx)
- return sre_match(ctx, 0, ctx.match_start, None) is not None
+ ctx.jitdriver_Match.jit_merge_point(ctx=ctx, pattern=pattern)
+ return sre_match(ctx, pattern, 0, ctx.match_start, None) is not None
-def search_context(ctx):
+def search_context(ctx, pattern):
ctx.original_pos = ctx.match_start
if ctx.end < ctx.match_start:
return False
base = 0
charset = False
- if ctx.pat(base) == OPCODE_INFO:
- flags = ctx.pat(2)
+ if pattern.pat(base) == OPCODE_INFO:
+ flags = pattern.pat(2)
if flags & rsre_char.SRE_INFO_PREFIX:
- if ctx.pat(5) > 1:
- return fast_search(ctx)
+ if pattern.pat(5) > 1:
+ return fast_search(ctx, pattern)
else:
charset = (flags & rsre_char.SRE_INFO_CHARSET)
- base += 1 + ctx.pat(1)
- if ctx.pat(base) == OPCODE_LITERAL:
- return literal_search(ctx, base)
+ base += 1 + pattern.pat(1)
+ if pattern.pat(base) == OPCODE_LITERAL:
+ return literal_search(ctx, pattern, base)
if charset:
- return charset_search(ctx, base)
- return regular_search(ctx, base)
+ return charset_search(ctx, pattern, base)
+ return regular_search(ctx, pattern, base)
install_jitdriver('RegularSearch',
- greens=['base', 'ctx.pattern'],
+ greens=['base', 'pattern'],
reds=['start', 'ctx'],
debugprint=(1, 0))
-def regular_search(ctx, base):
+def regular_search(ctx, pattern, base):
start = ctx.match_start
while start <= ctx.end:
ctx.jitdriver_RegularSearch.jit_merge_point(ctx=ctx, start=start,
- base=base)
- if sre_match(ctx, base, start, None) is not None:
+ base=base, pattern=pattern)
+ if sre_match(ctx, pattern, base, start, None) is not None:
ctx.match_start = start
return True
start += 1
return False
install_jitdriver_spec("LiteralSearch",
- greens=['base', 'character', 'ctx.pattern'],
+ greens=['base', 'character', 'pattern'],
reds=['start', 'ctx'],
debugprint=(2, 0, 1))
@specializectx
-def literal_search(ctx, base):
+def literal_search(ctx, pattern, base):
# pattern starts with a literal character. this is used
# for short prefixes, and if fast search is disabled
- character = ctx.pat(base + 1)
+ character = pattern.pat(base + 1)
base += 2
start = ctx.match_start
while start < ctx.end:
ctx.jitdriver_LiteralSearch.jit_merge_point(ctx=ctx, start=start,
- base=base, character=character)
+ base=base, character=character, pattern=pattern)
if ctx.str(start) == character:
- if sre_match(ctx, base, start + 1, None) is not None:
+ if sre_match(ctx, pattern, base, start + 1, None) is not None:
ctx.match_start = start
return True
start += 1
return False
install_jitdriver_spec("CharsetSearch",
- greens=['base', 'ctx.pattern'],
+ greens=['base', 'pattern'],
reds=['start', 'ctx'],
debugprint=(1, 0))
@specializectx
-def charset_search(ctx, base):
+def charset_search(ctx, pattern, base):
# pattern starts with a character from a known set
start = ctx.match_start
while start < ctx.end:
ctx.jitdriver_CharsetSearch.jit_merge_point(ctx=ctx, start=start,
- base=base)
- if rsre_char.check_charset(ctx, 5, ctx.str(start)):
- if sre_match(ctx, base, start, None) is not None:
+ base=base, pattern=pattern)
+ if rsre_char.check_charset(ctx, pattern, 5, ctx.str(start)):
+ if sre_match(ctx, pattern, base, start, None) is not None:
ctx.match_start = start
return True
start += 1
return False
install_jitdriver_spec('FastSearch',
- greens=['i', 'prefix_len', 'ctx.pattern'],
+ greens=['i', 'prefix_len', 'pattern'],
reds=['string_position', 'ctx'],
debugprint=(2, 0))
@specializectx
-def fast_search(ctx):
+def fast_search(ctx, pattern):
# skips forward in a string as fast as possible using information from
# an optimization info block
# <INFO> <1=skip> <2=flags> <3=min> <4=...>
@@ -1155,17 +1172,18 @@
string_position = ctx.match_start
if string_position >= ctx.end:
return False
- prefix_len = ctx.pat(5)
+ prefix_len = pattern.pat(5)
assert prefix_len >= 0
i = 0
while True:
ctx.jitdriver_FastSearch.jit_merge_point(ctx=ctx,
- string_position=string_position, i=i, prefix_len=prefix_len)
+ string_position=string_position, i=i, prefix_len=prefix_len,
+ pattern=pattern)
char_ord = ctx.str(string_position)
- if char_ord != ctx.pat(7 + i):
+ if char_ord != pattern.pat(7 + i):
if i > 0:
overlap_offset = prefix_len + (7 - 1)
- i = ctx.pat(overlap_offset + i)
+ i = pattern.pat(overlap_offset + i)
continue
else:
i += 1
@@ -1173,22 +1191,22 @@
# found a potential match
start = string_position + 1 - prefix_len
assert start >= 0
- prefix_skip = ctx.pat(6)
+ prefix_skip = pattern.pat(6)
ptr = start + prefix_skip
- #flags = ctx.pat(2)
+ #flags = pattern.pat(2)
#if flags & rsre_char.SRE_INFO_LITERAL:
# # matched all of pure literal pattern
# ctx.match_start = start
# ctx.match_end = ptr
# ctx.match_marks = None
# return True
- pattern_offset = ctx.pat(1) + 1
+ pattern_offset = pattern.pat(1) + 1
ppos_start = pattern_offset + 2 * prefix_skip
- if sre_match(ctx, ppos_start, ptr, None) is not None:
+ if sre_match(ctx, pattern, ppos_start, ptr, None) is not None:
ctx.match_start = start
return True
overlap_offset = prefix_len + (7 - 1)
- i = ctx.pat(overlap_offset + i)
+ i = pattern.pat(overlap_offset + i)
string_position += 1
if string_position >= ctx.end:
return False
diff --git a/rpython/rlib/rsre/test/test_char.py b/rpython/rlib/rsre/test/test_char.py
--- a/rpython/rlib/rsre/test/test_char.py
+++ b/rpython/rlib/rsre/test/test_char.py
@@ -1,10 +1,16 @@
-from rpython.rlib.rsre import rsre_char
+from rpython.rlib.rsre import rsre_char, rsre_core
from rpython.rlib.rsre.rsre_char import SRE_FLAG_LOCALE, SRE_FLAG_UNICODE
def setup_module(mod):
from rpython.rlib.unicodedata import unicodedb
rsre_char.set_unicode_db(unicodedb)
+
+def check_charset(pattern, idx, char):
+ p = rsre_core.CompiledPattern(pattern)
+ return rsre_char.check_charset(Ctx(p), p, idx, char)
+
+
UPPER_PI = 0x3a0
LOWER_PI = 0x3c0
INDIAN_DIGIT = 0x966
@@ -157,12 +163,12 @@
pat_neg = [70, ord(cat) | 0x80, 0]
for c in positive:
assert unicodedb.category(ord(c)).startswith(cat)
- assert rsre_char.check_charset(Ctx(pat_pos), 0, ord(c))
- assert not rsre_char.check_charset(Ctx(pat_neg), 0, ord(c))
+ assert check_charset(pat_pos, 0, ord(c))
+ assert not check_charset(pat_neg, 0, ord(c))
for c in negative:
assert not unicodedb.category(ord(c)).startswith(cat)
- assert not rsre_char.check_charset(Ctx(pat_pos), 0, ord(c))
- assert rsre_char.check_charset(Ctx(pat_neg), 0, ord(c))
+ assert not check_charset(pat_pos, 0, ord(c))
+ assert check_charset(pat_neg, 0, ord(c))
def cat2num(cat):
return ord(cat[0]) | (ord(cat[1]) << 8)
@@ -173,17 +179,16 @@
pat_neg = [70, cat2num(cat) | 0x80, 0]
for c in positive:
assert unicodedb.category(ord(c)) == cat
- assert rsre_char.check_charset(Ctx(pat_pos), 0, ord(c))
- assert not rsre_char.check_charset(Ctx(pat_neg), 0, ord(c))
+ assert check_charset(pat_pos, 0, ord(c))
+ assert not check_charset(pat_neg, 0, ord(c))
for c in negative:
assert unicodedb.category(ord(c)) != cat
- assert not rsre_char.check_charset(Ctx(pat_pos), 0, ord(c))
- assert rsre_char.check_charset(Ctx(pat_neg), 0, ord(c))
+ assert not check_charset(pat_pos, 0, ord(c))
+ assert check_charset(pat_neg, 0, ord(c))
# test for how the common 'L&' pattern might be compiled
pat = [70, cat2num('Lu'), 70, cat2num('Ll'), 70, cat2num('Lt'), 0]
- assert rsre_char.check_charset(Ctx(pat), 0, 65) # Lu
- assert rsre_char.check_charset(Ctx(pat), 0, 99) # Ll
- assert rsre_char.check_charset(Ctx(pat), 0, 453) # Lt
- assert not rsre_char.check_charset(Ctx(pat), 0, 688) # Lm
- assert not rsre_char.check_charset(Ctx(pat), 0, 5870) # Nl
+ assert check_charset(pat, 0, 65) # Lu
+ assert check_charset(pat, 0, 99) # Ll
+ assert check_charset(pat, 0, 453) # Lt
+ assert not check_charset(pat, 0, 688) # Lm
+ assert not check_charset(pat, 0, 5870) # Nl
diff --git a/rpython/rlib/rsre/test/test_ext_opcode.py b/rpython/rlib/rsre/test/test_ext_opcode.py
--- a/rpython/rlib/rsre/test/test_ext_opcode.py
+++ b/rpython/rlib/rsre/test/test_ext_opcode.py
@@ -17,10 +17,10 @@
# it's a valid optimization because \1 is always one character long
r = [MARK, 0, ANY, MARK, 1, REPEAT_ONE, 6, 0, MAXREPEAT,
GROUPREF, 0, SUCCESS, SUCCESS]
- assert rsre_core.match(r, "aaa").match_end == 3
+ assert rsre_core.match(rsre_core.CompiledPattern(r), "aaa").match_end == 3
def test_min_repeat_one_with_backref():
# Python 3.5 compiles "(.)\1*?b" using MIN_REPEAT_ONE
r = [MARK, 0, ANY, MARK, 1, MIN_REPEAT_ONE, 6, 0, MAXREPEAT,
GROUPREF, 0, SUCCESS, LITERAL, 98, SUCCESS]
- assert rsre_core.match(r, "aaab").match_end == 4
+ assert rsre_core.match(rsre_core.CompiledPattern(r), "aaab").match_end == 4
diff --git a/rpython/rlib/rsre/test/test_match.py b/rpython/rlib/rsre/test/test_match.py
--- a/rpython/rlib/rsre/test/test_match.py
+++ b/rpython/rlib/rsre/test/test_match.py
@@ -9,7 +9,7 @@
def test_get_code_repetition():
c1 = get_code(r"a+")
c2 = get_code(r"a+")
- assert c1 == c2
+ assert c1.pattern == c2.pattern
class TestMatch:
@@ -305,6 +305,6 @@
rsre_char.set_unicode_db(unicodedb)
#
r = get_code(u"[\U00010428-\U0001044f]", re.I)
- assert r.count(27) == 1 # OPCODE_RANGE
- r[r.index(27)] = 32 # => OPCODE_RANGE_IGNORE
+ assert r.pattern.count(27) == 1 # OPCODE_RANGE
+ r.pattern[r.pattern.index(27)] = 32 # => OPCODE_RANGE_IGNORE
assert rsre_core.match(r, u"\U00010428")
diff --git a/rpython/rlib/rsre/test/test_re.py b/rpython/rlib/rsre/test/test_re.py
--- a/rpython/rlib/rsre/test/test_re.py
+++ b/rpython/rlib/rsre/test/test_re.py
@@ -426,31 +426,6 @@
assert pat.match(p) is not None
assert pat.match(p).span() == (0,256)
- def test_pickling(self):
- import pickle
- self.pickle_test(pickle)
- import cPickle
- self.pickle_test(cPickle)
- # old pickles expect the _compile() reconstructor in sre module
- import warnings
- original_filters = warnings.filters[:]
- try:
- warnings.filterwarnings("ignore", "The sre module is deprecated",
- DeprecationWarning)
- from sre import _compile
- finally:
- warnings.filters = original_filters
-
- def pickle_test(self, pickle):
- oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
- s = pickle.dumps(oldpat)
- newpat = pickle.loads(s)
- # Not using object identity for _sre.py, since some Python builds do
- # not seem to preserve that in all cases (observed on an UCS-4 build
- # of 2.4.1).
- #self.assertEqual(oldpat, newpat)
- assert oldpat.__dict__ == newpat.__dict__
-
def test_constants(self):
assert re.I == re.IGNORECASE
assert re.L == re.LOCALE
diff --git a/rpython/rlib/rsre/test/test_zinterp.py b/rpython/rlib/rsre/test/test_zinterp.py
--- a/rpython/rlib/rsre/test/test_zinterp.py
+++ b/rpython/rlib/rsre/test/test_zinterp.py
@@ -11,6 +11,7 @@
rsre_core.search(pattern, string)
#
unicodestr = unichr(n) * n
+ pattern = rsre_core.CompiledPattern(pattern)
ctx = rsre_core.UnicodeMatchContext(pattern, unicodestr,
0, len(unicodestr), 0)
rsre_core.search_context(ctx)
diff --git a/rpython/rlib/rsre/test/test_zjit.py b/rpython/rlib/rsre/test/test_zjit.py
--- a/rpython/rlib/rsre/test/test_zjit.py
+++ b/rpython/rlib/rsre/test/test_zjit.py
@@ -6,7 +6,7 @@
from rpython.rtyper.annlowlevel import llstr, hlstr
def entrypoint1(r, string, repeat):
- r = array2list(r)
+ r = rsre_core.CompiledPattern(array2list(r))
string = hlstr(string)
match = None
for i in range(repeat):
@@ -17,7 +17,7 @@
return match.match_end
def entrypoint2(r, string, repeat):
- r = array2list(r)
+ r = rsre_core.CompiledPattern(array2list(r))
string = hlstr(string)
match = None
for i in range(repeat):
@@ -48,13 +48,13 @@
def meta_interp_match(self, pattern, string, repeat=1):
r = get_code(pattern)
- return self.meta_interp(entrypoint1, [list2array(r), llstr(string),
+ return self.meta_interp(entrypoint1, [list2array(r.pattern), llstr(string),
repeat],
listcomp=True, backendopt=True)
def meta_interp_search(self, pattern, string, repeat=1):
r = get_code(pattern)
- return self.meta_interp(entrypoint2, [list2array(r), llstr(string),
+ return self.meta_interp(entrypoint2, [list2array(r.pattern), llstr(string),
repeat],
listcomp=True, backendopt=True)
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit