Steve Holden wrote:
Michael Spencer wrote:
Andrew Dalke wrote:
I see you assume that only \w+ can fit inside the %(...) key
of a format string. The actual Python implementation allows anything
up to the matching (balanced) closing parenthesis.
Gah! I guess that torpedoes the regexp approach, then.
Thanks for looking at this.
Michael
While Andrew may have found the "fatal flaw" in your scheme, it's worth
pointing out that it works just fine for my original use case.
regards
Steve
Thanks. Here's a version that overcomes the 'fatal' flaw.
class StringFormatInfo(object):
    """Describe the conversion specifiers found in a %-style format template.

    After construction the instance exposes:

    * ``template``     -- the format string that was passed in.
    * ``format_type``  -- ``"MAPPING"`` if at least one specifier carries a
      parenthesised key (``%(name)s``), otherwise ``"POSITIONAL"``.
    * ``formats``      -- a list with one dict per specifier, holding the keys
      ``'name'``, ``'conversion'``, ``'width'``, ``'precision'``,
      ``'lengthmodifier'`` and ``'type'``.  Parts that are absent from the
      specifier are stored as ``None``.
    * ``format_names`` -- for a MAPPING template, a dict mapping each key to
      its type character (a specifier without a key is stored under ``None``);
      for a POSITIONAL template, a tuple listing, in order, what each
      argument feeds: ``'width'`` / ``'precision'`` for every ``*`` and the
      type character for the value itself.
    """

    def __init__(self, template):
        """Store *template* and parse it immediately."""
        self.template = template
        self.parse()

    def tokenizer(self):
        """Yield one dict per conversion specifier found in the template.

        Sets ``self.format_type`` as a side effect while scanning, so that
        attribute is only final once the generator has been exhausted.

        Raises:
            ValueError: if a specifier has no recognised conversion type.
        """
        lexer = TinyLexer(self.template)
        self.format_type = "POSITIONAL"
        while lexer.search(r"%"):
            # "%%" is a literal percent sign, not a specifier.
            if lexer.match(r"%"):
                continue
            name = lexer.takeparens()
            if name is not None:
                self.format_type = "MAPPING"
            # 'name' is always present (None when the specifier has no key)
            # so that parse() can index it for every specifier.
            spec = {'name': name}
            # Several flags may be combined (e.g. "%-05d"), and a space is a
            # valid flag too; "+" keeps the result None when no flag is given.
            spec['conversion'] = lexer.match(r"[#0\- +]+")
            spec['width'] = lexer.match(r"\d+|\*")
            if lexer.match(r"\."):
                # A bare "." with no digits leaves the precision as None.
                spec['precision'] = lexer.match(r"\d+|\*")
            else:
                spec['precision'] = None
            spec['lengthmodifier'] = lexer.match(r"[hlL]")
            # 'i' and 'u' are accepted by the % operator alongside 'd'.
            ftype = lexer.match(r"[diouxXeEfFgGcrs]")
            if not ftype:
                raise ValueError(
                    "invalid or missing conversion type in %r"
                    % (self.template,))
            spec['type'] = ftype
            yield spec

    def parse(self):
        """Populate ``formats`` and ``format_names`` from the template."""
        # The tokenizer must be run to completion before format_type is read,
        # because it updates format_type while it scans.
        self.formats = formats = list(self.tokenizer())
        if self.format_type == "MAPPING":
            self.format_names = dict((spec['name'], spec['type'])
                                     for spec in formats)
        else:
            expected = []
            for spec in formats:
                # A "*" consumes one extra positional argument each.
                if spec['width'] == '*':
                    expected.append('width')
                if spec['precision'] == '*':
                    expected.append('precision')
                expected.append(spec['type'])
            self.format_names = tuple(expected)

    def __mod__(self, values):
        """Apply the underlying template to *values* with the % operator."""
        return self.template % values

    def __repr__(self):
        """Return a two-line summary: format type, template and arguments."""
        return "%s Template: %s\nArguments: %s" % \
            (self.format_type, self.template, self.format_names)

    __str__ = __repr__
# Short alias for StringFormatInfo; tests() below uses it.
SFI = StringFormatInfo
def tests():
    """Print the parsed description of a few sample templates.

    Covers a mapping template (including an empty key and a stray positional
    specifier), a positional template using ``*`` for width and precision,
    and a key that itself contains nested parentheses.
    """
    # print(...) with one argument behaves the same as the print statement
    # and is also valid where print is a function.
    print(SFI('%(arg1)s %% %(arg2).*f %()s %s'))
    print(SFI('%s %*.*d %*s'))
    print(SFI('%(this(is)a.--test!)s'))
import re
class TinyLexer(object):
    """Minimal cursor-based scanner over a string, driven by regexes.

    Attributes:
        text: the string being scanned.
        ptr: index of the next unconsumed character.
        len: length of ``text``, captured at construction.
        re_cache: compiled patterns, keyed by their source string.
    """

    def __init__(self, text):
        """Start scanning *text* from its first character."""
        self.text = text
        self.ptr = 0
        self.len = len(text)
        self.re_cache = {}

    def match(self, regexp, consume=True, anchor=True):
        """Try *regexp* at (or after) the cursor and return the matched text.

        Args:
            regexp: a pattern string, or an already compiled pattern object.
            consume: when true, move the cursor to the end of the match.
            anchor: when true the match must start exactly at the cursor;
                otherwise the first match at or after the cursor is used.

        Returns:
            The matched substring, or None if there is no match (the cursor
            is left untouched in that case).
        """
        # Anything without a .match method is treated as pattern source and
        # compiled once per lexer; compiled patterns are used as given.
        if not hasattr(regexp, "match"):
            cache = self.re_cache
            if regexp not in cache:
                cache[regexp] = re.compile(regexp)
            regexp = cache[regexp]
        if anchor:
            matcher = regexp.match
        else:
            matcher = regexp.search
        found = matcher(self.text, self.ptr)
        if found is None:
            return None
        if consume:
            self.ptr = found.end()
        return found.group()

    def search(self, regexp, consume=True):
        """Find the first match of *regexp* at or after the cursor.

        Same as ``match(..., anchor=False)``; *consume* is honoured, so
        ``consume=False`` peeks without moving the cursor.
        """
        return self.match(regexp, consume=consume, anchor=False)

    def takeparens(self):
        """Consume a balanced ``( ... )`` group that starts at the cursor.

        Returns:
            The text between the outer parentheses (nested parentheses are
            kept verbatim), or None if the cursor is not on ``(`` -- including
            when the cursor is already at the end of the text.

        Raises:
            ValueError: if the opening parenthesis is never closed.  The
                cursor is left at the end of the text in that case.
        """
        start = self.ptr
        # Bounds check first: at end of text there is nothing to take.
        if start >= self.len or self.text[start] != '(':
            return None
        depth = 1
        pos = start + 1
        while pos < self.len:
            char = self.text[pos]
            pos += 1
            if char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
                if depth == 0:
                    self.ptr = pos
                    # Slice out the contents, excluding both outer parens.
                    return self.text[start + 1:pos - 1]
        self.ptr = pos
        raise ValueError("Unmatched parentheses")
--
http://mail.python.org/mailman/listinfo/python-list