Steve Holden wrote:
Michael Spencer wrote:

Andrew Dalke wrote:

I see you assume that only \w+ can fit inside of a %()
in a format string.  The actual Python code allows anything
up to the balanced closed parens.

Gah! I guess that torpedoes the regexp approach, then.

Thanks for looking at this.

Michael

While Andrew may have found the "fatal flaw" in your scheme, it's worth pointing out that it works just fine for my original use case.

regards
 Steve

Thanks. Here's a version that overcomes the 'fatal' flaw.

class StringFormatInfo(object):
    """Describe the conversion specifiers found in a %-style format template.

    After construction the instance exposes:

    * ``template``     -- the original format string;
    * ``format_type``  -- "MAPPING" if at least one specifier carries a
      ``%(name)`` key, otherwise "POSITIONAL";
    * ``formats``      -- one dict per specifier with the keys ``name``,
      ``conversion`` (the flag characters), ``width``, ``precision``,
      ``lengthmodifier`` and ``type``; parts that are absent are None;
    * ``format_names`` -- for a mapping template, a dict of
      name -> type character; for a positional template, a tuple with one
      entry per expected argument ('width' / 'precision' for every ``*``,
      followed by the type character).

    The constructor raises ValueError when a specifier is incomplete or
    has no recognised type character.
    """

    def __init__(self, template):
        self.template = template
        self.parse()

    def tokenizer(self):
        """Yield one dict per conversion specifier, in template order.

        Also sets ``self.format_type`` as a side effect, so the value is
        only final once the generator has been exhausted.
        """
        lexer = TinyLexer(self.template)
        self.format_type = "POSITIONAL"
        while lexer.search(r"%"):
            if lexer.match(r"%"):
                # "%%" is a literal percent sign, not a specifier.
                continue
            if lexer.ptr >= lexer.len:
                # A lone trailing '%' has nothing after it to parse.
                raise ValueError(
                    "incomplete format at end of %r" % (self.template,))
            spec = {}
            name = lexer.takeparens()
            if name is not None:
                self.format_type = "MAPPING"
            spec['name'] = name
            # Any number of flags may be combined ('%-05d', '%+.2f') and a
            # space is a valid flag too ('% d').  None when no flag is given.
            spec['conversion'] = lexer.match(r"[#0\- +]+")
            spec['width'] = lexer.match(r"\d+|\*")
            precision = None
            if lexer.match(r"\."):
                # A bare '.' with no digits leaves the precision as None.
                precision = lexer.match(r"\d+|\*")
            spec['precision'] = precision
            spec['lengthmodifier'] = lexer.match(r"[hlL]")
            ftype = lexer.match(r"[diouxXeEfFgGcrs]")
            if not ftype:
                raise ValueError(
                    "missing or unsupported format character at index %d "
                    "of %r" % (lexer.ptr, self.template))
            spec['type'] = ftype
            yield spec

    def parse(self):
        """Tokenize the template and build ``formats`` / ``format_names``."""
        # list() exhausts the generator, which finalises self.format_type
        # before it is inspected below.
        self.formats = specs = list(self.tokenizer())
        if self.format_type == "MAPPING":
            self.format_names = dict((spec['name'], spec['type'])
                                     for spec in specs)
        else:
            names = []
            for spec in specs:
                # Each '*' consumes one extra positional argument.
                if spec['width'] == '*':
                    names.append('width')
                if spec['precision'] == '*':
                    names.append('precision')
                names.append(spec['type'])
            self.format_names = tuple(names)

    def __mod__(self, values):
        """Apply the underlying template to ``values`` with the % operator."""
        return self.template % values

    def __repr__(self):
        return "%s Template: %s\nArguments: %s" % \
                (self.format_type, self.template, self.format_names)
    __str__ = __repr__

# Short alias for StringFormatInfo; tests() below uses it.
SFI = StringFormatInfo

def tests():
    """Print the parsed description of a few sample templates."""
    samples = (
        '%(arg1)s %% %(arg2).*f %()s %s',
        '%s %*.*d %*s',
        '%(this(is)a.--test!)s',
    )
    for template in samples:
        # A parenthesised single argument prints the same under the
        # Python 2 print statement and the Python 3 print function.
        print(SFI(template))


import re

class TinyLexer(object):
    """Minimal cursor-based scanner over a string.

    ``ptr`` is the index of the next unread character; the matching
    methods advance it past whatever they consume.
    """

    def __init__(self, text):
        self.text = text
        self.ptr = 0
        self.len = len(text)
        # Compiled patterns, keyed by their source string.
        self.re_cache = {}

    def match(self, regexp, consume=True, anchor=True):
        """Try ``regexp`` at (or, if not anchored, after) the cursor.

        regexp  -- a pattern string or an already compiled pattern.
        consume -- when true, move the cursor to the end of the match.
        anchor  -- when true the match must start at the cursor; otherwise
                   the first match at or after the cursor is used.

        Returns the matched text, or None when nothing matches.
        """
        if not hasattr(regexp, 'match'):
            # Plain pattern string (compiled patterns already have .match):
            # compile it once and reuse it on later calls.
            compiled = self.re_cache.get(regexp)
            if compiled is None:
                compiled = self.re_cache[regexp] = re.compile(regexp)
            regexp = compiled
        if anchor:
            found = regexp.match(self.text, self.ptr)
        else:
            found = regexp.search(self.text, self.ptr)
        if found is None:
            return None
        if consume:
            self.ptr = found.end()
        return found.group()

    def search(self, regexp, consume=True):
        """Like match(), but scan forward from the cursor for ``regexp``."""
        # Forward the caller's choice instead of always consuming.
        return self.match(regexp, consume=consume, anchor=False)

    def takeparens(self):
        """Consume a balanced ``(...)`` group starting at the cursor.

        Returns the text between the outermost parentheses (nested pairs
        are kept verbatim), or None -- without moving the cursor -- when
        the cursor is at the end of the text or not on a '('.

        Raises ValueError when the group is never closed; the cursor is
        then left at the end of the text.
        """
        start = self.ptr
        # Guard the index: the cursor equals len(text) when the previous
        # token was the last character of the input.
        if start >= self.len or self.text[start] != '(':
            return None
        level = 1
        pos = start + 1
        while pos < self.len:
            char = self.text[pos]
            pos += 1
            if char == '(':
                level += 1
            elif char == ')':
                level -= 1
                if level == 0:
                    self.ptr = pos
                    # Slice out the contents, excluding both outer parens.
                    return self.text[start + 1:pos - 1]
        self.ptr = pos
        raise ValueError("Unmatched parentheses")





--
http://mail.python.org/mailman/listinfo/python-list

Reply via email to