On Wed, 20 Apr 2005 17:09:16 -0700, Michael Spencer <[EMAIL PROTECTED]> wrote:
>Andrew Dalke wrote: > >> I see you assume that only \w+ can fit inside of a %() >> in a format string. The actual Python code allows anything >> up to the balanced closed parens. >> >Gah! I guess that torpedoes the regexp approach, then. > >Thanks for looking at this > I brute-forced a str subclass that will call a mapping object's __getitem__ for both kinds of format spec and '*' specs. Just to see what it would take. I didn't go the whole way looking for a __format__ method on the mapping object, along the lines I suggested in a previous post. Someone else's turn again ;-) This has not been tested thoroughly... The approach is to scan the original format string and put pieces into an out list and then ''.join that for final output. The pieces are the non-format parts and the strings produced by doing the formatting as formats are found. %(name) format args are retrieved from the mapping object by name as usual, and saved as the arg for the rewritten plain format made from the tail after %(name), which is the same tail as %tail, except that the value is already retrieved. Next '*' or decimal strings are packed into the rewritten format, etc. The '*' values are retrieved by integer values passed to mapobj[i] and incremented each time. If the arg value was not retrieved by name, that's another mapobj[i]. Then the conversion is done with the plain format. The tests have MixFmt(fmt, verbose=True) % MapObj(position_params, namedict) and the verbose prints each rewritten format and arg and result as it appends them to out. 
# ----< mixfmt.py >------------------------------------------------------------------------
# mixfmt.py -- a string subclass with __mod__ permitting mixed '%(name)s %s' formatting
import re


class MixFmtError(Exception):
    """Raised by MixFmt.__mod__ when the format string itself is malformed."""


class MixFmt(str):
    """
    A str subclass whose '%' operator accepts named and positional specs
    in the same format string, e.g. MixFmt('%(name)s %s') % mapobj.

    Construct with MixFmt(fmt) or MixFmt(fmt, verbose=True); with a true
    verbose value each rewritten plain format, its argument and the
    formatted result are printed as they are produced.
    """

    # Characters accepted as conversion flags, decimal digits of a width or
    # precision, length modifiers, and conversion types respectively.
    _FLAGS = '#0- +'
    _DIGITS = '0123456789'
    _LENGTH_MODS = 'hlL'
    _CONVERSIONS = 'diouxXeEfFgGcrs'

    def __new__(cls, s, **kw):
        # str is immutable, so the text has to be handed over in __new__;
        # keyword options are picked up by __init__.
        return str.__new__(cls, s)

    def __init__(self, *a, **kw):
        self._verbose = kw.get('verbose')

    # Michael Spencer's regex, slightly modded, but only for reference, since XXX note
    parse_format = re.compile(r'''
        (
        \%                     # placeholder
        (?:\(\w*\))?           # 0 or 1 "named" groups XXX "%( (any)(balanced) parens )s" is legal!
        [\#0\-\+]?             # 0 or 1 conversion flags
        (?:\* | \d+)?          # optional minimum conversion width
        (?:\.\* | \.\d+)?      # optional precision
        [hlL]?                 # optional length modifier
        [diouxXeEfFgGcrs]      # conversion type - note %% omitted
        )
        ''', re.VERBOSE)

    def _scan_key(self, pos):
        """
        Scan a parenthesized mapping key whose first character is at pos
        (i.e. the opening '(' is at pos-1) and return the index just past
        the ')' that balances it.  Nested parentheses are allowed.

        Raises MixFmtError if the parentheses never balance.
        """
        depth = 1
        while depth > 0:
            nextrp = self.find(')', pos)
            if nextrp < 0:
                raise MixFmtError('no match for "(" at %s' % (pos+1))
            nextlp = self.find('(', pos)
            if 0 <= nextlp < nextrp:
                # another '(' opens before the next ')': go one level deeper
                depth += 1
                pos = nextlp+1
            else:
                depth -= 1
                pos = nextrp+1
        return pos

    def _scan_count(self, pos, mapobj, iarg):
        """
        Parse an optional width/precision field starting at pos: either
        '*' (value fetched as mapobj[iarg]) or a run of decimal digits.

        Returns (text, newpos, newiarg) where text is the string to splice
        into the rewritten format ('' if no field was present).
        """
        end = len(self)
        if pos < end and self[pos] == '*':
            return str(mapobj[iarg]), pos+1, iarg+1
        eod = pos
        while eod < end and self[eod] in self._DIGITS:
            eod += 1
        return self[pos:eod], eod, iarg

    def __mod__(self, mapobj):
        """
        The '%' MixFmt string operation allowing both %(whatever)fmt and %fmt
        by calling mapobj[whatever] for named args, and mapobj[i] sequentially
        counting i for each '*' width or precision spec, and unnamed args.
        It is up to the mapobj to handle this. See MapObj example used in tests.

        Returns the formatted result as a plain str.

        Raises MixFmtError for an unbalanced '%(' key, a format spec that is
        cut off by the end of the string, or an unknown conversion type.
        Whatever mapobj[...] raises is passed through unchanged.
        """
        out = []
        iarg = 0                # next positional index handed to mapobj
        pos, end = 0, len(self)
        sentinel = object()     # marks "argument not fetched by name"
        while pos < end:
            last = pos
            pos = self.find('%', pos)
            # step over literal '%%' pairs; they are unescaped when the
            # surrounding text segment is appended below
            while pos >= 0 and self[pos:pos+2] == '%%':
                pos = self.find('%', pos+2)
            if pos < 0:
                out.append(self[last:].replace('%%', '%'))
                break
            # here we have start of fmt with % at pos
            out.append(self[last:pos].replace('%%', '%'))
            last = pos
            plain_arg = sentinel
            pos += 1
            if pos < end and self[pos] == '(':
                pos = self._scan_key(pos+1)
                plain_arg = mapobj[self[last+2:pos-1]]
            # the plain (unnamed) part of the spec starts here, at pos
            plain_fmt = '%'
            # zero or more conversion flags
            while pos < end and self[pos] in self._FLAGS:
                plain_fmt += self[pos]
                pos += 1
            # optional minimum conversion width
            width, pos, iarg = self._scan_count(pos, mapobj, iarg)
            plain_fmt += width
            # optional precision
            if pos < end and self[pos] == '.':
                precision, pos, iarg = self._scan_count(pos+1, mapobj, iarg)
                plain_fmt += '.' + precision
            # optional length modifier
            if pos < end and self[pos] in self._LENGTH_MODS:
                plain_fmt += self[pos]
                pos += 1
            # conversion type - note %% was handled above
            if pos >= end:
                # spec ran off the end of the string: report it as a format
                # error rather than letting self[pos] raise IndexError
                raise MixFmtError('incomplete format %r at %s' % (self[last:], last))
            if self[pos] not in self._CONVERSIONS:
                raise MixFmtError('Bad conversion type %r at %s' % (self[pos], pos))
            plain_fmt += self[pos]
            pos += 1
            if plain_arg is sentinel:
                # need arg
                plain_arg = mapobj[iarg]
                iarg += 1
            result = plain_fmt % (plain_arg,)
            if self._verbose:
                print(' -> %r %% %r => %r' % (plain_fmt, (plain_arg,), result))
            out.append(result)
        return ''.join(out)


class MapObj(object):
    """
    Example for test. Handles both named and positional (integer) keys
    for MixFmt(fmtstring) % MapObj(posargs, namedict)
    """
    def __init__(self, *args, **kw):
        self.args = args
        self.kw = kw

    def __getitem__(self, i):
        """
        Return the i-th positional argument for an int key, else the keyword
        argument named i.  An unknown name yields a '<KeyError:...>'
        placeholder string instead of raising.
        """
        if isinstance(i, int):
            return self.args[i]
        try:
            return self.kw[i]
        except KeyError:
            return '<KeyError:%r>' % (i,)


def test(fmt, *args, **namedict):
    """Print fmt and its arguments, then the verbose MixFmt formatting of them."""
    print('\n==== test with:\n %r\n %s\n %s' % (fmt, args, namedict))
    print(MixFmt(fmt, verbose=True) % MapObj(*args, **namedict))


def testseq():
    """Run the demonstration sequence of format strings through test()."""
    test('(no %%)')
    test('%s', *['first'])
    test('%(sym)s', **dict(sym='second'))
    test('%s %*.*d %*s', *['third -- expect " 012 ab" after colon:', 5, 3, 12, 4, 'ab'])
    test('%(arg1)s %% %(arg2).*f %()s %s', *[3, 'last'], **{
        'arg1':'fourth -- expect " % 2.220 NULL? last" after colon:',
        'arg2':2.22, '':'NULL?'})
    #'%s %*.*d %*s', *['expect " 345 ab"??:', 2, 1, 12345, 4, 'ab'])
    test('fifth -- non-key name: %(this(is)a.--test!)s')


# test() and testseq() are demo drivers, not unit tests; this keeps test
# collectors that select callables by the "test" name prefix from running them.
test.__test__ = False
testseq.__test__ = False


if __name__ == '__main__':
    import sys
    if not sys.argv[1:]:
        raise SystemExit('Usage: python24 mixfmt.py -test | fmt ([key =] (s | (-i|-f) num)+ )*')
    fmt, rawargs = sys.argv[1], list(sys.argv[2:])
    if fmt == '-test':
        testseq()
        raise SystemExit
    args = []
    namedict = {}
    to_name_dict = False
    while rawargs:
        arg = rawargs.pop(0)
        if arg in ('-i', '-f'):
            # -i / -f convert the NEXT command line word to int / float
            if not rawargs:
                raise SystemExit('%s must be followed by a number' % arg)
            if arg == '-i':
                arg = int(rawargs.pop(0))
            else:
                arg = float(rawargs.pop(0))
        if arg == '=':
            # "key = value": the word before '=' becomes the name
            if not args:
                raise SystemExit('"=" must be preceded by a key')
            to_name_dict = True
        elif to_name_dict:
            namedict[args.pop()] = arg
            to_name_dict = False
        else:
            args.append(arg)
    test(fmt, *args, **namedict)
# -----------------------------------------------------------------------------------------
# Result of py24 mixfmt.py -test:
#
# [10:06] C:\pywk\pymods>py24 mixfmt.py -test
#
# ==== test with:
#  '(no %%)'
#  ()
#  {}
# (no %)
#
# ==== test with:
#  '%s'
#  ('first',)
#  {}
#  -> '%s' % ('first',) => 'first'
# first
#
# ==== test with:
#  '%(sym)s'
#  ()
#  {'sym': 'second'}
#  -> '%s' % ('second',) => 'second'
# second
#
# ==== test with:
#  '%s %*.*d %*s'
#  ('third -- expect " 012 ab" after colon:', 5, 3, 12, 4, 'ab')
#  {}
#  -> '%s' % ('third -- expect " 012 ab" after colon:',) => 'third -- expect " 012 ab" after colon:'
#  -> '%5.3d' % (12,) => ' 012'
#  -> '%4s' % ('ab',) => ' ab'
# third -- expect " 012 ab" after colon: 012 ab
#
# ==== test with:
#  '%(arg1)s %% %(arg2).*f %()s %s'
#  (3, 'last')
#  {'': 'NULL?', 'arg1': 'fourth -- expect " % 2.220 NULL? last" after colon:', 'arg2': 2.2200000 000000002}
#  -> '%s' % ('fourth -- expect " % 2.220 NULL? last" after colon:',) => 'fourth -- expect " % 2.220 NULL? last" after colon:'
#  -> '%.3f' % (2.2200000000000002,) => '2.220'
#  -> '%s' % ('NULL?',) => 'NULL?'
#  -> '%s' % ('last',) => 'last'
# fourth -- expect " % 2.220 NULL? last" after colon: % 2.220 NULL?
last ==== test with: 'fifth -- non-key name: %(this(is)a.--test!)s' () {} -> '%s' % ("<KeyError:'this(is)a.--test!'>",) => "<KeyError:'this(is)a.--test!'>" fifth -- non-key name: <KeyError:'this(is)a.--test!'> You can also run it interactively with one format and some args, e.g., [10:25] C:\pywk\pymods>py24 mixfmt.py Usage: python24 mixfmt.py -test | fmt ([key =] (s | (-i|-f) num)+ )* [10:25] C:\pywk\pymods>py24 mixfmt.py "%*.*f %(hi)s" -i 6 -i 3 -f 3.5 hi = hello ==== test with: '%*.*f %(hi)s' (6, 3, 3.5) {'hi': 'hello'} -> '%6.3f' % (3.5,) => ' 3.500' -> '%s' % ('hello',) => 'hello' 3.500 hello Regards, Bengt Richter -- http://mail.python.org/mailman/listinfo/python-list