qwweeeit wrote: > For some Python code I am writing I need to remove all string > definitions from the source and substitute them with a placeholder. > > To make it clearer: > line 45 sVar="this is the string assigned to sVar" > must be converted into: > line 45 sVar=s00001 > > Such a substitution is recorded in a file as: > s00001[line 45]="this is the string assigned to sVar" > > For curious guys: > I am trying to implement a cross variable reference tool and the > variability (in length) of the string definitions (especially if > multi-line) can cause display problems. > > I need your help in correctly identifying the strings (also embedding > the r'xx..' or u'yy...' as part of the string definition). The problem > is mainly in the multi-line definitions or in cached strings > (embedding chr() definitions or escape sequences).
Hello, I have written a few python parsers before. Here is my attempt :) # string_mapper.py from __future__ import generators# python 2.2 import keyword, os, sys, traceback import cStringIO, token, tokenize def StringNamer(num=0): '''This is a name creating generator''' while 1: num += 1 stringname = 's'+str(num).zfill(6) yield stringname class ReplaceParser(object): """ >>> filein = open('yourfilehere.py').read() >>> replacer = ReplaceParser(filein, out=sys.stdout) >>> replacer.format() >>> replacer.StringMap """ def __init__(self, raw, out=sys.stdout): ''' Store the source text. ''' self.raw =raw.expandtabs().strip() self.out = out self.StringName = StringNamer() self.StringMap = {} def format(self): ''' Parse and send the source. ''' self.lines = [0, 0] pos = 0 self.temp = cStringIO.StringIO() while 1: pos = self.raw.find('\n', pos) + 1 if not pos: break self.lines.append(pos) self.lines.append(len(self.raw)) self.pos = 0 text = cStringIO.StringIO(self.raw) try: tokenize.tokenize(text.readline, self) except tokenize.TokenError, ex: traceback.print_exc() def __call__(self, toktype, toktext, (srow,scol), (erow,ecol), line): ''' Token handler. ''' oldpos = self.pos newpos = self.lines[srow] + scol self.pos = newpos + len(toktext) if toktype in [token.NEWLINE, tokenize.NL]: self.out.write('\n') return if newpos > oldpos: self.out.write(self.raw[oldpos:newpos]) if toktype in [token.INDENT, token.DEDENT]: self.pos = newpos return if (toktype == token.STRING): sname = self.StringName.next() self.StringMap[sname] = toktext toktext = sname self.out.write(toktext) self.out.flush() return hth, M.E.Farmer -- http://mail.python.org/mailman/listinfo/python-list