"Raymond Hettinger" <[EMAIL PROTECTED]> wrote in message:
> The source for the tokenize module covers all these bases.
# > Raymond Hettinger

# tokenize text replace
# NOTE: keyword, os and string are not used below; they are kept because the
# original post imported them.
import keyword
import os
import string
import sys
import token
import tokenize
import traceback

try:
    from cStringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3


######################################################################
class Parser:
    """Python source code tokenizing text replacer.

    The source is tokenized and written back out token by token, with the
    text between tokens copied from the original.  Tokens whose type and
    text both match the search criteria are written as the replacement
    text instead.
    """

    def __init__(self, raw, out=sys.stdout):
        """Store the source text and the output stream.

        raw -- source text; tabs are expanded and surrounding whitespace
               is stripped before it is stored.
        out -- object with a ``write`` method that receives the result.
        """
        self.raw = raw.expandtabs().strip()
        self.out = out

    def format(self, search='', replace='', replacetokentype=token.NAME):
        """Tokenize the stored source and write the rewritten text to ``out``.

        search           -- exact token text to look for.
        replace          -- text written in place of each matching token.
        replacetokentype -- token type a token must have to be replaced
                            (``token.NAME`` by default).

        A ``tokenize.TokenError`` is reported with a traceback on stderr
        and not re-raised; output written before the error is kept.
        """
        self.searchtext = search
        self.replacetext = replace
        self.replacetokentype = replacetokentype

        # Store line offsets in self.lines.  Token rows are 1-based, so
        # index 0 is a placeholder and index 1 is the offset of row 1.
        self.lines = [0, 0]
        pos = 0
        while True:
            pos = self.raw.find('\n', pos) + 1
            if not pos:
                break
            self.lines.append(pos)
        # Extra entry so tokens reported on the row after the last line
        # (end-of-file tokens) still map to a valid offset.
        self.lines.append(len(self.raw))

        # Wrap text in a filelike object
        self.pos = 0
        text = StringIO(self.raw)

        # Parse the source, calling __call__ for each token till done.
        try:
            for tok in tokenize.generate_tokens(text.readline):
                self(*tok)
        except tokenize.TokenError:
            traceback.print_exc()

    def __call__(self, toktype, toktext, start, end, line):
        """Token handler: write one token, replacing it if it matches.

        toktype -- token type constant.
        toktext -- token text.
        start   -- (row, column) where the token begins.
        end     -- (row, column) where the token ends (unused).
        line    -- physical source line of the token (unused).
        """
        srow, scol = start

        # calculate new positions
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)

        # handle newlines
        if toktype in (token.NEWLINE, tokenize.NL):
            self.out.write('\n')
            return

        # send the original whitespace, if needed
        if newpos > oldpos:
            self.out.write(self.raw[oldpos:newpos])

        # skip indenting tokens; rewinding pos lets the indentation be
        # copied from the raw text ahead of the next real token
        if toktype in (token.INDENT, token.DEDENT):
            self.pos = newpos
            return

        # search for matches to our searchtext
        # customize this for your exact needs
        if toktype == self.replacetokentype and toktext == self.searchtext:
            toktext = self.replacetext

        # write it out
        self.out.write(toktext)


######################################################################
# just an example
def Main():
    """Rewrite this script's own source, renaming 'tokenize' to 'MyNewName'."""
    if sys.argv[0]:
        with open(sys.argv[0]) as source_file:
            filein = source_file.read()
        Parser(filein, out=sys.stdout).format('tokenize', 'MyNewName')


######################################################################
if __name__ == '__main__':
    Main()
# end of code

# This is an example of how to use tokenize to replace names that match a
# search string. If you wanted to only replace strings and not names then
# change the replacetokentype to token.STRING instead of token.NAME etc...
#
# HTH,
# M.E.Farmer
# --
# http://mail.python.org/mailman/listinfo/python-list