qwweeeit wrote:
> Thanks! If you answer my posts one more time I could consider you as
> my tutor...
>
> It was strange to have found a bug...! In any case I will not go deeper
> into the matter, because your explanation is enough for me.
> I corrected the problem by hand, removing the tokens spanning multiple
> lines (there were only 8 cases...).
>
> However, I haven't understood your hint about comments...
> I succeeded in putting together a Python script which removes comments.
>
> Here it is (in all its cumbersome and cryptic appearance!...):
>
> # removeCommentsTok.py
> import tokenize
> Input = "pippo1"
> Output = "pippo2"
> f = open(Input)
> fOut = open(Output, "w")
>
> nLastLine = 0
> for i in tokenize.generate_tokens(f.readline):
>     if i[0] == 52 and nLastLine != i[2][0]:
>         fOut.write(i[4].replace(i[1], '').rstrip() + '\n')
>         nLastLine = i[2][0]
>     elif i[0] == 4 and nLastLine != i[2][0]:
>         fOut.write(i[4])
>         nLastLine = i[2][0]
> f.close()
> fOut.close()
>
> Some explanations for the guys like me...:
> - 52 and 4 are the token codes for comments and NEWLINE respectively
> - the comment removal is done by clearing the comment text (i[1]) out of
>   the input line (i[4])
> - I also right-trimmed the line to get rid of the remaining blanks.

The tokenizer sends multiline strings and comments as a single token.
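Rather than hard-coding 52 and 4 (they are just whatever numbers your
interpreter happens to assign), you can use the named constants that the
tokenize and token modules export. Here is a rough sketch of the same idea
(the strip_comments wrapper is mine; it keeps your pippo1/pippo2 file names
and also shares your script's limitation for statements that span several
physical lines):

# removeCommentsTok2.py -- same approach as the quoted script, but using
# the named constants tokenize.COMMENT and token.NEWLINE instead of the
# magic numbers 52 and 4
import token
import tokenize

def strip_comments(infile, outfile):
    f = open(infile)
    fOut = open(outfile, 'w')
    nLastLine = 0
    for toktype, toktext, (srow, scol), (erow, ecol), line in \
            tokenize.generate_tokens(f.readline):
        if toktype == tokenize.COMMENT and nLastLine != srow:
            # blank out the comment text and drop the trailing spaces
            fOut.write(line.replace(toktext, '').rstrip() + '\n')
            nLastLine = srow
        elif toktype == token.NEWLINE and nLastLine != srow:
            fOut.write(line)
            nLastLine = srow
    f.close()
    fOut.close()

if __name__ == '__main__':
    strip_comments('pippo1', 'pippo2')

For something a bit more thorough, below is a comment and whitespace
stripper: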
######################################################################
# python comment and whitespace stripper :)
######################################################################

import keyword, os, sys, traceback
import StringIO
import token, tokenize

__credits__ = 'just another tool that I needed'
__version__ = '.7'
__author__ = 'M.E.Farmer'
__date__ = 'Jan 15 2005, Oct 24 2004'

######################################################################

class Stripper:
    """python comment and whitespace stripper :)"""

    def __init__(self, raw):
        self.raw = raw

    def format(self, out=sys.stdout, comments=0, spaces=1,
               untabify=1, eol='unix'):
        ''' strip comments,
            strip extra whitespace,
            convert EOL's from Python code.
        '''
        # Store line offsets in self.lines
        self.lines = [0, 0]
        pos = 0
        # Strips the first blank line if 1
        self.lasttoken = 1
        self.temp = StringIO.StringIO()
        self.spaces = spaces
        self.comments = comments
        if untabify:
            self.raw = self.raw.expandtabs()
        self.raw = self.raw.rstrip() + ' '
        self.out = out
        self.raw = self.raw.replace('\r\n', '\n')
        self.raw = self.raw.replace('\r', '\n')
        self.lineend = '\n'
        # Gather lines
        while 1:
            pos = self.raw.find(self.lineend, pos) + 1
            if not pos:
                break
            self.lines.append(pos)
        self.lines.append(len(self.raw))
        # Wrap text in a filelike object
        self.pos = 0
        text = StringIO.StringIO(self.raw)
        # Parse the source.
        ## Tokenize calls the __call__
        ## function for each token till done.
        try:
            tokenize.tokenize(text.readline, self)
        except tokenize.TokenError, ex:
            traceback.print_exc()
        # Ok now we write it to a file,
        # but we also need to clean the whitespace
        # between the lines and at the ends.
        self.temp.seek(0)
        # Mac CR
        if eol == 'mac':
            self.lineend = '\r'
        # Windows CR LF
        elif eol == 'win':
            self.lineend = '\r\n'
        # Unix LF
        else:
            self.lineend = '\n'
        for line in self.temp.readlines():
            if spaces == -1:
                self.out.write(line.rstrip() + self.lineend)
            else:
                if not line.isspace():
                    self.lasttoken = 0
                    self.out.write(line.rstrip() + self.lineend)
                else:
                    self.lasttoken += 1
                    if self.lasttoken <= self.spaces and self.spaces:
                        self.out.write(self.lineend)

    def __call__(self, toktype, toktext, (srow, scol), (erow, ecol), line):
        ''' Token handler.
        '''
        # calculate new positions
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)
        # kill the comments ?
        if not self.comments:
            if toktype == tokenize.COMMENT:
                return
        # handle newlines
        if toktype in [token.NEWLINE, tokenize.NL]:
            self.temp.write(self.lineend)
            return
        # send the original whitespace, if needed
        if newpos > oldpos:
            self.temp.write(self.raw[oldpos:newpos])
        # skip indenting tokens
        if toktype in [token.INDENT, token.DEDENT]:
            self.pos = newpos
            return
        # send text to the temp file
        self.temp.write(toktext)
        return

######################################################################

def Main():
    import sys
    if len(sys.argv) > 1:
        filein = open(sys.argv[1]).read()
        Stripper(filein).format(out=sys.stdout, comments=1,
                                untabify=1, eol='win')

######################################################################

if __name__ == '__main__':
    Main()

M.E.Farmer
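If you would rather call the stripper from another script than from the
command line, a minimal driver along these lines should work (the module
name stripper.py and the file names myscript.py / myscript_stripped.py are
made-up names for this sketch, not part of the code above):

# strip_driver.py -- hypothetical driver for the Stripper class,
# assuming the code above was saved as stripper.py
from stripper import Stripper

def strip_file(src, dst):
    raw = open(src).read()
    out = open(dst, 'w')
    # comments=0 drops comments, spaces=1 keeps at most one blank line,
    # untabify=1 expands tabs, eol='unix' writes LF line endings
    Stripper(raw).format(out=out, comments=0, spaces=1,
                         untabify=1, eol='unix')
    out.close()

if __name__ == '__main__':
    strip_file('myscript.py', 'myscript_stripped.py')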