Sorry, I needed some sleep. It works OK. But I have one more question, if you want to answer it.
I use this code:

----------------------------------------------------------
import StringIO
import re
import urllib2,htmllib, formatter

class mvbHTMLParser(htmllib.HTMLParser):
    def __init__(self, formatter, verbose=0):
        htmllib.HTMLParser.__init__(self,formatter,verbose)

def getContent(url):
    try:
        line = urllib2.urlopen(url)
        htmlToText(line.read().lower())
    except IOError,(strerror):
        print strerror

def htmlToText(html):
    file = StringIO.StringIO()
    f = formatter.AbstractFormatter(formatter.DumbWriter(file))
    p = mvbHTMLParser(f)
    p.feed(html)
    p.close()
    print file.getvalue()

getContent('http://www.zquare.nl/test.html')
----------------------------------------------------------

Then the output is:

text_text a_link[1]

That's OK, but how do I delete the [n]? Like this?:

del = re.compile(r'[0-9]',).sub

Thanks for the fast help,
GC-Martijn
--
http://mail.python.org/mailman/listinfo/python-list