You may define a start_a method in MyHTMLParser, e.g.: import htmllib import formatter
class HTML_Parser(htmllib.HTMLParser): def __init__(self): htmllib.HTMLParser.__init__(self, formatter.AbstractFormatter(formatter.NullWriter())) def start_a(self, args): for key, value in args: if key.lower() == 'href': print value html = HTML_Parser() html.feed(open(r'a.htm','r').read()) html.close() On 24 Sep 2005 10:13:30 -0700, George <[EMAIL PROTECTED]> wrote: > How can I parse an HTML file and collect only the A tags? I have a > start for the code but am unable to figure out how to finish the code. > HTML_parse gets the data from the URL document. Thanks for the help > > def HTML_parse(data): > from HTMLParser import HTMLParser > parser = MyHTMLParser() > > parser.feed(data) > > class MyHTMLParser(HTMLParser): > > def handle_starttag(self, tag, attrs): > > def handle_endtag(self, tag): > > def read_page(URL): > "this function returns the entire content of the specified URL > document" > import urllib > connect = urllib.urlopen(url) > data = connect.read() > connect.close() > return data > > -- > http://mail.python.org/mailman/listinfo/python-list > -- Best Regards, Leo Jay -- http://mail.python.org/mailman/listinfo/python-list