Whoops! Forgot an executable example ;).
Attached, and also available at
http://issola.caltech.edu/~t/transfer/test-enc.py and http://issola.caltech.edu/~t/transfer/test-enc.html
Run 'python test-enc.py test-enc.html' and note that htmllib.HTMLParser-based parsers give different output than HTMLParser.HTMLParser-based parsers.
cheers, --titus
#!/usr/bin/env python2.4 import htmllib import HTMLParser import formatter
### a simple mix-in to demonstrate the problem. class MixinTest: def start_option(self, attrs): print '==> OPTION starting', attrs # Definition of entities -- derived classes may override entitydefs = \ {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''} def handle_entityref(self, name): print '==> HANDLING ENTITY', name table = self.entitydefs if name in table: self.handle_data(table[name]) else: self.unknown_entityref(name) return #### class htmllib_Parser(MixinTest, htmllib.HTMLParser): def __init__(self): htmllib.HTMLParser.__init__(self, formatter.NullFormatter()) class nonhtmllib_Parser(MixinTest, HTMLParser.HTMLParser): def handle_starttag(self, name, attrs): "Redirect OPTION tag ==> MixinTest.start_option" if name == 'option': self.start_option(attrs) pass ### import sys data = open(sys.argv[1]).read() print 'PARSING with htmllib.HTMLParser' htmllib_p = htmllib_Parser() htmllib_p.feed(data) print '\nPARSING with HTMLParser.HTMLParser' nonhtmllib_p = nonhtmllib_Parser() nonhtmllib_p.feed(data)Size of pizza (measured in "): Small (6) Medium (10) Large (14)
-- http://mail.python.org/mailman/listinfo/python-list