I came across its usage on Stack Overflow somewhere, but didn't see it in the docs. I'm using Python 2.7.
# I needed it while writing a class for generating text documents out of HTML
# documents for attaching to emails, which lowers spam scores. I lifted the
# basis for this from the top answer here: https://tinyurl.com/yb92x8ra
# While not complete, I thought it might be of interest. Improvements welcome:
#####################################################
try:  # Python 3
    from html.parser import HTMLParser
except ImportError:  # Python 2.7, which the original post targets
    from HTMLParser import HTMLParser

try:
    # html.unescape is the documented way to decode references; the
    # HTMLParser.unescape method was never documented and is gone in 3.9+.
    from html import unescape as _unescape
except ImportError:
    _unescape = HTMLParser().unescape


def main():
    """Run the extractor over a small sample document and print the text."""
    parser = TextExtractor()
    # NOTE(review): the archived post shows a literal <And this> inside <b>,
    # which would be parsed as a tag and print nothing. It is written with
    # entities here because that is the only markup that yields the quoted
    # output below -- presumably the list archive decoded them.
    sample = '''
    <html><head>head</head><body>
    <p>"Hi there!"</p>
    <script> some javascript </script>
    <style> class{style}</style>
    <scrip>Print this</scrip>
    <b>&lt;And this&gt;</b>
    </body>
    </html>
    '''
    print(parser.strip_tags(sample))


class TextExtractor(HTMLParser):
    """Collect the text content of an HTML document, dropping the markup.

    Text found inside any element listed in ``silent_tags`` (by default
    ``head``, ``script`` and ``style``) is discarded. Entity and character
    references outside those elements are decoded to the characters they
    stand for.

    Attributes:
        silent_tag: name of the silenced element currently open, or None.
        fed: list of text fragments collected so far.
        silent_tags: element names whose content must not be emitted.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.silent_tag = None
        self.fed = []
        self.silent_tags = ['head', 'script', 'style']

    def handle_starttag(self, tag, atts):
        """Start silencing output when a tag from ``silent_tags`` opens."""
        # Only the outermost silenced element is tracked. Otherwise a
        # <script> inside <head> would take over, and its end tag would
        # switch output back on for the remainder of <head>.
        if self.silent_tag is None and tag in self.silent_tags:
            self.silent_tag = tag

    def handle_endtag(self, tag):
        """Resume output when the element that started the silence closes."""
        if tag == self.silent_tag:
            self.silent_tag = None

    def handle_data(self, d):
        """Keep a run of text unless it sits inside a silenced element."""
        if not self.silent_tag:
            self.fed.append(d)

    def handle_entityref(self, name):
        """Decode a named reference such as ``&lt;`` and keep the result.

        Only reached when the base parser does not convert references
        itself (Python 2, or Python 3 with ``convert_charrefs=False``).
        """
        if not self.silent_tag:
            self.fed.append(_unescape("&%s;" % name))

    def handle_charref(self, name):
        """Decode a numeric reference such as ``&#39;`` or ``&#x27;``.

        Without this handler such references vanish from the output on
        parsers that do not convert them automatically.
        """
        if not self.silent_tag:
            self.fed.append(_unescape("&#%s;" % name))

    def get_data(self):
        """Return all text collected so far as a single string."""
        return ''.join(self.fed)

    def strip_tags(self, html):
        """Return the text of ``html`` and leave the parser ready for reuse.

        Args:
            html: markup to extract text from.

        Returns:
            The concatenated text found outside the silenced elements.
        """
        try:
            self.feed(html)
            return self.get_data()
        finally:
            # Always clear per-document state, even if feed() raised. An
            # unterminated <script> or <head> would otherwise leave
            # silent_tag set and mute every later call on this instance.
            self.fed = []
            self.silent_tag = None
            self.reset()


if __name__ == "__main__":
    main()
#####################################################
# Output:
#   "Hi there!"
#   Print this
#   <And this>
#
# Toby
# --
# https://mail.python.org/mailman/listinfo/python-list