New in trunk: screen-scraping capabilities.
Example:
>>> import re
>>> from gluon.html import web2pyHTMLParser
>>> from urllib import urlopen
>>> html=urlopen('http://nobelprize.org/nobel_prizes/physics/laureates/1921/einstein-bio.html').read()
>>> tree=web2pyHTMLParser(html).tree ### NEW!!
>>> elements=tree.elements('div') # search by tag type
>>> elements=tree.elements(_id="Einstein") # search by attribute value (id for example)
>>> elements=tree.elements(find='Einstein') # text search NEW!!
>>> elements=tree.elements(find=re.compile('Einstein')) # search via regex NEW!!
>>> print elements[0]
<title>Albert Einstein - Biography</title>
>>> print elements[0][0]
Albert Einstein - Biography
>>> elements[0].append(SPAN(' modified'))
<title>Albert Einstein - Biography<span>modified</span></title>
>>> print tree
<html lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Albert Einstein - Biography<span>modified</span></title>
...