ProvoWallis wrote:
> Thanks. One more question, though.
>
> I'm not sure how to limit the scope of my search so that I'm just
> extracting the id attribute from the sections that I want. I.e., I want
> the id attributes from the forms in sections 1 and 3 but not from 2.
>
> Maybe I'm missing something.
>
# If the data has closing tags this is easily achieved using a DOM or SAX
# parser, but here is a slightly modified version, very ugly but simple.
# Hope this helps.
#
# Adonis
# ---

try:  # Python 3 location of the parser
    from html.parser import HTMLParser
except ImportError:  # Python 2 fallback
    from HTMLParser import HTMLParser

# Sample input: unclosed <main-section> markers, each followed by the
# <form> tags that belong to it.
data = (
    '<main-section no="1"> '
    '<form id="graphic_1.tif"> '
    '<form id="graphic_2.tif"> '
    '<main-section no="2"> '
    '<form id="graphic_3.tif"> '
    '<main-section no="3"> '
    '<form id="graphic_4.tif"> '
    '<form id="graphic_5.tif"> '
    '<form id="graphic_6.tif"> '
)


class ParseForms(HTMLParser):
    """Group the ``id`` attribute of every ``<form>`` tag by section.

    A ``<main-section no="N">`` start tag makes ``N`` the current section;
    each following ``<form>`` start tag has its ``id`` appended to that
    section's list. Forms seen before any ``<main-section>`` are stored
    under the key ``None``.
    """

    def __init__(self, *args, **kwargs):
        # Explicit base call (not super()) so this also works where
        # HTMLParser is an old-style class (Python 2).
        HTMLParser.__init__(self, *args, **kwargs)
        # Per-instance state: a class-level dict would be shared by every
        # parser and leak results from one document into the next.
        self._section = None
        self._secDict = {}

    def getSection(self, key):
        """Return the list of form ids collected for section *key*.

        *key* may be given as an int or a string (``1`` and ``"1"`` are
        equivalent) because section numbers are stored as the attribute
        text. ``None`` selects forms found before any section marker.
        Returns ``None`` when the section has no recorded forms.
        """
        lookup = key if key is None else str(key)
        return self._secDict.get(lookup)

    def handle_starttag(self, tag, attrs):
        """Track the current section and record form ids as tags arrive."""
        attributes = dict(attrs)
        if tag == "main-section":
            self._section = attributes.get("no")
        elif tag == "form":
            self._secDict.setdefault(self._section, []).append(
                attributes.get("id")
            )


if __name__ == "__main__":
    parser = ParseForms()
    parser.feed(data)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(parser.getSection(1))
    print(parser.getSection(3))

# -- http://mail.python.org/mailman/listinfo/python-list