> If document order doesn't matter, try sorting the elements of each level in the two documents by some arbitrary deterministic key, such as (tag name, text, attr count, whatever), and then compare them in order, instead of trying to find matches in multiple passes. itertools.groupby() might be your friend here.
# I think that sorting multiple times by each attribute will cost more than
# I've managed to do:

from lxml import etree
from collections import deque
import string
import re
import time

# Matches bracketed numeric indices such as "[2]" in an element path.
# Compiled once here rather than on every elemsEqual() call.
_idxRE = re.compile(r"(\[\d*\])")


def xmlEqual(xmlStr1, xmlStr2):
    """Return True if both XML strings contain matching elements in any order.

    Every element of the first document must pair up with a distinct element
    of the second one that elemsEqual() accepts (same tag, same attributes,
    same index-free path).  Element text and tail are not compared.

    Note that elements are matched one by one, not as whole subtrees.

    xmlStr1, xmlStr2 -- XML documents, in a form accepted by etree.XML().
    """
    et1 = etree.XML(xmlStr1)
    et2 = etree.XML(xmlStr2)

    let1 = list(et1.iter())
    let2 = list(et2.iter())

    # Different element counts can never be paired up one-to-one.
    if len(let1) != len(let2):
        return False

    for el in let1:
        foundEl = findMatchingElem(el, let2)
        if foundEl is None:
            return False
        # Consume the match so it cannot be paired with another element.
        let2.remove(foundEl)

    return True


def findMatchingElem(el, eList):
    """Return the first element of eList that elemsEqual() el, or None."""
    for elem in eList:
        if elemsEqual(el, elem):
            return elem
    return None


def elemsEqual(el1, el2):
    """Return True if el1 and el2 agree on tag, attributes and location.

    The location is the path reported by getpath() on each element's root
    tree, with the bracketed numeric indices removed, so the position of an
    element among its siblings does not matter.
    """
    if el1.tag != el2.tag or el1.attrib != el2.attrib:
        return False

    # no requirement for text checking for now
    #if el1.text != el2.text or el1.tail != el2.tail:
        #return False

    path1 = _idxRE.sub("", el1.getroottree().getpath(el1))
    path2 = _idxRE.sub("", el2.getroottree().getpath(el2))
    return path1 == path2


# Notice that if documents are in exact same order, each element is compared
# only once!
#
# --
# http://mail.python.org/mailman/listinfo/python-list