def merge(pairings): sets = {} for x1, x2 in pairings: newset = (sets.get(x1, frozenset([x1])) | sets.get(x2, frozenset([x2]))) for i in newset: sets[i] = newset
return [list(aset) for aset in set(sets.itervalues())]
Looks good. I used it as inspiration for this new one, which takes less time for large datasets, and especially for those where a good number of merges are expected (at least that looks to be the pattern; with some large datasets this takes less than a quarter of the time as the one above). I'm sure it can be improved still -- yours is still faster in many cases.
def merge2(pairings): elements = {} sets = [] for x1, x2 in pairings: i = [elements.get(x1, -1), elements.get(x2, -1)] i.sort() if i[1] == -1: i[1] = len(sets) sets.append(set([x1, x2])) elif i[0] == -1: sets[i[1]] |= set([x1, x2]) elif i[0] == i[1]: continue else: sets[i[1]] |= sets[i[0]] sets[i[0]].clear()
for x in sets[i[1]]: elements[x] = i[1]
return [list(s) for s in sets if s]
# Comparison import profile import random
# Play with these values xMax, nPairs = 1000, 5000
l = [[random.randint(0, xMax), random.randint(0, xMax)] for i in range(nPairs)]
print 'merge2:' profile.run('merge2(l)') # Mine
print 'merge:' profile.run('merge(l)') # Reinhold's
-- Brian Beck Adventurer of the First Order -- http://mail.python.org/mailman/listinfo/python-list