Reinhold Birkenfeld wrote:
def merge(pairings):
    sets = {}
    for x1, x2 in pairings:
        newset = (sets.get(x1, frozenset([x1]))
                  | sets.get(x2, frozenset([x2])))
        for i in newset:
            sets[i] = newset

return [list(aset) for aset in set(sets.itervalues())]

Looks good. I used it as inspiration for this new one, which takes less time for large datasets, and especially for those where a good number of merges are expected (at least that looks to be the pattern; with some large datasets this takes less than a quarter of the time as the one above). I'm sure it can be improved still -- yours is still faster in many cases.


def merge2(pairings):
    elements = {}
    sets = []
    for x1, x2 in pairings:
        i = [elements.get(x1, -1), elements.get(x2, -1)]
        i.sort()
        if i[1] == -1:
            i[1] = len(sets)
            sets.append(set([x1, x2]))
        elif i[0] == -1:
            sets[i[1]] |= set([x1, x2])
        elif i[0] == i[1]:
            continue
        else:
            sets[i[1]] |= sets[i[0]]
            sets[i[0]].clear()

        for x in sets[i[1]]:
           elements[x] = i[1]

    return [list(s) for s in sets if s]

# Comparison
import profile
import random

# Play with these values
xMax, nPairs = 1000, 5000

l = [[random.randint(0, xMax), random.randint(0, xMax)] for i in range(nPairs)]

print 'merge2:'
profile.run('merge2(l)') # Mine

print 'merge:'
profile.run('merge(l)') # Reinhold's

--
Brian Beck
Adventurer of the First Order
--
http://mail.python.org/mailman/listinfo/python-list

Reply via email to