Hi ! > Thanks Jeremy. I am in the process of converting my stuff to use sets! > I wouldn't have thought it would have made that big a deal! I guess it is > live and learn. > If you have simple records with a large amount of data, you can try dbhash. With this you won't run out of memory...
dd import dbhash import time import random import gc import sys itemcount = 250000 db = dbhash.open('test.dbh','w') for i in range(itemcount): db[str(i)] = str(i) littlelist = [] littleset = set() while len(littlelist) < 1000: x = str(random.randint(0, itemcount-1)) if not (x in littlelist): littlelist.append(x) littleset.add(x) def DBHash(): gc.collect() hk = db.has_key st = time.time() newlist = [] for val in littlelist: if hk(val): newlist.append(val) et = time.time() print "Size", len(newlist) newlist.sort() print "Hash", hash(str(newlist)) print "Time", "%04f"%(et-st) print def Set(): gc.collect() largeset = set() for i in range(itemcount): largeset.add(str(i)) st = time.time() newset = largeset.intersection(littleset) newsetlist = [] while newset: newsetlist.append(newset.pop()) et = time.time() print "Size", len(newsetlist) newsetlist.sort() print "Hash", hash(str(newsetlist)) print "Time", "%04f"%(et-st) DBHash() Set() -- http://mail.python.org/mailman/listinfo/python-list