Klaas wrote:
22.2s 20m25s[3]
20m to insert 1m keys? You are doing something wrong.
Hi Mike.
I've put together some simplified test code, but the bsddb
module gives 11m for 1M keys:
Number generator test for 1000000 number ranges
with a maximum of 3 wildcard digits.
Wed May 17 22:18:17 2006 dictionary population started
Wed May 17 22:18:26 2006 dictionary population stopped, duration 8.6s
Wed May 17 22:18:27 2006 StorageBerkeleyDB population started
Wed May 17 22:29:32 2006 StorageBerkeleyDB population stopped, duration 665.6s
Wed May 17 22:29:33 2006 StorageSQLite population started
Wed May 17 22:30:38 2006 StorageSQLite population stopped, duration 65.5s
test code is attached.
With bdb's it is crucial to insert keys in bytestring-sorted order.
For the bsddb test, I'm using a plain string. (The module docs list a
string being the only datatype supported for both keys & values).
Also, be sure to give it a decent amount of cache.
The bsddb.hashopen() factory seems to have a bug in this regard; if you
supply a cachesize argument, then it barfs:
...
File "bsddb-test.py", line 67, in runtest
db = bsddb.hashopen(None, flag='c', cachesize=8192)
File "/usr/lib/python2.4/bsddb/__init__.py", line 288, in hashopen
if cachesize is not None: d.set_cachesize(0, cachesize)
bsddb._db.DBInvalidArgError: (22, 'Invalid argument -- DB->set_cachesize: method not permitted when environment
specified')
I'll file a bug report on this if it isn't already fixed.
Cheers,
Chris
import bsddb
import random
import time
import sqlite
import psyco
psyco.full()
class WallClockTimer(object):
'''Simple timer for measuring tests.'''
def __init__(self, msg):
self.start = time.time()
self.msg = msg
print time.ctime(self.start), self.msg, 'started'
def stop(self):
end = time.time()
duration = end - self.start
print time.ctime(end), self.msg, 'stopped, duration %.1fs' % duration
class NumberGen(object):
'''Phone number generator for testing different methods of storage.'''
def __init__(self, range_start, range_end, n, max_wildcards):
print '''Number generator test for %d number ranges
with a maximum of %d wildcard digits.''' % (n, max_wildcards)
wildcards = range(0, max_wildcards + 1)
# generate n unique numbers and store as keys in number_hash
timer = WallClockTimer('dictionary population')
self.number_hash = {}
for i in xrange(0, n):
unique = False
while not unique:
wildcard_digits = self.gen_wildcard_digits(wildcards)
num = self.gen_number(range_start, range_end)
if wildcard_digits > 0:
num /= (10 ** wildcard_digits)
key = (num, wildcard_digits)
if self.number_hash.has_key(key):
unique = False
else:
unique = True
self.number_hash[key] = None
timer.stop()
def gen_wildcard_digits(self, wildcards):
return random.choice(wildcards)
def gen_number(self, start, end):
return int(random.uniform(start, end))
def storage_test(self, StorageTestClass):
test = StorageTestClass(self.number_hash)
class StorageTest(object):
'''base class for testing storage. Derive a test
class and provide your own runtest() method.'''
def __init__(self, number_hash):
timer = WallClockTimer('%s population' % type(self).__name__)
self.runtest(number_hash)
timer.stop()
class StorageBerkeleyDB(StorageTest):
def runtest(self, number_hash):
db = bsddb.hashopen(None, flag='c', cachesize=8192)
for (num, wildcard_digits) in number_hash.keys():
key = '%d:%d' % (num, wildcard_digits)
db[key] = None
db.close()
class StorageSQLite(StorageTest):
def runtest(self, number_hash):
db = sqlite.connect(':memory:')
cursor = db.cursor()
cursor.execute('CREATE TABLE numbers (num INTEGER, wd INTEGER)')
q = """insert into numbers (num, wd) values (%d, %d)"""
for (num, wildcard_digits) in number_hash.keys():
cursor.execute(q, num, wildcard_digits)
db.commit()
db.close()
if __name__ == '__main__':
test_vals = {'range_start' : 61880 * 10 ** 7,
'range_end' : 61891 * 10 ** 7,
'n' : 1 * 10 ** 4,
'max_wildcards' : 3}
ngen = NumberGen(test_vals['range_start'], test_vals['range_end'],
test_vals['n'], test_vals['max_wildcards'])
ngen.storage_test(StorageBerkeleyDB)
ngen.storage_test(StorageSQLite)
--
http://mail.python.org/mailman/listinfo/python-list