On 5/4/2012 10:46 AM Tim Chase said...
I hit a few snags testing this on my WinXP box with Python 2.6.1:
os.path.getsize() wasn't finding the file, because the database was
created in two parts, with .dat and .dir extensions.
Also, setting key failed because md5.update() returns None.
The changes I needed to make are marked below.
Emile
import os
import hashlib
import random
from string import letters
import anydbm
# Size units, in bytes.
KB = 1024
MB = KB * KB
GB = MB * KB

# NOTE(review): DESIRED_SIZE is never read below -- the generation loop stops
# at a hard-coded 6 * GB instead.  Confirm which limit is actually intended.
DESIRED_SIZE = 1 * GB
KEYS_TO_SAMPLE = 20
FNAME = "mydata.db"
# Some dbm backends do not create FNAME itself; they split the database into
# "<name>.dat" (data) and "<name>.dir" (index) files.  This is the data file.
FDATNAME = r"mydata.db.dat"


def _db_size():
    """Return the size in bytes of the file holding the database's data.

    FNAME is checked first; FDATNAME is the fallback for backends that
    store the data in a separate ".dat" file.

    Raises:
        OSError: if neither file exists, so a missing database fails loudly
            instead of making the generation loop spin forever.
    """
    for path in (FNAME, FDATNAME):
        if os.path.exists(path):
            return os.path.getsize(path)
    raise OSError("no database file found for %r" % FNAME)


i = 0
md5 = hashlib.md5()
db = anydbm.open(FNAME, 'c')
try:
    print("Generating junk data...")
    while _db_size() < 6 * GB:
        # update() returns None, so feed the hash first and read the digest
        # in a separate step.  The same md5 object is reused on purpose:
        # each key is the digest of everything fed so far.
        md5.update(str(i))
        key = md5.hexdigest()[:16]
        size = random.randrange(1 * KB, 4 * KB)
        value = ''.join(random.choice(letters) for _ in range(size))
        db[key] = value
        i += 1
    print("Gathering %i sample keys" % KEYS_TO_SAMPLE)
    keys_of_interest = random.sample(db.keys(), KEYS_TO_SAMPLE)
finally:
    db.close()

print("Reopening for a cold sample set in case it matters")
db = anydbm.open(FNAME)
try:
    print("Performing %i lookups" % len(keys_of_interest))
    for key in keys_of_interest:
        # The value is discarded; only the lookup itself is being exercised.
        v = db[key]
    print("Done")
finally:
    db.close()
--
http://mail.python.org/mailman/listinfo/python-list