On 5/4/2012 10:46 AM Tim Chase said...

I hit a few snags testing this on my WinXP machine with Python 2.6.1: os.path.getsize() couldn't find the file, because the database was created in two parts, with .dat and .dir extensions.

Also, setting key failed because md5.update() returns None, so its result can't be sliced.

The changes I needed to make are marked below.

Emile

   import os
   import hashlib
   import random
   from string import letters

   import anydbm

   # Size units, built up from 1024; compared against os.path.getsize()
   # results and used as bounds for the random value lengths below.
   KB = 1024
   MB = KB * KB
   GB = MB * KB
   # NOTE(review): DESIRED_SIZE is not referenced by the code shown here; the
   # generation loop compares against 6*GB instead -- confirm which is intended.
   DESIRED_SIZE = 1 * GB
   # Number of keys drawn with random.sample() for the lookup pass.
   KEYS_TO_SAMPLE = 20
   # Name handed to anydbm.open().
   FNAME = "mydata.db"

# Added in this reply: the file whose size the generation loop polls, because
# the database was created as separate .dat and .dir files and getsize(FNAME)
# did not find anything under the bare name.
FDATNAME = r"mydata.db.dat"


   # Fill the database with random junk until the data file reaches the size
   # threshold, then pick a random sample of keys to look up afterwards.
   i = 0
   md5 = hashlib.md5()
   db = anydbm.open(FNAME, 'c')
   try:
     print("Generating junk data...")
     # Changed (Emile): was os.path.getsize(FNAME). On winxp w/python2.6.1 the
     # database was created in two parts (.dat and .dir), so FNAME itself was
     # not found; poll the .dat file instead.
     # NOTE(review): setups where the database is a single file named FNAME
     # would need the original getsize(FNAME) here.
     while os.path.getsize(FDATNAME) < 6*GB:
       # Changed (Emile): was key = md5.update(str(i))[:16]. update() returns
       # None, so feed the hash first and slice the hex digest instead.
       md5.update(str(i))
       key = md5.hexdigest()[:16]
       # Each value is between 1 KB and 4 KB of random letters.
       size = random.randrange(1*KB, 4*KB)
       value = ''.join(random.choice(letters)
         for _ in range(size))
       db[key] = value
       i += 1
     print("Gathering %i sample keys" % KEYS_TO_SAMPLE)
     keys_of_interest = random.sample(db.keys(), KEYS_TO_SAMPLE)
   finally:
     # Always close the handle, even if generation is interrupted.
     db.close()

   # Reopen the database and look up each sampled key once.
   print("Reopening for a cold sample set in case it matters")
   db = anydbm.open(FNAME)
   try:
     # Fixed: the format string previously had no argument, so a literal
     # "%i" was printed instead of the number of lookups.
     print("Performing %i lookups" % len(keys_of_interest))
     for key in keys_of_interest:
       # Only the lookup itself matters; the fetched value is discarded.
       db[key]
     print("Done")
   finally:
     db.close()



--
http://mail.python.org/mailman/listinfo/python-list

Reply via email to