I have a function that takes a dict and a string, returns a list:

def trie_search(trie, s):
     ...
     return res

mk_trie(the_trie) takes 15 seconds to build the trie from a static list of 
750,000 words (words.txt), so I don't want to call it every time I need to 
call trie_search().

So I want to build the_trie on startup, and let it hang around in memory (even 
if it gets swapped in and out).  I realize that Django instances can come and 
go as Apache feels like it, so I can't just "run on server start".

I am considering using twisted to create a little server that does nothing but 
host trie_search(s) (note the lack of trie.)  I realize that it will have to 
return a string - that is ok, it is just a list of words found in words.txt.

But twisted seems like overkill.  On the other hand, I do want to do something 
with twisted, because there is another project waiting for me to learn twisted.

Anyway, are there any other ways I can make this happen?  Below is the code 
that shows various attempts to pickle the trie; on the few systems I have 
tried, it takes 2x as long to de-pickle as it does to build the trie from 
scratch.

Carl K


same code is here for a while: http://deadbeefbabe.org/paste/5163
# trieer.py

def trie_insert(trie, s):
    """
    Add the word s to the trie, creating any missing nodes on the way.

    Every character of s is a key leading to a nested dict; the node
    reached by the last character is flagged with "target": True.
    Inserting a word that is already present leaves the trie as it was.

    Raises ValueError when s is empty.
    """
    if not s:
        raise ValueError

    node = trie
    for ch in s:
        # Reuse the child node for ch, or hang a fresh empty one off node.
        node = node.setdefault(ch, {})
    node["target"] = True

def mk_trie(the_trie, filename=None):
    """
    Add words to the_trie (in place) using trie_insert().

    the_trie -- the trie (a dict) to fill.
    filename -- optional path of a word list with one word per line, for
                example 'words.txt'.  When omitted, a built-in 7 word
                list is used for quick testing.

    Returns None.
    """

    if filename is None:
        # 7 word dict for quick testing.
        for w in ['a','at','cat','catfish','fish','fat','foo']:
            trie_insert(the_trie, w)
        return

    words_file = open(filename, 'rb')
    try:
        # Stream the file line by line instead of loading it all at once.
        for line in words_file:
            word = line.strip()
            # trie_insert() raises ValueError on an empty string, so
            # blank lines are skipped rather than aborting the build.
            if word:
                trie_insert(the_trie, word)
    finally:
        words_file.close()

def gettrie():
     """
     Try to unpickle the trie of words.
     If it fiales (like it isnt there), build it and pickle it.
     Return the trei.
     """

     import cPickle, zlib
     # dict='dict.pkl.zip'
     try:
         print "trying to de-pickle..."
         the_trie=cPickle.load(open('words.pkl','rb'))
         # the_trie=pickle.load(open(dict,'rb'))
         # 
the_trie=pickle.loads(zlib.decompress(open('dict.pkl.zip','rb').read()))
        # the_trie=eval(open('dict.dict','rb').read())

     except:
         # something happened, make a new trie and pickle it.
         print "building trie..."
         the_trie = {"parent": None}
         mk_trie(the_trie)

         print "pickling tree..."
         cPickle.dump(the_trie,open('words.pkl','wb'),-1)
         # 
open('dict.pkl.zip','wb').write(zlib.compress(pickle.dumps(the_trie),9))
         # open('dict.dict','wb').write(the_trie.__repr__())
         # open('dict.dict','wb').write(the_trie.__str__())

     return the_trie

def trie_search(trie, s):
    """
    Return the list of targets in trie that s starts with.

    Walks the trie one character of s at a time and collects every prefix
    of s whose node is flagged "target".  The walk ends at the first
    character that has no node, or when s is used up.  Matches come out
    shortest first.
    """
    found = []
    node = trie
    prefix = ""
    for ch in s:
        prefix += ch
        if ch not in node:
            # Nothing in the trie continues this prefix.
            break
        node = node[ch]
        if node.get("target"):
            found.append(prefix)
    return found

the_trie=gettrie()

print "analyzing string..."
s = 'xfatcatfishx'
# s = 'nafpoolamplussystematiclaustilt'
for i in range(len(s)):
     chunk=s[i:]
     words=trie_search(the_trie, s[i:])
     if words:
         print chunk, words


--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"Django users" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to [EMAIL PROTECTED]
For more options, visit this group at 
http://groups.google.com/group/django-users?hl=en
-~----------~----~----~----~------~----~------~--~---

Reply via email to