I have a function that takes a dict and a string, and returns a list:

    def trie_search(trie, s):
        ...
        return res

mk_trie(the_trie) takes 15 seconds to build the trie from a static list of
750,000 words (words.txt), so I don't want to call it every time I need to
call trie_search().
So I want to build the_trie on startup and let it hang around in memory (even
if it gets swapped in and out). I realize that Django instances can come and
go as Apache feels like it, so I can't just "run on server start".
I am considering using Twisted to create a little server that does nothing but
host trie_search(s) (note the lack of the trie argument). I realize that it
will have to return a string - that is OK, it is just a list of words found in
words.txt. But Twisted seems like overkill. On the other hand, I do want to do
something with Twisted, because there is another project waiting for me to
learn Twisted.
Anyway, are there any other ways I can make this happen? Below is the code
that shows various attempts to pickle the trie. On the few systems I have
tried, it takes 2x as long to de-pickle as it does to build the trie from
scratch.
Carl K
same code is here for a while: http://deadbeefbabe.org/paste/5163
# trieer.py
def trie_insert(trie, s):
    """Insert target s into the trie if it doesn't already exist.

    The trie is a dict of nested dicts keyed by the successive characters
    of each word; the node reached by the last character of a complete
    word carries the extra key "target" set to True.

    trie -- root node (a dict); modified in place.
    s    -- non-empty string to insert.

    Raises ValueError if s is empty.
    """
    if not s:
        raise ValueError("cannot insert an empty string into the trie")
    node = trie
    for c in s:
        # setdefault creates the child node the first time a character is
        # seen and returns the existing one otherwise.
        node = node.setdefault(c, {})
    node["target"] = True
def mk_trie(the_trie, filename=None):
    """
    Given an empty trie and, optionally, a filename,
    add all the words in the file to the trie.

    the_trie -- trie root (a dict); modified in place.
    filename -- path to a word list with one word per line (for example
                'words.txt'). When omitted, a built-in 7 word list is
                inserted instead, for quick testing.

    Returns None.
    """
    if filename is None:
        # 7 word dict for quick testing; pass a filename to load a real list.
        for w in ['a', 'at', 'cat', 'catfish', 'fish', 'fat', 'foo']:
            trie_insert(the_trie, w)
        return

    # Iterate the file lazily instead of readlines() so a large word list
    # is never held in memory as a list, and close the handle when done.
    with open(filename, 'r') as f:
        for line in f:
            word = line.strip()
            # Skip blank lines: trie_insert rejects the empty string.
            if word:
                trie_insert(the_trie, word)
    return
def gettrie():
    """
    Try to unpickle the trie of words from 'words.pkl'.
    If that fails (e.g. the file isn't there), build the trie with mk_trie
    and pickle it to 'words.pkl'.
    Return the trie.
    """
    try:
        import cPickle as pickle  # Python 2: C implementation of pickle
    except ImportError:
        import pickle  # Python 3: no separate cPickle module

    # Other storage formats that were tried here: a zlib-compressed pickle
    # ('dict.pkl.zip') and a repr()/eval() text dump ('dict.dict').
    try:
        print("trying to de-pickle...")
        with open('words.pkl', 'rb') as f:
            # NOTE: unpickling can execute arbitrary code; only load a
            # words.pkl that this program wrote itself.
            the_trie = pickle.load(f)
    except Exception:
        # Best-effort cache: a missing, truncated or corrupt pickle can
        # surface as several different exception types, so any ordinary
        # error falls back to a rebuild. Unlike a bare "except:", this lets
        # KeyboardInterrupt and SystemExit propagate.
        print("building trie...")
        the_trie = {"parent": None}
        mk_trie(the_trie)
        print("pickling tree...")
        with open('words.pkl', 'wb') as f:
            # -1 selects the highest pickle protocol available.
            pickle.dump(the_trie, f, -1)
    return the_trie
def trie_search(trie, s):
    """Answer the list of targets in trie that are prefixes of s.

    Walks the trie from the root, one character of s at a time, and
    collects every prefix of s whose final node is marked with a true
    "target" entry. The walk stops at the first character that has no
    matching child node.

    trie -- root node of the trie (a dict of nested dicts).
    s    -- string to scan, starting at its first character.

    Returns the matching prefixes in order of increasing length; the list
    is empty when nothing matches.
    """
    res = []
    node = trie
    for end, c in enumerate(s, 1):
        if c not in node:
            break
        node = node[c]
        if node.get("target"):
            # s[:end] is the prefix consumed so far. Slicing only on a hit
            # avoids rebuilding the prefix string on every character.
            res.append(s[:end])
    return res
# Demo driver: load (or build) the trie, then report every word that starts
# at each offset of the sample string.
the_trie = gettrie()
print("analyzing string...")
s = 'xfatcatfishx'
# s = 'nafpoolamplussystematiclaustilt'
for i in range(len(s)):
    # chunk is the suffix of s beginning at offset i; trie_search returns
    # the words found at the start of that suffix.
    chunk = s[i:]
    words = trie_search(the_trie, chunk)
    if words:
        # Single pre-formatted argument so the output is the same whether
        # print is a statement (Python 2) or a function (Python 3).
        print("%s %s" % (chunk, words))
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups
"Django users" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to [EMAIL PROTECTED]
For more options, visit this group at
http://groups.google.com/group/django-users?hl=en
-~----------~----~----~----~------~----~------~--~---