You're making a new countDict for each line read from the file... is
that what you meant to do? Or are you trying to count word occurrences
across the whole file?
--
In general, any time string manipulation is going slowly, ask yourself,
"Can I use the re module for this?"
# disclaimer: untested code. probably contains typos
import re
# Matches runs of the characters a-z, 0-9 and underscore; re.I makes the
# match ignore case.
word_finder = re.compile(r'[a-z0-9_]+', re.I)


def count_words(string, word_finder=word_finder):
    """Count how often each word occurs in *string*.

    A word is any match of *word_finder*. Words are tallied exactly as
    matched (no case folding), so "The" and "the" get separate entries.

    Args:
        string: Text to scan.
        word_finder: Compiled regular expression whose matches are the
            words. Defaults to the module-level pattern; binding it as a
            default argument avoids a global lookup inside the function.

    Returns:
        A dict mapping each distinct word to its number of occurrences;
        empty when nothing matches.
    """
    countDict = {}
    for match in word_finder.finditer(string):
        word = match.group(0)
        countDict[word] = countDict.get(word, 0) + 1
    return countDict
# Report word counts per line: every line of the file gets its own fresh
# tally, printed with the words in sorted order.
# NOTE(review): `filename` is not defined in this snippet; it is assumed to be
# set by the surrounding code.
with open(filename) as f:  # closes the file even if the loop raises
    # Iterating the file object yields lines lazily; xreadlines() is
    # deprecated and absent from Python 3.
    for i, line in enumerate(f):
        countDict = count_words(line)
        # A parenthesized single-argument print produces the same output
        # under the print statement and the print function.
        print("Line %s" % i)
        for word in sorted(countDict):
            print(" %s %s" % (word, countDict[word]))
--
http://mail.python.org/mailman/listinfo/python-list