SpellChecker
I used this code. It works fine, but only on a single word, not on a whole text. I want to extend this code to correct a text file rather than just one word, but I don't know how. If you can help, please let me know. This is the code: import re, collections def words(text): return re.findall('[a-z]+', text.lower()) def train(features): model = collections.defaultdict(lambda: 1) for f in features: model[f] += 1 return model NWORDS = train(words(file('big.txt').read())) alphabet = 'abcdefghijklmnopqrstuvwxyz' def edits1(word): n = len(word) return set([word[0:i]+word[i+1:] for i in range(n)] + # deletion [word[0:i]+word[i+1]+word[i]+word[i+2:] for i in range (n-1)] + # transposition [word[0:i]+c+word[i+1:] for i in range(n) for c in alphabet] + # alteration [word[0:i]+c+word[i:] for i in range(n+1) for c in alphabet]) # insertion def known_edits2(word): return set(e2 for e1 in edits1(word) for e2 in edits1(e1) if e2 in NWORDS) def known(words): return set(w for w in words if w in NWORDS) def correct(word): candidates = known([word]) or known(edits1(word)) or known_edits2 (word) or [word] return max(candidates, key=lambda w: NWORDS[w]) -- http://mail.python.org/mailman/listinfo/python-list
Re: SpellChecker
On May 20, 12:37 pm, Mike Kazantsev wrote: > abosalim wrote: > > I used this code.It works fine,but on word not whole text.I want to > > extend this code to correct > > text file not only a word,but i don't know.If you have any help,please > > inform me. > ... > > def correct(word): > > candidates = known([word]) or known(edits1(word)) or known_edits2 > > (word) or [word] > > return max(candidates, key=lambda w: NWORDS[w]) > > Here I assume that "word" is any string consisting of letters, feel free > to add your own check in place of str.isalpha, like word length or case. > Note that simple ops like concatenation work much faster with buffers > than str / unicode. > > text = 'some text to correct (anything, really)' > result = buffer('') > > word, c = buffer(''), '' > for c in text: > if c.isalpha(): word += c > else: > if word: > result += correct(word) > word = buffer('') > result += c > > -- > Mike Kazantsev // fraggod.net > > signature.asc > < 1KViewDownload Thanks a lot -- http://mail.python.org/mailman/listinfo/python-list
Need Help
#Gui import re, collections from Tkinter import * from nltk_lite import tokenize def words(text): return re.findall('[a-z]+', text.lower()) def train(features): model = collections.defaultdict(lambda: 1) for f in features: model[f] += 1 return model NWORDS = train(words(file('big1.txt').read())) alphabet = 'abcdefghijklmnopqrstuvwxyz' def edits1(word): s = [(word[:i], word[i:]) for i in range(len(word) + 1)] deletes = [a + b[1:] for a, b in s if b] transposes = [a + b[1] + b[0] + b[2:] for a, b in s if len(b)>1] replaces = [a + c + b[1:] for a, b in s for c in alphabet if b] inserts = [a + c + b for a, b in s for c in alphabet] return set(deletes + transposes + replaces + inserts) def known_edits2(word): return set(e2 for e1 in edits1(word) for e2 in edits1(e1) if e2 in NWORDS) def known(words): return set(w for w in words if w in NWORDS) def correct(word): candidates = known([word]) or known(edits1(word)) or known_edits2 (word) or [word] return max(candidates, key=lambda w: NWORDS[w]) def textcorrect(str): s=[] for i in tokenize.whitespace(str): s.append(correct(i)) result= " ".join(s) return result GUI Part# class Ex3: def __init__(self, master): self.frame = Frame(master) self.frame.pack() self.field = Entry(self.frame) self.field.pack(side=TOP) self.contents = StringVar() self.field.config(text=self.contents) self.stop = Button(self.frame, text="Exit", fg="red", command=self.frame.quit) self.stop.pack(side=RIGHT) self.enter = Button(self.frame, text="Correct", fg="black", command=self.new) self.enter.pack(side=LEFT) def new(self): self.string = self.contents.get() #pass contents of textfield to words()method above self.res= words(self.string) self.frame2 = Frame() self.frame2.pack() self.words = Label(self.frame2, text=self.res, fg="blue", font= ("Arial", 16)) self.words.pack() root = Tk() Ex3 = Ex3(root) root.mainloop() It prints the text without correcting it. I think it never enters the method, or any of the methods above the GUI part.
Please, if you have any information, let me know. -- http://mail.python.org/mailman/listinfo/python-list
Re: Need Help
On May 25, 5:39 am, Steven D'Aprano wrote: > On Mon, 25 May 2009 00:16:19 +0200, Piet van Oostrum wrote: > > By the way, it is better to add python code as attachment instead of > > inline text because some news software might fold the lines like in your > > posting, making it difficult to reconstruct the code. > > Except that some news software might not show the attachment at all. > > -- > Steven I modified the method, but Python cannot find it (self.res=textcorrect (self.string) NameError: global name 'textcorrect' is not defined). This is what I have done: import re, collections from Tkinter import * from nltk_lite import tokenize class Ex3: def __init__(self, master): self.frame = Frame(master) self.frame.pack() self.field = Entry(self.frame) self.field.pack(side=TOP) self.contents = StringVar() self.field.config(text=self.contents) self.stop = Button(self.frame, text="Exit", fg="red", command=self.frame.quit) self.stop.pack(side=RIGHT) self.enter = Button(self.frame, text="Correct", fg="black", command=self.new) self.enter.pack(side=LEFT) def words(text): return re.findall('[a-z]+', text.lower()) def train(features): model = collections.defaultdict(lambda: 1) for f in features: model[f] += 1 return model NWORDS = train(words(file('big1.txt').read())) alphabet = 'abcdefghijklmnopqrstuvwxyz' def edits1(word): s = [(word[:i], word[i:]) for i in range(len(word) + 1)] deletes = [a + b[1:] for a, b in s if b] transposes = [a + b[1] + b[0] + b[2:] for a, b in s if len(b) >1] replaces = [a + c + b[1:] for a, b in s for c in alphabet if b] inserts = [a + c + b for a, b in s for c in alphabet] return set(deletes + transposes + replaces + inserts) def known_edits2(word): return set(e2 for e1 in edits1(word) for e2 in edits1(e1) if e2 in NWORDS) def known(words): return set(w for w in words if w in NWORDS) def correct(word): candidates = known([word]) or known(edits1(word)) or known_edits2(word) or [word] return max(candidates, key=lambda w: NWORDS[w]) def textcorrect(str): 
s=[] for i in tokenize.whitespace(str): s.append(correct(i)) result= " ".join(s) return result def new(self): self.string = self.contents.get() #pass contents of textfield to textcorrect()method above self.res=textcorrect(self.string) self.frame2 = Frame() self.frame2.pack() self.words = Label(self.frame2, text=self.res, fg="blue", font= ("Arial", 16)) self.words.pack() root = Tk() Ex3 = Ex3(root) root.mainloop() -- http://mail.python.org/mailman/listinfo/python-list