本文整理汇总了Python中EditModel.EditModel.editProbabilities方法的典型用法代码示例。如果您正苦于以下问题:Python EditModel.editProbabilities方法的具体用法?Python EditModel.editProbabilities怎么用?Python EditModel.editProbabilities使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类EditModel.EditModel的用法示例。
在下文中一共展示了EditModel.editProbabilities方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from EditModel import EditModel [as 别名]
# 或者: from EditModel.EditModel import editProbabilities [as 别名]
class SpellCorrect:
    """Noisy-channel spelling corrector.

    Holds a language model and an edit model, and combines their scores to
    replace at most one interior word per sentence.
    """

    def __init__(self, lm, corpus):
        """Store the language model and build the edit model.

        Args:
            lm: Language model; must provide ``score(sentence)``. The value
                it returns is added to the log of an edit probability.
            corpus: Passed through to ``EditModel`` together with the
                edit-count file path.
        """
        self.languageModel = lm
        self.editModel = EditModel('../data/count_1edit.txt', corpus)

    def evaluate(self, corpus):
        """Test this speller on a corpus and return a SpellingResult.

        Empty test sentences are skipped entirely: they count toward neither
        the number of correct sentences nor the total.
        """
        numCorrect = 0
        numTotal = 0
        for sentence in corpus.generateTestCases():
            if sentence.isEmpty():
                continue
            hypothesis = self.correctSentence(sentence.getErrorSentence())
            if sentence.isCorrection(hypothesis):
                numCorrect += 1
            numTotal += 1
        return SpellingResult(numCorrect, numTotal)

    def correctSentence(self, sentence):
        """Take a list of words and return a corrected copy of it.

        Every interior word (the first and last tokens are skipped as
        start/end markers) is replaced in turn by each alternative offered
        by the edit model. The single replacement with the highest
        ``language model score + log(edit probability)`` is applied.

        Args:
            sentence: List of words. It is not modified.

        Returns:
            A new list with at most one word replaced, or ``[]`` for an
            empty input. If the edit model offers no alternatives, the
            result is an unchanged copy.
        """
        if len(sentence) == 0:
            return []

        # Defaults make the final substitution a no-op when nothing is found.
        best_index = 0
        best_word = sentence[0]
        best_score = float('-inf')

        # Search on a private copy so the caller's list is never left in a
        # half-edited state, even if a model call raises.
        trial = list(sentence)

        # skip start and end tokens
        for i in range(1, len(sentence) - 1):
            word = sentence[i]
            edit_probs = self.editModel.editProbabilities(word)
            # items() rather than iteritems() so this runs on Python 2 and 3.
            for alternative, edit_prob in edit_probs.items():
                if alternative == word:
                    continue
                trial[i] = alternative
                lm_score = self.languageModel.score(trial)
                # log(0) is undefined; treat a zero probability as -inf.
                if edit_prob != 0:
                    edit_score = math.log(edit_prob)
                else:
                    edit_score = float('-inf')
                score = lm_score + edit_score
                # ">=" is deliberate: on ties the later candidate wins.
                if score >= best_score:
                    best_score = score
                    best_index = i
                    best_word = alternative
            trial[i] = word  # restore before moving to the next position

        corrected = list(sentence)
        corrected[best_index] = best_word
        return corrected

    def correctCorpus(self, corpus):
        """Correct a whole corpus and return the result as a JSON string.

        The output is a compact JSON array of arrays of words, one inner
        array per sentence in ``corpus.corpus``. Words are escaped by the
        JSON encoder, so quotes and backslashes yield valid JSON, and an
        empty sentence is emitted as ``[]``.
        """
        import json  # local import: the file's import block is not visible here

        corrected_sentences = [
            self.correctSentence(sentence.getErrorSentence())
            for sentence in corpus.corpus
        ]
        # Compact separators reproduce the original comma-only layout;
        # ensure_ascii=False keeps non-ASCII words unescaped as before.
        return json.dumps(
            corrected_sentences, separators=(',', ':'), ensure_ascii=False
        )
示例2: __init__
# 需要导入模块: from EditModel import EditModel [as 别名]
# 或者: from EditModel.EditModel import editProbabilities [as 别名]
class SpellCorrect:
    """Spelling corrector for sentences.

    Holds the edit model and the language model, and combines their scores
    to pick the most probable single-word correction of a sentence.
    """

    def __init__(self, lm, corpus):
        """Store the language model and build the edit model.

        Args:
            lm: Language model; must provide ``score(sentence)``.
            corpus: Passed through to ``EditModel`` together with the
                edit-count file path.
        """
        self.languageModel = lm
        self.editModel = EditModel('../data/count_1edit.txt', corpus)

    def correctSentence(self, sentence):
        """Return the best-scoring sentence with one word replaced.

        Every interior word (the first and last tokens are skipped as
        ``<s>`` / ``</s>`` markers) is replaced in turn by each candidate
        from ``editModel.editProbabilities``. Candidates are scored as
        ``languageModel.score(trial) + p``, where ``p`` is the value paired
        with the candidate, and the highest-scoring trial is returned.

        Args:
            sentence: List of words. It is not modified.

        Returns:
            A new list of words, or ``[]`` for an empty input. If there are
            no interior words, or no candidate scores above ``-inf``, an
            unchanged copy of ``sentence`` is returned.
        """
        if len(sentence) == 0:
            return []

        # Start from the unmodified sentence so there is always a result to
        # return; without this the method raised UnboundLocalError whenever
        # no candidate was accepted.
        bestSentence = sentence[:]
        trySentence = sentence[:]
        bestScore = float('-inf')

        # range() rather than xrange() so this runs on Python 2 and 3.
        for i in range(1, len(sentence) - 1):  # ignore <s> and </s>
            # Iterated as (word, score) pairs, as the original code did.
            for w, p in self.editModel.editProbabilities(sentence[i]):
                trySentence[i] = w
                # Score once per candidate: prior (language model) + channel (p).
                score = self.languageModel.score(trySentence) + p
                if score > bestScore:
                    bestScore = score
                    bestSentence = trySentence[:]
            trySentence[i] = sentence[i]  # restore before the next position
        return bestSentence

    def evaluate(self, corpus):
        """Test this speller on a corpus and return a SpellingResult.

        Empty test sentences are skipped entirely: they count toward neither
        the number of correct sentences nor the total.
        """
        numCorrect = 0
        numTotal = 0
        for sentence in corpus.generateTestCases():
            if sentence.isEmpty():
                continue
            hypothesis = self.correctSentence(sentence.getErrorSentence())
            if sentence.isCorrection(hypothesis):
                numCorrect += 1
            numTotal += 1
        return SpellingResult(numCorrect, numTotal)

    def correctCorpus(self, corpus):
        """Correct a whole corpus and return the result as a JSON string.

        The output is a compact JSON array of arrays of words, one inner
        array per sentence in ``corpus.corpus``. Words are escaped by the
        JSON encoder, so quotes and backslashes yield valid JSON, and an
        empty sentence is emitted as ``[]``.
        """
        import json  # local import: the file's import block is not visible here

        corrected_sentences = [
            self.correctSentence(sentence.getErrorSentence())
            for sentence in corpus.corpus
        ]
        # Compact separators reproduce the original comma-only layout;
        # ensure_ascii=False keeps non-ASCII words unescaped as before.
        return json.dumps(
            corrected_sentences, separators=(',', ':'), ensure_ascii=False
        )