當前位置: 首頁>>代碼示例>>Python>>正文


Python EditModel.editProbabilities方法代碼示例

本文整理匯總了Python中EditModel.EditModel.editProbabilities方法的典型用法代碼示例。如果您正苦於以下問題:Python EditModel.editProbabilities方法的具體用法?Python EditModel.editProbabilities怎麼用?Python EditModel.editProbabilities使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在EditModel.EditModel的用法示例。


在下文中一共展示了EditModel.editProbabilities方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: __init__

# 需要導入模塊: from EditModel import EditModel [as 別名]
# 或者: from EditModel.EditModel import editProbabilities [as 別名]
class SpellCorrect:
  """Spelling corrector that ranks single-word edits with an edit model and a language model.

  Attributes:
    languageModel: object whose score(sentence) value is added to the log of
      the edit probability when ranking candidates (presumably a sentence
      log-probability; confirm against the language model in use).
    editModel: EditModel built from '../data/count_1edit.txt' and the corpus.
  """

  def __init__(self, lm, corpus):
    """Stores the language model and builds the edit model from the corpus."""
    self.languageModel = lm
    self.editModel = EditModel('../data/count_1edit.txt', corpus)

  def evaluate(self, corpus):
    """Tests this speller on a corpus, returns a SpellingResult.

    Empty test sentences are skipped and count neither as correct nor
    towards the total.
    """
    numCorrect = 0
    numTotal = 0
    for sentence in corpus.generateTestCases():
      if sentence.isEmpty():
        continue
      hypothesis = self.correctSentence(sentence.getErrorSentence())
      if sentence.isCorrection(hypothesis):
        numCorrect += 1
      numTotal += 1
    return SpellingResult(numCorrect, numTotal)

  def correctSentence(self, sentence):
    """Takes a list of words, returns a corrected list of words.

    At most one word is replaced. The first and last tokens are never
    edited (they are treated as start/end markers). Every candidate is
    scored as languageModel.score(candidate sentence) + log(edit probability),
    and the highest-scoring candidate wins; on ties the candidate seen last
    wins. If no candidate has a score above -inf, the sentence comes back
    unchanged.

    Args:
      sentence: list of words. It is not modified.

    Returns:
      A new list of words; [] for an empty input.
    """
    import math  # function-scope import: no module-level import is visible here

    if not sentence:
      return []

    neg_inf = float('-inf')
    best_index = 0
    best_word = sentence[0]  # default "replacement" leaves the sentence as-is
    best_score = neg_inf
    candidate = list(sentence)  # working copy so the caller's list is never touched

    # skip start and end tokens
    for i in range(1, len(sentence) - 1):
      word = sentence[i]
      # .items() works on Python 2 and 3 (iteritems() is Python 2 only)
      for alternative, editprob in self.editModel.editProbabilities(word).items():
        # a zero-probability edit has log-probability -inf and can never be
        # a sensible correction, so do not even score it
        if alternative == word or editprob == 0:
          continue
        candidate[i] = alternative
        score = self.languageModel.score(candidate) + math.log(editprob)
        # reject -inf explicitly: "-inf >= -inf" is true and would otherwise
        # let an impossible candidate replace the original word
        if score != neg_inf and score >= best_score:
          best_score = score
          best_index = i
          best_word = alternative
      candidate[i] = word  # restore before moving on to the next position

    corrected = list(sentence)
    corrected[best_index] = best_word
    return corrected

  def correctCorpus(self, corpus):
    """Corrects a whole corpus, returns a JSON representation of the output.

    The result is a compact JSON array (no spaces after separators) holding
    one array of words per sentence in corpus.corpus. Words are escaped by
    the json module, so quotes and backslashes inside a word stay valid JSON;
    an empty corrected sentence is emitted as [].
    """
    import json  # function-scope import: no module-level import is visible here

    corrected_sentences = [
        self.correctSentence(sentence.getErrorSentence())
        for sentence in corpus.corpus
    ]
    # compact separators and ensure_ascii=False keep the output identical to
    # plain '","' joining for words that need no escaping
    return json.dumps(corrected_sentences, ensure_ascii=False, separators=(',', ':'))
開發者ID:gpoulter,項目名稱:nlp-pa2-autocorrect,代碼行數:75,代碼來源:SpellCorrect.py

示例2: __init__

# 需要導入模塊: from EditModel import EditModel [as 別名]
# 或者: from EditModel.EditModel import editProbabilities [as 別名]
class SpellCorrect:
  """Spelling corrector for sentences. Holds edit model, language model and the corpus.

  Attributes:
    languageModel: object whose score(sentence) gives the log-probability of
      a sentence (the prior of the noisy channel model).
    editModel: EditModel built from '../data/count_1edit.txt' and the corpus;
      its editProbabilities(word) is iterated as (candidate, log-probability)
      pairs (the channel model).
  """

  def __init__(self, lm, corpus):
    """Stores the language model and builds the edit model from the corpus."""
    self.languageModel = lm
    self.editModel = EditModel('../data/count_1edit.txt', corpus)

  def correctSentence(self, sentence):
    """Assuming exactly one error per sentence, returns the most probable corrected sentence.

    Each candidate replaces one word (never the first or last token, which
    are the <s> and </s> markers) and is scored as
    languageModel.score(candidate sentence) + edit log-probability. The
    first candidate with the strictly highest score wins.

    Args:
      sentence: list of words. It is not modified.

    Returns:
      A new list of words: the best-scoring candidate, or a copy of the
      input when no candidate scores above -inf; [] for an empty input.
    """
    if not sentence:
      return []

    # Start from the unchanged sentence so there is always something to
    # return, even when the edit model proposes no candidates at all.
    bestSentence = sentence[:]
    trySentence = sentence[:]
    bestScore = float('-inf')

    for i in range(1, len(sentence) - 1):  # ignore <s> and </s>
      for w, p in self.editModel.editProbabilities(sentence[i]):
        trySentence[i] = w
        # prior (language model) + channel model (edit log-probability),
        # computed once per candidate
        score = self.languageModel.score(trySentence) + p
        if score > bestScore:
          bestScore = score
          bestSentence = trySentence[:]
      trySentence[i] = sentence[i]  # restore before trying the next position

    return bestSentence

  def evaluate(self, corpus):
    """Tests this speller on a corpus, returns a SpellingResult.

    Empty test sentences are skipped and count neither as correct nor
    towards the total.
    """
    numCorrect = 0
    numTotal = 0
    for sentence in corpus.generateTestCases():
      if sentence.isEmpty():
        continue
      hypothesis = self.correctSentence(sentence.getErrorSentence())
      if sentence.isCorrection(hypothesis):
        numCorrect += 1
      numTotal += 1
    return SpellingResult(numCorrect, numTotal)

  def correctCorpus(self, corpus):
    """Corrects a whole corpus, returns a JSON representation of the output.

    The result is a compact JSON array (no spaces after separators) holding
    one array of words per sentence in corpus.corpus. Words are escaped by
    the json module, so quotes and backslashes inside a word stay valid JSON;
    an empty corrected sentence is emitted as [].
    """
    import json  # function-scope import: no module-level import is visible here

    corrected_sentences = [
        self.correctSentence(sentence.getErrorSentence())
        for sentence in corpus.corpus
    ]
    # compact separators and ensure_ascii=False keep the output identical to
    # plain '","' joining for words that need no escaping
    return json.dumps(corrected_sentences, ensure_ascii=False, separators=(',', ':'))
開發者ID:hitoshinagano,項目名稱:NLP-Stanford,代碼行數:71,代碼來源:SpellCorrect.py


注:本文中的EditModel.EditModel.editProbabilities方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。