当前位置: 首页>>代码示例>>Python>>正文


Python UnigramTagger.tag_sents方法代码示例

本文整理汇总了Python中nltk.tag.UnigramTagger.tag_sents方法的典型用法代码示例。如果您正苦于以下问题:Python UnigramTagger.tag_sents方法的具体用法?Python UnigramTagger.tag_sents怎么用?Python UnigramTagger.tag_sents使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nltk.tag.UnigramTagger的用法示例。


在下文中一共展示了UnigramTagger.tag_sents方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: trainUniTnT

# 需要导入模块: from nltk.tag import UnigramTagger [as 别名]
# 或者: from nltk.tag.UnigramTagger import tag_sents [as 别名]
 def trainUniTnT(self):
     """Cross-validate a TnT tagger and a unigram tagger, each without a DefaultTagger.

     After ``self.split_into_folds()``, the method runs ``self.folds``
     rounds.  In every round the last entry of ``self.foldlist`` is held
     out, both taggers are trained on the remaining entries, the held-out
     sentences are untagged and re-tagged by both taggers, and the fold list
     is rotated so that a different fold is held out next time.

     Side effects on ``self``:
       * ``tnt_tagged`` / ``uni_tagged`` are extended with each round's output;
       * ``tnt`` / ``unigram`` hold the taggers trained in the final round;
       * ``tnt_avg_acc`` / ``uni_avg_acc`` hold the accuracy over all
         concatenated held-out tokens;
       * ``tnt_tagprecision`` / ``tnt_tagrecall`` and
         ``unigram_tagprecision`` / ``unigram_tagrecall`` hold whatever
         ``self.tagprecision_recall`` returns;
       * ``org_tagged`` is emptied and ``foldlist`` is rebuilt at the end.

     NOTE(review): ``tnt_tagged`` and ``uni_tagged`` are *not* reset here,
     while ``org_tagged`` is - confirm that no later call relies on them
     being empty.
     """
     # Local import: the file's import block is outside the reviewed section.
     from itertools import chain

     self.split_into_folds()
     last = self.folds - 1  # index of the fold that is held out each round
     for k in range(1, self.folds + 1):
         held_out = self.foldlist[last]
         # Linear-time flatten; sum(lists, []) re-copies the accumulator for
         # every fold it adds.
         train_sents = list(chain.from_iterable(self.foldlist[:last]))
         tnt_tagger = tnt.TnT(N=100)
         tnt_tagger.train(train_sents)
         print(str(k) + " fold: tnt evaluated")
         unigram = UnigramTagger(train_sents)
         print(str(k) + " fold: unigram evaluated")
         to_tag = [untag(i) for i in held_out]
         self.tnt_tagged += tnt_tagger.tag_sents(to_tag)
         self.uni_tagged += unigram.tag_sents(to_tag)
         self.org_tagged += held_out
         # Rotate: the fold just tested moves to the front, so the previous
         # second-to-last fold becomes the next held-out fold.
         self.foldlist = [held_out] + self.foldlist[:last]
     self.tnt = tnt_tagger
     self.unigram = unigram
     # Flatten the gold standard once and reuse it for both comparisons.
     org_words = list(chain.from_iterable(self.org_tagged))
     self.tnt_avg_acc = accuracy(org_words, list(chain.from_iterable(self.tnt_tagged)))
     self.uni_avg_acc = accuracy(org_words, list(chain.from_iterable(self.uni_tagged)))
     print("Accuracy of concatenated tnt-tagged sentences: ", self.tnt_avg_acc)
     print("Accuracy of concatenated unigram-tagged sentences: ", self.uni_avg_acc)
     (self.tnt_tagprecision, self.tnt_tagrecall) = self.tagprecision_recall(
         tnt_tagger, self.tnt_tagged, self.org_tagged
     )
     (self.unigram_tagprecision, self.unigram_tagrecall) = self.tagprecision_recall(
         unigram, self.uni_tagged, self.org_tagged
     )
     # Reset the following values so that trainRegexp starts from the initial state.
     self.org_tagged = []
     self.foldlist = []
     for i in range(1, self.folds + 1):
         self.foldlist.append(self.create_fold(i))
开发者ID:Batene,项目名称:Bamanankan,代码行数:34,代码来源:CrossValidation.py

示例2: getUnigramTaggerAccuracy

# 需要导入模块: from nltk.tag import UnigramTagger [as 别名]
# 或者: from nltk.tag.UnigramTagger import tag_sents [as 别名]
def getUnigramTaggerAccuracy(trainingSet, testingSet):
    """Train a UnigramTagger on ``trainingSet`` and score it on ``testingSet``.

    Both arguments are sequences of sentences, each sentence being a sequence
    of items indexed as ``item[0]`` (the word) and ``item[1]`` (the gold tag).
    The words of the test sentences are tagged by the freshly trained tagger,
    and the gold and predicted tag sequences are handed to
    ``calculateAccuracy``, whose result is returned unchanged.
    """

    def column(sentences, position):
        # Keep one slot (0 = word, 1 = tag) of every item, sentence by sentence.
        return [[item[position] for item in sent] for sent in sentences]

    # Split the test data into bare words and their reference tags.
    words_only = column(testingSet, 0)
    gold_tags = column(testingSet, 1)

    tagger = UnigramTagger(trainingSet)

    # Tag the bare words and keep only the predicted tags.
    predicted_tags = column(tagger.tag_sents(words_only), 1)

    return calculateAccuracy(gold_tags, predicted_tags)
开发者ID:kyajmiller,项目名称:LING-539,代码行数:18,代码来源:q2.py

示例3: trainALL

# 需要导入模块: from nltk.tag import UnigramTagger [as 别名]
# 或者: from nltk.tag.UnigramTagger import tag_sents [as 别名]
 def trainALL(self, last):
     """Cross-validate CRF, TnT, HMM and one additional ("last") tagger.

     After ``self.split_into_folds()``, the method runs ``self.folds``
     rounds.  In every round the last entry of ``self.foldlist`` is held
     out, all four taggers are trained on the remaining entries, the
     held-out sentences are untagged and re-tagged by each tagger, and the
     fold list is rotated so that a different fold is held out next time.

     Args:
         last: selects the fourth tagger.
             ``"U"`` - ``UnigramTagger`` backing off to ``DefaultTagger("n")``.
             ``"B"`` - ``BigramTagger`` backing off to an ``AffixTagger``,
             then ``dictionary_backoff``, then the regexp tagger chosen by
             ``self.option_tone`` / ``self.option_tag``, then
             ``DefaultTagger("n")``.

     Raises:
         ValueError: if ``last`` is neither ``"U"`` nor ``"B"``, or if
             ``last == "B"`` and the tone/tag options are not one of the
             four supported combinations.  These cases previously surfaced
             as an ``UnboundLocalError`` only after the other models had
             been trained.

     Side effects on ``self``:
       * ``crf_tagged`` / ``tnt_tagged`` / ``hmm_tagged`` /
         ``lasttagger_tagged`` are extended with each round's output;
       * ``crf`` / ``tnt`` / ``hmm`` / ``lasttagger`` hold the taggers
         trained in the final round;
       * the ``*_avg_acc`` attributes hold the accuracy over all
         concatenated held-out tokens, and the ``*_tagprecision`` /
         ``*_tagrecall`` attributes hold whatever
         ``self.tagprecision_recall`` returns;
       * ``org_tagged`` is emptied and ``foldlist`` is rebuilt at the end.
     A CRF model file is written under ``Models/`` for every round.

     NOTE(review): the ``*_tagged`` accumulators are *not* reset here, while
     ``org_tagged`` is - confirm that no later call relies on them being
     empty.
     """
     # Local import: the file's import block is outside the reviewed section.
     from itertools import chain

     # Fail fast on a bad selector instead of after a full round of training.
     if last not in ("U", "B"):
         raise ValueError("last must be 'U' or 'B', got {!r}".format(last))

     self.split_into_folds()

     # Pick the regexp tagger class once; an instance is still built per round.
     regex_factory = None
     if last == "B":
         tone = self.option_tone
         tag = self.option_tag
         if tone == "tonal" and tag == "Affixes":
             regex_factory = RegexpTonalSA
         elif tone == "tonal" and tag == "POS":
             regex_factory = RegexpTonal
         elif tone == "nontonal" and tag == "Affixes":
             regex_factory = RegexpSA
         elif tone == "nontonal" and tag == "POS":
             regex_factory = Regexp
         else:
             raise ValueError(
                 "unsupported option_tone/option_tag combination: {!r}/{!r}".format(tone, tag)
             )

     held_out_idx = self.folds - 1  # index of the fold that is held out each round
     for k in range(1, self.folds + 1):
         held_out = self.foldlist[held_out_idx]
         # Linear-time flatten; sum(lists, []) re-copies the accumulator for
         # every fold it adds.
         train_sents = list(chain.from_iterable(self.foldlist[:held_out_idx]))

         crf = CRFTagger(training_opt={"max_iterations": 100, "max_linesearch": 10, "c1": 0.0001, "c2": 1.0})
         crf.train(
             train_sents,
             "Models/model.crfCrossValidation1" + str(k) + self.option_tone + self.option_tag + ".tagger",
         )
         print(str(k) + " fold: crf")

         tnt_tagger = tnt.TnT(unk=DefaultTagger("n"), Trained=True, N=100)
         tnt_tagger.train(train_sents)
         print(str(k) + " fold: tnt")

         # The HMM trainer is given the tags (states) and words (symbols)
         # that occur in this round's training data.
         tag_set = {token[1] for sent in train_sents for token in sent}
         symbols = {token[0] for sent in train_sents for token in sent}
         trainer = HiddenMarkovModelTrainer(list(tag_set), list(symbols))
         hmm = trainer.train_supervised(train_sents, estimator=lambda fd, bins: LidstoneProbDist(fd, 0.1, bins))
         print(str(k) + " fold: hmm")

         if last == "U":
             lasttagger = UnigramTagger(train_sents, backoff=DefaultTagger("n"))
             print(str(k) + " fold: unigram")
         else:  # last == "B", validated above
             regex = regex_factory(DefaultTagger("n"))
             dic = dictionary_backoff(self.option_tone, regex)
             affix = AffixTagger(train_sents, min_stem_length=0, affix_length=-4, backoff=dic)
             lasttagger = BigramTagger(train_sents, backoff=affix)
             print(str(k) + " fold: bigram")

         to_tag = [untag(i) for i in held_out]
         self.crf_tagged += crf.tag_sents(to_tag)
         self.tnt_tagged += tnt_tagger.tag_sents(to_tag)
         self.hmm_tagged += hmm.tag_sents(to_tag)
         self.lasttagger_tagged += lasttagger.tag_sents(to_tag)
         self.org_tagged += held_out
         # Rotate: the fold just tested moves to the front, so the previous
         # second-to-last fold becomes the next held-out fold.
         self.foldlist = [held_out] + self.foldlist[:held_out_idx]

     self.crf = crf
     self.tnt = tnt_tagger
     self.hmm = hmm
     self.lasttagger = lasttagger
     org_words = list(chain.from_iterable(self.org_tagged))
     self.crf_avg_acc = accuracy(org_words, list(chain.from_iterable(self.crf_tagged)))
     self.tnt_avg_acc = accuracy(org_words, list(chain.from_iterable(self.tnt_tagged)))
     self.hmm_avg_acc = accuracy(org_words, list(chain.from_iterable(self.hmm_tagged)))
     self.lasttagger_avg_acc = accuracy(org_words, list(chain.from_iterable(self.lasttagger_tagged)))
     print("Accuracy of concatenated crf-tagged sentences: ", self.crf_avg_acc)
     print("Accuracy of concatenated tnt-tagged sentences: ", self.tnt_avg_acc)
     print("Accuracy of concatenated hmm-tagged sentences: ", self.hmm_avg_acc)
     print("Accuracy of concatenated " + last + "-tagged sentences: ", self.lasttagger_avg_acc)
     (self.crf_tagprecision, self.crf_tagrecall) = self.tagprecision_recall(crf, self.crf_tagged, self.org_tagged)
     (self.tnt_tagprecision, self.tnt_tagrecall) = self.tagprecision_recall(
         tnt_tagger, self.tnt_tagged, self.org_tagged
     )
     (self.hmm_tagprecision, self.hmm_tagrecall) = self.tagprecision_recall(hmm, self.hmm_tagged, self.org_tagged)
     (self.lasttagger_tagprecision, self.lasttagger_tagrecall) = self.tagprecision_recall(
         lasttagger, self.lasttagger_tagged, self.org_tagged
     )
     # Reset the gold-standard accumulator and rebuild the folds so that the
     # next training method starts from the initial state.
     self.org_tagged = []
     self.foldlist = []
     for i in range(1, self.folds + 1):
         self.foldlist.append(self.create_fold(i))
开发者ID:Batene,项目名称:Bamanankan,代码行数:72,代码来源:CrossValidation.py


注:本文中的nltk.tag.UnigramTagger.tag_sents方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。