本文整理匯總了Python中nltk.tag.UnigramTagger.tag_sents方法的典型用法代碼示例。如果您正苦於以下問題:Python UnigramTagger.tag_sents方法的具體用法?Python UnigramTagger.tag_sents怎麼用?Python UnigramTagger.tag_sents使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類nltk.tag.UnigramTagger的用法示例。
在下文中一共展示了UnigramTagger.tag_sents方法的3個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: trainUniTnT
# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag_sents [as 別名]
def trainUniTnT(self):
    """Cross-validate a TnT tagger and a UnigramTagger, trained separately without a DefaultTagger.

    Calls ``self.split_into_folds()`` and then runs ``self.folds`` rounds.
    Each round:

    * trains both taggers on the sentences of ``self.foldlist[:self.folds - 1]``;
    * tags the untagged sentences of ``self.foldlist[self.folds - 1]``;
    * appends the predictions to ``self.tnt_tagged`` / ``self.uni_tagged`` and
      the gold sentences to ``self.org_tagged``;
    * moves the held-out fold to the front of ``self.foldlist`` so that the
      next round holds out a different fold.

    After the last round it stores the most recently trained taggers in
    ``self.tnt`` / ``self.unigram``, the accuracies over the concatenated
    predictions in ``self.tnt_avg_acc`` / ``self.uni_avg_acc``, and the
    results of ``self.tagprecision_recall`` in the ``*_tagprecision`` /
    ``*_tagrecall`` attributes. Finally ``self.org_tagged`` is emptied and
    ``self.foldlist`` is rebuilt with ``self.create_fold``.

    NOTE(review): assumes ``self.tnt_tagged``, ``self.uni_tagged`` and
    ``self.org_tagged`` already exist as lists; they are only extended here,
    and the two prediction lists are not reset at the end -- confirm that is
    intended.
    """
    from itertools import chain

    def flatten(nested):
        # Linear-time concatenation; ``sum(nested, [])`` re-copies the
        # accumulated list for every element and is quadratic.
        return list(chain.from_iterable(nested))

    self.split_into_folds()
    held_out_index = self.folds - 1
    for k in range(1, self.folds + 1):
        train_sents = flatten(self.foldlist[:held_out_index])
        tnt_tagger = tnt.TnT(N=100)
        tnt_tagger.train(train_sents)
        print(str(k) + " fold: tnt evaluated")
        unigram = UnigramTagger(train_sents)
        print(str(k) + " fold: unigram evaluated")

        held_out = self.foldlist[held_out_index]
        to_tag = [untag(sentence) for sentence in held_out]
        self.tnt_tagged += tnt_tagger.tag_sents(to_tag)
        self.uni_tagged += unigram.tag_sents(to_tag)
        self.org_tagged += held_out

        # Rotate: the fold just evaluated moves to the front, so a different
        # fold sits in the held-out position on the next round.
        self.foldlist = [held_out] + self.foldlist[:held_out_index]

    # Keep the taggers trained in the final round.
    self.tnt = tnt_tagger
    self.unigram = unigram

    # Flatten the gold sentences once and reuse them for both comparisons.
    gold_tokens = flatten(self.org_tagged)
    self.tnt_avg_acc = accuracy(gold_tokens, flatten(self.tnt_tagged))
    self.uni_avg_acc = accuracy(gold_tokens, flatten(self.uni_tagged))
    print("Accuracy of concatenated tnt-tagged sentences: ", self.tnt_avg_acc)
    print("Accuracy of concatenated unigram-tagged sentences: ", self.uni_avg_acc)

    (self.tnt_tagprecision, self.tnt_tagrecall) = self.tagprecision_recall(
        tnt_tagger, self.tnt_tagged, self.org_tagged
    )
    (self.unigram_tagprecision, self.unigram_tagrecall) = self.tagprecision_recall(
        unigram, self.uni_tagged, self.org_tagged
    )

    # Reset the following values so that trainRegexp starts from the initial state.
    # The list is rebuilt with an explicit loop (not a comprehension) so that
    # self.foldlist is already the new, growing list while create_fold runs.
    self.org_tagged = []
    self.foldlist = []
    for i in range(1, self.folds + 1):
        self.foldlist.append(self.create_fold(i))
示例2: getUnigramTaggerAccuracy
# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag_sents [as 別名]
def getUnigramTaggerAccuracy(trainingSet, testingSet):
    """Train a UnigramTagger on ``trainingSet`` and return its accuracy on ``testingSet``.

    Each sentence in ``testingSet`` is indexed as a sequence of items whose
    element 0 is the word and element 1 is the gold POS tag. The predicted
    and gold tag sequences are handed to ``calculateAccuracy``, whose result
    is returned unchanged.
    """

    def column(sentences, index):
        # Pick one field (0 = word, 1 = tag) out of every item of every sentence.
        return [[item[index] for item in sent] for sent in sentences]

    # Split the test data into plain words and gold tags before training.
    words_only = column(testingSet, 0)
    gold_tags = column(testingSet, 1)

    tagger = UnigramTagger(trainingSet)

    # Tag the bare words and keep only the predicted tags.
    predicted_tags = column(tagger.tag_sents(words_only), 1)

    return calculateAccuracy(gold_tags, predicted_tags)
示例3: trainALL
# 需要導入模塊: from nltk.tag import UnigramTagger [as 別名]
# 或者: from nltk.tag.UnigramTagger import tag_sents [as 別名]
def trainALL(self, last):
    """Cross-validate CRF, TnT and HMM taggers plus one n-gram tagger chosen by ``last``.

    Args:
        last: ``"U"`` to train a ``UnigramTagger`` backing off to
            ``DefaultTagger("n")``; ``"B"`` to train a ``BigramTagger``
            backing off to an ``AffixTagger``, which backs off to
            ``dictionary_backoff(self.option_tone, regex)``, where the regex
            tagger is chosen from ``self.option_tone`` / ``self.option_tag``.

    Raises:
        ValueError: if ``last`` is neither ``"U"`` nor ``"B"``, or if
            ``last == "B"`` and the ``option_tone`` / ``option_tag`` pair has
            no matching regex tagger. Previously these cases only failed with
            an unbound-local error after the first fold's CRF/TnT/HMM
            training had already run.

    Calls ``self.split_into_folds()`` and runs ``self.folds`` rounds. Each
    round trains on ``self.foldlist[:self.folds - 1]``, tags the untagged
    sentences of ``self.foldlist[self.folds - 1]``, appends predictions to
    the ``self.*_tagged`` lists and gold sentences to ``self.org_tagged``,
    then moves the held-out fold to the front of ``self.foldlist``.
    Afterwards the last round's taggers, the accuracies over the
    concatenated predictions and the ``self.tagprecision_recall`` results are
    stored on ``self``; ``self.org_tagged`` is emptied and ``self.foldlist``
    is rebuilt with ``self.create_fold``.

    NOTE(review): assumes the ``self.*_tagged`` lists already exist; the
    prediction lists are extended but not reset here -- confirm intended.
    """
    from itertools import chain

    if last not in ("U", "B"):
        raise ValueError("last must be 'U' (unigram) or 'B' (bigram), got %r" % (last,))

    def flatten(nested):
        # Linear-time concatenation; ``sum(nested, [])`` is quadratic.
        return list(chain.from_iterable(nested))

    self.split_into_folds()

    # Resolve the regex tagger class once, before any expensive training, so
    # that an unsupported option pair fails immediately.
    regex_factory = None
    if last == "B":
        if self.option_tone == "tonal" and self.option_tag == "Affixes":
            regex_factory = RegexpTonalSA
        elif self.option_tone == "tonal" and self.option_tag == "POS":
            regex_factory = RegexpTonal
        elif self.option_tone == "nontonal" and self.option_tag == "Affixes":
            regex_factory = RegexpSA
        elif self.option_tone == "nontonal" and self.option_tag == "POS":
            regex_factory = Regexp
        else:
            raise ValueError(
                "no regex tagger for option_tone=%r, option_tag=%r"
                % (self.option_tone, self.option_tag)
            )

    held_out_index = self.folds - 1
    for k in range(1, self.folds + 1):
        train_sents = flatten(self.foldlist[:held_out_index])

        crf = CRFTagger(training_opt={"max_iterations": 100, "max_linesearch": 10, "c1": 0.0001, "c2": 1.0})
        # The model file name encodes the fold number and both options.
        crf.train(
            train_sents,
            "Models/model.crfCrossValidation1" + str(k) + self.option_tone + self.option_tag + ".tagger",
        )
        print(str(k) + " fold: crf")

        tnt_tagger = tnt.TnT(unk=DefaultTagger("n"), Trained=True, N=100)
        tnt_tagger.train(train_sents)
        print(str(k) + " fold: tnt")

        # The HMM trainer is given the tag and word inventories of the
        # training data.
        tag_set = {token[1] for sentence in train_sents for token in sentence}
        symbols = {token[0] for sentence in train_sents for token in sentence}
        trainer = HiddenMarkovModelTrainer(list(tag_set), list(symbols))
        hmm = trainer.train_supervised(train_sents, estimator=lambda fd, bins: LidstoneProbDist(fd, 0.1, bins))
        print(str(k) + " fold: hmm")

        if last == "U":
            lasttagger = UnigramTagger(train_sents, backoff=DefaultTagger("n"))
            print(str(k) + " fold: unigram")
        else:
            # Backoff chain: bigram -> affix -> dictionary -> regex -> default "n".
            regex = regex_factory(DefaultTagger("n"))
            dic = dictionary_backoff(self.option_tone, regex)
            affix = AffixTagger(train_sents, min_stem_length=0, affix_length=-4, backoff=dic)
            lasttagger = BigramTagger(train_sents, backoff=affix)
            print(str(k) + " fold: bigram")

        held_out = self.foldlist[held_out_index]
        to_tag = [untag(sentence) for sentence in held_out]
        self.crf_tagged += crf.tag_sents(to_tag)
        self.tnt_tagged += tnt_tagger.tag_sents(to_tag)
        self.hmm_tagged += hmm.tag_sents(to_tag)
        self.lasttagger_tagged += lasttagger.tag_sents(to_tag)
        self.org_tagged += held_out

        # Rotate: the fold just evaluated moves to the front, so a different
        # fold sits in the held-out position on the next round.
        self.foldlist = [held_out] + self.foldlist[:held_out_index]

    # Keep the taggers trained in the final round.
    self.crf = crf
    self.tnt = tnt_tagger
    self.hmm = hmm
    self.lasttagger = lasttagger

    org_words = flatten(self.org_tagged)
    self.crf_avg_acc = accuracy(org_words, flatten(self.crf_tagged))
    self.tnt_avg_acc = accuracy(org_words, flatten(self.tnt_tagged))
    self.hmm_avg_acc = accuracy(org_words, flatten(self.hmm_tagged))
    self.lasttagger_avg_acc = accuracy(org_words, flatten(self.lasttagger_tagged))
    print("Accuracy of concatenated crf-tagged sentences: ", self.crf_avg_acc)
    print("Accuracy of concatenated tnt-tagged sentences: ", self.tnt_avg_acc)
    print("Accuracy of concatenated hmm-tagged sentences: ", self.hmm_avg_acc)
    print("Accuracy of concatenated " + last + "-tagged sentences: ", self.lasttagger_avg_acc)

    (self.crf_tagprecision, self.crf_tagrecall) = self.tagprecision_recall(crf, self.crf_tagged, self.org_tagged)
    (self.tnt_tagprecision, self.tnt_tagrecall) = self.tagprecision_recall(
        tnt_tagger, self.tnt_tagged, self.org_tagged
    )
    (self.hmm_tagprecision, self.hmm_tagrecall) = self.tagprecision_recall(hmm, self.hmm_tagged, self.org_tagged)
    (self.lasttagger_tagprecision, self.lasttagger_tagrecall) = self.tagprecision_recall(
        lasttagger, self.lasttagger_tagged, self.org_tagged
    )

    # Reset the gold list and rebuild the folds. An explicit loop is kept so
    # that self.foldlist is already the new, growing list while create_fold runs.
    self.org_tagged = []
    self.foldlist = []
    for i in range(1, self.folds + 1):
        self.foldlist.append(self.create_fold(i))