This article collects typical usage examples of the UnigramTagger.train method from Python's nltk.tag module. If you have been wondering what UnigramTagger.train does, how to call it, or where to find working examples, the curated code below should help. You can also explore further usage examples of the nltk.tag.UnigramTagger class to which the method belongs.
Two code examples involving UnigramTagger.train are shown below.
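Before the examples, here is a minimal, self-contained sketch of the API they build on: an NLTK UnigramTagger is trained simply by passing tagged sentences to its constructor, after which it can tag new token lists. The corpus and train/test split below are illustrative assumptions (NLTK's bundled treebank sample, fetched with nltk.download('treebank')), not part of the examples that follow.

# Minimal sketch: train a UnigramTagger on NLTK's treebank sample and use it.
# Assumes the corpus has been downloaded via nltk.download('treebank').
from nltk.corpus import treebank
from nltk.tag import UnigramTagger

tagged_sents = treebank.tagged_sents()
train_sents, test_sents = tagged_sents[:3000], tagged_sents[3000:]

# Training happens in the constructor: the tagger memorizes the most
# frequent tag observed for each word in train_sents.
tagger = UnigramTagger(train_sents)

print(tagger.tag(['This', 'is', 'a', 'test', '.']))
print(tagger.evaluate(test_sents))  # accuracy on the held-out sentences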
Example 1: make_pos_model
# Required import: from nltk.tag import UnigramTagger [as alias]
# Or: from nltk.tag.UnigramTagger import train [as alias]
import os
import pickle
import time

from nltk.corpus.reader import TaggedCorpusReader
from nltk.tag import BigramTagger, TrigramTagger, UnigramTagger, tnt

def make_pos_model(model_type):
    now = time.time()
    # Read the tagged Greek training corpus from the current directory.
    reader = TaggedCorpusReader('.', 'greek_training_set.pos')
    train_sents = reader.tagged_sents()
    if model_type == 'unigram':
        tagger = UnigramTagger(train_sents)
        file = 'unigram.pickle'
    elif model_type == 'bigram':
        tagger = BigramTagger(train_sents)
        file = 'bigram.pickle'
    elif model_type == 'trigram':
        tagger = TrigramTagger(train_sents)
        file = 'trigram.pickle'
    elif model_type == 'backoff':
        # Chain unigram -> bigram -> trigram taggers via backoff.
        tagger1 = UnigramTagger(train_sents)
        tagger2 = BigramTagger(train_sents, backoff=tagger1)
        tagger = TrigramTagger(train_sents, backoff=tagger2)
        file = '123grambackoff.pickle'
    elif model_type == 'tnt':
        tagger = tnt.TnT()
        tagger.train(train_sents)
        file = 'tnt.pickle'
    else:
        print('Invalid model_type.')
        return  # bail out before tagger/file are used uninitialized
    _dir = os.path.expanduser('~/greek_models_cltk/taggers/pos')
    path = os.path.join(_dir, file)
    # Pickle the trained tagger to disk.
    with open(path, 'wb') as f:
        pickle.dump(tagger, f)
    print('Completed training {0} model in {1} seconds to {2}.'.format(model_type, time.time() - now, path))
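Once make_pos_model has been run, the pickled tagger can be loaded back and applied to new tokens. The following is a hypothetical usage sketch, assuming the 'unigram' model was trained and saved to the path used in the function above.

# Hypothetical usage: load the pickled unigram model written by make_pos_model
# and tag a pre-tokenized sentence. The path mirrors the one used above.
import os
import pickle

path = os.path.expanduser('~/greek_models_cltk/taggers/pos/unigram.pickle')
with open(path, 'rb') as f:
    tagger = pickle.load(f)

# Words never seen in the training corpus are tagged None by a plain UnigramTagger.
print(tagger.tag(['λόγος', 'ἐστί']))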
Example 2: contextual_rules
# Required import: from nltk.tag import UnigramTagger [as alias]
# Or: from nltk.tag.UnigramTagger import train [as alias]
# BrillTaggerTrainer and fntbl37 come from NLTK; wikicorpus is assumed to be
# a project-specific helper that yields tagged sentences from a Wikicorpus dump.
from nltk.tag import UnigramTagger
from nltk.tag.brill import fntbl37
from nltk.tag.brill_trainer import BrillTaggerTrainer

def contextual_rules(wikicorpus_dir, context_file):
    sentences = wikicorpus(wikicorpus_dir, words=1000000)
    # Replace proper nouns with a placeholder so rules generalize over names.
    ANONYMOUS = "anonymous"
    for s in sentences:
        for i, (w, tag) in enumerate(s):
            if tag == "NP":  # NP = proper noun in the Parole tagset.
                s[i] = (ANONYMOUS, "NP")
    ctx = fntbl37()  # NLTK's standard set of 37 Brill rule templates.
    tagger = UnigramTagger(sentences)
    tagger = BrillTaggerTrainer(tagger, ctx, trace=0)
    tagger = tagger.train(sentences, max_rules=100)
    # print(tagger.evaluate(wikicorpus(10000, start=1)))
    with open(context_file, "w") as f:
        for rule in tagger.rules():
            f.write("%s\n" % rule)
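Because wikicorpus is project-specific, here is a self-contained sketch of the same BrillTaggerTrainer.train workflow, swapped onto NLTK's bundled treebank sample purely for illustration (an assumed substitute corpus, not the one used above).

# Sketch of the Brill workflow from Example 2 on NLTK's treebank sample,
# so it runs without the project-specific wikicorpus helper.
from nltk.corpus import treebank
from nltk.tag import UnigramTagger
from nltk.tag.brill import fntbl37
from nltk.tag.brill_trainer import BrillTaggerTrainer

train_sents = treebank.tagged_sents()[:3000]

baseline = UnigramTagger(train_sents)  # initial tagger whose errors Brill rules correct
trainer = BrillTaggerTrainer(baseline, fntbl37(), trace=0)
brill_tagger = trainer.train(train_sents, max_rules=100)

# Each learned rule rewrites a tag based on the surrounding context.
for rule in brill_tagger.rules()[:5]:
    print(rule)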