本文整理汇总了Python中pycrfsuite.Trainer方法的典型用法代码示例。如果您正苦于以下问题:Python pycrfsuite.Trainer方法的具体用法?Python pycrfsuite.Trainer怎么用?Python pycrfsuite.Trainer使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pycrfsuite的用法示例。
在下文中一共展示了pycrfsuite.Trainer方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: train
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def train(self, train_data, model_file):
    """Fit a CRFSuite model on annotated sentences and record the model file.

    :param train_data: annotated sentences; each sentence is a sequence of
        ``(token, label)`` pairs.
    :type train_data: list(list(tuple(str, str)))
    :param model_file: path the trained model is written to.
    """
    crf_trainer = pycrfsuite.Trainer(verbose=self._verbose)
    crf_trainer.set_params(self._training_options)

    for sentence in train_data:
        # Split the (token, label) pairs into two parallel tuples.
        tokens, labels = zip(*sentence)
        positions = range(len(tokens))
        # The feature function receives the whole sentence plus the index
        # of the token it should describe.
        features = [self._feature_func(tokens, pos) for pos in positions]
        crf_trainer.append(features, labels)

    # Training writes its output to model_file.
    crf_trainer.train(model_file)
    # Hand the path of the trained model to set_model_file.
    self.set_model_file(model_file)
示例2: train
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def train(self, nerdocs, mode_filename):
    """Train a CRF model on the given documents.

    Parameters
    ----------
    nerdocs: list of estnltk.estner.ner.Document
        The documents used for model training.
    mode_filename: str
        The filename where the trained model is saved.
    """
    crf_trainer = pycrfsuite.Trainer(
        algorithm=self.algorithm,
        params={'c2': self.c2},
        verbose=self.verbose,
    )

    # Walk every sentence of every document, one training sequence each.
    all_sentences = (
        sentence for document in nerdocs for sentence in document.sentences
    )
    for sentence in all_sentences:
        features = [token.feature_list() for token in sentence]
        labels = [token.label for token in sentence]
        crf_trainer.append(features, labels)

    crf_trainer.train(mode_filename)
示例3: train_crf
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def train_crf(x_train, y_train):
    """Train a CRF on paired feature/label sequences.

    The model is written to ``param.crf_path``.

    :param x_train: iterable of feature sequences.
    :param y_train: iterable of label sequences, parallel to ``x_train``.
    """
    print('Training...')

    crf_settings = {
        # coefficient for L1 penalty
        'c1': 1.0,
        # coefficient for L2 penalty
        'c2': 1e-3,
        # upper bound on training iterations
        'max_iterations': 500,
        # include transitions that are possible, but not observed
        'feature.possible_transitions': True,
    }

    crf_trainer = pycrfsuite.Trainer(verbose=False)
    for features, labels in zip(x_train, y_train):
        crf_trainer.append(features, labels)
    crf_trainer.set_params(crf_settings)
    crf_trainer.train(param.crf_path)
示例4: test_tag_formats
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def test_tag_formats(tmpdir, xseq, yseq):
    """Check that dict features and bare feature names tag identically.

    Every feature value is forced to 1 before training, so tagging with the
    feature dicts and tagging with only their keys must both give ``yseq``.
    """
    model_filename = str(tmpdir.join('model.crfsuite'))
    # Make all coefficients 1 so both input formats carry the same information.
    xseq = [dict.fromkeys(item, 1) for item in xseq]

    fitted = Trainer()
    fitted.set('c2', 1e-6)  # tiny L2 penalty: make sure the model overfits
    fitted.append(xseq, yseq)
    fitted.train(model_filename)

    # Features given as dicts.
    with Tagger().open(model_filename) as tagger:
        dict_prediction = tagger.tag(xseq)
        assert dict_prediction == yseq

    # Features given as plain strings (the dict keys only).
    with Tagger().open(model_filename) as tagger:
        data = [item.keys() for item in xseq]
        key_prediction = tagger.tag(data)
        assert key_prediction == yseq
示例5: test_training_messages
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def test_training_messages(tmpdir, xseq, yseq):
    """Check that training delivers messages through ``Trainer.message``."""

    class CapturingTrainer(Trainer):
        """Trainer subclass that collects every message it receives."""

        def __init__(self):
            self.messages = []

        def message(self, message):
            self.messages.append(message)

    model_filename = str(tmpdir.join('model.crfsuite'))

    capturing = CapturingTrainer()
    capturing.select('lbfgs')
    capturing.append(xseq, yseq)
    # No message has been recorded before training starts.
    assert not capturing.messages

    capturing.train(model_filename)
    # Training must have produced output, including the model type line.
    assert capturing.messages
    assert 'type: CRF1d\n' in capturing.messages
示例6: test_training_messages_exception
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def test_training_messages_exception(tmpdir, xseq, yseq):
    """Check that an exception raised in ``message`` surfaces from ``train``."""

    class MyException(Exception):
        """Marker exception raised by the message hook below."""

    class BadTrainer(Trainer):
        """Trainer subclass whose message hook always fails."""

        def message(self, message):
            raise MyException("error")

    model_filename = str(tmpdir.join('model.crfsuite'))

    failing = BadTrainer()
    failing.select('lbfgs')
    failing.append(xseq, yseq)

    # The error raised inside the hook must reach the caller of train().
    with pytest.raises(MyException):
        failing.train(model_filename)
示例7: train
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def train(self, train_data, model_file):
    """Train the CRF tagger with CRFSuite and save it to ``model_file``.

    :param train_data: the list of annotated sentences, each a sequence of
        ``(token, label)`` pairs.
    :type train_data: list(list(tuple(str, str)))
    :param model_file: file the trained model is saved to.
    """
    suite = pycrfsuite.Trainer(verbose=self._verbose)
    suite.set_params(self._training_options)

    for annotated in train_data:
        # Unzip the pairs: one tuple of tokens, one tuple of labels.
        words, tags = zip(*annotated)
        # One feature entry per token position in the sentence.
        feats = [self._feature_func(words, idx) for idx, _ in enumerate(words)]
        suite.append(feats, tags)

    # Run training; the output goes to model_file.
    suite.train(model_file)
    # Pass the model file path on to set_model_file.
    self.set_model_file(model_file)
示例8: fit
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def fit(self, X, y, model_path, **params):
    """Train a CRF on raw texts and write the model to ``model_path``.

    :param X: iterable of raw texts; each one is tokenized with ``tokenize``.
    :param y: iterable of label sequences, parallel to ``X``.
    :param model_path: path the trained model is written to. It is declared
        before ``**params`` because Python does not allow any parameter
        after a ``**`` catch-all.
    :param params: extra keyword arguments; accepted but not used. The
        training parameters come from the instance attributes instead.
    """
    # sklearn requires parameters to be declared as fields of the estimator,
    # and a field name cannot contain a full stop, so the fields use
    # underscores. Turn the underscores back into full stops for CRFsuite.
    # NOTE(review): every underscore in every instance attribute name is
    # replaced -- confirm no attribute needs to keep a literal underscore.
    crf_params = {
        name.replace('_', '.'): value
        for name, value in self.__dict__.items()
    }

    trainer = pycrfsuite.Trainer(verbose=False, params=crf_params)
    for raw_text, labels in zip(X, y):
        tokens = tokenize(raw_text)
        trainer.append(tokens2features(tokens), labels)
    trainer.train(model_path)

    # Presumably reloads parserator so it picks up the new model -- TODO confirm.
    reload(parserator)
示例9: trainModel
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def trainModel(training_data, module, model_path,
               params_to_set={'c1':0.1, 'c2':0.01, 'feature.minfreq':0}):
    """Train a CRF model from labeled examples and write it to ``model_path``.

    :param training_data: iterable of 2-item entries; the second item is a
        sequence of ``(token, label)`` pairs, the first item is ignored.
    :param module: object providing ``tokens2features(tokens)``.
    :param model_path: path the trained model is written to.
    :param params_to_set: training parameters handed to ``pycrfsuite.Trainer``.
        The default dict is shared between calls; this function itself
        never mutates it.
    """
    crf_trainer = pycrfsuite.Trainer(verbose=False, params=params_to_set)

    for _ignored, labeled_tokens in training_data:
        # Separate the pairs into parallel token and label tuples.
        tokens, labels = zip(*labeled_tokens)
        features = module.tokens2features(tokens)
        crf_trainer.append(features, labels)

    crf_trainer.train(model_path)
示例10: __init__
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def __init__(self, enumerations=100, L1Penalty=1.0, L2Penalty=1e-3):
    """Create the CRF trainer and configure its training parameters.

    :param enumerations: value used for the ``max_iterations`` parameter.
    :param L1Penalty: value used for the ``c1`` parameter.
    :param L2Penalty: value used for the ``c2`` parameter.
    """
    training_settings = {
        'c1': L1Penalty,
        'c2': L2Penalty,
        'max_iterations': enumerations,
        'feature.possible_transitions': True,
    }
    self.crf_feature_train = crf.Trainer(verbose=False)
    self.crf_feature_train.set_params(training_settings)
#Method to append more features to the trainer
#More features include TOKEN and its respective POS
#It also includes the act tag of the sentence
示例11: train
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def train(self, train_x, train_y, out_model):
    """Train a CRF on the non-empty sequence pairs and save it to ``out_model``.

    :param train_x: iterable of feature sequences.
    :param train_y: iterable of label sequences, parallel to ``train_x``.
    :param out_model: path the trained model is written to.
    """
    crf_trainer = pycrfsuite.Trainer(verbose=False)

    for features, labels in zip(train_x, train_y):
        # Skip pairs where either side is empty (or otherwise falsy).
        if not (features and labels):
            continue
        crf_trainer.append(features, labels)

    crf_settings = {
        # coefficient for L1 penalty
        'c1': 1.0,
        # coefficient for L2 penalty
        'c2': 1e-3,
        # upper bound on training iterations
        'max_iterations': 50,
        # include transitions that are possible, but not observed
        'feature.possible_transitions': True,
    }
    crf_trainer.set_params(crf_settings)
    crf_trainer.train(out_model)

    # Report the last training iteration recorded by the trainer's log parser.
    print(crf_trainer.logparser.last_iteration)
示例12: train
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def train(self, sentences, model):
    """Train the CRF tagger with CRFSuite, then load the resulting model.

    :param sentences: annotated sentences, each a sequence of
        ``(token, label)`` pairs.
    :param model: path the trained model is written to and loaded from.
    """
    crf_trainer = pycrfsuite.Trainer(verbose=True)
    crf_trainer.set_params(self.params)

    for annotated in sentences:
        # Split the pairs into parallel token and label tuples.
        tokens, labels = zip(*annotated)
        positions = range(len(tokens))
        features = [self._get_features(tokens, pos) for pos in positions]
        crf_trainer.append(features, labels)

    crf_trainer.train(model)
    # Load the model that was just written.
    self.load(model)
示例13: train
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def train(self, train_x, train_y, save_file='model.crfsuite'):
    """Train a CRF, save it to ``save_file`` and open it in this object's tagger.

    :param train_x: iterable of feature sequences.
    :param train_y: iterable of label sequences, parallel to ``train_x``.
    :param save_file: path the trained model is written to.
    """
    crf_settings = {
        # coefficient for L1 penalty
        'c1': 1.0,
        # coefficient for L2 penalty
        'c2': 1e-3,
        # upper bound on training iterations
        'max_iterations': 50,
        'feature.possible_transitions': True,
    }

    crf_trainer = pycrfsuite.Trainer(verbose=False)
    for features, labels in zip(train_x, train_y):
        crf_trainer.append(features, labels)
    crf_trainer.set_params(crf_settings)
    crf_trainer.train(save_file)

    # Point the tagger at the model that was just written.
    self.__tagger.open(save_file)
示例14: train
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def train(self, train_sentences, model_name):
    """Train an NER model and save it as ``model_files/<model_name>.model``.

    :param train_sentences: sentences passed to ``sent_to_features`` and
        ``sent_to_labels`` to build the training sequences.
    :param model_name: name inserted into the output file path.
    :return: always ``True``.
    """
    crf_trainer = pycrfsuite.Trainer(verbose=False)

    # Build all feature sequences first, then all label sequences.
    feature_seqs = [self.sent_to_features(sent) for sent in train_sentences]
    label_seqs = [self.sent_to_labels(sent) for sent in train_sentences]
    for feature_seq, label_seq in zip(feature_seqs, label_seqs):
        crf_trainer.append(feature_seq, label_seq)

    crf_settings = {
        # coefficient for L1 penalty
        'c1': 1.0,
        # coefficient for L2 penalty
        'c2': 1e-3,
        # upper bound on training iterations
        'max_iterations': 50,
        # include transitions that are possible, but not observed
        'feature.possible_transitions': True,
    }
    crf_trainer.set_params(crf_settings)

    output_path = 'model_files/%s.model' % model_name
    crf_trainer.train(output_path)
    return True
# Extract Labels from BIO tagged sentence
示例15: test
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def test():
    """Smoke-test pycrfsuite: train on one tiny sequence, then tag it again.

    Trains a model on a single two-item sequence, writes it to
    ``conll2002-esp.crfsuite`` in the working directory, reopens that file
    with a tagger and prints the predicted labels next to the true ones.
    """
    # One training sequence of two items; each item is a list of
    # "name=value" feature strings.
    X_train = [[['foo=1', 'bar=0', 'c=9', 's=0', 'sd=12', 'cd=2', 'ca=3', 'd=True', 'cc=89'],
                ['foo=4', 'bar=7', 'c=3', 's=1', 'sd=8', 'cd=9', 'ca=1', 'd=False', 'cc=18']]]
    y_train = [['0', '1']]
    # Single definition of the path so training and tagging cannot drift apart.
    model_path = 'conll2002-esp.crfsuite'

    trainer = pycrfsuite.Trainer(verbose=False)
    for xseq, yseq in zip(X_train, y_train):
        print('x: ', xseq)
        print('y: ', yseq)
        trainer.append(xseq, yseq)

    trainer.set_params({
        'c1': 1.0,  # coefficient for L1 penalty
        'c2': 1e-3,  # coefficient for L2 penalty
        'max_iterations': 500,  # upper bound on training iterations
        # include transitions that are possible, but not observed
        'feature.possible_transitions': True,
    })
    trainer.train(model_path)

    # Open the model through the context manager so the tagger is closed
    # once tagging is done.
    with pycrfsuite.Tagger().open(model_path) as tagger:
        predicted = tagger.tag(X_train[0])

    print("Predicted:", ' '.join(predicted))
    print("Correct: ", ' '.join(y_train[0]))