当前位置: 首页>>代码示例>>Python>>正文


Python pycrfsuite.Trainer方法代码示例

本文整理汇总了Python中pycrfsuite.Trainer类的典型用法代码示例。如果您正苦于以下问题：Python pycrfsuite.Trainer类的具体用法？Python pycrfsuite.Trainer怎么用？Python pycrfsuite.Trainer使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块pycrfsuite的用法示例。


在下文中一共展示了pycrfsuite.Trainer类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: train

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def train(self, train_data, model_file):
        """
        Fit a CRFSuite model on annotated sentences and record the model path.

        :param train_data: annotated sentences, each a sequence of
            (token, label) pairs.
        :type train_data: list(list(tuple(str, str)))
        :param model_file: path the trained model is written to.
        """
        crf_trainer = pycrfsuite.Trainer(verbose=self._verbose)
        crf_trainer.set_params(self._training_options)

        for sentence in train_data:
            words, tags = zip(*sentence)
            # The feature function receives the whole token tuple plus the
            # index of the token it should describe.
            sentence_features = []
            for position in range(len(words)):
                sentence_features.append(self._feature_func(words, position))
            crf_trainer.append(sentence_features, tags)

        # Training writes the model to model_file.
        crf_trainer.train(model_file)
        # Hand the path of the freshly written model to set_model_file.
        self.set_model_file(model_file)
开发者ID:rafasashi,项目名称:razzy-spinner,代码行数:22,代码来源:crf.py

示例2: train

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def train(self, nerdocs, mode_filename):
        """Fit a CRF model on every sentence of the given documents.

        Parameters
        ----------
        nerdocs: list of estnltk.estner.ner.Document.
            The training documents; each document's ``sentences`` are used.
        mode_filename: str
            The filename where the trained model is saved.
        """
        crf_trainer = pycrfsuite.Trainer(
            algorithm=self.algorithm,
            params={'c2': self.c2},
            verbose=self.verbose,
        )

        # Flatten documents into a single stream of sentences.
        all_sentences = (sentence
                         for document in nerdocs
                         for sentence in document.sentences)
        for sentence in all_sentences:
            crf_trainer.append(
                [token.feature_list() for token in sentence],
                [token.label for token in sentence],
            )

        crf_trainer.train(mode_filename)
开发者ID:estnltk,项目名称:estnltk,代码行数:24,代码来源:crfsuiteutil.py

示例3: train_crf

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def train_crf(x_train, y_train):
    """Train a CRF on paired feature/label sequences and save it.

    ``x_train`` and ``y_train`` are walked in lockstep; each pair is appended
    to the trainer as one sequence. The model is written to
    ``param.crf_path``.
    """
    print('Training...')
    crf_trainer = pycrfsuite.Trainer(verbose=False)
    for features, labels in zip(x_train, y_train):
        crf_trainer.append(features, labels)

    training_params = {
        # coefficient for L1 penalty
        'c1': 1.0,
        # coefficient for L2 penalty
        'c2': 1e-3,
        # stop earlier
        'max_iterations': 500,
        # include transitions that are possible, but not observed
        'feature.possible_transitions': True,
    }
    crf_trainer.set_params(training_params)

    crf_trainer.train(param.crf_path)
开发者ID:susht3,项目名称:webQA_sequence_labelling_pytorch,代码行数:18,代码来源:crf.py

示例4: test_tag_formats

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def test_tag_formats(tmpdir, xseq, yseq):
    """Tagging must give the same labels for dict features and for bare keys."""
    model_path = str(tmpdir.join('model.crfsuite'))
    # Set every coefficient to 1 so both input formats describe the same data.
    unit_xseq = [{key: 1 for key in item} for item in xseq]

    trainer = Trainer()
    trainer.set('c2', 1e-6)  # make sure model overfits
    trainer.append(unit_xseq, yseq)
    trainer.train(model_path)

    # features given as {name: 1} dicts
    with Tagger().open(model_path) as tagger:
        predicted = tagger.tag(unit_xseq)
        assert predicted == yseq

    # features given as the key collections only
    with Tagger().open(model_path) as tagger:
        keys_only = [item.keys() for item in unit_xseq]
        assert tagger.tag(keys_only) == yseq
开发者ID:scrapinghub,项目名称:python-crfsuite,代码行数:19,代码来源:test_tagger.py

示例5: test_training_messages

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def test_training_messages(tmpdir, xseq, yseq):
    """Check that training reports its log through ``Trainer.message``."""

    class RecordingTrainer(Trainer):
        # Stores every string handed to message() in self.messages.
        def __init__(self):
            self.messages = []

        def message(self, message):
            self.messages.append(message)

    model_path = str(tmpdir.join('model.crfsuite'))

    trainer = RecordingTrainer()
    trainer.select('lbfgs')
    trainer.append(xseq, yseq)
    # Nothing has been reported before training starts.
    assert len(trainer.messages) == 0

    trainer.train(model_path)
    recorded = trainer.messages
    assert recorded
    assert 'type: CRF1d\n' in recorded
开发者ID:scrapinghub,项目名称:python-crfsuite,代码行数:21,代码来源:test_trainer.py

示例6: test_training_messages_exception

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def test_training_messages_exception(tmpdir, xseq, yseq):
    """An exception raised inside ``message`` must surface from ``train``."""

    class MessageError(Exception):
        pass

    class RaisingTrainer(Trainer):
        # Fails as soon as the trainer tries to report anything.
        def message(self, message):
            raise MessageError("error")

    model_path = str(tmpdir.join('model.crfsuite'))

    trainer = RaisingTrainer()
    trainer.select('lbfgs')
    trainer.append(xseq, yseq)

    with pytest.raises(MessageError):
        trainer.train(model_path)
开发者ID:scrapinghub,项目名称:python-crfsuite,代码行数:19,代码来源:test_trainer.py

示例7: train

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def train(self, train_data, model_file):
        """
        Train the CRF tagger with CRFSuite and save the result.

        :param train_data: the annotated sentences; each sentence is a
            sequence of (token, label) pairs.
        :type train_data: list(list(tuple(str, str)))
        :param model_file: file the trained model is saved to.
        """
        crf_trainer = pycrfsuite.Trainer(verbose=self._verbose)
        crf_trainer.set_params(self._training_options)

        for annotated in train_data:
            tokens, labels = zip(*annotated)
            positions = range(len(tokens))
            # One feature set per token index, computed from the full sentence.
            crf_trainer.append(
                [self._feature_func(tokens, idx) for idx in positions],
                labels,
            )

        # Run training; the model is written to model_file.
        crf_trainer.train(model_file)
        # Pass the saved model's path on to set_model_file.
        self.set_model_file(model_file)
开发者ID:V1EngineeringInc,项目名称:V1EngineeringInc-Docs,代码行数:22,代码来源:crf.py

示例8: fit

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def fit(self, X, y, model_path, **params):
        """Train a CRFSuite model on raw strings and write it to ``model_path``.

        :param X: iterable of raw text items; each is passed to ``tokenize``.
        :param y: iterable of label sequences, paired with ``X`` via ``zip``.
        :param model_path: path the trained model is written to. It is declared
            before ``**params`` because Python does not allow any parameter
            after ``**kwargs``.
        :param params: accepted for call compatibility but not used; the
            training parameters are read from the estimator's own attributes.
        """
        # sklearn requires parameters to be declared as fields of the
        # estimator, and a field name can't contain a full stop, so the dotted
        # CRFSuite names are stored with underscores. Turn the underscores
        # back into dots before handing them to the trainer.
        # NOTE(review): this rewrites every underscore of every instance
        # attribute -- confirm the estimator holds only such fields.
        crf_params = {k.replace('_', '.'): v for k, v in self.__dict__.items()}
        trainer = pycrfsuite.Trainer(verbose=False, params=crf_params)
        for raw_text, labels in zip(X, y):
            tokens = tokenize(raw_text)
            trainer.append(tokens2features(tokens), labels)
        trainer.train(model_path)
        # Presumably re-imports parserator so it picks up the new model --
        # TODO confirm.
        reload(parserator)
开发者ID:datamade,项目名称:parserator,代码行数:12,代码来源:utils.py

示例9: trainModel

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def trainModel(training_data, module, model_path,
               params_to_set={'c1':0.1, 'c2':0.01, 'feature.minfreq':0}):
    """Train a CRFSuite model from labeled token sequences and save it.

    ``training_data`` yields 2-item records whose second item is a sequence of
    (token, label) pairs; the first item is ignored. Features are produced by
    ``module.tokens2features`` and the model is written to ``model_path``.
    ``params_to_set`` is passed to the trainer as its parameters.
    """
    crf_trainer = pycrfsuite.Trainer(verbose=False, params=params_to_set)

    for _ignored, labeled_tokens in training_data:
        # Split the (token, label) pairs into two parallel tuples.
        tokens, labels = zip(*labeled_tokens)
        features = module.tokens2features(tokens)
        crf_trainer.append(features, labels)

    crf_trainer.train(model_path)
开发者ID:datamade,项目名称:parserator,代码行数:12,代码来源:training.py

示例10: __init__

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def __init__(self, enumerations=100, L1Penalty=1.0, L2Penalty=1e-3):
        """Create a non-verbose CRF trainer and configure it.

        ``L1Penalty`` and ``L2Penalty`` are passed as the ``c1`` and ``c2``
        parameters, and ``enumerations`` as ``max_iterations``.
        """
        training_params = {
            'c1': L1Penalty,
            'c2': L2Penalty,
            'max_iterations': enumerations,
            'feature.possible_transitions': True,
        }
        self.crf_feature_train = crf.Trainer(verbose=False)
        self.crf_feature_train.set_params(training_params)
    
    #Method to append more features to the trainer
    #More features include TOKEN and its respective POS
    #It also includes the act tag of the sentence 
开发者ID:skashyap7,项目名称:TBBTCorpus,代码行数:14,代码来源:pycrfsuite.py

示例11: train

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def train(self, train_x, train_y, out_model):
        """Train a CRF on paired sequences, save it, and print the last iteration.

        Pairs from ``train_x``/``train_y`` in which either side is falsy are
        skipped. The model is written to ``out_model``.
        """
        crf_trainer = pycrfsuite.Trainer(verbose=False)
        for features, labels in zip(train_x, train_y):
            if not (features and labels):
                continue
            crf_trainer.append(features, labels)

        training_params = {
            # coefficient for L1 penalty
            'c1': 1.0,
            # coefficient for L2 penalty
            'c2': 1e-3,
            # stop earlier
            'max_iterations': 50,
            # include transitions that are possible, but not observed
            'feature.possible_transitions': True,
        }
        crf_trainer.set_params(training_params)

        crf_trainer.train(out_model)
        print(crf_trainer.logparser.last_iteration)
开发者ID:jiaeyan,项目名称:Jiayan,代码行数:17,代码来源:crf_sent_tagger.py

示例12: train

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def train(self, sentences, model):
        """Train the CRF tagger with CRFSuite, then load the saved model.

        :param sentences: Annotated sentences; each is unzipped into tokens
            and labels.
        :param model: Path the trained model is written to and loaded from.
        """
        crf_trainer = pycrfsuite.Trainer(verbose=True)
        crf_trainer.set_params(self.params)
        for annotated in sentences:
            tokens, labels = zip(*annotated)
            # One feature set per token index, computed from the full sentence.
            sentence_features = [
                self._get_features(tokens, idx) for idx, _ in enumerate(tokens)
            ]
            crf_trainer.append(sentence_features, labels)
        crf_trainer.train(model)
        self.load(model)
开发者ID:mcs07,项目名称:ChemDataExtractor,代码行数:16,代码来源:tag.py

示例13: train

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def train(self, train_x, train_y, save_file='model.crfsuite'):
        """Train a CRF on paired sequences, save it, and open it in the tagger.

        ``train_x`` and ``train_y`` are walked in lockstep. The model is
        written to ``save_file`` and then opened with the instance's tagger.
        """
        crf_trainer = pycrfsuite.Trainer(verbose=False)
        for features, labels in zip(train_x, train_y):
            crf_trainer.append(features, labels)

        training_params = {
            # coefficient for L1 penalty
            'c1': 1.0,
            # coefficient for L2 penalty
            'c2': 1e-3,
            # stop earlier
            'max_iterations': 50,
            'feature.possible_transitions': True,
        }
        crf_trainer.set_params(training_params)

        crf_trainer.train(save_file)
        self.__tagger.open(save_file)
开发者ID:Hironsan,项目名称:HotPepperGourmetDialogue,代码行数:14,代码来源:extractor.py

示例14: train

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def train(self, train_sentences, model_name):
        """
        Train a CRF model and save it as ``model_files/<model_name>.model``.

        :param train_sentences: sentences converted with ``sent_to_features``
            and ``sent_to_labels`` before training.
        :param model_name: name inserted into the output model path.
        :return: always ``True`` once training has finished.
        """
        feature_seqs = [self.sent_to_features(sentence) for sentence in train_sentences]
        label_seqs = [self.sent_to_labels(sentence) for sentence in train_sentences]

        crf_trainer = pycrfsuite.Trainer(verbose=False)
        for features, labels in zip(feature_seqs, label_seqs):
            crf_trainer.append(features, labels)

        training_params = {
            # coefficient for L1 penalty
            'c1': 1.0,
            # coefficient for L2 penalty
            'c2': 1e-3,
            # stop earlier
            'max_iterations': 50,
            # include transitions that are possible, but not observed
            'feature.possible_transitions': True,
        }
        crf_trainer.set_params(training_params)

        output_path = 'model_files/%s.model' % model_name
        crf_trainer.train(output_path)
        return True

    # Extract Labels from BIO tagged sentence 
开发者ID:alfredfrancis,项目名称:ai-chatbot-framework,代码行数:28,代码来源:entity_extractor.py

示例15: test

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Trainer [as 别名]
def test():
    """Train a tiny CRF on one hand-written sequence and tag that same sequence.

    Side effects: writes the model to ``conll2002-esp.crfsuite`` (a relative
    path) and prints the training pair, the predicted tags and the expected
    tags.
    """
    model_path = 'conll2002-esp.crfsuite'

    # One training sequence of two items, each a list of "name=value" strings.
    X_train = [[['foo=1', 'bar=0', 'c=9', 's=0', 'sd=12', 'cd=2', 'ca=3', 'd=True', 'cc=89'],
            ['foo=4', 'bar=7', 'c=3', 's=1', 'sd=8', 'cd=9', 'ca=1','d=False', 'cc=18']]]
    y_train = [['0', '1']]

    trainer = pycrfsuite.Trainer(verbose=False)

    for xseq, yseq in zip(X_train, y_train):
        print('x: ', xseq)
        print('y: ', yseq)
        trainer.append(xseq, yseq)

    trainer.set_params({
        'c1': 1.0,   # coefficient for L1 penalty
        'c2': 1e-3,  # coefficient for L2 penalty
        'max_iterations': 500,  # stop earlier

        # include transitions that are possible, but not observed
        'feature.possible_transitions': True
    })

    trainer.train(model_path)

    # Tagger.open() returns a context manager that closes the tagger on exit,
    # so the model is released even if tagging raises.
    with pycrfsuite.Tagger().open(model_path) as tagger:
        print("Predicted:", ' '.join(tagger.tag(X_train[0])))
    print("Correct:  ", ' '.join(y_train[0]))
开发者ID:susht3,项目名称:webQA_sequence_labelling_pytorch,代码行数:38,代码来源:crf.py


注:本文中的pycrfsuite.Trainer方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。