Python pycrfsuite.Tagger方法代码示例

本文整理汇总了Python中pycrfsuite.Tagger方法的典型用法代码示例。如果您正苦于以下问题：Python pycrfsuite.Tagger方法的具体用法？Python pycrfsuite.Tagger怎么用？Python pycrfsuite.Tagger使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pycrfsuite的用法示例。

在下文中一共展示了pycrfsuite.Tagger方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: eval

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def eval(self, test_x, test_y, crf_model):
    """Tag ``test_x`` with a saved CRF model and print a per-label report.

    :param test_x: iterable of feature sequences; each one is passed
        unchanged to ``Tagger.tag``.
    :param test_y: iterable of gold label sequences matching ``test_x``.
    :param crf_model: model file name handed to ``Tagger.open``.
    :return: None; the classification report is printed to stdout.
    """
    tagger = pycrfsuite.Tagger()
    tagger.open(crf_model)

    # One predicted label sequence per input feature sequence.
    y_pred = [tagger.tag(feat_list) for feat_list in test_x]

    # Flatten both sides to label level; the binarizer is fitted on the
    # gold labels only and then reused for the predictions.
    binarizer = LabelBinarizer()
    gold_matrix = binarizer.fit_transform(list(chain.from_iterable(test_y)))
    pred_matrix = binarizer.transform(list(chain.from_iterable(y_pred)))

    label_names = sorted(set(binarizer.classes_))
    column_of = {
        label: column for column, label in enumerate(binarizer.classes_)
    }
    report_columns = [column_of[label] for label in label_names]

    report = classification_report(
        gold_matrix,
        pred_matrix,
        labels=report_columns,
        target_names=label_names,
        digits=5
    )
    print(report)

开发者ID:jiaeyan，项目名称:Jiayan，代码行数:25，代码来源:crf_sent_tagger.py

示例2: __init__

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def __init__(self):
    """Open the bundled chunking CRF model and set up its transformer.

    The model file ``chunk_crf_2017_10_12.bin`` is resolved relative to the
    directory containing this module. The feature template strings are
    passed verbatim to ``TaggedTransformer``.
    """
    tagger = self.model = pycrfsuite.Tagger()
    tagger.open(join(dirname(__file__), "chunk_crf_2017_10_12.bin"))

    # `.lower` / `.istitle` token features
    case_features = [
        "T[-2].lower", "T[-1].lower", "T[0].lower", "T[1].lower",
        "T[2].lower",
        "T[0].istitle", "T[-1].istitle", "T[1].istitle",
    ]
    # word unigram and bigram
    word_features = [
        "T[-2]", "T[-1]", "T[0]", "T[1]", "T[2]",
        "T[-2,-1]", "T[-1,0]", "T[0,1]", "T[1,2]",
    ]
    # pos unigram and bigram
    pos_features = [
        "T[-2][1]", "T[-1][1]", "T[0][1]", "T[1][1]", "T[2][1]",
        "T[-2,-1][1]", "T[-1,0][1]", "T[0,1][1]", "T[1,2][1]",
    ]
    # chunk
    chunk_features = ["T[-3][2]", "T[-2][2]", "T[-1][2]"]

    template = case_features + word_features + pos_features + chunk_features
    self.transformer = TaggedTransformer(template)

开发者ID:undertheseanlp，项目名称:underthesea，代码行数:21，代码来源:model_crf.py

示例3: __init__

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def __init__(self):
    """Open the bundled POS CRF model and set up its transformer.

    The model file ``pos_crf_2017_10_11.bin`` is resolved relative to the
    directory containing this module. The feature template strings are
    passed verbatim to ``TaggedTransformer``.
    """
    tagger = self.model = pycrfsuite.Tagger()
    tagger.open(join(dirname(__file__), "pos_crf_2017_10_11.bin"))

    template = []
    # `.lower` / `.istitle` token features
    template += [
        "T[-2].lower", "T[-1].lower", "T[0].lower", "T[1].lower",
        "T[2].lower",
        "T[0].istitle", "T[-1].istitle", "T[1].istitle",
    ]
    # word unigram and bigram
    template += [
        "T[-2]", "T[-1]", "T[0]", "T[1]", "T[2]",
        "T[-2,-1]", "T[-1,0]", "T[0,1]", "T[1,2]",
    ]
    # pos unigram and bigram (preceding positions only)
    template += [
        "T[-3][1]", "T[-2][1]", "T[-1][1]",
        "T[-3,-2][1]", "T[-2,-1][1]",
    ]
    self.transformer = TaggedTransformer(template)

开发者ID:undertheseanlp，项目名称:underthesea，代码行数:19，代码来源:model_crf.py

示例4: __init__

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def __init__(self):
    """Open the bundled NER CRF model and set up its transformer.

    The model file ``ner_crf_2017_10_12.bin`` is resolved relative to the
    directory containing this module. The feature template strings are
    passed verbatim to ``TaggedTransformer``.
    """
    tagger = self.model = pycrfsuite.Tagger()
    tagger.open(join(dirname(__file__), "ner_crf_2017_10_12.bin"))

    # `.lower` / `.istitle` token features
    case_features = [
        "T[-2].lower", "T[-1].lower", "T[0].lower", "T[1].lower",
        "T[2].lower",
        "T[0].istitle", "T[-1].istitle", "T[1].istitle", "T[-2].istitle",
        "T[2].istitle",
    ]
    # word unigram and bigram
    word_features = [
        "T[-2]", "T[-1]", "T[0]", "T[1]", "T[2]",
        "T[-2,-1]", "T[-1,0]", "T[0,1]", "T[1,2]",
    ]
    # pos unigram and bigram
    pos_features = [
        "T[-2][1]", "T[-1][1]", "T[0][1]", "T[1][1]", "T[2][1]",
        "T[-2,-1][1]", "T[-1,0][1]", "T[0,1][1]", "T[1,2][1]",
    ]
    # ner
    ner_features = ["T[-3][3]", "T[-2][3]", "T[-1][3]"]

    template = case_features + word_features + pos_features + ner_features
    self.transformer = TaggedTransformer(template)

开发者ID:undertheseanlp，项目名称:underthesea，代码行数:22，代码来源:model_crf.py

示例5: predict

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def predict(self, model_name, sentence):
    """
    Predict NER labels for ``sentence`` with the CRF model ``model_name``.

    :param model_name: base name of the model; the file
        ``<MODELS_DIR>/<model_name>.model`` is opened, with ``MODELS_DIR``
        read from ``app.config``.
    :param sentence: text to label.
    :return: the result of ``self.replace_synonyms`` applied to the
        entities built by ``self.crf2json``.
    """
    # Imported at call time, as in the original code.
    from app.nlu.tasks import pos_tagger

    words = [token.text for token in spacy_tokenizer(sentence)]
    tagged_token = pos_tagger(sentence)

    tagger = pycrfsuite.Tagger()
    model_path = "{}/{}.model".format(app.config["MODELS_DIR"], model_name)
    tagger.open(model_path)

    features = self.sent_to_features(tagged_token)
    predicted_labels = tagger.tag(features)

    # Pair each tokenizer word with the label predicted at the same position.
    word_label_pairs = zip(words, predicted_labels)
    extracted_entities = self.crf2json(word_label_pairs)
    return self.replace_synonyms(extracted_entities)

开发者ID:alfredfrancis，项目名称:ai-chatbot-framework，代码行数:20，代码来源:entity_extractor.py

示例6: test_tag_formats

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def test_tag_formats(tmpdir, xseq, yseq):
    """Check that dict-valued and key-only feature inputs tag identically."""
    model_filename = str(tmpdir.join('model.crfsuite'))

    # Give every feature the value 1 so that passing only the feature
    # names should produce the same result as passing the dicts.
    xseq = [dict.fromkeys(x, 1) for x in xseq]

    trainer = Trainer()
    trainer.set('c2', 1e-6)  # make sure model overfits
    trainer.append(xseq, yseq)
    trainer.train(model_filename)

    # Same items expressed as bare feature names (strings).
    names_only = [x.keys() for x in xseq]

    for data in (xseq, names_only):
        with Tagger().open(model_filename) as tagger:
            assert tagger.tag(data) == yseq

开发者ID:scrapinghub，项目名称:python-crfsuite，代码行数:19，代码来源:test_tagger.py

示例7: tag

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def tag(self, tokens):
    '''
    Tag a single sentence by delegating to ``tag_sents``.

    NB: per the original note, a model file should be specified before
    calling this, either by training a new model with ``train`` or by
    selecting a pre-trained one with ``set_model_file``.

    :params tokens : list of tokens needed to tag.
    :type tokens : list(str)
    :return : list of tagged tokens.
    :rtype : list (tuple(str,str))
    '''
    # Wrap the sentence in a one-element batch and unwrap the single result.
    tagged_batch = self.tag_sents([tokens])
    return tagged_batch[0]

开发者ID:rafasashi，项目名称:razzy-spinner，代码行数:14，代码来源:crf.py

示例8: load_models

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def load_models(lang, dir=None):
    """Load the trie, CRF tagger and lemmatiser resources for ``lang``.

    The loaded objects are stored in the module-level globals ``trie``,
    ``tagger`` and ``lemmatiser``.

    :param lang: language code used as the file-name prefix
        (``<lang>.marisa``, ``<lang>.msd.model``, ``<lang>.lexicon.guesser``,
        ``<lang>.lexicon``).
    :param dir: directory containing the model files. When ``None`` the
        module-level ``reldir`` is used (assumed to be defined elsewhere in
        this module; it is not visible in this snippet).
    """
    global trie
    global tagger
    global lemmatiser

    # Use a separate local name for the directory. The original assigned
    # ``reldir`` only when ``dir`` was given, which made ``reldir`` a
    # function-local name and raised UnboundLocalError for ``dir=None``.
    model_dir = reldir if dir is None else dir

    def _unpickle(suffix):
        # NOTE: pickle can execute arbitrary code while loading; these files
        # must come from a trusted source.
        with open(os.path.join(model_dir, lang + suffix), 'rb') as handle:
            return pickle.load(handle)

    trie = _unpickle('.marisa')
    tagger = pycrfsuite.Tagger()
    tagger.open(os.path.join(model_dir, lang + '.msd.model'))
    lemmatiser = {'model': _unpickle('.lexicon.guesser'),
                  'lexicon': _unpickle('.lexicon')}

开发者ID:clarinsi，项目名称:reldi-tagger，代码行数:13，代码来源:tagger.py

示例9: compareTaggers

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def compareTaggers(model1, model2, string_list, module_name):
    """
    Spot-check two CRF models against each other.

    Every string is tokenized and featurized with the parser module, tagged
    by both models, and printed with both tag sequences side by side
    whenever the two sequences differ. A summary count is printed at the end.

    :param model1: a .crfsuite filename, opened as ``module_name/model1``
    :param model2: another .crfsuite filename, opened as ``module_name/model2``
    :param string_list: a list of strings to be checked
    :param module_name: name of a parser module providing ``tokenize`` and
        ``tokens2features``
    """
    module = __import__(module_name)

    tagger1 = pycrfsuite.Tagger()
    tagger1.open(module_name+'/'+model1)
    tagger2 = pycrfsuite.Tagger()
    tagger2.open(module_name+'/'+model2)

    count_discrepancies = 0

    for string in string_list:
        tokens = module.tokenize(string)
        if not tokens:
            # Nothing to tag for this string.
            continue

        features = module.tokens2features(tokens)
        tags1 = tagger1.tag(features)
        tags2 = tagger2.tag(features)
        if tags1 == tags2:
            continue

        count_discrepancies += 1
        print('\n')
        print("%s. %s" %(count_discrepancies, string))

        print('-'*75)
        print_spaced('token', model1, model2)
        print('-'*75)
        for token, first_tag, second_tag in zip(tokens, tags1, tags2):
            print_spaced(token, first_tag, second_tag)

    print("\n\n%s of %s strings were labeled differently"%(count_discrepancies, len(string_list)))

开发者ID:datamade，项目名称:parserator，代码行数:39，代码来源:spotcheck.py

示例10: base_Crf

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def base_Crf(self, directry, datafolder, output, crfmodel):
    """Train a CRF model, open it as ``self.crf_tag`` and run prediction.

    :param directry: passed to ``self.crftrainer`` together with ``crfmodel``.
    :param datafolder: passed to ``self.crfpred``.
    :param output: passed to ``self.crfpred``.
    :param crfmodel: model file name given to the trainer and then opened
        by the tagger.
    """
    self.crftrainer(directry, crfmodel)

    # The tagger is stored on the instance before it is opened.
    tagger = self.crf_tag = crf.Tagger()
    tagger.open(crfmodel)

    self.crfpred(datafolder, output)

开发者ID:skashyap7，项目名称:TBBTCorpus，代码行数:7，代码来源:pycrfsuite.py

示例11: load

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def load(self, crf_model):
    """Create a ``pycrfsuite.Tagger`` on ``self.tagger`` and open ``crf_model``.

    :param crf_model: model file name handed to ``Tagger.open``.
    """
    # The tagger is stored on the instance before it is opened.
    tagger = self.tagger = pycrfsuite.Tagger()
    tagger.open(crf_model)

开发者ID:jiaeyan，项目名称:Jiayan，代码行数:5，代码来源:crf_sent_tagger.py

示例12: eval

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def eval(self, test_x, test_y, crf_model):
    """Tag ``test_x`` with a saved CRF model and print a per-label report.

    Labels ``'B'``, ``'M'``, ``'E3'`` and ``'E2'`` are dropped from both the
    gold and the predicted label streams before the report is computed.

    :param test_x: iterable of feature sequences; each one is passed
        unchanged to ``Tagger.tag``.
    :param test_y: iterable of gold label sequences matching ``test_x``.
    :param crf_model: model file name handed to ``Tagger.open``.
    :return: None; the classification report is printed to stdout.
    """
    tagger = pycrfsuite.Tagger()
    tagger.open(crf_model)

    # One predicted label sequence per input feature sequence.
    pred_y = [tagger.tag(feat_list) for feat_list in test_x]

    # Each stream is filtered independently of the other.
    excluded = {'B', 'M', 'E3', 'E2'}
    y_trues = [tag for tag in chain.from_iterable(test_y) if tag not in excluded]
    y_preds = [tag for tag in chain.from_iterable(pred_y) if tag not in excluded]

    # The binarizer is fitted on the gold labels only.
    binarizer = LabelBinarizer()
    gold_matrix = binarizer.fit_transform(y_trues)
    pred_matrix = binarizer.transform(y_preds)

    label_names = sorted(set(binarizer.classes_))
    column_of = {
        label: column for column, label in enumerate(binarizer.classes_)
    }
    report_columns = [column_of[label] for label in label_names]

    report = classification_report(
        gold_matrix,
        pred_matrix,
        labels=report_columns,
        target_names=label_names,
        digits=5
    )
    print(report)

开发者ID:jiaeyan，项目名称:Jiayan，代码行数:28，代码来源:crf_punctuator.py

示例13: __init__

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def __init__(self, model_path=None):
    """Open a CRF model and keep the tagger as ``self.estimator``.

    :param model_path: model file to open. Any falsy value (``None``, empty
        string) selects ``wt_crf_2018_09_13.bin`` in this module's directory.
    """
    resolved_path = (
        model_path
        if model_path
        else join(dirname(__file__), "wt_crf_2018_09_13.bin")
    )
    tagger = pycrfsuite.Tagger()
    tagger.open(resolved_path)
    # Stored only after the model has been opened.
    self.estimator = tagger

开发者ID:undertheseanlp，项目名称:underthesea，代码行数:8，代码来源:model.py

示例14: _predict_func

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def _predict_func(self, model, sentence_dict, crftype):
    """Tag every sentence in ``sentence_dict`` with the given CRF backend.

    :param model: model record forwarded to ``self._load_crf_model_files``.
    :param sentence_dict: mapping of srcid to sentence.
    :param crftype: ``'crfsuite'`` or ``'crfsharp'``; for any other value
        nothing is tagged and two empty dicts are returned.
    :return: ``(predicted_dict, score_dict)``, both keyed by srcid.
    """
    crf_model_file = 'temp/{0}.{1}.model'.format(self.model_uuid, crftype)
    # Presumably writes the model binaries to ``crf_model_file`` (an inline
    # version of that file writing was disabled here) -- TODO confirm.
    self._load_crf_model_files(model, crf_model_file, crftype)

    predicted_dict = {}
    score_dict = {}
    begin_time = arrow.get()  # not used below; call kept as in the original

    if crftype == 'crfsuite':
        tagger = pycrfsuite.Tagger()
        tagger.open(crf_model_file)

        for srcid, sentence in sentence_dict.items():
            features = self._calc_features(sentence)
            labels = tagger.tag(features)
            # probability() is asked right after tag() on the same sequence.
            score = tagger.probability(labels)
            predicted_dict[srcid] = labels
            score_dict[srcid] = score
    elif crftype == 'crfsharp':
        tagger = CRFSharp(base_dir='./temp',
                          template='./model/scrabble.template',
                          thread=thread_num,
                          nbest=1,
                          modelfile=crf_model_file,
                          maxiter=crfsharp_maxiter
                          )
        srcids = list(sentence_dict)
        sentences = [sentence_dict[srcid] for srcid in srcids]
        decoded = tagger.decode(sentences, srcids)
        for srcid in srcids:
            # Only the first candidate is used (nbest=1 above).
            top_candidate = decoded[srcid]['cands'][0]
            predicted_dict[srcid] = top_candidate['token_predict']
            score_dict[srcid] = top_candidate['prop']

    return predicted_dict, score_dict

开发者ID:plastering，项目名称:plastering，代码行数:43，代码来源:ir2entities.py

示例15: _predict_func

# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def _predict_func(self, model, sentence_dict, crftype):
    """Tag the sentences of every srcid with a crfsuite model.

    :param model: model record forwarded to ``self._load_crf_model_files``.
    :param sentence_dict: mapping of srcid to a mapping of metadata type to
        sentence.
    :param crftype: only ``'crfsuite'`` is handled; for any other value
        nothing is tagged and two empty dicts are returned.
    :return: ``(predicted_dict, score_dict)``; each maps srcid to a dict
        keyed by metadata type. When ``self.concatenate_sentences`` is true
        the single key is ``'VendorGivenName'``.
    """
    crf_model_file = 'temp/{0}.{1}.model'.format(self.model_uuid, crftype)
    self._load_crf_model_files(model, crf_model_file, crftype)

    predicted_dict = {}
    score_dict = {}
    begin_time = arrow.get()  # not used below; call kept as in the original

    if crftype != 'crfsuite':
        return predicted_dict, score_dict

    tagger = pycrfsuite.Tagger()
    tagger.open(crf_model_file)

    for srcid, sentences in sentence_dict.items():
        if self.concatenate_sentences:
            # All sentences of this srcid are merged and tagged once.
            inputs = {'VendorGivenName': self.merge_sentences(sentences)}
        else:
            inputs = sentences

        predicteds = {}
        scores = {}
        for metadata_type, sentence in inputs.items():
            labels = tagger.tag(self._calc_features(sentence))
            # probability() is asked right after tag() on the same sequence.
            scores[metadata_type] = tagger.probability(labels)
            predicteds[metadata_type] = labels

        predicted_dict[srcid] = predicteds
        score_dict[srcid] = scores

    return predicted_dict, score_dict

开发者ID:plastering，项目名称:plastering，代码行数:33，代码来源:char2ir.py

注：本文中的pycrfsuite.Tagger方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。