本文整理汇总了 Python 中 pycrfsuite.Tagger 类的典型用法代码示例。如果您正苦于以下问题：Python pycrfsuite.Tagger 的具体用法？Python pycrfsuite.Tagger 怎么用？Python pycrfsuite.Tagger 使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 pycrfsuite 的用法示例。
在下文中一共展示了 pycrfsuite.Tagger 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: eval
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def eval(self, test_x, test_y, crf_model):
    """Tag ``test_x`` with a stored CRF model and print a per-tag report.

    :param test_x: iterable of feature sequences; each one is passed to
        ``Tagger.tag`` unchanged.
    :param test_y: iterable of gold tag sequences, flattened for scoring.
    :param crf_model: model file name handed to ``Tagger.open``.
    :return: None; the classification report is printed.
    """
    crf_tagger = pycrfsuite.Tagger()
    crf_tagger.open(crf_model)
    predicted = [crf_tagger.tag(features) for features in test_x]

    # Flatten the nested sequences so that every token is one sample.
    gold_flat = list(chain.from_iterable(test_y))
    pred_flat = list(chain.from_iterable(predicted))

    # The binarizer is fitted on the gold tags only.
    binarizer = LabelBinarizer()
    y_true_all = binarizer.fit_transform(gold_flat)
    y_pred_all = binarizer.transform(pred_flat)

    tagset = sorted(set(binarizer.classes_))
    column_of = {}
    for column, label in enumerate(binarizer.classes_):
        column_of[label] = column

    report = classification_report(
        y_true_all,
        y_pred_all,
        labels=[column_of[label] for label in tagset],
        target_names=tagset,
        digits=5
    )
    print(report)
示例2: __init__
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def __init__(self):
    """Open the chunking CRF model stored next to this module and build
    the feature transformer from its template.

    The template strings are passed to ``TaggedTransformer`` verbatim.
    """
    model_path = join(dirname(__file__), "chunk_crf_2017_10_12.bin")
    self.model = pycrfsuite.Tagger()
    self.model.open(model_path)

    lowercase_features = [
        "T[-2].lower", "T[-1].lower", "T[0].lower", "T[1].lower",
        "T[2].lower",
    ]
    title_features = ["T[0].istitle", "T[-1].istitle", "T[1].istitle"]
    # word unigram and bigram
    word_features = [
        "T[-2]", "T[-1]", "T[0]", "T[1]", "T[2]",
        "T[-2,-1]", "T[-1,0]", "T[0,1]", "T[1,2]",
    ]
    # pos unigram and bigram
    pos_features = [
        "T[-2][1]", "T[-1][1]", "T[0][1]", "T[1][1]", "T[2][1]",
        "T[-2,-1][1]", "T[-1,0][1]", "T[0,1][1]", "T[1,2][1]",
    ]
    # chunk
    chunk_features = ["T[-3][2]", "T[-2][2]", "T[-1][2]"]

    # Concatenation keeps the original feature order.
    template = (
        lowercase_features
        + title_features
        + word_features
        + pos_features
        + chunk_features
    )
    self.transformer = TaggedTransformer(template)
示例3: __init__
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def __init__(self):
    """Open the POS-tagging CRF model stored next to this module and build
    the feature transformer from its template.

    The template strings are passed to ``TaggedTransformer`` verbatim.
    """
    model_path = join(dirname(__file__), "pos_crf_2017_10_11.bin")
    self.model = pycrfsuite.Tagger()
    self.model.open(model_path)

    lowercase_features = [
        "T[-2].lower", "T[-1].lower", "T[0].lower", "T[1].lower",
        "T[2].lower",
    ]
    title_features = ["T[0].istitle", "T[-1].istitle", "T[1].istitle"]
    # word unigram and bigram
    word_features = [
        "T[-2]", "T[-1]", "T[0]", "T[1]", "T[2]",
        "T[-2,-1]", "T[-1,0]", "T[0,1]", "T[1,2]",
    ]
    # pos unigram and bigram (previous positions only)
    pos_features = [
        "T[-3][1]", "T[-2][1]", "T[-1][1]",
        "T[-3,-2][1]", "T[-2,-1][1]",
    ]

    # Concatenation keeps the original feature order.
    template = (
        lowercase_features + title_features + word_features + pos_features
    )
    self.transformer = TaggedTransformer(template)
示例4: __init__
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def __init__(self):
    """Open the NER CRF model stored next to this module and build the
    feature transformer from its template.

    The template strings are passed to ``TaggedTransformer`` verbatim.
    """
    model_path = join(dirname(__file__), "ner_crf_2017_10_12.bin")
    self.model = pycrfsuite.Tagger()
    self.model.open(model_path)

    lowercase_features = [
        "T[-2].lower", "T[-1].lower", "T[0].lower", "T[1].lower",
        "T[2].lower",
    ]
    title_features = [
        "T[0].istitle", "T[-1].istitle", "T[1].istitle", "T[-2].istitle",
        "T[2].istitle",
    ]
    # word unigram and bigram
    word_features = [
        "T[-2]", "T[-1]", "T[0]", "T[1]", "T[2]",
        "T[-2,-1]", "T[-1,0]", "T[0,1]", "T[1,2]",
    ]
    # pos unigram and bigram
    pos_features = [
        "T[-2][1]", "T[-1][1]", "T[0][1]", "T[1][1]", "T[2][1]",
        "T[-2,-1][1]", "T[-1,0][1]", "T[0,1][1]", "T[1,2][1]",
    ]
    # ner
    ner_features = ["T[-3][3]", "T[-2][3]", "T[-1][3]"]

    # Concatenation keeps the original feature order.
    template = (
        lowercase_features
        + title_features
        + word_features
        + pos_features
        + ner_features
    )
    self.transformer = TaggedTransformer(template)
示例5: predict
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def predict(self, model_name, sentence):
    """
    Predict NER labels for the given model and query.

    :param model_name: base name of the model file; it is resolved to
        ``<MODELS_DIR>/<model_name>.model`` using ``app.config``.
    :param sentence: raw query text to tokenize, POS-tag and label.
    :return: the extracted entities after ``replace_synonyms`` was applied.
    """
    # Imported here rather than at module level, as in the original code.
    from app.nlu.tasks import pos_tagger

    words = [token.text for token in spacy_tokenizer(sentence)]
    tagged_token = pos_tagger(sentence)

    model_path = "{}/{}.model".format(app.config["MODELS_DIR"], model_name)
    crf_tagger = pycrfsuite.Tagger()
    crf_tagger.open(model_path)

    features = self.sent_to_features(tagged_token)
    predicted_labels = crf_tagger.tag(features)

    # Pair every word with its predicted label before converting to JSON.
    word_label_pairs = zip(words, predicted_labels)
    extracted_entities = self.crf2json(word_label_pairs)
    return self.replace_synonyms(extracted_entities)
示例6: test_tag_formats
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def test_tag_formats(tmpdir, xseq, yseq):
    """Check that tagging gives the same result for dict and key-only input.

    All feature coefficients are set to 1, a model is trained on the single
    sequence, and the tagger must reproduce ``yseq`` for both input formats.
    """
    model_filename = str(tmpdir.join('model.crfsuite'))

    # make all coefficients 1 and check that results are the same
    xseq = [{key: 1 for key in item} for item in xseq]

    trainer = Trainer()
    trainer.set('c2', 1e-6)  # make sure model overfits
    trainer.append(xseq, yseq)
    trainer.train(model_filename)

    # dicts with unit weights
    with Tagger().open(model_filename) as tagger:
        assert tagger.tag(xseq) == yseq

    # strings
    key_views = [item.keys() for item in xseq]
    with Tagger().open(model_filename) as tagger:
        assert tagger.tag(key_views) == yseq
示例7: tag
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def tag(self, tokens):
    """
    Tag a single sentence using the Python CRFSuite tagger.

    NB: before using this function, a model file must be available, either by
    - training a new model with the ``train`` function, or
    - selecting a pre-trained model via the ``set_model_file`` function.

    :params tokens: list of tokens to tag.
    :type tokens: list(str)
    :return: list of tagged tokens.
    :rtype: list(tuple(str, str))
    """
    # Delegate to the batch API with a one-sentence batch.
    tagged_sentences = self.tag_sents([tokens])
    return tagged_sentences[0]
示例8: load_models
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def load_models(lang, dir=None):
    """Load the trie, CRF tagger and lemmatiser for ``lang`` into globals.

    :param lang: language code used as the file-name prefix of every model
        file (``<lang>.marisa``, ``<lang>.msd.model``,
        ``<lang>.lexicon.guesser`` and ``<lang>.lexicon``).
    :param dir: directory containing the model files. When ``None``, the
        module-level ``reldir`` is used (assumed to be defined elsewhere in
        this module -- TODO confirm).
    :return: None; the results are stored in the globals ``trie``,
        ``tagger`` and ``lemmatiser``.
    """
    global trie
    global tagger
    global lemmatiser

    # ``reldir`` must not be assigned in this function: an assignment makes
    # the name local, so calling with ``dir=None`` raised UnboundLocalError
    # instead of falling back to the module-level value.
    model_dir = reldir if dir is None else dir

    def _unpickle(suffix):
        # NOTE(security): pickle.load can execute arbitrary code from the
        # file; only point this at trusted model directories.
        with open(os.path.join(model_dir, lang + suffix), 'rb') as handle:
            return pickle.load(handle)

    trie = _unpickle('.marisa')
    tagger = pycrfsuite.Tagger()
    tagger.open(os.path.join(model_dir, lang + '.msd.model'))
    lemmatiser = {'model': _unpickle('.lexicon.guesser'),
                  'lexicon': _unpickle('.lexicon')}
示例9: compareTaggers
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def compareTaggers(model1, model2, string_list, module_name):
    """
    Compare two models. Given a list of strings, print tokens and tags
    whenever the two taggers label a string differently. This is for
    spot-checking models.

    :param model1: a .crfsuite filename, looked up under ``module_name/``
    :param model2: another .crfsuite filename, looked up under ``module_name/``
    :param string_list: a list of strings to be checked
    :param module_name: name of a parser module providing ``tokenize`` and
        ``tokens2features``
    """
    parser = __import__(module_name)

    first_tagger = pycrfsuite.Tagger()
    first_tagger.open(module_name+'/'+model1)
    second_tagger = pycrfsuite.Tagger()
    second_tagger.open(module_name+'/'+model2)

    rule = '-'*75
    mismatches = 0
    for text in string_list:
        tokens = parser.tokenize(text)
        if not tokens:
            # Nothing to tag for this string.
            continue

        features = parser.tokens2features(tokens)
        tags1 = first_tagger.tag(features)
        tags2 = second_tagger.tag(features)
        if tags1 == tags2:
            continue

        mismatches += 1
        print('\n')
        print("%s. %s" %(mismatches, text))
        print(rule)
        print_spaced('token', model1, model2)
        print(rule)
        for token, tag1, tag2 in zip(tokens, tags1, tags2):
            print_spaced(token, tag1, tag2)

    print("\n\n%s of %s strings were labeled differently"%(mismatches, len(string_list)))
示例10: base_Crf
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def base_Crf(self, directry, datafolder, output, crfmodel):
    """Run the trainer, open the resulting model and run prediction.

    :param directry: forwarded to ``self.crftrainer`` (presumably the
        training-data location -- confirm against ``crftrainer``).
    :param datafolder: forwarded to ``self.crfpred``.
    :param output: forwarded to ``self.crfpred``.
    :param crfmodel: model file name given to the trainer and then opened
        by the tagger stored in ``self.crf_tag``.
    """
    self.crftrainer(directry, crfmodel)

    # The tagger is stored on the instance before the model is opened.
    opened = self.crf_tag = crf.Tagger()
    opened.open(crfmodel)

    self.crfpred(datafolder, output)
示例11: load
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def load(self, crf_model):
    """Create a fresh tagger on ``self.tagger`` and open ``crf_model`` with it.

    :param crf_model: model file name handed to ``Tagger.open``.
    """
    # The tagger is stored on the instance before the model is opened.
    fresh_tagger = self.tagger = pycrfsuite.Tagger()
    fresh_tagger.open(crf_model)
示例12: eval
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def eval(self, test_x, test_y, crf_model):
    """Tag ``test_x`` with a stored CRF model and print a per-tag report,
    leaving the tags 'B', 'M', 'E3' and 'E2' out of the scoring.

    :param test_x: iterable of feature sequences; each one is passed to
        ``Tagger.tag`` unchanged.
    :param test_y: iterable of gold tag sequences.
    :param crf_model: model file name handed to ``Tagger.open``.
    :return: None; the classification report is printed.
    """
    crf_tagger = pycrfsuite.Tagger()
    crf_tagger.open(crf_model)
    pred_y = [crf_tagger.tag(features) for features in test_x]

    skipped = {'B', 'M', 'E3', 'E2'}

    def _scored_tags(sequences):
        # Flatten the sequences and drop every tag that is not scored.
        return [tag for tag in chain.from_iterable(sequences)
                if tag not in skipped]

    # NOTE(review): gold and predicted tags are filtered independently, so
    # the two lists can lose alignment when a token carries a skipped tag on
    # one side only -- confirm this is intended.
    y_trues = _scored_tags(test_y)
    y_preds = _scored_tags(pred_y)

    binarizer = LabelBinarizer()
    y_true_all = binarizer.fit_transform(y_trues)
    y_pred_all = binarizer.transform(y_preds)

    tagset = sorted(set(binarizer.classes_))
    column_of = {}
    for column, label in enumerate(binarizer.classes_):
        column_of[label] = column

    report = classification_report(
        y_true_all,
        y_pred_all,
        labels=[column_of[label] for label in tagset],
        target_names=tagset,
        digits=5
    )
    print(report)
示例13: __init__
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def __init__(self, model_path=None):
    """Open a CRF model and keep the tagger as ``self.estimator``.

    :param model_path: model file name; any falsy value selects the
        ``wt_crf_2018_09_13.bin`` file stored next to this module.
    """
    if model_path:
        chosen_path = model_path
    else:
        chosen_path = join(dirname(__file__), "wt_crf_2018_09_13.bin")

    crf_tagger = pycrfsuite.Tagger()
    crf_tagger.open(chosen_path)
    # Assigned only after the model has been opened.
    self.estimator = crf_tagger
示例14: _predict_func
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def _predict_func(self, model, sentence_dict, crftype):
    """Tag every sentence with the stored CRF model of the given type.

    :param model: stored model record, forwarded to
        ``self._load_crf_model_files`` together with the target file name.
    :param sentence_dict: mapping of srcid to the sentence to tag.
    :param crftype: ``'crfsuite'`` or ``'crfsharp'``; for any other value
        both returned dicts stay empty.
    :return: ``(predicted_dict, score_dict)``, both keyed by srcid.
    """
    crf_model_file = 'temp/{0}.{1}.model'.format(self.model_uuid, crftype)
    self._load_crf_model_files(model, crf_model_file, crftype)
    predicted_dict = dict()
    score_dict = dict()

    if crftype == 'crfsuite':
        tagger = pycrfsuite.Tagger()
        tagger.open(crf_model_file)
        try:
            for srcid, sentence in sentence_dict.items():
                predicted = tagger.tag(self._calc_features(sentence))
                predicted_dict[srcid] = predicted
                # Probability of the label sequence that was just predicted.
                score_dict[srcid] = tagger.probability(predicted)
        finally:
            # Release the model deterministically, also when tagging fails.
            tagger.close()
    elif crftype == 'crfsharp':
        # ``thread_num`` and ``crfsharp_maxiter`` come from outside this
        # function (presumably module-level settings).
        tagger = CRFSharp(base_dir='./temp',
                          template='./model/scrabble.template',
                          thread=thread_num,
                          nbest=1,
                          modelfile=crf_model_file,
                          maxiter=crfsharp_maxiter
                          )
        srcids = list(sentence_dict.keys())
        sentences = [sentence_dict[srcid] for srcid in srcids]
        res = tagger.decode(sentences, srcids)
        for srcid in srcids:
            # nbest=1, so the first candidate is the only one.
            best_cand = res[srcid]['cands'][0]
            predicted_dict[srcid] = best_cand['token_predict']
            score_dict[srcid] = best_cand['prop']

    return predicted_dict, score_dict
示例15: _predict_func
# 需要导入模块: import pycrfsuite [as 别名]
# 或者: from pycrfsuite import Tagger [as 别名]
def _predict_func(self, model, sentence_dict, crftype):
    """Tag the sentences of every source id with the stored crfsuite model.

    :param model: stored model record, forwarded to
        ``self._load_crf_model_files`` together with the target file name.
    :param sentence_dict: mapping of srcid to a mapping of metadata type to
        sentence.
    :param crftype: only ``'crfsuite'`` is handled; for any other value both
        returned dicts stay empty.
    :return: ``(predicted_dict, score_dict)``; each maps srcid to a dict
        keyed by metadata type, or by ``'VendorGivenName'`` alone when
        ``self.concatenate_sentences`` is set.
    """
    crf_model_file = 'temp/{0}.{1}.model'.format(self.model_uuid, crftype)
    self._load_crf_model_files(model, crf_model_file, crftype)
    predicted_dict = dict()
    score_dict = dict()

    if crftype == 'crfsuite':
        tagger = pycrfsuite.Tagger()
        tagger.open(crf_model_file)
        try:
            for srcid, sentences in sentence_dict.items():
                predicteds = {}
                scores = {}
                if self.concatenate_sentences:
                    # All sentences are merged and tagged as one sequence.
                    sentence = self.merge_sentences(sentences)
                    predicted = tagger.tag(self._calc_features(sentence))
                    score = tagger.probability(predicted)
                    predicteds['VendorGivenName'] = predicted
                    scores['VendorGivenName'] = score
                else:
                    for metadata_type, sentence in sentences.items():
                        predicted = tagger.tag(self._calc_features(sentence))
                        score = tagger.probability(predicted)
                        predicteds[metadata_type] = predicted
                        scores[metadata_type] = score
                predicted_dict[srcid] = predicteds
                score_dict[srcid] = scores
        finally:
            # Release the model deterministically, also when tagging fails.
            tagger.close()

    return predicted_dict, score_dict