This article collects typical usage examples of the Python method seqeval.metrics.sequence_labeling.get_entities. If you are wondering what sequence_labeling.get_entities does, how to use it, or where to find concrete examples, the curated snippets below may help. You can also explore the module seqeval.metrics.sequence_labeling for related functionality.
The following 11 code examples of sequence_labeling.get_entities are shown, sorted by popularity by default.
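Before the examples, here is a minimal sketch of what get_entities itself returns: it decodes an IOB-style tag sequence into a list of (type, start, end) tuples, where both indices are inclusive (the same convention used by the tests in Examples 3 and 4 below). The tag values here are illustrative only.

from seqeval.metrics.sequence_labeling import get_entities

tags = ['O', 'B-PER', 'I-PER', 'O', 'B-LOC']
print(get_entities(tags))  # [('PER', 1, 2), ('LOC', 4, 4)]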
Example 1: entity_visualization
# Required import: from seqeval.metrics import sequence_labeling [as alias]
# Or: from seqeval.metrics.sequence_labeling import get_entities [as alias]
def entity_visualization(texts: List[List[str]], labels: List[List[str]],
                         output_fname='entity_texts.html'):
    texts_c = deepcopy(texts)
    texts_c = [item[:-1] for item in texts_c]  # drop the last token (e.g. an end-of-sequence marker)
    entities = [get_entities(item) for item in labels]
    # collect the distinct entity types across all sentences
    all_entities = list(set([sub_item[0] for item in entities for sub_item in item]))
    all_entities = [item for item in all_entities if item != 'O']
    nb_entities = len(all_entities)
    if nb_entities > len(ENTITY_COLOR):
        # not enough predefined colors: pad with random hex colors
        rest_nb_colors = nb_entities - len(ENTITY_COLOR)
        colors = ENTITY_COLOR + ['#' + ''.join([random.choice('0123456789ABCDEF') for j in range(6)])
                                 for i in range(rest_nb_colors)]
    else:
        colors = ENTITY_COLOR[:nb_entities]
    assert len(colors) == nb_entities
    entity_colors = {all_entities[i]: colors[i] for i in range(nb_entities)}
    with open(output_fname, 'w') as fout:
        for x, y in zip(texts_c, entities):
            fout.write(entity2html(x, y, entity_colors))
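A hypothetical call to the function above. ENTITY_COLOR and entity2html are module-level helpers not shown in this snippet, and the trailing token of each sentence is stripped by the item[:-1] line.

# Hypothetical usage; assumes ENTITY_COLOR and entity2html exist in the module.
texts = [['Jim', 'lives', 'in', 'London', '</s>']]  # last token is dropped inside the function
labels = [['B-PER', 'O', 'O', 'B-LOC', 'O']]
entity_visualization(texts, labels, output_fname='entity_texts.html')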
Example 2: _build_response
# Required import: from seqeval.metrics import sequence_labeling [as alias]
# Or: from seqeval.metrics.sequence_labeling import get_entities [as alias]
def _build_response(self, sent, tags, prob):
    words = self.tokenizer(sent)
    res = {
        'words': words,
        'entities': []
    }
    chunks = get_entities(tags)
    for chunk_type, chunk_start, chunk_end in chunks:
        chunk_end += 1  # get_entities returns inclusive end indices; make them exclusive
        entity = {
            'text': ' '.join(words[chunk_start: chunk_end]),
            'type': chunk_type,
            'score': float(np.average(prob[chunk_start: chunk_end])),
            'beginOffset': chunk_start,
            'endOffset': chunk_end
        }
        res['entities'].append(entity)
    return res
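For illustration, a sketch of the response this method would build, assuming self.tokenizer splits on whitespace (the scores are made up):

# sent = 'Jim went to London', tags = ['B-PER', 'O', 'O', 'B-LOC']
# -> {'words': ['Jim', 'went', 'to', 'London'],
#     'entities': [{'text': 'Jim', 'type': 'PER', 'score': 0.99, 'beginOffset': 0, 'endOffset': 1},
#                  {'text': 'London', 'type': 'LOC', 'score': 0.97, 'beginOffset': 3, 'endOffset': 4}]}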
Example 3: test_get_entities
# Required import: from seqeval.metrics import sequence_labeling [as alias]
# Or: from seqeval.metrics.sequence_labeling import get_entities [as alias]
def test_get_entities(self):
    y_true = ['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O', 'B-PER', 'I-PER']
    self.assertEqual(get_entities(y_true), [('MISC', 3, 5), ('PER', 7, 8)])
Example 4: test_get_entities_with_suffix_style
# Required import: from seqeval.metrics import sequence_labeling [as alias]
# Or: from seqeval.metrics.sequence_labeling import get_entities [as alias]
def test_get_entities_with_suffix_style(self):
    y_true = ['O', 'O', 'O', 'MISC-B', 'MISC-I', 'MISC-I', 'O', 'PER-B', 'PER-I']
    self.assertEqual(get_entities(y_true, suffix=True), [('MISC', 3, 5), ('PER', 7, 8)])
Example 5: _build_response
# Required import: from seqeval.metrics import sequence_labeling [as alias]
# Or: from seqeval.metrics.sequence_labeling import get_entities [as alias]
def _build_response(self, split_text, tags, poss, segs=[], words=[]):
    # NB: mutable default arguments are shared across calls
    if self.basic_token == 'char':
        res = {
            'words': split_text,
            'pos': poss,
            'char_pos': poss,
            'char_word': words,
            'seg': segs,
            'entities': []
        }
    else:
        res = {
            'words': split_text,
            'pos': poss,
            'entities': []
        }
    chunks = get_entities(tags)
    for chunk_type, chunk_start, chunk_end in chunks:
        chunk = self.post_process_chunk(chunk_type, chunk_start, chunk_end, split_text, poss)
        if chunk is not None:
            entity = {
                'text': chunk,
                'type': chunk_type,
                'beginOffset': chunk_start,
                'endOffset': chunk_end
            }
            res['entities'].append(entity)
    return res
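post_process_chunk is not shown in this snippet; a minimal hypothetical stand-in that simply joins the chunk's tokens (remembering that get_entities end indices are inclusive) could look like this:

def post_process_chunk(self, chunk_type, chunk_start, chunk_end, split_text, poss):
    # Hypothetical helper: join the chunk's tokens into a surface string.
    return ''.join(split_text[chunk_start: chunk_end + 1])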
Example 6: call
# Required import: from seqeval.metrics import sequence_labeling [as alias]
# Or: from seqeval.metrics.sequence_labeling import get_entities [as alias]
def call(self, predictions, log_verbose=False):
    '''main func entrypoint'''
    preds = predictions["preds"]
    output_index = predictions["output_index"]
    if output_index is None:
        res_file = self.config["solver"]["postproc"].get("res_file", "")
        label_path_file = self.config["data"]["task"]["label_vocab"]
    else:
        res_file = self.config["solver"]["postproc"][output_index].get(
            "res_file", "")
        label_path_file = self.config["data"]["task"]["label_vocab"][output_index]
    if res_file == "":
        logging.info(
            "Infer res not saved. You can check 'res_file' in your config.")
        return
    res_dir = os.path.dirname(res_file)
    if not os.path.exists(res_dir):
        os.makedirs(res_dir)
    logging.info("Save inference result to: {}".format(res_file))
    preds = ids_to_sentences(preds, label_path_file)
    with open(res_file, "w", encoding="utf-8") as in_f:
        for i, pre in enumerate(preds):
            entities = get_entities(pre)  # e.g. [('PER', 0, 1), ('LOC', 3, 3)]
            if not entities:
                in_f.write("Null")
            else:
                new_line = "\t".join(
                    [" ".join(map(str, entity)) for entity in entities])
                in_f.write(new_line)
            in_f.write("\n")
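The output format follows directly from the join expressions above: one line per sentence, entities separated by tabs and fields by spaces, with "Null" written for sentences containing no entities. A quick check of the line construction:

entities = [('PER', 0, 1), ('LOC', 3, 3)]
line = "\t".join(" ".join(map(str, entity)) for entity in entities)
print(line)  # PER 0 1	LOC 3 3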
Example 7: entities
# Required import: from seqeval.metrics import sequence_labeling [as alias]
# Or: from seqeval.metrics.sequence_labeling import get_entities [as alias]
def entities(text: List[str], tag: List[str], pred_prob: np.ndarray) -> List[Dict[str, Any]]:
    """Return recognized entities with detailed information according to the tag sequence.

    Args:
        text: List of str. A tokenized (char-level) text sequence,
            like ``['我', '在', '上', '海', '上', '學']``
        tag: List of str. The corresponding tag sequence of the text,
            like ``['O', 'O', 'B-LOC', 'I-LOC', 'O', 'O']``
        pred_prob: np.ndarray. The probabilities of the tag sequence, shaped [num_chars,]

    Returns:
        List of Dict. Each Dict contains the detailed information of one recognized entity
        (name, type, score, offsets). Specifically, it will be like:
        [{'name': '上海',
          'type': 'LOC',
          'score': 0.9986118674278259,
          'beginOffset': 2,
          'endOffset': 4},
         ...
        ]
    """
    results = []
    chunks = sequence_labeling.get_entities(tag)
    for chunk_type, chunk_start, chunk_end in chunks:
        chunk_end += 1  # convert inclusive end index to exclusive
        entity = {
            'name': ''.join(text[chunk_start: chunk_end]),
            'type': chunk_type,
            'score': float(np.average(pred_prob[chunk_start: chunk_end])),
            'beginOffset': chunk_start,
            'endOffset': chunk_end
        }
        results.append(entity)
    return results
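A usage sketch built from the docstring's own example; the probabilities are made up:

import numpy as np
text = ['我', '在', '上', '海', '上', '學']
tag = ['O', 'O', 'B-LOC', 'I-LOC', 'O', 'O']
pred_prob = np.array([0.99, 0.99, 0.99, 0.99, 0.99, 0.99])  # illustrative scores
print(entities(text, tag, pred_prob))
# [{'name': '上海', 'type': 'LOC', 'score': 0.99, 'beginOffset': 2, 'endOffset': 4}]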
Example 8: get_tag_dict
# Required import: from seqeval.metrics import sequence_labeling [as alias]
# Or: from seqeval.metrics.sequence_labeling import get_entities [as alias]
def get_tag_dict(sequence, tag_texts):
    words = sequence.split()
    entities = get_entities(tag_texts)
    slots = defaultdict(list)
    for slot, start_idx, end_idx in entities:
        # end_idx is inclusive, so slice up to end_idx + 1
        slots[slot].append(" ".join(words[start_idx : end_idx + 1]))
    return dict(slots)
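A usage sketch with illustrative slot labels:

sequence = "book a flight from new york to boston"
tag_texts = ['O', 'O', 'O', 'O', 'B-fromloc', 'I-fromloc', 'O', 'B-toloc']
print(get_tag_dict(sequence, tag_texts))
# {'fromloc': ['new york'], 'toloc': ['boston']}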
Example 9: _build_response1
# Required import: from seqeval.metrics import sequence_labeling [as alias]
# Or: from seqeval.metrics.sequence_labeling import get_entities [as alias]
def _build_response1(self, sent, tags, prob):
    words = self.tokenizer(sent)
    res = ""
    chunks = get_entities(tags)  # NOTE: computed but never used in this variant
    for index, obj in enumerate(words):
        res = res + obj + "\t" + tags[index] + "\n"
        if "." in obj:
            res = res + "\n"
        if "।" in obj:  # Devanagari danda (sentence terminator)
            res = res + "\n"
    return res
Example 10: transform
# Required import: from seqeval.metrics import sequence_labeling [as alias]
# Or: from seqeval.metrics.sequence_labeling import get_entities [as alias]
def transform(self, X, y=None):
    """Transform documents to document ids.

    Uses the vocabulary learned by fit.

    Args:
        X : iterable
            An iterable which yields either str, unicode or file objects.
        y : iterable, label strings.

    Returns:
        features: document id matrix.
        y: label id matrix.
    """
    mentions = []
    mentions_char = []
    left_contexts = []
    right_contexts = []
    outputs = []
    word_ids = [self._word_vocab.doc2id(doc) for doc in X]
    char_ids = [[self._char_vocab.doc2id(w) for w in doc] for doc in X]
    ngram_indices = []
    for sent_idx, sent in enumerate(word_ids):
        ngrams = self.generate_ngrams(sent, n=4)
        ngram_indices.append(ngrams)
        for l, r in ngrams:
            # Slice the current sentence (not the corpus list) by token offsets.
            mentions.append(sent[l:r])
            mentions_char.append(char_ids[sent_idx][l:r])
            left_contexts.append(sent[:l])
            right_contexts.append(sent[r:])
    if y is not None:
        for ngram, labels in zip(ngram_indices, y):
            # Map each gold entity span to its type, as half-open (start, end) keys.
            d = {(begin_offset, end_offset + 1): t
                 for t, begin_offset, end_offset in get_entities(labels)}
            for l, r in ngram:
                if (l, r) in d:
                    outputs.append(self._label_vocab[d[(l, r)]])
                else:
                    outputs.append(self._label_vocab['O'])  # assumes 'O' is the negative label id
    outputs = np.array(outputs)
    inputs = [np.array(left_contexts), np.array(mentions), np.array(mentions_char), np.array(right_contexts)]
    if y is not None:
        return inputs, outputs
    else:
        return inputs
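The span-matching step above turns get_entities' inclusive spans into half-open (start, end) keys so they can be compared against the generated n-gram boundaries. A quick check:

labels = ['B-PER', 'I-PER', 'O', 'B-LOC']
d = {(b, e + 1): t for t, b, e in get_entities(labels)}
print(d)  # {(0, 2): 'PER', (3, 4): 'LOC'}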
Example 11: restrict_entities
# Required import: from seqeval.metrics import sequence_labeling [as alias]
# Or: from seqeval.metrics.sequence_labeling import get_entities [as alias]
def restrict_entities(text: List[str],
                      tag: List[str],
                      pred_prob: np.ndarray,
                      threshold: float = 0.85) -> List[Dict[str, Any]]:
    """Return restricted entities according to the tag sequence: 1) remove entities
    whose scores are lower than the threshold; 2) for each entity type, keep only
    the entity with the highest score.

    Args:
        text: List of str. A tokenized (char-level) text sequence,
            like ``['我', '在', '上', '海', '上', '學']``
        tag: List of str. The corresponding tag sequence of the text,
            like ``['O', 'O', 'B-LOC', 'I-LOC', 'O', 'O']``
        pred_prob: np.ndarray. The probabilities of the tag sequence, shaped [num_chars,]
        threshold: float. The scores of recognized entities must be higher than the threshold.

    Returns:
        List of Dict. Each Dict contains the detailed information of one kept entity
        (name, type, score, offsets). Specifically, it will be like:
        [{'name': '上海',
          'type': 'LOC',
          'score': 0.9986118674278259,
          'beginOffset': 2,
          'endOffset': 4},
         ...
        ]
    """
    group_entities = defaultdict(list)
    chunks = sequence_labeling.get_entities(tag)
    for chunk_type, chunk_start, chunk_end in chunks:
        chunk_end += 1  # convert inclusive end index to exclusive
        score = float(np.average(pred_prob[chunk_start: chunk_end]))
        if score >= threshold:
            # drop entities whose scores are below the threshold
            entity = ''.join(text[chunk_start: chunk_end])
            group_entities[chunk_type].append((entity, score, chunk_start, chunk_end))
    results = []
    for entity_type, group in group_entities.items():
        entity = sorted(group, key=lambda x: x[1])[-1]  # keep the highest-scoring entity
        results.append({
            'name': entity[0],
            'type': entity_type,
            'score': entity[1],
            'beginOffset': entity[2],
            'endOffset': entity[3]
        })
    return results
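A usage sketch built from the docstring's example; the probabilities are made up:

import numpy as np
text = ['我', '在', '上', '海', '上', '學']
tag = ['O', 'O', 'B-LOC', 'I-LOC', 'O', 'O']
pred_prob = np.array([0.99, 0.99, 0.999, 0.998, 0.99, 0.99])  # illustrative scores
print(restrict_entities(text, tag, pred_prob, threshold=0.85))
# [{'name': '上海', 'type': 'LOC', 'score': 0.9985..., 'beginOffset': 2, 'endOffset': 4}]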