This page collects typical usage examples of the Doc.ents attribute from Python's spacy.tokens module. If you are unsure what Doc.ents does or how to use it, the curated code samples below may help; you can also explore further examples for the containing class, spacy.tokens.Doc.
Seven code examples of Doc.ents are shown below, ordered by popularity by default.
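Before the examples, here is a minimal sketch of reading and writing Doc.ents. The blank English pipeline, the sample sentence, and the ORG/GPE labels are illustrative assumptions (spaCy 2.x-style API), not part of the examples below:

import spacy
from spacy.tokens import Span

nlp = spacy.blank("en")  # blank pipeline: tokenizer only, no NER component
doc = nlp("Apple is hiring in London")
# Assigning doc.ents overwrites all entity annotations on the Doc.
# A Span covers tokens [start, end); the labels here are illustrative.
doc.ents = [Span(doc, 0, 1, label="ORG"), Span(doc, 4, 5, label="GPE")]
print([(ent.text, ent.label_) for ent in doc.ents])
# [('Apple', 'ORG'), ('London', 'GPE')]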
Example 1: test_doc_retokenize_spans_entity_merge_iob
# Required imports:
from spacy.tokens import Doc
from spacy.vocab import Vocab

def test_doc_retokenize_spans_entity_merge_iob():
    # Test that entity IOB tags stay consistent after merging
    words = ["a", "b", "c", "d", "e"]
    doc = Doc(Vocab(), words=words)
    doc.ents = [
        (doc.vocab.strings.add("ent-abc"), 0, 3),
        (doc.vocab.strings.add("ent-d"), 3, 4),
    ]
    assert doc[0].ent_iob_ == "B"
    assert doc[1].ent_iob_ == "I"
    assert doc[2].ent_iob_ == "I"
    assert doc[3].ent_iob_ == "B"
    with doc.retokenize() as retokenizer:
        retokenizer.merge(doc[0:1])
    assert doc[0].ent_iob_ == "B"
    assert doc[1].ent_iob_ == "I"
    words = ["a", "b", "c", "d", "e", "f", "g", "h", "i"]
    doc = Doc(Vocab(), words=words)
    doc.ents = [
        (doc.vocab.strings.add("ent-de"), 3, 5),
        (doc.vocab.strings.add("ent-fg"), 5, 7),
    ]
    assert doc[3].ent_iob_ == "B"
    assert doc[4].ent_iob_ == "I"
    assert doc[5].ent_iob_ == "B"
    assert doc[6].ent_iob_ == "I"
    # Merge spans that cross entity boundaries; IOB tags must stay valid
    with doc.retokenize() as retokenizer:
        retokenizer.merge(doc[2:4])
        retokenizer.merge(doc[4:6])
        retokenizer.merge(doc[7:9])
    assert len(doc) == 6
    assert doc[3].ent_iob_ == "B"
    assert doc[4].ent_iob_ == "I"
Example 2: test_doc_add_entities_set_ents_iob
# Required imports:
from spacy.pipeline import EntityRecognizer
from spacy.tokens import Doc

def test_doc_add_entities_set_ents_iob(en_vocab):
    # en_vocab is a pytest fixture providing an English Vocab
    doc = Doc(en_vocab, words=["This", "is", "a", "lion"])
    ner = EntityRecognizer(en_vocab)
    ner.begin_training([])
    ner(doc)
    assert len(list(doc.ents)) == 0
    assert [w.ent_iob_ for w in doc] == (["O"] * len(doc))
    # Assigning doc.ents replaces existing annotations; tokens outside the
    # given entities are left unset (empty IOB string) rather than "O"
    doc.ents = [(doc.vocab.strings["ANIMAL"], 3, 4)]
    assert [w.ent_iob_ for w in doc] == ["", "", "", "B"]
    doc.ents = [(doc.vocab.strings["WORD"], 0, 2)]
    assert [w.ent_iob_ for w in doc] == ["B", "I", "", ""]
Example 3: get_doc
# Required imports:
from spacy.attrs import POS, HEAD, DEP
from spacy.tokens import Doc, Span

def get_doc(vocab, words=[], pos=None, heads=None, deps=None, tags=None, ents=None):
    """Create Doc object from given vocab, words and annotations."""
    pos = pos or [""] * len(words)
    tags = tags or [""] * len(words)
    heads = heads or [0] * len(words)
    deps = deps or [""] * len(words)
    for value in deps + tags + pos:
        vocab.strings.add(value)
    doc = Doc(vocab, words=words)
    attrs = doc.to_array([POS, HEAD, DEP])
    for i, (p, head, dep) in enumerate(zip(pos, heads, deps)):
        attrs[i, 0] = doc.vocab.strings[p]
        attrs[i, 1] = head  # HEAD is stored as an offset relative to the token
        attrs[i, 2] = doc.vocab.strings[dep]
    doc.from_array([POS, HEAD, DEP], attrs)
    if ents:
        doc.ents = [
            Span(doc, start, end, label=doc.vocab.strings[label])
            for start, end, label in ents
        ]
    if tags:
        for token in doc:
            token.tag_ = tags[token.i]
    return doc
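A hypothetical call to this helper, to show the expected shape of the ents argument as (start, end, label) tuples; the PERSON label and toy words are assumptions for illustration:

from spacy.vocab import Vocab

vocab = Vocab()
vocab.strings.add("PERSON")  # the label must already exist in the StringStore
doc = get_doc(vocab, words=["Alice", "sings"], ents=[(0, 1, "PERSON")])
print([(ent.text, ent.label_) for ent in doc.ents])  # [('Alice', 'PERSON')]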
Example 4: test_issue1547
# Required imports:
from spacy.tokens import Doc, Span
from spacy.vocab import Vocab

def test_issue1547():
    """Test that entity labels still match after merging tokens."""
    words = ["\n", "worda", ".", "\n", "wordb", "-", "Biosphere", "2", "-", " \n"]
    doc = Doc(Vocab(), words=words)
    doc.ents = [Span(doc, 6, 8, label=doc.vocab.strings["PRODUCT"])]
    with doc.retokenize() as retokenizer:
        retokenizer.merge(doc[5:7])
    assert [ent.text for ent in doc.ents]
Example 5: test_doc_is_nered
# Required imports:
import numpy
from spacy.attrs import ENT_IOB, ENT_TYPE
from spacy.tokens import Doc, Span

def test_doc_is_nered(en_vocab):
    words = ["I", "live", "in", "New", "York"]
    doc = Doc(en_vocab, words=words)
    assert not doc.is_nered
    doc.ents = [Span(doc, 3, 5, label="GPE")]
    assert doc.is_nered
    # Test creating a doc from an array with unknown values.
    # IOB codes: 3 = "B", 1 = "I"; 384 is an arbitrary entity-type ID.
    arr = numpy.array([[0, 0], [0, 0], [0, 0], [384, 3], [384, 1]], dtype="uint64")
    doc = Doc(en_vocab, words=words).from_array([ENT_TYPE, ENT_IOB], arr)
    assert doc.is_nered
    # Test serialization round-trip
    new_doc = Doc(en_vocab).from_bytes(doc.to_bytes())
    assert new_doc.is_nered
Example 6: test_doc_retokenize_spans_entity_split_iob
# Required imports:
from spacy.tokens import Doc
from spacy.vocab import Vocab

def test_doc_retokenize_spans_entity_split_iob():
    # Test that entity IOB tags stay consistent after splitting
    words = ["abc", "d", "e"]
    doc = Doc(Vocab(), words=words)
    doc.ents = [(doc.vocab.strings.add("ent-abcd"), 0, 2)]
    assert doc[0].ent_iob_ == "B"
    assert doc[1].ent_iob_ == "I"
    with doc.retokenize() as retokenizer:
        retokenizer.split(doc[0], ["a", "b", "c"], [(doc[0], 1), (doc[0], 2), doc[1]])
    assert doc[0].ent_iob_ == "B"
    assert doc[1].ent_iob_ == "I"
    assert doc[2].ent_iob_ == "I"
    assert doc[3].ent_iob_ == "I"
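Note on the split call above: the third argument assigns a head to each new subtoken, where a (token, i) tuple refers to the i-th subtoken produced by the split. Here "a" attaches to "b", "b" attaches to "c", and "c" attaches to the following token doc[1].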
Example 7: test_serialize_after_adding_entity
# Required imports (this example uses the legacy spaCy 1.x spacy.en API):
import spacy.en
from spacy.tokens import Doc

def test_serialize_after_adding_entity():
    # Regression test for issue #514
    vocab = spacy.en.English.Defaults.create_vocab()
    entity_recognizer = spacy.en.English.Defaults.create_entity()
    doc = Doc(vocab, words=u'This is a sentence about pasta .'.split())
    entity_recognizer.add_label('Food')
    entity_recognizer(doc)
    label_id = vocab.strings[u'Food']
    doc.ents = [(label_id, 5, 6)]
    assert [(ent.label_, ent.text) for ent in doc.ents] == [(u'Food', u'pasta')]
    byte_string = doc.to_bytes()