Python Doc.ents方法代碼示例

本文整理匯總了Python中spacy.tokens.Doc.ents方法的典型用法代碼示例。如果您正苦於以下問題：Python Doc.ents方法的具體用法？Python Doc.ents怎麼用？Python Doc.ents使用的例子？那麼，這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類spacy.tokens.Doc的用法示例。

在下文中一共展示了Doc.ents方法的7個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: test_doc_retokenize_spans_entity_merge_iob

# 需要導入模塊: from spacy.tokens import Doc [as 別名]
# 或者: from spacy.tokens.Doc import ents [as 別名]
def test_doc_retokenize_spans_entity_merge_iob():
    """Check that entity IOB tags stay consistent after retokenizer merges."""
    # Scenario 1: two adjacent entities covering tokens 0-2 and token 3.
    first_words = ["a", "b", "c", "d", "e"]
    first_doc = Doc(Vocab(), words=first_words)
    first_strings = first_doc.vocab.strings
    label_abc = first_strings.add("ent-abc")
    label_d = first_strings.add("ent-d")
    first_doc.ents = [(label_abc, 0, 3), (label_d, 3, 4)]
    assert [first_doc[i].ent_iob_ for i in range(4)] == ["B", "I", "I", "B"]
    with first_doc.retokenize() as retokenizer:
        # NOTE(review): doc[0:1] is a one-token span; confirm this is the
        # span the scenario is meant to merge.
        retokenizer.merge(first_doc[0:1])
    assert [first_doc[i].ent_iob_ for i in range(2)] == ["B", "I"]

    # Scenario 2: several merges in one retokenize block, some of which
    # overlap the boundaries of the two entities (tokens 3-4 and 5-6).
    second_words = ["a", "b", "c", "d", "e", "f", "g", "h", "i"]
    second_doc = Doc(Vocab(), words=second_words)
    second_strings = second_doc.vocab.strings
    label_de = second_strings.add("ent-de")
    label_fg = second_strings.add("ent-fg")
    second_doc.ents = [(label_de, 3, 5), (label_fg, 5, 7)]
    for index, expected in ((3, "B"), (4, "I"), (5, "B"), (6, "I")):
        assert second_doc[index].ent_iob_ == expected
    with second_doc.retokenize() as retokenizer:
        for start, end in ((2, 4), (4, 6), (7, 9)):
            retokenizer.merge(second_doc[start:end])
    # Three two-token merges shrink the nine-token doc to six tokens.
    assert len(second_doc) == 6
    for index, expected in ((3, "B"), (4, "I")):
        assert second_doc[index].ent_iob_ == expected

開發者ID:spacy-io，項目名稱:spaCy，代碼行數:36，代碼來源:test_retokenize_merge.py

示例2: test_doc_add_entities_set_ents_iob

# 需要導入模塊: from spacy.tokens import Doc [as 別名]
# 或者: from spacy.tokens.Doc import ents [as 別名]
def test_doc_add_entities_set_ents_iob(en_vocab):
    """Check token IOB tags after the recognizer runs and after doc.ents is reassigned.

    Expects every token to be "O" once the freshly initialised recognizer
    has processed the doc, and expects tokens outside a manually assigned
    entity to carry an empty IOB string afterwards.
    """
    doc = Doc(en_vocab, words=["This", "is", "a", "lion"])

    def iob_tags():
        # Snapshot of the current per-token IOB strings.
        return [token.ent_iob_ for token in doc]

    recognizer = EntityRecognizer(en_vocab)
    recognizer.begin_training([])
    recognizer(doc)
    assert not list(doc.ents)
    assert iob_tags() == ["O"] * len(doc)

    animal_label = doc.vocab.strings["ANIMAL"]
    doc.ents = [(animal_label, 3, 4)]
    assert iob_tags() == ["", "", "", "B"]

    # A second assignment replaces the earlier entity rather than adding to it.
    word_label = doc.vocab.strings["WORD"]
    doc.ents = [(word_label, 0, 2)]
    assert iob_tags() == ["B", "I", "", ""]

開發者ID:spacy-io，項目名稱:spaCy，代碼行數:13，代碼來源:test_ner.py

示例3: get_doc

# 需要導入模塊: from spacy.tokens import Doc [as 別名]
# 或者: from spacy.tokens.Doc import ents [as 別名]
def get_doc(vocab, words=None, pos=None, heads=None, deps=None, tags=None, ents=None):
    """Create Doc object from given vocab, words and annotations.

    vocab: vocabulary handed to ``Doc``; every dep, tag and pos string is
        added to ``vocab.strings`` before the doc is built.
    words: list of token texts. ``None`` (the default) means no tokens.
    pos: per-token POS strings; defaults to ``""`` for every token.
    heads: per-token values written to the HEAD column; defaults to ``0``.
    deps: per-token dependency strings; defaults to ``""`` for every token.
    tags: per-token tag strings; defaults to ``""`` for every token.
    ents: optional iterable of ``(start, end, label)`` triples turned into
        ``Span`` objects and assigned to ``doc.ents``.

    Returns the annotated ``Doc``.
    """
    # A None sentinel replaces the former mutable ``[]`` default so that no
    # list object is shared between calls.
    words = [] if words is None else words
    n_words = len(words)
    pos = pos or [""] * n_words
    tags = tags or [""] * n_words
    heads = heads or [0] * n_words
    deps = deps or [""] * n_words
    # Register every annotation string so the lookups below can resolve them.
    for value in deps + tags + pos:
        vocab.strings.add(value)

    doc = Doc(vocab, words=words)
    columns = [POS, HEAD, DEP]
    attrs = doc.to_array(columns)
    for i, (p, head, dep) in enumerate(zip(pos, heads, deps)):
        attrs[i, 0] = doc.vocab.strings[p]
        attrs[i, 1] = head
        attrs[i, 2] = doc.vocab.strings[dep]
    doc.from_array(columns, attrs)
    if ents:
        doc.ents = [
            Span(doc, start, end, label=doc.vocab.strings[label])
            for start, end, label in ents
        ]
    if tags:
        for token in doc:
            token.tag_ = tags[token.i]
    return doc

開發者ID:spacy-io，項目名稱:spaCy，代碼行數:27，代碼來源:util.py

示例4: test_issue1547

# 需要導入模塊: from spacy.tokens import Doc [as 別名]
# 或者: from spacy.tokens.Doc import ents [as 別名]
def test_issue1547():
    """Regression test for issue 1547: entities survive a merge that overlaps one.

    Only checks that the doc still reports at least one entity after the merge.
    """
    tokens = ["\n", "worda", ".", "\n", "wordb", "-", "Biosphere", "2", "-", " \n"]
    doc = Doc(Vocab(), words=tokens)
    product_label = doc.vocab.strings["PRODUCT"]
    product_span = Span(doc, 6, 8, label=product_label)
    doc.ents = [product_span]
    # The merged span doc[5:7] includes token 6, the first token of the entity.
    with doc.retokenize() as retokenizer:
        retokenizer.merge(doc[5:7])
    entity_texts = [ent.text for ent in doc.ents]
    assert entity_texts

開發者ID:spacy-io，項目名稱:spaCy，代碼行數:10，代碼來源:test_issue1501-2000.py

示例5: test_doc_is_nered

# 需要導入模塊: from spacy.tokens import Doc [as 別名]
# 或者: from spacy.tokens.Doc import ents [as 別名]
def test_doc_is_nered(en_vocab):
    """Exercise ``Doc.is_nered`` across four ways of building a doc.

    Covers a fresh doc, a doc with assigned entities, a doc populated via
    ``from_array`` and a doc restored from bytes.
    """
    tokens = ["I", "live", "in", "New", "York"]
    doc = Doc(en_vocab, words=tokens)
    assert not doc.is_nered

    gpe_span = Span(doc, 3, 5, label="GPE")
    doc.ents = [gpe_span]
    assert doc.is_nered

    # Build a doc from a raw ENT_TYPE / ENT_IOB array with unknown values.
    rows = [[0, 0], [0, 0], [0, 0], [384, 3], [384, 1]]
    annotations = numpy.array(rows, dtype="uint64")
    doc = Doc(en_vocab, words=tokens).from_array([ENT_TYPE, ENT_IOB], annotations)
    assert doc.is_nered

    # The flag must also hold after a to_bytes / from_bytes round trip.
    payload = doc.to_bytes()
    restored = Doc(en_vocab).from_bytes(payload)
    assert restored.is_nered

開發者ID:spacy-io，項目名稱:spaCy，代碼行數:15，代碼來源:test_doc_api.py

示例6: test_doc_retokenize_spans_entity_split_iob

# 需要導入模塊: from spacy.tokens import Doc [as 別名]
# 或者: from spacy.tokens.Doc import ents [as 別名]
def test_doc_retokenize_spans_entity_split_iob():
    """Check that entity IOB tags stay consistent after splitting a token."""
    doc = Doc(Vocab(), words=["abc", "d", "e"])
    entity_label = doc.vocab.strings.add("ent-abcd")
    doc.ents = [(entity_label, 0, 2)]
    assert [doc[i].ent_iob_ for i in range(2)] == ["B", "I"]

    pieces = ["a", "b", "c"]
    with doc.retokenize() as retokenizer:
        # One head entry per new piece, passed as the third argument to split().
        heads = [(doc[0], 1), (doc[0], 2), doc[1]]
        retokenizer.split(doc[0], pieces, heads)

    # The first token is now three tokens, so the entity spans four tokens.
    for index, expected in enumerate(["B", "I", "I", "I"]):
        assert doc[index].ent_iob_ == expected

開發者ID:spacy-io，項目名稱:spaCy，代碼行數:15，代碼來源:test_retokenize_split.py

示例7: test_serialize_after_adding_entity

# 需要導入模塊: from spacy.tokens import Doc [as 別名]
# 或者: from spacy.tokens.Doc import ents [as 別名]
def test_serialize_after_adding_entity():
    """Regression test for issue #514: serialize a doc after assigning a new entity label."""
    defaults = spacy.en.English.Defaults
    vocab = defaults.create_vocab()
    entity_recognizer = defaults.create_entity()

    sentence_words = u'This is a sentence about pasta .'.split()
    doc = Doc(vocab, words=sentence_words)
    entity_recognizer.add_label('Food')
    entity_recognizer(doc)

    # Mark token 5 ("pasta") as a Food entity.
    doc.ents = [(vocab.strings[u'Food'], 5, 6)]

    labelled = [(ent.label_, ent.text) for ent in doc.ents]
    assert labelled == [(u'Food', u'pasta')]

    # The serialized bytes are not inspected; the call only has to complete.
    doc.to_bytes()

開發者ID:adamhadani，項目名稱:spaCy，代碼行數:17，代碼來源:test_serialization.py

注：本文中的spacy.tokens.Doc.ents方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。