本文整理汇总了Python中spacy.tokens.Doc.to_bytes方法的典型用法代码示例。如果您正苦于以下问题:Python Doc.to_bytes方法的具体用法?Python Doc.to_bytes怎么用?Python Doc.to_bytes使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类spacy.tokens.Doc
的用法示例。
在下文中一共展示了Doc.to_bytes方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_serialize_doc_roundtrip_disk
# Required import: from spacy.tokens import Doc
# Method under test: spacy.tokens.Doc.to_bytes
def test_serialize_doc_roundtrip_disk(en_vocab):
    """A Doc written with to_disk and reloaded with from_disk serializes identically."""
    original = Doc(en_vocab, words=["hello", "world"])
    with make_tempdir() as tmp_dir:
        target_path = tmp_dir / "doc"
        original.to_disk(target_path)
        reloaded = Doc(en_vocab).from_disk(target_path)
        assert original.to_bytes() == reloaded.to_bytes()
示例2: test_issue1834
# Required import: from spacy.tokens import Doc
# Method under test: spacy.tokens.Doc.to_bytes
def test_issue1834():
    """Sentence boundaries and the parse/tag flags must survive a
    to_bytes/from_bytes round trip (regression test for issue #1834)."""
    text = "This is a first sentence . And another one"
    source = Doc(Vocab(), words=text.split())
    source[6].sent_start = True
    # Round trip while the flags are still unset.
    restored = Doc(source.vocab).from_bytes(source.to_bytes())
    assert restored[6].sent_start
    assert not restored.is_parsed
    assert not restored.is_tagged
    # Set the flags and round trip again: both must come back set.
    source.is_parsed = True
    source.is_tagged = True
    restored = Doc(source.vocab).from_bytes(source.to_bytes())
    assert restored.is_parsed
    assert restored.is_tagged
示例3: test_issue599
# Required import: from spacy.tokens import Doc
# Method under test: spacy.tokens.Doc.to_bytes
def test_issue599(en_vocab):
    """The is_parsed flag on an empty Doc survives serialization (issue #599)."""
    flagged = Doc(en_vocab)
    flagged.is_tagged = True
    flagged.is_parsed = True
    clone = Doc(flagged.vocab)
    clone.from_bytes(flagged.to_bytes())
    assert clone.is_parsed
示例4: test_serialize_empty_doc
# Required import: from spacy.tokens import Doc
# Method under test: spacy.tokens.Doc.to_bytes
def test_serialize_empty_doc(en_vocab):
    """An empty Doc round-trips through to_bytes/from_bytes without error."""
    empty = Doc(en_vocab)
    payload = empty.to_bytes()
    rebuilt = Doc(en_vocab)
    rebuilt.from_bytes(payload)
    assert len(empty) == len(rebuilt)
    # Token-by-token comparison (vacuous for an empty Doc, kept for parity).
    for left, right in zip(empty, rebuilt):
        assert left.text == right.text
示例5: test_sbd_serialization_projective
# Required import: from spacy.tokens import Doc
# Method under test: spacy.tokens.Doc.to_bytes
def test_sbd_serialization_projective(EN):
    """Sentence boundaries produced by a projective parse are identical
    before and after serialization."""
    tokens = u"I bought a couch from IKEA. It was n't very comfortable .".split(' ')
    parsed = EN.tokenizer.tokens_from_list(tokens)
    EN.tagger(parsed)
    transitions = ['L-nsubj', 'S', 'L-det', 'R-dobj', 'D', 'R-prep', 'R-pobj',
                   'B-ROOT', 'L-nsubj', 'R-neg', 'D', 'S', 'L-advmod',
                   'R-acomp', 'D', 'R-punct']
    apply_transition_sequence(EN, parsed, transitions)
    restored = Doc(EN.vocab).from_bytes(parsed.to_bytes())
    assert parsed.to_bytes() == restored.to_bytes()
    assert [s.text for s in parsed.sents] == [s.text for s in restored.sents]
示例6: test_doc_is_nered
# Required import: from spacy.tokens import Doc
# Method under test: spacy.tokens.Doc.to_bytes
def test_doc_is_nered(en_vocab):
    """is_nered becomes True once entities are set, is preserved when a Doc is
    built from an array with unknown values, and survives serialization."""
    words = ["I", "live", "in", "New", "York"]
    fresh = Doc(en_vocab, words=words)
    assert not fresh.is_nered
    fresh.ents = [Span(fresh, 3, 5, label="GPE")]
    assert fresh.is_nered
    # Build a Doc from an attribute array that contains unknown (zero) values.
    ent_array = numpy.array([[0, 0], [0, 0], [0, 0], [384, 3], [384, 1]], dtype="uint64")
    from_arr = Doc(en_vocab, words=words).from_array([ENT_TYPE, ENT_IOB], ent_array)
    assert from_arr.is_nered
    # The flag must also come back set after a bytes round trip.
    restored = Doc(en_vocab).from_bytes(from_arr.to_bytes())
    assert restored.is_nered
示例7: test_serialize_after_adding_entity
# Required import: from spacy.tokens import Doc
# Method under test: spacy.tokens.Doc.to_bytes
def test_serialize_after_adding_entity():
    """A Doc annotated with a freshly added entity label can still be
    serialized (regression test for issue #514)."""
    vocab = spacy.en.English.Defaults.create_vocab()
    ner = spacy.en.English.Defaults.create_entity()
    annotated = Doc(vocab, words=u'This is a sentence about pasta .'.split())
    ner.add_label('Food')
    ner(annotated)
    food_id = vocab.strings[u'Food']
    annotated.ents = [(food_id, 5, 6)]
    assert [(ent.label_, ent.text) for ent in annotated.ents] == [(u'Food', u'pasta')]
    # Serialization itself must not raise.
    serialized = annotated.to_bytes()
示例8: test_serialize_doc_exclude
# Required import: from spacy.tokens import Doc
# Method under test: spacy.tokens.Doc.to_bytes
def test_serialize_doc_exclude(en_vocab):
    """user_data round-trips by default, can be excluded on either side of the
    round trip, and the legacy keyword-style exclude arguments raise."""
    source = Doc(en_vocab, words=["hello", "world"])
    source.user_data["foo"] = "bar"
    # Default: user_data is carried through.
    restored = Doc(en_vocab).from_bytes(source.to_bytes())
    assert restored.user_data["foo"] == "bar"
    # Excluding on deserialization drops it.
    restored = Doc(en_vocab).from_bytes(source.to_bytes(), exclude=["user_data"])
    assert not restored.user_data
    # Excluding on serialization drops it as well.
    restored = Doc(en_vocab).from_bytes(source.to_bytes(exclude=["user_data"]))
    assert not restored.user_data
    # The removed boolean-keyword API must raise.
    with pytest.raises(ValueError):
        source.to_bytes(user_data=False)
    with pytest.raises(ValueError):
        Doc(en_vocab).from_bytes(source.to_bytes(), tensor=False)
示例9: test_serialize_doc_roundtrip_bytes
# Required import: from spacy.tokens import Doc
# Method under test: spacy.tokens.Doc.to_bytes
def test_serialize_doc_roundtrip_bytes(en_vocab):
    """to_bytes output is stable across a from_bytes/to_bytes round trip."""
    original = Doc(en_vocab, words=["hello", "world"])
    payload = original.to_bytes()
    rebuilt = Doc(en_vocab).from_bytes(payload)
    assert rebuilt.to_bytes() == payload