本文整理匯總了 Python 中 spacy.tokens.Doc.from_array 方法的典型用法代碼示例。如果您正苦於以下問題:Python Doc.from_array 方法的具體用法?Python Doc.from_array 怎麼用?Python Doc.from_array 使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類 spacy.tokens.Doc 的用法示例。
在下文中一共展示了 Doc.from_array 方法的 3 個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的 Python 代碼示例。
示例1: get_doc
# 需要導入模塊: from spacy.tokens import Doc, Span
# 以及: from spacy.attrs import POS, HEAD, DEP
def get_doc(vocab, words=None, pos=None, heads=None, deps=None, tags=None, ents=None):
    """Create a Doc object from the given vocab, words and annotations.

    Args:
        vocab: Vocabulary handed to ``Doc``; every dep, tag and pos string is
            added to ``vocab.strings`` before the doc is built.
        words: Token texts. ``None`` (the default) means no tokens.
        pos: Per-token POS strings; defaults to ``""`` for every word.
        heads: Per-token values written unchanged into the HEAD column;
            defaults to ``0`` for every word.
            NOTE(review): presumably offsets relative to the token - confirm.
        deps: Per-token dependency labels; defaults to ``""`` for every word.
        tags: Per-token tag strings; defaults to ``""`` for every word.
        ents: Optional iterable of ``(start, end, label)`` triples that are
            turned into ``Span`` objects and assigned to ``doc.ents``.

    Returns:
        The ``Doc`` with the annotations applied.
    """
    # A None sentinel replaces the former ``words=[]`` default so that no
    # mutable list object is shared between calls.
    words = [] if words is None else words
    pos = pos or [""] * len(words)
    tags = tags or [""] * len(words)
    heads = heads or [0] * len(words)
    deps = deps or [""] * len(words)
    # Register every annotation string up front so the lookups below succeed.
    # Iterating each sequence separately (instead of ``deps + tags + pos``)
    # also works when callers mix lists and tuples.
    for values in (deps, tags, pos):
        for value in values:
            vocab.strings.add(value)
    doc = Doc(vocab, words=words)
    # Start from the doc's current attribute array and overwrite it row by row.
    attrs = doc.to_array([POS, HEAD, DEP])
    for i, (p, head, dep) in enumerate(zip(pos, heads, deps)):
        attrs[i, 0] = doc.vocab.strings[p]
        attrs[i, 1] = head
        attrs[i, 2] = doc.vocab.strings[dep]
    doc.from_array([POS, HEAD, DEP], attrs)
    if ents:
        doc.ents = [
            Span(doc, start, end, label=doc.vocab.strings[label])
            for start, end, label in ents
        ]
    # ``tags`` was defaulted above, so this is skipped only when it is still
    # empty (i.e. no words and no tags were supplied).
    if tags:
        for token in doc:
            token.tag_ = tags[token.i]
    return doc
示例2: test_issue2203
# 需要導入模塊: from spacy.tokens import Doc
# 以及: import numpy
def test_issue2203(en_vocab):
    """Check that doc.from_array sets lemmas correctly (issue 2203)."""
    tokens = ["I", "'ll", "survive"]
    expected_tags = ["PRP", "MD", "VB"]
    expected_lemmas = ["-PRON-", "will", "survive"]

    # Intern every tag first, then every lemma, keeping the returned keys.
    tag_keys = []
    for tag in expected_tags:
        tag_keys.append(en_vocab.strings.add(tag))
    lemma_keys = []
    for lemma in expected_lemmas:
        lemma_keys.append(en_vocab.strings.add(lemma))

    doc = Doc(en_vocab, words=tokens)
    # Work around the lemma corruption problem: set lemmas after tags.
    tag_column = numpy.array(tag_keys, dtype="uint64")
    doc.from_array("TAG", tag_column)
    lemma_column = numpy.array(lemma_keys, dtype="uint64")
    doc.from_array("LEMMA", lemma_column)
    assert [token.tag_ for token in doc] == expected_tags
    assert [token.lemma_ for token in doc] == expected_lemmas

    # Both tag and lemma have to be serialized together, since that
    # combination is what triggers the bug.
    doc_array = doc.to_array(["TAG", "LEMMA"])
    fresh_doc = Doc(doc.vocab, words=tokens)
    new_doc = fresh_doc.from_array(["TAG", "LEMMA"], doc_array)
    assert [token.tag_ for token in new_doc] == expected_tags
    assert [token.lemma_ for token in new_doc] == expected_lemmas
示例3: test_issue1799
# 需要導入模塊: from spacy.tokens import Doc
# 以及: import numpy; from spacy.attrs import HEAD, DEP; from spacy.vocab import Vocab
def test_issue1799():
    """Check that sentence boundaries are deserialized correctly, even for
    non-projective sentences."""
    # One [HEAD, DEP] row per token.
    # NOTE(review): the HEAD values close to 2**64 look like negative offsets
    # wrapped into uint64, and the large DEP value is presumably the key for
    # "ROOT" - confirm.
    rows = [
        [1, 397],
        [4, 436],
        [2, 426],
        [1, 402],
        [0, 8206900633647566924],
        [18446744073709551615, 440],
        [18446744073709551614, 442],
    ]
    heads_deps = numpy.asarray(rows, dtype="uint64")

    vocab = Vocab()
    tokens = "Just what I was looking for .".split()
    doc = Doc(vocab, words=tokens)
    # Add "ROOT" to the string store before loading the annotation array.
    doc.vocab.strings.add("ROOT")
    doc = doc.from_array([HEAD, DEP], heads_deps)

    sentences = list(doc.sents)
    assert len(sentences) == 1