当前位置: 首页>>代码示例>>Python>>正文


Python Doc.from_array方法代码示例

本文整理汇总了Python中spacy.tokens.Doc.from_array方法的典型用法代码示例。如果您正苦于以下问题:Python Doc.from_array方法的具体用法?Python Doc.from_array怎么用?Python Doc.from_array使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在spacy.tokens.Doc的用法示例。


在下文中一共展示了Doc.from_array方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_doc

# 需要导入模块: from spacy.tokens import Doc [as 别名]
# 或者: from spacy.tokens.Doc import from_array [as 别名]
def get_doc(vocab, words=[], pos=None, heads=None, deps=None, tags=None, ents=None):
    """Create Doc object from given vocab, words and annotations."""
    pos = pos or [""] * len(words)
    tags = tags or [""] * len(words)
    heads = heads or [0] * len(words)
    deps = deps or [""] * len(words)
    for value in deps + tags + pos:
        vocab.strings.add(value)

    doc = Doc(vocab, words=words)
    attrs = doc.to_array([POS, HEAD, DEP])
    for i, (p, head, dep) in enumerate(zip(pos, heads, deps)):
        attrs[i, 0] = doc.vocab.strings[p]
        attrs[i, 1] = head
        attrs[i, 2] = doc.vocab.strings[dep]
    doc.from_array([POS, HEAD, DEP], attrs)
    if ents:
        doc.ents = [
            Span(doc, start, end, label=doc.vocab.strings[label])
            for start, end, label in ents
        ]
    if tags:
        for token in doc:
            token.tag_ = tags[token.i]
    return doc
开发者ID:spacy-io,项目名称:spaCy,代码行数:27,代码来源:util.py

示例2: test_issue2203

# 需要导入模块: from spacy.tokens import Doc [as 别名]
# 或者: from spacy.tokens.Doc import from_array [as 别名]
def test_issue2203(en_vocab):
    """Test that lemmas are set correctly in doc.from_array."""
    words = ["I", "'ll", "survive"]
    tags = ["PRP", "MD", "VB"]
    lemmas = ["-PRON-", "will", "survive"]
    tag_ids = [en_vocab.strings.add(tag) for tag in tags]
    lemma_ids = [en_vocab.strings.add(lemma) for lemma in lemmas]
    doc = Doc(en_vocab, words=words)
    # Work around lemma corrpution problem and set lemmas after tags
    doc.from_array("TAG", numpy.array(tag_ids, dtype="uint64"))
    doc.from_array("LEMMA", numpy.array(lemma_ids, dtype="uint64"))
    assert [t.tag_ for t in doc] == tags
    assert [t.lemma_ for t in doc] == lemmas
    # We need to serialize both tag and lemma, since this is what causes the bug
    doc_array = doc.to_array(["TAG", "LEMMA"])
    new_doc = Doc(doc.vocab, words=words).from_array(["TAG", "LEMMA"], doc_array)
    assert [t.tag_ for t in new_doc] == tags
    assert [t.lemma_ for t in new_doc] == lemmas
开发者ID:spacy-io,项目名称:spaCy,代码行数:20,代码来源:test_issue2001-2500.py

示例3: test_issue1799

# 需要导入模块: from spacy.tokens import Doc [as 别名]
# 或者: from spacy.tokens.Doc import from_array [as 别名]
def test_issue1799():
    """Test sentence boundaries are deserialized correctly, even for
    non-projective sentences."""
    heads_deps = numpy.asarray(
        [
            [1, 397],
            [4, 436],
            [2, 426],
            [1, 402],
            [0, 8206900633647566924],
            [18446744073709551615, 440],
            [18446744073709551614, 442],
        ],
        dtype="uint64",
    )
    doc = Doc(Vocab(), words="Just what I was looking for .".split())
    doc.vocab.strings.add("ROOT")
    doc = doc.from_array([HEAD, DEP], heads_deps)
    assert len(list(doc.sents)) == 1
开发者ID:spacy-io,项目名称:spaCy,代码行数:21,代码来源:test_issue1501-2000.py


注:本文中的spacy.tokens.Doc.from_array方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。