當前位置: 首頁>>代碼示例>>Python>>正文


Python doc.Doc方法代碼示例

本文整理匯總了Python中spacy.tokens.doc.Doc方法的典型用法代碼示例。如果您正苦於以下問題:Python doc.Doc方法的具體用法?Python doc.Doc怎麼用?Python doc.Doc使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在spacy.tokens.doc的用法示例。


在下文中一共展示了doc.Doc方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: convert_to_flair_format

# 需要導入模塊: from spacy.tokens import doc [as 別名]
# 或者: from spacy.tokens.doc import Doc [as 別名]
def convert_to_flair_format(spacy_model: Language, data: List[Tuple[str, List[Offset]]]) -> List[str]:
    """
    Render annotated texts as "token tag" lines, one line per token.

    Each text is run through ``spacy_model``; its offsets are normalized,
    de-duplicated and handed to ``GoldParse`` to obtain one tag per token.
    Tags are then rewritten from the BILUO prefixes to the BIOES ones.

    :param spacy_model: callable applied to each text to get the tokenized doc
    :param data: pairs of (text, offsets of the entities in that text)
    :return: newline-terminated "token tag" strings, with a lone newline
             entry appended after the tokens of each text
    """
    lines: List[str] = []
    for text, raw_offsets in data:
        doc: Doc = spacy_model(text)
        # Going through a set collapses duplicated offsets.
        unique_entities = list({o.to_tuple() for o in normalize_offsets(offsets=raw_offsets)})
        tags: List[str] = GoldParse(doc, entities=unique_entities).ner
        assert len(tags) == len(doc)
        for token, tag in zip(doc, tags):
            # Flair uses BIOES (Begin, Inside, Outside, End, Single),
            # Spacy uses BILUO (Begin, Inside, Last, Unit, Out).
            tag = tag.replace('L-', 'E-').replace('U-', 'S-')
            if tag == "-":
                # "-" marks an unknown tag; emit it as outside.
                tag = "O"
            lines.append(f"{token} {tag}\n")
        lines.append('\n')
    return lines
開發者ID:ELS-RD,項目名稱:anonymisation,代碼行數:21,代碼來源:import_annotations.py

示例2: convert_offsets_to_spacy_docs

# 需要導入模塊: from spacy.tokens import doc [as 別名]
# 或者: from spacy.tokens.doc import Doc [as 別名]
def convert_offsets_to_spacy_docs(doc_annotated: List[Tuple[str, str, List[Offset]]]) -> List[Doc]:
    """
    Build one Spacy doc per annotated text, with its entities already set.

    Offsets for which ``char_span`` returns None are printed (pipe separated)
    together with the index, the case id and the text, and are left out of
    the doc entities.

    :param doc_annotated: list of tuples (case id, text, list of offsets)
    :return: list of spacy docs, in the same order as the input
    """
    nlp = get_empty_model(load_labels_for_training=False)
    converted: List[Doc] = []

    for position, (case_id, text, tags) in enumerate(doc_annotated):
        current: Doc = nlp.make_doc(text)
        spans = []
        for tag in tags:
            span = current.char_span(tag.start, tag.end, label=tag.type)
            if span is None:
                # No span could be built for this offset: report it and move on.
                print("Issue in offset", "Index: " + str(position), "case: " + case_id,
                      text[tag.start:tag.end], text, sep="|")
                continue
            spans.append(span)
        current.ents = spans
        converted.append(current)
    return converted
開發者ID:ELS-RD,項目名稱:anonymisation,代碼行數:24,代碼來源:spacy_viewer.py

示例3: __call__

# 需要導入模塊: from spacy.tokens import doc [as 別名]
# 或者: from spacy.tokens.doc import Doc [as 別名]
def __call__(self, doc: Doc):
    """
    The spacy pipeline caller: add every matcher hit to the doc entities.

    :param doc: The Doc to run the phrase matcher and the matcher on.
    :return: the same doc, with one entity appended per match.
    """
    # Phrase matches come first, followed by the regular matches.
    found = self.phrase_matcher(doc) + self.matcher(doc)

    for label_id, first, last in found:
        # Re-assign the entities with the new (match_id, start, end) appended.
        doc.ents = doc.ents + ((label_id, first, last),)

    return doc


# add factories 
開發者ID:kororo,項目名稱:excelcy,代碼行數:22,代碼來源:pipe.py

示例4: tokenize

# 需要導入模塊: from spacy.tokens import doc [as 別名]
# 或者: from spacy.tokens.doc import Doc [as 別名]
def tokenize(self, doc: 'Doc') -> typing.List[Token]:
    """Wrap each item of ``doc`` in a ``Token`` built from its ``text`` and ``idx``."""
    tokens = []
    for item in doc:
        tokens.append(Token(item.text, item.idx))
    return tokens
開發者ID:weizhenzhao,項目名稱:rasa_nlu,代碼行數:5,代碼來源:spacy_tokenizer.py

示例5: doc_for_text

# 需要導入模塊: from spacy.tokens import doc [as 別名]
# 或者: from spacy.tokens.doc import Doc [as 別名]
def doc_for_text(self, text: Text) -> 'Doc':
    """
    Run ``self.nlp`` on the text, lowercasing it first unless the component
    config has a truthy ``"case_sensitive"`` entry.

    :param text: the text to process
    :return: whatever ``self.nlp`` returns for the (possibly lowercased) text
    """
    keep_case = self.component_config.get("case_sensitive")
    return self.nlp(text if keep_case else text.lower())
開發者ID:weizhenzhao,項目名稱:rasa_nlu,代碼行數:7,代碼來源:spacy_utils.py

示例6: extract_entities

# 需要導入模塊: from spacy.tokens import doc [as 別名]
# 或者: from spacy.tokens.doc import Doc [as 別名]
def extract_entities(doc: 'Doc') -> List[Dict[Text, Any]]:
    """
    Describe every entity of ``doc.ents`` as a plain dictionary.

    :param doc: object exposing an ``ents`` iterable
    :return: one dict per entity with its label, text and start/end
             characters; ``"confidence"`` is always None
    """
    found: List[Dict[Text, Any]] = []
    for span in doc.ents:
        found.append({
            "entity": span.label_,
            "value": span.text,
            "start": span.start_char,
            "confidence": None,
            "end": span.end_char,
        })
    return found
開發者ID:weizhenzhao,項目名稱:rasa_nlu,代碼行數:13,代碼來源:spacy_entity_extractor.py

示例7: __call__

# 需要導入模塊: from spacy.tokens import doc [as 別名]
# 或者: from spacy.tokens.doc import Doc [as 別名]
def __call__(self, doc: Doc):
    """
    Attach a ``Wordnet`` object to every token of the doc.

    :param doc: the doc whose tokens get annotated
    :return: the same doc
    """
    for tok in doc:
        # One Wordnet per token, built for this annotator's language.
        annotation = Wordnet(token=tok, lang=self.__lang)
        tok._.set(WordnetAnnotator.__FIELD, annotation)

    return doc
開發者ID:recognai,項目名稱:spacy-wordnet,代碼行數:8,代碼來源:wordnet_annotator.py

示例8: doc_to_fixed_tokens

# 需要導入模塊: from spacy.tokens import doc [as 別名]
# 或者: from spacy.tokens.doc import Doc [as 別名]
def doc_to_fixed_tokens(doc: SpacyDoc) -> List[str]:
    """Return the result of ``fix_token`` for every token of the document, in order."""
    fixed = []
    for token in doc:
        fixed.append(fix_token(token))
    return fixed
開發者ID:bhoov,項目名稱:exbert,代碼行數:5,代碼來源:aligner.py

示例9: convert_bilou_with_missing_action

# 需要導入模塊: from spacy.tokens import doc [as 別名]
# 或者: from spacy.tokens.doc import Doc [as 別名]
def convert_bilou_with_missing_action(doc: Doc, offsets: List[Tuple[int, int, str]]) -> List[Optional[str]]:
    """
    Compute the BILOU tags of a doc, replacing any tag that mentions the
    unknown type by the "no action" value, so that no Loss is applied
    to these tokens.
    https://spacy.io/api/goldparse#biluo_tags_from_offsets
    :param doc: text tokenized by Spacy
    :param offsets: original offsets as (start, end, type) tuples
    :return: list of BILOU tags, unknown ones replaced by ``no_action_bilou``
    """
    tags: List[Optional[str]] = []
    for tag in biluo_tags_from_offsets(doc, offsets):
        if unknown_type_name in tag:
            tags.append(no_action_bilou)
        else:
            tags.append(tag)
    return tags
開發者ID:ELS-RD,項目名稱:anonymisation,代碼行數:14,代碼來源:convert_to_bilou.py

示例10: convert_unknown_bilou

# 需要導入模塊: from spacy.tokens import doc [as 別名]
# 或者: from spacy.tokens.doc import Doc [as 別名]
def convert_unknown_bilou(doc: Doc, offsets: List[Offset]) -> GoldParse:
    """
    Build a GoldParse for a doc from its entity offsets, going through
    BILOU annotations in which the unknown label is turned into the
    Spacy missing value.
    https://spacy.io/api/goldparse#biluo_tags_from_offsets
    :param doc: spacy tokenized text
    :param offsets: discovered offsets
    :return: GoldParse of the doc, with the BILOU annotations as entities
    """
    tags = convert_bilou_with_missing_action(
        doc=doc,
        offsets=[entry.to_tuple() for entry in offsets],
    )
    return GoldParse(doc, entities=tags)
開發者ID:ELS-RD,項目名稱:anonymisation,代碼行數:14,代碼來源:convert_to_bilou.py

示例11: convert_unknown_bilou_bulk

# 需要導入模塊: from spacy.tokens import doc [as 別名]
# 或者: from spacy.tokens.doc import Doc [as 別名]
def convert_unknown_bilou_bulk(docs: List[Doc], offsets: List[List[Offset]]) -> List[GoldParse]:
    """
    Apply ``convert_unknown_bilou`` to each doc paired with its offsets.
    https://spacy.io/api/goldparse#biluo_tags_from_offsets
    :param docs: spacy tokenized texts
    :param offsets: discovered offsets, one list per doc
    :return: list of GoldParse, one per (doc, offsets) pair
    """
    # zip pairs docs with offsets positionally and stops at the shorter list.
    return [
        convert_unknown_bilou(doc=doc, offsets=doc_offsets)
        for doc, doc_offsets in zip(docs, offsets)
    ]
開發者ID:ELS-RD,項目名稱:anonymisation,代碼行數:17,代碼來源:convert_to_bilou.py

示例12: test_bilou_conv

# 需要導入模塊: from spacy.tokens import doc [as 別名]
# 或者: from spacy.tokens.doc import Doc [as 別名]
def test_bilou_conv():
    """UNKNOWN offsets must yield the '-' tag; a PERS offset keeps its U-PERS tag."""
    doc: Doc = pytest.nlp.make_doc("Ceci est un test.")

    # UNKNOWN on the second token, through both the single and the bulk API.
    unknown_second = [Offset(5, 8, "UNKNOWN")]
    missing_second = ['O', '-', 'O', 'O', 'O']
    assert convert_unknown_bilou(doc, offsets=unknown_second).ner == missing_second
    bulk_result = convert_unknown_bilou_bulk([doc], [unknown_second])
    assert bulk_result[0].ner == missing_second

    # Same characters with a regular label.
    person_second = [Offset(5, 8, "PERS")]
    assert convert_unknown_bilou(doc, offsets=person_second).ner == ['O', 'U-PERS', 'O', 'O', 'O']

    # UNKNOWN on the first token.
    unknown_first = [Offset(0, 4, "UNKNOWN")]
    assert convert_unknown_bilou(doc, offsets=unknown_first).ner == ['-', 'O', 'O', 'O', 'O']
開發者ID:ELS-RD,項目名稱:anonymisation,代碼行數:11,代碼來源:spacy_annotations_test.py

示例13: test_score

# 需要導入模塊: from spacy.tokens import doc [as 別名]
# 或者: from spacy.tokens.doc import Doc [as 別名]
def test_score():
    """A predicted span ending before the expected one scores 0, the exact span scores 100."""
    text = "Le Président, Le Commis-Greffier, Jean-Paul I FFELLI Nelly DUBAS"
    doc: Doc = pytest.nlp.make_doc(text)
    gold: GoldParse = GoldParse(doc, entities=[(34, 64, "PERS")])

    # (end character of the predicted span, expected p / r / f value)
    for end_char, expected in ((58, 0.0), (64, 100.0)):
        doc.ents = [doc.char_span(34, end_char, "PERS")]
        # A fresh Scorer per prediction so results do not accumulate.
        scorer: Scorer = Scorer()
        scorer.score(doc, gold)
        assert scorer.ents_per_type == {'PERS': {'p': expected, 'r': expected, 'f': expected}}
開發者ID:ELS-RD,項目名稱:anonymisation,代碼行數:17,代碼來源:spacy_annotations_test.py

示例14: test_set_span

# 需要導入模塊: from spacy.tokens import doc [as 別名]
# 或者: from spacy.tokens.doc import Doc [as 別名]
def test_set_span():
    """Spans over the same characters of two docs share their text but stay distinct set members."""
    text = "Le Président, Le Commis-Greffier, Jean-Paul I FFELLI Nelly DUBAS"
    left_doc: Doc = pytest.nlp.make_doc(text)
    right_doc: Doc = pytest.nlp.make_doc(text)
    left = left_doc.char_span(34, 58, "PERS")
    right = right_doc.char_span(34, 58, "PERS")
    # Same text: the symmetric difference of the two texts is empty.
    assert not ({left.text} ^ {right.text})
    # Different spans: the symmetric difference of the spans is not empty.
    assert {left} ^ {right}
開發者ID:ELS-RD,項目名稱:anonymisation,代碼行數:10,代碼來源:spacy_annotations_test.py

示例15: tokenize

# 需要導入模塊: from spacy.tokens import doc [as 別名]
# 或者: from spacy.tokens.doc import Doc [as 別名]
def tokenize(self, doc):
    # type: (Doc) -> List[Token]
    """Build a ``Token`` from the ``text`` and ``idx`` of each element of ``doc``."""
    collected = []
    for word in doc:
        collected.append(Token(word.text, word.idx))
    return collected
開發者ID:crownpku,項目名稱:Rasa_NLU_Chi,代碼行數:6,代碼來源:spacy_tokenizer.py


注:本文中的spacy.tokens.doc.Doc方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。