当前位置: 首页>>代码示例>>Python>>正文


Python spacy.gold方法代码示例

本文整理汇总了Python中spacy.gold方法的典型用法代码示例。如果您正苦于以下问题:Python spacy.gold方法的具体用法?Python spacy.gold怎么用?Python spacy.gold使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在spacy的用法示例。


在下文中一共展示了spacy.gold方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: jsonToCrf

# 需要导入模块: import spacy [as 别名]
# 或者: from spacy import gold [as 别名]
def jsonToCrf(self, json_eg, spacy_nlp):
        
        entity_offsets = []

        for sentence in json_eg['text']:

            doc = spacy_nlp(sentence)

            for i in json_eg['entities']:
                
                entity_offsets.append(tuple((i['rangeFrom'],i['rangeTo'],i['entity'])))
                
            gold = GoldParse(doc, entities=entity_offsets)
            ents = [l[5] for l in gold.orig_annot]
            crf_format = [(doc[i].text, doc[i].tag_, ents[i]) for i in range(len(doc))]
        
        return crf_format 
开发者ID:GeniSysAI,项目名称:NLU,代码行数:19,代码来源:CRF.py

示例2: _from_json_to_crf

# 需要导入模块: import spacy [as 别名]
# 或者: from spacy import gold [as 别名]
def _from_json_to_crf(self,
                          message,  # type: Message
                          entity_offsets  # type: List[Tuple[int, int, Text]]
                          ):
        # type: (...) -> List[Tuple[Text, Text, Text, Text]]
        """Convert json examples to format of underlying crfsuite."""
        from spacy.gold import GoldParse

        doc = message.get("spacy_doc")
        gold = GoldParse(doc, entities=entity_offsets)
        ents = [l[5] for l in gold.orig_annot]
        if '-' in ents:
            logger.warn("Misaligned entity annotation in sentence '{}'. "
                        "Make sure the start and end values of the "
                        "annotated training examples end at token "
                        "boundaries (e.g. don't include trailing "
                        "whitespaces).".format(doc.text))
        if not self.component_config["BILOU_flag"]:
            for i, label in enumerate(ents):
                if self._bilou_from_label(label) in {"B", "I", "U", "L"}:
                    # removes BILOU prefix from label
                    ents[i] = self._entity_from_label(label)

        return self._from_text_to_crf(message, ents) 
开发者ID:crownpku,项目名称:Rasa_NLU_Chi,代码行数:26,代码来源:crf_entity_extractor.py

示例3: _from_json_to_crf

# 需要导入模块: import spacy [as 别名]
# 或者: from spacy import gold [as 别名]
def _from_json_to_crf(self,
                          message: Message,
                          entity_offsets: List[Tuple[int, int, Text]]
                          ) -> List[Tuple[Text, Text, Text, Text]]:
        """Convert json examples to format of underlying crfsuite."""

        if self.pos_features:
            from spacy.gold import GoldParse

            doc = message.get("spacy_doc")
            gold = GoldParse(doc, entities=entity_offsets)
            ents = [l[5] for l in gold.orig_annot]
        else:
            tokens = message.get("tokens")
            ents = self._bilou_tags_from_offsets(tokens, entity_offsets)

        if '-' in ents:
            logger.warning("Misaligned entity annotation in sentence '{}'. "
                           "Make sure the start and end values of the "
                           "annotated training examples end at token "
                           "boundaries (e.g. don't include trailing "
                           "whitespaces or punctuation)."
                           "".format(message.text))
        if not self.component_config["BILOU_flag"]:
            for i, label in enumerate(ents):
                if self._bilou_from_label(label) in {"B", "I", "U", "L"}:
                    # removes BILOU prefix from label
                    ents[i] = self._entity_from_label(label)

        return self._from_text_to_crf(message, ents) 
开发者ID:weizhenzhao,项目名称:rasa_nlu,代码行数:32,代码来源:crf_entity_extractor.py

示例4: createDataset

# 需要导入模块: import spacy [as 别名]
# 或者: from spacy import gold [as 别名]
def createDataset(self, intents, spacy_nlp):
        
        dataset = []
        entity_offsets = []
        
        intentCounter = 0
        for intent in intents:
            
            sentenceCounter = 0
            for sentence in intent['text']:
                
                doc = spacy_nlp(sentence)
                print(doc.text)

                for entity in intent['entities'][sentenceCounter]:
                    
                    entity_offsets.append(tuple((entity['rangeFrom'],entity['rangeTo'],entity['entity'])))
                
                gold = GoldParse(doc, entities=entity_offsets)
                ents = [l[5] for l in gold.orig_annot]
                crf_format = [(doc[entity].text, doc[entity].tag_, ents[entity]) for i in range(len(doc))]
                dataset.append(crf_format)
                sentenceCounter = sentenceCounter + 1
            
            intentCounter = intentCounter + 1

        return dataset 
开发者ID:GeniSysAI,项目名称:NLU,代码行数:29,代码来源:CRF.py


注:本文中的spacy.gold方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。