本文整理汇总了Python中spacy.gold模块的典型用法代码示例。如果您正苦于以下问题：Python中spacy.gold的具体用法是什么？spacy.gold怎么用？有哪些spacy.gold的使用示例？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该模块所在的包spacy的用法示例。
在下文中一共展示了spacy.gold模块的4个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: jsonToCrf
# 需要导入模块: import spacy [as 别名]
# 或者: from spacy import gold [as 别名]
def jsonToCrf(self, json_eg, spacy_nlp):
    """Convert one annotated JSON example into CRF training rows.

    ``GoldParse`` is looked up in the enclosing module scope; this function
    does not import it itself.

    Args:
        json_eg: Mapping with a ``'text'`` entry, which is iterated to obtain
            the strings handed to ``spacy_nlp``, and an ``'entities'`` entry
            whose items each provide ``'rangeFrom'``, ``'rangeTo'`` and
            ``'entity'``.
        spacy_nlp: Callable that turns a string into a spaCy ``Doc``.

    Returns:
        A list of ``(token.text, token.tag_, entity_tag)`` tuples, where
        ``entity_tag`` is the sixth field of the matching
        ``gold.orig_annot`` entry. An empty list is returned when
        ``json_eg['text']`` yields nothing.
    """
    # The annotated spans do not depend on the sentence, so build them once
    # instead of appending another copy on every loop iteration (duplicated
    # spans would otherwise be handed to GoldParse).
    entity_offsets = [
        (ent['rangeFrom'], ent['rangeTo'], ent['entity'])
        for ent in json_eg['entities']
    ]

    # Pre-initialised so that an empty ``json_eg['text']`` returns [] rather
    # than raising UnboundLocalError at the ``return`` below.
    crf_format = []
    for sentence in json_eg['text']:
        doc = spacy_nlp(sentence)
        gold = GoldParse(doc, entities=entity_offsets)
        ents = [annot[5] for annot in gold.orig_annot]
        # NOTE(review): as before, each sentence overwrites the previous
        # result, so only the rows of the last sentence are returned --
        # confirm whether callers expect the rows of every sentence.
        crf_format = [
            (token.text, token.tag_, ents[idx])
            for idx, token in enumerate(doc)
        ]
    return crf_format
示例2: _from_json_to_crf
# 需要导入模块: import spacy [as 别名]
# 或者: from spacy import gold [as 别名]
def _from_json_to_crf(self,
                      message,  # type: Message
                      entity_offsets  # type: List[Tuple[int, int, Text]]
                      ):
    # type: (...) -> List[Tuple[Text, Text, Text, Text]]
    """Convert json examples to format of underlying crfsuite.

    The spaCy document stored on ``message`` under ``"spacy_doc"`` is
    combined with ``entity_offsets`` in a ``GoldParse``; the sixth field of
    every ``orig_annot`` entry is used as the per-token entity tag.

    A warning is logged when any tag equals ``'-'``, which this method
    treats as a misaligned annotation. When the ``"BILOU_flag"`` component
    setting is falsy, tags whose BILOU prefix is B, I, U or L are replaced
    by the bare entity name.

    Returns:
        Whatever ``self._from_text_to_crf(message, ents)`` produces.
    """
    from spacy.gold import GoldParse

    doc = message.get("spacy_doc")
    gold = GoldParse(doc, entities=entity_offsets)
    ents = [annot[5] for annot in gold.orig_annot]
    if '-' in ents:
        # ``Logger.warn`` is a deprecated alias; ``warning`` is the
        # supported spelling.
        logger.warning("Misaligned entity annotation in sentence '{}'. "
                       "Make sure the start and end values of the "
                       "annotated training examples end at token "
                       "boundaries (e.g. don't include trailing "
                       "whitespaces).".format(doc.text))
    if not self.component_config["BILOU_flag"]:
        for i, label in enumerate(ents):
            if self._bilou_from_label(label) in {"B", "I", "U", "L"}:
                # removes BILOU prefix from label
                ents[i] = self._entity_from_label(label)
    return self._from_text_to_crf(message, ents)
示例3: _from_json_to_crf
# 需要导入模块: import spacy [as 别名]
# 或者: from spacy import gold [as 别名]
def _from_json_to_crf(self,
                      message: Message,
                      entity_offsets: List[Tuple[int, int, Text]]
                      ) -> List[Tuple[Text, Text, Text, Text]]:
    """Turn a message and its entity spans into crfsuite training rows.

    With ``self.pos_features`` set, the per-token tags are read from a
    spaCy ``GoldParse`` built on the message's ``"spacy_doc"``; otherwise
    they come from ``self._bilou_tags_from_offsets`` applied to the
    message's ``"tokens"``. A ``'-'`` tag triggers a misalignment warning.
    When the ``"BILOU_flag"`` component setting is falsy, tags carrying a
    B/I/U/L prefix are reduced to the bare entity name.

    Returns:
        The result of ``self._from_text_to_crf(message, tags)``.
    """
    if not self.pos_features:
        token_list = message.get("tokens")
        tags = self._bilou_tags_from_offsets(token_list, entity_offsets)
    else:
        from spacy.gold import GoldParse

        spacy_doc = message.get("spacy_doc")
        parsed = GoldParse(spacy_doc, entities=entity_offsets)
        tags = [annotation[5] for annotation in parsed.orig_annot]

    if '-' in tags:
        warning_text = ("Misaligned entity annotation in sentence '{}'. "
                        "Make sure the start and end values of the "
                        "annotated training examples end at token "
                        "boundaries (e.g. don't include trailing "
                        "whitespaces or punctuation)."
                        "").format(message.text)
        logger.warning(warning_text)

    if not self.component_config["BILOU_flag"]:
        prefixed = {"B", "I", "U", "L"}
        for position, tag in enumerate(tags):
            if self._bilou_from_label(tag) not in prefixed:
                continue
            # strip the BILOU prefix, keeping only the entity name
            tags[position] = self._entity_from_label(tag)

    return self._from_text_to_crf(message, tags)
示例4: createDataset
# 需要导入模块: import spacy [as 别名]
# 或者: from spacy import gold [as 别名]
def createDataset(self, intents, spacy_nlp):
    """Build CRF training rows for every sentence of every intent.

    ``GoldParse`` is looked up in the enclosing module scope; this function
    does not import it itself.

    Args:
        intents: Iterable of mappings. ``intent['text']`` is iterated for
            sentences, and ``intent['entities'][k]`` holds the annotations
            of the k-th sentence, each providing ``'rangeFrom'``,
            ``'rangeTo'`` and ``'entity'``.
        spacy_nlp: Callable that turns a string into a spaCy ``Doc``.

    Returns:
        A list with one entry per sentence, in iteration order. Each entry
        is a list of ``(token.text, token.tag_, entity_tag)`` tuples, where
        ``entity_tag`` is the sixth field of the matching
        ``gold.orig_annot`` entry.
    """
    dataset = []
    for intent in intents:
        for sentence_idx, sentence in enumerate(intent['text']):
            doc = spacy_nlp(sentence)
            print(doc.text)  # progress output kept from the original

            # Offsets are rebuilt for each sentence: sharing one growing
            # list would apply the spans of earlier sentences to this doc.
            entity_offsets = [
                (entity['rangeFrom'], entity['rangeTo'], entity['entity'])
                for entity in intent['entities'][sentence_idx]
            ]
            gold = GoldParse(doc, entities=entity_offsets)
            ents = [annot[5] for annot in gold.orig_annot]

            # Index by token position; the previous code indexed ``doc`` and
            # ``ents`` with the leftover ``entity`` dict instead.
            dataset.append([
                (token.text, token.tag_, ents[idx])
                for idx, token in enumerate(doc)
            ])
    return dataset