本文整理匯總了Python中spacy.gold模塊的典型用法代碼示例。如果您正苦於以下問題:Python spacy.gold的具體用法?Python spacy.gold怎麼用?Python spacy.gold使用的例子?那麼,這裡精選的代碼示例或許可以為您提供幫助。您也可以進一步了解該模塊所在的包spacy的用法示例。
在下文中一共展示了spacy.gold模塊的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: jsonToCrf
# 需要導入模塊: import spacy [as 別名]
# 或者: from spacy import gold [as 別名]
def jsonToCrf(self, json_eg, spacy_nlp):
    """Convert one annotated JSON example into CRF training rows.

    Args:
        json_eg: Mapping with a ``'text'`` entry (an iterable of sentences,
            each passed to ``spacy_nlp``) and an ``'entities'`` entry (an
            iterable of mappings with ``'rangeFrom'``, ``'rangeTo'`` and
            ``'entity'`` keys).
        spacy_nlp: Callable that turns a sentence into a spaCy ``Doc``.

    Returns:
        A list of ``(token.text, token.tag_, entity_tag)`` tuples, one per
        token of the last parsed sentence, or an empty list when ``'text'``
        yields no sentences.

    Note:
        ``GoldParse`` is not imported by this method; it has to be in scope
        already (``from spacy.gold import GoldParse``).
    """
    # NOTE(review): the snippet's indentation was lost, so the nesting is
    # reconstructed. Every sentence is parsed, but only the last ``doc`` is
    # used below -- confirm whether 'text' is meant to hold one sentence.
    doc = None
    for sentence in json_eg['text']:
        doc = spacy_nlp(sentence)
    if doc is None:
        # No sentence to convert; avoids referencing an unbound ``doc``.
        return []

    entity_offsets = [
        (entity['rangeFrom'], entity['rangeTo'], entity['entity'])
        for entity in json_eg['entities']
    ]
    gold = GoldParse(doc, entities=entity_offsets)
    # Position 5 of each ``orig_annot`` record is read as the token's entity
    # tag (presumably BILUO-encoded -- confirm against the spaCy version).
    ents = [annotation[5] for annotation in gold.orig_annot]
    return [(token.text, token.tag_, ent) for token, ent in zip(doc, ents)]
示例2: _from_json_to_crf
# 需要導入模塊: import spacy [as 別名]
# 或者: from spacy import gold [as 別名]
def _from_json_to_crf(self,
                      message,  # type: Message
                      entity_offsets  # type: List[Tuple[int, int, Text]]
                      ):
    # type: (...) -> List[Tuple[Text, Text, Text, Text]]
    """Convert json examples to format of underlying crfsuite.

    Args:
        message: Object whose ``get("spacy_doc")`` returns the spaCy doc
            of the training example.
        entity_offsets: ``(start, end, entity)`` triples handed to
            ``GoldParse`` as the entity annotation.

    Returns:
        Whatever ``self._from_text_to_crf(message, ents)`` produces for the
        per-token entity tags derived here.
    """
    # Imported lazily so spaCy is only required when this method runs.
    from spacy.gold import GoldParse

    doc = message.get("spacy_doc")
    gold = GoldParse(doc, entities=entity_offsets)
    # Position 5 of each ``orig_annot`` record is read as the entity tag.
    ents = [annotation[5] for annotation in gold.orig_annot]

    # A '-' tag is treated as an annotation that does not line up with the
    # token boundaries; it is reported but processing continues.
    if '-' in ents:
        logger.warning("Misaligned entity annotation in sentence '{}'. "
                       "Make sure the start and end values of the "
                       "annotated training examples end at token "
                       "boundaries (e.g. don't include trailing "
                       "whitespaces).".format(doc.text))

    if not self.component_config["BILOU_flag"]:
        for i, label in enumerate(ents):
            if self._bilou_from_label(label) in {"B", "I", "U", "L"}:
                # removes BILOU prefix from label
                ents[i] = self._entity_from_label(label)

    return self._from_text_to_crf(message, ents)
示例3: _from_json_to_crf
# 需要導入模塊: import spacy [as 別名]
# 或者: from spacy import gold [as 別名]
def _from_json_to_crf(self,
                      message: Message,
                      entity_offsets: List[Tuple[int, int, Text]]
                      ) -> List[Tuple[Text, Text, Text, Text]]:
    """Convert json examples to format of underlying crfsuite.

    Args:
        message: Training example; ``get("spacy_doc")`` is read when
            ``self.pos_features`` is truthy, ``get("tokens")`` otherwise.
        entity_offsets: ``(start, end, entity)`` triples describing the
            annotated entities.

    Returns:
        Whatever ``self._from_text_to_crf(message, ents)`` produces for the
        per-token entity tags derived here.
    """
    if self.pos_features:
        # Imported lazily so spaCy is only required on this branch.
        from spacy.gold import GoldParse

        doc = message.get("spacy_doc")
        gold = GoldParse(doc, entities=entity_offsets)
        # Position 5 of each ``orig_annot`` record is read as the entity tag.
        ents = [annotation[5] for annotation in gold.orig_annot]
    else:
        tokens = message.get("tokens")
        ents = self._bilou_tags_from_offsets(tokens, entity_offsets)

    # A '-' tag is treated as an annotation that does not line up with the
    # token boundaries; it is reported but processing continues.
    if '-' in ents:
        logger.warning("Misaligned entity annotation in sentence '{}'. "
                       "Make sure the start and end values of the "
                       "annotated training examples end at token "
                       "boundaries (e.g. don't include trailing "
                       "whitespaces or punctuation)."
                       "".format(message.text))

    if not self.component_config["BILOU_flag"]:
        for i, label in enumerate(ents):
            if self._bilou_from_label(label) in {"B", "I", "U", "L"}:
                # removes BILOU prefix from label
                ents[i] = self._entity_from_label(label)

    return self._from_text_to_crf(message, ents)
示例4: createDataset
# 需要導入模塊: import spacy [as 別名]
# 或者: from spacy import gold [as 別名]
def createDataset(self, intents, spacy_nlp):
    """Build CRF training rows for every sentence of every intent.

    Args:
        intents: Iterable of mappings. Each has a ``'text'`` entry (an
            iterable of sentences) and an ``'entities'`` entry indexed in
            parallel, where ``intent['entities'][k]`` is an iterable of
            mappings with ``'rangeFrom'``, ``'rangeTo'`` and ``'entity'``
            keys for sentence ``k``.
        spacy_nlp: Callable that turns a sentence into a spaCy ``Doc``.

    Returns:
        A list with one entry per sentence; each entry is a list of
        ``(token.text, token.tag_, entity_tag)`` tuples.

    Note:
        ``GoldParse`` is not imported by this method; it has to be in scope
        already (``from spacy.gold import GoldParse``).
    """
    dataset = []
    for intent in intents:
        for sentence_index, sentence in enumerate(intent['text']):
            doc = spacy_nlp(sentence)
            print(doc.text)  # progress trace kept from the original code

            # Rebuilt for every sentence: the original kept appending to a
            # single list, so earlier sentences' offsets leaked into later
            # GoldParse calls.
            entity_offsets = [
                (entity['rangeFrom'], entity['rangeTo'], entity['entity'])
                for entity in intent['entities'][sentence_index]
            ]
            gold = GoldParse(doc, entities=entity_offsets)
            # Position 5 of each ``orig_annot`` record is read as the
            # token's entity tag.
            ents = [annotation[5] for annotation in gold.orig_annot]

            # Pair each token with its own tag; the original indexed with
            # the leftover ``entity`` dict instead of the token position.
            dataset.append(
                [(token.text, token.tag_, ent) for token, ent in zip(doc, ents)]
            )
    return dataset