本文整理汇总了 Python 中 plac.annotations 方法的典型用法代码示例。如果您正苦于以下问题：Python plac.annotations 方法的具体用法？Python plac.annotations 怎么用？Python plac.annotations 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 plac 的用法示例。
在下文中一共展示了 plac.annotations 方法的 3 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: trim_entity_spans
# 需要导入模块: import plac [as 别名]
# 或者: from plac import annotations [as 别名]
def trim_entity_spans(data: list) -> list:
    """Trim leading and trailing whitespace from entity spans.

    Args:
        data: Sequence of ``(text, annotations)`` pairs, where
            ``annotations['entities']`` holds ``(start, end, label)``
            triples whose offsets index into ``text``.

    Returns:
        A new list of ``[text, {'entities': [[start, end, label], ...]}]``
        items with each span tightened around its non-whitespace
        characters. The input is not modified. A span that covers only
        whitespace collapses to an empty span (``start == end``) instead
        of an inverted one.
    """
    invalid_span_tokens = re.compile(r'\s')
    cleaned_data = []
    for text, annotations in data:
        valid_entities = []
        for start, end, label in annotations['entities']:
            valid_start = start
            valid_end = end
            # Never scan past the span's own end (or the end of the text);
            # otherwise a whitespace-only span would yield start > end.
            start_limit = min(end, len(text))
            while valid_start < start_limit and invalid_span_tokens.match(
                    text[valid_start]):
                valid_start += 1
            # Bound by valid_start rather than a fixed index so the end can
            # never be moved in front of the (already trimmed) start.
            while valid_end > valid_start and invalid_span_tokens.match(
                    text[valid_end - 1]):
                valid_end -= 1
            valid_entities.append([valid_start, valid_end, label])
        cleaned_data.append([text, {'entities': valid_entities}])
    return cleaned_data
# training data
示例2: trim_entity_spans
# 需要导入模块: import plac [as 别名]
# 或者: from plac import annotations [as 别名]
def trim_entity_spans(data: list) -> list:
    """Removes leading and trailing white spaces from entity spans.

    Spans that consist only of whitespace are collapsed to an empty span
    (``start == end``); the returned offsets never have ``start > end``.

    Args:
        data (list): The data to be cleaned in spaCy JSON format, i.e. a
            sequence of ``(text, {'entities': [(start, end, label), ...]})``
            pairs.

    Returns:
        list: The cleaned data, as a new list of
            ``[text, {'entities': [[start, end, label], ...]}]`` items.
    """
    invalid_span_tokens = re.compile(r'\s')
    cleaned_data = []
    for text, annotations in data:
        entities = annotations['entities']
        valid_entities = []
        for start, end, label in entities:
            # Skip leading whitespace, but stop at the span's end (and at the
            # end of the text) so the start cannot overtake the end.
            scan_stop = min(end, len(text))
            valid_start = start
            while (valid_start < scan_stop
                   and invalid_span_tokens.match(text[valid_start])):
                valid_start += 1
            # Skip trailing whitespace, stopping at the trimmed start instead
            # of a hard-coded index so the span cannot become inverted.
            valid_end = end
            while (valid_end > valid_start
                   and invalid_span_tokens.match(text[valid_end - 1])):
                valid_end -= 1
            valid_entities.append([valid_start, valid_end, label])
        cleaned_data.append([text, {'entities': valid_entities}])
    return cleaned_data
示例3: trim_entity_spans
# 需要导入模块: import plac [as 别名]
# 或者: from plac import annotations [as 别名]
def trim_entity_spans(data: list) -> list:
    """Trim stray whitespace from the edges of annotated entity spans.

    The training data is derived from sources that have a fair bit of
    errant whitespace, so every ``(start, end, label)`` span is tightened
    until it neither starts nor ends on a whitespace character.

    Args:
        data: Sequence of ``(text, annotations)`` pairs, where
            ``annotations["entities"]`` is a sequence of
            ``(start, end, label)`` triples indexing into ``text``.

    Returns:
        A new list of ``[text, {"entities": [[start, end, label], ...]}]``
        items. A span made up entirely of whitespace is reduced to an
        empty span (``start == end``), never to one with ``start > end``.
    """
    invalid_span_tokens = re.compile(r"\s")
    cleaned_data = []
    for text, annotations in data:
        entities = annotations["entities"]
        valid_entities = []
        for start, end, label in entities:
            valid_start = start
            valid_end = end
            # The forward scan is capped at the span's end (and the text
            # length) so that it cannot run past the end offset.
            forward_cap = min(end, len(text))
            while valid_start < forward_cap and invalid_span_tokens.match(
                text[valid_start]
            ):
                valid_start += 1
            # The backward scan is capped at the trimmed start, not at a
            # fixed index, so the resulting span is never inverted.
            while valid_end > valid_start and invalid_span_tokens.match(
                text[valid_end - 1]
            ):
                valid_end -= 1
            valid_entities.append([valid_start, valid_end, label])
        cleaned_data.append([text, {"entities": valid_entities}])
    return cleaned_data