本文整理匯總了Python中plac.annotations方法的典型用法代碼示例。如果您正苦於以下問題：Python plac.annotations方法的具體用法？Python plac.annotations怎麼用？Python plac.annotations使用的例子？那麼，這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類plac的用法示例。
在下文中一共展示了plac.annotations方法的3個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: trim_entity_spans
# 需要導入模塊: import plac [as 別名]
# 或者: from plac import annotations [as 別名]
def trim_entity_spans(data: list) -> list:
    """Strip leading and trailing whitespace from entity spans.

    Args:
        data: Sequence of ``(text, annotations)`` pairs where
            ``annotations['entities']`` is a sequence of
            ``(start, end, label)`` triples indexing into ``text``.

    Returns:
        A new list of ``[text, {'entities': [[start, end, label], ...]}]``
        items in which every span has been shrunk past the whitespace at
        either edge. A span that holds nothing but whitespace collapses to
        an empty span (``start == end``) rather than an inverted one.
    """
    invalid_span_tokens = re.compile(r'\s')
    cleaned_data = []
    for text, annotations in data:
        entities = annotations['entities']
        valid_entities = []
        for start, end, label in entities:
            valid_start = start
            valid_end = end
            # Never advance past the span's own end (or the end of the
            # text); otherwise whitespace following the span would push
            # the start beyond the end.
            while (valid_start < min(valid_end, len(text))
                   and invalid_span_tokens.match(text[valid_start])):
                valid_start += 1
            # Stop at the trimmed start instead of a fixed index so the
            # end can never retreat in front of the start.
            while (valid_end > valid_start
                   and invalid_span_tokens.match(text[valid_end - 1])):
                valid_end -= 1
            valid_entities.append([valid_start, valid_end, label])
        cleaned_data.append([text, {'entities': valid_entities}])
    return cleaned_data
# training data
示例2: trim_entity_spans
# 需要導入模塊: import plac [as 別名]
# 或者: from plac import annotations [as 別名]
def trim_entity_spans(data: list) -> list:
    """Removes leading and trailing white spaces from entity spans.

    Spans made up entirely of whitespace are reduced to an empty span
    (``start == end``); the start is never moved past the end.

    Args:
        data (list): The data to be cleaned in spaCy JSON format, i.e.
            ``(text, {'entities': [(start, end, label), ...]})`` items.

    Returns:
        list: The cleaned data, as a new list of
            ``[text, {'entities': [[start, end, label], ...]}]`` items.
    """
    invalid_span_tokens = re.compile(r'\s')
    cleaned_data = []
    for text, annotations in data:
        valid_entities = []
        for start, end, label in annotations['entities']:
            valid_start, valid_end = start, end

            # Leading whitespace: only look at characters that belong to
            # the span and exist in the text.
            start_limit = min(valid_end, len(text))
            while valid_start < start_limit:
                if not invalid_span_tokens.match(text[valid_start]):
                    break
                valid_start += 1

            # Trailing whitespace: bounded by the trimmed start so the
            # result cannot become inverted.
            while valid_end > valid_start:
                if not invalid_span_tokens.match(text[valid_end - 1]):
                    break
                valid_end -= 1

            valid_entities.append([valid_start, valid_end, label])
        cleaned_data.append([text, {'entities': valid_entities}])
    return cleaned_data
示例3: trim_entity_spans
# 需要導入模塊: import plac [as 別名]
# 或者: from plac import annotations [as 別名]
def trim_entity_spans(data: list) -> list:
    """Trim errant whitespace from the edges of entity spans.

    The training data is derived from sources that have a fair bit of
    errant whitespace. This function takes a list of
    ``(text, {"entities": [(start, end, label), ...]})`` annotations and
    returns a new list in which each span no longer starts or ends on a
    whitespace character. A span consisting solely of whitespace becomes
    an empty span (``start == end``) instead of an inverted one.
    """
    invalid_span_tokens = re.compile(r"\s")

    def is_blank(char: str) -> bool:
        # Same whitespace test as before, wrapped for readability.
        return invalid_span_tokens.match(char) is not None

    cleaned_data = []
    for text, annotations in data:
        valid_entities = []
        for start, end, label in annotations["entities"]:
            valid_start = start
            valid_end = end
            # Walk the start forward, but never beyond the span or the text.
            while (
                valid_start < valid_end
                and valid_start < len(text)
                and is_blank(text[valid_start])
            ):
                valid_start += 1
            # Walk the end backward, but never in front of the start.
            while valid_end > valid_start and is_blank(text[valid_end - 1]):
                valid_end -= 1
            valid_entities.append([valid_start, valid_end, label])
        cleaned_data.append([text, {"entities": valid_entities}])
    return cleaned_data