本文整理汇总了Python中allennlp.data.fields.SequenceLabelField方法的典型用法代码示例。如果您正苦于以下问题：Python fields.SequenceLabelField方法的具体用法？Python fields.SequenceLabelField怎么用？Python fields.SequenceLabelField使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类allennlp.data.fields的用法示例。
在下文中一共展示了fields.SequenceLabelField方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self, context_tokens: List[Token], tokens: List[Token], tags: List[str] = None,
                     intents: List[str] = None, dialog_act: Dict[str, Any] = None) -> Instance:  # type: ignore
    """
    Build an ``Instance`` from two pre-tokenized token lists.

    We take `pre-tokenized` input here, because we don't have a tokenizer in this class.

    Parameters
    ----------
    context_tokens : tokens stored under the ``"context_tokens"`` field.
    tokens : tokens stored under the ``"tokens"`` field; their ``.text`` values
        are also recorded as ``"words"`` in the metadata.
    tags : optional per-token labels, aligned with the ``"tokens"`` field.
        Skipped when ``None``.
    intents : optional labels stored as a ``MultiLabelField`` in the
        ``"intent_labels"`` namespace. Skipped when ``None``.
    dialog_act : optional dictionary stored in the metadata; an empty dict is
        stored when it is ``None``.

    Returns
    -------
    An ``Instance`` holding the fields described above.
    """
    # pylint: disable=arguments-differ
    fields: Dict[str, Field] = {}
    fields["context_tokens"] = TextField(context_tokens, self._token_indexers)
    fields["tokens"] = TextField(tokens, self._token_indexers)
    # Build the metadata exactly once, at the position where the key was
    # originally first inserted, so the field order of the instance is unchanged.
    fields["metadata"] = MetadataField({
        "words": [x.text for x in tokens],
        'dialog_act': dialog_act if dialog_act is not None else {},
    })
    if tags is not None:
        fields["tags"] = SequenceLabelField(tags, fields["tokens"])
    if intents is not None:
        fields["intents"] = MultiLabelField(intents, label_namespace="intent_labels")
    return Instance(fields)
示例2: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self, tokens: List[Token], tags: List[str] = None, domain: str = None,
                     intent: str = None, dialog_act: Dict[str, Any] = None) -> Instance:  # type: ignore
    """
    Build an ``Instance`` from a pre-tokenized sentence.

    We take `pre-tokenized` input here, because we don't have a tokenizer in this class.

    ``tags``, ``domain`` and ``intent`` are each added only when truthy, so an
    empty list or empty string is treated like ``None``. The metadata always
    carries the token texts under ``"words"`` and ``dialog_act`` (an empty
    dict when it is ``None``).
    """
    # pylint: disable=arguments-differ
    token_field = TextField(tokens, self._token_indexers)
    fields: Dict[str, Field] = {"tokens": token_field}

    if tags:
        fields["tags"] = SequenceLabelField(tags, token_field)
    if domain:
        fields["domain"] = LabelField(domain, label_namespace="domain_labels")
    if intent:
        fields["intent"] = LabelField(intent, label_namespace="intent_labels")

    act = {} if dialog_act is None else dialog_act
    fields["metadata"] = MetadataField({"words": [tok.text for tok in tokens], 'dialog_act': act})
    return Instance(fields)
示例3: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self,  # type: ignore
                     tokens,
                     ner_tags=None):
    u"""
    Build an ``Instance`` from pre-tokenized input.

    We take `pre-tokenized` input here, because we don't have a tokenizer in this class.

    When ``ner_tags`` is given it is attached under ``tags``; if the reader's
    coding scheme is ``BIOUL`` the tags are first converted with ``to_bioul``
    using ``BIO`` as the source encoding.
    """
    # pylint: disable=arguments-differ
    token_field = TextField(tokens, self._token_indexers)
    words = [token.text for token in tokens]
    instance_fields = {
        u'tokens': token_field,
        u"metadata": MetadataField({u"words": words}),
    }

    # Nothing more to add when no tags were supplied.
    if ner_tags is None:
        return Instance(instance_fields)

    if self._coding_scheme == u"BIOUL":
        ner_tags = to_bioul(ner_tags, encoding=u"BIO")
    instance_fields[u'tags'] = SequenceLabelField(ner_tags, token_field)
    return Instance(instance_fields)
示例4: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self,  # type: ignore
                     tokens,
                     verb_label,
                     tags=None):
    u"""
    Build an ``Instance`` from pre-tokenized input and a verb indicator.

    We take `pre-tokenized` input here, along with a verb label. The verb label should be a
    one-hot binary vector, the same length as the tokens, indicating the position of the verb
    to find arguments for.

    The metadata records the token texts and the text of the token at the
    first position where ``verb_label`` equals 1, or ``None`` when every
    entry of ``verb_label`` is 0. ``tags`` is attached only when truthy.
    """
    # pylint: disable=arguments-differ
    token_field = TextField(tokens, token_indexers=self._token_indexers)
    fields = {
        u'tokens': token_field,
        u'verb_indicator': SequenceLabelField(verb_label, token_field),
    }
    if tags:
        fields[u'tags'] = SequenceLabelField(tags, token_field)

    no_verb = all(flag == 0 for flag in verb_label)
    verb = None if no_verb else tokens[verb_label.index(1)].text

    fields[u"metadata"] = MetadataField({u"words": [tok.text for tok in tokens],
                                         u"verb": verb})
    return Instance(fields)
示例5: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self, tokens: List[Token], tags: List[str]=None) -> Instance:
    """
    Build an ``Instance`` from tokens, truncating over-long input.

    When ``tokens`` is longer than ``self._max_token_len`` both ``tokens``
    and (if truthy) ``tags`` are cut to that length and a notice is printed.
    ``tags`` is attached under ``'tags'`` only when truthy.
    """
    limit = self._max_token_len
    if len(tokens) > limit:
        tokens = tokens[:limit]
        print(f'Length of tokens exceeded the limit {self._max_token_len}. Truncating...')
        if tags:
            # Keep the labels aligned with the shortened token list.
            tags = tags[:limit]

    token_field = TextField(tokens, self._token_indexers)
    fields = {'tokens': token_field}
    if tags:
        fields['tags'] = SequenceLabelField(tags, token_field)
    return Instance(fields)
示例6: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self,  # type: ignore
                     sentence_tokens: List[str],
                     verb_vector: List[int],
                     entity_vector: List[int],
                     state_change_types: Optional[List[str]] = None,
                     state_change_tags: Optional[List[str]] = None) -> Instance:
    """
    Build an ``Instance`` from raw word strings plus verb/entity indicator vectors.

    Inputs: each string in ``sentence_tokens`` is wrapped in a ``Token``;
    ``verb_vector`` and ``entity_vector`` become sequence labels over those
    tokens in the ``'indicator_tags'`` namespace.

    Outputs: ``state_change_types`` and ``state_change_tags`` are each added
    only when truthy.
    """
    # pylint: disable=arguments-differ
    words = TextField([Token(word) for word in sentence_tokens], self._token_indexers)
    fields: Dict[str, Field] = {
        'tokens': words,
        'verb_span': SequenceLabelField(verb_vector, words, 'indicator_tags'),
        'entity_span': SequenceLabelField(entity_vector, words, 'indicator_tags'),
    }

    # NOTE(review): the annotation says this is a list, yet it is handed to
    # LabelField as a single label - confirm what callers actually pass.
    if state_change_types:
        fields['state_change_type_labels'] = LabelField(state_change_types, 'state_change_type_labels')
    if state_change_tags:
        fields['state_change_tags'] = SequenceLabelField(state_change_tags, words, 'state_change_tags')
    return Instance(fields)
示例7: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(
    self,  # type: ignore
    query: List[str],
    slot_tags: List[str] = None,
    sql_template: str = None,
) -> Instance:
    """
    Build an ``Instance`` from a list of query words.

    The words are wrapped in ``Token`` objects and stored under ``"tokens"``.
    The ``"slot_tags"`` and ``"template"`` fields are added together, and only
    when both ``slot_tags`` and ``sql_template`` are provided.
    """
    token_field = TextField([Token(word) for word in query], self._token_indexers)
    fields: Dict[str, Field] = {"tokens": token_field}

    # Labels are all-or-nothing: without both, return the unlabeled instance.
    if slot_tags is None or sql_template is None:
        return Instance(fields)

    slot_field = SequenceLabelField(slot_tags, token_field, label_namespace="slot_tags")
    template_field = LabelField(sql_template, label_namespace="template_labels")
    fields["slot_tags"] = slot_field
    fields["template"] = template_field
    return Instance(fields)
示例8: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self, text: str, sentences: List[str] = None, tags: List[int] = None) -> Instance:
    """
    Build an ``Instance`` holding one ``TextField`` per sentence.

    When ``sentences`` is ``None`` the text is split with ``razdel`` if the
    reader language is ``"ru"`` and with NLTK otherwise. At most
    ``self._max_sentences_count`` sentences are kept; each is optionally
    lower-cased, tokenized, cut to ``self._sentence_max_tokens`` tokens and
    wrapped in start/end symbols. ``tags`` (when truthy) is cut to the same
    sentence limit and attached as labels over the list of sentences.
    """
    if sentences is None:
        if self._language == "ru":
            sentences = [s.text for s in razdel.sentenize(text)]
        else:
            sentences = nltk.tokenize.sent_tokenize(text)

    sentence_fields = []
    for raw_sentence in sentences[:self._max_sentences_count]:
        if self._lowercase:
            raw_sentence = raw_sentence.lower()
        body = self._tokenizer.tokenize(raw_sentence)[:self._sentence_max_tokens]
        # The start/end markers are added after truncation, so they are never cut off.
        wrapped = [Token(START_SYMBOL), *body, Token(END_SYMBOL)]
        sentence_fields.append(TextField(wrapped, self._source_token_indexers))

    sentences_field = ListField(sentence_fields)
    result = {'source_sentences': sentences_field}
    if tags:
        result["sentences_tags"] = SequenceLabelField(tags[:self._max_sentences_count], sentences_field)
    return Instance(result)
示例9: _fix_tokenization
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def _fix_tokenization(tokenized_sent, bert_embs, old_det_to_new_ind, obj_to_type, token_indexers, pad_ind=-1):
"""
Turn a detection list into what we want: some text, as well as some tags.
:param tokenized_sent: Tokenized sentence with detections collapsed to a list.
:param old_det_to_new_ind: Mapping of the old ID -> new ID (which will be used as the tag)
:param obj_to_type: [person, person, pottedplant] indexed by the old labels
:return: tokenized sentence
"""
new_tokenization_with_tags = []
for tok in tokenized_sent:
if isinstance(tok, list):
for int_name in tok:
obj_type = obj_to_type[int_name]
new_ind = old_det_to_new_ind[int_name]
if new_ind < 0:
raise ValueError("Oh no, the new index is negative! that means it's invalid. {} {}".format(
tokenized_sent, old_det_to_new_ind
))
text_to_use = GENDER_NEUTRAL_NAMES[
new_ind % len(GENDER_NEUTRAL_NAMES)] if obj_type == 'person' else obj_type
new_tokenization_with_tags.append((text_to_use, new_ind))
else:
new_tokenization_with_tags.append((tok, pad_ind))
text_field = BertField([Token(x[0]) for x in new_tokenization_with_tags],
bert_embs,
padding_value=0)
tags = SequenceLabelField([x[1] for x in new_tokenization_with_tags], text_field)
return text_field, tags
示例10: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self,
                     tokens: List[Token],
                     pico_tags: List[str] = None):
    """
    Build an ``Instance`` from pre-tokenized input.

    The instance always holds the tokens and a metadata field listing their
    texts under ``"words"``. When ``pico_tags`` is given, it is attached under
    ``'tags'`` using ``self.label_namespace`` as the label namespace.
    """
    token_field = TextField(tokens, self._token_indexers)
    instance_fields: Dict[str, Field] = {
        'tokens': token_field,
        "metadata": MetadataField({"words": [tok.text for tok in tokens]}),
    }
    # Per-token labels are optional; they are stored under the 'tags' key.
    if pico_tags is not None:
        instance_fields['tags'] = SequenceLabelField(pico_tags, token_field, self.label_namespace)
    return Instance(instance_fields)
示例11: _tokens_distances_fields
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def _tokens_distances_fields(self, tokens):
    """
    Return the updated tokens and both entity-distance sequences as fields.

    ``self._tokens_distances`` supplies the updated token list and two
    position sequences (for the first and second entity). The result is a
    ``(TextField, SequenceLabelField, SequenceLabelField)`` tuple in which
    both label fields are aligned with the text field.
    """
    updated_tokens, first_positions, second_positions = self._tokens_distances(tokens)
    text_field = TextField(updated_tokens, self._token_indexers)
    first_field = SequenceLabelField(first_positions, text_field)
    second_field = SequenceLabelField(second_positions, text_field)
    return text_field, first_field, second_field
示例12: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self, tokens: List[Token], tags: List[str] = None,
                     words: List[str] = None) -> Instance:  # type: ignore
    """
    Build an ``Instance`` from pre-tokenized input, optionally sub-sampling it.

    We take `pre-tokenized` input here, because we don't have a tokenizer in this class.

    Without ``tags`` the instance holds only the tokens and the metadata.
    With ``tags`` the method may return ``None`` instead of an instance:

    * when ``self._skip_complex`` is set and the matching entry of the
      complexity flags from ``self.extract_tags`` is positive;
    * when a single random draw exceeds ``self._tn_prob`` (used if
      ``self._skip_correct`` is set and every detection tag is ``"CORRECT"``)
      or ``self._tp_prob`` (used in every other case).
    """
    # pylint: disable=arguments-differ
    token_field = TextField(tokens, self._token_indexers)
    fields: Dict[str, Field] = {
        "tokens": token_field,
        "metadata": MetadataField({"words": words}),
    }
    if tags is None:
        return Instance(fields)

    labels, detect_tags, complex_flag_dict = self.extract_tags(tags)
    if self._skip_complex and complex_flag_dict[self._skip_complex] > 0:
        return None

    # One draw decides whether this example is kept; the threshold depends on
    # whether the sentence is treated as fully correct (TN) or not (TP).
    rnd = random()
    treat_as_tn = self._skip_correct and all(x == "CORRECT" for x in detect_tags)
    keep_prob = self._tn_prob if treat_as_tn else self._tp_prob
    if rnd > keep_prob:
        return None

    fields["labels"] = SequenceLabelField(labels, token_field,
                                          label_namespace="labels")
    fields["d_tags"] = SequenceLabelField(detect_tags, token_field,
                                          label_namespace="d_tags")
    return Instance(fields)
示例13: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(  # type: ignore
    self, tokens: List[Token], tags: List[str] = None
) -> Instance:
    """
    Build an ``Instance`` from pre-tokenized input.

    We take `pre-tokenized` input here, because we don't have a tokenizer in this class.

    The instance holds the tokens, a metadata field with their texts under
    ``"words"`` and, when ``tags`` is not ``None``, the per-token tags.
    """
    token_field = TextField(tokens, self._token_indexers)
    fields: Dict[str, Field] = {
        "tokens": token_field,
        "metadata": MetadataField({"words": [token.text for token in tokens]}),
    }
    if tags is None:
        return Instance(fields)

    fields["tags"] = SequenceLabelField(tags, token_field)
    return Instance(fields)
示例14: _get_ner_tags_and_mask
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def _get_ner_tags_and_mask(
instance: Instance, input_field_to_attack: str, ignore_tokens: List[str]
):
"""
Used for the NER task. Sets the num_ignore tokens, saves the original predicted tag and a 0/1
mask in the position of the tags
"""
# Set num_ignore_tokens
num_ignore_tokens = 0
input_field: TextField = instance[input_field_to_attack] # type: ignore
for token in input_field.tokens:
if str(token) in ignore_tokens:
num_ignore_tokens += 1
# save the original tags and a 0/1 mask where the tags are
tag_mask = []
original_tags = []
tag_field: SequenceLabelField = instance["tags"] # type: ignore
for label in tag_field.labels:
if label != "O":
tag_mask.append(1)
original_tags.append(label)
num_ignore_tokens += 1
else:
tag_mask.append(0)
return num_ignore_tokens, tag_mask, original_tags
示例15: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self, tokens, tags=None):  # type: ignore
    u"""
    Build an ``Instance`` from pre-tokenized input.

    We take `pre-tokenized` input here, because we don't have a tokenizer in this class.

    The token texts are recorded in the metadata under ``words``; ``tags`` is
    attached as per-token labels only when it is not ``None``.
    """
    # pylint: disable=arguments-differ
    token_field = TextField(tokens, self._token_indexers)
    word_texts = [token.text for token in tokens]

    fields = {}
    fields[u"tokens"] = token_field
    fields[u"metadata"] = MetadataField({u"words": word_texts})
    if tags is None:
        return Instance(fields)

    fields[u"tags"] = SequenceLabelField(tags, token_field)
    return Instance(fields)