本文整理汇总了Python中allennlp.data.fields.MetadataField类的典型用法代码示例。如果您正苦于以下问题:Python fields.MetadataField的具体用法?Python fields.MetadataField怎么用?Python fields.MetadataField使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块allennlp.data.fields的用法示例。
在下文中一共展示了fields.MetadataField类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self, context_tokens: List[Token], tokens: List[Token], tags: List[str] = None,
                     intents: List[str] = None, dialog_act: Dict[str, Any] = None) -> Instance:  # type: ignore
    """
    Build an ``Instance`` from two pre-tokenized token sequences.

    We take `pre-tokenized` input here, because we don't have a tokenizer in this class.

    Parameters
    ----------
    context_tokens : tokens stored in the ``context_tokens`` text field.
    tokens : tokens stored in the ``tokens`` text field; their texts are also
        recorded under the ``words`` metadata key.
    tags : optional per-token labels; when given, a ``tags`` sequence-label field
        aligned with the ``tokens`` field is added.
    intents : optional labels; when given, an ``intents`` multi-label field in the
        ``intent_labels`` namespace is added.
    dialog_act : optional dictionary stored under the ``dialog_act`` metadata key;
        an empty dict is stored when it is ``None``.

    Returns
    -------
    An ``Instance`` holding the fields described above plus ``metadata``.
    """
    # pylint: disable=arguments-differ
    fields: Dict[str, Field] = {}
    fields["context_tokens"] = TextField(context_tokens, self._token_indexers)
    fields["tokens"] = TextField(tokens, self._token_indexers)
    # The metadata field is built exactly once, at the position where it first
    # appeared, so the field order of the resulting instance is unchanged.
    fields["metadata"] = MetadataField({
        "words": [x.text for x in tokens],
        'dialog_act': dialog_act if dialog_act is not None else {},
    })
    if tags is not None:
        fields["tags"] = SequenceLabelField(tags, fields["tokens"])
    if intents is not None:
        fields["intents"] = MultiLabelField(intents, label_namespace="intent_labels")
    return Instance(fields)
示例2: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self, tokens: List[Token], tags: List[str] = None, domain: str = None,
                     intent: str = None, dialog_act: Dict[str, Any] = None) -> Instance:  # type: ignore
    """
    Build an ``Instance`` from a pre-tokenized sequence.

    We take `pre-tokenized` input here, because we don't have a tokenizer in this class.

    Optional ``tags``, ``domain`` and ``intent`` are only turned into fields when
    they are truthy. The ``metadata`` field always carries the token texts under
    ``words`` and ``dialog_act`` (an empty dict when ``dialog_act`` is ``None``).
    """
    # pylint: disable=arguments-differ
    token_field = TextField(tokens, self._token_indexers)
    instance_fields: Dict[str, Field] = {"tokens": token_field}

    if tags:
        instance_fields["tags"] = SequenceLabelField(tags, token_field)
    if domain:
        instance_fields["domain"] = LabelField(domain, label_namespace="domain_labels")
    if intent:
        instance_fields["intent"] = LabelField(intent, label_namespace="intent_labels")

    act = dialog_act if dialog_act is not None else {}
    instance_fields["metadata"] = MetadataField({"words": [tok.text for tok in tokens],
                                                 'dialog_act': act})
    return Instance(instance_fields)
示例3: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,  # type: ignore
                     item_id: Any,
                     question_text: str,
                     choice_text_list: List[str],
                     answer_id: int
                     ) -> Instance:
    """
    Build a multiple-choice ``Instance`` from a question and its answer choices.

    The question and every choice are tokenized with ``self._tokenizer``. The
    instance holds a ``question`` text field, a ``choices_list`` list field of
    text fields, a ``label`` field built from ``answer_id`` with
    ``skip_indexing=True``, and a ``metadata`` field with the raw texts, the
    token texts and ``item_id``.
    """
    # pylint: disable=arguments-differ
    tokenize = self._tokenizer.tokenize
    question_tokens = tokenize(question_text)
    tokenized_choices = [tokenize(choice) for choice in choice_text_list]

    instance_fields: Dict[str, Field] = {
        'question': TextField(question_tokens, self._token_indexers),
        'choices_list': ListField([TextField(choice, self._token_indexers)
                                   for choice in tokenized_choices]),
        'label': LabelField(answer_id, skip_indexing=True),
        "metadata": MetadataField({
            "id": item_id,
            "question_text": question_text,
            "choice_text_list": choice_text_list,
            "question_tokens": [tok.text for tok in question_tokens],
            "choice_tokens_list": [[tok.text for tok in choice] for choice in tokenized_choices],
        }),
    }
    return Instance(instance_fields)
示例4: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,  # type: ignore
                     tokens,
                     ner_tags=None):
    u"""
    Build an ``Instance`` from a pre-tokenized sequence and optional NER tags.

    We take `pre-tokenized` input here, because we don't have a tokenizer in this class.

    When ``ner_tags`` is given and ``self._coding_scheme`` is ``BIOUL``, the tags
    are converted with ``to_bioul`` (from ``BIO`` encoding) before being attached
    as the ``tags`` field.
    """
    # pylint: disable=arguments-differ
    token_field = TextField(tokens, self._token_indexers)
    instance_fields = {
        u'tokens': token_field,
        u"metadata": MetadataField({u"words": [token.text for token in tokens]}),
    }

    # Without tags there is nothing more to add.
    if ner_tags is None:
        return Instance(instance_fields)

    # Add "tag label" to instance
    if self._coding_scheme == u"BIOUL":
        ner_tags = to_bioul(ner_tags, encoding=u"BIO")
    instance_fields[u'tags'] = SequenceLabelField(ner_tags, token_field)
    return Instance(instance_fields)
示例5: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,  # type: ignore
                     premise,
                     hypothesis,
                     label=None):
    u"""
    Build an ``Instance`` from a premise/hypothesis pair.

    Both texts are tokenized with ``self._tokenizer``. A ``label`` field is added
    only when ``label`` is truthy. The token texts of both sequences are stored
    in the ``metadata`` field.
    """
    # pylint: disable=arguments-differ
    tokenize = self._tokenizer.tokenize
    premise_tokens = tokenize(premise)
    hypothesis_tokens = tokenize(hypothesis)

    instance_fields = {
        u'premise': TextField(premise_tokens, self._token_indexers),
        u'hypothesis': TextField(hypothesis_tokens, self._token_indexers),
    }
    if label:
        instance_fields[u'label'] = LabelField(label)

    instance_fields[u"metadata"] = MetadataField({
        u"premise_tokens": [tok.text for tok in premise_tokens],
        u"hypothesis_tokens": [tok.text for tok in hypothesis_tokens],
    })
    return Instance(instance_fields)
示例6: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,  # pylint: disable=arguments-differ
                     premise: str,
                     hypothesis: str,
                     label: str = None) -> Instance:
    """
    Build an ``Instance`` from a premise/hypothesis pair, keeping only the
    trailing ``self._max_tokens`` tokens of each sequence.

    Every kept token is rebuilt as a fresh ``Token`` from its text. A ``label``
    field is added only when ``label`` is truthy. This variant does not attach a
    ``metadata`` field.
    """
    def _trailing_tokens(text):
        # Tokenize, keep the last ``self._max_tokens`` tokens, re-wrap their text.
        return [Token(tok.text) for tok in self._tokenizer.tokenize(text)[-self._max_tokens:]]

    premise_tokens = _trailing_tokens(premise)
    hypothesis_tokens = _trailing_tokens(hypothesis)

    instance_fields: Dict[str, Field] = {
        'premise': TextField(premise_tokens, self._token_indexers),
        'hypothesis': TextField(hypothesis_tokens, self._token_indexers),
    }
    if label:
        instance_fields['label'] = LabelField(label)
    return Instance(instance_fields)
示例7: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,
                     citation_text: str,
                     citing_paper_id: str,
                     cited_paper_id: str,
                     intent: List[str] = None,
                     venue: str = None,
                     section_name: str = None) -> Instance:
    """
    Build an ``Instance`` from a citation text and the ids of the two papers.

    The citation text is tokenized with ``self._tokenizer``. When
    ``section_name`` is not ``None`` a ``section_label`` field (namespace
    ``section_labels``) is added. Both paper ids are stored as metadata fields.

    NOTE(review): ``intent`` and ``venue`` are accepted but not used by the
    code shown here.
    """
    citation_field = TextField(self._tokenizer.tokenize(citation_text), self._token_indexers)
    instance_fields = {'citation_text': citation_field}

    if section_name is not None:
        instance_fields['section_label'] = LabelField(section_name, label_namespace="section_labels")

    instance_fields['citing_paper_id'] = MetadataField(citing_paper_id)
    instance_fields['cited_paper_id'] = MetadataField(cited_paper_id)
    return Instance(instance_fields)
示例8: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,
                     citation_text: str,
                     citing_paper_id: str,
                     cited_paper_id: str,
                     intent: List[str] = None,
                     section_name: str = None) -> Instance:
    """
    Build an ``Instance`` from a citation text and the ids of the two papers.

    The citation text is tokenized with ``self._tokenizer``. When
    ``section_name`` is not ``None`` a ``section_label`` field (namespace
    ``section_labels``) is added. Both paper ids are stored as metadata fields.

    NOTE(review): ``intent`` is accepted but not used by the code shown here.
    """
    citation_field = TextField(self._tokenizer.tokenize(citation_text), self._token_indexers)
    instance_fields = {'citation_text': citation_field}

    if section_name is not None:
        instance_fields['section_label'] = LabelField(section_name, label_namespace="section_labels")

    instance_fields['citing_paper_id'] = MetadataField(citing_paper_id)
    instance_fields['cited_paper_id'] = MetadataField(cited_paper_id)
    return Instance(instance_fields)
示例9: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self, query_id:str, doc_id:str, query_sequence: str, doc_sequence: str) -> Instance:  # type: ignore
    """
    Build an ``Instance`` holding one joined query/document token sequence.

    Query and document are tokenized separately, each truncated to its maximum
    length and padded with ``self.padding_value`` up to its minimum length (a
    limit of ``-1`` or lower disables the respective step). They are then joined
    as ``query + [self.sep_value] + doc`` into the ``doc_tokens`` text field.
    The two ids are stored as metadata fields.
    """
    # pylint: disable=arguments-differ
    def _fit(sequence, max_length, min_length):
        # Truncate first, then pad, so the padding amount is based on the truncated length.
        if max_length > -1:
            sequence = sequence[:max_length]
        if min_length > -1 and len(sequence) < min_length:
            sequence = sequence + [self.padding_value] * (min_length - len(sequence))
        return sequence

    query_id_field = MetadataField(query_id)
    doc_id_field = MetadataField(doc_id)

    query_tokenized = _fit(self._tokenizer.tokenize(query_sequence),
                           self.max_query_length, self.min_query_length)
    doc_tokenized = _fit(self._tokenizer.tokenize(doc_sequence),
                         self.max_doc_length, self.min_doc_length)

    joined = query_tokenized + [self.sep_value] + doc_tokenized
    return Instance({
        "query_id": query_id_field,
        "doc_id": doc_id_field,
        "doc_tokens": TextField(joined, self._token_indexers)})
开发者ID:sebastian-hofstaetter,项目名称:transformer-kernel-ranking,代码行数:27,代码来源:bert_labeled_tuple_loader.py
示例10: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self, seq_id:str, seq_text:str) -> Instance:  # type: ignore
    """
    Build an ``Instance`` from a single sequence and its id.

    The text is tokenized, truncated to ``self.max_seq_length`` and padded with
    ``self.padding_value`` up to ``self.min_seq_length`` (a limit of ``-1`` or
    lower disables the respective step). The id is stored as a metadata field.
    """
    # pylint: disable=arguments-differ
    id_field = MetadataField(seq_id)

    tokens = self._tokenizer.tokenize(seq_text)
    if self.max_seq_length > -1:
        tokens = tokens[:self.max_seq_length]

    # Pad only when a minimum is configured and the truncated sequence is shorter.
    needs_padding = self.min_seq_length > -1 and len(tokens) < self.min_seq_length
    if needs_padding:
        tokens = tokens + [self.padding_value] * (self.min_seq_length - len(tokens))

    return Instance({
        "seq_id": id_field,
        "seq_tokens": TextField(tokens, self._token_indexers)})
开发者ID:sebastian-hofstaetter,项目名称:transformer-kernel-ranking,代码行数:19,代码来源:ir_single_sequence_loader.py
示例11: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,  # type: ignore
                     item_id: Any,
                     question_text: str,
                     choice_text_list: List[str],
                     answer_id: int) -> Instance:
    """
    Build a multiple-choice ``Instance`` from a question and its answer choices.

    The question and every choice are tokenized with ``self._tokenizer``. The
    instance holds a ``question`` text field, a ``choices_list`` list field of
    text fields, a ``label`` field built from ``answer_id`` with
    ``skip_indexing=True``, and a ``metadata`` field with the raw texts, the
    token texts and ``item_id``.
    """
    # pylint: disable=arguments-differ
    indexers = self._token_indexers
    question_tokens = self._tokenizer.tokenize(question_text)
    per_choice_tokens = [self._tokenizer.tokenize(choice) for choice in choice_text_list]

    instance_fields: Dict[str, Field] = {}
    instance_fields['question'] = TextField(question_tokens, indexers)
    instance_fields['choices_list'] = ListField(
        [TextField(choice_tokens, indexers) for choice_tokens in per_choice_tokens])
    instance_fields['label'] = LabelField(answer_id, skip_indexing=True)
    instance_fields["metadata"] = MetadataField({
        "id": item_id,
        "question_text": question_text,
        "choice_text_list": choice_text_list,
        "question_tokens": [tok.text for tok in question_tokens],
        "choice_tokens_list": [[tok.text for tok in choice_tokens]
                               for choice_tokens in per_choice_tokens],
    })
    return Instance(instance_fields)
示例12: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,  # type: ignore
                     sentence: str,
                     head: str,
                     tail: str,
                     head_type: str=None,
                     tail_type: str=None,
                     label: str=None) -> Instance:
    """
    Build an ``Instance`` whose ``sentence`` field is the sentence prefixed with
    the head and tail entities, separated by special delimiter tokens.

    Parameters
    ----------
    sentence, head, tail : raw strings, each split with
        ``self._token_splitter.split_words``.
    head_type, tail_type : entity types; only used (and then required) when
        ``self._masking_mode == 'ner_least_specific'``. Strings are split with the
        same token splitter as the other inputs; any other value is concatenated
        as given.
    label : optional label; when truthy it is appended to the instance id and
        added as a ``label`` field.

    Returns
    -------
    An ``Instance`` with ``metadata`` (lower-cased ``instance_id``), ``sentence``
    and, optionally, ``label``.

    Raises
    ------
    ValueError
        If the masking mode is ``'ner_least_specific'`` and ``head_type`` or
        ``tail_type`` is ``None``.
    """
    # pylint: disable=arguments-differ
    fields: Dict[str, Field] = {}

    instance_id = f'{head}#{tail}'
    if label:
        instance_id = f'{instance_id}#{label}'
    fields['metadata'] = MetadataField({'instance_id': instance_id.lower()})

    split_words = self._token_splitter.split_words
    tokens = split_words(sentence)
    head = split_words(head)
    tail = split_words(tail)

    # TODO: this should not be done here
    if self._masking_mode == 'ner_least_specific':
        logger.info("Using masking mode 'ner_least_specific'.")
        if head_type is None or tail_type is None:
            raise ValueError("Masking mode 'ner_least_specific' requires both "
                             "head_type and tail_type.")
        # A plain string cannot be concatenated onto a token list, so string
        # types are tokenized like the other inputs.
        # NOTE(review): assumes the type strings should go through the same
        # token splitter as head/tail -- confirm against the intended format.
        if isinstance(head_type, str):
            head_type = split_words(head_type)
        if isinstance(tail_type, str):
            tail_type = split_words(tail_type)
        tokens = ([Token('__start__')]
                  + head + [Token('__del1__')] + head_type + [Token('__ent1__')]
                  + tail + [Token('__del2__')] + tail_type + [Token('__ent2__')]
                  + tokens + [Token('__clf__')])
    else:
        tokens = ([Token('__start__')]
                  + head + [Token('__del1__')]
                  + tail + [Token('__del2__')]
                  + tokens + [Token('__clf__')])

    fields['sentence'] = TextField(tokens, self._token_indexers)
    if label:
        fields['label'] = LabelField(label)
    return Instance(fields)
示例13: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,
                     tokens: List[Token],
                     pico_tags: List[str] = None):
    """
    Build an ``Instance`` from a pre-tokenized sequence and optional tags.

    The instance always holds the ``tokens`` text field and a ``metadata`` field
    with the token texts under ``words``. When ``pico_tags`` is given, a ``tags``
    sequence-label field using ``self.label_namespace`` is added.
    """
    token_field = TextField(tokens, self._token_indexers)
    fields: Dict[str, Field] = {
        'tokens': token_field,
        "metadata": MetadataField({"words": [tok.text for tok in tokens]}),
    }

    # Set the field 'tags' only when tags were supplied
    if pico_tags is None:
        return Instance(fields)

    fields['tags'] = SequenceLabelField(pico_tags, token_field, self.label_namespace)
    return Instance(fields)
示例14: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,
                     text: str,
                     label: str = None,
                     metadata: Any = None) -> Instance:  # type: ignore
    """
    Build an ``Instance`` from raw text with optional label and metadata.

    The text is tokenized with ``self._tokenizer``. A ``label`` field is added
    when ``label`` is not ``None``; a ``metadata`` field is added only when
    ``metadata`` is truthy.
    """
    token_field = TextField(self._tokenizer.tokenize(text), self._token_indexers)
    instance_fields = {'text': token_field}

    if label is not None:
        instance_fields['label'] = LabelField(label)
    if metadata:
        instance_fields['metadata'] = MetadataField(metadata)

    return Instance(instance_fields)
示例15: text_to_instance
# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self, tokens: List[Token], tags: List[str] = None,
                     words: List[str] = None) -> Instance:  # type: ignore
    """
    Build an ``Instance`` from a pre-tokenized sequence, or return ``None`` when
    the example is skipped.

    We take `pre-tokenized` input here, because we don't have a tokenizer in this class.

    When ``tags`` is given, ``self.extract_tags`` yields the labels, the
    detection tags and a complexity-flag dictionary. The example is skipped if
    ``self._skip_complex`` is set and its flag count is positive, or if a random
    draw exceeds the applicable keep probability: ``self._tn_prob`` when
    ``self._skip_correct`` is set and every detection tag is ``"CORRECT"``,
    otherwise ``self._tp_prob``.
    """
    # pylint: disable=arguments-differ
    sequence = TextField(tokens, self._token_indexers)
    fields: Dict[str, Field] = {
        "tokens": sequence,
        "metadata": MetadataField({"words": words}),
    }

    # Without tags there is nothing to filter on or to label.
    if tags is None:
        return Instance(fields)

    labels, detect_tags, complex_flag_dict = self.extract_tags(tags)
    if self._skip_complex and complex_flag_dict[self._skip_complex] > 0:
        return None

    # Draw before choosing the threshold, matching the original call order.
    rnd = random()
    all_correct = self._skip_correct and all(x == "CORRECT" for x in detect_tags)
    # skip TN when everything is correct, skip TP otherwise
    keep_prob = self._tn_prob if all_correct else self._tp_prob
    if rnd > keep_prob:
        return None

    fields["labels"] = SequenceLabelField(labels, sequence,
                                          label_namespace="labels")
    fields["d_tags"] = SequenceLabelField(detect_tags, sequence,
                                          label_namespace="d_tags")
    return Instance(fields)