This article collects typical usage examples of the Python class allennlp.data.instance.Instance. If you are unsure what Instance is for, how to use it, or what real code that uses it looks like, the curated examples below should help. You can also explore further usage examples from the allennlp.data.instance module.
The 15 code examples of Instance shown below are ordered roughly by popularity.
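Before the examples, here is a minimal sketch of what an Instance is (assuming an AllenNLP 0.9/1.x-style API, which matches the imports used throughout the examples below): an Instance is simply a named collection of Field objects that can later be indexed against a Vocabulary and padded into tensors.

from allennlp.data import Instance, Token
from allennlp.data.fields import LabelField, TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

token_indexers = {"tokens": SingleIdTokenIndexer()}
tokens = [Token(t) for t in "the movie was great".split()]
instance = Instance({
    "tokens": TextField(tokens, token_indexers),
    "label": LabelField("positive"),
})
print(instance.fields.keys())  # dict_keys(['tokens', 'label'])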
Example 1: text_to_instance
# Required import: from allennlp.data import instance [as alias]
# Or: from allennlp.data.instance import Instance [as alias]
def text_to_instance(self, context_tokens: List[Token], tokens: List[Token], tags: List[str] = None,
                     intents: List[str] = None, dialog_act: Dict[str, Any] = None) -> Instance:  # type: ignore
    """
    We take `pre-tokenized` input here, because we don't have a tokenizer in this class.
    """
    # pylint: disable=arguments-differ
    fields: Dict[str, Field] = {}
    fields["context_tokens"] = TextField(context_tokens, self._token_indexers)
    fields["tokens"] = TextField(tokens, self._token_indexers)
    if tags is not None:
        fields["tags"] = SequenceLabelField(tags, fields["tokens"])
    if intents is not None:
        fields["intents"] = MultiLabelField(intents, label_namespace="intent_labels")
    if dialog_act is not None:
        fields["metadata"] = MetadataField({"words": [x.text for x in tokens],
                                            'dialog_act': dialog_act})
    else:
        fields["metadata"] = MetadataField({"words": [x.text for x in tokens], 'dialog_act': {}})
    return Instance(fields)
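A possible smoke test for the method above (hedged: the reader class itself is not shown in this listing, so a bare namespace stands in for self, providing only the _token_indexers attribute the method actually uses, and the dialog-act payload is made up for illustration; the usual AllenNLP field imports are assumed to be in scope for the definition above):

from types import SimpleNamespace
from allennlp.data import Token
from allennlp.data.token_indexers import SingleIdTokenIndexer

reader = SimpleNamespace(_token_indexers={"tokens": SingleIdTokenIndexer()})
context = [Token(t) for t in "hello how can i help".split()]
utterance = [Token(t) for t in "book a cheap hotel".split()]
tags = ["O", "O", "B-Hotel-Inform+Price", "B-Hotel-Inform+Type"]  # one tag per utterance token

instance = text_to_instance(reader, context, utterance, tags=tags,
                            intents=["Hotel-Inform"],
                            dialog_act={"Hotel-Inform": [["Price", "cheap"]]})
print(sorted(instance.fields))  # ['context_tokens', 'intents', 'metadata', 'tags', 'tokens']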
Example 2: text_to_instance
# Required import: from allennlp.data import instance [as alias]
# Or: from allennlp.data.instance import Instance [as alias]
def text_to_instance(self, tokens: List[Token], tags: List[str] = None, domain: str = None,
                     intent: str = None, dialog_act: Dict[str, Any] = None) -> Instance:  # type: ignore
    """
    We take `pre-tokenized` input here, because we don't have a tokenizer in this class.
    """
    # pylint: disable=arguments-differ
    fields: Dict[str, Field] = {}
    sequence = TextField(tokens, self._token_indexers)
    fields["tokens"] = sequence
    if tags:
        fields["tags"] = SequenceLabelField(tags, sequence)
    if domain:
        fields["domain"] = LabelField(domain, label_namespace="domain_labels")
    if intent:
        fields["intent"] = LabelField(intent, label_namespace="intent_labels")
    if dialog_act is not None:
        fields["metadata"] = MetadataField({"words": [x.text for x in tokens],
                                            'dialog_act': dialog_act})
    else:
        fields["metadata"] = MetadataField({"words": [x.text for x in tokens], 'dialog_act': {}})
    return Instance(fields)
Example 3: text_to_instance
# Required import: from allennlp.data import instance [as alias]
# Or: from allennlp.data.instance import Instance [as alias]
def text_to_instance(self,  # type: ignore
                     tokens: List[str],
                     entity_1: Tuple[int, int],
                     entity_2: Tuple[int, int],
                     label: str = None) -> Instance:
    # pylint: disable=arguments-differ
    fields: Dict[str, Field] = {}
    tokens = [OpenAISplitter._standardize(token) for token in tokens]
    tokens = (['__start__'] + tokens[entity_1[0]:entity_1[1] + 1] + ['__del1__']
              + tokens[entity_2[0]:entity_2[1] + 1] + ['__del2__'] + tokens + ['__clf__'])
    sentence = TextField([Token(text=t) for t in tokens], self._token_indexers)
    fields['sentence'] = sentence
    # fields['entity1'] = SpanField(*entity_1, sequence_field=sentence)
    # fields['entity2'] = SpanField(*entity_2, sequence_field=sentence)
    if label:
        fields['label'] = LabelField(label)
    return Instance(fields)
Example 4: _read
# Required import: from allennlp.data import instance [as alias]
# Or: from allennlp.data.instance import Instance [as alias]
def _read(self, file_path: str) -> Iterable[Instance]:
    # if `file_path` is a URL, redirect to the cache
    file_path = cached_path(file_path)
    with open(file_path, "r") as data_file:
        logger.info("Reading instances from lines in file at: %s", file_path)
        # Group into alternating divider / sentence chunks.
        for is_divider, lines in itertools.groupby(data_file, _is_divider):
            # Ignore the divider chunks, so that `lines` corresponds to the words
            # of a single sentence.
            if not is_divider:
                fields = [line.strip().split() for line in lines]
                # unzipping trick returns tuples, but our Fields need lists
                fields = [list(field) for field in zip(*fields)]
                tokens_, _, _, pico_tags = fields
                # TextField requires ``Token`` objects
                tokens = [Token(token) for token in tokens_]
                yield self.text_to_instance(tokens, pico_tags)
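The _is_divider helper is referenced but not shown in this listing. In AllenNLP's own CoNLL-2003 reader, which this example (and the similar Example 10 below) closely follows, it treats blank lines and "-DOCSTART-" document markers as dividers; a sketch along those lines:

def _is_divider(line: str) -> bool:
    # Blank lines and "-DOCSTART-" markers separate sentences/documents in
    # CoNLL-formatted files; every other line carries a token and its tag columns.
    if line.strip() == "":
        return True
    return line.split()[0] == "-DOCSTART-"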
Example 5: text_to_instance
# Required import: from allennlp.data import instance [as alias]
# Or: from allennlp.data.instance import Instance [as alias]
def text_to_instance(self, query_sequence: str, doc_pos_sequence: str, doc_neg_sequence: str) -> Instance:  # type: ignore
    # pylint: disable=arguments-differ
    query_tokenized = self._tokenizer.tokenize(query_sequence)
    if self.max_query_length > -1:
        query_tokenized = query_tokenized[:self.max_query_length]
    query_field = TextField(query_tokenized, self._token_indexers)

    doc_pos_tokenized = self._tokenizer.tokenize(doc_pos_sequence)
    if self.max_doc_length > -1:
        doc_pos_tokenized = doc_pos_tokenized[:self.max_doc_length]
    doc_pos_field = TextField(doc_pos_tokenized, self._token_indexers)

    doc_neg_tokenized = self._tokenizer.tokenize(doc_neg_sequence)
    if self.max_doc_length > -1:
        doc_neg_tokenized = doc_neg_tokenized[:self.max_doc_length]
    doc_neg_field = TextField(doc_neg_tokenized, self._token_indexers)

    return Instance({
        "query_tokens": query_field,
        "doc_pos_tokens": doc_pos_field,
        "doc_neg_tokens": doc_neg_field})
Example 6: text_to_instance
# Required import: from allennlp.data import instance [as alias]
# Or: from allennlp.data.instance import Instance [as alias]
def text_to_instance(self,  # type: ignore
                     item_id: Any,
                     question_text: str,
                     choice_text_list: List[str],
                     answer_id: int) -> Instance:
    # pylint: disable=arguments-differ
    fields: Dict[str, Field] = {}
    question_tokens = self._tokenizer.tokenize(question_text)
    choices_tokens_list = [self._tokenizer.tokenize(x) for x in choice_text_list]
    fields['question'] = TextField(question_tokens, self._token_indexers)
    fields['choices_list'] = ListField([TextField(x, self._token_indexers) for x in choices_tokens_list])
    fields['label'] = LabelField(answer_id, skip_indexing=True)
    metadata = {
        "id": item_id,
        "question_text": question_text,
        "choice_text_list": choice_text_list,
        "question_tokens": [x.text for x in question_tokens],
        "choice_tokens_list": [[x.text for x in ct] for ct in choices_tokens_list],
    }
    fields["metadata"] = MetadataField(metadata)
    return Instance(fields)
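Two details of this reader are worth illustrating with a small, hedged sketch that is independent of the unnamed reader class above: ListField wraps one TextField per answer choice so all choices are padded together, and skip_indexing=True tells LabelField that the gold answer is already an integer index rather than a string to look up in a vocabulary namespace.

from allennlp.data import Token
from allennlp.data.fields import LabelField, ListField, TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

indexers = {"tokens": SingleIdTokenIndexer()}
choices = ["red", "green", "the sky is blue"]
choices_field = ListField(
    [TextField([Token(t) for t in c.split()], indexers) for c in choices])
label_field = LabelField(2, skip_indexing=True)  # gold answer index, no vocab lookup
print(len(choices_field.field_list), label_field.label)  # 3 2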
Example 7: preprocess
# Required import: from allennlp.data import instance [as alias]
# Or: from allennlp.data.instance import Instance [as alias]
def preprocess(self, token_batch):
    seq_lens = [len(sequence) for sequence in token_batch if sequence]
    if not seq_lens:
        return []
    max_len = min(max(seq_lens), self.max_len)
    batches = []
    for indexer in self.indexers:
        batch = []
        for sequence in token_batch:
            tokens = sequence[:max_len]
            tokens = [Token(token) for token in ['$START'] + tokens]
            batch.append(Instance({'tokens': TextField(tokens, indexer)}))
        batch = Batch(batch)
        batch.index_instances(self.vocab)
        batches.append(batch)
    return batches
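A possible continuation (hedged: `corrector` is a made-up name for whatever object defines preprocess above; it only needs the indexers, vocab and max_len attributes the method uses). Once each Batch has been indexed against the vocabulary, it can be padded and converted to tensors in one call:

token_batch = [["the", "cat", "sat"], ["hello", "world"]]
for batch in corrector.preprocess(token_batch):
    # get_padding_lengths() reports the longest sequence (plus the $START marker),
    # and as_tensor_dict() pads every instance in the batch to that length.
    tensors = batch.as_tensor_dict(batch.get_padding_lengths())
    print(tensors.keys())  # dict_keys(['tokens'])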
Example 8: get_padding_lengths
# Required import: from allennlp.data import instance [as alias]
# Or: from allennlp.data.instance import Instance [as alias]
def get_padding_lengths(self) -> Dict[str, Dict[str, int]]:
    """
    Gets the maximum padding lengths from all `Instances` in this batch. Each `Instance`
    has multiple `Fields`, and each `Field` could have multiple things that need padding.
    We look at all fields in all instances, and find the max values for each (field_name,
    padding_key) pair, returning them in a dictionary.
    This can then be used to convert this batch into arrays of consistent length, or to set
    model parameters, etc.
    """
    padding_lengths: Dict[str, Dict[str, int]] = defaultdict(dict)
    all_instance_lengths: List[Dict[str, Dict[str, int]]] = [
        instance.get_padding_lengths() for instance in self.instances
    ]
    all_field_lengths: Dict[str, List[Dict[str, int]]] = defaultdict(list)
    for instance_lengths in all_instance_lengths:
        for field_name, instance_field_lengths in instance_lengths.items():
            all_field_lengths[field_name].append(instance_field_lengths)
    for field_name, field_lengths in all_field_lengths.items():
        for padding_key in field_lengths[0].keys():
            max_value = max(x.get(padding_key, 0) for x in field_lengths)
            padding_lengths[field_name][padding_key] = max_value
    return {**padding_lengths}
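To make the return structure concrete, here is a small, hedged example (recent AllenNLP versions expose Batch from allennlp.data.batch, and the exact padding-key names differ between versions, so only the overall shape of the dictionary is asserted): build a batch of two single-field instances, index it, and ask for its padding lengths.

from allennlp.data import Instance, Token, Vocabulary
from allennlp.data.batch import Batch
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

indexers = {"tokens": SingleIdTokenIndexer()}

def make_instance(text: str) -> Instance:
    return Instance({"tokens": TextField([Token(t) for t in text.split()], indexers)})

batch = Batch([make_instance("a short one"), make_instance("a noticeably longer sentence here")])
vocab = Vocabulary.from_instances(batch.instances)
batch.index_instances(vocab)  # fields must be indexed before padding lengths are known
# One entry per (field_name, padding_key); the maximum is taken over both
# instances, so the length reported for "tokens" is 5 (the longer sentence).
print(batch.get_padding_lengths())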
Example 9: print_statistics
# Required import: from allennlp.data import instance [as alias]
# Or: from allennlp.data.instance import Instance [as alias]
def print_statistics(self) -> None:
    # Make sure the instances have been indexed first.
    sequence_field_lengths: Dict[str, List] = defaultdict(list)
    for instance in self.instances:
        if not instance.indexed:
            raise ConfigurationError(
                "Instances must be indexed with vocabulary "
                "before asking to print dataset statistics."
            )
        for field, field_padding_lengths in instance.get_padding_lengths().items():
            for key, value in field_padding_lengths.items():
                sequence_field_lengths[f"{field}.{key}"].append(value)
    print("\n\n----Dataset Statistics----\n")
    for name, lengths in sequence_field_lengths.items():
        print(f"Statistics for {name}:")
        print(
            f"\tLengths: Mean: {numpy.mean(lengths)}, Standard Dev: {numpy.std(lengths)}, "
            f"Max: {numpy.max(lengths)}, Min: {numpy.min(lengths)}"
        )
    print("\n10 Random instances:")
    for i in numpy.random.randint(len(self.instances), size=10):
        print(f"Instance {i}:")
        print(f"\t{self.instances[i]}")
Example 10: _read
# Required import: from allennlp.data import instance [as alias]
# Or: from allennlp.data.instance import Instance [as alias]
def _read(self, file_path: str) -> Iterable[Instance]:
    # if `file_path` is a URL, redirect to the cache
    file_path = cached_path(file_path)
    with open(file_path, "r") as data_file:
        logger.info("Reading instances from lines in file at: %s", file_path)
        # Group into alternating divider / sentence chunks.
        for is_divider, lines in itertools.groupby(data_file, _is_divider):
            # Ignore the divider chunks, so that `lines` corresponds to the words
            # of a single sentence.
            if not is_divider:
                fields = [line.strip().split() for line in lines]
                # unzipping trick returns tuples, but our Fields need lists
                fields = [list(field) for field in zip(*fields)]
                tokens_, pos_tags, chunk_tags, ner_tags = fields
                # TextField requires `Token` objects
                tokens = [Token(token) for token in tokens_]
                yield self.text_to_instance(tokens, pos_tags, chunk_tags, ner_tags)
Example 11: text_to_instance
# Required import: from allennlp.data import instance [as alias]
# Or: from allennlp.data.instance import Instance [as alias]
def text_to_instance(self, *inputs) -> Instance:
    """
    Does whatever tokenization or processing is necessary to go from textual input to an
    `Instance`. The primary intended use for this is with a
    :class:`~allennlp.predictors.predictor.Predictor`, which gets text input as a JSON
    object and needs to process it to be input to a model.
    The intent here is to share code between :func:`_read` and what happens at
    model serving time, or any other time you want to make a prediction from new data. We need
    to process the data in the same way it was done at training time. Allowing the
    `DatasetReader` to process new text lets us accomplish this, as we can just call
    `DatasetReader.text_to_instance` when serving predictions.
    The input type here is rather vaguely specified, unfortunately. The `Predictor` will
    have to make some assumptions about the kind of `DatasetReader` that it's using, in order
    to pass it the right information.
    """
    raise NotImplementedError
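A hedged sketch of the call pattern this docstring describes (the predictor class name and the JSON key are made up for illustration): a Predictor overrides _json_to_instance and forwards the raw input to the reader's text_to_instance, so serving-time inputs go through exactly the same preprocessing as training-time inputs.

from allennlp.common.util import JsonDict
from allennlp.data import Instance
from allennlp.predictors import Predictor

class SentenceLevelPredictor(Predictor):  # hypothetical predictor
    def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        # self._dataset_reader is set by the Predictor base class; delegating to
        # its text_to_instance keeps train-time and serve-time processing identical.
        return self._dataset_reader.text_to_instance(json_dict["sentence"])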
Example 12: text_to_instance
# Required import: from allennlp.data import instance [as alias]
# Or: from allennlp.data.instance import Instance [as alias]
def text_to_instance(self,
                     premise: str,
                     hypothesis: str,
                     hypothesis_structure: str,
                     label: str = None) -> Instance:
    fields: Dict[str, Field] = {}
    premise_tokens = self._tokenizer.tokenize(premise)[-self._max_tokens:]
    hypothesis_tokens = self._tokenizer.tokenize(hypothesis)[-self._max_tokens:]
    fields['premise'] = TextField(premise_tokens, self._token_indexers)
    fields['hypothesis'] = TextField(hypothesis_tokens, self._token_indexers)
    metadata = {
        'premise': premise,
        'hypothesis': hypothesis,
        'premise_tokens': [token.text for token in premise_tokens],
        'hypothesis_tokens': [token.text for token in hypothesis_tokens]
    }
    fields['metadata'] = MetadataField(metadata)
    self._add_structure_to_fields(hypothesis_structure, fields)
    if label:
        fields['label'] = LabelField(label)
    return Instance(fields)
Example 13: text_to_instance
# Required import: from allennlp.data import instance [as alias]
# Or: from allennlp.data.instance import Instance [as alias]
def text_to_instance(self,  # type: ignore
                     premise: str,
                     hypothesis: str,
                     pid: str = None,
                     label: str = None) -> Instance:
    fields: Dict[str, Field] = {}
    premise_tokens = [Token(t) for t in premise.split(' ')]  # Removing code for parentheses in NLI
    hypothesis_tokens = [Token(t) for t in hypothesis.split(' ')]
    if self.max_l is not None:
        premise_tokens = premise_tokens[:self.max_l]
        hypothesis_tokens = hypothesis_tokens[:self.max_l]
    fields['premise'] = TextField(premise_tokens, self._token_indexers)
    fields['hypothesis'] = TextField(hypothesis_tokens, self._token_indexers)
    if label:
        fields['selection_label'] = LabelField(label, label_namespace='selection_labels')
    if pid:
        fields['pid'] = IdField(pid)
    return Instance(fields)
Example 14: text_to_instance
# Required import: from allennlp.data import instance [as alias]
# Or: from allennlp.data.instance import Instance [as alias]
def text_to_instance(self,  # type: ignore
                     premise: str,
                     hypothesis: str,
                     pid: str = None,
                     label: str = None) -> Instance:
    fields: Dict[str, Field] = {}
    premise_tokens = [Token(t) for t in premise.split(' ')]  # Removing code for parentheses in NLI
    hypothesis_tokens = [Token(t) for t in hypothesis.split(' ')]
    if self.max_l is not None:
        premise_tokens = premise_tokens[:self.max_l]
        hypothesis_tokens = hypothesis_tokens[:self.max_l]
    fields['premise'] = TextField(premise_tokens, self._token_indexers)
    fields['hypothesis'] = TextField(hypothesis_tokens, self._token_indexers)
    if label:
        fields['label'] = LabelField(label, label_namespace='labels')
    if pid:
        fields['pid'] = IdField(pid)
    return Instance(fields)
Example 15: text_to_instance
# Required import: from allennlp.data import instance [as alias]
# Or: from allennlp.data.instance import Instance [as alias]
def text_to_instance(self,  # type: ignore
                     tokens: List[Token],
                     ner_tags: List[str] = None) -> Instance:
    """
    We take `pre-tokenized` input here, because we don't have a tokenizer in this class.
    """
    # pylint: disable=arguments-differ
    sequence = TextField(tokens, self._token_indexers)
    instance_fields = {'tokens': sequence}
    instance_fields["metadata"] = MetadataField({"words": [x.text for x in tokens]})
    # Add "tag label" to instance
    if ner_tags is not None:
        if self._coding_scheme == "BIOUL":
            ner_tags = to_bioul(ner_tags, encoding="BIO")
        instance_fields['tags'] = SequenceLabelField(ner_tags, sequence)
    return Instance(instance_fields)
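To make the coding-scheme conversion above concrete, a small hedged example (to_bioul is imported here from AllenNLP's dataset-reader utilities; the tag sequence is made up): BIO tags gain explicit Unit/Last markers, so single-token entities and span-final tokens become distinguishable.

from allennlp.data.dataset_readers.dataset_utils import to_bioul

bio_tags = ["B-PER", "I-PER", "O", "B-LOC", "O"]
print(to_bioul(bio_tags, encoding="BIO"))
# ['B-PER', 'L-PER', 'O', 'U-LOC', 'O']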