This page collects typical usage examples of the Python method allennlp.data.fields.SpanField. If you have been wondering what fields.SpanField does, how to call it, or how other projects use it, the curated examples below may help. You can also explore further examples from the module that contains this class, allennlp.data.fields.
The following shows 15 code examples of fields.SpanField, ordered by popularity by default.
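Before the examples, a minimal self-contained sketch of the core API (not taken from the examples below): a SpanField is a pair of token indices, inclusive on both ends, that points into a sequence field such as a TextField.

from allennlp.data.fields import SpanField, TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer
from allennlp.data.tokenizers import Token

# A SpanField points into an existing sequence field.
text = TextField([Token(t) for t in "The quick brown fox".split()],
                 {"tokens": SingleIdTokenIndexer()})
# Both indices are inclusive: this span covers "quick brown".
span = SpanField(1, 2, text)
# A SpanField tensorizes to just the pair of indices.
print(span.as_tensor(span.get_padding_lengths()))  # tensor([1, 2])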
Example 1: text_to_instance
# Required import: from allennlp.data import fields [as alias]
# Or: from allennlp.data.fields import SpanField [as alias]
def text_to_instance(self,  # type: ignore
                     tokens: List[str],
                     entity_1: Tuple[int, int],
                     entity_2: Tuple[int, int],
                     label: str = None) -> Instance:
    # pylint: disable=arguments-differ
    fields: Dict[str, Field] = {}
    tokens = [OpenAISplitter._standardize(token) for token in tokens]
    # Surround the two entity mentions with delimiter tokens and prepend
    # them to the full sentence, ending with a classification token.
    tokens = (['__start__'] + tokens[entity_1[0]:entity_1[1] + 1] + ['__del1__']
              + tokens[entity_2[0]:entity_2[1] + 1] + ['__del2__']
              + tokens + ['__clf__'])
    sentence = TextField([Token(text=t) for t in tokens], self._token_indexers)
    fields['sentence'] = sentence
    # fields['entity1'] = SpanField(*entity_1, sequence_field=sentence)
    # fields['entity2'] = SpanField(*entity_2, sequence_field=sentence)
    if label:
        fields['label'] = LabelField(label)
    return Instance(fields)
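For reference, a hypothetical call to this reader might look like the following (the tokens, spans, and label here are illustrative, not from the source):

instance = reader.text_to_instance(
    tokens="Alice works for Acme Corp .".split(),
    entity_1=(0, 0),   # inclusive token span of the first entity
    entity_2=(3, 4),   # inclusive token span of the second entity
    label="works_for",
)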
Example 2: test_as_tensor_converts_span_field_correctly
# Required import: from allennlp.data import fields [as alias]
# Or: from allennlp.data.fields import SpanField [as alias]
def test_as_tensor_converts_span_field_correctly(self):
    span_field = SpanField(2, 3, self.text)
    tensor = span_field.as_tensor(span_field.get_padding_lengths()).detach().cpu().numpy()
    numpy.testing.assert_array_equal(tensor, numpy.array([2, 3]))
Example 3: test_span_field_raises_on_incorrect_label_type
# Required import: from allennlp.data import fields [as alias]
# Or: from allennlp.data.fields import SpanField [as alias]
def test_span_field_raises_on_incorrect_label_type(self):
    with pytest.raises(TypeError):
        _ = SpanField("hello", 3, self.text)
Example 4: test_span_field_raises_on_ill_defined_span
# Required import: from allennlp.data import fields [as alias]
# Or: from allennlp.data.fields import SpanField [as alias]
def test_span_field_raises_on_ill_defined_span(self):
    # A span whose start comes after its end is rejected.
    with pytest.raises(ValueError):
        _ = SpanField(4, 1, self.text)
Example 5: test_span_field_raises_if_span_end_is_greater_than_sentence_length
# Required import: from allennlp.data import fields [as alias]
# Or: from allennlp.data.fields import SpanField [as alias]
def test_span_field_raises_if_span_end_is_greater_than_sentence_length(self):
    with pytest.raises(ValueError):
        _ = SpanField(1, 30, self.text)
Example 6: test_empty_span_field_works
# Required import: from allennlp.data import fields [as alias]
# Or: from allennlp.data.fields import SpanField [as alias]
def test_empty_span_field_works(self):
    span_field = SpanField(1, 3, self.text)
    empty_span = span_field.empty_field()
    assert empty_span.span_start == -1
    assert empty_span.span_end == -1
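As this test shows, empty_field() yields a (-1, -1) span; ListField uses such empty fields as the padding element when batching a variable number of SpanFields.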
Example 7: test_equality
# Required import: from allennlp.data import fields [as alias]
# Or: from allennlp.data.fields import SpanField [as alias]
def test_equality(self):
    span_field1 = SpanField(2, 3, self.text)
    span_field2 = SpanField(2, 3, self.text)
    span_field3 = SpanField(
        2, 3, TextField([Token(t) for t in ["not", "the", "same", "tokens"]], self.indexers)
    )
    # A SpanField compares equal to a plain (start, end) tuple...
    assert span_field1 == (2, 3)
    assert span_field1 == span_field1
    assert span_field1 == span_field2
    # ...but not to a span over a different sequence field.
    assert span_field1 != span_field3
    assert span_field2 != span_field3
Example 8: test_span_field_raises_on_incorrect_label_type
# Required import: from allennlp.data import fields [as alias]
# Or: from allennlp.data.fields import SpanField [as alias]
def test_span_field_raises_on_incorrect_label_type(self):
    # Python 2-era variant of Example 3 (note the u"..." literal).
    with pytest.raises(TypeError):
        _ = SpanField(u"hello", 3, self.text)
Example 9: text_to_instance
# Required import: from allennlp.data import fields [as alias]
# Or: from allennlp.data.fields import SpanField [as alias]
def text_to_instance(self,  # type: ignore
                     sent1: str,  # Important type information
                     sent2: str,
                     pid: str = None,
                     label: str = None) -> Instance:
    fields: Dict[str, Field] = {}
    tokenized_text1 = self.bert_tokenizer.tokenize(sent1)
    tokenized_text2 = self.bert_tokenizer.tokenize(sent2)
    # _truncate_seq_pair(tokenized_text1, tokenized_text2, self.max_l)
    tokenized_text1 = tokenized_text1[:self.s1_l]
    tokenized_text2 = tokenized_text2[:(self.max_l - len(tokenized_text1))]

    # Build a single "[CLS] s1 [SEP] s2 [SEP]" sequence with BERT segment ids.
    joint_tokens_seq = ['[CLS]'] + tokenized_text1 + ['[SEP]'] + tokenized_text2 + ['[SEP]']
    text1_len = len(tokenized_text1) + 2
    text2_len = len(tokenized_text2) + 1
    segments_ids = [0 for _ in range(text1_len)] + [1 for _ in range(text2_len)]

    joint_tokens_ids = self.bert_tokenizer.convert_tokens_to_ids(joint_tokens_seq)
    assert len(joint_tokens_ids) == len(segments_ids)

    fields['paired_sequence'] = BertIndexField(np.asarray(joint_tokens_ids, dtype=np.int64))
    fields['paired_segments_ids'] = BertIndexField(np.asarray(segments_ids, dtype=np.int64))

    text1_span = (1, 1 + len(tokenized_text1))  # End is exclusive (important for later use)
    text2_span = (text1_span[1] + 1, text1_span[1] + 1 + len(tokenized_text2))

    fields['bert_s1_span'] = SpanField(text1_span[0], text1_span[1], fields['paired_sequence'])
    fields['bert_s2_span'] = SpanField(text2_span[0], text2_span[1], fields['paired_sequence'])

    if label:
        fields['label'] = LabelField(label, label_namespace='labels')
    if pid:
        fields['pid'] = IdField(pid)

    return Instance(fields)
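A caution on conventions: this reader deliberately stores end-exclusive indices in its SpanFields (as its inline comments stress), whereas AllenNLP's SpanField itself documents both indices as inclusive. Whatever consumes bert_s1_span and bert_s2_span downstream presumably interprets them with the exclusive convention.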
Example 10: text_to_instance
# Required import: from allennlp.data import fields [as alias]
# Or: from allennlp.data.fields import SpanField [as alias]
def text_to_instance(self,  # type: ignore
                     example) -> Instance:
    fields: Dict[str, Field] = {}

    joint_tokens_seq = ['[CLS]'] + example['query_c_tokens'] + ['[SEP]'] + example['context_c_tokens'] + ['[SEP]']
    assert len(joint_tokens_seq) < 512

    text1_len = len(example['query_c_tokens']) + 2
    text2_len = len(example['context_c_tokens']) + 1
    segments_ids = [0 for _ in range(text1_len)] + [1 for _ in range(text2_len)]

    joint_tokens_ids = self.bert_tokenizer.convert_tokens_to_ids(joint_tokens_seq)
    assert len(joint_tokens_ids) == len(segments_ids)

    fields['paired_sequence'] = BertIndexField(np.asarray(joint_tokens_ids, dtype=np.int64))
    fields['paired_segments_ids'] = BertIndexField(np.asarray(segments_ids, dtype=np.int64))

    # These text spans are begin-inclusive and end-exclusive.
    text1_span = (1, 1 + len(example['query_c_tokens']))  # End is exclusive (important for later use)
    text2_span = (text1_span[1] + 1, text1_span[1] + 1 + len(example['context_c_tokens']))

    fields['bert_s1_span'] = SpanField(text1_span[0], text1_span[1], fields['paired_sequence'])
    fields['bert_s2_span'] = SpanField(text2_span[0], text2_span[1], fields['paired_sequence'])
    # fields['bert_s2_span'] = SpanField(text2_span)
    # fields['bert_s1_span'] = MetadataField(text1_span)
    # fields['bert_s2_span'] = MetadataField(text2_span)

    # However, the ground-truth span is both begin- and end-inclusive.
    fields['gt_span'] = SpanField(example['start_position'], example['end_position'], fields['paired_sequence'])

    fields['fid'] = IdField(example['fid'])
    fields['uid'] = IdField(example['uid'])

    return Instance(fields)
Example 11: text_to_instance
# Required import: from allennlp.data import fields [as alias]
# Or: from allennlp.data.fields import SpanField [as alias]
def text_to_instance(self,  # type: ignore
                     example) -> Instance:
    fields: Dict[str, Field] = {}

    # Here the paired tokens and segment ids are precomputed in the example.
    joint_tokens_seq = example['paired_c_tokens']
    assert len(joint_tokens_seq) <= 512

    segments_ids = example['segment_ids']

    joint_tokens_ids = self.bert_tokenizer.convert_tokens_to_ids(joint_tokens_seq)
    assert len(joint_tokens_ids) == len(segments_ids)

    fields['paired_sequence'] = BertIndexField(np.asarray(joint_tokens_ids, dtype=np.int64))
    fields['paired_segments_ids'] = BertIndexField(np.asarray(segments_ids, dtype=np.int64))

    # These text spans are begin-inclusive and end-exclusive.
    # text1_span = (1, 1 + len(example['query_c_tokens']))  # End is exclusive (important for later use)
    # text2_span = (text1_span[1] + 1, text1_span[1] + 1 + len(example['context_c_tokens']))
    # fields['bert_s1_span'] = SpanField(text1_span[0], text1_span[1], fields['paired_sequence'])
    # fields['bert_s2_span'] = SpanField(text2_span[0], text2_span[1], fields['paired_sequence'])
    # fields['bert_s2_span'] = SpanField(text2_span)
    # fields['bert_s1_span'] = MetadataField(text1_span)
    # fields['bert_s2_span'] = MetadataField(text2_span)

    # However, the ground-truth span is both begin- and end-inclusive.
    fields['gt_span'] = SpanField(example['start_position'], example['end_position'], fields['paired_sequence'])

    fields['fid'] = IdField(example['fid'])
    fields['uid'] = IdField(example['uid'])

    return Instance(fields)
Example 12: test_list_field_of_dict_field
# Required import: from allennlp.data import fields [as alias]
# Or: from allennlp.data.fields import SpanField [as alias]
def test_list_field_of_dict_field(self):
    from allennlp.data import Instance
    from allennlp.data.iterators import BasicIterator

    tokens3 = "The long sentence .".split()
    tokens3_field = TextField(
        [Token(t) for t in tokens3],
        token_indexers={'tokens': SingleIdTokenIndexer()}
    )

    instance3_fields = {
        "candidate_entities": TextField(
            [Token("entity1 entity2 entity3"), Token("entity_unk"), Token("entity2 entity3")],
            token_indexers=self.entity_indexer),
        "candidate_entity_prior": ArrayField(np.array([[0.1, 0.1, 0.8],
                                                       [1.0, 0.0, 0.0],
                                                       [0.33, 0.67, 0.0]])),
        "candidate_spans": ListField(
            [SpanField(1, 1, tokens3_field), SpanField(1, 2, tokens3_field), SpanField(1, 3, tokens3_field)],
        )
    }

    iterator = BasicIterator()
    iterator.index_with(self.vocab)

    instances = [Instance({"candidates": ListField([
                     DictField(self.instance1_fields),
                     DictField(self.instance2_fields)])}),
                 Instance({"candidates": ListField([
                     DictField(self.instance1_fields),
                     DictField(instance3_fields)])})]

    for batch in iterator(instances, num_epochs=1, shuffle=False):
        pass

    self.assertTrue(batch['candidates']['candidate_entities']['entity'].shape ==
                    batch['candidates']['candidate_entity_prior'].shape)
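Note that DictField is not part of core AllenNLP; it appears to come from the KnowBert (kb) codebase from which this test was extracted.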
Example 13: text_to_instance
# Required import: from allennlp.data import fields [as alias]
# Or: from allennlp.data.fields import SpanField [as alias]
def text_to_instance(self,  # type: ignore
                     query: str,  # Important type information
                     context: str,
                     fid: str = None,
                     qid: str = None,
                     selection_label: str = None) -> Instance:
    fields: Dict[str, Field] = {}

    tokenized_text1 = self.bert_tokenizer.tokenize(query)
    tokenized_text2 = self.bert_tokenizer.tokenize(context)
    # _truncate_seq_pair(tokenized_text1, tokenized_text2, self.max_l)
    tokenized_text1 = tokenized_text1[:self.query_l]
    tokenized_text2 = tokenized_text2[:self.context_l]

    # Encode query and context as two separate "[CLS] ..." sequences.
    s1_tokens_seq = ['[CLS]'] + tokenized_text1
    s2_tokens_seq = ['[CLS]'] + tokenized_text2
    # text1_len = len(tokenized_text1) + 1
    # text2_len = len(tokenized_text2) + 1
    # segments_ids = [0 for _ in range(text1_len)] + [1 for _ in range(text2_len)]

    s1_tokens_ids = self.bert_tokenizer.convert_tokens_to_ids(s1_tokens_seq)
    s2_tokens_ids = self.bert_tokenizer.convert_tokens_to_ids(s2_tokens_seq)

    fields['s1_sequence'] = BertIndexField(np.asarray(s1_tokens_ids, dtype=np.int64))
    fields['s2_sequence'] = BertIndexField(np.asarray(s2_tokens_ids, dtype=np.int64))

    text1_span = (1, len(tokenized_text1))  # End is exclusive (important for later use)
    text2_span = (1, len(tokenized_text2))

    fields['bert_s1_span'] = SpanField(text1_span[0], text1_span[1], fields['s1_sequence'])
    fields['bert_s2_span'] = SpanField(text2_span[0], text2_span[1], fields['s2_sequence'])

    if selection_label:
        fields['label'] = LabelField(selection_label, label_namespace='labels')

    assert fid is not None
    assert qid is not None
    fields['fid'] = IdField(fid)
    fields['qid'] = IdField(qid)

    return Instance(fields)
Example 14: text_to_instance
# Required import: from allennlp.data import fields [as alias]
# Or: from allennlp.data.fields import SpanField [as alias]
def text_to_instance(self,  # type: ignore
                     s1: str,  # Important type information
                     s2: str,
                     pid: str,
                     selection_label: str = None) -> Instance:
    fields: Dict[str, Field] = {}

    tokenized_text1 = self.bert_tokenizer.tokenize(s1)
    tokenized_text2 = self.bert_tokenizer.tokenize(s2)
    # _truncate_seq_pair(tokenized_text1, tokenized_text2, self.max_l)
    tokenized_text1 = tokenized_text1[:self.s1_l]
    tokenized_text2 = tokenized_text2[:self.s2_l]

    # Encode the two sentences as separate "[CLS] ..." sequences.
    s1_tokens_seq = ['[CLS]'] + tokenized_text1
    s2_tokens_seq = ['[CLS]'] + tokenized_text2
    # text1_len = len(tokenized_text1) + 1
    # text2_len = len(tokenized_text2) + 1
    # segments_ids = [0 for _ in range(text1_len)] + [1 for _ in range(text2_len)]

    s1_tokens_ids = self.bert_tokenizer.convert_tokens_to_ids(s1_tokens_seq)
    s2_tokens_ids = self.bert_tokenizer.convert_tokens_to_ids(s2_tokens_seq)

    fields['s1_sequence'] = BertIndexField(np.asarray(s1_tokens_ids, dtype=np.int64))
    fields['s2_sequence'] = BertIndexField(np.asarray(s2_tokens_ids, dtype=np.int64))

    text1_span = (1, len(tokenized_text1))  # End is exclusive (important for later use)
    text2_span = (1, len(tokenized_text2))

    fields['bert_s1_span'] = SpanField(text1_span[0], text1_span[1], fields['s1_sequence'])
    fields['bert_s2_span'] = SpanField(text2_span[0], text2_span[1], fields['s2_sequence'])

    if selection_label:
        fields['label'] = LabelField(selection_label, label_namespace='labels')

    # assert fid is not None
    assert pid is not None
    # fields['fid'] = IdField(fid)
    fields['pid'] = IdField(pid)

    return Instance(fields)
Example 15: text_to_instance
# Required import: from allennlp.data import fields [as alias]
# Or: from allennlp.data.fields import SpanField [as alias]
def text_to_instance(self,
                     tokenized_text: List[str],
                     candidate_entities: List[List[str]],
                     candidate_spans: List[List[int]],
                     candidate_entity_prior: List[List[float]],
                     gold_entities: List[str] = None,
                     doc_id: str = None):
    assert doc_id is not None

    token_field = TextField([Token(x) for x in tokenized_text], self.token_indexers)
    span_fields = ListField([SpanField(*span, token_field) for span in candidate_spans])

    candidate_entities = TextField(
        [Token(" ".join(candidate_list)) for candidate_list in candidate_entities],
        token_indexers=self.entity_indexer)

    # Pad every prior distribution to the maximum number of candidates.
    max_cands = max(len(p) for p in candidate_entity_prior)
    for p in candidate_entity_prior:
        if len(p) < max_cands:
            p.extend([0.0] * (max_cands - len(p)))
    np_prior = np.array(candidate_entity_prior)
    prior_field = ArrayField(np_prior)

    # only one segment
    candidate_segment_ids = ArrayField(
        np.array([0] * len(candidate_entities)), dtype=np.int64  # np.int is deprecated in modern NumPy
    )

    fields = {
        "tokens": token_field,
        "candidate_spans": span_fields,
        "candidate_entities": candidate_entities,
        "candidate_entity_prior": prior_field,
        "candidate_segment_ids": candidate_segment_ids
    }

    if gold_entities:
        labels = TextField([Token(entity) for entity in gold_entities],
                           token_indexers=self.entity_indexer)
        fields["gold_entities"] = labels

    fields["doc_id"] = MetadataField(doc_id)

    if self.extra_candidate_generators:
        tokens = " ".join(tokenized_text)
        extra_candidates = {
            key: generator.get_mentions_raw_text(tokens, whitespace_tokenize=True)
            for key, generator in self.extra_candidate_generators.items()
        }
        fields['extra_candidates'] = MetadataField(extra_candidates)

    return Instance(fields, should_remap_span_indices=self.should_remap_span_indices)
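Note that the should_remap_span_indices argument to Instance is not part of stock AllenNLP either; like DictField in Example 12, it appears to be an extension in the codebase this example was extracted from.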