当前位置: 首页>>代码示例>>Python>>正文


Python fields.SpanField类代码示例

本文整理汇总了Python中allennlp.data.fields.SpanField类的典型用法代码示例。如果您正苦于以下问题:Python fields.SpanField的具体用法?Python fields.SpanField怎么用?Python fields.SpanField使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块allennlp.data.fields的用法示例。


在下文中一共展示了fields.SpanField类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SpanField [as 别名]
def text_to_instance(self,  # type: ignore
                     tokens: List[str],
                     entity_1: Tuple[int],
                     entity_2: Tuple[int],
                     label: str = None) -> Instance:
    """Build an ``Instance`` whose sentence is prefixed with both entity mentions.

    Every token is first passed through ``OpenAISplitter._standardize``.
    The final token sequence is laid out as::

        __start__ <entity 1 tokens> __del1__ <entity 2 tokens> __del2__ <all tokens> __clf__

    Parameters
    ----------
    tokens : the tokens of the sentence.
    entity_1 : indexable pair; positions 0 and 1 are used as the first and
        last token index of the first entity (the last index is included).
    entity_2 : same as ``entity_1`` for the second entity.
    label : optional label; a ``LabelField`` is added only when it is truthy.

    Returns
    -------
    An ``Instance`` with a ``sentence`` field and, optionally, a ``label`` field.
    """
    # pylint: disable=arguments-differ
    normalized = [OpenAISplitter._standardize(token) for token in tokens]

    # The "+ 1" makes the end index of each mention inclusive.
    first_mention = normalized[entity_1[0]:entity_1[1] + 1]
    second_mention = normalized[entity_2[0]:entity_2[1] + 1]

    sequence = (
        ['__start__'] + first_mention
        + ['__del1__'] + second_mention
        + ['__del2__'] + normalized
        + ['__clf__']
    )

    sentence = TextField([Token(text=piece) for piece in sequence], self._token_indexers)

    # No SpanField is attached for the entities: the mentions are conveyed
    # only through the prefix built above.
    fields: Dict[str, Field] = {'sentence': sentence}

    if label:
        fields['label'] = LabelField(label)

    return Instance(fields)
开发者ID:DFKI-NLP,项目名称:DISTRE,代码行数:22,代码来源:semeval_2010_task_8_reader.py

示例2: test_as_tensor_converts_span_field_correctly

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SpanField [as 别名]
def test_as_tensor_converts_span_field_correctly(self):
    """``as_tensor`` on a ``SpanField(2, 3, ...)`` must yield the array ``[2, 3]``."""
    field = SpanField(2, 3, self.text)
    padding_lengths = field.get_padding_lengths()
    # Convert the tensor to a plain NumPy array so it can be compared directly.
    as_array = field.as_tensor(padding_lengths).detach().cpu().numpy()
    expected = numpy.array([2, 3])
    numpy.testing.assert_array_equal(as_array, expected)
开发者ID:allenai,项目名称:allennlp,代码行数:6,代码来源:span_field_test.py

示例3: test_span_field_raises_on_incorrect_label_type

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SpanField [as 别名]
def test_span_field_raises_on_incorrect_label_type(self):
    """Passing a string as the first span index must raise ``TypeError``."""
    not_an_index = "hello"
    with pytest.raises(TypeError):
        SpanField(not_an_index, 3, self.text)
开发者ID:allenai,项目名称:allennlp,代码行数:5,代码来源:span_field_test.py

示例4: test_span_field_raises_on_ill_defined_span

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SpanField [as 别名]
def test_span_field_raises_on_ill_defined_span(self):
    """A span whose first index (4) exceeds its second (1) must raise ``ValueError``."""
    first_index, second_index = 4, 1
    with pytest.raises(ValueError):
        SpanField(first_index, second_index, self.text)
开发者ID:allenai,项目名称:allennlp,代码行数:5,代码来源:span_field_test.py

示例5: test_span_field_raises_if_span_end_is_greater_than_sentence_length

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SpanField [as 别名]
def test_span_field_raises_if_span_end_is_greater_than_sentence_length(self):
    """A span ending at index 30 on ``self.text`` must raise ``ValueError``."""
    out_of_range_end = 30
    with pytest.raises(ValueError):
        SpanField(1, out_of_range_end, self.text)
开发者ID:allenai,项目名称:allennlp,代码行数:5,代码来源:span_field_test.py

示例6: test_empty_span_field_works

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SpanField [as 别名]
def test_empty_span_field_works(self):
    """``empty_field()`` must return a span whose start and end are both -1."""
    placeholder = SpanField(1, 3, self.text).empty_field()
    assert placeholder.span_start == -1
    assert placeholder.span_end == -1
开发者ID:allenai,项目名称:allennlp,代码行数:7,代码来源:span_field_test.py

示例7: test_equality

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SpanField [as 别名]
def test_equality(self):
    """Check ``SpanField`` equality against a tuple, itself, a twin and a different text."""
    original = SpanField(2, 3, self.text)
    twin = SpanField(2, 3, self.text)

    # Same indices, but attached to a different sequence of tokens.
    other_words = ["not", "the", "same", "tokens"]
    other_text = TextField([Token(word) for word in other_words], self.indexers)
    on_other_text = SpanField(2, 3, other_text)

    assert original == (2, 3)
    assert original == original
    assert original == twin
    assert original != on_other_text
    assert twin != on_other_text
开发者ID:allenai,项目名称:allennlp,代码行数:14,代码来源:span_field_test.py

示例8: test_span_field_raises_on_incorrect_label_type

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SpanField [as 别名]
def test_span_field_raises_on_incorrect_label_type(self):
    """Passing a text string as the first span index must raise ``TypeError``."""
    not_an_index = u"hello"
    with pytest.raises(TypeError):
        SpanField(not_an_index, 3, self.text)
开发者ID:plasticityai,项目名称:magnitude,代码行数:5,代码来源:span_field_test.py

示例9: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SpanField [as 别名]
def text_to_instance(self,  # type: ignore
                     sent1: str,  # Important type information
                     sent2: str,
                     pid: str = None,
                     label: str = None) -> Instance:
    """Encode a sentence pair as one ``[CLS] s1 [SEP] s2 [SEP]`` instance.

    Both sentences are tokenized with ``self.bert_tokenizer``. The first is
    cut to ``self.s1_l`` tokens and the second to whatever is left of
    ``self.max_l`` after the first.

    Parameters
    ----------
    sent1 : text of the first sentence.
    sent2 : text of the second sentence.
    pid : optional id; a ``pid`` field is added only when it is truthy.
    label : optional label; a ``label`` field is added only when it is truthy.

    Returns
    -------
    An ``Instance`` with the fields ``paired_sequence``,
    ``paired_segments_ids``, ``bert_s1_span``, ``bert_s2_span`` and,
    optionally, ``label`` and ``pid``.
    """
    fields: Dict[str, Field] = {}

    tokenized_text1 = self.bert_tokenizer.tokenize(sent1)
    tokenized_text2 = self.bert_tokenizer.tokenize(sent2)

    tokenized_text1 = tokenized_text1[:self.s1_l]
    # Clamp the remaining budget at zero. A negative slice bound would count
    # from the END of the list, so an over-long first sentence would merely
    # trim a few trailing tokens of the second one instead of leaving none.
    remaining_budget = max(0, self.max_l - len(tokenized_text1))
    tokenized_text2 = tokenized_text2[:remaining_budget]

    joint_tokens_seq = ['[CLS]'] + tokenized_text1 + ['[SEP]'] + tokenized_text2 + ['[SEP]']

    # Segment 0 covers "[CLS] s1 [SEP]", segment 1 covers "s2 [SEP]".
    text1_len = len(tokenized_text1) + 2
    text2_len = len(tokenized_text2) + 1
    segments_ids = [0] * text1_len + [1] * text2_len

    joint_tokens_ids = self.bert_tokenizer.convert_tokens_to_ids(joint_tokens_seq)
    assert len(joint_tokens_ids) == len(segments_ids)

    fields['paired_sequence'] = BertIndexField(np.asarray(joint_tokens_ids, dtype=np.int64))
    fields['paired_segments_ids'] = BertIndexField(np.asarray(segments_ids, dtype=np.int64))

    # Both spans start on the first token of their sentence and end one past
    # its last token (i.e. on the following [SEP]): the end is exclusive,
    # which is important for later use.
    text1_span = (1, 1 + len(tokenized_text1))
    text2_span = (text1_span[1] + 1, text1_span[1] + 1 + len(tokenized_text2))

    fields['bert_s1_span'] = SpanField(text1_span[0], text1_span[1], fields['paired_sequence'])
    fields['bert_s2_span'] = SpanField(text2_span[0], text2_span[1], fields['paired_sequence'])

    if label:
        fields['label'] = LabelField(label, label_namespace='labels')

    if pid:
        fields['pid'] = IdField(pid)

    return Instance(fields)
开发者ID:easonnie,项目名称:semanticRetrievalMRS,代码行数:41,代码来源:bert_fever_reader.py

示例10: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SpanField [as 别名]
def text_to_instance(self,  # type: ignore
                     example) -> Instance:
    """Turn one pre-tokenized query/context example into a span-prediction ``Instance``.

    ``example`` is indexed with the keys ``query_c_tokens``,
    ``context_c_tokens``, ``start_position``, ``end_position``, ``fid`` and
    ``uid``. The tokens are joined as ``[CLS] query [SEP] context [SEP]`` and
    the joined sequence must be shorter than 512 tokens.

    Returns
    -------
    An ``Instance`` with the fields ``paired_sequence``,
    ``paired_segments_ids``, ``bert_s1_span``, ``bert_s2_span``, ``gt_span``,
    ``fid`` and ``uid``.
    """
    query_tokens = example['query_c_tokens']
    context_tokens = example['context_c_tokens']

    joint_tokens_seq = ['[CLS]'] + query_tokens + ['[SEP]'] + context_tokens + ['[SEP]']
    assert len(joint_tokens_seq) < 512

    # Segment 0 covers "[CLS] query [SEP]", segment 1 covers "context [SEP]".
    segments_ids = [0] * (len(query_tokens) + 2) + [1] * (len(context_tokens) + 1)

    joint_tokens_ids = self.bert_tokenizer.convert_tokens_to_ids(joint_tokens_seq)
    assert len(joint_tokens_ids) == len(segments_ids)

    paired_sequence = BertIndexField(np.asarray(joint_tokens_ids, dtype=np.int64))

    # These two spans are begin-inclusive and end-exclusive (important for
    # later use): each end index lands on the [SEP] that follows the text.
    query_start = 1
    query_end = query_start + len(query_tokens)
    context_start = query_end + 1
    context_end = context_start + len(context_tokens)

    fields: Dict[str, Field] = {
        'paired_sequence': paired_sequence,
        'paired_segments_ids': BertIndexField(np.asarray(segments_ids, dtype=np.int64)),
        'bert_s1_span': SpanField(query_start, query_end, paired_sequence),
        'bert_s2_span': SpanField(context_start, context_end, paired_sequence),
        # Unlike the two spans above, the ground-truth span is inclusive on
        # both its begin and its end.
        'gt_span': SpanField(example['start_position'], example['end_position'], paired_sequence),
        'fid': IdField(example['fid']),
        'uid': IdField(example['uid']),
    }

    return Instance(fields)
开发者ID:easonnie,项目名称:semanticRetrievalMRS,代码行数:38,代码来源:span_pred_reader.py

示例11: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SpanField [as 别名]
def text_to_instance(self,  # type: ignore
                     example) -> Instance:
    """Turn one already-paired example into a span-prediction ``Instance``.

    ``example`` is indexed with the keys ``paired_c_tokens``,
    ``segment_ids``, ``start_position``, ``end_position``, ``fid`` and
    ``uid``. The token sequence is used as given (at most 512 tokens) and
    must have as many entries as ``segment_ids``.

    Returns
    -------
    An ``Instance`` with the fields ``paired_sequence``,
    ``paired_segments_ids``, ``gt_span``, ``fid`` and ``uid``.
    """
    joint_tokens_seq = example['paired_c_tokens']
    assert len(joint_tokens_seq) <= 512

    segments_ids = example['segment_ids']

    joint_tokens_ids = self.bert_tokenizer.convert_tokens_to_ids(joint_tokens_seq)
    assert len(joint_tokens_ids) == len(segments_ids)

    paired_sequence = BertIndexField(np.asarray(joint_tokens_ids, dtype=np.int64))

    # No per-sentence span fields are produced here; only the ground-truth
    # span is attached.
    fields: Dict[str, Field] = {
        'paired_sequence': paired_sequence,
        'paired_segments_ids': BertIndexField(np.asarray(segments_ids, dtype=np.int64)),
        # The ground-truth span is inclusive on both its begin and its end.
        'gt_span': SpanField(example['start_position'], example['end_position'], paired_sequence),
        'fid': IdField(example['fid']),
        'uid': IdField(example['uid']),
    }

    return Instance(fields)
开发者ID:easonnie,项目名称:semanticRetrievalMRS,代码行数:35,代码来源:paired_span_pred_reader.py

示例12: test_list_field_of_dict_field

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SpanField [as 别名]
def test_list_field_of_dict_field(self):
    """Batch two instances holding ``ListField``s of ``DictField``s and compare shapes.

    The last batch's ``candidate_entities['entity']`` tensor must have the
    same shape as its ``candidate_entity_prior`` tensor.
    """
    from allennlp.data import Instance
    from allennlp.data.iterators import BasicIterator

    words = "The long sentence .".split()
    sentence_field = TextField(
        [Token(word) for word in words],
        token_indexers={'tokens': SingleIdTokenIndexer()}
    )

    # A third set of fields with three candidate spans, each carrying three
    # prior values.
    entity_field = TextField(
        [Token("entity1 entity2 entity3"), Token("entity_unk"), Token("entity2 entity3")],
        token_indexers=self.entity_indexer)
    prior_field = ArrayField(np.array([[0.1, 0.1, 0.8],
                                       [1.0, 0.0, 0.0],
                                       [0.33, 0.67, 0.0]]))
    spans_field = ListField(
        [SpanField(1, end, sentence_field) for end in (1, 2, 3)],
    )
    instance3_fields = {
        "candidate_entities": entity_field,
        "candidate_entity_prior": prior_field,
        "candidate_spans": spans_field,
    }

    iterator = BasicIterator()
    iterator.index_with(self.vocab)

    first_instance = Instance({"candidates": ListField([
        DictField(self.instance1_fields),
        DictField(self.instance2_fields)])})
    second_instance = Instance({"candidates": ListField([
        DictField(self.instance1_fields),
        DictField(instance3_fields)])})
    instances = [first_instance, second_instance]

    # Exhaust the iterator; only the last batch is inspected below.
    for batch in iterator(instances, num_epochs=1, shuffle=False):
        pass

    candidates = batch['candidates']
    entity_shape = candidates['candidate_entities']['entity'].shape
    prior_shape = candidates['candidate_entity_prior'].shape
    self.assertTrue(entity_shape == prior_shape)
开发者ID:allenai,项目名称:kb,代码行数:39,代码来源:test_dict_field.py

示例13: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SpanField [as 别名]
def text_to_instance(self,  # type: ignore
                     query: str,  # Important type information
                     context: str,
                     fid: str = None,
                     qid: str = None,
                     selection_label: str = None) -> Instance:
    """Encode a query and a context as two separate ``[CLS]``-prefixed sequences.

    Both texts are tokenized with ``self.bert_tokenizer``; the query is cut to
    ``self.query_l`` tokens and the context to ``self.context_l`` tokens.

    Parameters
    ----------
    query : the query text.
    context : the context text.
    fid : required despite the ``None`` default (asserted to be not ``None``).
    qid : required despite the ``None`` default (asserted to be not ``None``).
    selection_label : optional; a ``label`` field is added only when truthy.

    Returns
    -------
    An ``Instance`` with the fields ``s1_sequence``, ``s2_sequence``,
    ``bert_s1_span``, ``bert_s2_span``, optionally ``label``, then ``fid``
    and ``qid``.
    """
    tokenizer = self.bert_tokenizer

    query_pieces = tokenizer.tokenize(query)
    context_pieces = tokenizer.tokenize(context)

    query_pieces = query_pieces[:self.query_l]
    context_pieces = context_pieces[:self.context_l]

    # Each text gets its own sequence: "[CLS]" followed by its tokens.
    s1_tokens_ids = tokenizer.convert_tokens_to_ids(['[CLS]'] + query_pieces)
    s2_tokens_ids = tokenizer.convert_tokens_to_ids(['[CLS]'] + context_pieces)

    s1_sequence = BertIndexField(np.asarray(s1_tokens_ids, dtype=np.int64))
    s2_sequence = BertIndexField(np.asarray(s2_tokens_ids, dtype=np.int64))

    # NOTE(review): the original comment on these spans said "End is exclusive
    # (important for later use)". With the tokens occupying indices
    # 1..len(tokens), an end index of len(tokens) is the LAST token, so the
    # comment may be stale -- confirm against the consumer of these spans.
    fields: Dict[str, Field] = {
        's1_sequence': s1_sequence,
        's2_sequence': s2_sequence,
        'bert_s1_span': SpanField(1, len(query_pieces), s1_sequence),
        'bert_s2_span': SpanField(1, len(context_pieces), s2_sequence),
    }

    if selection_label:
        fields['label'] = LabelField(selection_label, label_namespace='labels')

    assert fid is not None
    assert qid is not None
    fields['fid'] = IdField(fid)
    fields['qid'] = IdField(qid)

    return Instance(fields)
开发者ID:easonnie,项目名称:semanticRetrievalMRS,代码行数:47,代码来源:bert_reader_context_selection.py

示例14: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SpanField [as 别名]
def text_to_instance(self,  # type: ignore
                     s1: str,  # Important type information
                     s2: str,
                     pid: str,
                     selection_label: str = None) -> Instance:
    """Encode two sentences as two separate ``[CLS]``-prefixed sequences.

    Both texts are tokenized with ``self.bert_tokenizer``; the first is cut to
    ``self.s1_l`` tokens and the second to ``self.s2_l`` tokens.

    Parameters
    ----------
    s1 : text of the first sentence.
    s2 : text of the second sentence.
    pid : id stored in the ``pid`` field; asserted to be not ``None``.
    selection_label : optional; a ``label`` field is added only when truthy.

    Returns
    -------
    An ``Instance`` with the fields ``s1_sequence``, ``s2_sequence``,
    ``bert_s1_span``, ``bert_s2_span``, optionally ``label``, then ``pid``.
    """
    tokenizer = self.bert_tokenizer

    first_pieces = tokenizer.tokenize(s1)
    second_pieces = tokenizer.tokenize(s2)

    first_pieces = first_pieces[:self.s1_l]
    second_pieces = second_pieces[:self.s2_l]

    # Each sentence gets its own sequence: "[CLS]" followed by its tokens.
    s1_tokens_ids = tokenizer.convert_tokens_to_ids(['[CLS]'] + first_pieces)
    s2_tokens_ids = tokenizer.convert_tokens_to_ids(['[CLS]'] + second_pieces)

    s1_sequence = BertIndexField(np.asarray(s1_tokens_ids, dtype=np.int64))
    s2_sequence = BertIndexField(np.asarray(s2_tokens_ids, dtype=np.int64))

    # NOTE(review): the original comment on these spans said "End is exclusive
    # (important for later use)". With the tokens occupying indices
    # 1..len(tokens), an end index of len(tokens) is the LAST token, so the
    # comment may be stale -- confirm against the consumer of these spans.
    fields: Dict[str, Field] = {
        's1_sequence': s1_sequence,
        's2_sequence': s2_sequence,
        'bert_s1_span': SpanField(1, len(first_pieces), s1_sequence),
        'bert_s2_span': SpanField(1, len(second_pieces), s2_sequence),
    }

    if selection_label:
        fields['label'] = LabelField(selection_label, label_namespace='labels')

    assert pid is not None
    fields['pid'] = IdField(pid)

    return Instance(fields)
开发者ID:easonnie,项目名称:semanticRetrievalMRS,代码行数:46,代码来源:bert_fever_verification_separate_seq.py

示例15: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SpanField [as 别名]
def text_to_instance(self,
                     tokenized_text: List[str],
                     candidate_entities: List[List[str]],
                     candidate_spans: List[List[int]],
                     candidate_entity_prior: List[List[float]],
                     gold_entities: List[str] = None,
                     doc_id: str = None):
    """Build an entity-linking ``Instance`` from tokens and candidate annotations.

    Parameters
    ----------
    tokenized_text : the tokens of the text.
    candidate_entities : one list of entity names per candidate span; each
        list is joined with single spaces into one ``Token``.
    candidate_spans : one span per candidate, unpacked into ``SpanField``.
    candidate_entity_prior : one list of prior values per candidate span.
        Shorter lists are right-padded with ``0.0`` to the longest length.
        The padding is done on copies; the caller's lists are not modified.
        Must be non-empty, since ``max()`` of an empty sequence raises
        ``ValueError``.
    gold_entities : optional; a ``gold_entities`` field is added only when
        truthy.
    doc_id : required despite the ``None`` default (asserted to be not
        ``None``); stored in a ``MetadataField``.

    Returns
    -------
    An ``Instance`` with the fields ``tokens``, ``candidate_spans``,
    ``candidate_entities``, ``candidate_entity_prior``,
    ``candidate_segment_ids``, optionally ``gold_entities``, ``doc_id`` and,
    when ``self.extra_candidate_generators`` is truthy, ``extra_candidates``.
    """
    assert doc_id is not None

    token_field = TextField([Token(x) for x in tokenized_text], self.token_indexers)
    span_fields = ListField([SpanField(*span, token_field) for span in candidate_spans])

    entity_field = TextField(
        [Token(" ".join(candidate_list)) for candidate_list in candidate_entities],
        token_indexers=self.entity_indexer)

    # Pad every prior list to a common length so they form a rectangular
    # array. New lists are built instead of extending the inputs in place, so
    # the caller's data is left untouched.
    max_cands = max(len(p) for p in candidate_entity_prior)
    padded_prior = [
        list(p) + [0.0] * (max_cands - len(p))
        for p in candidate_entity_prior
    ]
    prior_field = ArrayField(np.array(padded_prior))

    # Only one segment, so every candidate gets segment id 0.
    # ``np.int`` was a plain alias of the builtin ``int`` and was removed in
    # NumPy 1.24; the builtin is the identical dtype on every NumPy version.
    candidate_segment_ids = ArrayField(
        np.array([0] * len(entity_field)), dtype=int
    )

    fields = {
        "tokens": token_field,
        "candidate_spans": span_fields,
        "candidate_entities": entity_field,
        "candidate_entity_prior": prior_field,
        "candidate_segment_ids": candidate_segment_ids
    }
    if gold_entities:
        labels = TextField([Token(entity) for entity in gold_entities],
                           token_indexers=self.entity_indexer)
        fields["gold_entities"] = labels

    fields["doc_id"] = MetadataField(doc_id)

    if self.extra_candidate_generators:
        # The extra generators receive the text re-joined with single spaces.
        tokens = " ".join(tokenized_text)
        extra_candidates = {
            key: generator.get_mentions_raw_text(tokens, whitespace_tokenize=True)
            for key, generator in self.extra_candidate_generators.items()
        }
        fields['extra_candidates'] = MetadataField(extra_candidates)

    return Instance(fields, should_remap_span_indices=self.should_remap_span_indices)
开发者ID:allenai,项目名称:kb,代码行数:54,代码来源:wiki_linking_reader.py


注:本文中的allennlp.data.fields.SpanField类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。