当前位置: 首页>>代码示例>>Python>>正文


Python fields.SequenceLabelField方法代码示例

本文整理汇总了Python中allennlp.data.fields.SequenceLabelField类的典型用法代码示例。如果您正苦于以下问题：Python fields.SequenceLabelField类的具体用法是什么？Python fields.SequenceLabelField怎么用？有哪些Python fields.SequenceLabelField的使用例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块allennlp.data.fields的用法示例。


在下文中一共展示了fields.SequenceLabelField类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self, context_tokens: List[Token], tokens: List[Token], tags: List[str] = None,
        intents: List[str] = None, dialog_act: Dict[str, Any] = None) -> Instance:  # type: ignore
    """
    Build an ``Instance`` from a pre-tokenized utterance and its pre-tokenized context.

    We take `pre-tokenized` input here, because we don't have a tokenizer in this class.

    Parameters
    ----------
    context_tokens : tokens stored in the ``"context_tokens"`` text field.
    tokens : tokens stored in the ``"tokens"`` text field; their ``.text`` values are
        also recorded under ``metadata["words"]``.
    tags : optional per-token labels; when not ``None`` they become a
        ``SequenceLabelField`` aligned with the ``"tokens"`` field.
    intents : optional labels; when not ``None`` they become a ``MultiLabelField``
        in the ``"intent_labels"`` namespace.
    dialog_act : optional dictionary stored under ``metadata["dialog_act"]``;
        an empty dict is stored when it is ``None``.

    Returns
    -------
    An ``Instance`` holding ``context_tokens``, ``tokens``, ``metadata`` and,
    when supplied, ``tags`` and ``intents``.
    """
    # pylint: disable=arguments-differ
    fields: Dict[str, Field] = {}
    fields["context_tokens"] = TextField(context_tokens, self._token_indexers)
    fields["tokens"] = TextField(tokens, self._token_indexers)
    # Build the metadata exactly once. Doing it here keeps "metadata" in the same
    # position among the fields as before, without creating a throwaway
    # MetadataField that is immediately overwritten.
    fields["metadata"] = MetadataField({
        "words": [x.text for x in tokens],
        "dialog_act": dialog_act if dialog_act is not None else {},
    })
    if tags is not None:
        fields["tags"] = SequenceLabelField(tags, fields["tokens"])
    if intents is not None:
        fields["intents"] = MultiLabelField(intents, label_namespace="intent_labels")
    return Instance(fields)
开发者ID:ConvLab,项目名称:ConvLab,代码行数:23,代码来源:dataset_reader.py

示例2: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self, tokens: List[Token], tags: List[str] = None, domain: str = None,
        intent: str = None, dialog_act: Dict[str, Any] = None) -> Instance:  # type: ignore
    """
    Assemble an ``Instance`` from already-tokenized input (this class has no tokenizer).

    ``tags``, ``domain`` and ``intent`` are each added only when truthy, as a
    ``SequenceLabelField`` aligned with the tokens, a ``LabelField`` in the
    ``"domain_labels"`` namespace and a ``LabelField`` in the ``"intent_labels"``
    namespace respectively. The ``metadata`` field always carries the token texts
    under ``"words"`` and ``dialog_act`` (or an empty dict when it is ``None``)
    under ``"dialog_act"``.
    """
    # pylint: disable=arguments-differ
    token_field = TextField(tokens, self._token_indexers)
    instance_fields: Dict[str, Field] = {"tokens": token_field}

    if tags:
        instance_fields["tags"] = SequenceLabelField(tags, token_field)
    if domain:
        instance_fields["domain"] = LabelField(domain, label_namespace="domain_labels")
    if intent:
        instance_fields["intent"] = LabelField(intent, label_namespace="intent_labels")

    # A missing dialogue act is recorded as an empty dict rather than None.
    act = {} if dialog_act is None else dialog_act
    instance_fields["metadata"] = MetadataField({"words": [tok.text for tok in tokens],
                                                 'dialog_act': act})
    return Instance(instance_fields)
开发者ID:ConvLab,项目名称:ConvLab,代码行数:23,代码来源:dataset_reader.py

示例3: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self, tokens, ner_tags=None):  # type: ignore
    u"""
    Turn pre-tokenized input into an ``Instance`` (no tokenizer is available here).

    The instance always has a ``tokens`` text field and a ``metadata`` field with
    the token texts under ``"words"``. When ``ner_tags`` is given, a ``tags``
    sequence-label field is added; if ``self._coding_scheme`` is ``"BIOUL"`` the
    tags are first passed through ``to_bioul`` with ``encoding="BIO"``.
    """
    # pylint: disable=arguments-differ
    token_field = TextField(tokens, self._token_indexers)
    words = [token.text for token in tokens]
    fields = {u'tokens': token_field,
              u"metadata": MetadataField({u"words": words})}

    # Without gold tags there is nothing more to attach.
    if ner_tags is None:
        return Instance(fields)

    if self._coding_scheme == u"BIOUL":
        ner_tags = to_bioul(ner_tags, encoding=u"BIO")
    fields[u'tags'] = SequenceLabelField(ner_tags, token_field)
    return Instance(fields)
开发者ID:plasticityai,项目名称:magnitude,代码行数:18,代码来源:ontonotes_ner.py

示例4: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self, tokens, verb_label, tags=None):  # type: ignore
    u"""
    Build an ``Instance`` from `pre-tokenized` input plus a verb indicator.

    ``verb_label`` is expected to be a one-hot binary vector with the same length
    as ``tokens`` that marks the verb whose arguments should be found. It is stored
    as the ``verb_indicator`` field. ``tags`` are added as a ``tags`` field only
    when truthy. The ``metadata`` field records the token texts under ``"words"``
    and, under ``"verb"``, the text of the token at the first position equal to 1
    (or ``None`` when every entry of ``verb_label`` equals 0).
    """
    # pylint: disable=arguments-differ
    token_field = TextField(tokens, token_indexers=self._token_indexers)
    instance_fields = {
        u'tokens': token_field,
        u'verb_indicator': SequenceLabelField(verb_label, token_field),
    }
    if tags:
        instance_fields[u'tags'] = SequenceLabelField(tags, token_field)

    # Only look up the verb position when at least one entry is non-zero.
    has_verb = any(flag != 0 for flag in verb_label)
    verb = tokens[verb_label.index(1)].text if has_verb else None

    instance_fields[u"metadata"] = MetadataField({u"words": [token.text for token in tokens],
                                                  u"verb": verb})
    return Instance(instance_fields)
开发者ID:plasticityai,项目名称:magnitude,代码行数:26,代码来源:semantic_role_labeling.py

示例5: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self, tokens: List[Token], tags: List[str]=None) -> Instance:
    """
    Create an ``Instance`` with a ``tokens`` field and, when ``tags`` is truthy,
    an aligned ``tags`` sequence-label field.

    Inputs longer than ``self._max_token_len`` are cut to that length (tokens and
    tags alike) and a notice is printed.
    """
    if len(tokens) > self._max_token_len:
        tokens = tokens[:self._max_token_len]
        print(f'Length of tokens exceeded the limit {self._max_token_len}. Truncating...')
        # Keep the tags aligned with the shortened token list.
        tags = tags[:self._max_token_len] if tags else tags

    token_field = TextField(tokens, self._token_indexers)
    instance_fields = {'tokens': token_field}
    if tags:
        instance_fields['tags'] = SequenceLabelField(tags, token_field)

    return Instance(instance_fields)
开发者ID:mhagiwara,项目名称:nanigonet,代码行数:18,代码来源:dataset_reader.py

示例6: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self,  # type: ignore
                     sentence_tokens: List[str],
                     verb_vector: List[int],
                     entity_vector: List[int],
                     state_change_types: Optional[List[str]] = None,
                     state_change_tags: Optional[List[str]] = None) -> Instance:
    """
    Build an ``Instance`` from a sentence given as raw word strings.

    Inputs: each word is wrapped in a ``Token`` for the ``tokens`` field, and
    ``verb_vector`` / ``entity_vector`` become sequence-label fields in the
    ``'indicator_tags'`` namespace, aligned with the tokens.

    Outputs (each added only when truthy): ``state_change_types`` as a
    ``LabelField`` and ``state_change_tags`` as a ``SequenceLabelField``.
    """
    # NOTE(review): state_change_types is annotated as a list but is passed to
    # LabelField as a single label - confirm the intended type with the caller.
    # pylint: disable=arguments-differ
    token_field = TextField([Token(word) for word in sentence_tokens], self._token_indexers)

    # Input side of the instance.
    fields: Dict[str, Field] = {
        'tokens': token_field,
        'verb_span': SequenceLabelField(verb_vector, token_field, 'indicator_tags'),
        'entity_span': SequenceLabelField(entity_vector, token_field, 'indicator_tags'),
    }

    # Output side of the instance (gold labels, when available).
    if state_change_types:
        fields['state_change_type_labels'] = LabelField(state_change_types, 'state_change_type_labels')
    if state_change_tags:
        fields['state_change_tags'] = SequenceLabelField(state_change_tags, token_field, 'state_change_tags')

    return Instance(fields)
开发者ID:allenai,项目名称:propara,代码行数:24,代码来源:prolocal_dataset_reader.py

示例7: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(
    self,  # type: ignore
    query: List[str],
    slot_tags: List[str] = None,
    sql_template: str = None,
) -> Instance:
    """
    Convert a query (a list of word strings) into an ``Instance``.

    The ``tokens`` field is always present. The ``slot_tags`` sequence-label field
    and the ``template`` label field are added together, and only when both
    ``slot_tags`` and ``sql_template`` are provided.
    """
    token_field = TextField([Token(word) for word in query], self._token_indexers)
    fields: Dict[str, Field] = {"tokens": token_field}

    # Labels are all-or-nothing: without both of them the instance is input-only.
    if slot_tags is None or sql_template is None:
        return Instance(fields)

    fields["slot_tags"] = SequenceLabelField(slot_tags, token_field, label_namespace="slot_tags")
    fields["template"] = LabelField(sql_template, label_namespace="template_labels")
    return Instance(fields)
开发者ID:allenai,项目名称:allennlp-semparse,代码行数:19,代码来源:template_text2sql.py

示例8: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self, text: str, sentences: List[str] = None, tags: List[int] = None) -> Instance:
    """
    Build an ``Instance`` whose ``source_sentences`` field is a list of per-sentence
    text fields, optionally paired with per-sentence ``sentences_tags``.

    When ``sentences`` is not supplied, ``text`` is split with ``razdel`` if
    ``self._language`` is ``"ru"`` and with NLTK's ``sent_tokenize`` otherwise.
    At most ``self._max_sentences_count`` sentences are kept; each one is
    optionally lower-cased, tokenized, cut to ``self._sentence_max_tokens`` tokens
    and wrapped in start/end symbol tokens. ``tags`` are attached only when truthy,
    truncated to the same sentence limit.
    """
    if sentences is None and self._language == "ru":
        sentences = [s.text for s in razdel.sentenize(text)]
    elif sentences is None:
        sentences = nltk.tokenize.sent_tokenize(text)

    sentence_fields = []
    for raw_sentence in sentences[:self._max_sentences_count]:
        normalized = raw_sentence.lower() if self._lowercase else raw_sentence
        inner_tokens = self._tokenizer.tokenize(normalized)[:self._sentence_max_tokens]
        # Surround every sentence with the start and end markers.
        wrapped_tokens = [Token(START_SYMBOL), *inner_tokens, Token(END_SYMBOL)]
        sentence_fields.append(TextField(wrapped_tokens, self._source_token_indexers))

    sentences_field = ListField(sentence_fields)
    instance_fields = {'source_sentences': sentences_field}

    if tags:
        instance_fields["sentences_tags"] = SequenceLabelField(tags[:self._max_sentences_count], sentences_field)
    return Instance(instance_fields)
开发者ID:IlyaGusev,项目名称:summarus,代码行数:23,代码来源:summarization_sentence_tagger_reader.py

示例9: _fix_tokenization

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def _fix_tokenization(tokenized_sent, bert_embs, old_det_to_new_ind, obj_to_type, token_indexers, pad_ind=-1):
    """
    Turn a sentence with embedded detection lists into a text field plus aligned tags.

    :param tokenized_sent: Tokenized sentence; an element that is a list holds
        detection ids, any other element is used as a token as-is.
    :param bert_embs: Passed through unchanged to ``BertField``.
    :param old_det_to_new_ind: Mapping of the old ID -> new ID (which will be used as the tag)
    :param obj_to_type: [person, person, pottedplant] indexed by the old labels
    :param token_indexers: Not used by this function.
    :param pad_ind: Tag given to tokens that are not detections.
    :return: ``(text_field, tags)`` - the ``BertField`` and its ``SequenceLabelField``.
    :raises ValueError: if a detection maps to a negative new index.
    """
    words = []
    tag_ids = []
    for tok in tokenized_sent:
        # Plain tokens keep their text and receive the padding tag.
        if not isinstance(tok, list):
            words.append(tok)
            tag_ids.append(pad_ind)
            continue

        # A list is a group of detections: each one becomes its own token.
        for det_id in tok:
            obj_type = obj_to_type[det_id]
            new_ind = old_det_to_new_ind[det_id]
            if new_ind < 0:
                raise ValueError("Oh no, the new index is negative! that means it's invalid. {} {}".format(
                    tokenized_sent, old_det_to_new_ind
                ))
            if obj_type == 'person':
                # People are referred to by a name picked from a fixed list, cycling by index.
                word = GENDER_NEUTRAL_NAMES[new_ind % len(GENDER_NEUTRAL_NAMES)]
            else:
                word = obj_type
            words.append(word)
            tag_ids.append(new_ind)

    text_field = BertField([Token(word) for word in words],
                           bert_embs,
                           padding_value=0)
    tags = SequenceLabelField(tag_ids, text_field)
    return text_field, tags
开发者ID:yuweijiang,项目名称:HGL-pytorch,代码行数:32,代码来源:vcr.py

示例10: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self,
                     tokens: List[Token],
                     pico_tags: List[str] = None):
    """
    Build an ``Instance`` with a ``tokens`` text field and a ``metadata`` field
    holding the token texts under ``"words"``.

    When ``pico_tags`` is given, it is attached as the ``tags`` sequence-label
    field using ``self.label_namespace``.
    """
    token_field = TextField(tokens, self._token_indexers)
    fields: Dict[str, Field] = {
        'tokens': token_field,
        "metadata": MetadataField({"words": [token.text for token in tokens]}),
    }

    # Gold tags are optional; attach them under 'tags' only when provided.
    if pico_tags is not None:
        fields['tags'] = SequenceLabelField(pico_tags, token_field, self.label_namespace)

    return Instance(fields)
开发者ID:allenai,项目名称:scibert,代码行数:14,代码来源:ebmnlp.py

示例11: _tokens_distances_fields

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def _tokens_distances_fields(self, tokens):
    """
    Wrap the output of ``self._tokens_distances`` in fields.

    Returns a ``TextField`` over the updated tokens followed by two
    ``SequenceLabelField`` objects, aligned with that text field, holding the
    position sequences for the first and the second entity.
    """
    updated_tokens, first_positions, second_positions = self._tokens_distances(tokens)
    text_field = TextField(updated_tokens, self._token_indexers)
    first_field = SequenceLabelField(first_positions, text_field)
    second_field = SequenceLabelField(second_positions, text_field)
    return text_field, first_field, second_field
开发者ID:allenai,项目名称:comb_dist_direct_relex,代码行数:9,代码来源:relation_instances_reader.py

示例12: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self, tokens: List[Token], tags: List[str] = None,
                     words: List[str] = None) -> Instance:  # type: ignore
    """
    Build an ``Instance`` from `pre-tokenized` input (this class has no tokenizer).

    The instance always has ``tokens`` and a ``metadata`` field holding ``words``.
    When ``tags`` is given, ``self.extract_tags`` splits it into labels and
    detection tags, which are added as the ``labels`` and ``d_tags`` fields.

    With tags present, the method may return ``None`` instead of an instance:
    when ``self._skip_complex`` is set and the matching complexity counter is
    positive, or when a random draw exceeds the keep probability
    (``self._tn_prob`` for all-``"CORRECT"`` sentences if ``self._skip_correct``
    is set, ``self._tp_prob`` otherwise).
    """
    # pylint: disable=arguments-differ
    sequence = TextField(tokens, self._token_indexers)
    fields: Dict[str, Field] = {
        "tokens": sequence,
        "metadata": MetadataField({"words": words}),
    }
    if tags is None:
        return Instance(fields)

    labels, detect_tags, complex_flag_dict = self.extract_tags(tags)
    if self._skip_complex and complex_flag_dict[self._skip_complex] > 0:
        return None

    draw = random()
    # TN: a sentence in which every detection tag is "CORRECT"; everything else is a TP.
    is_true_negative = self._skip_correct and all(tag == "CORRECT" for tag in detect_tags)
    keep_prob = self._tn_prob if is_true_negative else self._tp_prob
    if draw > keep_prob:
        return None

    fields["labels"] = SequenceLabelField(labels, sequence, label_namespace="labels")
    fields["d_tags"] = SequenceLabelField(detect_tags, sequence, label_namespace="d_tags")
    return Instance(fields)
开发者ID:plkmo,项目名称:NLP_Toolkit,代码行数:31,代码来源:datareader.py

示例13: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(  # type: ignore
    self, tokens: List[Token], tags: List[str] = None
) -> Instance:
    """
    Create an ``Instance`` from `pre-tokenized` input, since this class has no tokenizer.

    The result holds the ``tokens`` text field, a ``metadata`` field with the
    token texts under ``"words"`` and, when ``tags`` is not ``None``, a ``tags``
    sequence-label field aligned with the tokens.
    """
    token_field = TextField(tokens, self._token_indexers)
    words = [token.text for token in tokens]
    instance_fields: Dict[str, Field] = {
        "tokens": token_field,
        "metadata": MetadataField({"words": words}),
    }
    if tags is not None:
        instance_fields["tags"] = SequenceLabelField(tags, token_field)
    return Instance(instance_fields)
开发者ID:allenai,项目名称:allennlp,代码行数:16,代码来源:sequence_tagging.py

示例14: _get_ner_tags_and_mask

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def _get_ner_tags_and_mask(
    instance: Instance, input_field_to_attack: str, ignore_tokens: List[str]
):
    """
    Used for the NER task. Collects what is needed to protect tagged tokens.

    Returns a tuple ``(num_ignore_tokens, tag_mask, original_tags)``:

    - ``num_ignore_tokens``: the number of tokens in the attacked field whose
      string form is in ``ignore_tokens``, plus the number of labels that are not ``"O"``;
    - ``tag_mask``: one 0/1 entry per label in ``instance["tags"]``, 1 where the
      label is not ``"O"``;
    - ``original_tags``: the non-``"O"`` labels, in order.
    """
    input_field: TextField = instance[input_field_to_attack]  # type: ignore
    num_ignore_tokens = sum(1 for token in input_field.tokens if str(token) in ignore_tokens)

    tag_field: SequenceLabelField = instance["tags"]  # type: ignore
    tag_mask = [1 if label != "O" else 0 for label in tag_field.labels]
    original_tags = [label for label in tag_field.labels if label != "O"]

    # Every tagged position also counts towards the ignored-token total.
    num_ignore_tokens += len(original_tags)
    return num_ignore_tokens, tag_mask, original_tags
开发者ID:allenai,项目名称:allennlp,代码行数:28,代码来源:input_reduction.py

示例15: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import SequenceLabelField [as 别名]
def text_to_instance(self, tokens, tags=None):  # type: ignore
    u"""
    Create an ``Instance`` from `pre-tokenized` input, since this class has no tokenizer.

    The result holds the ``tokens`` text field, a ``metadata`` field with the
    token texts under ``"words"`` and, when ``tags`` is not ``None``, a ``tags``
    sequence-label field aligned with the tokens.
    """
    # pylint: disable=arguments-differ
    token_field = TextField(tokens, self._token_indexers)
    words = [token.text for token in tokens]
    instance_fields = {
        u"tokens": token_field,
        u"metadata": MetadataField({u"words": words}),
    }
    if tags is not None:
        instance_fields[u"tags"] = SequenceLabelField(tags, token_field)
    return Instance(instance_fields)
开发者ID:plasticityai,项目名称:magnitude,代码行数:14,代码来源:sequence_tagging.py


注:本文中的allennlp.data.fields.SequenceLabelField方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。