当前位置: 首页>>代码示例>>Python>>正文


Python fields.MetadataField类代码示例

本文整理汇总了Python中allennlp.data.fields.MetadataField类的典型用法代码示例。如果您正苦于以下问题:Python fields.MetadataField类的具体用法?Python fields.MetadataField怎么用?Python fields.MetadataField使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块allennlp.data.fields的用法示例。


在下文中一共展示了fields.MetadataField类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self, context_tokens: List[Token], tokens: List[Token], tags: List[str] = None,
        intents: List[str] = None, dialog_act: Dict[str, Any] = None) -> Instance:  # type: ignore
        """
        Build an ``Instance`` from `pre-tokenized` input.

        This class has no tokenizer, so both token arguments must already be
        lists of ``Token`` objects.

        Parameters
        ----------
        context_tokens : ``List[Token]``
            Indexed with ``self._token_indexers`` into the ``"context_tokens"`` field.
        tokens : ``List[Token]``
            Indexed with ``self._token_indexers`` into the ``"tokens"`` field; their
            ``text`` values are also stored under ``"words"`` in the metadata.
        tags : ``List[str]``, optional
            When not ``None``, added as a ``"tags"`` sequence-label field over ``"tokens"``.
        intents : ``List[str]``, optional
            When not ``None``, added as an ``"intents"`` multi-label field in the
            ``"intent_labels"`` namespace.
        dialog_act : ``Dict[str, Any]``, optional
            Stored as-is under ``"dialog_act"`` in the metadata; an empty dict is
            stored when it is ``None``.

        Returns
        -------
        Instance
            The instance holding the fields described above.
        """
        # pylint: disable=arguments-differ
        fields: Dict[str, Field] = {}
        fields["context_tokens"] = TextField(context_tokens, self._token_indexers)
        fields["tokens"] = TextField(tokens, self._token_indexers)
        # The metadata is built exactly once, at the same key position as before.
        # It always carries a 'dialog_act' entry, falling back to an empty dict.
        fields["metadata"] = MetadataField({
            "words": [x.text for x in tokens],
            'dialog_act': dialog_act if dialog_act is not None else {},
        })
        if tags is not None:
            fields["tags"] = SequenceLabelField(tags, fields["tokens"])
        if intents is not None:
            fields["intents"] = MultiLabelField(intents, label_namespace="intent_labels")
        return Instance(fields)
开发者ID:ConvLab,项目名称:ConvLab,代码行数:23,代码来源:dataset_reader.py

示例2: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self, tokens: List[Token], tags: List[str] = None, domain: str = None,
        intent: str = None, dialog_act: Dict[str, Any] = None) -> Instance:  # type: ignore
        """
        Turn `pre-tokenized` input into an ``Instance`` (this class owns no tokenizer).

        ``tags``, ``domain`` and ``intent`` each add a field only when truthy:
        ``"tags"`` is a sequence-label field over the tokens, while ``"domain"`` and
        ``"intent"`` are label fields in the ``"domain_labels"`` and
        ``"intent_labels"`` namespaces. The ``"metadata"`` field is always present
        and holds the token texts under ``"words"`` plus ``dialog_act`` (an empty
        dict when ``dialog_act`` is ``None``).
        """
        # pylint: disable=arguments-differ
        token_field = TextField(tokens, self._token_indexers)
        fields: Dict[str, Field] = {"tokens": token_field}

        if tags:
            fields["tags"] = SequenceLabelField(tags, token_field)
        if domain:
            fields["domain"] = LabelField(domain, label_namespace="domain_labels")
        if intent:
            fields["intent"] = LabelField(intent, label_namespace="intent_labels")

        word_texts = [tok.text for tok in tokens]
        act = {} if dialog_act is None else dialog_act
        fields["metadata"] = MetadataField({"words": word_texts, 'dialog_act': act})
        return Instance(fields)
开发者ID:ConvLab,项目名称:ConvLab,代码行数:23,代码来源:dataset_reader.py

示例3: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,  # type: ignore
                         item_id: Any,
                         question_text: str,
                         choice_text_list: List[str],
                         answer_id: int
                         ) -> Instance:
        """
        Build an ``Instance`` for one multiple-choice question.

        The question and every choice are tokenized with ``self._tokenizer`` and
        indexed with ``self._token_indexers``. ``answer_id`` becomes the ``'label'``
        field, created with ``skip_indexing=True``. The ``"metadata"`` field keeps
        the item id, the raw texts, and the token texts of the question and choices.
        """
        # pylint: disable=arguments-differ
        tokenize = self._tokenizer.tokenize
        q_tokens = tokenize(question_text)
        per_choice_tokens = [tokenize(choice) for choice in choice_text_list]

        fields: Dict[str, Field] = {
            'question': TextField(q_tokens, self._token_indexers),
            'choices_list': ListField([TextField(toks, self._token_indexers)
                                       for toks in per_choice_tokens]),
            'label': LabelField(answer_id, skip_indexing=True),
        }
        fields["metadata"] = MetadataField({
            "id": item_id,
            "question_text": question_text,
            "choice_text_list": choice_text_list,
            "question_tokens": [tok.text for tok in q_tokens],
            "choice_tokens_list": [[tok.text for tok in toks] for toks in per_choice_tokens],
        })
        return Instance(fields)
开发者ID:allenai,项目名称:OpenBookQA,代码行数:27,代码来源:arc_multichoice_json_reader.py

示例4: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self, # type: ignore
                         tokens,
                         ner_tags=None):
        u"""
        Build an ``Instance`` from `pre-tokenized` input (this class has no tokenizer).

        The instance always has a ``tokens`` text field and a ``metadata`` field
        holding the token texts under ``words``. When ``ner_tags`` is given, a
        ``tags`` sequence-label field is added; the tags are first passed through
        ``to_bioul(..., encoding="BIO")`` if ``self._coding_scheme`` is ``"BIOUL"``.
        """
        # pylint: disable=arguments-differ
        token_field = TextField(tokens, self._token_indexers)
        word_texts = [token.text for token in tokens]
        instance_fields = {
            u'tokens': token_field,
            u"metadata": MetadataField({u"words": word_texts}),
        }
        # Without tags there is nothing more to add.
        if ner_tags is None:
            return Instance(instance_fields)

        if self._coding_scheme == u"BIOUL":
            ner_tags = to_bioul(ner_tags, encoding=u"BIO")
        instance_fields[u'tags'] = SequenceLabelField(ner_tags, token_field)
        return Instance(instance_fields)
开发者ID:plasticityai,项目名称:magnitude,代码行数:18,代码来源:ontonotes_ner.py

示例5: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,  # type: ignore
                         premise,
                         hypothesis,
                         label=None):
        u"""
        Build an ``Instance`` for one premise/hypothesis pair.

        Both texts are tokenized with ``self._tokenizer`` and indexed with
        ``self._token_indexers``. A ``label`` field is added only when ``label`` is
        truthy. The ``metadata`` field records the token texts of both sides.
        """
        # pylint: disable=arguments-differ
        tokenize = self._tokenizer.tokenize
        p_tokens = tokenize(premise)
        h_tokens = tokenize(hypothesis)

        fields = {
            u'premise': TextField(p_tokens, self._token_indexers),
            u'hypothesis': TextField(h_tokens, self._token_indexers),
        }
        if label:
            fields[u'label'] = LabelField(label)

        fields[u"metadata"] = MetadataField({
            u"premise_tokens": [tok.text for tok in p_tokens],
            u"hypothesis_tokens": [tok.text for tok in h_tokens],
        })
        return Instance(fields)
开发者ID:plasticityai,项目名称:magnitude,代码行数:19,代码来源:snli.py

示例6: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self, # pylint: disable=arguments-differ
                         premise: str,
                         hypothesis: str,
                         label: str = None) -> Instance:
        """
        Build an ``Instance`` for one premise/hypothesis pair.

        Each text is tokenized with ``self._tokenizer``, sliced with
        ``[-self._max_tokens:]`` (for a positive ``_max_tokens`` this keeps the
        trailing tokens), and rebuilt as fresh ``Token`` objects carrying only the
        text. A ``'label'`` field is added only when ``label`` is truthy. No
        metadata field is attached here.
        """
        def _clipped_tokens(text):
            # Re-wrap each kept token so only its text survives.
            kept = self._tokenizer.tokenize(text)[-self._max_tokens:]
            return [Token(tok.text) for tok in kept]

        p_tokens = _clipped_tokens(premise)
        h_tokens = _clipped_tokens(hypothesis)

        fields: Dict[str, Field] = {
            'premise': TextField(p_tokens, self._token_indexers),
            'hypothesis': TextField(h_tokens, self._token_indexers),
        }
        if label:
            fields['label'] = LabelField(label)
        return Instance(fields)
开发者ID:StonyBrookNLP,项目名称:multee,代码行数:22,代码来源:entailment_pair.py

示例7: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,
                         citation_text: str,
                         citing_paper_id: str,
                         cited_paper_id: str,
                         intent: List[str] = None,
                         venue: str = None,
                         section_name: str = None) -> Instance:
        """
        Build an ``Instance`` for one citation context.

        ``citation_text`` is tokenized with ``self._tokenizer`` and stored as the
        ``'citation_text'`` field. When ``section_name`` is not ``None`` it becomes a
        ``'section_label'`` field in the ``"section_labels"`` namespace. Both paper
        ids are attached as metadata fields. ``intent`` and ``venue`` are accepted
        but not used in this method.
        """
        tokenized_citation = self._tokenizer.tokenize(citation_text)
        fields = {'citation_text': TextField(tokenized_citation, self._token_indexers)}

        if section_name is not None:
            fields['section_label'] = LabelField(section_name, label_namespace="section_labels")

        fields.update(
            citing_paper_id=MetadataField(citing_paper_id),
            cited_paper_id=MetadataField(cited_paper_id),
        )
        return Instance(fields)
开发者ID:allenai,项目名称:scicite,代码行数:21,代码来源:citation_data_reader_aclarc_aux.py

示例8: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,
                         citation_text: str,
                         citing_paper_id: str,
                         cited_paper_id: str,
                         intent: List[str] = None,
                         section_name: str = None) -> Instance:
        """
        Build an ``Instance`` for one citation context.

        The tokenized ``citation_text`` is stored as the ``'citation_text'`` field,
        and ``section_name`` (when not ``None``) as a ``'section_label'`` field in
        the ``"section_labels"`` namespace. The citing and cited paper ids are
        attached as metadata fields. ``intent`` is accepted but not used in this
        method.
        """
        text_field = TextField(self._tokenizer.tokenize(citation_text), self._token_indexers)
        fields = {'citation_text': text_field}

        has_section = section_name is not None
        if has_section:
            fields['section_label'] = LabelField(section_name, label_namespace="section_labels")

        for key, paper_id in (('citing_paper_id', citing_paper_id),
                              ('cited_paper_id', cited_paper_id)):
            fields[key] = MetadataField(paper_id)
        return Instance(fields)
开发者ID:allenai,项目名称:scicite,代码行数:21,代码来源:citation_data_reader_scicite_aux.py

示例9: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self, query_id:str, doc_id:str, query_sequence: str, doc_sequence: str) -> Instance:  # type: ignore
        """
        Build an ``Instance`` holding one query/document pair as a single sequence.

        The query and the document are each tokenized, cut to their configured
        maximum length (when that limit is > -1) and padded with
        ``self.padding_value`` up to their configured minimum length (when that
        limit is > -1). The final ``"doc_tokens"`` field is the query tokens,
        ``self.sep_value``, then the document tokens. Both ids are attached as
        metadata fields.
        """
        # pylint: disable=arguments-differ

        def _fit_length(token_list, max_len, min_len):
            # Truncate first, then pad, so padding is measured on the cut list.
            if max_len > -1:
                token_list = token_list[:max_len]
            missing = min_len - len(token_list)
            if min_len > -1 and missing > 0:
                token_list = token_list + [self.padding_value] * missing
            return token_list

        query_part = _fit_length(self._tokenizer.tokenize(query_sequence),
                                 self.max_query_length, self.min_query_length)
        doc_part = _fit_length(self._tokenizer.tokenize(doc_sequence),
                               self.max_doc_length, self.min_doc_length)

        joined = query_part + [self.sep_value] + doc_part
        return Instance({
            "query_id": MetadataField(query_id),
            "doc_id": MetadataField(doc_id),
            "doc_tokens": TextField(joined, self._token_indexers)})
开发者ID:sebastian-hofstaetter,项目名称:transformer-kernel-ranking,代码行数:27,代码来源:bert_labeled_tuple_loader.py

示例10: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self, seq_id:str, seq_text:str) -> Instance:  # type: ignore
        """
        Build an ``Instance`` for a single identified sequence.

        ``seq_text`` is tokenized, cut to ``self.max_seq_length`` tokens when that
        limit is > -1, and padded with ``self.padding_value`` up to
        ``self.min_seq_length`` when that limit is > -1. The result is stored as
        ``"seq_tokens"`` next to a ``"seq_id"`` metadata field.
        """
        # pylint: disable=arguments-differ
        token_list = self._tokenizer.tokenize(seq_text)

        upper, lower = self.max_seq_length, self.min_seq_length
        if upper > -1:
            token_list = token_list[:upper]
        if lower > -1:
            missing = lower - len(token_list)
            if missing > 0:
                token_list = token_list + [self.padding_value] * missing

        return Instance({
            "seq_id": MetadataField(seq_id),
            "seq_tokens": TextField(token_list, self._token_indexers)})
开发者ID:sebastian-hofstaetter,项目名称:transformer-kernel-ranking,代码行数:19,代码来源:ir_single_sequence_loader.py

示例11: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,  # type: ignore
                         item_id: Any,
                         question_text: str,
                         choice_text_list: List[str],
                         answer_id: int) -> Instance:
        """
        Build an ``Instance`` for one multiple-choice question.

        Fields produced: ``'question'`` (tokenized question), ``'choices_list'`` (one
        text field per tokenized choice), ``'label'`` (``answer_id``, created with
        ``skip_indexing=True``) and ``"metadata"`` (item id, raw texts and token
        texts).
        """
        # pylint: disable=arguments-differ
        indexers = self._token_indexers
        q_tokens = self._tokenizer.tokenize(question_text)
        tokens_by_choice = []
        for choice_text in choice_text_list:
            tokens_by_choice.append(self._tokenizer.tokenize(choice_text))

        choice_fields = [TextField(choice_tokens, indexers) for choice_tokens in tokens_by_choice]

        fields: Dict[str, Field] = {}
        fields['question'] = TextField(q_tokens, indexers)
        fields['choices_list'] = ListField(choice_fields)
        fields['label'] = LabelField(answer_id, skip_indexing=True)
        fields["metadata"] = MetadataField({
           "id": item_id,
           "question_text": question_text,
           "choice_text_list": choice_text_list,
           "question_tokens": [tok.text for tok in q_tokens],
           "choice_tokens_list": [[tok.text for tok in choice_tokens]
                                  for choice_tokens in tokens_by_choice],
        })
        return Instance(fields)
开发者ID:allenai,项目名称:ARC-Solvers,代码行数:26,代码来源:arc_multichoice_json_reader.py

示例12: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,  # type: ignore
                         sentence: str,
                         head: str,
                         tail: str,
                         head_type: str=None,
                         tail_type: str=None,
                         label: str=None) -> Instance:
        """
        Build an ``Instance`` for one sentence with a head/tail entity pair.

        Parameters
        ----------
        sentence, head, tail : ``str``
            Raw texts; each is split with ``self._token_splitter.split_words``.
        head_type, tail_type : ``str``, optional
            Only read when ``self._masking_mode == 'ner_least_specific'``. A string
            is split into tokens the same way as ``head`` / ``tail``; an already
            tokenized sequence is used as given.
        label : ``str``, optional
            When truthy, appended to the instance id and added as a ``'label'`` field.

        Returns
        -------
        Instance
            Holds ``'metadata'`` (lower-cased ``instance_id``), ``'sentence'`` (the
            marker-delimited token sequence) and, optionally, ``'label'``.

        Raises
        ------
        TypeError
            If the masking mode is ``'ner_least_specific'`` and ``head_type`` or
            ``tail_type`` is ``None``.
        """
        # pylint: disable=arguments-differ
        fields: Dict[str, Field] = {}

        instance_id = f'{head}#{tail}'
        if label:
            instance_id = f'{instance_id}#{label}'

        fields['metadata'] = MetadataField({'instance_id': instance_id.lower()})

        split_words = self._token_splitter.split_words
        sentence_tokens = split_words(sentence)
        head_tokens = split_words(head)
        tail_tokens = split_words(tail)

        def _type_tokens(entity_type, arg_name):
            # Previously the raw value was concatenated onto a token list, which
            # raises TypeError for the documented ``str`` type (and for None).
            if entity_type is None:
                raise TypeError(f"'{arg_name}' is required when masking mode is "
                                f"'ner_least_specific', but got None.")
            if isinstance(entity_type, str):
                return split_words(entity_type)
            return list(entity_type)

        # TODO: this should not be done here

        if self._masking_mode == 'ner_least_specific':
            # NOTE(review): this logs once per instance, which is noisy at INFO level.
            logger.info("Using masking mode 'ner_least_specific'.")
            tokens = ([Token('__start__')]
                      + head_tokens + [Token('__del1__')]
                      + _type_tokens(head_type, 'head_type') + [Token('__ent1__')]
                      + tail_tokens + [Token('__del2__')]
                      + _type_tokens(tail_type, 'tail_type') + [Token('__ent2__')]
                      + sentence_tokens + [Token('__clf__')])
        else:
            tokens = ([Token('__start__')]
                      + head_tokens + [Token('__del1__')]
                      + tail_tokens + [Token('__del2__')]
                      + sentence_tokens + [Token('__clf__')])

        fields['sentence'] = TextField(tokens, self._token_indexers)

        if label:
            fields['label'] = LabelField(label)

        return Instance(fields)
开发者ID:DFKI-NLP,项目名称:DISTRE,代码行数:39,代码来源:open_nre_nyt_reader.py

示例13: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,
                         tokens: List[Token],
                         pico_tags: List[str] = None):
        """
        Build an ``Instance`` from already-tokenized input.

        The instance always carries a ``'tokens'`` text field and a ``"metadata"``
        field with the token texts under ``"words"``. When ``pico_tags`` is given, a
        ``'tags'`` sequence-label field is added in ``self.label_namespace``.
        """
        token_field = TextField(tokens, self._token_indexers)
        word_texts = [token.text for token in tokens]
        instance_fields: Dict[str, Field] = {
            'tokens': token_field,
            "metadata": MetadataField({"words": word_texts}),
        }

        # Tags are optional; without them the instance is already complete.
        if pico_tags is None:
            return Instance(instance_fields)

        instance_fields['tags'] = SequenceLabelField(pico_tags, token_field, self.label_namespace)
        return Instance(instance_fields)
开发者ID:allenai,项目名称:scibert,代码行数:14,代码来源:ebmnlp.py

示例14: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self,
                         text: str,
                         label: str = None,
                         metadata: Any = None) -> Instance:  # type: ignore
        """
        Build an ``Instance`` for one piece of text.

        ``text`` is tokenized with ``self._tokenizer`` and stored as the ``'text'``
        field. A ``'label'`` field is added when ``label`` is not ``None``, and a
        ``'metadata'`` field when ``metadata`` is truthy.
        """
        fields = {}
        fields['text'] = TextField(self._tokenizer.tokenize(text), self._token_indexers)

        optional_fields = (
            ('label', LabelField, label is not None, label),
            ('metadata', MetadataField, bool(metadata), metadata),
        )
        for key, field_cls, wanted, value in optional_fields:
            if wanted:
                fields[key] = field_cls(value)
        return Instance(fields)
开发者ID:allenai,项目名称:scibert,代码行数:16,代码来源:classification_dataset_reader.py

示例15: text_to_instance

# 需要导入模块: from allennlp.data import fields [as 别名]
# 或者: from allennlp.data.fields import MetadataField [as 别名]
def text_to_instance(self, tokens: List[Token], tags: List[str] = None,
                         words: List[str] = None) -> Instance:  # type: ignore
        """
        Build an ``Instance`` from `pre-tokenized` input (this class has no tokenizer).

        The instance always holds ``"tokens"`` and a ``"metadata"`` field carrying
        ``words``. When ``tags`` is given, ``self.extract_tags`` splits it into
        labels and detect tags, which become the ``"labels"`` and ``"d_tags"``
        sequence-label fields.

        Despite the annotation, ``None`` is returned instead of an instance when:

        * ``self._skip_complex`` is truthy and its entry in the complexity flags
          is greater than 0, or
        * a single ``random()`` draw exceeds the keep probability. That probability
          is ``self._tn_prob`` when ``self._skip_correct`` is set and every detect
          tag is ``"CORRECT"``, and ``self._tp_prob`` otherwise.
        """
        # pylint: disable=arguments-differ
        sequence = TextField(tokens, self._token_indexers)
        fields: Dict[str, Field] = {
            "tokens": sequence,
            "metadata": MetadataField({"words": words}),
        }
        if tags is None:
            return Instance(fields)

        labels, detect_tags, complex_flag_dict = self.extract_tags(tags)
        if self._skip_complex and complex_flag_dict[self._skip_complex] > 0:
            return None

        rnd = random()
        # "skip TN" applies to all-CORRECT sentences, "skip TP" to everything else.
        only_correct = self._skip_correct and all(tag == "CORRECT" for tag in detect_tags)
        keep_prob = self._tn_prob if only_correct else self._tp_prob
        if rnd > keep_prob:
            return None

        for field_name, values in (("labels", labels), ("d_tags", detect_tags)):
            fields[field_name] = SequenceLabelField(values, sequence,
                                                    label_namespace=field_name)
        return Instance(fields)
开发者ID:plkmo,项目名称:NLP_Toolkit,代码行数:31,代码来源:datareader.py


注:本文中的allennlp.data.fields.MetadataField方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。