

Python token_indexers.SingleIdTokenIndexer Code Examples

This article collects typical usage examples of Python's allennlp.data.token_indexers.SingleIdTokenIndexer. If you are wondering what SingleIdTokenIndexer does, how to use it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples from the allennlp.data.token_indexers module it belongs to.


A total of 15 code examples of token_indexers.SingleIdTokenIndexer are shown below, sorted by popularity by default.
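Before the project examples, here is a minimal, self-contained sketch of the typical pattern they all share: a Vocabulary maps token strings to integer ids under a namespace, and a TextField indexed with a SingleIdTokenIndexer produces one id per token. This sketch is ours rather than taken from any of the projects below, and it assumes an AllenNLP 1.x-style API consistent with the allenai examples that follow.

# A minimal sketch (not from the projects below); assumes an AllenNLP 1.x-style API.
from allennlp.data import Token, Vocabulary
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

# Build a small vocabulary in the default "tokens" namespace.
vocab = Vocabulary()
for word in ["this", "is", "a", "sentence"]:
    vocab.add_token_to_namespace(word, namespace="tokens")

# Index a TextField with a single-id indexer; each token becomes one integer id.
field = TextField([Token(t) for t in ["this", "is", "a", "sentence"]],
                  {"tokens": SingleIdTokenIndexer(namespace="tokens")})
field.index(vocab)
print(field.get_padding_lengths())  # e.g. {"tokens___tokens": 4}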

Example 1: __init__

# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
                 db: FeverDocDB,
                 sentence_level = False,
                 wiki_tokenizer: Tokenizer = None,
                 claim_tokenizer: Tokenizer = None,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 filtering: str = None) -> None:
        self._sentence_level = sentence_level
        self._wiki_tokenizer = wiki_tokenizer or WordTokenizer()
        self._claim_tokenizer = claim_tokenizer or WordTokenizer()
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}

        self.db = db

        self.formatter = FEVERGoldFormatter(set(self.db.get_doc_ids()), FEVERLabelSchema(), filtering=filtering)
        self.reader = JSONLineReader() 
Developer: sheffieldnlp, Project: fever-naacl-2018, Lines of code: 18, Source file: reader.py

Example 2: __init__

# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 delimeters: dict = SEQ_DELIMETERS,
                 skip_correct: bool = False,
                 skip_complex: int = 0,
                 lazy: bool = False,
                 max_len: int = None,
                 test_mode: bool = False,
                 tag_strategy: str = "keep_one",
                 tn_prob: float = 0,
                 tp_prob: float = 0,
                 broken_dot_strategy: str = "keep") -> None:
        super().__init__(lazy)
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
        self._delimeters = delimeters
        self._max_len = max_len
        self._skip_correct = skip_correct
        self._skip_complex = skip_complex
        self._tag_strategy = tag_strategy
        self._broken_dot_strategy = broken_dot_strategy
        self._test_mode = test_mode
        self._tn_prob = tn_prob
        self._tp_prob = tp_prob 
Developer: plkmo, Project: NLP_Toolkit, Lines of code: 25, Source file: datareader.py

Example 3: __init__

# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(
        self,
        token_indexers: Dict[str, TokenIndexer] = None,
        tag_label: str = "ner",
        feature_labels: Sequence[str] = (),
        coding_scheme: str = "IOB1",
        label_namespace: str = "labels",
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
        if tag_label is not None and tag_label not in self._VALID_LABELS:
            raise ConfigurationError("unknown tag label type: {}".format(tag_label))
        for label in feature_labels:
            if label not in self._VALID_LABELS:
                raise ConfigurationError("unknown feature label type: {}".format(label))
        if coding_scheme not in ("IOB1", "BIOUL"):
            raise ConfigurationError("unknown coding_scheme: {}".format(coding_scheme))

        self.tag_label = tag_label
        self.feature_labels = set(feature_labels)
        self.coding_scheme = coding_scheme
        self.label_namespace = label_namespace
        self._original_coding_scheme = "IOB1" 
Developer: allenai, Project: allennlp, Lines of code: 26, Source file: conll2003.py

Example 4: __init__

# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(
        self,
        token_indexers: Dict[str, TokenIndexer] = None,
        tokenizer: Tokenizer = None,
        segment_sentences: bool = False,
        max_sequence_length: int = None,
        skip_label_indexing: bool = False,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self._tokenizer = tokenizer or SpacyTokenizer()
        self._segment_sentences = segment_sentences
        self._max_sequence_length = max_sequence_length
        self._skip_label_indexing = skip_label_indexing
        self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
        if self._segment_sentences:
            self._sentence_segmenter = SpacySentenceSplitter() 
Developer: allenai, Project: allennlp, Lines of code: 19, Source file: text_classification_json.py

Example 5: test_count_vocab_items_with_non_default_feature_name

# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def test_count_vocab_items_with_non_default_feature_name(self):
        tokenizer = SpacyTokenizer(parse=True)
        tokens = tokenizer.tokenize("This is a sentence.")
        tokens = [Token("<S>")] + [t for t in tokens] + [Token("</S>")]
        indexer = SingleIdTokenIndexer(
            namespace="dep_labels", feature_name="dep_", default_value="NONE"
        )
        counter = defaultdict(lambda: defaultdict(int))
        for token in tokens:
            indexer.count_vocab_items(token, counter)

        assert counter["dep_labels"] == {
            "ROOT": 1,
            "nsubj": 1,
            "det": 1,
            "NONE": 2,
            "attr": 1,
            "punct": 1,
        } 
Developer: allenai, Project: allennlp, Lines of code: 21, Source file: single_id_token_indexer_test.py

Example 6: setup_method

# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def setup_method(self):
        super().setup_method()
        self.token_indexers = {"tokens": SingleIdTokenIndexer()}
        self.vocab = Vocabulary()
        self.this_index = self.vocab.add_token_to_namespace("this")
        self.is_index = self.vocab.add_token_to_namespace("is")
        self.a_index = self.vocab.add_token_to_namespace("a")
        self.sentence_index = self.vocab.add_token_to_namespace("sentence")
        self.another_index = self.vocab.add_token_to_namespace("another")
        self.yet_index = self.vocab.add_token_to_namespace("yet")
        self.very_index = self.vocab.add_token_to_namespace("very")
        self.long_index = self.vocab.add_token_to_namespace("long")
        instances = [
            self.create_instance(["this", "is", "a", "sentence"]),
            self.create_instance(["this", "is", "another", "sentence"]),
            self.create_instance(["yet", "another", "sentence"]),
            self.create_instance(
                ["this", "is", "a", "very", "very", "very", "very", "long", "sentence"]
            ),
            self.create_instance(["sentence"]),
        ]

        self.instances = instances
        self.lazy_instances = LazyIterable(instances) 
Developer: allenai, Project: allennlp, Lines of code: 26, Source file: sampler_test.py

Example 7: test_from_params_extend_config

# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def test_from_params_extend_config(self):

        vocab_dir = self.TEST_DIR / "vocab_save"
        original_vocab = Vocabulary(non_padded_namespaces=["tokens"])
        original_vocab.add_token_to_namespace("a", namespace="tokens")
        original_vocab.save_to_files(vocab_dir)

        text_field = TextField(
            [Token(t) for t in ["a", "b"]], {"tokens": SingleIdTokenIndexer("tokens")}
        )
        instances = Batch([Instance({"text": text_field})])

        # If you ask to extend vocab from `directory`, instances must be passed
        # in Vocabulary constructor, or else there is nothing to extend to.
        params = Params({"type": "extend", "directory": vocab_dir})
        with pytest.raises(ConfigurationError):
            _ = Vocabulary.from_params(params)

        # If you ask to extend vocab, `directory` key must be present in params,
        # or else there is nothing to extend from.
        params = Params({"type": "extend"})
        with pytest.raises(ConfigurationError):
            _ = Vocabulary.from_params(params, instances=instances) 
Developer: allenai, Project: allennlp, Lines of code: 25, Source file: vocabulary_test.py

Example 8: test_token_padding_lengths_are_computed_correctly

# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def test_token_padding_lengths_are_computed_correctly(self):
        field = TextField(
            [Token(t) for t in ["A", "sentence"]],
            token_indexers={
                "field_with_dict": DictReturningTokenIndexer(token_min_padding_length=3),
                "words": SingleIdTokenIndexer("words", token_min_padding_length=3),
                "characters": TokenCharactersIndexer(
                    "characters", min_padding_length=1, token_min_padding_length=3
                ),
            },
        )
        field.index(self.vocab)
        padding_lengths = field.get_padding_lengths()
        assert padding_lengths == {
            "field_with_dict___token_ids": 5,
            "field_with_dict___additional_key": 3,
            "words___tokens": 3,
            "characters___token_characters": 3,
            "characters___num_token_characters": 8,
        }
        tensors = field.as_tensor(padding_lengths)
        assert tensors["field_with_dict"]["additional_key"].tolist()[-1] == 0
        assert tensors["words"]["tokens"].tolist()[-1] == 0
        assert tensors["characters"]["token_characters"].tolist()[-1] == [0] * 8 
Developer: allenai, Project: allennlp, Lines of code: 26, Source file: text_field_test.py

Example 9: __init__

# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False,
                 example_filter=None,
                 wn_p_dict=None, wn_feature_list=wn_persistent_api.default_fn_list,
                 max_l=None) -> None:

        super().__init__(lazy=lazy)
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer(namespace='tokens')}
        self._example_filter = example_filter
        self.wn_p_dict = wn_p_dict
        if wn_p_dict is None:
            raise ValueError("Need to specify WN feature dict for FEVER Reader.")
        self.wn_feature_list = wn_feature_list
        self.wn_feature_size = len(self.wn_feature_list) * 3
        self.max_l = max_l 
Developer: easonnie, Project: combine-FEVER-NSMN, Lines of code: 18, Source file: fever_reader_with_wn.py

Example 10: __init__

# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
                 context_size: int = 0,
                 agent: str = None,
                 random_context_size: bool = True,
                 token_delimiter: str = None,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False) -> None:
        super().__init__(lazy)
        self._context_size = context_size
        self._agent = agent 
        self._random_context_size = random_context_size
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
        self._token_delimiter = token_delimiter 
Developer: ConvLab, Project: ConvLab, Lines of code: 15, Source file: dataset_reader.py

Example 11: __init__

# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
                 token_delimiter: str = None,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False) -> None:
        super().__init__(lazy)
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
        self._token_delimiter = token_delimiter 
Developer: ConvLab, Project: ConvLab, Lines of code: 9, Source file: dataset_reader.py

Example 12: __init__

# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False) -> None:
        super().__init__(lazy)
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()} 
Developer: DFKI-NLP, Project: DISTRE, Lines of code: 7, Source file: semeval_2010_task_8_reader.py

Example 13: __init__

# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
                 masking_mode: str=None,
                 token_indexers: Dict[str, TokenIndexer]=None,
                 lazy: bool=False) -> None:
        super().__init__(lazy)

        if masking_mode and masking_mode.lower() not in ['ner_least_specific', 'ner_most_specific']:
            raise ValueError(f"Masking mode '{masking_mode}' not supported.")

        self._masking_mode = masking_mode
        self._token_splitter = OpenAISplitter()
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()} 
Developer: DFKI-NLP, Project: DISTRE, Lines of code: 14, Source file: open_nre_nyt_reader.py

Example 14: __init__

# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 label_namespace: str = "labels") -> None:
        super().__init__()
        self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
        self.label_namespace = label_namespace 
Developer: allenai, Project: scibert, Lines of code: 8, Source file: ebmnlp.py

Example 15: __init__

# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
                 lazy: bool = False,
                 tokenizer: Tokenizer = None,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 ) -> None:
        super().__init__(lazy)
        self._tokenizer = tokenizer or WordTokenizer()
        self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()} 
Developer: allenai, Project: scibert, Lines of code: 10, Source file: classification_dataset_reader.py


Note: The allennlp.data.token_indexers.SingleIdTokenIndexer examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers, and copyright in the source code remains with the original authors. Please consult each project's license before distributing or using the code; do not reproduce without permission.