This article collects typical usage examples of the Python class allennlp.data.token_indexers.SingleIdTokenIndexer. If you have been wondering what token_indexers.SingleIdTokenIndexer does, how to call it, or what real usages look like, the curated examples below should help. You can also explore further usage examples from the module it lives in, allennlp.data.token_indexers.
The 15 code examples of token_indexers.SingleIdTokenIndexer shown below are sorted by popularity by default.
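Before the excerpts, here is a minimal, self-contained sketch (separate from the 15 collected examples) of the pattern they all build on: a SingleIdTokenIndexer is registered under a name in a token_indexers dict, attached to a TextField, and then resolved against a Vocabulary. It assumes an AllenNLP release in which Instance, Token, and Vocabulary are exposed from allennlp.data; treat it as an illustration rather than part of the collected code.

# A minimal sketch of the common pattern: register a SingleIdTokenIndexer under
# a name, wrap tokens in a TextField, then build a Vocabulary and index the field.
from allennlp.data import Instance, Token, Vocabulary
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

token_indexers = {"tokens": SingleIdTokenIndexer(namespace="tokens")}
tokens = [Token(t) for t in ["this", "is", "a", "sentence"]]
text_field = TextField(tokens, token_indexers)
instance = Instance({"text": text_field})

# The indexer's count_vocab_items() feeds vocabulary construction ...
vocab = Vocabulary.from_instances([instance])
# ... and its tokens_to_indices() then maps each token to a single integer id.
text_field.index(vocab)
print(text_field.get_padding_lengths())

The dictionary key ("tokens" here) determines the key under which the indexed ids are grouped, which is why the constructors in the examples below fall back to token_indexers or {'tokens': SingleIdTokenIndexer()} when no indexers are supplied.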
Example 1: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
             db: FeverDocDB,
             sentence_level=False,
             wiki_tokenizer: Tokenizer = None,
             claim_tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             filtering: str = None) -> None:
    self._sentence_level = sentence_level
    self._wiki_tokenizer = wiki_tokenizer or WordTokenizer()
    self._claim_tokenizer = claim_tokenizer or WordTokenizer()
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
    self.db = db
    self.formatter = FEVERGoldFormatter(set(self.db.get_doc_ids()), FEVERLabelSchema(), filtering=filtering)
    self.reader = JSONLineReader()
Example 2: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             delimeters: dict = SEQ_DELIMETERS,
             skip_correct: bool = False,
             skip_complex: int = 0,
             lazy: bool = False,
             max_len: int = None,
             test_mode: bool = False,
             tag_strategy: str = "keep_one",
             tn_prob: float = 0,
             tp_prob: float = 0,
             broken_dot_strategy: str = "keep") -> None:
    super().__init__(lazy)
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
    self._delimeters = delimeters
    self._max_len = max_len
    self._skip_correct = skip_correct
    self._skip_complex = skip_complex
    self._tag_strategy = tag_strategy
    self._broken_dot_strategy = broken_dot_strategy
    self._test_mode = test_mode
    self._tn_prob = tn_prob
    self._tp_prob = tp_prob
Example 3: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(
    self,
    token_indexers: Dict[str, TokenIndexer] = None,
    tag_label: str = "ner",
    feature_labels: Sequence[str] = (),
    coding_scheme: str = "IOB1",
    label_namespace: str = "labels",
    **kwargs,
) -> None:
    super().__init__(**kwargs)
    self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
    if tag_label is not None and tag_label not in self._VALID_LABELS:
        raise ConfigurationError("unknown tag label type: {}".format(tag_label))
    for label in feature_labels:
        if label not in self._VALID_LABELS:
            raise ConfigurationError("unknown feature label type: {}".format(label))
    if coding_scheme not in ("IOB1", "BIOUL"):
        raise ConfigurationError("unknown coding_scheme: {}".format(coding_scheme))
    self.tag_label = tag_label
    self.feature_labels = set(feature_labels)
    self.coding_scheme = coding_scheme
    self.label_namespace = label_namespace
    self._original_coding_scheme = "IOB1"
Example 4: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(
    self,
    token_indexers: Dict[str, TokenIndexer] = None,
    tokenizer: Tokenizer = None,
    segment_sentences: bool = False,
    max_sequence_length: int = None,
    skip_label_indexing: bool = False,
    **kwargs,
) -> None:
    super().__init__(**kwargs)
    self._tokenizer = tokenizer or SpacyTokenizer()
    self._segment_sentences = segment_sentences
    self._max_sequence_length = max_sequence_length
    self._skip_label_indexing = skip_label_indexing
    self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
    if self._segment_sentences:
        self._sentence_segmenter = SpacySentenceSplitter()
Example 5: test_count_vocab_items_with_non_default_feature_name
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def test_count_vocab_items_with_non_default_feature_name(self):
    tokenizer = SpacyTokenizer(parse=True)
    tokens = tokenizer.tokenize("This is a sentence.")
    tokens = [Token("<S>")] + [t for t in tokens] + [Token("</S>")]
    indexer = SingleIdTokenIndexer(
        namespace="dep_labels", feature_name="dep_", default_value="NONE"
    )
    counter = defaultdict(lambda: defaultdict(int))
    for token in tokens:
        indexer.count_vocab_items(token, counter)
    assert counter["dep_labels"] == {
        "ROOT": 1,
        "nsubj": 1,
        "det": 1,
        "NONE": 2,
        "attr": 1,
        "punct": 1,
    }
Example 6: setup_method
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def setup_method(self):
    super().setup_method()
    self.token_indexers = {"tokens": SingleIdTokenIndexer()}
    self.vocab = Vocabulary()
    self.this_index = self.vocab.add_token_to_namespace("this")
    self.is_index = self.vocab.add_token_to_namespace("is")
    self.a_index = self.vocab.add_token_to_namespace("a")
    self.sentence_index = self.vocab.add_token_to_namespace("sentence")
    self.another_index = self.vocab.add_token_to_namespace("another")
    self.yet_index = self.vocab.add_token_to_namespace("yet")
    self.very_index = self.vocab.add_token_to_namespace("very")
    self.long_index = self.vocab.add_token_to_namespace("long")
    instances = [
        self.create_instance(["this", "is", "a", "sentence"]),
        self.create_instance(["this", "is", "another", "sentence"]),
        self.create_instance(["yet", "another", "sentence"]),
        self.create_instance(
            ["this", "is", "a", "very", "very", "very", "very", "long", "sentence"]
        ),
        self.create_instance(["sentence"]),
    ]
    self.instances = instances
    self.lazy_instances = LazyIterable(instances)
Example 7: test_from_params_extend_config
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def test_from_params_extend_config(self):
    vocab_dir = self.TEST_DIR / "vocab_save"
    original_vocab = Vocabulary(non_padded_namespaces=["tokens"])
    original_vocab.add_token_to_namespace("a", namespace="tokens")
    original_vocab.save_to_files(vocab_dir)
    text_field = TextField(
        [Token(t) for t in ["a", "b"]], {"tokens": SingleIdTokenIndexer("tokens")}
    )
    instances = Batch([Instance({"text": text_field})])
    # If you ask to extend vocab from `directory`, instances must be passed
    # in Vocabulary constructor, or else there is nothing to extend to.
    params = Params({"type": "extend", "directory": vocab_dir})
    with pytest.raises(ConfigurationError):
        _ = Vocabulary.from_params(params)
    # If you ask to extend vocab, `directory` key must be present in params,
    # or else there is nothing to extend from.
    params = Params({"type": "extend"})
    with pytest.raises(ConfigurationError):
        _ = Vocabulary.from_params(params, instances=instances)
Example 8: test_token_padding_lengths_are_computed_correctly
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def test_token_padding_lengths_are_computed_correctly(self):
    field = TextField(
        [Token(t) for t in ["A", "sentence"]],
        token_indexers={
            "field_with_dict": DictReturningTokenIndexer(token_min_padding_length=3),
            "words": SingleIdTokenIndexer("words", token_min_padding_length=3),
            "characters": TokenCharactersIndexer(
                "characters", min_padding_length=1, token_min_padding_length=3
            ),
        },
    )
    field.index(self.vocab)
    padding_lengths = field.get_padding_lengths()
    assert padding_lengths == {
        "field_with_dict___token_ids": 5,
        "field_with_dict___additional_key": 3,
        "words___tokens": 3,
        "characters___token_characters": 3,
        "characters___num_token_characters": 8,
    }
    tensors = field.as_tensor(padding_lengths)
    assert tensors["field_with_dict"]["additional_key"].tolist()[-1] == 0
    assert tensors["words"]["tokens"].tolist()[-1] == 0
    assert tensors["characters"]["token_characters"].tolist()[-1] == [0] * 8
Example 9: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False,
             example_filter=None,
             wn_p_dict=None, wn_feature_list=wn_persistent_api.default_fn_list,
             max_l=None) -> None:
    super().__init__(lazy=lazy)
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer(namespace='tokens')}
    self._example_filter = example_filter
    self.wn_p_dict = wn_p_dict
    if wn_p_dict is None:
        raise ValueError("Need to specify WN feature dict for FEVER Reader.")
    self.wn_feature_list = wn_feature_list
    self.wn_feature_size = len(self.wn_feature_list) * 3
    self.max_l = max_l
Example 10: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
             context_size: int = 0,
             agent: str = None,
             random_context_size: bool = True,
             token_delimiter: str = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False) -> None:
    super().__init__(lazy)
    self._context_size = context_size
    self._agent = agent
    self._random_context_size = random_context_size
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
    self._token_delimiter = token_delimiter
Example 11: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
             token_delimiter: str = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False) -> None:
    super().__init__(lazy)
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
    self._token_delimiter = token_delimiter
Example 12: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False) -> None:
    super().__init__(lazy)
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
Example 13: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
             masking_mode: str = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False) -> None:
    super().__init__(lazy)
    if masking_mode and masking_mode.lower() not in ['ner_least_specific', 'ner_most_specific']:
        raise ValueError(f"Masking mode '{masking_mode}' not supported.")
    self._masking_mode = masking_mode
    self._token_splitter = OpenAISplitter()
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
Example 14: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             label_namespace: str = "labels") -> None:
    super().__init__()
    self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
    self.label_namespace = label_namespace
Example 15: __init__
# Required import: from allennlp.data import token_indexers [as alias]
# Or: from allennlp.data.token_indexers import SingleIdTokenIndexer [as alias]
def __init__(self,
             lazy: bool = False,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             ) -> None:
    super().__init__(lazy)
    self._tokenizer = tokenizer or WordTokenizer()
    self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}