本文整理汇总了Python中allennlp.data.token_indexers.TokenIndexer方法的典型用法代码示例。如果您正苦于以下问题:Python token_indexers.TokenIndexer方法的具体用法?Python token_indexers.TokenIndexer怎么用?Python token_indexers.TokenIndexer使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类allennlp.data.token_indexers
的用法示例。
在下文中一共展示了token_indexers.TokenIndexer方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import TokenIndexer [as 别名]
def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             delimeters: dict = SEQ_DELIMETERS,
             skip_correct: bool = False,
             skip_complex: int = 0,
             lazy: bool = False,
             max_len: int = None,
             test_mode: bool = False,
             tag_strategy: str = "keep_one",
             tn_prob: float = 0,
             tp_prob: float = 0,
             broken_dot_strategy: str = "keep") -> None:
    """Store the reader configuration on private attributes.

    ``token_indexers`` falls back to a plain single-id word indexer
    when the caller supplies none.
    """
    super().__init__(lazy)
    if token_indexers:
        self._token_indexers = token_indexers
    else:
        self._token_indexers = {'tokens': SingleIdTokenIndexer()}
    # Parsing / filtering options.
    self._delimeters = delimeters
    self._skip_correct = skip_correct
    self._skip_complex = skip_complex
    self._max_len = max_len
    self._test_mode = test_mode
    # Tagging behaviour.
    self._tag_strategy = tag_strategy
    self._broken_dot_strategy = broken_dot_strategy
    # Sampling probabilities (semantics defined by the reader's _read).
    self._tn_prob = tn_prob
    self._tp_prob = tp_prob
示例2: __init__
# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import TokenIndexer [as 别名]
def __init__(
    self,
    token_indexers: Dict[str, TokenIndexer] = None,
    tag_label: str = "ner",
    feature_labels: Sequence[str] = (),
    coding_scheme: str = "IOB1",
    label_namespace: str = "labels",
    **kwargs,
) -> None:
    """Configure the tagging reader and validate its label settings.

    Raises ``ConfigurationError`` when ``tag_label``, any entry of
    ``feature_labels``, or ``coding_scheme`` is not recognised.
    """
    super().__init__(**kwargs)
    if token_indexers:
        self._token_indexers = token_indexers
    else:
        self._token_indexers = {"tokens": SingleIdTokenIndexer()}
    # Validate configuration against the class-level whitelist.
    if tag_label is not None and tag_label not in self._VALID_LABELS:
        raise ConfigurationError(f"unknown tag label type: {tag_label}")
    for label in feature_labels:
        if label not in self._VALID_LABELS:
            raise ConfigurationError(f"unknown feature label type: {label}")
    if coding_scheme not in ("IOB1", "BIOUL"):
        raise ConfigurationError(f"unknown coding_scheme: {coding_scheme}")
    self.tag_label = tag_label
    self.feature_labels = set(feature_labels)
    self.coding_scheme = coding_scheme
    self.label_namespace = label_namespace
    # Source files are read as IOB1 regardless of the output scheme.
    self._original_coding_scheme = "IOB1"
示例3: __init__
# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import TokenIndexer [as 别名]
def __init__(
    self,
    token_indexers: Dict[str, TokenIndexer] = None,
    tokenizer: Tokenizer = None,
    segment_sentences: bool = False,
    max_sequence_length: int = None,
    skip_label_indexing: bool = False,
    **kwargs,
) -> None:
    """Set up tokenization, indexing and optional sentence splitting."""
    super().__init__(**kwargs)
    self._tokenizer = tokenizer or SpacyTokenizer()
    if token_indexers:
        self._token_indexers = token_indexers
    else:
        self._token_indexers = {"tokens": SingleIdTokenIndexer()}
    self._max_sequence_length = max_sequence_length
    self._skip_label_indexing = skip_label_indexing
    self._segment_sentences = segment_sentences
    if segment_sentences:
        # Only build the spaCy sentence splitter when it will be used.
        self._sentence_segmenter = SpacySentenceSplitter()
示例4: __init__
# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import TokenIndexer [as 别名]
def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False,
             example_filter=None,
             wn_p_dict=None, wn_feature_list=wn_persistent_api.default_fn_list,
             max_l=None) -> None:
    """FEVER reader that augments tokens with WordNet features.

    ``wn_p_dict`` is effectively mandatory: a ``ValueError`` is raised
    when it is left as ``None``.
    """
    super().__init__(lazy=lazy)
    if token_indexers:
        self._token_indexers = token_indexers
    else:
        self._token_indexers = {'tokens': SingleIdTokenIndexer(namespace='tokens')}
    self._example_filter = example_filter
    self.wn_p_dict = wn_p_dict
    if wn_p_dict is None:
        raise ValueError("Need to specify WN feature dict for FEVER Reader.")
    self.wn_feature_list = wn_feature_list
    # Three scalar features are produced per WordNet function.
    self.wn_feature_size = 3 * len(self.wn_feature_list)
    self.max_l = max_l
示例5: __init__
# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import TokenIndexer [as 别名]
def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False,
             example_filter=None,
             wn_p_dict=None, wn_feature_list=wn_persistent_api.default_fn_list,
             max_l=None, num_encoding=True, shuffle_sentences=False, ablation=None) -> None:
    """FEVER reader with WordNet, number-encoding and similarity features.

    ``ablation`` may carry ``rm_wn`` / ``rm_simi`` flags that remove the
    corresponding feature groups from ``wn_feature_size``.
    """
    super().__init__(lazy=lazy)
    if token_indexers:
        self._token_indexers = token_indexers
    else:
        self._token_indexers = {'tokens': SingleIdTokenIndexer(namespace='tokens')}
    self._example_filter = example_filter
    self.wn_p_dict = wn_p_dict
    if wn_p_dict is None:
        raise ValueError("Need to specify WN feature dict for FEVER Reader.")
    self.wn_feature_list = wn_feature_list
    # Feature layout: 3 scalars per WN function, an optional 5-dim
    # number encoding, plus 2 similarity features.
    num_encoding_dim = 5 if num_encoding else 0
    wn_dim = 3 * len(self.wn_feature_list)
    self.wn_feature_size = wn_dim + num_encoding_dim + 2
    self.max_l = max_l
    self.shuffle_sentences = shuffle_sentences
    self.ablation = ablation
    if self.ablation is not None:
        if self.ablation['rm_wn']:
            # Drop the WordNet block together with the number encoding.
            self.wn_feature_size -= wn_dim + num_encoding_dim
        elif self.ablation['rm_simi']:
            self.wn_feature_size -= 2
示例6: __init__
# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import TokenIndexer [as 别名]
def __init__(
    self,
    target_namespace: str,
    source_tokenizer: Tokenizer = None,
    target_tokenizer: Tokenizer = None,
    source_token_indexers: Dict[str, TokenIndexer] = None,
    target_token_indexers: Dict[str, TokenIndexer] = None,
    lazy: bool = False,
) -> None:
    """Deprecated CopyNet reader; use 'copynet_seq2seq' instead."""
    super().__init__(lazy)
    self._target_namespace = target_namespace
    # Target-side components default to their source-side counterparts.
    self._source_tokenizer = source_tokenizer or SpacyTokenizer()
    self._target_tokenizer = target_tokenizer or self._source_tokenizer
    if source_token_indexers:
        self._source_token_indexers = source_token_indexers
    else:
        self._source_token_indexers = {"tokens": SingleIdTokenIndexer()}
    self._target_token_indexers = target_token_indexers or self._source_token_indexers
    warnings.warn(
        "The 'copynet' dataset reader has been deprecated in favor of the "
        "'copynet_seq2seq' dataset reader (now part of the AllenNLP library).",
        DeprecationWarning,
    )
示例7: __init__
# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import TokenIndexer [as 别名]
def __init__(
    self,
    target_namespace: str,
    source_tokenizer: Tokenizer = None,
    target_tokenizer: Tokenizer = None,
    source_token_indexers: Dict[str, TokenIndexer] = None,
    target_token_indexers: Dict[str, TokenIndexer] = None,
    lazy: bool = False,
) -> None:
    """NL2Bash reader: like the parent, but defaults to a bash-aware splitter."""
    # Keep a single shared splitter instance when both tokenizers default.
    src_tok = source_tokenizer or NL2BashWordSplitter()
    tgt_tok = target_tokenizer or src_tok
    super().__init__(
        target_namespace,
        source_tokenizer=src_tok,
        target_tokenizer=tgt_tok,
        source_token_indexers=source_token_indexers,
        target_token_indexers=target_token_indexers,
        lazy=lazy,
    )
示例8: __init__
# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import TokenIndexer [as 别名]
def __init__(
    self,
    lazy: bool = False,
    tokenizer: Tokenizer = None,
    sentence_token_indexers: Dict[str, TokenIndexer] = None,
    nonterminal_indexers: Dict[str, TokenIndexer] = None,
    terminal_indexers: Dict[str, TokenIndexer] = None,
    output_agendas: bool = True,
) -> None:
    """Semantic-parsing reader with separate indexers for grammar rules."""
    super().__init__(lazy)
    self._tokenizer = tokenizer or SpacyTokenizer()
    if sentence_token_indexers:
        self._sentence_token_indexers = sentence_token_indexers
    else:
        self._sentence_token_indexers = {"tokens": SingleIdTokenIndexer()}
    # Terminal and non-terminal rules both index into "rule_labels".
    if nonterminal_indexers:
        self._nonterminal_indexers = nonterminal_indexers
    else:
        self._nonterminal_indexers = {"tokens": SingleIdTokenIndexer("rule_labels")}
    if terminal_indexers:
        self._terminal_indexers = terminal_indexers
    else:
        self._terminal_indexers = {"tokens": SingleIdTokenIndexer("rule_labels")}
    self._output_agendas = output_agendas
示例9: __init__
# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import TokenIndexer [as 别名]
def __init__(self,
             lazy: bool = False,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             words_per_instance: int = 35
             ) -> None:
    """Language-modelling reader that emits fixed-length word windows."""
    super().__init__(lazy)
    # Every sequence is framed with explicit start/end sentinels.
    if not tokenizer:
        tokenizer = WordTokenizer(start_tokens=[START_SYMBOL],
                                  end_tokens=[END_SYMBOL])
    self._tokenizer = tokenizer
    if not token_indexers:
        token_indexers = {"tokens": SingleIdTokenIndexer(namespace="tokens",
                                                         lowercase_tokens=True)}
    self._token_indexers = token_indexers
    self._words_per_instance = words_per_instance
示例10: __init__
# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import TokenIndexer [as 别名]
def __init__(self,
             wordnet_entity_file: str,
             token_indexers: Dict[str, TokenIndexer],
             entity_indexer: TokenIndexer,
             is_training: bool,
             use_surface_form: bool = False,
             should_remap_span_indices: bool = True,
             extra_candidate_generators: Dict[str, MentionGenerator] = None):
    """WordNet entity-linking reader.

    ``token_indexers`` and ``entity_indexer`` are required; the entity
    indexer is wrapped under the "ids" key.
    """
    # This reader is always constructed non-lazy.
    super().__init__(False)
    self.mention_generator = WordNetCandidateMentionGenerator(
        wordnet_entity_file, use_surface_form=use_surface_form)
    self.token_indexers = token_indexers
    self.entity_indexer = {"ids": entity_indexer}
    self.is_training = is_training
    self.should_remap_span_indices = should_remap_span_indices
    self.extra_candidate_generators = extra_candidate_generators
示例11: __init__
# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import TokenIndexer [as 别名]
def __init__(self,
             lazy: bool = False,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             clean_citation: bool = True,
             with_elmo: bool = False
             ) -> None:
    """Citation reader.

    Fix: ``token_indexers`` was previously accepted but silently
    ignored. It now takes precedence when provided; otherwise the
    indexers are chosen by ``with_elmo`` (ELMo characters + word ids,
    or word ids only). Default behaviour is unchanged.
    """
    super().__init__(lazy)
    self._clean_citation = clean_citation
    self._tokenizer = tokenizer or WordTokenizer()
    if token_indexers:
        self._token_indexers = token_indexers
    elif with_elmo:
        self._token_indexers = {"elmo": ELMoTokenCharactersIndexer(),
                                "tokens": SingleIdTokenIndexer()}
    else:
        self._token_indexers = {"tokens": SingleIdTokenIndexer()}
示例12: __init__
# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import TokenIndexer [as 别名]
def __init__(self,
             lazy: bool = False,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             use_lexicon_features: bool = False,
             use_sparse_lexicon_features: bool = False,
             with_elmo: bool = False
             ) -> None:
    """Reader with optional (sparse) lexicon features.

    Fix: ``token_indexers`` was previously accepted but silently
    ignored. It now takes precedence when provided; otherwise the
    indexers are chosen by ``with_elmo``. Default behaviour is
    unchanged.
    """
    super().__init__(lazy)
    self._tokenizer = tokenizer or WordTokenizer()
    if token_indexers:
        self._token_indexers = token_indexers
    elif with_elmo:
        self._token_indexers = {"elmo": ELMoTokenCharactersIndexer(),
                                "tokens": SingleIdTokenIndexer()}
    else:
        self._token_indexers = {"tokens": SingleIdTokenIndexer()}
    self.use_lexicon_features = use_lexicon_features
    self.use_sparse_lexicon_features = use_sparse_lexicon_features
    if self.use_lexicon_features or self.use_sparse_lexicon_features:
        # Merge action and concept lexicons into one lookup table.
        self.lexicons = {**ALL_ACTION_LEXICONS, **ALL_CONCEPT_LEXICONS}
示例13: __init__
# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import TokenIndexer [as 别名]
def __init__(self,
             lazy: bool = False,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             clean_citation: bool = True,
             with_elmo: bool = False
             ) -> None:
    """Citation reader variant.

    Fix: the original assigned ``token_indexers or {...}`` and then
    unconditionally overwrote it in the if/else below, so the argument
    was dead. The dead assignment is removed and ``token_indexers`` now
    takes precedence when provided; otherwise indexers are chosen by
    ``with_elmo``. Default behaviour is unchanged.
    """
    super().__init__(lazy)
    self._clean_citation = clean_citation
    self._tokenizer = tokenizer or WordTokenizer()
    if token_indexers:
        self._token_indexers = token_indexers
    elif with_elmo:
        self._token_indexers = {"elmo": ELMoTokenCharactersIndexer(),
                                "tokens": SingleIdTokenIndexer()}
    else:
        self._token_indexers = {"tokens": SingleIdTokenIndexer()}
示例14: __init__
# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import TokenIndexer [as 别名]
def __init__(self,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             source_add_start_token: bool = True,
             max_doc_length: int = -1,
             max_query_length: int = -1,
             min_doc_length: int = -1,
             min_query_length: int = -1,
             lazy: bool = False) -> None:
    """Labeled-tuple loader for document/query pairs.

    NOTE(review): -1 for a length bound presumably means "no limit" —
    confirm against this reader's _read implementation.
    """
    super().__init__(lazy)
    # A WordTokenizer is a little faster; useful for multicore
    # processing (word_splitter=SimpleWordSplitter()).
    self._tokenizer = tokenizer or WordTokenizer()
    if token_indexers:
        self._token_indexers = token_indexers
    else:
        self._token_indexers = {"tokens": SingleIdTokenIndexer(lowercase_tokens=True)}
    self._source_add_start_token = source_add_start_token
    self.max_doc_length = max_doc_length
    self.min_doc_length = min_doc_length
    self.max_query_length = max_query_length
    self.min_query_length = min_query_length
    # Padding token carries vocabulary id 0.
    self.padding_value = Token(text="@@PADDING@@", text_id=0)
示例15: __init__
# 需要导入模块: from allennlp.data import token_indexers [as 别名]
# 或者: from allennlp.data.token_indexers import TokenIndexer [as 别名]
def __init__(self,
             tokenizer: Tokenizer = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             source_add_start_token: bool = True,
             max_doc_length: int = -1,
             max_query_length: int = -1,
             min_doc_length: int = -1,
             min_query_length: int = -1,
             lazy: bool = False) -> None:
    """BERT labeled-tuple loader.

    NOTE(review): unlike the sibling loader, ``tokenizer`` and
    ``token_indexers`` have no default fallback here and may remain
    ``None`` — presumably the caller always supplies BERT components;
    confirm.
    """
    super().__init__(lazy)
    self._tokenizer = tokenizer
    self._token_indexers = token_indexers
    self._source_add_start_token = source_add_start_token
    self.max_doc_length = max_doc_length
    self.min_doc_length = min_doc_length
    self.max_query_length = max_query_length
    self.min_query_length = min_query_length
    # BERT-style special tokens; [PAD] is pinned to id 0.
    self.padding_value = Token(text="[PAD]", text_id=0)
    self.sep_value = Token(text="[SEP]")
开发者ID:sebastian-hofstaetter,项目名称:transformer-kernel-ranking,代码行数:22,代码来源:bert_labeled_tuple_loader.py