This page collects typical usage examples of the Python method allennlp.data.token_indexers.token_indexer.TokenIndexer. If you are wondering how token_indexer.TokenIndexer is used in practice, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples of its containing module, allennlp.data.token_indexers.token_indexer.
The following shows 10 code examples of token_indexer.TokenIndexer, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
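Before the examples, a quick orientation: a TokenIndexer converts tokens into arrays of indices, and AllenNLP components such as dataset readers and TextFields accept a Dict[str, TokenIndexer] so the same tokens can be indexed in several ways at once. A minimal sketch of building such a dict (illustrative only, not taken from the examples below):

from typing import Dict
from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer, TokenCharactersIndexer

# One id per token in the "tokens" namespace, plus a sequence of character ids per token.
token_indexers: Dict[str, TokenIndexer] = {
    "tokens": SingleIdTokenIndexer(),
    "token_characters": TokenCharactersIndexer(),
}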
Example 1: __init__
# Required import: from allennlp.data.token_indexers import token_indexer [as alias]
# Or: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(self,
             tokenizer: Tokenizer,
             source_token_indexers: Dict[str, TokenIndexer] = None,
             target_token_indexers: Dict[str, TokenIndexer] = None,
             source_max_tokens: int = 400,
             target_max_tokens: int = 100,
             separate_namespaces: bool = False,
             target_namespace: str = "target_tokens",
             save_copy_fields: bool = False,
             save_pgn_fields: bool = False,
             lowercase: bool = True,
             lazy: bool = True) -> None:
    super().__init__(
        tokenizer=tokenizer,
        source_token_indexers=source_token_indexers,
        target_token_indexers=target_token_indexers,
        source_max_tokens=source_max_tokens,
        target_max_tokens=target_max_tokens,
        separate_namespaces=separate_namespaces,
        target_namespace=target_namespace,
        save_copy_fields=save_copy_fields,
        save_pgn_fields=save_pgn_fields,
        lowercase=lowercase,
        lazy=lazy
    )
Example 2: __init__
# Required import: from allennlp.data.token_indexers import token_indexer [as alias]
# Or: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(self,
             tokenizer: Tokenizer,
             source_token_indexers: Dict[str, TokenIndexer] = None,
             target_token_indexers: Dict[str, TokenIndexer] = None,
             source_max_tokens: int = 400,
             target_max_tokens: int = 100,
             separate_namespaces: bool = False,
             target_namespace: str = "target_tokens",
             save_copy_fields: bool = False,
             save_pgn_fields: bool = False,
             lowercase: bool = False,
             lazy: bool = True) -> None:
    super().__init__(
        tokenizer=tokenizer,
        source_token_indexers=source_token_indexers,
        target_token_indexers=target_token_indexers,
        source_max_tokens=source_max_tokens,
        target_max_tokens=target_max_tokens,
        separate_namespaces=separate_namespaces,
        target_namespace=target_namespace,
        save_copy_fields=save_copy_fields,
        save_pgn_fields=save_pgn_fields,
        lowercase=lowercase,
        lazy=lazy
    )
Example 3: __init__
# Required import: from allennlp.data.token_indexers import token_indexer [as alias]
# Or: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(self,
             tokenizer: Tokenizer = None,
             source_token_indexers: Dict[str, TokenIndexer] = None,
             max_sentences_count: int = 30,
             sentence_max_tokens: int = 100,
             lowercase: bool = True,
             lazy: bool = True) -> None:
    super().__init__(
        tokenizer=tokenizer,
        source_token_indexers=source_token_indexers,
        max_sentences_count=max_sentences_count,
        sentence_max_tokens=sentence_max_tokens,
        lowercase=lowercase,
        language="ru",
        lazy=lazy
    )
Example 4: __init__
# Required import: from allennlp.data.token_indexers import token_indexer [as alias]
# Or: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(self,
             tokenizer: Tokenizer = None,
             source_token_indexers: Dict[str, TokenIndexer] = None,
             target_token_indexers: Dict[str, TokenIndexer] = None,
             source_max_tokens: int = 400,
             target_max_tokens: int = 100,
             target_namespace: str = None,
             separate_namespaces: bool = False,
             save_copy_fields: bool = False,
             save_pgn_fields: bool = False) -> None:
    super().__init__(
        tokenizer=tokenizer,
        source_token_indexers=source_token_indexers,
        target_token_indexers=target_token_indexers,
        source_max_tokens=source_max_tokens,
        target_max_tokens=target_max_tokens,
        target_namespace=target_namespace,
        separate_namespaces=separate_namespaces,
        save_copy_fields=save_copy_fields,
        save_pgn_fields=save_pgn_fields
    )
Example 5: __init__
# Required import: from allennlp.data.token_indexers import token_indexer [as alias]
# Or: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(self,
             tokenizer: Tokenizer = None,
             source_token_indexers: Dict[str, TokenIndexer] = None,
             target_token_indexers: Dict[str, TokenIndexer] = None,
             source_max_tokens: int = 800,
             target_max_tokens: int = 200,
             separate_namespaces: bool = False,
             target_namespace: str = "target_tokens",
             save_copy_fields: bool = False,
             save_pgn_fields: bool = False,
             lowercase: bool = False,
             lazy: bool = True) -> None:
    super().__init__(
        tokenizer=tokenizer,
        source_token_indexers=source_token_indexers,
        target_token_indexers=target_token_indexers,
        source_max_tokens=source_max_tokens,
        target_max_tokens=target_max_tokens,
        separate_namespaces=separate_namespaces,
        target_namespace=target_namespace,
        save_copy_fields=save_copy_fields,
        save_pgn_fields=save_pgn_fields,
        lowercase=lowercase,
        lazy=lazy
    )
Example 6: __init__
# Required import: from allennlp.data.token_indexers import token_indexer [as alias]
# Or: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(self, tokens: List[Token], token_indexers: Dict[str, TokenIndexer]) -> None:
    self.tokens = tokens
    self._token_indexers = token_indexers
    self._indexed_tokens: Optional[Dict[str, IndexedTokenList]] = None
    if not all(isinstance(x, (Token, SpacyToken)) for x in tokens):
        raise ConfigurationError(
            "TextFields must be passed Tokens. "
            "Found: {} with types {}.".format(tokens, [type(x) for x in tokens])
        )
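To put Example 6 in context, a TextField constructed this way is indexed against a Vocabulary before being turned into tensors. A minimal sketch against the AllenNLP API (vocabulary contents assumed, illustrative only):

from allennlp.data import Token, Vocabulary
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

tokens = [Token("a"), Token("small"), Token("example")]
field = TextField(tokens, {"tokens": SingleIdTokenIndexer()})

vocab = Vocabulary()
vocab.add_tokens_to_namespace(["a", "small", "example"], namespace="tokens")

field.index(vocab)                                       # fill the field's indexed-token cache
tensors = field.as_tensor(field.get_padding_lengths())   # tensors produced by each indexer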
Example 7: as_tensor
# Required import: from allennlp.data.token_indexers import token_indexer [as alias]
# Or: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def as_tensor(self,
              padding_lengths: Dict[str, int],
              cuda_device: int = -1) -> Dict[str, torch.Tensor]:
    tensors = {}
    num_tokens = padding_lengths.get('num_tokens')
    for indexer_name, indexer in self._token_indexers.items():
        if num_tokens is None:
            # The indexers return different lengths.
            # Get the desired_num_tokens for this indexer.
            desired_num_tokens = {
                indexed_tokens_key: padding_lengths[indexed_tokens_key]
                for indexed_tokens_key in self._indexer_name_to_indexed_token[indexer_name]
            }
        else:
            desired_num_tokens = {indexer_name: num_tokens}
        indices_to_pad = {
            indexed_tokens_key: self._indexed_tokens[indexed_tokens_key]
            for indexed_tokens_key in self._indexer_name_to_indexed_token[indexer_name]
        }
        padded_array = indexer.pad_token_sequence(indices_to_pad,
                                                  desired_num_tokens, padding_lengths)
        # We use the key of the indexer to recognise what the tensor corresponds to within the
        # field (i.e. the result of word indexing, or the result of character indexing, for
        # example).
        # TODO(mattg): we might someday have a TokenIndexer that needs to use something other
        # than a LongTensor here, and it's not clear how to signal that. Maybe we'll need to
        # add a class method to TokenIndexer to tell us the type? But we can worry about that
        # when there's a compelling use case for it.
        indexer_tensors = {key: torch.LongTensor(array) for key, array in padded_array.items()}
        if cuda_device > -1:
            for key in indexer_tensors:
                indexer_tensors[key] = indexer_tensors[key].cuda(cuda_device)
        tensors.update(indexer_tensors)
    return tensors
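The pad_token_sequence call above pads (or truncates) each list of ids to a desired length before it is wrapped in a LongTensor. A rough standalone sketch of that idea, not the library implementation:

from typing import Dict, List

def pad_to_length(indices: Dict[str, List[int]], desired: Dict[str, int]) -> Dict[str, List[int]]:
    # Pad each id list with zeros, or truncate it, so that every key reaches its
    # desired length, roughly what the indexer does before as_tensor builds tensors.
    padded = {}
    for key, ids in indices.items():
        length = desired[key]
        padded[key] = (ids + [0] * length)[:length]
    return padded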
Example 8: __init__
# Required import: from allennlp.data.token_indexers import token_indexer [as alias]
# Or: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(
    self,
    lazy: bool = False,
    tables_directory: str = None,
    offline_logical_forms_directory: str = None,
    max_offline_logical_forms: int = 10,
    keep_if_no_logical_forms: bool = False,
    tokenizer: Tokenizer = None,
    question_token_indexers: Dict[str, TokenIndexer] = None,
    table_token_indexers: Dict[str, TokenIndexer] = None,
    use_table_for_vocab: bool = False,
    max_table_tokens: int = None,
    output_agendas: bool = False,
) -> None:
    super().__init__(lazy=lazy)
    self._tables_directory = tables_directory
    self._offline_logical_forms_directory = offline_logical_forms_directory
    self._max_offline_logical_forms = max_offline_logical_forms
    self._keep_if_no_logical_forms = keep_if_no_logical_forms
    self._tokenizer = tokenizer or SpacyTokenizer(pos_tags=True)
    self._question_token_indexers = question_token_indexers or {
        "tokens": SingleIdTokenIndexer()
    }
    self._table_token_indexers = table_token_indexers or self._question_token_indexers
    self._use_table_for_vocab = use_table_for_vocab
    self._max_table_tokens = max_table_tokens
    self._output_agendas = output_agendas
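Example 8 also shows the usual fallback pattern: if no tokenizer or indexers are passed, the reader builds defaults itself. The same idiom in isolation (the function name here is illustrative, not from the snippet):

from typing import Dict, Optional, Tuple
from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer
from allennlp.data.tokenizers import SpacyTokenizer, Tokenizer

def resolve_defaults(tokenizer: Optional[Tokenizer] = None,
                     token_indexers: Optional[Dict[str, TokenIndexer]] = None
                     ) -> Tuple[Tokenizer, Dict[str, TokenIndexer]]:
    # Mirror the `x or default` idiom used in the reader above.
    tokenizer = tokenizer or SpacyTokenizer(pos_tags=True)
    token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
    return tokenizer, token_indexers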
Example 9: __init__
# Required import: from allennlp.data.token_indexers import token_indexer [as alias]
# Or: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(self,
             tokenizer: Tokenizer,
             cnn_tokenized_dir: str = None,
             dm_tokenized_dir: str = None,
             source_token_indexers: Dict[str, TokenIndexer] = None,
             target_token_indexers: Dict[str, TokenIndexer] = None,
             source_max_tokens: int = 400,
             target_max_tokens: int = 100,
             separate_namespaces: bool = False,
             target_namespace: str = "target_tokens",
             save_copy_fields: bool = False,
             save_pgn_fields: bool = False,
             lowercase: bool = True,
             lazy: bool = True) -> None:
    super().__init__(
        tokenizer=tokenizer,
        source_token_indexers=source_token_indexers,
        target_token_indexers=target_token_indexers,
        source_max_tokens=source_max_tokens,
        target_max_tokens=target_max_tokens,
        separate_namespaces=separate_namespaces,
        target_namespace=target_namespace,
        save_copy_fields=save_copy_fields,
        save_pgn_fields=save_pgn_fields,
        lowercase=lowercase,
        lazy=lazy
    )
    self._cnn_tokenized_dir = cnn_tokenized_dir
    self._dm_tokenized_dir = dm_tokenized_dir
Example 10: __init__
# Required import: from allennlp.data.token_indexers import token_indexer [as alias]
# Or: from allennlp.data.token_indexers.token_indexer import TokenIndexer [as alias]
def __init__(self,
             tokenizer: Tokenizer,
             source_token_indexers: Dict[str, TokenIndexer] = None,
             target_token_indexers: Dict[str, TokenIndexer] = None,
             source_max_tokens: int = 400,
             target_max_tokens: int = 100,
             separate_namespaces: bool = False,
             target_namespace: str = "target_tokens",
             save_copy_fields: bool = False,
             save_pgn_fields: bool = False,
             lowercase: bool = True,
             lazy: bool = True) -> None:
    super().__init__(lazy=lazy)
    assert save_pgn_fields or save_copy_fields or (not save_pgn_fields and not save_copy_fields)
    self._lowercase = lowercase
    self._source_max_tokens = source_max_tokens
    self._target_max_tokens = target_max_tokens
    self._tokenizer = tokenizer
    tokens_indexer = {"tokens": SingleIdTokenIndexer()}
    self._source_token_indexers = source_token_indexers or tokens_indexer
    self._target_token_indexers = target_token_indexers or tokens_indexer
    self._save_copy_fields = save_copy_fields
    self._save_pgn_fields = save_pgn_fields
    self._target_namespace = "tokens"
    if separate_namespaces:
        self._target_namespace = target_namespace
        second_tokens_indexer = {"tokens": SingleIdTokenIndexer(namespace=target_namespace)}
        self._target_token_indexers = target_token_indexers or second_tokens_indexer
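For reference, the effect of the separate_namespaces branch in Example 10 is that source and target ids are drawn from different vocabulary namespaces; a minimal illustration (values assumed, not from the snippet):

from allennlp.data.token_indexers import SingleIdTokenIndexer

# Default: source and target share the "tokens" namespace.
shared_indexers = {"tokens": SingleIdTokenIndexer()}

# With separate_namespaces=True: target ids come from "target_tokens",
# so the decoder-side vocabulary can differ from the encoder-side one.
target_indexers = {"tokens": SingleIdTokenIndexer(namespace="target_tokens")}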