This article collects typical usage examples of the Python attribute tensor2tensor.data_generators.text_encoder.NUM_RESERVED_TOKENS. If you have been wondering what text_encoder.NUM_RESERVED_TOKENS is for, how it is used, or what real-world usages look like, the curated code examples below may help. You can also read further into tensor2tensor.data_generators.text_encoder, the module in which this attribute is defined.
The following shows 6 code examples that use text_encoder.NUM_RESERVED_TOKENS, sorted by popularity by default.
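For context, NUM_RESERVED_TOKENS counts the reserved entries that occupy the lowest IDs in every text_encoder vocabulary. In the tensor2tensor releases I have seen this value is 2 (padding and end-of-sequence), but it is safest to read it from the library itself; the short sketch below shows why the examples add this constant as an offset to their raw symbol IDs:

from tensor2tensor.data_generators import text_encoder

# Reserved tokens sit at the bottom of the ID space:
#   text_encoder.PAD_ID == 0, text_encoder.EOS_ID == 1
print(text_encoder.NUM_RESERVED_TOKENS)            # expected: 2
print(text_encoder.PAD_ID, text_encoder.EOS_ID)    # expected: 0 1

# Raw symbol IDs are shifted upward so they never collide with the
# reserved padding/EOS IDs.
raw_ids = [0, 1, 2]
shifted = [i + text_encoder.NUM_RESERVED_TOKENS for i in raw_ids]
print(shifted)                                     # expected: [2, 3, 4]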
Example 1: generate_data
# Module to import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import NUM_RESERVED_TOKENS [as alias]
def generate_data(self, data_dir, _, task_id=-1):

  def generator_eos(nbr_symbols, max_length, nbr_cases):
    """Shift by NUM_RESERVED_IDS and append EOS token."""
    for case in self.generator(nbr_symbols, max_length, nbr_cases):
      new_case = {}
      for feature in case:
        new_case[feature] = [
            i + text_encoder.NUM_RESERVED_TOKENS for i in case[feature]
        ] + [text_encoder.EOS_ID]
      yield new_case

  utils.generate_dataset_and_shuffle(
      generator_eos(self.num_symbols, self.train_length, self.train_size),
      self.training_filepaths(data_dir, self.num_shards, shuffled=True),
      generator_eos(self.num_symbols, self.dev_length, self.dev_size),
      self.dev_filepaths(data_dir, 1, shuffled=True),
      shuffle=False)
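To make the shift concrete, here is a minimal, self-contained sketch of what generator_eos does to a single case; the input data and the reserved-token count of 2 are assumptions for illustration only:

NUM_RESERVED_TOKENS = 2   # assumed: <pad> and <EOS>
EOS_ID = 1

def shift_and_append_eos(cases):
  """Shift every feature by NUM_RESERVED_TOKENS and append EOS, as above."""
  for case in cases:
    yield {feature: [i + NUM_RESERVED_TOKENS for i in values] + [EOS_ID]
           for feature, values in case.items()}

raw_cases = [{"inputs": [0, 3, 5], "targets": [5, 3, 0]}]
print(list(shift_and_append_eos(raw_cases)))
# [{'inputs': [2, 5, 7, 1], 'targets': [7, 5, 2, 1]}]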
Example 2: hparams
# Module to import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import NUM_RESERVED_TOKENS [as alias]
def hparams(self, defaults, unused_model_hparams):
  p = defaults
  vocab_size = self.num_symbols + text_encoder.NUM_RESERVED_TOKENS
  p.input_modality = {"inputs": (registry.Modalities.SYMBOL, vocab_size)}
  p.target_modality = (registry.Modalities.SYMBOL, vocab_size)
  p.input_space_id = problem.SpaceID.DIGIT_0
  p.target_space_id = problem.SpaceID.DIGIT_1
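The vocab_size arithmetic simply reserves room for the low IDs. Assuming NUM_RESERVED_TOKENS is 2 (<pad> and <EOS>), a problem over the ten decimal digits would report a vocabulary of 12:

num_symbols = 10                       # e.g. the digits 0-9
NUM_RESERVED_TOKENS = 2                # assumed value
vocab_size = num_symbols + NUM_RESERVED_TOKENS
print(vocab_size)                      # 12: IDs 0-1 reserved, 2-11 for symbols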
Example 3: __init__
# Module to import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import NUM_RESERVED_TOKENS [as alias]
def __init__(self,
             chunk_size=1,
             num_reserved_ids=text_encoder.NUM_RESERVED_TOKENS):
  super(DNAEncoder, self).__init__(num_reserved_ids=num_reserved_ids)
  # Build a vocabulary of chunks of size chunk_size
  self._chunk_size = chunk_size
  tokens = self._tokens()
  tokens.sort()
  ids = range(self._num_reserved_ids, len(tokens) + self._num_reserved_ids)
  self._ids_to_tokens = dict(zip(ids, tokens))
  self._tokens_to_ids = dict(zip(tokens, ids))
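Starting the id range at _num_reserved_ids means chunk tokens can never shadow the reserved IDs. A stand-alone sketch of the same mapping, with an assumed single-base token set and a reserved count of 2:

num_reserved_ids = 2                        # assumed: <pad>=0, <EOS>=1
tokens = sorted(["A", "C", "G", "T"])       # chunk_size == 1 vocabulary (assumed)
ids = range(num_reserved_ids, len(tokens) + num_reserved_ids)
ids_to_tokens = dict(zip(ids, tokens))
print(ids_to_tokens)                        # {2: 'A', 3: 'C', 4: 'G', 5: 'T'}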
Example 4: hparams
# Module to import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import NUM_RESERVED_TOKENS [as alias]
def hparams(self, defaults, unused_model_hparams):
  p = defaults
  vocab_size = self.num_symbols + text_encoder.NUM_RESERVED_TOKENS
  p.modality = {"inputs": modalities.ModalityType.SYMBOL,
                "targets": modalities.ModalityType.SYMBOL}
  p.vocab_size = {"inputs": vocab_size,
                  "targets": vocab_size}
  p.input_space_id = problem.SpaceID.DIGIT_0
  p.target_space_id = problem.SpaceID.DIGIT_1
Example 5: hparams
# Module to import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import NUM_RESERVED_TOKENS [as alias]
def hparams(self, defaults, unused_model_hparams):
  p = defaults
  vocab_size = self.num_symbols + text_encoder.NUM_RESERVED_TOKENS
  p.modality = {"inputs": modalities.SymbolModality,
                "targets": modalities.SymbolModality}
  p.vocab_size = {"inputs": vocab_size,
                  "targets": vocab_size}
  p.input_space_id = problem.SpaceID.DIGIT_0
  p.target_space_id = problem.SpaceID.DIGIT_1
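Examples 2, 4, and 5 compute the vocabulary size identically; they differ only in how the symbol modality is declared, which appears to reflect different tensor2tensor versions (the older input_modality/target_modality tuples, the modalities.ModalityType enum, and the modalities.SymbolModality class).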
Example 6: __init__
# Module to import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import NUM_RESERVED_TOKENS [as alias]
def __init__(self,
             vocab_filename=None,
             vocab_list=None,
             separator="",
             num_reserved_ids=text_encoder.NUM_RESERVED_TOKENS):
  """Initialize from a file or list, one token per line.

  Handling of reserved tokens works as follows:
  - When initializing from a list, we add reserved tokens to the vocab.
  - When initializing from a file, we do not add reserved tokens to the vocab.
  - When saving vocab files, we save reserved tokens to the file.

  Args:
    vocab_filename: If not None, the full filename to read vocab from. If this
      is not None, then vocab_list should be None.
    vocab_list: If not None, a list of elements of the vocabulary. If this is
      not None, then vocab_filename should be None.
    separator: separator between symbols in original file.
    num_reserved_ids: Number of IDs to save for reserved tokens like <EOS>.
  """
  super(GraphemePhonemeEncoder, self).__init__(
      num_reserved_ids=num_reserved_ids)
  if vocab_filename and os.path.exists(vocab_filename):
    self._init_vocab_from_file(vocab_filename)
  else:
    assert vocab_list is not None
    self._init_vocab_from_list(vocab_list)
  self._separator = separator
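A minimal usage sketch, assuming only the constructor shown above; the grapheme list and file path are illustrative, and _init_vocab_from_list / _init_vocab_from_file are defined elsewhere in the same class:

# Hypothetical grapheme vocabulary; per the docstring, reserved tokens are
# added automatically when initializing from a list.
graphemes = ["a", "b", "c", "d"]
encoder = GraphemePhonemeEncoder(vocab_list=graphemes, separator=" ")

# Initializing from a file would NOT add reserved tokens (hypothetical path):
# encoder = GraphemePhonemeEncoder(vocab_filename="/path/to/vocab.grapheme")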