This article collects typical usage examples of the Python attribute tensor2tensor.data_generators.text_encoder.NUM_RESERVED_TOKENS. If you are wondering what text_encoder.NUM_RESERVED_TOKENS is, how it is used, and what it looks like in practice, the curated attribute examples below may help. You can also explore the containing module, tensor2tensor.data_generators.text_encoder, for further usage.
The following presents 6 code examples of text_encoder.NUM_RESERVED_TOKENS, sorted by popularity by default.
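Before looking at the examples, it helps to know what the attribute refers to. In text_encoder, the first few IDs are reserved for special tokens, a padding token and an end-of-sequence token, and NUM_RESERVED_TOKENS is the count of those reserved slots. A minimal sketch, assuming tensor2tensor is installed:

# Minimal sketch, assuming tensor2tensor is installed.
from tensor2tensor.data_generators import text_encoder

print(text_encoder.RESERVED_TOKENS)      # ['<pad>', '<EOS>']
print(text_encoder.NUM_RESERVED_TOKENS)  # 2
print(text_encoder.PAD_ID, text_encoder.EOS_ID)  # 0 1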
Example 1: generate_data
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import NUM_RESERVED_TOKENS [as alias]
def generate_data(self, data_dir, _, task_id=-1):

  def generator_eos(nbr_symbols, max_length, nbr_cases):
    """Shift by NUM_RESERVED_IDS and append EOS token."""
    for case in self.generator(nbr_symbols, max_length, nbr_cases):
      new_case = {}
      for feature in case:
        # Shift raw symbols past the reserved IDs, then terminate with EOS.
        new_case[feature] = [
            i + text_encoder.NUM_RESERVED_TOKENS for i in case[feature]
        ] + [text_encoder.EOS_ID]
      yield new_case

  # Write the (pre-shuffled) training and dev shards to data_dir.
  utils.generate_dataset_and_shuffle(
      generator_eos(self.num_symbols, self.train_length, self.train_size),
      self.training_filepaths(data_dir, self.num_shards, shuffled=True),
      generator_eos(self.num_symbols, self.dev_length, self.dev_size),
      self.dev_filepaths(data_dir, 1, shuffled=True),
      shuffle=False)
Example 2: hparams
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import NUM_RESERVED_TOKENS [as alias]
def hparams(self, defaults, unused_model_hparams):
  p = defaults
  # Reserve room in the symbol vocabulary for the reserved tokens.
  vocab_size = self.num_symbols + text_encoder.NUM_RESERVED_TOKENS
  p.input_modality = {"inputs": (registry.Modalities.SYMBOL, vocab_size)}
  p.target_modality = (registry.Modalities.SYMBOL, vocab_size)
  p.input_space_id = problem.SpaceID.DIGIT_0
  p.target_space_id = problem.SpaceID.DIGIT_1
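Because the shifted symbols must still fit in the embedding table, the vocabulary size reported to the model is the raw symbol count plus the reserved slots. A quick arithmetic sketch with a hypothetical num_symbols:

# Hypothetical: 10 raw symbols plus 2 reserved IDs -> SYMBOL modality of size 12.
NUM_RESERVED_TOKENS = 2
num_symbols = 10
vocab_size = num_symbols + NUM_RESERVED_TOKENS
print(vocab_size)  # 12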
Example 3: __init__
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import NUM_RESERVED_TOKENS [as alias]
def __init__(self,
             chunk_size=1,
             num_reserved_ids=text_encoder.NUM_RESERVED_TOKENS):
  super(DNAEncoder, self).__init__(num_reserved_ids=num_reserved_ids)
  # Build a vocabulary of chunks of size chunk_size.
  self._chunk_size = chunk_size
  tokens = self._tokens()
  tokens.sort()
  # Token IDs start after the reserved IDs.
  ids = range(self._num_reserved_ids, len(tokens) + self._num_reserved_ids)
  self._ids_to_tokens = dict(zip(ids, tokens))
  self._tokens_to_ids = dict(zip(tokens, ids))
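The same offset appears when DNAEncoder numbers its chunk vocabulary: IDs start at num_reserved_ids rather than 0. A self-contained sketch of that numbering, assuming the default of 2 reserved IDs (the single-base tokens here are purely illustrative):

# Sketch of the ID assignment above, assuming num_reserved_ids == 2.
num_reserved_ids = 2
tokens = sorted(["A", "C", "G", "T"])  # illustrative chunk tokens
ids = range(num_reserved_ids, len(tokens) + num_reserved_ids)
ids_to_tokens = dict(zip(ids, tokens))
tokens_to_ids = dict(zip(tokens, ids))
print(ids_to_tokens)  # {2: 'A', 3: 'C', 4: 'G', 5: 'T'}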
Example 4: hparams
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import NUM_RESERVED_TOKENS [as alias]
def hparams(self, defaults, unused_model_hparams):
  p = defaults
  vocab_size = self.num_symbols + text_encoder.NUM_RESERVED_TOKENS
  p.modality = {"inputs": modalities.ModalityType.SYMBOL,
                "targets": modalities.ModalityType.SYMBOL}
  p.vocab_size = {"inputs": vocab_size,
                  "targets": vocab_size}
  p.input_space_id = problem.SpaceID.DIGIT_0
  p.target_space_id = problem.SpaceID.DIGIT_1
Example 5: hparams
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import NUM_RESERVED_TOKENS [as alias]
def hparams(self, defaults, unused_model_hparams):
  p = defaults
  vocab_size = self.num_symbols + text_encoder.NUM_RESERVED_TOKENS
  p.modality = {"inputs": modalities.SymbolModality,
                "targets": modalities.SymbolModality}
  p.vocab_size = {"inputs": vocab_size,
                  "targets": vocab_size}
  p.input_space_id = problem.SpaceID.DIGIT_0
  p.target_space_id = problem.SpaceID.DIGIT_1
Example 6: __init__
# Required import: from tensor2tensor.data_generators import text_encoder [as alias]
# Or: from tensor2tensor.data_generators.text_encoder import NUM_RESERVED_TOKENS [as alias]
def __init__(self,
             vocab_filename=None,
             vocab_list=None,
             separator="",
             num_reserved_ids=text_encoder.NUM_RESERVED_TOKENS):
  """Initialize from a file or list, one token per line.

  Handling of reserved tokens works as follows:
  - When initializing from a list, we add reserved tokens to the vocab.
  - When initializing from a file, we do not add reserved tokens to the vocab.
  - When saving vocab files, we save reserved tokens to the file.

  Args:
    vocab_filename: If not None, the full filename to read vocab from. If this
      is not None, then vocab_list should be None.
    vocab_list: If not None, a list of elements of the vocabulary. If this is
      not None, then vocab_filename should be None.
    separator: separator between symbols in the original file.
    num_reserved_ids: Number of IDs to save for reserved tokens like <EOS>.
  """
  super(GraphemePhonemeEncoder, self).__init__(
      num_reserved_ids=num_reserved_ids)
  if vocab_filename and os.path.exists(vocab_filename):
    self._init_vocab_from_file(vocab_filename)
  else:
    assert vocab_list is not None
    self._init_vocab_from_list(vocab_list)
  self._separator = separator
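The docstring above distinguishes list-based from file-based initialization: only the list path adds the reserved tokens, since a saved vocab file already contains them. A minimal sketch of what that convention means for the resulting ID space, assuming the reserved tokens are <pad> and <EOS> as in text_encoder (the helper below is hypothetical, not the encoder's real internals):

# Hypothetical sketch of the list-vs-file convention described above.
RESERVED_TOKENS = ["<pad>", "<EOS>"]

def build_vocab(tokens, from_file=False):
  # A vocab file is assumed to already contain the reserved tokens,
  # so only list-based initialization prepends them.
  all_tokens = tokens if from_file else RESERVED_TOKENS + tokens
  return {token: idx for idx, token in enumerate(all_tokens)}

print(build_vocab(["AA", "AE", "K"]))
# {'<pad>': 0, '<EOS>': 1, 'AA': 2, 'AE': 3, 'K': 4}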