本文整理汇总了 Python 中 data.PAD_TOKEN 属性的典型用法代码示例。如果您正苦于以下问题:Python data.PAD_TOKEN 属性的具体用法?Python data.PAD_TOKEN 怎么用?Python data.PAD_TOKEN 使用的例子?那么,这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在模块 data 的用法示例。
在下文中一共展示了data.PAD_TOKEN属性的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: import data [as 别名]
# 或者: from data import PAD_TOKEN [as 别名]
def __init__(self, example_list, hps, vocab):
    """Build a Batch object out of a list of examples.

    Args:
        example_list: List of Example objects.
        hps: Hyperparameters; kept on the instance as ``_hps`` and passed on
            to the encoder/decoder initialisers.
        vocab: Vocabulary object used to look up the padding-token ids.
    """
    self._hps = hps

    # Vocabulary ids of the two padding tokens (PAD_TOKEN pads sequences).
    word_to_id = vocab.word2id
    self.pad_id = word_to_id(data.PAD_TOKEN)
    self.sec_pad_id = word_to_id(data.SEC_PAD_TOKEN)

    # Encoder input first, then decoder input and targets.
    self.init_encoder_seq(example_list, hps)
    self.init_decoder_seq(example_list, hps)

    # Keep the original strings alongside the initialised sequences.
    self.store_orig_strings(example_list)
示例2: __init__
# 需要导入模块: import data [as 别名]
# 或者: from data import PAD_TOKEN [as 别名]
def __init__(self, example_list, hps, vocab):
    """Build a Batch object out of a list of examples.

    Which parts are initialised depends on ``hps.model``: the rewriter
    inputs/targets for 'rewriter', the selector inputs/targets for
    'selector', and both for 'end2end'.

    Args:
        example_list: List of Example objects.
        hps: Hyperparameters; ``hps.model`` selects what gets initialised.
        vocab: Vocabulary object used to look up the PAD token id.
    """
    rewriter_models = ('rewriter', 'end2end')
    selector_models = ('selector', 'end2end')

    # Id of the PAD token used to pad sequences.
    self.pad_id = vocab.word2id(data.PAD_TOKEN)

    if hps.model in rewriter_models:
        # Rewriter: encoder input, then decoder input and targets.
        self.init_rewriter_encoder_seq(example_list, hps)
        self.init_rewriter_decoder_seq(example_list, hps)

    if hps.model in selector_models:
        # Selector: encoder input, then the selector target.
        self.init_selector_encoder_seq(example_list, hps)
        self.init_selector_target(example_list, hps)

    # Keep the original strings alongside the initialised sequences.
    self.store_orig_strings(example_list)
示例3: create_batch
# 需要导入模块: import data [as 别名]
# 或者: from data import PAD_TOKEN [as 别名]
def create_batch(example_list, hps, vocab):
    """Create a Batch object and populate it from a list of examples.

    Args:
        example_list: List of Example objects.
        hps: Hyperparameters, passed on to the encoder/decoder initialisers.
        vocab: Vocabulary object used to look up the PAD token id.

    Returns:
        The populated Batch object.
    """
    new_batch = Batch()

    # Id of the PAD token; set before the sequence initialisers run.
    new_batch.pad_id = vocab.word2id(data.PAD_TOKEN)

    # Encoder input first, then decoder input and targets.
    new_batch.init_encoder_seq(example_list, hps)
    new_batch.init_decoder_seq(example_list, hps)

    # Keep the original strings alongside the initialised sequences.
    new_batch.store_orig_strings(example_list)
    return new_batch
示例4: __init__
# 需要导入模块: import data [as 别名]
# 或者: from data import PAD_TOKEN [as 别名]
def __init__(self, example_list, hps, vocab):
    """Build a Batch object out of a list of examples.

    Args:
        example_list: List of Example objects.
        hps: Hyperparameters, passed on to the encoder/decoder initialisers.
        vocab: Vocabulary object used to look up the PAD token id.
    """
    # Id of the PAD token used to pad sequences.
    pad_token_id = vocab.word2id(data.PAD_TOKEN)
    self.pad_id = pad_token_id

    # Input to the encoder.
    self.init_encoder_seq(example_list, hps)

    # Input and targets for the decoder.
    self.init_decoder_seq(example_list, hps)

    # Original strings.
    self.store_orig_strings(example_list)
示例5: __init__
# 需要导入模块: import data [as 别名]
# 或者: from data import PAD_TOKEN [as 别名]
def __init__(self, example_list, hps, vocab):
    """Convert ``example_list`` into the contents of this Batch.

    Args:
        example_list: List of Example objects.
        hps: Hyperparameters, passed on to the encoder/decoder initialisers.
        vocab: Vocabulary object used to look up the PAD token id.
    """
    # The PAD token id is stored first, ahead of the sequence initialisers.
    self.pad_id = vocab.word2id(data.PAD_TOKEN)

    # 1) encoder input, 2) decoder input and targets, 3) original strings.
    self.init_encoder_seq(example_list, hps)
    self.init_decoder_seq(example_list, hps)
    self.store_orig_strings(example_list)
示例6: __init__
# 需要导入模块: import data [as 别名]
# 或者: from data import PAD_TOKEN [as 别名]
def __init__(self, example_list, vocab, batch_size):
    """Build a Batch object out of a list of examples.

    Args:
        example_list: List of Example objects.
        vocab: Vocabulary object used to look up the PAD token id.
        batch_size: Stored on the instance as ``batch_size``.
    """
    self.batch_size = batch_size

    # Id of the PAD token used to pad sequences.
    pad_token_id = vocab.word2id(data.PAD_TOKEN)
    self.pad_id = pad_token_id

    # Encoder input first, then decoder input and targets.
    self.init_encoder_seq(example_list)
    self.init_decoder_seq(example_list)

    # Keep the original strings alongside the initialised sequences.
    self.store_orig_strings(example_list)
示例7: __init__
# 需要导入模块: import data [as 别名]
# 或者: from data import PAD_TOKEN [as 别名]
def __init__(self, example_list, hps, vocab):
    """Populate this Batch from a list of examples.

    Args:
        example_list: List of Example objects.
        hps: Hyperparameters, passed on to the encoder/decoder initialisers.
        vocab: Vocabulary object used to look up the PAD token id.
    """
    # Id of the PAD token used to pad sequences.
    self.pad_id = vocab.word2id(data.PAD_TOKEN)

    self.init_encoder_seq(example_list, hps)  # input to the encoder
    self.init_decoder_seq(example_list, hps)  # input and targets for the decoder
    self.store_orig_strings(example_list)  # original strings
示例8: _get_section_words
# 需要导入模块: import data [as 别名]
# 或者: from data import PAD_TOKEN [as 别名]
def _get_section_words(sec, max_len=None, pad_id=data.PAD_TOKEN, pad=True):
    """Flatten a section (a list of sentences) into one list of words.

    The sentences are joined with single spaces and split on whitespace.

    Args:
        sec: List of sentence strings.
        max_len: Length of the returned list; defaults to the number of
            words found, in which case nothing is padded or truncated.
        pad_id: Value appended to fill the list up to ``max_len``.
        pad: When false, short sections are returned unpadded.

    Returns:
        A new list holding at most ``max_len`` items.
    """
    tokens = ' '.join(sec).split()
    limit = len(tokens) if max_len is None else max_len
    if pad:
        # A non-positive repeat count yields an empty list, so this is a
        # no-op when the section is already long enough.
        tokens.extend([pad_id] * (limit - len(tokens)))
    return tokens[:limit]
示例9: _pad_words
# 需要导入模块: import data [as 别名]
# 或者: from data import PAD_TOKEN [as 别名]
def _pad_words(words, max_len=None, pad_id=data.PAD_TOKEN):
    """Return a copy of ``words`` padded or truncated to ``max_len`` items.

    The input sequence is never modified: padding is applied to a copy, so a
    caller's list does not silently grow as a side effect of this call.

    Args:
        words: Sequence of words (any sliceable sequence).
        max_len: Length of the returned list; defaults to ``len(words)``,
            in which case an unchanged copy is returned.
        pad_id: Value appended to fill the list up to ``max_len``.

    Returns:
        A new list: ``words`` cut to ``max_len`` items, with ``pad_id``
        appended as often as needed to reach ``max_len``.
    """
    if max_len is None:
        return list(words)
    padded = list(words[:max_len])
    # A non-positive repeat count yields an empty list, so nothing is added
    # when the input is already long enough.
    padded.extend([pad_id] * (max_len - len(padded)))
    return padded
示例10: __init__
# 需要导入模块: import data [as 别名]
# 或者: from data import PAD_TOKEN [as 别名]
def __init__(self, example_list, hps, vocab):
    """Build a Batch object out of a list of examples.

    Only the decoder-side initialiser is called here.

    Args:
        example_list: List of Example objects.
        hps: Hyperparameters, passed on to the decoder initialiser.
        vocab: Vocabulary object used to look up the PAD token id.
    """
    # Id of the PAD token used to pad sequences.
    pad_token_id = vocab.word2id(data.PAD_TOKEN)
    self.pad_id = pad_token_id

    # Initialise the decoder sequences.
    self.init_decoder_seq(example_list, hps)
示例11: __init__
# 需要导入模块: import data [as 别名]
# 或者: from data import PAD_TOKEN [as 别名]
def __init__(self, example_list, hps, vocab):
    """Build a Batch object out of a list of examples.

    Args:
        example_list: List of Example objects.
        hps: Hyperparameters, passed on to the encoder/decoder initialisers.
        vocab: Vocabulary object used to look up the PAD token id.
    """
    # Id of the PAD token used to pad sequences.
    self.pad_id = vocab.word2id(data.PAD_TOKEN)

    # NOTE(review): only the encoder initialisation is taken to be guarded
    # by the run_method check; confirm against the intended layout.
    needs_encoder_input = FLAGS.run_method == 'auto-encoder'
    if needs_encoder_input:
        self.init_encoder_seq(example_list, hps)

    # Decoder input and targets, then the original strings.
    self.init_decoder_seq(example_list, hps)
    self.store_orig_strings(example_list)
示例12: main
# 需要导入模块: import data [as 别名]
# 或者: from data import PAD_TOKEN [as 别名]
def main(unused_argv):
    """Build the vocab, hyperparameters and batcher, then run ``FLAGS.mode``.

    'train' and 'eval' build a Seq2SeqAttentionModel and hand it to
    ``_Train`` / ``_Eval``; 'decode' builds a single-step decoder model and
    runs the beam-search decode loop. Any other mode falls through without
    running anything.

    Args:
        unused_argv: Ignored.

    Raises:
        ValueError: If a required special token has an invalid vocab id.
    """
    vocab = data.Vocab(FLAGS.vocab_path, 1000000)

    # Check for presence of required special tokens. Explicit raises are used
    # instead of `assert` so the checks are not stripped under `python -O`.
    if not vocab.CheckVocab(data.PAD_TOKEN) > 0:
        raise ValueError('Invalid PAD_TOKEN id.')
    if not vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0:
        raise ValueError('Invalid UNKNOWN_TOKEN id.')
    if not vocab.CheckVocab(data.SENTENCE_START) > 0:
        raise ValueError('Invalid SENTENCE_START id.')
    if not vocab.CheckVocab(data.SENTENCE_END) > 0:
        raise ValueError('Invalid SENTENCE_END id.')

    # In decode mode the batch size is the beam size.
    batch_size = 4
    if FLAGS.mode == 'decode':
        batch_size = FLAGS.beam_size

    hps = seq2seq_attention_model.HParams(
        mode=FLAGS.mode,  # train, eval, decode
        min_lr=0.01,  # min learning rate.
        lr=0.15,  # learning rate
        batch_size=batch_size,
        enc_layers=4,
        enc_timesteps=120,
        dec_timesteps=30,
        min_input_len=2,  # discard articles/summaries < than this
        num_hidden=256,  # for rnn cell
        emb_dim=128,  # If 0, don't use embedding
        max_grad_norm=2,
        num_softmax_samples=4096)  # If 0, no sampled softmax.

    batcher = batch_reader.Batcher(
        FLAGS.data_path, vocab, hps, FLAGS.article_key,
        FLAGS.abstract_key, FLAGS.max_article_sentences,
        FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
        truncate_input=FLAGS.truncate_input)
    tf.set_random_seed(FLAGS.random_seed)

    if hps.mode == 'train':
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            hps, vocab, num_gpus=FLAGS.num_gpus)
        _Train(model, batcher)
    elif hps.mode == 'eval':
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            hps, vocab, num_gpus=FLAGS.num_gpus)
        _Eval(model, batcher, vocab=vocab)
    elif hps.mode == 'decode':
        # Only need to restore the 1st step and reuse it since
        # we keep and feed in state for each step's output.
        decode_mdl_hps = hps._replace(dec_timesteps=1)
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
        # The decoder itself still receives the unmodified hps.
        decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
        decoder.DecodeLoop()
示例13: main
# 需要导入模块: import data [as 别名]
# 或者: from data import PAD_TOKEN [as 别名]
def _coerce_override_value(current, raw):
    """Convert the override string ``raw`` to the type of ``current``."""
    if isinstance(current, bool):
        # bool('False') is True, so the usual "false" spellings are handled
        # explicitly; every other string keeps its plain truthiness.
        return raw.lower() not in ('false', '0', 'no', 'off', '')
    return type(current)(raw)


def _validate_special_token_ids(vocab):
    """Raise ValueError unless the special tokens have acceptable ids."""
    if vocab.WordToId(data.PAD_TOKEN) <= 0:
        raise ValueError('Invalid PAD_TOKEN id.')
    # id of the UNKNOWN_TOKEN should be "0" for copynet model
    # (message text below is kept verbatim).
    if vocab.WordToId(data.UNKNOWN_TOKEN) != 0:
        raise ValueError('Invalid UNKOWN_TOKEN id.')
    if vocab.WordToId(data.SENTENCE_START) <= 0:
        raise ValueError('Invalid SENTENCE_START id.')
    if vocab.WordToId(data.SENTENCE_END) <= 0:
        raise ValueError('Invalid SENTENCE_END id.')


def main(unused_argv):
    """Load the config named by ``FLAGS.config`` and run ``FLAGS.mode``.

    Steps: import ``config.<FLAGS.config>``, apply the comma-separated
    ``name=value`` overrides from ``FLAGS.override``, load and validate the
    input (and optional output) vocabulary, build the train/valid batchers,
    then train, evaluate or decode. Any other mode falls through without
    running anything.

    Args:
        unused_argv: Ignored.

    Raises:
        ValueError: If a special token has an invalid id in a vocabulary.
    """
    config = importlib.import_module('config.%s' % FLAGS.config)

    for argument in FLAGS.override.split(','):
        # Split on the first '=' only, so values may themselves contain '='.
        name, separator, raw_value = argument.partition('=')
        if not separator:
            continue
        value = _coerce_override_value(getattr(config, name), raw_value)
        setattr(config, name, value)

    config.input_vocab = data.Vocab(config.input_vocab_file,
                                    config.max_vocab_size)  # Max IDs
    _validate_special_token_ids(config.input_vocab)

    if config.output_vocab_file:
        config.output_vocab = data.Vocab(config.output_vocab_file,
                                         config.max_vocab_size)  # Max IDs
        _validate_special_token_ids(config.output_vocab)
    else:
        # No separate output vocabulary: share the input one.
        config.output_vocab = config.input_vocab

    train_batcher = config.Batcher(config.train_set, config)
    valid_batcher = config.Batcher(config.valid_set, config)
    tf.set_random_seed(config.random_seed)

    if FLAGS.mode == 'train':
        model = config.Model(config, 'train', num_gpus=FLAGS.num_gpus)
        _Train(model, config, train_batcher)
    elif FLAGS.mode == 'eval':
        # NOTE(review): presumably keep-probabilities, so 1.0 disables
        # dropout outside training; confirm against the model code.
        config.dropout_rnn = 1.0
        config.dropout_emb = 1.0
        model = config.Model(config, 'eval', num_gpus=FLAGS.num_gpus)
        _Eval(model, config, valid_batcher)
    elif FLAGS.mode == 'decode':
        config.dropout_rnn = 1.0
        config.dropout_emb = 1.0
        # In decode mode the batch size is set to the beam size.
        config.batch_size = config.beam_size
        model = config.Model(config, 'decode', num_gpus=FLAGS.num_gpus)
        decoder = decode.BeamSearch(model, valid_batcher, config)
        decoder.DecodeLoop()
示例14: main
# 需要导入模块: import data [as 别名]
# 或者: from data import PAD_TOKEN [as 别名]
def main(unused_argv):
    """Build the vocab, hyperparameters and batcher, then run ``FLAGS.mode``.

    'train' and 'eval' build a Seq2SeqAttentionModel and hand it to
    ``_Train`` / ``_Eval``; 'decode' builds a single-step decoder model and
    runs the beam-search decode loop. Any other mode falls through without
    running anything.

    Args:
        unused_argv: Ignored.

    Raises:
        ValueError: If a required special token has an invalid vocab id.
    """
    vocab = data.Vocab(FLAGS.vocab_path, 1000000)

    # Check for presence of required special tokens. Explicit raises are used
    # instead of `assert` so the checks are not stripped under `python -O`.
    if not vocab.WordToId(data.PAD_TOKEN) > 0:
        raise ValueError('Invalid PAD_TOKEN id.')
    if not vocab.WordToId(data.UNKNOWN_TOKEN) >= 0:
        raise ValueError('Invalid UNKNOWN_TOKEN id.')
    if not vocab.WordToId(data.SENTENCE_START) > 0:
        raise ValueError('Invalid SENTENCE_START id.')
    if not vocab.WordToId(data.SENTENCE_END) > 0:
        raise ValueError('Invalid SENTENCE_END id.')

    # In decode mode the batch size is the beam size.
    batch_size = 4
    if FLAGS.mode == 'decode':
        batch_size = FLAGS.beam_size

    hps = seq2seq_attention_model.HParams(
        mode=FLAGS.mode,  # train, eval, decode
        min_lr=0.01,  # min learning rate.
        lr=0.15,  # learning rate
        batch_size=batch_size,
        enc_layers=4,
        enc_timesteps=120,
        dec_timesteps=30,
        min_input_len=2,  # discard articles/summaries < than this
        num_hidden=256,  # for rnn cell
        emb_dim=128,  # If 0, don't use embedding
        max_grad_norm=2,
        num_softmax_samples=4096)  # If 0, no sampled softmax.

    batcher = batch_reader.Batcher(
        FLAGS.data_path, vocab, hps, FLAGS.article_key,
        FLAGS.abstract_key, FLAGS.max_article_sentences,
        FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
        truncate_input=FLAGS.truncate_input)
    tf.set_random_seed(FLAGS.random_seed)

    if hps.mode == 'train':
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            hps, vocab, num_gpus=FLAGS.num_gpus)
        _Train(model, batcher)
    elif hps.mode == 'eval':
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            hps, vocab, num_gpus=FLAGS.num_gpus)
        _Eval(model, batcher, vocab=vocab)
    elif hps.mode == 'decode':
        # Only need to restore the 1st step and reuse it since
        # we keep and feed in state for each step's output.
        decode_mdl_hps = hps._replace(dec_timesteps=1)
        model = seq2seq_attention_model.Seq2SeqAttentionModel(
            decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
        # The decoder itself still receives the unmodified hps.
        decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
        decoder.DecodeLoop()