This article collects typical usage examples of the Python method data.get_batch. If you are wondering how to use data.get_batch, how it works, or where to find examples of it in practice, the curated code examples below may help. You can also explore further usage examples of the data module that this method belongs to.
Eight code examples of the data.get_batch method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
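Note that data.get_batch is not a single shared API: each example below comes from a different project that ships its own data module, so both the argument list and the return value differ from example to example (bucketed seq2seq batches in Examples 1, 2 and 8; image batches in Example 3; bag-of-words tensors in Examples 4 and 5; padded word-embedding batches in Examples 6 and 7).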
Example 1: _eval_test_set
# Required module: import data [as alias]
# Or: from data import get_batch [as alias]
def _eval_test_set(sess, model, test_buckets):
    """ Evaluate on the test set. """
    for bucket_id in range(len(config.BUCKETS)):
        if len(test_buckets[bucket_id]) == 0:
            print(" Test: empty bucket %d" % (bucket_id))
            continue
        start = time.time()
        encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(test_buckets[bucket_id],
                                                                       bucket_id,
                                                                       batch_size=config.BATCH_SIZE)
        _, step_loss, _ = run_step(sess, model, encoder_inputs, decoder_inputs,
                                   decoder_masks, bucket_id, True)
        print('Test bucket {}: loss {}, time {}'.format(bucket_id, step_loss, time.time() - start))
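For orientation, here is a minimal sketch of what a bucket-style get_batch like the one called above typically does in this kind of seq2seq chatbot code. The names BUCKETS, PAD_ID and get_batch_sketch, the padding order, and the time-major layout are assumptions for illustration, not the project's actual data module.

import random

# Illustrative sketch only: BUCKETS and PAD_ID are assumed constants, not the real module's.
BUCKETS = [(8, 10), (12, 14), (16, 19)]  # (max encoder length, max decoder length) per bucket
PAD_ID = 0

def get_batch_sketch(data_bucket, bucket_id, batch_size=64):
    """Sample (question, answer) id pairs from one bucket, pad them, return time-major batches."""
    encoder_size, decoder_size = BUCKETS[bucket_id]
    encoder_inputs, decoder_inputs = [], []
    for _ in range(batch_size):
        encoder_input, decoder_input = random.choice(data_bucket)
        encoder_inputs.append(list(reversed(encoder_input)) + [PAD_ID] * (encoder_size - len(encoder_input)))
        decoder_inputs.append(decoder_input + [PAD_ID] * (decoder_size - len(decoder_input)))
    # transpose to time-major lists and build masks that zero out padding positions
    batch_encoder = [[encoder_inputs[b][t] for b in range(batch_size)] for t in range(encoder_size)]
    batch_decoder = [[decoder_inputs[b][t] for b in range(batch_size)] for t in range(decoder_size)]
    batch_masks = [[0.0 if decoder_inputs[b][t] == PAD_ID else 1.0 for b in range(batch_size)]
                   for t in range(decoder_size)]
    return batch_encoder, batch_decoder, batch_masks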
Example 2: train
# Required module: import data [as alias]
# Or: from data import get_batch [as alias]
def train():
    """ Train the bot """
    test_buckets, data_buckets, train_buckets_scale = _get_buckets()
    # in train mode, we need to create the backward path, so forward_only is False
    model = ChatBotModel(False, config.BATCH_SIZE)
    model.build_graph()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        print('Running session')
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)
        iteration = model.global_step.eval()
        total_loss = 0
        while True:
            skip_step = _get_skip_step(iteration)
            bucket_id = _get_random_bucket(train_buckets_scale)
            encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(data_buckets[bucket_id],
                                                                           bucket_id,
                                                                           batch_size=config.BATCH_SIZE)
            start = time.time()
            _, step_loss, _ = run_step(sess, model, encoder_inputs, decoder_inputs, decoder_masks, bucket_id, False)
            total_loss += step_loss
            iteration += 1
            if iteration % skip_step == 0:
                print('Iter {}: loss {}, time {}'.format(iteration, total_loss/skip_step, time.time() - start))
                start = time.time()
                total_loss = 0
                saver.save(sess, os.path.join(config.CPT_PATH, 'chatbot'), global_step=model.global_step)
                if iteration % (10 * skip_step) == 0:
                    # Run evals on development set and print their loss
                    _eval_test_set(sess, model, test_buckets)
                    start = time.time()
                sys.stdout.flush()
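The helper _get_random_bucket(train_buckets_scale) used above is not shown in this example. In this style of code it is usually a cumulative-probability lookup, roughly as in the following sketch (an assumption for illustration, not the project's actual helper):

import random

def _get_random_bucket_sketch(train_buckets_scale):
    """Pick a bucket id with probability proportional to its share of the training data.

    train_buckets_scale is assumed to be a cumulative distribution, e.g. [0.3, 0.7, 1.0].
    """
    rand = random.random()
    return min(i for i in range(len(train_buckets_scale)) if train_buckets_scale[i] > rand)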
Example 3: train
# Required module: import data [as alias]
# Or: from data import get_batch [as alias]
def train(self, LR=2e-4, B1=0.5, B2=0.999, iterations=50000, sample_frequency=10,
          sample_overlap=500, save_frequency=1000, domain_a="a", domain_b="b"):
    self.trainer_D = tf.train.AdamOptimizer(LR, beta1=B1, beta2=B2).minimize(self.l_disc, var_list=self.disc_params)
    self.trainer_G = tf.train.AdamOptimizer(LR, beta1=B1, beta2=B2).minimize(self.l_g, var_list=self.gen_params)
    with self.sess as sess:
        sess.run(tf.global_variables_initializer())
        if self.analytics:
            if not os.path.exists("logs"):
                os.makedirs("logs")
            self.summary_writer = tf.summary.FileWriter(os.getcwd() + '/logs', graph=sess.graph)
        for i in range(iterations):
            realA = data.get_batch(self.batch_size, domain_a)
            realB = data.get_batch(self.batch_size, domain_b)
            op_list = [self.trainer_D, self.l_disc, self.trainer_G, self.l_g, self.merged_summary_op]
            _, dLoss, _, gLoss, summary_str = sess.run(op_list, feed_dict={self.x_a: realA, self.x_b: realB})
            realA = data.get_batch(self.batch_size, domain_a)
            realB = data.get_batch(self.batch_size, domain_b)
            _, gLoss = sess.run([self.trainer_G, self.l_g], feed_dict={self.x_a: realA, self.x_b: realB})
            if i % 10 == 0:
                self.summary_writer.add_summary(summary_str, i)
                print("Generator Loss: " + str(gLoss) + "\tDiscriminator Loss: " + str(dLoss))
            if i % sample_frequency == 0:
                realA = data.get_batch(1, domain_a)
                realB = data.get_batch(1, domain_b)
                ops = [self.g_ba, self.g_ab, self.g_aba, self.g_bab]
                out_a, out_b, out_ab, out_ba = sess.run(ops, feed_dict={self.x_a: realA, self.x_b: realB})
                data.save(self.gen_a_dir + "/img" + str(i % sample_overlap) + '.png', out_a[0])
                data.save(self.gen_b_dir + "/img" + str(i % sample_overlap) + '.png', out_b[0])
                data.save(self.rec_a_dir + "/img" + str(i % sample_overlap) + '.png', out_ba[0])
                data.save(self.rec_b_dir + "/img" + str(i % sample_overlap) + '.png', out_ab[0])
            if i % save_frequency == 0:
                if not os.path.exists(self.model_directory):
                    os.makedirs(self.model_directory)
                self.saver.save(sess, self.model_directory + '/model-' + str(i) + '.ckpt')
                print("Saved Model")

"""
Restore previously saved weights from a trained / in-progress model.
"""
def restore(self):
    try:
        self.saver.restore(self.sess, tf.train.latest_checkpoint(self.model_directory))
    except Exception:
        print("Previous weights not found")
Example 4: train
# Required module: import data [as alias]
# Or: from data import get_batch [as alias]
def train(epoch):
    model.train()
    acc_loss = 0
    acc_kl_theta_loss = 0
    cnt = 0
    indices = torch.randperm(args.num_docs_train)
    indices = torch.split(indices, args.batch_size)
    for idx, ind in enumerate(indices):
        optimizer.zero_grad()
        model.zero_grad()
        data_batch = data.get_batch(train_tokens, train_counts, ind, args.vocab_size, device)
        sums = data_batch.sum(1).unsqueeze(1)
        if args.bow_norm:
            normalized_data_batch = data_batch / sums
        else:
            normalized_data_batch = data_batch
        recon_loss, kld_theta = model(data_batch, normalized_data_batch)
        total_loss = recon_loss + kld_theta
        total_loss.backward()
        if args.clip > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        acc_loss += torch.sum(recon_loss).item()
        acc_kl_theta_loss += torch.sum(kld_theta).item()
        cnt += 1
        if idx % args.log_interval == 0 and idx > 0:
            cur_loss = round(acc_loss / cnt, 2)
            cur_kl_theta = round(acc_kl_theta_loss / cnt, 2)
            cur_real_loss = round(cur_loss + cur_kl_theta, 2)
            print('Epoch: {} .. batch: {}/{} .. LR: {} .. KL_theta: {} .. Rec_loss: {} .. NELBO: {}'.format(
                epoch, idx, len(indices), optimizer.param_groups[0]['lr'], cur_kl_theta, cur_loss, cur_real_loss))
    cur_loss = round(acc_loss / cnt, 2)
    cur_kl_theta = round(acc_kl_theta_loss / cnt, 2)
    cur_real_loss = round(cur_loss + cur_kl_theta, 2)
    print('*'*100)
    print('Epoch----->{} .. LR: {} .. KL_theta: {} .. Rec_loss: {} .. NELBO: {}'.format(
        epoch, optimizer.param_groups[0]['lr'], cur_kl_theta, cur_loss, cur_real_loss))
    print('*'*100)
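In this topic-model training script, data.get_batch(tokens, counts, ind, vocab_size, device) builds a dense bag-of-words matrix for the selected documents. A minimal sketch of that behaviour, assuming tokens[i] and counts[i] hold the word ids and counts of document i (the exact storage format of the real module is an assumption):

import torch

def get_batch_sketch(tokens, counts, ind, vocab_size, device):
    """Illustrative only: turn the selected documents into a (num_docs, vocab_size) count matrix."""
    data_batch = torch.zeros(len(ind), vocab_size, device=device)
    for row, doc_id in enumerate(ind):
        doc_id = int(doc_id)          # ind may be a tensor of document indices
        doc_tokens = tokens[doc_id]   # word ids occurring in the document
        doc_counts = counts[doc_id]   # how often each of those words occurs
        for word_id, count in zip(doc_tokens, doc_counts):
            data_batch[row, int(word_id)] = float(count)
    return data_batch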
Example 5: evaluate
# Required module: import data [as alias]
# Or: from data import get_batch [as alias]
def evaluate(m, source, tc=False, td=False):
    """Compute perplexity on document completion.
    """
    m.eval()
    with torch.no_grad():
        if source == 'val':
            indices = torch.split(torch.tensor(range(args.num_docs_valid)), args.eval_batch_size)
            tokens = valid_tokens
            counts = valid_counts
        else:
            indices = torch.split(torch.tensor(range(args.num_docs_test)), args.eval_batch_size)
            tokens = test_tokens
            counts = test_counts
        ## get \beta here
        beta = m.get_beta()
        ### do dc and tc here
        acc_loss = 0
        cnt = 0
        indices_1 = torch.split(torch.tensor(range(args.num_docs_test_1)), args.eval_batch_size)
        for idx, ind in enumerate(indices_1):
            ## get theta from first half of docs
            data_batch_1 = data.get_batch(test_1_tokens, test_1_counts, ind, args.vocab_size, device)
            sums_1 = data_batch_1.sum(1).unsqueeze(1)
            if args.bow_norm:
                normalized_data_batch_1 = data_batch_1 / sums_1
            else:
                normalized_data_batch_1 = data_batch_1
            theta, _ = m.get_theta(normalized_data_batch_1)
            ## get prediction loss using second half
            data_batch_2 = data.get_batch(test_2_tokens, test_2_counts, ind, args.vocab_size, device)
            sums_2 = data_batch_2.sum(1).unsqueeze(1)
            res = torch.mm(theta, beta)
            preds = torch.log(res)
            recon_loss = -(preds * data_batch_2).sum(1)
            loss = recon_loss / sums_2.squeeze()
            loss = loss.mean().item()
            acc_loss += loss
            cnt += 1
        cur_loss = acc_loss / cnt
        ppl_dc = round(math.exp(cur_loss), 1)
        print('*'*100)
        print('{} Doc Completion PPL: {}'.format(source.upper(), ppl_dc))
        print('*'*100)
        if tc or td:
            beta = beta.data.cpu().numpy()
            if tc:
                print('Computing topic coherence...')
                get_topic_coherence(beta, train_tokens, vocab)
            if td:
                print('Computing topic diversity...')
                get_topic_diversity(beta, 25)
        return ppl_dc
Example 6: evaluate
# Required module: import data [as alias]
# Or: from data import get_batch [as alias]
def evaluate(epoch, eval_type='valid', final_eval=False):
    nli_net.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop
    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))
    s1 = valid['s1'] if eval_type == 'valid' else test['s1']
    s2 = valid['s2'] if eval_type == 'valid' else test['s2']
    target = valid['label'] if eval_type == 'valid' else test['label']
    for i in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size], word_vec, params.word_emb_dim)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size], word_vec, params.word_emb_dim)
        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(s2_batch.cuda())
        tgt_batch = Variable(torch.LongTensor(target[i:i + params.batch_size])).cuda()
        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()
    # save model
    eval_acc = round(100 * correct / len(s1), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} : {2}'.format(epoch, eval_type, eval_acc))
    if eval_type == 'valid' and epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            if not os.path.exists(params.outputdir):
                os.makedirs(params.outputdir)
            torch.save(nli_net.state_dict(), os.path.join(params.outputdir,
                       params.outputmodelname))
            val_acc_best = eval_acc
        else:
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'
                      .format(params.lrshrink, optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            if 'adam' in params.optimizer:
                # early stopping (at 2nd decrease in accuracy)
                stop_training = adam_stop
                adam_stop = True
    return eval_acc
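In this example, get_batch(batch, word_vec, word_emb_dim) is imported directly from the project's data module and turns a list of tokenised sentences into a padded batch of pretrained word embeddings plus the sentence lengths. A minimal sketch of that idea, assuming word_vec maps a token to a NumPy vector of size emb_dim (the exact layout of the real function is an assumption):

import numpy as np
import torch

def get_batch_sketch(batch, word_vec, emb_dim=300):
    """Illustrative only: embed and zero-pad a list of tokenised sentences."""
    lengths = np.array([len(sent) for sent in batch])
    max_len = int(lengths.max())
    embed = np.zeros((max_len, len(batch), emb_dim), dtype=np.float32)
    for i, sent in enumerate(batch):
        for j, word in enumerate(sent):
            embed[j, i, :] = word_vec[word]   # lookup in the pretrained embedding dict
    return torch.from_numpy(embed), lengths   # (max_len, batch, emb_dim), per-sentence lengths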
Example 7: evaluate
# Required module: import data [as alias]
# Or: from data import get_batch [as alias]
def evaluate(epoch, eval_type='valid', final_eval=False):
    nli_net.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop
    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))
    s1 = valid['s1'] if eval_type == 'valid' else test['s1']
    s2 = valid['s2'] if eval_type == 'valid' else test['s2']
    target = valid['label'] if eval_type == 'valid' else test['label']
    for i in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size], word_vec)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size], word_vec)
        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(s2_batch.cuda())
        tgt_batch = Variable(torch.LongTensor(target[i:i + params.batch_size])).cuda()
        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))
        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()
    # save model
    eval_acc = round(100 * correct / len(s1), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} : {2}'.format(epoch, eval_type, eval_acc))
    if eval_type == 'valid' and epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            if not os.path.exists(params.outputdir):
                os.makedirs(params.outputdir)
            torch.save(nli_net.state_dict(), os.path.join(params.outputdir,
                       params.outputmodelname))
            val_acc_best = eval_acc
        else:
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'
                      .format(params.lrshrink, optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            if 'adam' in params.optimizer:
                # early stopping (at 2nd decrease in accuracy)
                stop_training = adam_stop
                adam_stop = True
    return eval_acc
Example 8: chat
# Required module: import data [as alias]
# Or: from data import get_batch [as alias]
def chat():
    """ In test mode, we don't need to create the backward path.
    """
    _, enc_vocab = data.load_vocab(os.path.join(config.PROCESSED_PATH, 'vocab.enc'))
    inv_dec_vocab, _ = data.load_vocab(os.path.join(config.PROCESSED_PATH, 'vocab.dec'))
    model = ChatBotModel(True, batch_size=1)
    model.build_graph()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)
        output_file = open(os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+')
        # Decode from standard input.
        max_length = config.BUCKETS[-1][0]
        print('Welcome to TensorBro. Say something. Enter to exit. Max length is', max_length)
        while True:
            line = _get_user_input()
            if len(line) > 0 and line[-1] == '\n':
                line = line[:-1]
            if line == '':
                break
            output_file.write('HUMAN ++++ ' + line + '\n')
            # Get token-ids for the input sentence.
            token_ids = data.sentence2id(enc_vocab, str(line))
            if len(token_ids) > max_length:
                print('Max length I can handle is:', max_length)
                line = _get_user_input()
                continue
            # Which bucket does it belong to?
            bucket_id = _find_right_bucket(len(token_ids))
            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, decoder_masks = data.get_batch([(token_ids, [])],
                                                                           bucket_id,
                                                                           batch_size=1)
            # Get output logits for the sentence.
            _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs,
                                           decoder_masks, bucket_id, True)
            response = _construct_response(output_logits, inv_dec_vocab)
            print(response)
            output_file.write('BOT ++++ ' + response + '\n')
        output_file.write('=============================================\n')
        output_file.close()
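The _find_right_bucket(length) helper used above is not shown in this example; it normally just returns the smallest bucket whose encoder size can hold the tokenised input, roughly as in this sketch (an assumption, not the project's actual code; the default bucket sizes are made up):

def _find_right_bucket_sketch(length, buckets=((8, 10), (12, 14), (16, 19))):
    """Return the index of the smallest bucket whose encoder size can hold `length` tokens."""
    # the caller is assumed to have already rejected inputs longer than the largest bucket
    return min(b for b in range(len(buckets)) if buckets[b][0] >= length)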