This article collects typical usage examples of the Python function tensorflow.models.image.cifar10.cifar10.loss. If you are unsure how to call loss, what it does, or what its usage looks like in practice, the curated code examples below may help.
The following shows 12 code examples of the loss function, sorted by popularity by default.
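Before the examples, here is a minimal sketch of the typical call pattern. It is not taken from any example below; it assumes the legacy TF 1.x tutorial module layout and the helper functions cifar10.distorted_inputs, cifar10.inference, and cifar10.train that appear throughout the examples. cifar10.loss(logits, labels) adds the cross-entropy term to the 'losses' graph collection and returns a scalar total-loss tensor, which is then handed to cifar10.train to build the training op.

import tensorflow as tf
from tensorflow.models.image.cifar10 import cifar10

with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)
    images, labels = cifar10.distorted_inputs()         # distorted training batches
    logits = cifar10.inference(images)                  # forward pass
    total_loss = cifar10.loss(logits, labels)           # cross-entropy (+ weight decay) as a scalar tensor
    train_op = cifar10.train(total_loss, global_step)   # op that runs one training step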
Example 1: tower_loss
def tower_loss(scope):
"""Calculate the total loss on a single tower running the CIFAR model.
Args:
scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0'
Returns:
Tensor of shape [] containing the total loss for a batch of data
"""
# Get images and labels for CIFAR-10.
images, labels = cifar10.distorted_inputs()
# Build inference Graph.
logits = cifar10.inference(images)
# Build the portion of the Graph calculating the losses. Note that we will
# assemble the total_loss using a custom function below.
_ = cifar10.loss(logits, labels)
# Assemble all of the losses for the current tower only.
losses = tf.get_collection('losses', scope)
# Calculate the total loss for the current tower.
total_loss = tf.add_n(losses, name='total_loss')
# Attach a scalar summary to all individual losses and the total loss.
for l in losses + [total_loss]:
# Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
# session. This helps the clarity of presentation on tensorboard.
loss_name = re.sub('%s_[0-9]*/' % cifar10.TOWER_NAME, '', l.op.name)
tf.contrib.deprecated.scalar_summary(loss_name, l)
return total_loss
Example 2: train
def train():
"""Train CIFAR-10 for a number of steps."""
with tf.Graph().as_default():
global_step = tf.Variable(0, trainable=False)
# Get images and labels for CIFAR-10.
images, labels = cifar10.distorted_inputs()
# Build a Graph that computes the logits predictions from the
# inference model.
logits = cifar10.inference6(images)
# Calculate loss.
loss = cifar10.loss(logits, labels)
# Build a Graph that trains the model with one batch of examples and
# updates the model parameters.
train_op = cifar10.train(loss, global_step)
# Create a saver.
saver = tf.train.Saver(tf.all_variables())
# Build the summary operation based on the TF collection of Summaries.
summary_op = tf.merge_all_summaries()
# Build an initialization operation to run below.
init = tf.initialize_all_variables()
# Start running operations on the Graph.
sess = tf.Session(config=tf.ConfigProto(
log_device_placement=FLAGS.log_device_placement))
sess.run(init)
# Start the queue runners.
tf.train.start_queue_runners(sess=sess)
summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)
for step in xrange(FLAGS.max_steps):
start_time = time.time()
_, loss_value = sess.run([train_op, loss])
duration = time.time() - start_time
assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
if step % 10 == 0:
num_examples_per_step = FLAGS.batch_size
examples_per_sec = num_examples_per_step / duration
sec_per_batch = float(duration)
format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
'sec/batch)')
print (format_str % (datetime.now(), step, loss_value,
examples_per_sec, sec_per_batch))
if step % 100 == 0:
summary_str = sess.run(summary_op)
summary_writer.add_summary(summary_str, step)
checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
saver.save(sess, checkpoint_path, global_step=step)
Example 3: tower_loss
def tower_loss(scope):
"""Calculate the total loss on a single tower running the CIFAR model.
Args:
scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0'
Returns:
Tensor of shape [] containing the total loss for a batch of data
"""
# Get images and labels for CIFAR-10.
images, labels = cifar10.distorted_inputs()
# Build inference Graph.
logits = cifar10.inference(images)
# Build the portion of the Graph calculating the losses. Note that we will
# assemble the total_loss using a custom function below.
_ = cifar10.loss(logits, labels)
# Assemble all of the losses for the current tower only.
losses = tf.get_collection('losses', scope)
# Calculate the total loss for the current tower.
total_loss = tf.add_n(losses, name='total_loss')
# Compute the moving average of all individual losses and the total loss.
loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
loss_averages_op = loss_averages.apply(losses + [total_loss])
# Attach a scalar summary to all individual losses and the total loss; do the
# same for the averaged version of the losses.
for l in losses + [total_loss]:
# Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
# session. This helps the clarity of presentation on tensorboard.
loss_name = re.sub('%s_[0-9]*/' % cifar10.TOWER_NAME, '', l.op.name)
# Name each loss as '(raw)' and name the moving average version of the loss
# as the original loss name.
tf.scalar_summary(loss_name + ' (raw)', l)
tf.scalar_summary(loss_name, loss_averages.average(l))
with tf.control_dependencies([loss_averages_op]):
total_loss = tf.identity(total_loss)
return total_loss
Example 4: train
def train():
"""Train CIFAR-10 for a number of steps."""
with tf.Graph().as_default():
global_step = tf.contrib.framework.get_or_create_global_step()
# Get images and labels for CIFAR-10.
images, labels = cifar10.distorted_inputs()
#images, labels = cifar10.inputs()
# Build a Graph that computes the logits predictions from the
# inference model.
logits = cifar10.inference(images)
# Calculate loss.
loss = cifar10.loss(logits, labels)
# Build a Graph that trains the model with one batch of examples and
# updates the model parameters.
train_op = cifar10.train(loss, global_step)
class _LoggerHook(tf.train.SessionRunHook):
"""Logs loss and runtime."""
def begin(self):
self._step = -1
def before_run(self, run_context):
self._step += 1
self._start_time = time.time()
return tf.train.SessionRunArgs(loss) # Asks for loss value.
def after_run(self, run_context, run_values):
duration = time.time() - self._start_time
loss_value = run_values.results
if self._step % 10 == 0:
num_examples_per_step = FLAGS.batch_size
examples_per_sec = num_examples_per_step / duration
sec_per_batch = float(duration)
format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
'sec/batch)')
print (format_str % (datetime.now(), self._step, loss_value,
examples_per_sec, sec_per_batch))
with tf.train.MonitoredTrainingSession(
checkpoint_dir=FLAGS.train_dir,
hooks=[tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
tf.train.NanTensorHook(loss),
_LoggerHook()],
config=tf.ConfigProto(
log_device_placement=FLAGS.log_device_placement)) as mon_sess:
while not mon_sess.should_stop():
mon_sess.run(train_op)
Example 5: train
def train():
# ops
global_step = tf.Variable(0, trainable=False)
images, labels = cifar10.distorted_inputs()
logits = cifar10.inference(tf.image.resize_images(images, cifar10.IMAGE_SIZE, cifar10.IMAGE_SIZE))
loss = cifar10.loss(logits, labels)
train_op = cifar10.train(loss, global_step)
summary_op = tf.merge_all_summaries()
with tf.Session() as sess:
saver = tf.train.Saver(tf.all_variables(), max_to_keep=21)
summary_writer = tf.train.SummaryWriter(FLAGS.train_dir)
# restore or initialize variables
ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
else:
sess.run(tf.initialize_all_variables())
# Start the queue runners.
tf.train.start_queue_runners(sess=sess)
start = sess.run(global_step)
for step in xrange(start, FLAGS.max_steps):
start_time = time.time()
_, loss_value = sess.run([train_op, loss])
duration = time.time() - start_time
assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
print('%d: %f (%.3f sec/batch)' % (step, loss_value, duration))
if step % 100 == 0:
summary_str = sess.run(summary_op)
summary_writer.add_summary(summary_str, step)
if step % 500 == 0 or (step + 1) == FLAGS.max_steps:
checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
saver.save(sess, checkpoint_path, global_step=step)
Example 6: train
def train():
"""Train CIFAR-10 for a number of steps."""
with tf.Graph().as_default():
global_step = tf.Variable(0, trainable=False)
# Get images and labels for CIFAR-10.
images, labels = cifar10.distorted_inputs()
eval_data = FLAGS.eval_data == 'test'
#timages, tlabels = cifar10.inputs(eval_data=eval_data)
# Build a Graph that computes the logits predictions from the
# inference model.
logits = cifar10.inference(images)
#tlogits = cifar10.inference(timages)
# Calculate loss.
top_k_op = tf.nn.in_top_k(logits, labels, 1)
loss = cifar10.loss(logits, labels)
#precision = tf.Variable(0.8, name='precision')
#tf.scalar_summary('accuracy', precision)
# Build a Graph that trains the model with one batch of examples and
# updates the model parameters.
train_op = cifar10.train(loss, global_step)
# Create a saver.
saver = tf.train.Saver(tf.all_variables())
# Build the summary operation based on the TF collection of Summaries.
summary_op = tf.merge_all_summaries()
# Build an initialization operation to run below.
init = tf.initialize_all_variables()
# Start running operations on the Graph.
sess = tf.Session(config=tf.ConfigProto(
log_device_placement=FLAGS.log_device_placement))
sess.run(init)
# Start the queue runners.
tf.train.start_queue_runners(sess=sess)
sess.graph.finalize()
summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)
for step in xrange(FLAGS.max_steps):
start_time = time.time()
_, loss_value = sess.run([train_op, loss])
duration = time.time() - start_time
assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
if step % 100 == 0:
# Log progress and compute precision @ 1 on training batches.
num_examples_per_step = FLAGS.batch_size
examples_per_sec = num_examples_per_step / duration
sec_per_batch = float(duration)
format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
'sec/batch)')
print (format_str % (datetime.now(), step, loss_value,
examples_per_sec, sec_per_batch))
num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))
true_count = 0 # Counts the number of correct predictions.
total_sample_count = num_iter * FLAGS.batch_size
i_step = 0
while i_step < num_iter:
predictions = sess.run([top_k_op])
true_count += np.sum(predictions)
i_step += 1
#Compute precision @ 1.
#sess.run(precision.assign(true_count / total_sample_count))
prec = true_count / total_sample_count
print(prec)
summary = tf.Summary()
summary.ParseFromString(sess.run(summary_op))
summary.value.add(tag='accuracy', simple_value=prec)
summary_writer.add_summary(summary, step)
#summary_str = sess.run(summary_op)
#summary_writer.add_summary(summary_str, step)
#summary_writer.flush()
# Save the model checkpoint periodically.
if step % 100 == 0 or (step + 1) == FLAGS.max_steps:
checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
saver.save(sess, checkpoint_path, global_step=step)
Example 7: train
def train():
"""Train CIFAR-10 for a number of steps."""
with tf.Graph().as_default():
global_step = tf.Variable(0, trainable=False)
# Get images and labels for CIFAR-10.
images, labels = cifar10.distorted_inputs()
# Build a Graph that computes the logits predictions from the
# inference model.
logits = cifar10.inference(images)
# Calculate loss.
loss = cifar10.loss(logits, labels)
# Build a Graph that trains the model with one batch of examples and
# updates the model parameters.
train_op = cifar10.train(loss, global_step)
# Create a saver.
saver = tf.train.Saver(tf.all_variables())
# Build the summary operation based on the TF collection of Summaries.
summary_op = tf.merge_all_summaries()
# Build an initialization operation to run below.
init = tf.initialize_all_variables()
# Start running operations on the Graph.
sess = tf.Session(config=tf.ConfigProto(
log_device_placement=FLAGS.log_device_placement))
sess.run(init)
# Start the queue runners.
tf.train.start_queue_runners(sess=sess)
summary_writer = tf.train.SummaryWriter(FLAGS.train_dir)
summary_writer0 = tf.train.SummaryWriter(FLAGS.train_dir0)
summary_writer1= tf.train.SummaryWriter(FLAGS.train_dir1)
summary_writer2 = tf.train.SummaryWriter(FLAGS.train_dir2)
summary_writer3 = tf.train.SummaryWriter(FLAGS.train_dir3)
summary_writer4 = tf.train.SummaryWriter(FLAGS.train_dir4)
summary_writer5 = tf.train.SummaryWriter(FLAGS.train_dir5)
summary_writer6 = tf.train.SummaryWriter(FLAGS.train_dir6)
summary_writer7 = tf.train.SummaryWriter(FLAGS.train_dir7)
summary_writer8 = tf.train.SummaryWriter(FLAGS.train_dir8)
summary_writer9 = tf.train.SummaryWriter(FLAGS.train_dir9)
summary_writer10 = tf.train.SummaryWriter(FLAGS.train_dir10)
summary_writer11 = tf.train.SummaryWriter(FLAGS.train_dir11)
summary_writer12 = tf.train.SummaryWriter(FLAGS.train_dir12)
summary_writer13 = tf.train.SummaryWriter(FLAGS.train_dir13)
summary_writer14 = tf.train.SummaryWriter(FLAGS.train_dir14)
summary_writer15 = tf.train.SummaryWriter(FLAGS.train_dir15)
summary_writer16 = tf.train.SummaryWriter(FLAGS.train_dir16)
summary_writer17 = tf.train.SummaryWriter(FLAGS.train_dir17)
summary_writer18 = tf.train.SummaryWriter(FLAGS.train_dir18)
summary_writer19 = tf.train.SummaryWriter(FLAGS.train_dir19)
for step in xrange(FLAGS.max_steps):
start_time = time.time()
_, loss_value = sess.run([train_op, loss])
duration = time.time() - start_time
assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
if step % 10 == 0:
num_examples_per_step = FLAGS.batch_size
examples_per_sec = num_examples_per_step / duration
sec_per_batch = float(duration)
format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
'sec/batch)')
print (format_str % (datetime.now(), step, loss_value,
examples_per_sec, sec_per_batch))
if step % 100 == 0:
summary_str = sess.run(summary_op)
summary_writer.add_summary(summary_str, step)
summary_writer0.add_summary(summary_str, step)
summary_writer1.add_summary(summary_str, step)
summary_writer2.add_summary(summary_str, step)
summary_writer3.add_summary(summary_str, step)
summary_writer4.add_summary(summary_str, step)
summary_writer5.add_summary(summary_str, step)
summary_writer6.add_summary(summary_str, step)
summary_writer7.add_summary(summary_str, step)
summary_writer8.add_summary(summary_str, step)
summary_writer9.add_summary(summary_str, step)
summary_writer10.add_summary(summary_str, step)
summary_writer11.add_summary(summary_str, step)
summary_writer12.add_summary(summary_str, step)
summary_writer13.add_summary(summary_str, step)
summary_writer14.add_summary(summary_str, step)
summary_writer15.add_summary(summary_str, step)
summary_writer16.add_summary(summary_str, step)
#......... the rest of this example is omitted here .........
Example 8: SGDBead
def SGDBead(self, bead, thresh, maxindex):
finalerror = 0.
#thresh = .05
# Parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
display_step = 1
curWeights, curBiases = self.AllBeads[bead]
#test_model = multilayer_perceptron(w=curWeights, b=curBiases)
test_model = convnet(w=curWeights, b=curBiases)
with test_model.g.as_default():
global_step = tf.Variable(0, trainable=False)
# Get images and labels for CIFAR-10.
images, labels = cifar10.distorted_inputs()
test_images, test_labels = cifar10.inputs(eval_data='test')
# Build a Graph that computes the logits predictions from the
# inference model.
logits = test_model.predict(images)
logit_test = test_model.predict(test_images)
# Calculate loss.
loss = cifar10.loss(logits, labels)
# Build a Graph that trains the model with one batch of examples and
# updates the model parameters.
train_op = cifar10.train(loss, global_step)
top_k_op = tf.nn.in_top_k(logit_test, test_labels, 1)
# Build an initialization operation to run below.
init = tf.initialize_all_variables()
# Start running operations on the Graph.
#sess = tf.Session(config=tf.ConfigProto(
# log_device_placement=FLAGS.log_device_placement))
with tf.Session(config=tf.ConfigProto(
log_device_placement=False)) as sess:
sess.run(init)
tf.train.start_queue_runners(sess=sess)
step = 0
stopcond = True
while step < max_steps and stopcond:
start_time = time.time()
_, loss_value = sess.run([train_op, loss])
duration = time.time() - start_time
assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
if step % 10 == 0:
num_examples_per_step = batch_size
examples_per_sec = num_examples_per_step / duration
sec_per_batch = float(duration)
format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
'sec/batch)')
print (format_str % (datetime.now(), step, loss_value,
examples_per_sec, sec_per_batch))
if step % 100 == 0:
num_iter = int(math.ceil(num_examples / batch_size))
true_count = 0 # Counts the number of correct predictions.
total_sample_count = num_iter * batch_size
stepp = 0
while stepp < num_iter:
predictions = sess.run([top_k_op])
true_count += np.sum(predictions)
stepp += 1
# Compute precision @ 1.
precision = true_count / total_sample_count
print('%s: precision @ 1 = %.3f' % (datetime.now(), precision))
if precision > 1 - thresh:
stopcond = False
test_model.params = sess.run(test_model.weightslist), sess.run(test_model.biaseslist)
self.AllBeads[bead]=test_model.params
finalerror = 1 - precision
print ("Final bead error: ",str(finalerror))
step += 1
return finalerror
Example 9:
models.append(test_model)
with test_model.g.as_default():
global_step = tf.Variable(0, trainable=False)
# Get images and labels for CIFAR-10.
images, labels = cifar10.distorted_inputs()
test_images, test_labels = cifar10.inputs(eval_data='test')
# Build a Graph that computes the logits predictions from the
# inference model.
logits = test_model.predict(images)
logit_test = test_model.predict(test_images)
# Calculate loss.
loss = cifar10.loss(logits, labels)
# Build a Graph that trains the model with one batch of examples and
# updates the model parameters.
train_op = cifar10.train(loss, global_step)
top_k_op = tf.nn.in_top_k(logit_test, test_labels, 1)
# Build an initialization operation to run below.
init = tf.initialize_all_variables()
# Start running operations on the Graph.
#sess = tf.Session(config=tf.ConfigProto(
# log_device_placement=FLAGS.log_device_placement))
Example 10: train
def train():
ps_hosts = FLAGS.ps_hosts.split(',')
worker_hosts = FLAGS.worker_hosts.split(',')
print ('PS hosts are: %s' % ps_hosts)
print ('Worker hosts are: %s' % worker_hosts)
server = tf.train.Server(
{'ps': ps_hosts, 'worker': worker_hosts},
job_name = FLAGS.job_name,
task_index=FLAGS.task_id)
if FLAGS.job_name == 'ps':
# `ps` jobs wait for incoming connections from the workers.
server.join()
is_chief = (FLAGS.task_id == 0)
if is_chief:
if tf.gfile.Exists(FLAGS.train_dir):
tf.gfile.DeleteRecursively(FLAGS.train_dir)
tf.gfile.MakeDirs(FLAGS.train_dir)
"""Train CIFAR-10 for a number of steps."""
cluster = tf.train.ClusterSpec({'ps': ps_hosts, 'worker': worker_hosts})
device_setter = tf.train.replica_device_setter(cluster=cluster)
with tf.device(device_setter):
global_step = tf.Variable(0, trainable=False)
# Get images and labels for CIFAR-10.
images, labels = cifar10.distorted_inputs()
# Build a Graph that computes the logits predictions from the
# inference model.
logits = cifar10.inference(images)
# Calculate loss.
loss = cifar10.loss(logits, labels)
num_batches_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size
decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
# Decay the learning rate exponentially based on the number of steps.
lr = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
global_step,
decay_steps,
LEARNING_RATE_DECAY_FACTOR,
staircase=True)
tf.scalar_summary('learning_rate', lr)
opt = tf.train.GradientDescentOptimizer(lr)
# Track the moving averages of all trainable variables.
exp_moving_averager = tf.train.ExponentialMovingAverage(
MOVING_AVERAGE_DECAY, global_step)
variables_to_average = (
tf.trainable_variables() + tf.moving_average_variables())
opt = tf.train.SyncReplicasOptimizer(
opt,
replicas_to_aggregate=len(worker_hosts),
replica_id=FLAGS.task_id,
total_num_replicas=len(worker_hosts),
variable_averages=exp_moving_averager,
variables_to_average=variables_to_average)
# Compute gradients with respect to the loss.
grads = opt.compute_gradients(loss)
# Add histograms for gradients.
for grad, var in grads:
if grad is not None:
tf.histogram_summary(var.op.name + '/gradients', grad)
apply_gradients_op = opt.apply_gradients(grads, global_step=global_step)
with tf.control_dependencies([apply_gradients_op]):
train_op = tf.identity(loss, name='train_op')
chief_queue_runners = [opt.get_chief_queue_runner()]
init_tokens_op = opt.get_init_tokens_op()
saver = tf.train.Saver()
# We run the summaries in the same thread as the training operations by
# passing in None for summary_op to avoid a summary_thread being started.
# Running summaries and training operations in parallel could run out of
# GPU memory.
sv = tf.train.Supervisor(is_chief=is_chief,
logdir=FLAGS.train_dir,
init_op=tf.initialize_all_variables(),
summary_op=tf.merge_all_summaries(),
global_step=global_step,
saver=saver,
save_model_secs=60)
tf.logging.info('%s Supervisor' % datetime.now())
sess_config = tf.ConfigProto(
allow_soft_placement=True,
log_device_placement=FLAGS.log_device_placement)
#......... the rest of this example is omitted here .........
Example 11: int
key, value = reader.read(filename_queue)
batch_size = 128
min_fraction_of_examples_in_queue = 0.4
num_examples_per_epoch = 50000
min_queue_examples = int(num_examples_per_epoch *
min_fraction_of_examples_in_queue)
images_batch, label_batch =\
tf.train.shuffle_batch_join([read_example(value) for _ in range(9)],
batch_size=batch_size,
capacity=min_queue_examples + 3*batch_size,
min_after_dequeue=min_queue_examples)
logits = cifar10.inference(images_batch)
loss = cifar10.loss(logits, label_batch)
global_step = tf.Variable(0, trainable=False)
train_op = cifar10.train(loss, global_step)
saver = tf.train.Saver(tf.all_variables())
summary_op = tf.merge_all_summaries()
init = tf.initialize_all_variables()
sess = tf.Session()
summary_writer = tf.train.SummaryWriter('./train',
graph_def=sess.graph_def)
Example 12: train
def train():
ps_hosts = FLAGS.ps_hosts.split(',')
worker_hosts = FLAGS.worker_hosts.split(',')
print ('PS hosts are: %s' % ps_hosts)
print ('Worker hosts are: %s' % worker_hosts)
server = tf.train.Server(
{'ps': ps_hosts, 'worker': worker_hosts},
job_name = FLAGS.job_name,
task_index=FLAGS.task_id)
if FLAGS.job_name == 'ps':
server.join()
is_chief = (FLAGS.task_id == 0)
if is_chief:
if tf.gfile.Exists(FLAGS.train_dir):
tf.gfile.DeleteRecursively(FLAGS.train_dir)
tf.gfile.MakeDirs(FLAGS.train_dir)
device_setter = tf.train.replica_device_setter(ps_tasks=1)
with tf.device('/job:worker/task:%d' % FLAGS.task_id):
with tf.device(device_setter):
global_step = tf.Variable(0, trainable=False)
# Get images and labels for CIFAR-10.
images, labels = cifar10.distorted_inputs()
# Build a Graph that computes the logits predictions from the
# inference model.
logits = cifar10.inference(images)
# Calculate loss.
loss = cifar10.loss(logits, labels)
train_op = cifar10.train(loss, global_step)
saver = tf.train.Saver()
# We run the summaries in the same thread as the training operations by
# passing in None for summary_op to avoid a summary_thread being started.
# Running summaries and training operations in parallel could run out of
# GPU memory.
sv = tf.train.Supervisor(is_chief=is_chief,
logdir=FLAGS.train_dir,
init_op=tf.initialize_all_variables(),
summary_op=tf.merge_all_summaries(),
global_step=global_step,
saver=saver,
save_model_secs=60)
tf.logging.info('%s Supervisor' % datetime.now())
sess_config = tf.ConfigProto(allow_soft_placement=True,
log_device_placement=FLAGS.log_device_placement)
print ("Before session init")
# Get a session.
sess = sv.prepare_or_wait_for_session(server.target, config=sess_config)
print ("Session init done")
# Start the queue runners.
queue_runners = tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS)
sv.start_queue_runners(sess, queue_runners)
print ('Started %d queues for processing input data.' % len(queue_runners))
"""Train CIFAR-10 for a number of steps."""
for step in xrange(FLAGS.max_steps):
start_time = time.time()
_, loss_value, gs = sess.run([train_op, loss, global_step])
duration = time.time() - start_time
assert not np.isnan(loss_value), 'Model diverged with loss = NaN'
if step % 10 == 0:
num_examples_per_step = FLAGS.batch_size
examples_per_sec = num_examples_per_step / duration
sec_per_batch = float(duration)
format_str = ('%s: step %d (global_step %d), loss = %.2f (%.1f examples/sec; %.3f sec/batch)')
print (format_str % (datetime.now(), step, gs, loss_value, examples_per_sec, sec_per_batch))
if is_chief:
saver.save(sess, os.path.join(FLAGS.train_dir, 'model.ckpt'), global_step=global_step)