本文整理汇总了Python中horovod.tensorflow.DistributedOptimizer方法的典型用法代码示例。如果您正苦于以下问题:Python tensorflow.DistributedOptimizer方法的具体用法?Python tensorflow.DistributedOptimizer怎么用?Python tensorflow.DistributedOptimizer使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类horovod.tensorflow的用法示例。
在下文中一共展示了tensorflow.DistributedOptimizer方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: __init__
# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import DistributedOptimizer [as 别名]
def __init__(self, local_optimizer=None, **kwargs):
    """
    Builds a Horovod-distributed optimizer around a local optimizer.

    Args:
        local_optimizer (Optional[dict,LocalOptimizer]): Either the spec-dict describing the
            LocalOptimizer to wrap, or an already constructed LocalOptimizer object.
        **kwargs: Forwarded unchanged to the parent class constructor.
    """
    super(HorovodOptimizer, self).__init__(**kwargs)

    # Resolve the spec into an optimizer and hand it straight to Horovod's wrapper.
    self.local_optimizer = hvd.DistributedOptimizer(Optimizer.from_spec(local_optimizer))
@rlgraph_api
def step(self, variables, loss, time_percentage, *inputs):
    """
    Runs one optimization step: compute gradients, then apply them.

    Args:
        variables: Passed through to `_graph_fn_calculate_gradients`.
        loss: Passed through to `_graph_fn_calculate_gradients`.
        time_percentage: Passed through to `_graph_fn_calculate_gradients`.
        *inputs: Extra positional inputs, forwarded unchanged.

    Returns:
        Whatever `_graph_fn_apply_gradients` returns for the computed gradient/variable pairs.
    """
    gradient_pairs = self._graph_fn_calculate_gradients(variables, loss, time_percentage, *inputs)
    update = self._graph_fn_apply_gradients(gradient_pairs)
    return update
示例2: get_train_op
# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import DistributedOptimizer [as 别名]
def get_train_op(self, loss, tvars, init_lr,
                 num_train_steps, **kargs):
    """Build the training op for Horovod-distributed training.

    Args:
        loss: Loss handed to `self.grad_clip_fn`.
        tvars: Variables to update; zipped with the clipped gradients.
        init_lr: Initial learning rate fed to the decay and warm-up helpers.
        num_train_steps: Forwarded to `self.lr_decay_fn`.
        **kargs: Forwarded to the decay, warm-up, optimizer and clipping helpers.

    Returns:
        A grouped op combining the gradient application and an explicit
        `global_step` assignment.

    Side effects:
        Stores the Horovod-wrapped optimizer on `self.opt` and prints the
        Horovod world size.
    """
    decayed_lr = self.lr_decay_fn(init_lr, num_train_steps, **kargs)
    warmed_lr = self.warm_up(decayed_lr, init_lr, **kargs)

    print("==optimizer hvd size=={}".format(hvd.size()))

    # The learning rate is multiplied by the number of Horovod workers.
    base_opt = self.optimizer_op(warmed_lr*hvd.size(), **kargs)

    # Wrap the local optimizer with Horovod's distributed optimizer.
    self.opt = hvd.DistributedOptimizer(base_opt)

    clipped_grads = self.grad_clip_fn(self.opt, loss, tvars, **kargs)
    apply_op = self.opt.apply_gradients(
        zip(clipped_grads, tvars), global_step=self.global_step)

    # NOTE(review): global_step is passed to apply_gradients AND assigned
    # explicitly below -- confirm the wrapped optimizer does not already
    # increment it, otherwise the step advances twice per update.
    step_update = self.global_step.assign(self.global_step + 1)
    return tf.group(apply_op, [step_update])
示例3: __init__
# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import DistributedOptimizer [as 别名]
def __init__(self, optimizer: TFOptimizer, comm=None):
    """Wrap an existing TensorFlow optimizer with Horovod's distributed optimizer.

    Args:
        optimizer: The optimizer to wrap. Its `executor` and `loss` are passed
            to the parent constructor and its `op` is wrapped by Horovod.
        comm: Communication object to store on the instance. When None, a
            fresh `CommunicationNetwork()` is created.

    Raises:
        ImportError: If `horovod.tensorflow` cannot be imported. The original
            import failure is attached as the explicit cause.
    """
    super().__init__(optimizer.executor, optimizer.loss)

    # Horovod is imported lazily so it is only required when this class is used.
    try:
        import horovod.tensorflow as hvd
    except ImportError as err:
        # Chain explicitly so the underlying reason (missing package, broken
        # native library, ...) is reported as the direct cause.
        raise ImportError('Cannot import Horovod') from err

    hvd.init()
    self.op = hvd.DistributedOptimizer(optimizer.op)

    if comm is None:
        comm = CommunicationNetwork()
    self.communication = comm
    self.original_optimizer = optimizer
示例4: experiment_fn
# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import DistributedOptimizer [as 别名]
def experiment_fn() -> Experiment:
    """Assemble the Experiment: a linear classifier with train and eval specs.

    The estimator is a `LinearClassifier` whose optimizer is an Adam optimizer
    wrapped in Horovod's `DistributedOptimizer`. Training runs for at most 10
    steps with a Horovod variable-broadcast hook (root rank 0); evaluation
    runs for 10 steps.

    Returns:
        An `Experiment` built from the estimator, the train spec and the eval spec.
    """
    # Imported inside the function to mitigate
    # https://github.com/tensorflow/tensorflow/issues/32159 for tf >= 1.15
    import tensorflow as tf

    def train_input_fn():
        # Shuffled, batched and endlessly repeated training split.
        train_split = winequality.get_dataset(WINE_EQUALITY_FILE, split="train")
        batched = train_split.shuffle(1000).batch(128)
        return batched.repeat()

    def eval_input_fn():
        # Shuffled and batched test split (single pass, no repeat).
        test_split = winequality.get_dataset(WINE_EQUALITY_FILE, split="test")
        shuffled = test_split.shuffle(1000)
        return shuffled.batch(128)

    def make_optimizer():
        # Passed as a callable so the optimizer is created when the estimator invokes it.
        return hvd.DistributedOptimizer(tf.compat.v1.train.AdamOptimizer())

    estimator = tf.compat.v1.estimator.LinearClassifier(
        feature_columns=winequality.get_feature_columns(),
        model_dir=f"{HDFS_DIR}",
        n_classes=winequality.get_n_classes(),
        optimizer=make_optimizer)

    train_spec = tf.estimator.TrainSpec(
        train_input_fn,
        max_steps=10,
        hooks=[hvd.BroadcastGlobalVariablesHook(0)]
    )
    eval_spec = tf.estimator.EvalSpec(
        eval_input_fn,
        steps=10,
        start_delay_secs=0,
        throttle_secs=30
    )
    return Experiment(estimator, train_spec, eval_spec)
示例5: get_opt
# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import DistributedOptimizer [as 别名]
def get_opt(self, init_lr,
            num_train_steps, **kargs):
    """Create the Horovod-wrapped optimizer and store it on `self.opt`.

    Args:
        init_lr: Initial learning rate fed to the decay and warm-up helpers.
        num_train_steps: Forwarded to `self.lr_decay_fn`.
        **kargs: Forwarded to the decay, warm-up and optimizer helpers.

    Side effects:
        Prints the Horovod world size and sets `self.opt`. Nothing is returned.
    """
    schedule = self.lr_decay_fn(init_lr, num_train_steps, **kargs)
    schedule = self.warm_up(schedule, init_lr, **kargs)

    print("==optimizer hvd size=={}".format(hvd.size()))

    # The learning rate is multiplied by the number of Horovod workers
    # before the local optimizer is built.
    local_opt = self.optimizer_op(schedule*hvd.size(), **kargs)

    # Wrap the local optimizer with Horovod's distributed optimizer.
    self.opt = hvd.DistributedOptimizer(local_opt)
示例6: DistributedOptimizer
# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import DistributedOptimizer [as 别名]
def DistributedOptimizer(cls, *args):
    """Get a distributed optimizer from the base optimizer.

    Args:
        cls: Unused here; present because the function is written as a
            class-level method.
        *args: Positional arguments forwarded to `mgw.DistributedOptimizer`.

    Returns:
        Whatever `mgw.DistributedOptimizer(*args)` returns.

    Raises:
        NameError: With the message 'module <mgw> not imported' when the
            name `mgw` is not defined. A NameError raised *inside* the
            backend call propagates unchanged instead of being mislabelled.
    """
    # Guard only the name lookup: wrapping the whole call would relabel any
    # NameError raised within the backend as a missing-import error.
    try:
        backend = mgw
    except NameError:
        raise NameError('module <mgw> not imported')
    return backend.DistributedOptimizer(*args)
示例7: _get_optimizer
# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import DistributedOptimizer [as 别名]
def _get_optimizer(params, is_distributed=DISTRIBUTED):
    """Return a momentum optimizer, wrapped by Horovod when distributed.

    Args:
        params: Mapping providing "learning_rate" and "momentum".
        is_distributed: When truthy, the learning rate is multiplied by
            `hvd.size()` and the optimizer is wrapped in
            `hvd.DistributedOptimizer`.

    Returns:
        A `tf.train.MomentumOptimizer`, or its Horovod-wrapped counterpart.
    """
    if not is_distributed:
        return tf.train.MomentumOptimizer(
            learning_rate=params["learning_rate"], momentum=params["momentum"]
        )

    # Horovod: scale the learning rate by the worker count, then wrap.
    scaled_lr = params["learning_rate"] * hvd.size()
    local_opt = tf.train.MomentumOptimizer(
        learning_rate=scaled_lr,
        momentum=params["momentum"],
    )
    return hvd.DistributedOptimizer(local_opt)
示例8: _get_optimizer
# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import DistributedOptimizer [as 别名]
def _get_optimizer(params, is_distributed=defaults.DISTRIBUTED):
    """Build the momentum optimizer used for training.

    Args:
        params: Mapping providing "learning_rate" and "momentum".
        is_distributed: When truthy, the learning rate is multiplied by
            `hvd.size()` and the result is wrapped in
            `hvd.DistributedOptimizer`; otherwise the plain optimizer is
            returned.

    Returns:
        The (optionally Horovod-wrapped) `tf.train.MomentumOptimizer`.
    """
    lr = params["learning_rate"]
    if is_distributed:
        # Horovod: the learning rate is scaled by the number of workers.
        lr = lr * hvd.size()

    optimizer = tf.train.MomentumOptimizer(
        learning_rate=lr, momentum=params["momentum"]
    )

    if is_distributed:
        # Horovod: add Horovod Distributed Optimizer.
        optimizer = hvd.DistributedOptimizer(optimizer)
    return optimizer
示例9: get_opt
# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import DistributedOptimizer [as 别名]
def get_opt(self, init_lr,
            num_train_steps, **kargs):
    """Select and build the optimizer according to `self.config["opt_type"]`.

    The learning rate starts at `init_lr`, optionally goes through
    `self.lr_decay_fn` (config "decay" == "decay") and `self.warm_up`
    (config "warmup" == "warmup"); without warm-up it is converted to a
    float32 constant tensor. The optimizer flavour is then chosen:

    * "hvd" (and `hvd` truthy): Horovod `DistributedOptimizer` plus a
      variable-broadcast hook.
    * "pai_soar" (and `pai` truthy): `pai.ReplicatedVarsOptimizer`.
    * "ps_sync": `tf.train.SyncReplicasOptimizer` plus its session hook.
    * "ps": plain optimizer with the worker-scaled learning rate.
    * anything else: plain single-node optimizer.

    Args:
        init_lr: Initial learning rate.
        num_train_steps: Forwarded to `self.lr_decay_fn`.
        **kargs: Forwarded to the decay, warm-up and optimizer helpers.

    Side effects:
        Sets `self.learning_rate`, `self.single_node_learning`, `self.opt`
        and (in every branch except "ps") `self.distributed_hooks`; prints
        progress messages. Nothing is returned.
    """
    lr = init_lr
    if self.config.get("decay", "no") == "decay":
        print("==apply lr decay==")
        lr = self.lr_decay_fn(lr, num_train_steps, **kargs)

    if self.config.get("warmup", "no") == "warmup":
        print("==apply warmup==")
        lr = self.warm_up(lr, init_lr, **kargs)
    else:
        # NOTE(review): when decay ran above, `lr` may already be a tensor
        # here -- confirm tf.constant accepts it in that configuration.
        lr = tf.cast(tf.constant(lr), tf.float32)

    # Alternative gpu_count-based scalings of the learning rate were tried
    # here in the past and are intentionally not applied.
    self.learning_rate = lr
    self.single_node_learning = lr

    opt_type = self.config["opt_type"]

    if hvd and opt_type == "hvd":
        # Uber Horovod distributed optimizer.
        workers = self.config.get("worker_count", hvd.size())
        print("==optimizer hvd size=={}".format(workers))
        base_opt = self.optimizer_op(self.learning_rate*workers, **kargs)
        self.opt = hvd.DistributedOptimizer(base_opt)
        self.distributed_hooks = [hvd.BroadcastGlobalVariablesHook(0)]
    elif pai and opt_type == "pai_soar":
        # PAI SOAR distributed optimizer.
        workers = self.config.get("worker_count", 4)
        print("==optimizer pai_soar size=={}".format(workers))
        base_opt = self.optimizer_op(self.learning_rate*workers, **kargs)
        self.opt = pai.ReplicatedVarsOptimizer(base_opt, clip_norm=self.config.get("clip_norm", 1.0))
        self.distributed_hooks = []
    elif opt_type == "ps_sync":
        # TensorFlow parameter-server synchronous optimizer.
        workers = self.config.get("worker_count", 4)
        print("==optimizer ps_sync size=={}".format(workers))
        base_opt = self.optimizer_op(self.learning_rate*workers, **kargs)
        self.opt = tf.train.SyncReplicasOptimizer(base_opt,
                                                  replicas_to_aggregate=workers,
                                                  total_num_replicas=workers)
        self.distributed_hooks = [self.opt.make_session_run_hook(self.config["is_chief"], num_tokens=0)]
    elif opt_type == "ps":
        # NOTE(review): this branch does not set self.distributed_hooks --
        # confirm it is initialised elsewhere before being read.
        workers = self.config.get("worker_count", 4)
        print("==optimizer ps_async size=={}".format(workers))
        self.opt = self.optimizer_op(self.learning_rate*workers, **kargs)
    else:
        print("==initialization of single node optimizer==")
        self.opt = self.optimizer_op(self.learning_rate, **kargs)
        self.distributed_hooks = []
示例10: main
# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import DistributedOptimizer [as 别名]
def main(_):
    """Train the MNIST conv model with Horovod-distributed RMSProp.

    Initializes Horovod, loads MNIST into a per-rank directory, builds the
    model and a Horovod-wrapped optimizer, then runs training inside a
    `MonitoredTrainingSession` until one of the hooks requests a stop.

    Args:
        _: Unused positional argument.
    """
    # Horovod: initialize Horovod.
    hvd.init()

    # Download and load MNIST dataset; the directory name carries the rank.
    mnist = learn.datasets.mnist.read_data_sets('MNIST-data-%d' % hvd.rank())

    # Build model...
    with tf.name_scope('input'):
        images_ph = tf.placeholder(tf.float32, [None, 784], name='image')
        labels_ph = tf.placeholder(tf.float32, [None], name='label')
    predict, loss = conv_model(images_ph, labels_ph, tf.contrib.learn.ModeKeys.TRAIN)

    # Horovod: the base learning rate is multiplied by the worker count and
    # the optimizer is wrapped in Horovod's distributed optimizer.
    optimizer = hvd.DistributedOptimizer(
        tf.train.RMSPropOptimizer(0.001 * hvd.size()))

    global_step = tf.contrib.framework.get_or_create_global_step()
    train_op = optimizer.minimize(loss, global_step=global_step)

    # Horovod: BroadcastGlobalVariablesHook broadcasts initial variable states
    # from rank 0 to all other processes. This is necessary to ensure consistent
    # initialization of all workers when training is started with random weights
    # or restored from a checkpoint.
    broadcast_hook = hvd.BroadcastGlobalVariablesHook(0)
    # Horovod: the step budget is divided by the worker count.
    stop_hook = tf.train.StopAtStepHook(last_step=20000 // hvd.size())
    logging_hook = tf.train.LoggingTensorHook(
        tensors={'step': global_step, 'loss': loss},
        every_n_iter=10)
    hooks = [broadcast_hook, stop_hook, logging_hook]

    # Horovod: pin GPU to be used to process local rank (one GPU per process)
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    session_config.gpu_options.visible_device_list = str(hvd.local_rank())

    # Horovod: save checkpoints only on worker 0 to prevent other workers from
    # corrupting them.
    if hvd.rank() == 0:
        checkpoint_dir = './checkpoints'
    else:
        checkpoint_dir = None

    # The MonitoredTrainingSession takes care of session initialization,
    # restoring from a checkpoint, saving to a checkpoint, and closing when done
    # or an error occurs.
    with tf.train.MonitoredTrainingSession(checkpoint_dir=checkpoint_dir,
                                           hooks=hooks,
                                           config=session_config) as mon_sess:
        while not mon_sess.should_stop():
            # Run a training step synchronously on a batch of 100 examples.
            batch_images, batch_labels = mnist.train.next_batch(100)
            mon_sess.run(train_op, feed_dict={images_ph: batch_images, labels_ph: batch_labels})