当前位置: 首页>>代码示例>>Python>>正文


Python tensorflow.DistributedOptimizer方法代码示例

本文整理汇总了Python中horovod.tensorflow.DistributedOptimizer方法的典型用法代码示例。如果您正苦于以下问题:Python tensorflow.DistributedOptimizer方法的具体用法?Python tensorflow.DistributedOptimizer怎么用?Python tensorflow.DistributedOptimizer使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在horovod.tensorflow的用法示例。


在下文中一共展示了tensorflow.DistributedOptimizer方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# Required import (the snippet refers to the module as `hvd`): import horovod.tensorflow as hvd
# Alternatively: from horovod.tensorflow import DistributedOptimizer
def __init__(self, local_optimizer=None, **kwargs):
        """
        Builds a distributed optimizer by wrapping a local optimizer with `hvd.DistributedOptimizer`.

        Args:
            local_optimizer (Optional[dict,LocalOptimizer]): Either a spec-dict describing the LocalOptimizer
                to wrap, or an already constructed LocalOptimizer object. It is resolved through
                `Optimizer.from_spec`.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        super(HorovodOptimizer, self).__init__(**kwargs)

        # Resolve the spec first, then hand the resulting optimizer to the Horovod wrapper.
        self.local_optimizer = hvd.DistributedOptimizer(Optimizer.from_spec(local_optimizer))

        @rlgraph_api
        def step(self, variables, loss, time_percentage, *inputs):
            # One optimization step: compute (gradient, variable) pairs, then apply them.
            gradient_pairs = self._graph_fn_calculate_gradients(variables, loss, time_percentage, *inputs)
            applied = self._graph_fn_apply_gradients(gradient_pairs)
            return applied
开发者ID:rlgraph,项目名称:rlgraph,代码行数:20,代码来源:horovod_optimizer.py

示例2: get_train_op

# Required import (the snippet refers to the module as `hvd`): import horovod.tensorflow as hvd
# Alternatively: from horovod.tensorflow import DistributedOptimizer
def get_train_op(self, loss, tvars, init_lr, num_train_steps, **kargs):
    """Assemble the training op for Horovod-distributed training.

    The initial learning rate is passed through ``self.lr_decay_fn`` and
    ``self.warm_up``, multiplied by ``hvd.size()`` and handed to
    ``self.optimizer_op``. The optimizer it returns is wrapped in
    ``hvd.DistributedOptimizer`` and stored on ``self.opt``.

    Args:
        loss: Loss passed to ``self.grad_clip_fn``.
        tvars: Variables paired with the result of ``self.grad_clip_fn``.
        init_lr: Initial learning rate.
        num_train_steps: Step count forwarded to ``self.lr_decay_fn``.
        **kargs: Forwarded to the schedule, optimizer and clipping helpers.

    Returns:
        A ``tf.group`` of the apply-gradients op and an explicit
        ``global_step`` increment.
    """
    decayed_lr = self.lr_decay_fn(init_lr, num_train_steps, **kargs)
    warmed_lr = self.warm_up(decayed_lr, init_lr, **kargs)
    print("==optimizer hvd size=={}".format(hvd.size()))
    base_optimizer = self.optimizer_op(warmed_lr * hvd.size(), **kargs)

    # Wrap the local optimizer with the Horovod distributed optimizer.
    self.opt = hvd.DistributedOptimizer(base_optimizer)
    clipped_grads = self.grad_clip_fn(self.opt, loss, tvars, **kargs)

    apply_op = self.opt.apply_gradients(
        zip(clipped_grads, tvars), global_step=self.global_step)
    # NOTE(review): global_step is handed to apply_gradients above and is also
    # assigned here; if the wrapped optimizer increments it on its own, the
    # step advances twice per run - confirm against self.optimizer_op.
    incremented_step = self.global_step + 1
    return tf.group(apply_op, [self.global_step.assign(incremented_step)])
开发者ID:yyht,项目名称:BERT,代码行数:21,代码来源:hvd_distributed_optimizer.py

示例3: __init__

# Required import (performed inside the function below, under the alias `hvd`): import horovod.tensorflow as hvd
# Alternatively: from horovod.tensorflow import DistributedOptimizer
def __init__(self, optimizer: TFOptimizer, comm=None):
        """Wrap an existing optimizer so that its op is a Horovod distributed optimizer.

        Args:
            optimizer: The optimizer to distribute. Its ``executor`` and ``loss``
                are passed to the parent constructor and its ``op`` is wrapped
                in ``hvd.DistributedOptimizer``.
            comm: Communication object to store on ``self.communication``. When
                ``None``, a new ``CommunicationNetwork()`` is created.

        Raises:
            ImportError: If ``horovod.tensorflow`` cannot be imported. The
                original import failure is attached as the direct cause.
        """
        super().__init__(optimizer.executor, optimizer.loss)

        try:
            import horovod.tensorflow as hvd
        except ImportError as err:
            # Chain explicitly so the underlying reason for the failed import
            # stays attached as __cause__ of the friendlier error.
            raise ImportError('Cannot import Horovod') from err

        hvd.init()
        self.op = hvd.DistributedOptimizer(optimizer.op)

        self.communication = CommunicationNetwork() if comm is None else comm
        self.original_optimizer = optimizer
开发者ID:deep500,项目名称:deep500,代码行数:18,代码来源:tf_distributed_optimizer.py

示例4: experiment_fn

# Required import (the snippet refers to the module as `hvd`): import horovod.tensorflow as hvd
# Alternatively: from horovod.tensorflow import DistributedOptimizer
def experiment_fn() -> Experiment:
    """Build the Experiment: a LinearClassifier trained with a Horovod-wrapped Adam optimizer.

    Both input functions read ``WINE_EQUALITY_FILE`` through
    ``winequality.get_dataset``, shuffle with a buffer of 1000 and batch by
    128; only the training input repeats.

    Returns:
        An ``Experiment`` made of the estimator, a ``TrainSpec`` (10 steps,
        with a ``BroadcastGlobalVariablesHook(0)``) and an ``EvalSpec``.
    """
    # Imported locally to mitigate https://github.com/tensorflow/tensorflow/issues/32159 for tf >= 1.15
    import tensorflow as tf

    def _shuffled_batches(split):
        # Shared pipeline for both splits: load, shuffle, batch.
        return winequality.get_dataset(WINE_EQUALITY_FILE, split=split).shuffle(1000).batch(128)

    def train_input_fn():
        return _shuffled_batches("train").repeat()

    def eval_input_fn():
        return _shuffled_batches("test")

    def make_optimizer():
        # Passed as a callable, so the optimizer is only constructed when the estimator invokes it.
        return hvd.DistributedOptimizer(tf.compat.v1.train.AdamOptimizer())

    classifier = tf.compat.v1.estimator.LinearClassifier(
        feature_columns=winequality.get_feature_columns(),
        model_dir=f"{HDFS_DIR}",
        n_classes=winequality.get_n_classes(),
        optimizer=make_optimizer,
    )
    train_spec = tf.estimator.TrainSpec(
        train_input_fn,
        max_steps=10,
        hooks=[hvd.BroadcastGlobalVariablesHook(0)],
    )
    eval_spec = tf.estimator.EvalSpec(
        eval_input_fn,
        steps=10,
        start_delay_secs=0,
        throttle_secs=30,
    )
    return Experiment(classifier, train_spec, eval_spec)
开发者ID:criteo,项目名称:tf-yarn,代码行数:34,代码来源:collective_all_reduce_example.py

示例5: get_opt

# Required import (the snippet refers to the module as `hvd`): import horovod.tensorflow as hvd
# Alternatively: from horovod.tensorflow import DistributedOptimizer
def get_opt(self, init_lr, num_train_steps, **kargs):
    """Create the Horovod-wrapped optimizer and store it on ``self.opt``.

    The learning rate goes through ``self.lr_decay_fn`` and then
    ``self.warm_up``, and is multiplied by ``hvd.size()`` before being passed
    to ``self.optimizer_op``.

    Args:
        init_lr: Initial learning rate.
        num_train_steps: Step count forwarded to ``self.lr_decay_fn``.
        **kargs: Forwarded to the schedule and optimizer helpers.
    """
    schedule = self.lr_decay_fn(init_lr, num_train_steps, **kargs)
    schedule = self.warm_up(schedule, init_lr, **kargs)

    world_size = hvd.size()
    print("==optimizer hvd size=={}".format(world_size))

    # Wrap the local optimizer with the Horovod distributed optimizer.
    local_optimizer = self.optimizer_op(schedule * world_size, **kargs)
    self.opt = hvd.DistributedOptimizer(local_optimizer)
开发者ID:yyht,项目名称:BERT,代码行数:11,代码来源:hvd_distributed_optimizer.py

示例6: DistributedOptimizer

# Required import (the snippet refers to the module as `mgw`): import horovod.tensorflow as mgw
# Alternatively: from horovod.tensorflow import DistributedOptimizer
def DistributedOptimizer(cls, *args):
    """Get a distributed optimizer from the base optimizer.

    Args:
        cls: Unused by the body.
        *args: Positional arguments forwarded to ``mgw.DistributedOptimizer``.

    Returns:
        Whatever ``mgw.DistributedOptimizer(*args)`` returns.

    Raises:
        NameError: With the message ``'module <mgw> not imported'`` when the
            name ``mgw`` is not defined. A ``NameError`` raised inside the
            wrapped call itself propagates unchanged.
    """
    # Only the name lookup is guarded: keeping the call outside the try block
    # stops an unrelated NameError raised by the optimizer from being
    # misreported as a missing import.
    try:
        factory = mgw.DistributedOptimizer
    except NameError as err:
        raise NameError('module <mgw> not imported') from err
    return factory(*args)
开发者ID:yuanyuanli85,项目名称:tf-hrnet,代码行数:9,代码来源:multi_gpu_wrapper.py

示例7: _get_optimizer

# Required import (the snippet refers to the module as `hvd`): import horovod.tensorflow as hvd
# Alternatively: from horovod.tensorflow import DistributedOptimizer
def _get_optimizer(params, is_distributed=DISTRIBUTED):
    """Return a momentum optimizer, wrapped by Horovod when running distributed.

    Args:
        params: Mapping that provides ``"learning_rate"`` and ``"momentum"``.
        is_distributed: When truthy, the learning rate is multiplied by
            ``hvd.size()`` and the optimizer is wrapped in
            ``hvd.DistributedOptimizer``.

    Returns:
        A ``tf.train.MomentumOptimizer``, or its Horovod-wrapped counterpart.
    """
    # Guard clause: the single-process case needs no Horovod calls at all.
    if not is_distributed:
        return tf.train.MomentumOptimizer(
            learning_rate=params["learning_rate"], momentum=params["momentum"]
        )

    # Horovod: scale the learning rate by the worker count, then wrap.
    scaled_optimizer = tf.train.MomentumOptimizer(
        learning_rate=params["learning_rate"] * hvd.size(),
        momentum=params["momentum"],
    )
    return hvd.DistributedOptimizer(scaled_optimizer)
开发者ID:microsoft,项目名称:DistributedDeepLearning,代码行数:15,代码来源:train_model.py

示例8: _get_optimizer

# Required import (the snippet refers to the module as `hvd`): import horovod.tensorflow as hvd
# Alternatively: from horovod.tensorflow import DistributedOptimizer
def _get_optimizer(params, is_distributed=defaults.DISTRIBUTED):
    """Build the momentum optimizer, distributed through Horovod on request.

    Args:
        params: Mapping that provides ``"learning_rate"`` and ``"momentum"``.
        is_distributed: When truthy, the learning rate is multiplied by
            ``hvd.size()`` and the result is wrapped in
            ``hvd.DistributedOptimizer``.

    Returns:
        A plain ``tf.train.MomentumOptimizer`` or the Horovod-wrapped one.
    """
    if is_distributed:
        # Horovod: scale the learning rate by the worker count before wrapping.
        local_optimizer = tf.train.MomentumOptimizer(
            learning_rate=params["learning_rate"] * hvd.size(),
            momentum=params["momentum"],
        )
        return hvd.DistributedOptimizer(local_optimizer)

    return tf.train.MomentumOptimizer(
        learning_rate=params["learning_rate"],
        momentum=params["momentum"],
    )
开发者ID:microsoft,项目名称:DistributedDeepLearning,代码行数:15,代码来源:resnet_main.py

示例9: get_opt

# Required import (the snippet refers to the module as `hvd`): import horovod.tensorflow as hvd
# Alternatively: from horovod.tensorflow import DistributedOptimizer
def get_opt(self, init_lr, num_train_steps, **kargs):
    """Build the learning-rate schedule and the optimizer selected by ``opt_type``.

    The schedule is driven by ``self.config``: ``"decay" == "decay"`` applies
    ``self.lr_decay_fn`` and ``"warmup" == "warmup"`` applies ``self.warm_up``;
    without warmup the rate is converted with ``tf.constant`` and cast to
    ``tf.float32``. The result is stored on ``self.learning_rate`` and
    ``self.single_node_learning``.

    ``self.config["opt_type"]`` then selects the optimizer:

    * ``"hvd"`` (only if ``hvd`` is truthy): ``hvd.DistributedOptimizer``
      plus a ``BroadcastGlobalVariablesHook(0)``.
    * ``"pai_soar"`` (only if ``pai`` is truthy): ``pai.ReplicatedVarsOptimizer``.
    * ``"ps_sync"``: ``tf.train.SyncReplicasOptimizer`` plus its session-run hook.
    * ``"ps"``: the plain optimizer with the scaled learning rate.
    * anything else: the plain optimizer with the unscaled learning rate.

    Every distributed branch multiplies the learning rate by
    ``config["worker_count"]`` (default ``hvd.size()`` for ``"hvd"``, else 4).
    Every branch sets both ``self.opt`` and ``self.distributed_hooks``.

    Args:
        init_lr: Initial learning rate.
        num_train_steps: Step count forwarded to ``self.lr_decay_fn``.
        **kargs: Forwarded to the schedule and optimizer helpers.

    Raises:
        KeyError: If ``self.config`` has no ``"opt_type"`` entry, or no
            ``"is_chief"`` entry when ``opt_type`` is ``"ps_sync"``.
    """
    learning_rate = init_lr
    if self.config.get("decay", "no") == "decay":
        print("==apply lr decay==")
        learning_rate = self.lr_decay_fn(learning_rate, num_train_steps, **kargs)
    if self.config.get("warmup", "no") == "warmup":
        print("==apply warmup==")
        learning_rate = self.warm_up(learning_rate, init_lr, **kargs)
    else:
        learning_rate = tf.cast(tf.constant(learning_rate), tf.float32)
    self.learning_rate = learning_rate
    self.single_node_learning = learning_rate

    opt_type = self.config["opt_type"]

    if hvd and opt_type == "hvd":
        # Horovod distributed optimizer.
        worker_count = self.config.get("worker_count", hvd.size())
        print("==optimizer hvd size=={}".format(worker_count))
        opt = self.optimizer_op(self.learning_rate * worker_count, **kargs)
        self.opt = hvd.DistributedOptimizer(opt)
        self.distributed_hooks = [hvd.BroadcastGlobalVariablesHook(0)]
    elif pai and opt_type == "pai_soar":
        # PAI SOAR distributed optimizer.
        worker_count = self.config.get("worker_count", 4)
        print("==optimizer pai_soar size=={}".format(worker_count))
        opt = self.optimizer_op(self.learning_rate * worker_count, **kargs)
        self.opt = pai.ReplicatedVarsOptimizer(
            opt, clip_norm=self.config.get("clip_norm", 1.0))
        self.distributed_hooks = []
    elif opt_type == "ps_sync":
        # TensorFlow parameter-server optimizer with synchronous replicas.
        worker_count = self.config.get("worker_count", 4)
        print("==optimizer ps_sync size=={}".format(worker_count))
        opt = self.optimizer_op(self.learning_rate * worker_count, **kargs)
        self.opt = tf.train.SyncReplicasOptimizer(
            opt,
            replicas_to_aggregate=worker_count,
            total_num_replicas=worker_count)
        self.distributed_hooks = [
            self.opt.make_session_run_hook(self.config["is_chief"], num_tokens=0)
        ]
    elif opt_type == "ps":
        worker_count = self.config.get("worker_count", 4)
        print("==optimizer ps_async size=={}".format(worker_count))
        self.opt = self.optimizer_op(self.learning_rate * worker_count, **kargs)
        # Previously this branch left distributed_hooks unset, unlike every
        # other branch; an empty list keeps later reads of the attribute safe.
        self.distributed_hooks = []
    else:
        print("==initialization of single node optimizer==")
        self.opt = self.optimizer_op(self.learning_rate, **kargs)
        self.distributed_hooks = []
开发者ID:yyht,项目名称:BERT,代码行数:46,代码来源:distributed_optimizer.py

示例10: main

# Required import (the snippet refers to the module as `hvd`): import horovod.tensorflow as hvd
# Alternatively: from horovod.tensorflow import DistributedOptimizer
def main(_):
    """Train the MNIST model with Horovod inside a MonitoredTrainingSession.

    Builds the input placeholders and the model via ``conv_model``, wraps an
    RMSProp optimizer in ``hvd.DistributedOptimizer``, and runs training steps
    on batches of 100 until the session reports it should stop.

    Args:
        _: Unused.
    """
    # Horovod: must be initialized before any rank/size query below.
    hvd.init()

    # Each rank downloads and loads MNIST into its own directory.
    dataset = learn.datasets.mnist.read_data_sets('MNIST-data-%d' % hvd.rank())

    # Build the model.
    with tf.name_scope('input'):
        image_ph = tf.placeholder(tf.float32, [None, 784], name='image')
        label_ph = tf.placeholder(tf.float32, [None], name='label')
    _predictions, loss_op = conv_model(image_ph, label_ph, tf.contrib.learn.ModeKeys.TRAIN)

    # Horovod: scale the learning rate by the number of workers, then wrap
    # the optimizer with the distributed optimizer.
    optimizer = hvd.DistributedOptimizer(tf.train.RMSPropOptimizer(0.001 * hvd.size()))

    step_counter = tf.contrib.framework.get_or_create_global_step()
    train_step = optimizer.minimize(loss_op, global_step=step_counter)

    session_hooks = [
        # Horovod: broadcast initial variable states from rank 0 to all other
        # processes so every worker starts from the same weights, whether
        # they were randomly initialized or restored from a checkpoint.
        hvd.BroadcastGlobalVariablesHook(0),
        # Horovod: the step budget shrinks with the number of workers.
        tf.train.StopAtStepHook(last_step=20000 // hvd.size()),
        tf.train.LoggingTensorHook(
            tensors={'step': step_counter, 'loss': loss_op}, every_n_iter=10),
    ]

    # Horovod: pin each process to the GPU matching its local rank.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    session_config.gpu_options.visible_device_list = str(hvd.local_rank())

    # Horovod: only rank 0 writes checkpoints, so other workers cannot
    # corrupt them.
    if hvd.rank() == 0:
        ckpt_dir = './checkpoints'
    else:
        ckpt_dir = None

    # MonitoredTrainingSession handles initialization, checkpoint
    # restore/save, and closing when done or when an error occurs.
    with tf.train.MonitoredTrainingSession(checkpoint_dir=ckpt_dir,
                                           hooks=session_hooks,
                                           config=session_config) as session:
        while not session.should_stop():
            # Run one synchronous training step on the next batch.
            batch_images, batch_labels = dataset.train.next_batch(100)
            session.run(train_step, feed_dict={image_ph: batch_images, label_ph: batch_labels})
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:57,代码来源:tensorflow_mnist.py


注:本文中的horovod.tensorflow.DistributedOptimizer方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。