

Python tensorflow.init Method Code Examples

This article collects typical usage examples of the Python method horovod.tensorflow.init. If you have been wondering what horovod.tensorflow.init does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore other usage examples from the horovod.tensorflow module.


The 15 code examples of tensorflow.init below are ordered by popularity.
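
Before the individual examples, here is a minimal sketch of the initialization pattern most of them share. The GPU-pinning lines follow the convention from Horovod's documentation and are an illustrative assumption, not taken from the examples below:

import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()  # must be called before any other hvd.* function
print("rank %d of %d" % (hvd.rank(), hvd.size()))

# Pin this process to a single GPU so each rank trains on its own device (TF2 API).
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU')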

Example 1: evaluate

# Required import: from horovod import tensorflow [as alias]
# Or: from horovod.tensorflow import init [as alias]
def evaluate(self, dataset, epoch):
        """ evaluate the model """
        loss_metric = tf.keras.metrics.Mean(name="AverageLoss")
        loss, metrics = None, None
        evaluate_step = self.evaluate_step
        if self.hparams.enable_tf_function:
            logging.info("please be patient, enable tf.function, it takes time ...")
            evaluate_step = tf.function(evaluate_step, input_signature=self.sample_signature)
        self.model.reset_metrics()  # init metric.result() with 0
        for batch, samples in enumerate(dataset):
            samples = self.model.prepare_samples(samples)
            loss, metrics = evaluate_step(samples)
            if batch % self.hparams.log_interval == 0:
                logging.info(self.metric_checker(loss, metrics, -2))
            total_loss = sum(list(loss.values())) if isinstance(loss, dict) else loss
            loss_metric.update_state(total_loss)
        logging.info(self.metric_checker(loss_metric.result(), metrics, evaluate_epoch=epoch))
        self.model.reset_metrics()
        return loss_metric.result(), metrics 
Developer: athena-team, Project: athena, Lines: 21, Source: solver.py

Example 2: test_horovod_allreduce_type_error

# Required import: from horovod import tensorflow [as alias]
# Or: from horovod.tensorflow import init [as alias]
def test_horovod_allreduce_type_error(self):
        """Test that the allreduce raises an error if different ranks try to
        send tensors of different type."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            # Same dims on every rank, but a different dtype on odd vs. even ranks
            dims = [17] * 3
            tensor = tf.ones(dims,
                             dtype=tf.int32 if rank % 2 == 0 else tf.float32)
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.allreduce(tensor)) 
Developer: mlperf, Project: training_results_v0.6, Lines: 20, Source: test_tensorflow.py
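
Examples 2 through 7 all follow the same pattern: each rank deliberately constructs a tensor that is inconsistent with its peers (different dtype, shape, or device), and the test asserts that the collective operation surfaces the mismatch as a tf.errors.FailedPreconditionError rather than silently producing wrong results.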

Example 3: test_horovod_allreduce_cpu_gpu_error

# Required import: from horovod import tensorflow [as alias]
# Or: from horovod.tensorflow import init [as alias]
def test_horovod_allreduce_cpu_gpu_error(self):
        """Test that the allreduce raises an error if different ranks try to
        perform reduction on CPU and GPU."""
        # Only do this test if there are GPUs available.
        if not tf.test.is_gpu_available(cuda_only=True):
            return

        hvd.init()
        local_rank = hvd.local_rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        device = "/gpu:%d" % local_rank if local_rank % 2 == 0 else "/cpu:0"
        with self.test_session(config=self.config) as session:
            with tf.device(device):
                # Same dims and dtype on every rank; only the device differs
                dims = [17] * 3
                tensor = tf.ones(dims, dtype=tf.int32)
                with self.assertRaises(tf.errors.FailedPreconditionError):
                    session.run(hvd.allreduce(tensor)) 
Developer: mlperf, Project: training_results_v0.6, Lines: 25, Source: test_tensorflow.py

Example 4: test_horovod_allgather_error

# Required import: from horovod import tensorflow [as alias]
# Or: from horovod.tensorflow import init [as alias]
def test_horovod_allgather_error(self):
        """Test that the allgather returns an error if any dimension besides
        the first is different among the tensors being gathered."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            tensor_size = [17] * 3
            tensor_size[1] = 10 * (rank + 1)
            tensor = tf.ones(tensor_size, dtype=tf.float32) * rank
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.allgather(tensor)) 
Developer: mlperf, Project: training_results_v0.6, Lines: 19, Source: test_tensorflow.py

Example 5: test_horovod_allgather_type_error

# Required import: from horovod import tensorflow [as alias]
# Or: from horovod.tensorflow import init [as alias]
def test_horovod_allgather_type_error(self):
        """Test that the allgather returns an error if the types being gathered
        differ among the processes"""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            tensor_size = [17] * 3
            dtype = tf.int32 if rank % 2 == 0 else tf.float32
            tensor = tf.ones(tensor_size, dtype=dtype) * rank
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.allgather(tensor)) 
Developer: mlperf, Project: training_results_v0.6, Lines: 19, Source: test_tensorflow.py

Example 6: test_horovod_broadcast_error

# Required import: from horovod import tensorflow [as alias]
# Or: from horovod.tensorflow import init [as alias]
def test_horovod_broadcast_error(self):
        """Test that the broadcast returns an error if any dimension besides
        the first is different among the tensors being broadcasted."""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            tensor_size = [17] * 3
            tensor_size[1] = 10 * (rank + 1)
            tensor = tf.ones(tensor_size, dtype=tf.float32) * rank
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.broadcast(tensor, 0)) 
Developer: mlperf, Project: training_results_v0.6, Lines: 19, Source: test_tensorflow.py

Example 7: test_horovod_broadcast_type_error

# Required import: from horovod import tensorflow [as alias]
# Or: from horovod.tensorflow import init [as alias]
def test_horovod_broadcast_type_error(self):
        """Test that the broadcast returns an error if the types being broadcasted
        differ among the processes"""
        hvd.init()
        rank = hvd.rank()
        size = hvd.size()

        # This test does not apply if there is only one worker.
        if size == 1:
            return

        with self.test_session(config=self.config) as session:
            tensor_size = [17] * 3
            dtype = tf.int32 if rank % 2 == 0 else tf.float32
            tensor = tf.ones(tensor_size, dtype=dtype) * rank
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.broadcast(tensor, 0)) 
Developer: mlperf, Project: training_results_v0.6, Lines: 19, Source: test_tensorflow.py

Example 8: setup

# Required import: from horovod import tensorflow [as alias]
# Or: from horovod.tensorflow import init [as alias]
def setup():
    if not horovod_installed:
        return False

    global horovod_initialized
    if horovod_initialized:
        return hvd

    hvd.init()
    horovod_initialized = True

    horovod_num_worker = hvd.size()
    horovod_rank = hvd.rank()
    # verify that MPI multi-threading is supported.
    assert hvd.mpi_threads_supported()
    # make sure MPI is not re-initialized.
    import mpi4py.rc
    mpi4py.rc.initialize = False
    # import mpi4py
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    # check size and rank are synchronized
    assert horovod_num_worker == comm.Get_size()
    assert horovod_rank == comm.Get_rank()
    return hvd 
Developer: blue-oil, Project: blueoil, Lines: 27, Source: horovod.py
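
Note the ordering in this example: mpi4py.rc.initialize = False must be set before from mpi4py import MPI, otherwise importing mpi4py would initialize MPI a second time behind Horovod's back. The final assertions confirm that Horovod and MPI agree on the world size and this process's rank.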

Example 9: __init__

# Required import: from horovod import tensorflow [as alias]
# Or: from horovod.tensorflow import init [as alias]
def __init__(self, average=True):
        """
        Args:
            average (bool): whether to average or sum the gradients across processes.
        """
        import byteps.tensorflow as bps
        self.hvd = bps  # BytePS has the same interface as Horovod
        self.hvd.allreduce = bps.push_pull  # https://github.com/bytedance/byteps/issues/8
        assert os.environ.get("DMLC_ROLE", None) == "worker"
        assert "DMLC_WORKER_ID" in os.environ and "DMLC_NUM_WORKER" in os.environ
        bps.init()
        self.is_chief = bps.rank() == 0

        self._local_rank = bps.local_rank()
        self._rank = bps.rank()
        self._average = average

        self._compression = None
        self._has_compression = False
        logger.info("[BytePSTrainer] local rank={}".format(self._local_rank))
        SingleCostTrainer.__init__(self) 
Developer: junsukchoe, Project: ADL, Lines: 23, Source: trainers.py

Example 10: __init__

# Required import: from horovod import tensorflow [as alias]
# Or: from horovod.tensorflow import init [as alias]
def __init__(self, optimizer: TFOptimizer, comm=None):
        super().__init__(optimizer.executor, optimizer.loss)

        try:
            import horovod.tensorflow as hvd
        except ImportError:
            raise ImportError('Cannot import Horovod')
            
        hvd.init()
        self.op = hvd.DistributedOptimizer(optimizer.op)

        if comm is None:
            comm = CommunicationNetwork()
        self.communication = comm
        self.original_optimizer = optimizer 
Developer: deep500, Project: deep500, Lines: 18, Source: tf_distributed_optimizer.py
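
For context, here is a minimal TF1-style sketch of how hvd.DistributedOptimizer is normally used end to end. The learning-rate scaling and the broadcast hook are standard Horovod conventions, not part of the deep500 wrapper above, and the loss variable is hypothetical:

import tensorflow as tf
import horovod.tensorflow as hvd

hvd.init()
opt = tf.train.AdamOptimizer(1e-3 * hvd.size())  # scale the learning rate with world size
opt = hvd.DistributedOptimizer(opt)              # averages gradients across ranks via allreduce
train_op = opt.minimize(loss)                    # 'loss' assumed to be defined elsewhere
hooks = [hvd.BroadcastGlobalVariablesHook(0)]    # sync initial weights from rank 0 to all ranks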

Example 11: setup_horovod

# Required import: from horovod import tensorflow [as alias]
# Or: from horovod.tensorflow import init [as alias]
def setup_horovod():
    import horovod.tensorflow as hvd

    # Initialize Horovod
    hvd.init()
    # Verify that MPI multi-threading is supported.
    assert hvd.mpi_threads_supported()

    from mpi4py import MPI

    assert hvd.size() == MPI.COMM_WORLD.Get_size()

    is_root = hvd.rank() == 0

    def mpi_average(local_list):
        # _local_list_orig = local_list
        local_list = list(map(float, local_list))
        # print('RANK {} AVERAGING {} -> {}'.format(hvd.rank(), _local_list_orig, local_list))
        sums = MPI.COMM_WORLD.gather(sum(local_list), root=0)
        counts = MPI.COMM_WORLD.gather(len(local_list), root=0)
        sum_counts = sum(counts) if is_root else None
        avg = (sum(sums) / sum_counts) if is_root else None
        return avg, sum_counts

    return hvd, MPI, is_root, mpi_average 
Developer: aravindsrinivas, Project: flowpp, Lines: 27, Source: flow_training.py
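
The mpi_average helper is worth a second look: each rank contributes its local sum and element count, and only the root divides the total sum by the total count. This yields a correctly weighted global mean even when ranks hold lists of different lengths.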

Example 12: evaluate

# Required import: from horovod import tensorflow [as alias]
# Or: from horovod.tensorflow import init [as alias]
def evaluate(self, dataset, epoch):
        """ evaluate the model """
        loss_metric = tf.keras.metrics.Mean(name="AverageLoss")
        loss, metrics = None, None
        evaluate_step = self.evaluate_step
        if self.hparams.enable_tf_function:
            logging.info("please be patient, enable tf.function, it takes time ...")
            evaluate_step = tf.function(evaluate_step, input_signature=self.sample_signature)
        self.model.reset_metrics()  # init metric.result() with 0
        for batch, samples in enumerate(dataset):
            samples = self.model.prepare_samples(samples)
            loss, metrics = evaluate_step(samples)
            if batch % self.hparams.log_interval == 0:
                logging.info(self.metric_checker(loss, metrics, -2))
            loss_metric.update_state(loss)
        logging.info(self.metric_checker(loss_metric.result(), metrics, evaluate_epoch=epoch))
        self.model.reset_metrics()
        return loss_metric.result() 
Developer: didi, Project: athena, Lines: 20, Source: solver.py

Example 13: __init__

# Required import: from horovod import tensorflow [as alias]
# Or: from horovod.tensorflow import init [as alias]
def __init__(self, average=True, compression=None):
        """
        Args:
            average (bool): whether to average or sum the gradients across processes.
            compression: `hvd.Compression.fp16` or `hvd.Compression.none`
        """
        if 'pyarrow' in sys.modules:
            logger.warn("Horovod and pyarrow may conflict due to pyarrow bugs.")
        # lazy import
        import horovod.tensorflow as hvd
        import horovod
        hvd_version = tuple(map(int, horovod.__version__.split('.')[:3]))
        self.hvd = hvd

        hvd.init()
        self.is_chief = hvd.rank() == 0
        self._local_rank = hvd.local_rank()
        self._rank = hvd.rank()
        self._average = average
        self._compression = compression
        self._has_compression = hvd_version >= (0, 15, 0)
        logger.info("[HorovodTrainer] local rank={}".format(self._local_rank))
        super(HorovodTrainer, self).__init__()

        self.BROADCAST_EVERY_EPOCH = True 
Developer: tensorpack, Project: tensorpack, Lines: 27, Source: trainers.py
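
The hvd_version tuple comparison gates compression support: the compression argument of hvd.DistributedOptimizer only exists from Horovod 0.15 onward, which is exactly what hvd_version >= (0, 15, 0) and _has_compression record.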

Example 14: _worker_fn

# Required import: from horovod import tensorflow [as alias]
# Or: from horovod.tensorflow import init [as alias]
def _worker_fn(client, task, net_if):
    event.broadcast(client, f"{task}/addr", net_if[1])

    worker_info = event.wait(client, f"chief:0/{net_if[1]}").split(',')
    driver_socket = event.wait(client, "chief:0/sock_addr").split(':')

    os.environ['HOROVOD_GLOO_RENDEZVOUS_ADDR'] = driver_socket[0]
    os.environ['HOROVOD_GLOO_RENDEZVOUS_PORT'] = driver_socket[1]
    os.environ['HOROVOD_CONTROLLER'] = 'gloo'
    os.environ['HOROVOD_CPU_OPERATIONS'] = 'gloo'
    os.environ['HOROVOD_GLOO_IFACE'] = net_if[0]
    os.environ['HOROVOD_RANK'] = worker_info[0]
    os.environ['HOROVOD_SIZE'] = worker_info[1]
    os.environ['HOROVOD_LOCAL_RANK'] = worker_info[2]
    os.environ['HOROVOD_LOCAL_SIZE'] = worker_info[3]
    os.environ['HOROVOD_CROSS_RANK'] = worker_info[4]
    os.environ['HOROVOD_CROSS_SIZE'] = worker_info[5]

    hvd.init()

    experiment = _task_commons._get_experiment(client)

    if task != 'chief:0':
        # Overwrite config to do nothing but training to improve training speed
        experiment.estimator._model_dir = "."
        new_config = experiment.estimator.config.replace(
            save_summary_steps=None,
            save_checkpoints_steps=None,
            save_checkpoints_secs=None,
            log_step_count_steps=None
        )
        experiment.estimator._config = new_config

    logger.info("start training..")

    experiment.estimator.train(
        experiment.train_spec.input_fn,
        hooks=experiment.train_spec.hooks,
        max_steps=experiment.train_spec.max_steps) 
Developer: criteo, Project: tf-yarn, Lines: 41, Source: gloo_allred_task.py
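
The block of HOROVOD_* environment variables is how hvd.init() bootstraps over the Gloo controller without MPI: the worker's rank, sizes, and the rendezvous server's address all arrive via values broadcast by the driver (see Example 15).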

Example 15: _driver_fn

# Required import: from horovod import tensorflow [as alias]
# Or: from horovod.tensorflow import init [as alias]
def _driver_fn(client, net_if):
    cluster_tasks = _task_commons._get_cluster_tasks(client)
    # Worker discovery
    worker_list = [f"{net_if[1]}:{N_PROCESS_PER_WORKER}"]
    n_workers = 1
    for cluster_task in cluster_tasks:
        if 'worker' in cluster_task:
            worker_addr = event.wait(client, f"{cluster_task}/addr")
            logger.info(f"{cluster_task}: {worker_addr}")
            worker_list.append(f"{worker_addr}:{N_PROCESS_PER_WORKER}")
            n_workers += 1

    # Worker task allocation to workers
    hosts = gloo_run.parse_hosts(','.join(worker_list))
    host_alloc_plan = gloo_run.get_host_assignments(hosts, n_workers)
    for host in host_alloc_plan:
        host_info = f"""\
            {host.rank},{host.size},{host.local_rank},\
            {host.local_size},{host.cross_rank},{host.cross_size}\
            """
        event.broadcast(client, f"{cluster.get_task()}/{host.hostname}", host_info)

    global_rendezv = RendezvousServer(verbose=1)
    global_rendezv_port = global_rendezv.start_server()
    global_rendezv.httpd.init(host_alloc_plan)
    event.broadcast(client, f"{cluster.get_task()}/sock_addr", f"{net_if[1]}:{global_rendezv_port}")
    return global_rendezv.listen_thread 
Developer: criteo, Project: tf-yarn, Lines: 29, Source: gloo_allred_task.py
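
This driver is the counterpart of Example 14's worker: it waits for every worker to advertise its address, computes a per-host rank assignment with gloo_run.get_host_assignments, feeds the plan to a RendezvousServer, and broadcasts the server's socket address so the workers can populate the environment variables shown above.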


Note: The horovod.tensorflow.init examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects by their respective contributors; copyright remains with the original authors, and any use or redistribution must comply with the corresponding project's license. Please do not reproduce without permission.