当前位置: 首页>>代码示例>>Python>>正文


Python horovod.tensorflow.rank方法代码示例

本文整理汇总了Python中horovod.tensorflow.rank方法的典型用法代码示例。如果您正苦于以下问题:Python horovod.tensorflow.rank方法的具体用法?Python horovod.tensorflow.rank怎么用?Python horovod.tensorflow.rank使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块horovod.tensorflow的用法示例。


在下文中一共展示了horovod.tensorflow.rank方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: FastTrain

# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import rank [as 别名]
def FastTrain():
    """Train the fastText estimator, then evaluate and optionally export it.

    All settings are read from the module-level ``FLAGS``. When
    ``FLAGS.horovod`` is set, training runs with
    ``hvd.BroadcastGlobalVariablesHook(0)`` attached, and only the process
    whose ``hvd.rank()`` is 0 goes on to evaluate and export. Without
    Horovod the single process does everything.
    """
    print("FastTrain", FLAGS.train_steps)
    model = FastTextEstimator(FLAGS.model_dir)
    print("TEST" + FLAGS.train_records)
    train_fn = InputFn(tf.estimator.ModeKeys.TRAIN, FLAGS.train_records)
    print("STARTING TRAIN")
    train_hooks = [hvd.BroadcastGlobalVariablesHook(0)] if FLAGS.horovod else None
    model.train(input_fn=train_fn, steps=FLAGS.train_steps, hooks=train_hooks)
    print("TRAIN COMPLETE")

    # Under Horovod, every rank other than 0 stops after training.
    if FLAGS.horovod and hvd.rank() != 0:
        return

    print("EVALUATE")
    eval_fn = InputFn(tf.estimator.ModeKeys.EVAL, FLAGS.eval_records)
    metrics = model.evaluate(input_fn=eval_fn, steps=FLAGS.eval_steps, hooks=None)
    print(metrics)
    print("DONE")

    # Exporting is optional and only happens when an export directory is set.
    if not FLAGS.export_dir:
        return
    print("EXPORTING")
    model.export_savedmodel(FLAGS.export_dir,
                            inputs.ServingInputFn(FLAGS.use_ngrams))
开发者ID:apcode,项目名称:tensorflow_fasttext,代码行数:24,代码来源:classifier.py

示例2: print_act_stats

# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import rank [as 别名]
def print_act_stats(x, _str=""):
    """Wrap tensor ``x`` in a ``tf.Print`` op that reports activation statistics.

    ``x`` is returned untouched when the module-level ``do_print_act_stats``
    flag is falsy, when this process is not Horovod rank 0, or when ``x`` is a
    scalar (there is no axis to reduce over).

    Otherwise the mean and variance of ``x`` are computed over every axis
    except the last one (over axis 0 for rank-1 input). For ranks 1, 2 and 4
    these are the same axes as before (``[0]``, ``[0]`` and ``[0, 1, 2]``).
    Any other rank used to fail with an unbound-variable error and now follows
    the same rule.

    Args:
        x: tensor whose static rank is known (``len(x.get_shape())`` is used).
        _str: label placed in square brackets at the start of the printed
            message, followed by ``x.name``.

    Returns:
        ``x`` itself, or the output of ``tf.Print(x, stats, message)`` where
        ``stats`` holds the min / mean / max of the per-slice means followed
        by the min / mean / max of the per-slice standard deviations.
    """
    if not do_print_act_stats:
        return x
    if hvd.rank() != 0:
        return x

    ndims = len(x.get_shape())
    if ndims == 0:
        return x

    # Reduce over all leading axes; keep the last axis unless x is rank 1.
    reduce_axes = list(range(max(ndims - 1, 1)))
    x_mean, x_var = tf.nn.moments(x, reduce_axes, keep_dims=True)
    x_std = tf.sqrt(x_var)  # computed once and reused for all three reductions
    stats = [tf.reduce_min(x_mean), tf.reduce_mean(x_mean), tf.reduce_max(x_mean),
             tf.reduce_min(x_std), tf.reduce_mean(x_std), tf.reduce_max(x_std)]
    return tf.Print(x, stats, "["+_str+"] "+x.name)

# Allreduce methods 
开发者ID:openai,项目名称:glow,代码行数:18,代码来源:tfops.py

示例3: train_input_fn

# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import rank [as 别名]
def train_input_fn(input_file, _parse_fn, name_to_features,
                   params, **kargs):
    """Build the training input pipeline and return its next-batch tensors.

    Pipeline order: read TFRecords, optionally shard by Horovod rank, parse,
    shuffle, batch, repeat.

    Args:
        input_file: passed straight to ``tf.data.TFRecordDataset``.
        _parse_fn: called as ``_parse_fn(record, name_to_features)`` for
            every record.
        name_to_features: second argument forwarded to ``_parse_fn``.
        params: mapping read with ``.get``. Keys used are ``"buffer_size"``
            (read buffer, default 100, and shuffle buffer base, default 1024),
            ``"batch_size"`` (default 32) and ``"epoch"`` (repeat count,
            default 100).
        **kargs: ``if_shard`` (default ``"1"``); sharding across
            ``hvd.size()`` workers happens only when it equals the string
            ``"1"``.

    Returns:
        The result of ``get_next()`` on a one-shot iterator over the dataset.
    """
    shard_flag = kargs.get("if_shard", "1")
    records = tf.data.TFRecordDataset(
        input_file, buffer_size=params.get("buffer_size", 100))
    print("==hvd size {}, rank {}==".format(hvd.size(), hvd.rank()))

    # Each worker reads only its own slice of the records.
    if shard_flag == "1":
        records = records.shard(hvd.size(), hvd.rank())

    records = records.map(lambda record: _parse_fn(record, name_to_features))

    # Shuffle window: base buffer plus three batches; seed drawn fresh per call.
    shuffle_window = params.get("buffer_size", 1024) + 3 * params.get("batch_size", 32)
    shuffle_seed = np.random.randint(0, 1e10, 1)[0]
    records = records.shuffle(
        buffer_size=shuffle_window,
        seed=shuffle_seed,
        reshuffle_each_iteration=True)

    records = records.batch(params.get("batch_size", 32))
    records = records.repeat(params.get("epoch", 100))
    return records.make_one_shot_iterator().get_next()
开发者ID:yyht,项目名称:BERT,代码行数:19,代码来源:hvd_distributed_tf_data_utils.py

示例4: train

# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import rank [as 别名]
def train(self, dataset, total_batches=-1):
    """Run one epoch of training over ``dataset``.

    Args:
        dataset: object exposing ``take(n)``; the batches it yields are
            passed through ``self.model.prepare_samples`` and then the
            training step.
        total_batches: forwarded unchanged to ``dataset.take``. The default
            -1 presumably means "all batches" for a tf.data dataset; verify
            against the caller.
    """
    step_fn = self.train_step
    if self.hparams.enable_tf_function:
        logging.info("please be patient, enable tf.function, it takes time ...")
        step_fn = tf.function(step_fn, input_signature=self.sample_signature)

    for step, raw_samples in enumerate(dataset.take(total_batches)):
        prepared = self.model.prepare_samples(raw_samples)
        loss, metrics = step_fn(prepared)

        if step == 0:
            # Horovod: copy rank 0's model and optimizer variables to every
            # other process so all workers start from the same state. This
            # runs after the first gradient step so that the optimizer
            # variables already exist.
            hvd.broadcast_variables(self.model.trainable_variables, root_rank=0)
            hvd.broadcast_variables(self.optimizer.variables(), root_rank=0)

        # Log and reset metrics on rank 0 only, every log_interval steps.
        at_log_step = step % self.hparams.log_interval == 0
        if at_log_step and hvd.rank() == 0:
            logging.info(self.metric_checker(loss, metrics))
            self.model.reset_metrics()
开发者ID:athena-team,项目名称:athena,代码行数:24,代码来源:solver.py

示例5: evaluate

# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import rank [as 别名]
def evaluate(self, dataset, epoch=0):
    """Evaluate the model on ``dataset`` and return the mean loss.

    Args:
        dataset: iterable of batches; each is passed through
            ``self.model.prepare_samples`` and then the evaluation step.
        epoch: forwarded to ``self.metric_checker`` as ``evaluate_epoch``
            for the final summary log line.

    Returns:
        ``result()`` of a ``tf.keras.metrics.Mean`` updated with every
        batch loss.
    """
    step_fn = self.evaluate_step
    mean_loss = tf.keras.metrics.Mean(name="AverageLoss")
    loss, metrics = None, None
    if self.hparams.enable_tf_function:
        logging.info("please be patient, enable tf.function, it takes time ...")
        step_fn = tf.function(step_fn, input_signature=self.sample_signature)

    self.model.reset_metrics()
    for step, raw_samples in enumerate(dataset):
        prepared = self.model.prepare_samples(raw_samples)
        loss, metrics = step_fn(prepared)
        # Progress is logged on rank 0 only, every log_interval steps.
        if step % self.hparams.log_interval == 0:
            if hvd.rank() == 0:
                logging.info(self.metric_checker(loss, metrics, -2))
        mean_loss.update_state(loss)

    # The summary line and the metric reset also happen on rank 0 only.
    if hvd.rank() == 0:
        summary = self.metric_checker(mean_loss.result(), metrics, evaluate_epoch=epoch)
        logging.info(summary)
        self.model.reset_metrics()
    return mean_loss.result()
开发者ID:athena-team,项目名称:athena,代码行数:21,代码来源:solver.py

示例6: test_horovod_allreduce_type_error

# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import rank [as 别名]
def test_horovod_allreduce_type_error(self):
    """Allreduce must fail when ranks submit tensors of different dtypes."""
    hvd.init()
    my_rank = hvd.rank()
    world_size = hvd.size()

    # A dtype mismatch needs at least two workers.
    if world_size == 1:
        return

    with self.test_session(config=self.config) as sess:
        # Same shape on every rank; even ranks send int32, odd ranks float32.
        if my_rank % 2 == 0:
            element_type = tf.int32
        else:
            element_type = tf.float32
        mismatched = tf.ones([17] * 3, dtype=element_type)
        with self.assertRaises(tf.errors.FailedPreconditionError):
            sess.run(hvd.allreduce(mismatched))
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:20,代码来源:test_tensorflow.py

示例7: test_horovod_allreduce_cpu_gpu_error

# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import rank [as 别名]
def test_horovod_allreduce_cpu_gpu_error(self):
    """Allreduce must fail when some ranks reduce on CPU and others on GPU."""
    # Skipped when no CUDA GPU is available.
    if not tf.test.is_gpu_available(cuda_only=True):
        return

    hvd.init()
    gpu_index = hvd.local_rank()
    world_size = hvd.size()

    # A device mismatch needs at least two workers.
    if world_size == 1:
        return

    # Even local ranks run on their own GPU, odd local ranks on the CPU.
    if gpu_index % 2 == 0:
        placement = "/gpu:%d" % gpu_index
    else:
        placement = "/cpu:0"

    with self.test_session(config=self.config) as sess:
        with tf.device(placement):
            # Shape and dtype are identical everywhere; only the device differs.
            operand = tf.ones([17] * 3, dtype=tf.int32)
            with self.assertRaises(tf.errors.FailedPreconditionError):
                sess.run(hvd.allreduce(operand))
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:25,代码来源:test_tensorflow.py

示例8: test_horovod_allgather_error

# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import rank [as 别名]
def test_horovod_allgather_error(self):
    """Allgather must fail when a non-first dimension differs across ranks."""
    hvd.init()
    my_rank = hvd.rank()
    world_size = hvd.size()

    # A shape mismatch needs at least two workers.
    if world_size == 1:
        return

    with self.test_session(config=self.config) as sess:
        # The middle dimension depends on the rank, so no two ranks agree on it.
        shape = [17, 10 * (my_rank + 1), 17]
        operand = tf.ones(shape, dtype=tf.float32) * my_rank
        with self.assertRaises(tf.errors.FailedPreconditionError):
            sess.run(hvd.allgather(operand))
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:19,代码来源:test_tensorflow.py

示例9: test_horovod_allgather_type_error

# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import rank [as 别名]
def test_horovod_allgather_type_error(self):
    """Allgather must fail when the gathered dtypes differ across ranks."""
    hvd.init()
    my_rank = hvd.rank()
    world_size = hvd.size()

    # A dtype mismatch needs at least two workers.
    if world_size == 1:
        return

    with self.test_session(config=self.config) as sess:
        # Same shape on every rank; even ranks send int32, odd ranks float32.
        if my_rank % 2 == 0:
            element_type = tf.int32
        else:
            element_type = tf.float32
        operand = tf.ones([17] * 3, dtype=element_type) * my_rank
        with self.assertRaises(tf.errors.FailedPreconditionError):
            sess.run(hvd.allgather(operand))
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:19,代码来源:test_tensorflow.py

示例10: test_horovod_broadcast_type_error

# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import rank [as 别名]
def test_horovod_broadcast_type_error(self):
    """Broadcast must fail when the broadcast dtypes differ across ranks."""
    hvd.init()
    my_rank = hvd.rank()
    world_size = hvd.size()

    # A dtype mismatch needs at least two workers.
    if world_size == 1:
        return

    with self.test_session(config=self.config) as sess:
        # Same shape on every rank; even ranks send int32, odd ranks float32.
        if my_rank % 2 == 0:
            element_type = tf.int32
        else:
            element_type = tf.float32
        operand = tf.ones([17] * 3, dtype=element_type) * my_rank
        with self.assertRaises(tf.errors.FailedPreconditionError):
            # The second argument (0) is the root rank of the broadcast.
            sess.run(hvd.broadcast(operand, 0))
开发者ID:mlperf,项目名称:training_results_v0.6,代码行数:19,代码来源:test_tensorflow.py

示例11: setup

# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import rank [as 别名]
def setup():
    """Initialize Horovod once and check it agrees with mpi4py.

    Returns:
        ``False`` when the module-level ``horovod_installed`` flag is falsy,
        otherwise the ``hvd`` module. Calls after the first successful
        ``hvd.init()`` return ``hvd`` immediately.

    Raises:
        AssertionError: if MPI multi-threading is unsupported, or if
            Horovod's size or rank differs from ``MPI.COMM_WORLD``.
    """
    global horovod_initialized

    if not horovod_installed:
        return False
    if horovod_initialized:
        return hvd

    hvd.init()
    horovod_initialized = True

    world_size = hvd.size()
    world_rank = hvd.rank()

    # MPI multi-threading support is required.
    assert hvd.mpi_threads_supported()

    # Stop mpi4py from initializing MPI again when it is imported below.
    import mpi4py.rc
    mpi4py.rc.initialize = False
    from mpi4py import MPI

    # Horovod and mpi4py must report the same world size and rank.
    world = MPI.COMM_WORLD
    assert world_size == world.Get_size()
    assert world_rank == world.Get_rank()
    return hvd
开发者ID:blue-oil,项目名称:blueoil,代码行数:27,代码来源:horovod.py

示例12: is_enabled

# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import rank [as 别名]
def is_enabled():
    """Report whether this process should run with Horovod.

    Returns ``True`` when the ``USE_HOROVOD`` environment variable is set to
    any non-empty value, or when the parent process name starts with
    ``horovodrun`` or ``mpirun`` and Horovod was importable. Returns
    ``False`` when the parent PID is 1 or less, or the parent is some other
    program. If the parent is a launcher but Horovod is not installed, a
    message is printed and the process exits with status 1.
    """
    if os.getenv("USE_HOROVOD"):
        return True

    parent_pid = os.getppid()
    if parent_pid <= 1:
        return False

    launcher = _get_pname(parent_pid)
    if not launcher.startswith(("horovodrun", "mpirun")):
        return False

    # Started by a Horovod/MPI launcher: Horovod must be importable.
    if not horovod_installed:
        print("you're trying to run on horovod, but importing Horovod failed. exit.")
        sys.exit(1)
    return True


# return True if horovod is not enabled, or enabled and the process is rank 0. 
开发者ID:blue-oil,项目名称:blueoil,代码行数:21,代码来源:horovod.py

示例13: training_step

# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import rank [as 别名]
def training_step(images, labels, first_batch):
  """Run one optimization step on a batch and return its loss.

  Uses the module-level ``mnist_model``, ``loss`` and ``opt``.

  Args:
    images: batch passed to ``mnist_model`` with ``training=True``.
    labels: targets passed to ``loss`` together with the model output.
    first_batch: when truthy, model and optimizer variables are broadcast
      from rank 0 after the gradients have been applied.

  Returns:
    The loss value computed for this batch.
  """
  with tf.GradientTape() as raw_tape:
    predictions = mnist_model(images, training=True)
    loss_value = loss(labels, predictions)

  # Horovod: wrap the tape in Horovod's DistributedGradientTape.
  distributed_tape = hvd.DistributedGradientTape(raw_tape)

  model_vars = mnist_model.trainable_variables
  gradients = distributed_tape.gradient(loss_value, model_vars)
  opt.apply_gradients(zip(gradients, model_vars))

  if first_batch:
    # Horovod: copy rank 0's variable state to every other process so all
    # workers start consistently, whether weights were random or restored
    # from a checkpoint. This runs after the first gradient step so that
    # the optimizer variables already exist.
    hvd.broadcast_variables(mnist_model.variables, root_rank=0)
    hvd.broadcast_variables(opt.variables(), root_rank=0)

  return loss_value


# Horovod: adjust number of steps based on number of GPUs. 
开发者ID:aws,项目名称:sagemaker-tensorflow-training-toolkit,代码行数:27,代码来源:horovod_mnist.py

示例14: _get_runconfig

# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import rank [as 别名]
def _get_runconfig(is_distributed=DISTRIBUTED, save_checkpoints_steps=None):
    """Build the ``tf.estimator.RunConfig`` for this process.

    Args:
        is_distributed: when truthy, a session config is attached that lists
            only the GPU whose index equals ``hvd.local_rank()`` and enables
            ``allow_growth``.
        save_checkpoints_steps: forwarded to ``RunConfig`` unchanged.

    Returns:
        A ``RunConfig`` with time-based checkpointing disabled
        (``save_checkpoints_secs=None``) and ``log_step_count_steps=100``.
    """
    run_kwargs = dict(
        save_checkpoints_steps=save_checkpoints_steps,
        save_checkpoints_secs=None,
        log_step_count_steps=100,
    )
    if is_distributed:
        # Horovod: one GPU per process, chosen by the local rank.
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        session_config.gpu_options.visible_device_list = str(hvd.local_rank())
        run_kwargs["session_config"] = session_config
    return tf.estimator.RunConfig(**run_kwargs)
开发者ID:microsoft,项目名称:DistributedDeepLearning,代码行数:21,代码来源:train_model.py

示例15: _get_runconfig

# 需要导入模块: from horovod import tensorflow [as 别名]
# 或者: from horovod.tensorflow import rank [as 别名]
def _get_runconfig(is_distributed=defaults.DISTRIBUTED, save_checkpoints_steps=None):
    """Build the ``tf.estimator.RunConfig`` for this process.

    Args:
        is_distributed: when truthy, a session config is attached that lists
            only the GPU whose index equals ``hvd.local_rank()`` and enables
            ``allow_growth``.
        save_checkpoints_steps: forwarded to ``RunConfig`` unchanged.

    Returns:
        A ``RunConfig`` with time-based checkpointing disabled
        (``save_checkpoints_secs=None``) and ``log_step_count_steps=100``.
    """
    run_kwargs = dict(
        save_checkpoints_steps=save_checkpoints_steps,
        save_checkpoints_secs=None,
        log_step_count_steps=100,
    )
    if is_distributed:
        # Horovod: one GPU per process, chosen by the local rank.
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        session_config.gpu_options.visible_device_list = str(hvd.local_rank())
        run_kwargs["session_config"] = session_config
    return tf.estimator.RunConfig(**run_kwargs)
开发者ID:microsoft,项目名称:DistributedDeepLearning,代码行数:21,代码来源:resnet_main.py


注:本文中的horovod.tensorflow.rank方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。