

Python multi_worker_util.normalize_cluster_spec Function Code Examples

This article collects typical usage examples of the Python function tensorflow.python.distribute.multi_worker_util.normalize_cluster_spec. If you are wondering what normalize_cluster_spec does, how to call it, or what real-world usage looks like, the curated code examples below should help.


The following presents 15 code examples of the normalize_cluster_spec function, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
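
Before going through the examples, here is a minimal sketch of the basic call, assuming a TensorFlow build in which this private module is importable: normalize_cluster_spec accepts a dict, a tf.train.ClusterSpec, or a tf.train.ClusterDef and returns a tf.train.ClusterSpec.

# Minimal sketch: normalize a plain dict into a tf.train.ClusterSpec.
# Assumes a TensorFlow build where this private module is importable.
from tensorflow.python.distribute import multi_worker_util

cluster = {
    "chief": ["127.0.0.1:1234"],
    "worker": ["127.0.0.1:8964", "127.0.0.1:2333"],
}
spec = multi_worker_util.normalize_cluster_spec(cluster)
print(spec.as_dict())  # same jobs and task addresses as the input dict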

Example 1: testUnexpectedInput

  def testUnexpectedInput(self):
    cluster_spec = ["127.0.0.1:8964", "127.0.0.1:2333"]

    with self.assertRaisesRegexp(
        ValueError,
        "`cluster_spec' should be dict or a `tf.train.ClusterSpec` or a "
        "`tf.train.ClusterDef` object"):
      multi_worker_util.normalize_cluster_spec(cluster_spec)
Developer: AnishShah, Project: tensorflow, Lines of code: 8, Source: multi_worker_util_test.py
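
The bare list of addresses in this test fails the type check. The accepted form is a dict keyed by job name, as in the sketch below (the job name "worker" is chosen purely for illustration):

addresses = ["127.0.0.1:8964", "127.0.0.1:2333"]
# Passing the list directly raises ValueError; a job-name dict is accepted.
spec = multi_worker_util.normalize_cluster_spec({"worker": addresses})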

Example 2: _initialize_multi_worker

  def _initialize_multi_worker(self, num_gpus_per_worker, cluster_spec,
                               task_type, task_id):
    """Initializes the object for multi-worker training."""
    if task_type is None or task_id is None:
      raise ValueError("When `cluster_spec` is given, you must also specify "
                       "`task_type` and `task_id`")
    if task_type not in ("chief", "worker"):
      raise ValueError(
          "Unrecognized task_type: %r, valid task types are: \"chief\", "
          "\"worker\"." % task_type)
    cluster_spec = multi_worker_util.normalize_cluster_spec(cluster_spec)
    self._num_workers = multi_worker_util.worker_count(cluster_spec, task_type)
    if not self._num_workers:
      raise ValueError("No `worker` or `chief` tasks can be found in "
                       "`cluster_spec`.")

    self._is_chief = multi_worker_util.is_chief(cluster_spec, task_type,
                                                task_id)

    self._worker_device = "/job:%s/task:%d" % (task_type, task_id)
    self._host_input_device = numpy_dataset.SingleDevice(self._worker_device)
    if num_gpus_per_worker:
      local_devices = tuple(
          "%s/device:GPU:%d" % (self._worker_device, i)
          for i in range(num_gpus_per_worker)
      )
    else:
      local_devices = (self._worker_device,)

    self._collective_keys = cross_device_utils.CollectiveKeys()
    self._initialize_local(local_devices)
    self._input_workers = input_lib.InputWorkers(
        self._device_map, [(self._worker_device, self.worker_devices)])
    self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
        num_workers=self._num_workers,
        num_gpus_per_worker=num_gpus_per_worker,
        collective_keys=self._collective_keys)

    # Add a default device so that ops without specified devices will not end up
    # on other workers.
    self._default_device = "/job:%s/task:%d" % (task_type, task_id)

    self._cluster_spec = multi_worker_util.normalize_cluster_spec(cluster_spec)
    self._task_type = task_type
    self._task_id = task_id

    logging.info(
        "Multi-worker CollectiveAllReduceStrategy with "
        "cluster_spec = %r, task_type = %r, task_id = %r, "
        "num_workers = %r, local_devices = %r", cluster_spec.as_dict(),
        task_type, task_id, self._num_workers, local_devices)
Developer: Wajih-O, Project: tensorflow, Lines of code: 51, Source: collective_all_reduce_strategy.py
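
For a worker with task_id 1 and two GPUs, the device strings built above come out as follows. This is a worked illustration of the string formatting only, not a call into TensorFlow:

task_type, task_id, num_gpus_per_worker = "worker", 1, 2
worker_device = "/job:%s/task:%d" % (task_type, task_id)
local_devices = tuple("%s/device:GPU:%d" % (worker_device, i)
                      for i in range(num_gpus_per_worker))
# ('/job:worker/task:1/device:GPU:0', '/job:worker/task:1/device:GPU:1')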

Example 3: _initialize_multi_worker

  def _initialize_multi_worker(self, num_gpus_per_worker, cluster_spec,
                               task_type, task_id):
    """Initializes the object for multi-worker training."""
    if task_type is None or task_id is None:
      raise ValueError("When `cluster_spec` is given, you must also specify "
                       "`task_type` and `task_id`")
    if task_type not in ["chief", "worker"]:
      raise ValueError(
          "Unrecognized task_type: %r, valid task types are: \"chief\", "
          "\"worker\"." % task_type)
    cluster_spec = multi_worker_util.normalize_cluster_spec(cluster_spec)
    self._num_workers = len(cluster_spec.as_dict().get("worker", [])) + len(
        cluster_spec.as_dict().get("chief", []))
    if not self._num_workers:
      raise ValueError("No `worker` or `chief` tasks can be found in "
                       "`cluster_spec`.")

    self._is_chief = multi_worker_util.is_chief(cluster_spec, task_type,
                                                task_id)

    worker_device = "/job:%s/task:%d" % (task_type, task_id)
    if num_gpus_per_worker:
      local_devices = [
          "%s/device:GPU:%d" % (worker_device, i)
          for i in range(num_gpus_per_worker)
      ]
    else:
      local_devices = [worker_device]

    self._collective_keys = cross_tower_utils.CollectiveKeys()
    super(CollectiveAllReduceStrategy, self).__init__(
        devices=local_devices,
        cross_tower_ops=cross_tower_ops_lib.CollectiveAllReduce(
            num_workers=self._num_workers,
            num_gpus_per_worker=num_gpus_per_worker,
            collective_keys=self._collective_keys))

    # Add a default device so that ops without specified devices will not end up
    # on other workers.
    self._default_device = "/job:%s/task:%d" % (task_type, task_id)

    self._cluster_spec = multi_worker_util.normalize_cluster_spec(cluster_spec)
    self._task_type = task_type
    self._task_id = task_id

    logging.info(
        "Multi-worker CollectiveAllReduceStrategy with "
        "cluster_spec = %r, task_type = %r, task_id = %r, "
        "num_workers = %r, local_devices = %r", cluster_spec.as_dict(),
        task_type, task_id, self._num_workers, local_devices)
Developer: baojianzhou, Project: tensorflow, Lines of code: 50, Source: collective_all_reduce_strategy.py
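
The manual count above (length of the "worker" list plus length of the "chief" list) matches what Example 2 obtains from multi_worker_util.worker_count. A small sketch of that equivalence, assuming the helper counts chief and worker tasks together exactly as Example 3 does explicitly:

cluster = multi_worker_util.normalize_cluster_spec({
    "chief": ["127.0.0.1:1234"],
    "worker": ["127.0.0.1:8964", "127.0.0.1:2333"],
})
manual = (len(cluster.as_dict().get("worker", [])) +
          len(cluster.as_dict().get("chief", [])))
assert manual == multi_worker_util.worker_count(cluster, "worker")  # 3 tasks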

Example 4: configure

  def configure(self,
                session_config=None,
                cluster_spec=None,
                task_type=None,
                task_id=None):
    """Configures the strategy class.

    The strategy object will be re-initialized if `cluster_spec` is given but
    was not passed in the constructor.

    Args:
      session_config: not used currently.
      cluster_spec: a dict, ClusterDef or ClusterSpec object specifying the
        cluster configurations.
      task_type: the current task type.
      task_id: the current task id.

    Raises:
      ValueError: if `cluster_spec` is given but `task_type` or `task_id` is
        not.
    """
    del session_config

    # Set the devices if cluster_spec is defined in TF_CONFIG but not passed in
    # the constructor.
    if not self._cluster_spec and cluster_spec:
      self._cluster_spec = multi_worker_util.normalize_cluster_spec(
          cluster_spec)
      if task_type is None or task_id is None:
        raise ValueError("When `cluster_spec` is given, must also specify "
                         "`task_type` and `task_id`.")
      self._initialize_devices(self._num_gpus_per_worker, self._cluster_spec,
                               task_type, task_id)
Developer: mrlittlepig, Project: tensorflow, Lines of code: 33, Source: parameter_server_strategy.py

Example 5: _configure

  def _configure(self,
                 session_config=None,
                 cluster_spec=None,
                 task_type=None,
                 task_id=None):
    """Configures the object.

    Args:
      session_config: a `tf.compat.v1.ConfigProto`
      cluster_spec: a dict, ClusterDef or ClusterSpec object specifying the
        cluster configurations.
      task_type: the current task type, such as "worker".
      task_id: the current task id.

    Raises:
      ValueError: if `task_type` is not in the `cluster_spec`.
    """
    if cluster_spec:
      # Use the num_gpus_per_worker recorded in constructor since _configure
      # doesn't take num_gpus.
      cluster_resolver = SimpleClusterResolver(
          cluster_spec=multi_worker_util.normalize_cluster_spec(cluster_spec),
          task_type=task_type,
          task_id=task_id,
          num_accelerators={"GPU": self._num_gpus_per_worker},
          rpc_layer=self._rpc_layer)
      self._initialize_multi_worker(cluster_resolver)
      assert isinstance(self._get_cross_device_ops(),
                        cross_device_ops_lib.CollectiveAllReduce)

    if session_config:
      session_config.CopyFrom(self._update_config_proto(session_config))
Developer: aritratony, Project: tensorflow, Lines of code: 32, Source: collective_all_reduce_strategy.py

Example 6: __init__

  def __init__(self,
               num_gpus_per_worker=0,
               cluster_spec=None,
               task_type=None,
               task_id=None):
    """Initializes this strategy.

    Args:
      num_gpus_per_worker: number of local GPUs or GPUs per worker.
      cluster_spec: a dict, ClusterDef or ClusterSpec object specifying the
        cluster configurations.
      task_type: the current task type.
      task_id: the current task id.

    Raises:
      ValueError: if `cluster_spec` is given but `task_type` or `task_id` is
        not.
    """
    super(ParameterServerStrategy, self).__init__()
    self._num_gpus_per_worker = num_gpus_per_worker
    if cluster_spec:
      cluster_spec = multi_worker_util.normalize_cluster_spec(cluster_spec)
      if task_type is None or task_id is None:
        raise ValueError("When `cluster_spec` is given, must also specify "
                         "`task_type` and `task_id`.")
    self._cluster_spec = cluster_spec

    # We typically don't need to do all-reduce in this strategy.
    self._cross_tower_ops = (
        cross_tower_ops_lib.ReductionToOneDeviceCrossTowerOps(
            reduce_to_device=_LOCAL_CPU))

    self._initialize_devices(num_gpus_per_worker, cluster_spec, task_type,
                             task_id)
Developer: mrlittlepig, Project: tensorflow, Lines of code: 34, Source: parameter_server_strategy.py

Example 7: _configure

  def _configure(self,
                 session_config=None,
                 cluster_spec=None,
                 task_type=None,
                 task_id=None):
    """Configures the strategy class.

    The strategy object will be re-initialized if `cluster_spec` is given but
    was not passed in the constructor.

    Args:
      session_config: not used currently.
      cluster_spec: a dict, ClusterDef or ClusterSpec object specifying the
        cluster configurations.
      task_type: the current task type.
      task_id: the current task id.

    Raises:
      ValueError: if `cluster_spec` is given but `task_type` or `task_id` is
        not.
    """
    if cluster_spec:
      # Use the num_gpus_per_worker recorded in constructor since _configure
      # doesn't take num_gpus.
      cluster_resolver = SimpleClusterResolver(
          cluster_spec=multi_worker_util.normalize_cluster_spec(cluster_spec),
          task_type=task_type,
          task_id=task_id,
          num_accelerators={"GPU": self._num_gpus_per_worker})
      self._initialize_multi_worker(cluster_resolver)

    if session_config:
      session_config.CopyFrom(self._update_config_proto(session_config))
Developer: adit-chandra, Project: tensorflow, Lines of code: 33, Source: parameter_server_strategy.py

Example 8: testClusterSpecAsInput

  def testClusterSpecAsInput(self):
    cluster_spec = server_lib.ClusterSpec({
        "chief": ["127.0.0.1:1234"],
        "worker": ["127.0.0.1:8964", "127.0.0.1:2333"],
        "ps": ["127.0.0.1:1926", "127.0.0.1:3141"]
    })
    self.assert_same_cluster(
        cluster_spec, multi_worker_util.normalize_cluster_spec(cluster_spec))
Developer: AnishShah, Project: tensorflow, Lines of code: 8, Source: multi_worker_util_test.py

Example 9: _split_cluster_for_evaluator

def _split_cluster_for_evaluator(cluster_spec, task_type):
  """Split the cluster for evaluator since it needn't talk to other tasks."""
  # Splitting the cluster is important to prevent the evaluator from talking to
  # other tasks in the cluster. Since we allow the evaluator not to use
  # distribution strategies, ops in the evaluator task may have unspecified
  # devices. Those ops may end up on other tasks if we don't split the cluster.
  new_cluster_spec = multi_worker_util.normalize_cluster_spec(
      cluster_spec).as_dict()
  if task_type == _TaskType.EVALUATOR:
    assert _TaskType.EVALUATOR in new_cluster_spec
    new_cluster_spec = {
        _TaskType.EVALUATOR: new_cluster_spec[_TaskType.EVALUATOR]
    }
  else:
    new_cluster_spec.pop(_TaskType.EVALUATOR, None)
  return multi_worker_util.normalize_cluster_spec(new_cluster_spec)
Developer: terrytangyuan, Project: tensorflow, Lines of code: 17, Source: distribute_coordinator.py
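
The effect of the split is easiest to see on a plain dict. The standalone helper below is hypothetical (it is not a TensorFlow API) and mirrors the same logic without the internal _TaskType constants:

def split_cluster_for_evaluator(cluster_dict, task_type):
  # The evaluator keeps only its own job; every other task drops the evaluator job.
  if task_type == "evaluator":
    return {"evaluator": cluster_dict["evaluator"]}
  return {job: tasks for job, tasks in cluster_dict.items() if job != "evaluator"}

cluster = {"worker": ["w0:2222", "w1:2222"], "evaluator": ["e0:2222"]}
print(split_cluster_for_evaluator(cluster, "worker"))     # {'worker': ['w0:2222', 'w1:2222']}
print(split_cluster_for_evaluator(cluster, "evaluator"))  # {'evaluator': ['e0:2222']}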

Example 10: _initialize_multi_worker

  def _initialize_multi_worker(self, cluster_resolver):
    """Initializes the object for multi-worker training."""
    # TODO(yuefengz): The `num_gpus` is only for this particular task. It
    # assumes all workers have the same number of GPUs. We should remove this
    # assumption by querying all tasks for their numbers of GPUs.
    num_gpus = cluster_resolver.num_accelerators()
    cluster_spec = multi_worker_util.normalize_cluster_spec(
        cluster_resolver.cluster_spec())
    task_type = cluster_resolver.task_type
    task_id = cluster_resolver.task_id
    if task_type is None or task_id is None:
      raise ValueError("When `cluster_spec` is given, you must also specify "
                       "`task_type` and `task_id` in the `cluster_resolver`.")
    if task_type not in ("chief", "worker"):
      raise ValueError(
          "Unrecognized task_type: %r, valid task types are: \"chief\", "
          "\"worker\"." % task_type)

    self._num_workers = multi_worker_util.worker_count(cluster_spec, task_type)
    if not self._num_workers:
      raise ValueError("No `worker` or `chief` tasks can be found in "
                       "`cluster_spec`.")

    self._is_chief = multi_worker_util.is_chief(cluster_spec, task_type,
                                                task_id)

    self._worker_device = "/job:%s/task:%d" % (task_type, task_id)
    self._host_input_device = numpy_dataset.SingleDevice(self._worker_device)
    if num_gpus:
      local_devices = tuple("%s/device:GPU:%d" % (self._worker_device, i)
                            for i in range(num_gpus))
    else:
      local_devices = (self._worker_device,)

    self._collective_keys = cross_device_utils.CollectiveKeys()
    super(CollectiveAllReduceExtended, self)._initialize_local(local_devices)
    self._input_workers = input_lib.InputWorkers(
        self._device_map, [(self._worker_device, self.worker_devices)])
    self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
        num_workers=self._num_workers,
        num_gpus_per_worker=num_gpus,
        collective_keys=self._collective_keys)

    # Add a default device so that ops without specified devices will not end up
    # on other workers.
    self._default_device = "/job:%s/task:%d" % (task_type, task_id)

    self._cluster_spec = cluster_spec
    self._task_type = task_type
    self._task_id = task_id

    logging.info(
        "Multi-worker CollectiveAllReduceStrategy with "
        "cluster_spec = %r, task_type = %r, task_id = %r, "
        "num_workers = %r, local_devices = %r", cluster_spec.as_dict(),
        task_type, task_id, self._num_workers, local_devices)
Developer: ziky90, Project: tensorflow, Lines of code: 56, Source: collective_all_reduce_strategy.py

Example 11: _configure

  def _configure(self,
                 session_config=None,
                 cluster_spec=None,
                 task_type=None,
                 task_id=None):
    """Configures the strategy class.

    The strategy object will be re-initialized if `cluster_spec` is given but
    was not passed in the constructor.

    Args:
      session_config: not used currently.
      cluster_spec: a dict, ClusterDef or ClusterSpec object specifying the
        cluster configurations.
      task_type: the current task type.
      task_id: the current task id.

    Raises:
      ValueError: if `cluster_spec` is given but `task_type` or `task_id` is
        not.
    """
    if not self._cluster_spec and cluster_spec:
      # If a `cluster_spec` is already passed in, do nothing here.
      # TODO(yuefengz): check `cluster_spec` is the same if this object has
      # already been initialized with a `cluster_spec`.
      if task_type is None or task_id is None:
        raise ValueError("When `cluster_spec` is given, must also specify "
                         "`task_type` and `task_id`.")
      self._cluster_spec = multi_worker_util.normalize_cluster_spec(
          cluster_spec)
      self._task_type = task_type
      self._task_id = task_id
      self._initialize_multi_worker(self._num_gpus_per_worker,
                                    self._cluster_spec, task_type, task_id)

    if not session_config or not self._cluster_spec:
      return

    session_config.isolate_session_state = False

    assert self._cluster_spec
    assert self._task_type
    assert self._task_id is not None

    # The device filters prevent communication between workers.
    if self._task_type not in ["chief", "worker"]:
      return
    del session_config.device_filters[:]
    session_config.device_filters.extend(
        ["/job:%s/task:%d" % (self._task_type, self._task_id), "/job:ps"])
Developer: JonathanRaiman, Project: tensorflow, Lines of code: 50, Source: parameter_server_strategy.py

Example 12: _cluster_spec_to_device_list

def _cluster_spec_to_device_list(cluster_spec, num_gpus_per_worker):
  """Returns a device list given a cluster spec."""
  cluster_spec = multi_worker_util.normalize_cluster_spec(cluster_spec)
  devices = []
  for task_type in ("chief", "worker"):
    for task_id in range(len(cluster_spec.as_dict().get(task_type, []))):
      if num_gpus_per_worker == 0:
        devices.append("/job:%s/task:%d" % (task_type, task_id))
      else:
        devices.extend([
            "/job:%s/task:%d/device:GPU:%i" % (task_type, task_id, gpu_id)
            for gpu_id in range(num_gpus_per_worker)
        ])
  return devices
Developer: aeverall, Project: tensorflow, Lines of code: 14, Source: mirrored_strategy.py
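
As a hypothetical call to the helper above: a cluster with one chief, two workers, and two GPUs per worker expands into six GPU device strings:

devices = _cluster_spec_to_device_list(
    {"chief": ["host0:2222"], "worker": ["host1:2222", "host2:2222"]},
    num_gpus_per_worker=2)
# ['/job:chief/task:0/device:GPU:0', '/job:chief/task:0/device:GPU:1',
#  '/job:worker/task:0/device:GPU:0', '/job:worker/task:0/device:GPU:1',
#  '/job:worker/task:1/device:GPU:0', '/job:worker/task:1/device:GPU:1']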

Example 13: estimator_train

def estimator_train(estimator, train_distributed_fn, hooks):
  """Run distribute coordinator for Estimator's `train` method."""
  assert estimator._config._distribute_coordinator_mode
  run_config = estimator._config
  assert estimator._config.cluster_spec
  cluster_spec = multi_worker_util.normalize_cluster_spec(
      estimator._config.cluster_spec)
  assert estimator._config._train_distribute

  if 'evaluator' in cluster_spec.jobs:
    raise ValueError("'evaluator' job is not supported if you don't use "
                     '`train_and_evaluate`')

  if (estimator._config._distribute_coordinator_mode !=  # pylint: disable=protected-access
      dc.CoordinatorMode.STANDALONE_CLIENT):
    raise ValueError('Only `STANDALONE_CLIENT` mode is supported when you call '
                     '`estimator.train`')

  if estimator._config._train_distribute.extended.experimental_between_graph:
    # TODO(yuefengz): remove this limitation once we figure out how to merge
    # return values from `_worker_fn`s.
    raise ValueError('`Estimator.train` API is not supported for %s with '
                     '`STANDALONE_CLIENT` mode.' %
                     estimator._config._train_distribute.__class__.__name__)

  def _worker_fn(strategy):
    """Function for worker task."""
    local_estimator = copy.deepcopy(estimator)
    local_estimator._config._train_distribute = strategy
    context = dc_context.get_current_worker_context()
    _init_run_config_from_worker_context(local_estimator._config, context)
    logging.info('Updated config: %s', str(vars(local_estimator._config)))
    local_estimator._train_distribution = strategy

    if context.is_chief:
      chief_hooks = hooks
    else:
      chief_hooks = []
    train_distributed_fn(local_estimator, strategy, chief_hooks)
    return local_estimator

  return dc.run_distribute_coordinator(
      _worker_fn,
      estimator._config.train_distribute,
      mode=run_config._distribute_coordinator_mode,
      cluster_spec=cluster_spec,
      session_config=run_config.session_config)
Developer: kylin9872, Project: tensorflow, Lines of code: 47, Source: estimator_training.py

Example 14: testClusterDefAsInput

  def testClusterDefAsInput(self):
    cluster_def = cluster_pb2.ClusterDef()
    job = cluster_def.job.add()
    job.name = "chief"
    job.tasks[0] = "127.0.0.1:1234"

    job = cluster_def.job.add()
    job.name = "worker"
    job.tasks[0] = "127.0.0.1:8964"
    job.tasks[1] = "127.0.0.1:2333"

    job = cluster_def.job.add()
    job.name = "ps"
    job.tasks[0] = "127.0.0.1:1926"
    job.tasks[1] = "127.0.0.1:3141"

    self.assert_same_cluster(
        cluster_def, multi_worker_util.normalize_cluster_spec(cluster_def))
Developer: AnishShah, Project: tensorflow, Lines of code: 18, Source: multi_worker_util_test.py

Example 15: _initialize_multi_worker

  def _initialize_multi_worker(self, num_gpus, cluster_spec):
    """Initializes the object for multi-worker training."""
    cluster_spec = multi_worker_util.normalize_cluster_spec(cluster_spec)
    self._cluster_spec = cluster_spec

    self._workers = []
    for job in ["chief", "worker"]:
      for task in range(len(cluster_spec.as_dict().get(job, []))):
        self._workers.append("/job:%s/task:%d" % (job, task))

    if num_gpus is None:
      raise ValueError("`num_gpus` is required if `cluster_spec` is given.")
    if num_gpus > 0:
      self._worker_device_map = {
          worker: [
              device_util.canonicalize(worker + "/device:GPU:%d" % gpu)
              for gpu in range(num_gpus)
          ] for worker in self._workers
      }
    else:
      self._worker_device_map = {
          worker: [device_util.canonicalize(worker, "/device:CPU:0")]
          for worker in self._workers
      }

    devices = nest.flatten(self._worker_device_map)

    # Setting `_default_device` will add a device scope in the
    # distribution.scope. We set the default device to the first worker. When
    # users specify device under distribution.scope by
    #   with tf.device("/cpu:0"):
    #     ...
    # their ops will end up on the cpu device of its first worker, e.g.
    # "/job:worker/task:0/device:CPU:0". Note this is not used in tower mode.
    self._default_device = self._workers[0]

    assert devices, "Must specify at least one device."
    assert len(set(devices)) == len(devices), (
        "No duplicates allowed in `devices` argument.")
    # TODO(josh11b): Require at least 2 devices?
    self._devices = [device_util.resolve(d) for d in devices]
    self._canonical_device_set = set(self._devices)
    self._device_index = values.PerDevice(
        {d: i for i, d in enumerate(devices)})
Developer: Jordan1237, Project: tensorflow, Lines of code: 44, Source: mirrored_strategy.py


Note: The tensorflow.python.distribute.multi_worker_util.normalize_cluster_spec function examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers, and the source code copyright belongs to the original authors. Please refer to the corresponding project's License when distributing or using the code; do not reproduce without permission.