本文整理汇总了 Python 中 ray.wait 方法的典型用法代码示例。如果您正苦于以下问题：Python ray.wait 方法的具体用法？Python ray.wait 怎么用？Python ray.wait 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 ray 的用法示例。
在下文中一共展示了ray.wait方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _ensure_resources
# 需要导入模块: import ray [as 别名]
# 或者: from ray import wait [as 别名]
def _ensure_resources(self, instances):
    """Check that Ray reports enough free resources for ``instances`` workers.

    The per-worker requirement is a ``"CPU"`` entry taken from
    ``self._cpus_per_worker`` merged with everything in
    ``self._custom_resources_per_worker``. Each amount is multiplied by
    ``instances`` and compared against ``ray.available_resources()``.

    TODO: This doesn't really work with more than 1 receiver as they create
    and check in parallel. In any case ray will not error if we create an
    actor without resources it will just wait and not be used until it can
    be run

    Args:
        instances: Number of workers that are about to be created.

    Raises:
        RuntimeError: If any required amount exceeds the available amount.
    """
    available = ray.available_resources()
    per_worker = {
        "CPU": self._cpus_per_worker,
        **self._custom_resources_per_worker,
    }
    required = {key: val * instances for key, val in per_worker.items()}

    # A resource missing from the availability report counts as zero.
    # The builtin ``all`` short-circuits on the first shortfall and avoids
    # building a temporary list just to reduce it.
    enough = all(
        available.get(key, 0) >= amount for key, amount in required.items())
    if not enough:
        raise RuntimeError(
            "Not enough RAY resources to start the acting pool. "
            f"Need: {required} Available: {available}")
示例2: step
# 需要导入模块: import ray [as 别名]
# 或者: from ray import wait [as 别名]
def step(self):
    """Run one learner step and all-reduce gradients across the group.

    Waits on every handle in ``self.exp_handles``, sleeps for a random
    whole number of seconds between 0 and 3, synchronises on
    ``self.learner_group`` and then all-reduces each entry of
    ``self.network_grads`` within that group.

    Returns:
        Always ``True``.
    """
    print(f"learner {self.rank} step")

    # Every outstanding experience handle must complete before continuing.
    for exp_handle in self.exp_handles:
        exp_handle.wait()

    # NOTE(review): the sleep appears to stand in for batching the
    # experience together -- confirm against the full example.
    pause_s = random.randint(0, 3)
    time.sleep(pause_s)

    # Line up with the other learners, then combine gradients with them.
    dist.barrier(self.learner_group)
    for grad in self.network_grads:
        dist.all_reduce(grad, group=self.learner_group)

    print(f"learner {self.rank} shared gradients")
    return True
示例3: game_stream
# 需要导入模块: import ray [as 别名]
# 或者: from ray import wait [as 别名]
def game_stream(self, examples: bool = False, full_game: bool = False):
    """Endlessly yield game data produced by the remote generators.

    One remote call per entry of ``self.generators`` is submitted up front.
    Each time a call finishes its result is fetched, the generator named by
    the returned worker id is asked for another game, and the finished game
    is yielded.

    Args:
        examples: When true, ``get_game_examples`` is called on the
            generators; otherwise ``recover_game`` is called.
        full_game: When true, each fetched game is yielded whole instead
            of being split into one tuple per index.

    Yields:
        The whole game when ``full_game`` is set. Otherwise one tuple per
        index: ``(obs, action, reward, new_obs, end)`` when ``examples``
        is set, else ``(state, observ, reward, end, info, action)``.
    """
    if examples:
        pending = [
            gen.get_game_examples.remote(with_id=True)
            for gen in self.generators
        ]
    else:
        pending = [gen.recover_game.remote(True) for gen in self.generators]

    while True:
        finished, pending = ray.wait(pending)
        for finished_id in finished:
            game, worker_id = ray.get(finished_id)

            # Queue the next request on the generator that just delivered.
            source = self.generators[worker_id]
            if examples:
                pending.append(source.get_game_examples.remote(True))
            else:
                pending.append(source.recover_game.remote(True))

            if full_game:
                yield game
                continue

            if examples:
                _states, obs, actions, rewards, new_obs, ends = game
                for idx in range(len(rewards)):
                    yield (obs[idx], actions[idx], rewards[idx],
                           new_obs[idx], ends[idx])
            else:
                _states, observs, rewards, ends, infos, actions = game
                for idx in range(len(actions)):
                    yield (_states[idx], observs[idx], rewards[idx],
                           ends[idx], infos[idx], actions[idx])
示例4: run_driver
# 需要导入模块: import ray [as 别名]
# 或者: from ray import wait [as 别名]
def run_driver():
    """Time ``args.iters`` gradient hand-offs from a worker to a parameter server.

    Connects to Ray, creates one ``Worker`` and one ``ParameterServer``
    actor, logs the IP addresses involved, then repeatedly asks the worker
    for gradients, passes them to the parameter server and measures how
    long each round takes. Per-iteration throughput and a final
    min/median/mean summary (milliseconds) are written through
    ``util.FileLogger('out')``.
    """
    ray.init(redis_address=args.ip)

    worker = Worker.remote()
    ps = ParameterServer.remote()
    log = util.FileLogger('out')
    log(f"Worker ip {ray.get(worker.ip.remote())}")
    log(f"PS ip {ray.get(ps.ip.remote())}")
    log(f"Driver ip {socket.gethostbyname(socket.gethostname())}")

    time_list = []
    for i in range(args.iters):
        start_time = time.perf_counter()
        grads = worker.compute_gradients.remote()
        result = ps.receive.remote(grads)
        # Wait for the receive call to finish without fetching its value.
        ray.wait([result])
        elapsed_time_ms = (time.perf_counter() - start_time) * 1000
        time_list.append(elapsed_time_ms)
        rate = args.size_mb / (elapsed_time_ms / 1000)
        log('%03d/%d sent %d MBs in %.1f ms: %.2f MB/second' % (i, args.iters, args.size_mb, elapsed_time_ms, rate))

    # With zero iterations there is nothing to summarise, and np.min / np.median
    # would raise on an empty list.
    if not time_list:
        return

    # Local names deliberately avoid shadowing the builtin ``min``.
    fastest_ms = np.min(time_list)
    median_ms = np.median(time_list)
    log(f"min: {fastest_ms:8.2f}, median: {median_ms:8.2f}, mean: {np.mean(time_list):8.2f}")
示例5: get_objects
# 需要导入模块: import ray [as 别名]
# 或者: from ray import wait [as 别名]
def get_objects(self, object_ids, timeout=None):
    """Get the values in the object store associated with the IDs.

    Return the values from the local object store for object_ids. This will
    block until all the values for object_ids have been written to the
    local object store.

    Args:
        object_ids (List[object_id.ObjectID]): A list of the object IDs
            whose values should be retrieved.
        timeout (float): The maximum amount of time in seconds to wait
            before returning. ``None`` (the default) is passed to the core
            worker as ``-1``; ``0`` is passed through as ``0`` rather than
            being treated as "no timeout".

    Returns:
        The result of ``self.deserialize_objects`` applied to the
        data/metadata pairs returned by the core worker.

    Raises:
        TypeError: If any element of ``object_ids`` is not an ``ObjectID``.
    """
    # Make sure that the values are object IDs.
    for object_id in object_ids:
        if not isinstance(object_id, ObjectID):
            raise TypeError(
                "Attempting to call `get` on the value {}, "
                "which is not an ray.ObjectID.".format(object_id))

    # Compare against None explicitly: a plain truthiness test would turn
    # timeout=0 into -1, the same value used when no timeout is given.
    timeout_ms = int(timeout * 1000) if timeout is not None else -1
    data_metadata_pairs = self.core_worker.get_objects(
        object_ids, self.current_task_id, timeout_ms)
    return self.deserialize_objects(data_metadata_pairs, object_ids)
示例6: wait
# 需要导入模块: import ray [as 别名]
# 或者: from ray import wait [as 别名]
def wait(object_ids, num_returns=1, timeout=None):
    """Split object IDs into those that are ready and those that are not.

    Behaves like `ray.wait`, but also accepts tuples and numpy arrays by
    converting them to a list before delegating.

    Args:
        object_ids (List[ObjectID], Tuple(ObjectID), np.array(ObjectID)):
            List-like of object IDs for objects that may or may not be
            ready. Note that these IDs must be unique.
        num_returns (int): The number of object IDs that should be returned.
        timeout (float): The maximum amount of time in seconds to wait
            before returning.

    Returns:
        A list of object IDs that are ready and a list of the remaining
        object IDs.
    """
    # Only tuples and ndarrays are converted; anything else is forwarded
    # untouched.
    if isinstance(object_ids, (tuple, np.ndarray)):
        object_ids = list(object_ids)
    return ray.wait(object_ids, num_returns=num_returns, timeout=timeout)
示例7: get_next_available_trial
# 需要导入模块: import ray [as 别名]
# 或者: from ray import wait [as 别名]
def get_next_available_trial(self):
    """Block until one running result is ready and return its trial.

    The keys of ``self._running`` are shuffled and handed to ``ray.wait``;
    the value stored under the first ready key is returned. Along the way
    ``self._last_nontrivial_wait`` is refreshed and a warning is logged
    when no non-trivial wait has happened for longer than
    ``BOTTLENECK_WARN_PERIOD_S``.
    """
    # Note: `ray.wait` by default returns the first available result, so
    # the candidates are shuffled to guarantee that slower trials (i.e.
    # trials that run remotely) also get fairly reported.
    # See https://github.com/ray-project/ray/issues/4211 for details.
    candidates = [*self._running.keys()]
    random.shuffle(candidates)

    wait_began = time.time()
    [result_id], _ = ray.wait(candidates)
    waited_s = time.time() - wait_began

    if waited_s > NONTRIVIAL_WAIT_TIME_THRESHOLD_S:
        self._last_nontrivial_wait = time.time()

    since_nontrivial_s = time.time() - self._last_nontrivial_wait
    if since_nontrivial_s > BOTTLENECK_WARN_PERIOD_S:
        logger.warning(
            "Over the last {} seconds, the Tune event loop has been "
            "backlogged processing new results. Consider increasing your "
            "period of result reporting to improve performance.".format(
                BOTTLENECK_WARN_PERIOD_S))
        # Reset the marker so the warning is not repeated on every call.
        self._last_nontrivial_wait = time.time()

    return self._running[result_id]
示例8: test_wait_cluster
# 需要导入模块: import ray [as 别名]
# 或者: from ray import wait [as 别名]
def test_wait_cluster(ray_start_cluster):
    """Check that a zero-timeout ``ray.wait`` sees finished remote tasks.

    Two nodes offering one ``RemoteResource`` each are added, a first batch
    of tasks is timed, and a second batch is given twice that long before
    ``ray.wait`` is asked for all of them with ``timeout=0``.
    """
    cluster = ray_start_cluster
    for _ in range(2):
        cluster.add_node(num_cpus=1, resources={"RemoteResource": 1})
    ray.init(address=cluster.address)

    @ray.remote(resources={"RemoteResource": 1})
    def f():
        return

    # Make sure we have enough workers on the remote nodes to execute some
    # tasks, and measure how long one batch takes.
    warmup = [f.remote() for _ in range(10)]
    began = time.time()
    ray.get(warmup)
    finished = time.time()

    # Submit some more tasks that can only be executed on the remote nodes.
    tasks = [f.remote() for _ in range(10)]

    # Sleep for a bit to let the tasks finish.
    time.sleep((finished - began) * 2)

    _, not_ready = ray.wait(tasks, num_returns=len(tasks), timeout=0)

    # All remote tasks should have finished.
    assert len(not_ready) == 0
示例9: test_wait_makes_object_local
# 需要导入模块: import ray [as 别名]
# 或者: from ray import wait [as 别名]
def test_wait_makes_object_local(ray_start_cluster):
    """Check that ``ray.get`` and ``ray.wait`` both make an object local.

    An actor method returns a numpy array; before it is fetched or waited
    on, the driver's core worker must report the object as absent, and
    afterwards as present.
    """
    cluster = ray_start_cluster
    cluster.add_node(num_cpus=0)
    cluster.add_node(num_cpus=2)
    ray.init(address=cluster.address)

    @ray.remote
    class Foo:
        def method(self):
            return np.zeros(1024 * 1024)

    def is_local(object_id):
        # Look the core worker up on every call, as the assertions need
        # the state at that moment.
        return ray.worker.global_worker.core_worker.object_exists(object_id)

    actor = Foo.remote()

    # Test get makes the object local.
    x_id = actor.method.remote()
    assert not is_local(x_id)
    ray.get(x_id)
    assert is_local(x_id)

    # Test wait makes the object local.
    x_id = actor.method.remote()
    assert not is_local(x_id)
    ready, _ = ray.wait([x_id])
    assert len(ready) == 1
    assert is_local(x_id)
示例10: testMemoryRequest
# 需要导入模块: import ray [as 别名]
# 或者: from ray import wait [as 别名]
def testMemoryRequest(self):
    """Check that only two ``Actor`` instances can run within 200 MB.

    Two actors must answer ``ping`` within 60 seconds; a third must not
    answer within 5 seconds. Ray is always shut down afterwards.
    """
    try:
        ray.init(num_cpus=1, memory=200 * MB)

        # fits first 2
        first = Actor.remote()
        second = Actor.remote()
        pings = [first.ping.remote(), second.ping.remote()]
        ready, _ = ray.wait(pings, timeout=60.0, num_returns=2)
        self.assertEqual(len(ready), 2)

        # does not fit
        # NOTE(review): presumably Actor reserves enough memory that a
        # third instance cannot be placed -- confirm against its definition.
        third = Actor.remote()
        ready, _ = ray.wait([third.ping.remote()], timeout=5.0)
        self.assertEqual(len(ready), 0)
    finally:
        ray.shutdown()
示例11: test_wait_reconstruction
# 需要导入模块: import ray [as 别名]
# 或者: from ray import wait [as 别名]
def test_wait_reconstruction(shutdown_only):
    """Check that ``ray.wait`` succeeds on an object that no longer exists locally.

    With object pinning disabled and a 10**8-byte object store, a second
    6 * 10**7-byte result is produced after the first. The test asserts the
    first object is gone from the core worker and that waiting on it again
    still reports it ready.
    """
    internal_config = json.dumps({"object_pinning_enabled": 0})
    ray.init(
        num_cpus=1,
        object_store_memory=int(10**8),
        _internal_config=internal_config)

    @ray.remote
    def f():
        return np.zeros(6 * 10**7, dtype=np.uint8)

    x_id = f.remote()
    ray.wait([x_id])

    # NOTE(review): the second result presumably evicts the first, since
    # both cannot fit in the store together -- confirm.
    ray.wait([f.remote()])
    assert not ray.worker.global_worker.core_worker.object_exists(x_id)

    ready, _ = ray.wait([x_id])
    assert len(ready) == 1
示例12: test_driver_lives_parallel
# 需要导入模块: import ray [as 别名]
# 或者: from ray import wait [as 别名]
def test_driver_lives_parallel(ray_start_regular):
    """Kill every Ray component process at once and let the driver continue.

    The plasma store, GCS server, raylet, log monitor and monitor processes
    are terminated, then killed after a short pause, then reaped.
    """
    all_processes = ray.worker._global_node.all_processes
    component_types = (
        ray_constants.PROCESS_TYPE_PLASMA_STORE,
        ray_constants.PROCESS_TYPE_GCS_SERVER,
        ray_constants.PROCESS_TYPE_RAYLET,
        ray_constants.PROCESS_TYPE_LOG_MONITOR,
        ray_constants.PROCESS_TYPE_MONITOR,
    )
    process_infos = [
        info
        for component in component_types
        for info in all_processes[component]
    ]
    assert len(process_infos) == 5

    # Kill all the components in parallel.
    for info in process_infos:
        info.process.terminate()

    time.sleep(0.1)

    for info in process_infos:
        info.process.kill()

    for info in process_infos:
        info.process.wait()

    # If the driver can reach the tearDown method, then it is still alive.
示例13: test_fair_queueing
# 需要导入模块: import ray [as 别名]
# 或者: from ray import wait [as 别名]
def test_fair_queueing(shutdown_only):
    """Check that 1000 nested task chains finish on one CPU with fair queueing.

    ``f`` blocks on ``g`` which blocks on ``h``; all 1000 ``f`` calls must
    be ready within 60 seconds.
    """
    internal_config = json.dumps({"fair_queueing_enabled": 1})
    ray.init(num_cpus=1, _internal_config=internal_config)

    @ray.remote
    def h():
        return 0

    @ray.remote
    def g():
        return ray.get(h.remote())

    @ray.remote
    def f():
        return ray.get(g.remote())

    # This will never finish without fair queueing of {f, g, h}:
    # https://github.com/ray-project/ray/issues/3644
    submitted = [f.remote() for _ in range(1000)]
    ready, _ = ray.wait(submitted, timeout=60.0, num_returns=1000)
    assert len(ready) == 1000, len(ready)
示例14: test_actor_worker_dying
# 需要导入模块: import ray [as 别名]
# 或者: from ray import wait [as 别名]
def test_actor_worker_dying(ray_start_regular):
    """Check the errors raised when an actor's method exits its process.

    Fetching the result of the exiting method must raise ``RayActorError``,
    a task consuming that result must raise ``RayTaskError``, and one
    worker-died error must be pushed.
    """
    @ray.remote
    class Actor:
        def kill(self):
            eval("exit()")

    @ray.remote
    def consume(x):
        pass

    actor = Actor.remote()
    [dead_result], _ = ray.wait([actor.kill.remote()], timeout=5)

    with pytest.raises(ray.exceptions.RayActorError):
        ray.get(dead_result)

    # Passing the failed result on to another task surfaces a task error.
    with pytest.raises(ray.exceptions.RayTaskError):
        ray.get(consume.remote(dead_result))

    wait_for_errors(ray_constants.WORKER_DIED_PUSH_ERROR, 1)
示例15: test_dynamic_res_infeasible_rescheduling
# 需要导入模块: import ray [as 别名]
# 或者: from ray import wait [as 别名]
def test_dynamic_res_infeasible_rescheduling(ray_start_regular):
    """Check that an infeasible task runs once its resource is created.

    This test launches an infeasible task and then creates a resource to
    make the task feasible. This tests if the infeasible tasks get
    rescheduled when resources are created at runtime.
    """
    res_name = "test_res"
    res_capacity = 1.0

    @ray.remote
    def set_res(resource_name, resource_capacity):
        ray.experimental.set_resource(resource_name, resource_capacity)

    def f():
        return 1

    # The task needs a resource that does not exist yet.
    remote_task = ray.remote(resources={res_name: res_capacity})(f)
    oid = remote_task.remote()  # This is infeasible

    # Creating the resource should make the task feasible.
    ray.get(set_res.remote(res_name, res_capacity))

    assert ray.available_resources()[res_name] == res_capacity

    ready, _ = ray.wait([oid], timeout=1)
    assert ready  # The task completed