This article collects typical usage examples of the Python method ray.rllib.evaluation.policy_evaluator.PolicyEvaluator.as_remote. If you are unsure what PolicyEvaluator.as_remote does or how to use it, the curated code examples below may help. You can also read further about the containing class, ray.rllib.evaluation.policy_evaluator.PolicyEvaluator.
The following shows 5 code examples of PolicyEvaluator.as_remote, ordered by popularity by default.
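Before the individual examples, here is a minimal sketch of the pattern they all share. This sketch is an illustration rather than one of the collected examples: it assumes ray.init() has already been called, and my_env_creator and MyPolicyGraph are hypothetical stand-ins for your own environment factory and PolicyGraph subclass. as_remote() wraps PolicyEvaluator as a Ray actor class, and .remote(...) then constructs the actor with the same arguments that PolicyEvaluator accepts.

import ray
from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator

ray.init()

# Wrap PolicyEvaluator as a Ray actor class with a CPU reservation, then
# instantiate it remotely; the keyword arguments mirror PolicyEvaluator(...).
remote_ev = PolicyEvaluator.as_remote(num_cpus=1).remote(
    env_creator=my_env_creator,   # hypothetical environment factory
    policy_graph=MyPolicyGraph)   # hypothetical PolicyGraph subclass

# Sampling now runs in the remote worker process.
batch = ray.get(remote_ev.sample.remote())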
Example 1: _testWithOptimizer
# Required import: from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator [as alias]
# Or: from ray.rllib.evaluation.policy_evaluator.PolicyEvaluator import as_remote [as alias]
def _testWithOptimizer(self, optimizer_cls):
    n = 3
    env = gym.make("CartPole-v0")
    act_space = env.action_space
    obs_space = env.observation_space
    dqn_config = {"gamma": 0.95, "n_step": 3}
    if optimizer_cls == SyncReplayOptimizer:
        # TODO: support replay with non-DQN graphs. Currently this can't
        # happen since the replay buffer doesn't encode extra fields like
        # "advantages" that PG uses.
        policies = {
            "p1": (DQNPolicyGraph, obs_space, act_space, dqn_config),
            "p2": (DQNPolicyGraph, obs_space, act_space, dqn_config),
        }
    else:
        policies = {
            "p1": (PGPolicyGraph, obs_space, act_space, {}),
            "p2": (DQNPolicyGraph, obs_space, act_space, dqn_config),
        }
    ev = PolicyEvaluator(
        env_creator=lambda _: MultiCartpole(n),
        policy_graph=policies,
        policy_mapping_fn=lambda agent_id: ["p1", "p2"][agent_id % 2],
        batch_steps=50)
    if optimizer_cls == AsyncGradientsOptimizer:

        def policy_mapper(agent_id):
            return ["p1", "p2"][agent_id % 2]

        remote_evs = [
            PolicyEvaluator.as_remote().remote(
                env_creator=lambda _: MultiCartpole(n),
                policy_graph=policies,
                policy_mapping_fn=policy_mapper,
                batch_steps=50)
        ]
    else:
        remote_evs = []
    optimizer = optimizer_cls(ev, remote_evs, {})
    for i in range(200):
        ev.foreach_policy(
            lambda p, _: p.set_epsilon(max(0.02, 1 - i * .02))
            if isinstance(p, DQNPolicyGraph) else None)
        optimizer.step()
        result = collect_metrics(ev, remote_evs)
        if i % 20 == 0:
            ev.foreach_policy(
                lambda p, _: p.update_target()
                if isinstance(p, DQNPolicyGraph) else None)
            print("Iter {}, rew {}".format(i, result["policy_reward_mean"]))
            print("Total reward", result["episode_reward_mean"])
        if result["episode_reward_mean"] >= 25 * n:
            return
    print(result)
    raise Exception("failed to improve reward")
Example 2: make
# Required import: from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator [as alias]
# Or: from ray.rllib.evaluation.policy_evaluator.PolicyEvaluator import as_remote [as alias]
def make(cls,
         env_creator,
         policy_graph,
         optimizer_batch_size=None,
         num_workers=0,
         num_envs_per_worker=None,
         optimizer_config=None,
         remote_num_cpus=None,
         remote_num_gpus=None,
         **eval_kwargs):
    """Creates an Optimizer with local and remote evaluators.

    Args:
        env_creator (func): Function that returns a gym.Env given an
            EnvContext wrapped configuration.
        policy_graph (class|dict): Either a class implementing
            PolicyGraph, or a dictionary of policy id strings to
            (PolicyGraph, obs_space, action_space, config) tuples.
            See PolicyEvaluator documentation.
        optimizer_batch_size (int): Batch size summed across all workers.
            Will override worker `batch_steps`.
        num_workers (int): Number of remote evaluators.
        num_envs_per_worker (int): (Optional) Sets the number of
            environments per evaluator for vectorization. If set,
            overrides `num_envs` in kwargs for PolicyEvaluator.__init__.
        optimizer_config (dict): Config passed to the optimizer.
        remote_num_cpus (int): CPU specification for remote evaluators.
        remote_num_gpus (int): GPU specification for remote evaluators.
        **eval_kwargs: PolicyEvaluator class non-positional args.

    Returns:
        (Optimizer) Instance of `cls` with evaluators configured
            accordingly.
    """
    optimizer_config = optimizer_config or {}
    if num_envs_per_worker:
        assert num_envs_per_worker > 0, "Improper num_envs_per_worker!"
        eval_kwargs["num_envs"] = int(num_envs_per_worker)
    if optimizer_batch_size:
        assert optimizer_batch_size > 0
        if num_workers > 1:
            eval_kwargs["batch_steps"] = \
                optimizer_batch_size // num_workers
        else:
            eval_kwargs["batch_steps"] = optimizer_batch_size
    evaluator = PolicyEvaluator(env_creator, policy_graph, **eval_kwargs)
    remote_cls = PolicyEvaluator.as_remote(remote_num_cpus, remote_num_gpus)
    remote_evaluators = [
        remote_cls.remote(env_creator, policy_graph, **eval_kwargs)
        for i in range(num_workers)
    ]
    return cls(evaluator, remote_evaluators, optimizer_config)
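As a hedged usage sketch (not part of the collected example above), this factory would typically be called on a concrete optimizer subclass. SyncSamplesOptimizer is assumed here as that subclass, and my_env_creator and MyPolicyGraph remain hypothetical placeholders for your environment factory and PolicyGraph class.

# Hypothetical call site; SyncSamplesOptimizer is assumed to inherit `make`
# from the optimizer base class shown above.
optimizer = SyncSamplesOptimizer.make(
    env_creator=my_env_creator,
    policy_graph=MyPolicyGraph,
    optimizer_batch_size=400,  # divided across workers via batch_steps
    num_workers=4,             # spawns 4 PolicyEvaluator.as_remote() actors
    remote_num_cpus=1)
optimizer.step()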
Example 3: testMetrics
# Required import: from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator [as alias]
# Or: from ray.rllib.evaluation.policy_evaluator.PolicyEvaluator import as_remote [as alias]
def testMetrics(self):
    ev = PolicyEvaluator(
        env_creator=lambda _: MockEnv(episode_length=10),
        policy_graph=MockPolicyGraph,
        batch_mode="complete_episodes")
    remote_ev = PolicyEvaluator.as_remote().remote(
        env_creator=lambda _: MockEnv(episode_length=10),
        policy_graph=MockPolicyGraph,
        batch_mode="complete_episodes")
    ev.sample()
    ray.get(remote_ev.sample.remote())
    result = collect_metrics(ev, [remote_ev])
    self.assertEqual(result["episodes_this_iter"], 20)
    self.assertEqual(result["episode_reward_mean"], 10)
Example 4: make_remote_evaluators
# Required import: from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator [as alias]
# Or: from ray.rllib.evaluation.policy_evaluator.PolicyEvaluator import as_remote [as alias]
def make_remote_evaluators(self, env_creator, policy_graph, count):
    """Convenience method to return a number of remote evaluators."""
    remote_args = {
        "num_cpus": self.config["num_cpus_per_worker"],
        "num_gpus": self.config["num_gpus_per_worker"],
        "resources": self.config["custom_resources_per_worker"],
    }
    cls = PolicyEvaluator.as_remote(**remote_args).remote
    return [
        self._make_evaluator(cls, env_creator, policy_graph, i + 1,
                             self.config) for i in range(count)
    ]
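For context, here is a hedged sketch of how such a helper is usually invoked from an agent's _init; the surrounding names (make_local_evaluator, the num_workers config key, and PGPolicyGraph as the policy class) are assumptions for illustration rather than part of the example above.

def _init(self):
    # Build one local evaluator plus a pool of remote evaluators; each remote
    # evaluator is a PolicyEvaluator.as_remote(**remote_args) actor.
    self.local_evaluator = self.make_local_evaluator(
        self.env_creator, PGPolicyGraph)
    self.remote_evaluators = self.make_remote_evaluators(
        self.env_creator, PGPolicyGraph, self.config["num_workers"])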
Example 5: _make_evs
# Required import: from ray.rllib.evaluation.policy_evaluator import PolicyEvaluator [as alias]
# Or: from ray.rllib.evaluation.policy_evaluator.PolicyEvaluator import as_remote [as alias]
def _make_evs(self):
    def make_sess():
        return tf.Session(config=tf.ConfigProto(device_count={"CPU": 2}))

    local = PolicyEvaluator(
        env_creator=lambda _: gym.make("CartPole-v0"),
        policy_graph=PPOPolicyGraph,
        tf_session_creator=make_sess)
    remotes = [
        PolicyEvaluator.as_remote().remote(
            env_creator=lambda _: gym.make("CartPole-v0"),
            policy_graph=PPOPolicyGraph,
            tf_session_creator=make_sess)
    ]
    return local, remotes