This article collects typical usage examples of the VideoRecorder class from Python's gym.monitoring module. If you are wondering what monitoring.VideoRecorder does, how it is used, or want to see it in real code, the curated examples below should help. You can also explore further usage examples from its containing module, gym.monitoring.
Five code examples of monitoring.VideoRecorder are shown below. They are excerpts: besides the gym import noted above each example, they also rely on numpy (as np) and on helpers defined in their enclosing scripts.
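Before the examples, here is a minimal sketch of the VideoRecorder life cycle: construct it around an environment, call capture_frame() once per step, and close() to finalize the file. The environment name and output path are illustrative.

import gym
from gym.monitoring import VideoRecorder

env = gym.make("CartPole-v0")                      # any renderable env works
recorder = VideoRecorder(env, "/tmp/rollout.mp4")  # illustrative output path

obs = env.reset()
done = False
while not done:
    recorder.capture_frame()                       # grab the current rendered frame
    obs, reward, done, info = env.step(env.action_space.sample())

recorder.close()                                   # flush and finalize the video file
env.close()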
Example 1: play
# Required import: from gym import monitoring [as alias]
# Or: from gym.monitoring import VideoRecorder [as alias]
def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            # save video of the first episode only, then disable the recorder
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
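For context, play() is typically invoked from a small driver that loads a trained policy and passes it in as act. A hedged sketch, assuming the baselines-era deepq.load loader; the environment and checkpoint names are illustrative and none of this appears in the example itself:

import gym
import numpy as np
from baselines import deepq  # assumption: old-style baselines exposing deepq.load

env = gym.make("PongNoFrameskip-v4")  # illustrative; wrap as in the training script,
                                      # since play() reads per-episode returns from
                                      # info["rewards"]
act = deepq.load("pong_model.pkl")    # illustrative checkpoint name
play(env, act, stochastic=True, video_path="/tmp/pong.mp4")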
Example 2: play
# Required import: from gym import monitoring [as alias]
# Or: from gym.monitoring import VideoRecorder [as alias]
def play(env, act, stochastic, video_path):
    num_episodes = 0
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    obs = env.reset()
    # args, dist_params, and distdeepq come from the enclosing script
    if args.visual:
        action_names = distdeepq.actions_from_env(env)
        plot_machine = distdeepq.PlotMachine(dist_params, env.action_space.n,
                                             action_names)
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if args.visual:
            plot_machine.plot_distribution(np.array(obs)[None])
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            # save video of the first episode only, then disable the recorder
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
Example 3: play
# Required import: from gym import monitoring [as alias]
# Or: from gym.monitoring import VideoRecorder [as alias]
def play(env, act, craft_adv_obs, craft_adv_obs2, stochastic, video_path,
         attack, m_target, m_adv):
    num_episodes = 0
    num_moves = 0
    num_transfer = 0
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    obs = env.reset()
    while True:
        env.unwrapped.render()
        video_recorder.capture_frame()
        # V: Attack #
        if attack is not None:
            # Craft adv. examples under the adversary's session ...
            with m_adv.get_session().as_default():
                adv_obs = craft_adv_obs(np.array(obs)[None],
                                        stochastic_adv=stochastic)[0]
            # ... then act under the target model's session
            with m_target.get_session().as_default():
                action = act(np.array(adv_obs)[None],
                             stochastic=stochastic)[0]
                action2 = act(np.array(obs)[None], stochastic=stochastic)[0]
                num_moves += 1
                if action != action2:
                    num_transfer += 1
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]
        obs, rew, done, info = env.step(action)
        if done:
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print('Reward: ' + str(info["rewards"][-1]))
            num_episodes = len(info["rewards"])
            print('Episode: ' + str(num_episodes))
            if num_moves > 0:  # guard: no moves are counted when attack is None
                success = 100.0 * num_transfer / num_moves
                print("Percentage of successful attacks: " + str(success))
            num_moves = 0
            num_transfer = 0
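A note on the bookkeeping in Example 3: action comes from the adversarially perturbed observation, while action2 is what the target policy would have done on the clean observation; every step where the two disagree counts as a transferred attack, and the per-episode percentage is printed before the counters reset. The guard on num_moves avoids a division by zero when attack is None, since no moves are counted in that case.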
Example 4: play
# Required import: from gym import monitoring [as alias]
# Or: from gym.monitoring import VideoRecorder [as alias]
def play(env, act, craft_adv_obs, stochastic, video_path, game_name, attack, defense):
    # load_visual_foresight, foresee, U, and deque come from the enclosing module
    if defense == 'foresight':
        vf, game_screen_mean = load_visual_foresight(game_name)
        pred_obs = deque(maxlen=4)
    num_episodes = 0
    video_recorder = VideoRecorder(
        env, video_path, enabled=video_path is not None)
    t = 0
    obs = env.reset()
    while True:
        # env.unwrapped.render()
        video_recorder.capture_frame()
        # Attack
        if craft_adv_obs is not None:
            # Craft adv. examples
            adv_obs = craft_adv_obs(np.array(obs)[None], stochastic=stochastic)[0]
            action = act(np.array(adv_obs)[None], stochastic=stochastic)[0]
        else:
            # Normal
            action = act(np.array(obs)[None], stochastic=stochastic)[0]
        # Defense
        if t > 4 and defense == 'foresight':
            pred_obs.append(
                foresee(U.get_session(), old_obs, old_action, np.array(obs),
                        game_screen_mean, vf, env.action_space.n, t)
            )
            if len(pred_obs) == 4:
                action = act(np.stack(pred_obs, axis=2)[None],
                             stochastic=stochastic)[0]
        old_obs = obs
        old_action = action
        # RL loop
        obs, rew, done, info = env.step(action)
        t += 1
        if done:
            t = 0
            obs = env.reset()
        if len(info["rewards"]) > num_episodes:
            if len(info["rewards"]) == 1 and video_recorder.enabled:
                # save video of first episode
                print("Saved video.")
                video_recorder.close()
                video_recorder.enabled = False
            print(info["rewards"][-1])
            num_episodes = len(info["rewards"])
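Design note for Example 4: pred_obs is a deque(maxlen=4), so the foresight defense only overrides the chosen action once four predicted frames have accumulated, and np.stack(pred_obs, axis=2) rebuilds the four-frame stacked input the Atari policy expects. The defense acts on predicted frames rather than the possibly perturbed observed ones, which is what makes it a countermeasure to the attack branch above.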
Example 5: sample
# Required import: from gym import monitoring [as alias]
# Or: from gym.monitoring import VideoRecorder [as alias]
def sample(self, horizon, policy, record_fname=None):
    """Samples a rollout from the agent.

    Arguments:
        horizon: (int) The length of the rollout to generate from the agent.
        policy: (policy) The policy that the agent will use for actions.
        record_fname: (str/None) The name of the file to which a recording of the rollout
            will be saved. If None, the rollout will not be recorded.

    Returns: (dict) A dictionary containing data from the rollout.
        The keys of the dictionary are 'obs', 'ac', 'reward_sum', and 'rewards'.
    """
    video_record = record_fname is not None
    recorder = None if not video_record else VideoRecorder(self.env, record_fname)

    times, rewards = [], []
    O, A, reward_sum, done = [self.env.reset()], [], 0, False

    policy.reset()
    for t in range(horizon):
        if video_record:
            recorder.capture_frame()
        start = time.time()
        A.append(policy.act(O[t], t))
        times.append(time.time() - start)

        if self.noise_stddev is None:
            obs, reward, done, info = self.env.step(A[t])
        else:
            action = A[t] + np.random.normal(loc=0, scale=self.noise_stddev,
                                             size=[self.dU])
            action = np.minimum(np.maximum(action, self.env.action_space.low),
                                self.env.action_space.high)
            obs, reward, done, info = self.env.step(action)
        O.append(obs)
        reward_sum += reward
        rewards.append(reward)
        if done:
            break

    if video_record:
        recorder.capture_frame()
        recorder.close()

    print("Average action selection time: ", np.mean(times))
    print("Rollout length: ", len(A))

    return {
        "obs": np.array(O),
        "ac": np.array(A),
        "reward_sum": reward_sum,
        "rewards": np.array(rewards),
    }
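A hedged usage sketch for sample(): the Agent constructor and the policy object are hypothetical, since only the method body is shown; the attributes it relies on (self.env, self.noise_stddev, self.dU) would be set up by that constructor.

agent = Agent(env)  # hypothetical constructor; only sample() appears above
rollout = agent.sample(horizon=1000, policy=policy,      # policy must expose reset() and act(obs, t)
                       record_fname="/tmp/rollout.mp4")  # pass None to skip recording
print(rollout["reward_sum"], rollout["obs"].shape, rollout["ac"].shape)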