本文整理汇总了Python中rl.core.Processor类的典型用法代码示例。如果您正苦于以下问题:Python core.Processor类的具体用法?Python core.Processor怎么用?Python core.Processor使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块rl.core的用法示例。
在下文中一共展示了core.Processor类的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: play
# 需要导入模块: from rl import core [as 别名]
# 或者: from rl.core import Processor [as 别名]
def play(self, nb_episodes=5, render=False):
    """Let the agent play.

    Wraps ``self.model`` in a ``DQNAgent`` (stored on ``self.dqn``), compiles
    it and runs it in test mode against ``self.env``.

    Args:
        nb_episodes: Number of episodes, forwarded to ``DQNAgent.test``.
        render: Forwarded to ``DQNAgent.test`` as ``visualize``.
    """
    # NOTE(review): memory_limit, window_length, nb_steps_warmup, batch_size,
    # train_interval and enable_double_dqn are not defined in this method;
    # presumably they are module-level settings - confirm they are in scope.
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    policy = TrumpPolicy()

    class CustomProcessor(Processor):  # pylint: disable=redefined-outer-name
        """The agent and the environment"""

        def process_state_batch(self, batch):
            """Drop axis 1 of the state batch.

            The second dimension is useless and prevents the tensor from
            being fed into the CNN. ``np.squeeze`` raises ``ValueError`` if
            that axis does not have length 1.
            """
            return np.squeeze(batch, axis=1)

        def process_info(self, info):
            """Return ``info['player_data']``, or ``{'x': 1}`` if it has a 'stack' key.

            NOTE(review): the placeholder presumably keeps non-scalar player
            data away from the agent's info bookkeeping - confirm.
            """
            player_data = info['player_data']
            if 'stack' in player_data:
                return {'x': 1}
            return player_data

    nb_actions = self.env.action_space.n
    self.dqn = DQNAgent(model=self.model, nb_actions=nb_actions, memory=memory,
                        nb_steps_warmup=nb_steps_warmup,
                        target_model_update=1e-2, policy=policy,
                        processor=CustomProcessor(),
                        batch_size=batch_size, train_interval=train_interval,
                        enable_double_dqn=enable_double_dqn)
    # `lr` is a deprecated alias that newer Keras optimizers reject;
    # `learning_rate` is accepted by every TF2 release.
    self.dqn.compile(tf.optimizers.Adam(learning_rate=1e-3), metrics=['mae'])  # pylint: disable=no-member
    self.dqn.test(self.env, nb_episodes=nb_episodes, visualize=render)
示例2: test_copy_observations
# 需要导入模块: from rl import core [as 别名]
# 或者: from rl.core import Processor [as 别名]
def test_copy_observations():
    """Check that the processor never sees the env's own observation objects.

    For both ``fit`` and ``test``, every observation returned by the
    environment's ``step`` must reach ``Processor.process_step`` with an equal
    value but as a distinct object.
    """
    for method_name in ('fit', 'test'):
        # Arrays exactly as returned by LocalEnv.step, in order.
        emitted = []

        class LocalEnv(Env):
            def __init__(self):
                super(LocalEnv, self).__init__()

            def step(self, action):
                # Counter environment: the episode ends once state reaches 6.
                self.state += 1
                observation = np.array(self.state)
                emitted.append(observation)
                return observation, float(self.state) / 10., self.state >= 6, {}

            def reset(self):
                self.state = 1
                return np.array(self.state)

            def seed(self, seed=None):
                pass

            def configure(self, *args, **kwargs):
                pass

        # Slight abuse of the processor for test purposes: it only records
        # what the agent hands to it and passes everything through unchanged.
        received = []

        class LocalProcessor(Processor):
            def process_step(self, observation, reward, done, info):
                received.append(observation)
                return observation, reward, done, info

        agent = TestAgent(SequentialMemory(100, window_length=1),
                          processor=LocalProcessor())
        env = LocalEnv()
        agent.compile()

        run = getattr(agent, method_name)
        run(env, 20, verbose=0, visualize=False)

        assert len(received) == len(emitted)
        assert_allclose(np.array(received), np.array(emitted))
        # Same values, but never the same object.
        assert all(sent is not got for sent, got in zip(emitted, received))