This article collects typical usage examples of the experiment.Experiment method in Python. If you have been wondering how exactly experiment.Experiment is used, how to call it, or what working examples look like, the curated code samples below may help. You can also explore further usage examples from the experiment module to which this method belongs.
A total of 9 code examples of experiment.Experiment are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
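Across the examples below the pattern is consistent: build an MDP and an agent, pass them to experiment.Experiment together with the schedule parameters (num_epochs, epoch_length, test_epoch_length, max_steps, run_tests, and optionally value_logging), then call run(). The following is a minimal sketch of that pattern, not a definitive recipe: it assumes the same project-specific modules (experiment, mdps, agent) used by the tests below, and the constant values are placeholders rather than recommended settings.

import experiment
import mdps
import agent

# Build a small maze MDP and a tabular Q-learning agent
# (project-specific classes used by the examples below).
mdp = mdps.MazeMDP(5, 1)  # room_size=5, num_rooms=1
mdp.compute_states()
a = agent.QLearningAgent(num_actions=len(mdp.get_actions(None)), discount=1,
    exploration_prob=0.3, step_size=1e-2, logging=False)

# Experiment(mdp, agent, num_epochs, epoch_length, test_epoch_length,
#            max_steps, run_tests, value_logging=False)
e = experiment.Experiment(mdp, a, 1, 10, 0, 1000, False)
e.run()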
Example 1: test_agent
# Required import: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_agent(self):
    room_size = 5
    mdp = mdps.MazeMDP(room_size, 1)
    mdp.compute_states()
    mdp.EXIT_REWARD = 1
    mdp.MOVE_REWARD = -0.1
    discount = mdp.get_discount()
    num_actions = len(mdp.get_actions(None))
    network = qnetwork.QNetwork(input_shape=2 * room_size, batch_size=1, num_actions=4, num_hidden=10, discount=discount, learning_rate=1e-3, update_rule='sgd', freeze_interval=10000, rng=None)
    p = policy.EpsilonGreedy(num_actions, 0.5, 0.05, 10000)
    rm = replay_memory.ReplayMemory(1)
    log = logger.NeuralLogger(agent_name='QNetwork')
    adapter = state_adapters.CoordinatesToSingleRoomRowColAdapter(room_size=room_size)
    a = agent.NeuralAgent(network=network, policy=p, replay_memory=rm, logger=log, state_adapter=adapter)
    num_epochs = 2
    epoch_length = 10
    test_epoch_length = 0
    max_steps = 10
    run_tests = False
    e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests, value_logging=False)
    e.run()
Example 2: test_graphs_are_plotted_and_saved_during_experiment
# Required import: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_graphs_are_plotted_and_saved_during_experiment(self):
    mdp = mdps.MazeMDP(5, 3)
    mdp.compute_states()
    mdp.EXIT_REWARD = 1
    mdp.MOVE_REWARD = -0.1
    num_actions = len(mdp.get_actions(None))
    discount = mdp.get_discount()
    exploration_prob = .5
    step_size = 1
    a = agent.QLearningAgent(num_actions=num_actions, discount=discount, exploration_prob=exploration_prob, step_size=step_size, logging=True)
    num_epochs = 1
    epoch_length = 100
    test_epoch_length = 0
    max_steps = 1000
    run_tests = False
    e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests, False)
    e.run()
    log_dir = e.agent.logger.log_dir
    self.assertTrue(os.path.isfile(os.path.join(log_dir, 'actions_graph.png')))
    self.assertTrue(os.path.isfile(os.path.join(log_dir, 'losses_graph.png')))
    self.assertTrue(os.path.isfile(os.path.join(log_dir, 'rewards_graph.png')))
    shutil.rmtree(log_dir)
Example 3: test_run_with_maze_mdp_and_working_agent_completes
# Required import: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_run_with_maze_mdp_and_working_agent_completes(self):
    mdp = mdps.MazeMDP(5, 1)
    num_actions = len(mdp.get_actions(None))
    discount = 1
    exploration_prob = .3
    step_size = 1e-2
    a = agent.QLearningAgent(num_actions=num_actions, discount=discount, exploration_prob=exploration_prob, step_size=step_size, logging=False)
    num_epochs = 1
    epoch_length = 1
    test_epoch_length = 0
    max_steps = 10000
    run_tests = False
    e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests)
    e.run()
    total_len = len(e.agent.logger.actions)
    self.assertTrue(total_len < max_steps * epoch_length * num_epochs)
Example 4: test_run_basic_mdp_and_agent_episodes
# Required import: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_run_basic_mdp_and_agent_episodes(self):
    mdp = mdps.LineMDP(5)
    a = agent.TestAgent(len(mdp.get_actions()))
    num_epochs = 1
    epoch_length = 10
    test_epoch_length = 0
    max_steps = 100
    run_tests = False
    e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests)
    e.run()
    actual = e.agent.episodes
    expected = e.num_epochs * e.epoch_length
    self.assertEquals(actual, expected)
Example 5: test_run_basic_mdp_and_agent_many_episodes
# Required import: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_run_basic_mdp_and_agent_many_episodes(self):
    mdp = mdps.LineMDP(5)
    a = agent.TestAgent(len(mdp.get_actions()))
    num_epochs = 5
    epoch_length = 10
    test_epoch_length = 0
    max_steps = 100
    run_tests = False
    e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests)
    e.run()
    actual = e.agent.episodes
    expected = e.num_epochs * e.epoch_length
    self.assertEquals(actual, expected)
Example 6: test_run_with_large_maze_mdp_q_learning_agent_correct_V
# Required import: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_run_with_large_maze_mdp_q_learning_agent_correct_V(self):
    mdp = mdps.MazeMDP(5, 3)
    mdp.compute_states()
    mdp.EXIT_REWARD = 1
    mdp.MOVE_REWARD = -0.1
    num_actions = len(mdp.get_actions(None))
    discount = 1
    exploration_prob = .5
    step_size = .1
    a = agent.QLearningAgent(num_actions=num_actions, discount=discount, exploration_prob=exploration_prob, step_size=step_size, logging=False)
    num_epochs = 10
    epoch_length = 200
    test_epoch_length = 0
    max_steps = 300
    run_tests = False
    e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests)
    e.run()
    V = get_V(e)
    actual_total = 0
    for k, v in V.iteritems():
        actual_total += v
    expected_total_min = -110
    expected_total_max = -40
    self.assertTrue(actual_total < expected_total_max)
    self.assertTrue(actual_total > expected_total_min)
Example 7: test_run_with_standard_maze_mdp_q_learning_agent_correct_V
# Required import: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_run_with_standard_maze_mdp_q_learning_agent_correct_V(self):
    mdp = mdps.MazeMDP(5, 2)
    mdp.compute_states()
    mdp.EXIT_REWARD = 1
    mdp.MOVE_REWARD = -0.01
    num_actions = len(mdp.get_actions(None))
    discount = 1
    exploration_prob = .5
    step_size = .1
    a = agent.QLearningAgent(num_actions=num_actions, discount=discount, exploration_prob=exploration_prob, step_size=step_size, logging=False)
    num_epochs = 10
    epoch_length = 200
    test_epoch_length = 0
    max_steps = 300
    run_tests = False
    e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests)
    e.run()
    V = get_V(e)
    actual_total = 0
    for k, v in V.iteritems():
        actual_total += v
    expected_total_min = -110
    expected_total_max = -40
    self.assertTrue(actual_total < expected_total_max)
    self.assertTrue(actual_total > expected_total_min)
Example 8: test_sequence_value_string
# Required import: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_sequence_value_string(self):
    room_size = 3
    num_rooms = 3
    mdp = mdps.MazeMDP(room_size, num_rooms)
    mdp.compute_states()
    mdp.EXIT_REWARD = 1
    mdp.MOVE_REWARD = -0.1
    discount = 1
    sequence_length = 2
    batch_size = 10
    learning_rate = 1e-3
    freeze_interval = 10000
    num_hidden = 4
    eps = .5
    reg = 1e-8
    num_actions = len(mdp.get_actions(None))
    batch_size = 100
    network = recurrent_qnetwork.RecurrentQNetwork(input_shape=2 * room_size,
        sequence_length=sequence_length, batch_size=batch_size,
        num_actions=4, num_hidden=num_hidden, discount=discount,
        learning_rate=learning_rate, regularization=reg, update_rule='adam',
        freeze_interval=freeze_interval, network_type='single_layer_lstm', rng=None)
    num_epochs = 5
    epoch_length = 10
    test_epoch_length = 0
    max_steps = (room_size * num_rooms) ** 2
    epsilon_decay = (num_epochs * epoch_length * max_steps) / 2
    adapter = state_adapters.CoordinatesToSingleRoomRowColAdapter(room_size=room_size)
    p = policy.EpsilonGreedy(num_actions, eps, 0.05, epsilon_decay)
    rm = replay_memory.SequenceReplayMemory(input_shape=2 * room_size,
        sequence_length=sequence_length, batch_size=batch_size, capacity=50000)
    log = logger.NeuralLogger(agent_name='RecurrentQNetwork')
    a = agent.RecurrentNeuralAgent(network=network, policy=p, replay_memory=rm, log=log, state_adapter=adapter)
    run_tests = False
    e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length,
        max_steps, run_tests, value_logging=True)
    e.log_temporal_value_string()
Example 9: test_qnetwork_solves_small_mdp
# Required import: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_qnetwork_solves_small_mdp(self):
    def run(learning_rate, freeze_interval, num_hidden, reg):
        room_size = 5
        num_rooms = 2
        mdp = mdps.MazeMDP(room_size, num_rooms)
        mdp.compute_states()
        mdp.EXIT_REWARD = 1
        mdp.MOVE_REWARD = -0.01
        discount = 1
        num_actions = len(mdp.get_actions(None))
        batch_size = 100
        print 'building network...'
        network = qnetwork.QNetwork(input_shape=2 * room_size + num_rooms ** 2, batch_size=batch_size, num_hidden_layers=2, num_actions=4, num_hidden=num_hidden, discount=discount, learning_rate=learning_rate, regularization=reg, update_rule='adam', freeze_interval=freeze_interval, rng=None)
        num_epochs = 50
        epoch_length = 2
        test_epoch_length = 0
        max_steps = 4 * (room_size * num_rooms) ** 2
        epsilon_decay = (num_epochs * epoch_length * max_steps) / 1.5
        print 'building policy...'
        p = policy.EpsilonGreedy(num_actions, 0.5, 0.05, epsilon_decay)
        print 'building memory...'
        rm = replay_memory.ReplayMemory(batch_size, capacity=50000)
        print 'building logger...'
        log = logger.NeuralLogger(agent_name='QNetwork')
        print 'building state adapter...'
        adapter = state_adapters.CoordinatesToRowColRoomAdapter(room_size=room_size, num_rooms=num_rooms)
        # adapter = state_adapters.CoordinatesToRowColAdapter(room_size=room_size, num_rooms=num_rooms)
        # adapter = state_adapters.CoordinatesToFlattenedGridAdapter(room_size=room_size, num_rooms=num_rooms)
        # adapter = state_adapters.IdentityAdapter(room_size=room_size, num_rooms=num_rooms)
        # adapter = state_adapters.CoordinatesToSingleRoomRowColAdapter(room_size=room_size)
        print 'building agent...'
        a = agent.NeuralAgent(network=network, policy=p, replay_memory=rm, log=log, state_adapter=adapter)
        run_tests = False
        e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests, value_logging=True)
        e.run()
        ak = file_utils.load_key('../access_key.key')
        sk = file_utils.load_key('../secret_key.key')
        bucket = 'hierarchical'
        try:
            aws_util = aws_s3_utility.S3Utility(ak, sk, bucket)
            aws_util.upload_directory(e.agent.logger.log_dir)
        except Exception as e:
            print 'error uploading to s3: {}'.format(e)

    for idx in range(2):
        lr = random.choice([.007, .006, .005])  # learning rate
        fi = random.choice([200, 300, 400, 500, 600, 700, 800])  # freeze interval
        nh = random.choice([4])  # num hidden
        reg = random.choice([5e-4])  # regularization
        print 'run number: {}'.format(idx)
        print lr, fi, nh, reg
        run(lr, fi, nh, reg)