

Python experiment.Experiment Method Code Examples

This article collects typical usage examples of the experiment.Experiment method in Python. If you are looking for how to call experiment.Experiment, what arguments it takes, or what working examples look like, the curated code examples here may help. You can also explore further usage examples from the experiment module.


The following presents 9 code examples of the experiment.Experiment method, sorted by popularity by default.
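Before the individual examples, here is a minimal usage sketch of the pattern they all share: build an MDP, build an agent, construct an Experiment, and call run(). This is only a sketch assembled from the examples below; it assumes the local modules of the wulfebw/hierarchical_rl project (mdps, agent, experiment) are importable, and the parameter values mirror Example 3.

# Minimal sketch assembled from the examples below; assumes the
# hierarchical_rl project's local modules are on the import path.
import mdps
import agent
import experiment

# A small 5x5 single-room maze and a tabular Q-learning agent.
mdp = mdps.MazeMDP(5, 1)
num_actions = len(mdp.get_actions(None))
a = agent.QLearningAgent(num_actions=num_actions, discount=1,
                         exploration_prob=0.3, step_size=1e-2, logging=False)

# Positional arguments used throughout the examples:
# Experiment(mdp, agent, num_epochs, epoch_length, test_epoch_length,
#            max_steps, run_tests, value_logging=False)
e = experiment.Experiment(mdp, a, 1, 1, 0, 10000, False)
e.run()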

Example 1: test_agent

# Required module: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_agent(self):
        room_size = 5
        mdp = mdps.MazeMDP(room_size, 1)
        mdp.compute_states()
        mdp.EXIT_REWARD = 1
        mdp.MOVE_REWARD = -0.1
        discount = mdp.get_discount()
        num_actions = len(mdp.get_actions(None))
        network = qnetwork.QNetwork(input_shape=2 * room_size, batch_size=1, num_actions=4, num_hidden=10, discount=discount, learning_rate=1e-3, update_rule='sgd', freeze_interval=10000, rng=None)
        p = policy.EpsilonGreedy(num_actions, 0.5, 0.05, 10000)
        rm = replay_memory.ReplayMemory(1)
        log = logger.NeuralLogger(agent_name='QNetwork')
        adapter = state_adapters.CoordinatesToSingleRoomRowColAdapter(room_size=room_size)
        a = agent.NeuralAgent(network=network, policy=p, replay_memory=rm, logger=log, state_adapter=adapter)
        num_epochs = 2
        epoch_length = 10
        test_epoch_length = 0
        max_steps = 10
        run_tests = False
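        # Positional arguments: mdp, agent, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests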
        e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests, value_logging=False)
        e.run() 
Developer: wulfebw, Project: hierarchical_rl, Lines of code: 23, Source: test_neural_agent.py

Example 2: test_graphs_are_plotted_and_saved_during_experiment

# Required module: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_graphs_are_plotted_and_saved_during_experiment(self):
        mdp = mdps.MazeMDP(5, 3)
        mdp.compute_states()
        mdp.EXIT_REWARD = 1
        mdp.MOVE_REWARD = -0.1
        num_actions = len(mdp.get_actions(None))
        discount = mdp.get_discount()
        exploration_prob = .5
        step_size = 1
        a = agent.QLearningAgent(num_actions=num_actions, discount=discount, exploration_prob=exploration_prob, step_size=step_size, logging=True)
        num_epochs = 1
        epoch_length = 100
        test_epoch_length = 0
        max_steps = 1000
        run_tests = False
        e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests, False)
        e.run()

        log_dir = e.agent.logger.log_dir
        self.assertTrue(os.path.isfile(os.path.join(log_dir, 'actions_graph.png')))
        self.assertTrue(os.path.isfile(os.path.join(log_dir, 'losses_graph.png')))
        self.assertTrue(os.path.isfile(os.path.join(log_dir, 'rewards_graph.png')))
        shutil.rmtree(log_dir) 
Developer: wulfebw, Project: hierarchical_rl, Lines of code: 25, Source: test_logger.py

Example 3: test_run_with_maze_mdp_and_working_agent_completes

# Required module: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_run_with_maze_mdp_and_working_agent_completes(self):
        mdp = mdps.MazeMDP(5, 1)
        num_actions = len(mdp.get_actions(None))
        discount = 1
        exploration_prob = .3
        step_size = 1e-2
        a = agent.QLearningAgent(num_actions=num_actions, discount=discount, exploration_prob=exploration_prob, step_size=step_size, logging=False)
        num_epochs = 1
        epoch_length = 1
        test_epoch_length = 0
        max_steps = 10000
        run_tests = False
        e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests)
        e.run()
        total_len = len(e.agent.logger.actions)
        self.assertTrue(total_len < max_steps * epoch_length * num_epochs) 
Developer: wulfebw, Project: hierarchical_rl, Lines of code: 18, Source: test_experiment.py

Example 4: test_run_basic_mdp_and_agent_episodes

# Required module: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_run_basic_mdp_and_agent_episodes(self):
        mdp = mdps.LineMDP(5)
        a = agent.TestAgent(len(mdp.get_actions()))
        num_epochs = 1
        epoch_length = 10
        test_epoch_length = 0
        max_steps = 100
        run_tests = False
        e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests)
        e.run()
        actual = e.agent.episodes
        expected = e.num_epochs * e.epoch_length
        self.assertEqual(actual, expected) 
Developer: wulfebw, Project: hierarchical_rl, Lines of code: 15, Source: test_experiment.py

Example 5: test_run_basic_mdp_and_agent_many_episodes

# Required module: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_run_basic_mdp_and_agent_many_episodes(self):
        mdp = mdps.LineMDP(5)
        a = agent.TestAgent(len(mdp.get_actions()))
        num_epochs = 5
        epoch_length = 10
        test_epoch_length = 0
        max_steps = 100
        run_tests = False
        e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests)
        e.run()
        actual = e.agent.episodes
        expected = e.num_epochs * e.epoch_length
        self.assertEqual(actual, expected) 
Developer: wulfebw, Project: hierarchical_rl, Lines of code: 15, Source: test_experiment.py

Example 6: test_run_with_large_maze_mdp_q_learning_agent_correct_V

# Required module: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_run_with_large_maze_mdp_q_learning_agent_correct_V(self):
        mdp = mdps.MazeMDP(5, 3)
        mdp.compute_states()
        mdp.EXIT_REWARD = 1
        mdp.MOVE_REWARD = -0.1
        num_actions = len(mdp.get_actions(None))
        discount = 1
        exploration_prob = .5
        step_size = .1
        a = agent.QLearningAgent(num_actions=num_actions, discount=discount, exploration_prob=exploration_prob, step_size=step_size, logging=False)
        num_epochs = 10
        epoch_length = 200
        test_epoch_length = 0
        max_steps = 300
        run_tests = False
        e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests)
        e.run()

        V = get_V(e)
        actual_total = 0
        for k, v in V.items():
            actual_total += v
        expected_total_min = -110
        expected_total_max = -40
        self.assertTrue(actual_total < expected_total_max)
        self.assertTrue(actual_total > expected_total_min) 
Developer: wulfebw, Project: hierarchical_rl, Lines of code: 28, Source: test_experiment.py

Example 7: test_run_with_standard_maze_mdp_q_learning_agent_correct_V

# Required module: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_run_with_standard_maze_mdp_q_learning_agent_correct_V(self):
        mdp = mdps.MazeMDP(5, 2)
        mdp.compute_states()
        mdp.EXIT_REWARD = 1
        mdp.MOVE_REWARD = -0.01
        num_actions = len(mdp.get_actions(None))
        discount = 1
        exploration_prob = .5
        step_size = .1
        a = agent.QLearningAgent(num_actions=num_actions, discount=discount, exploration_prob=exploration_prob, step_size=step_size, logging=False)
        num_epochs = 10
        epoch_length = 200
        test_epoch_length = 0
        max_steps = 300
        run_tests = False
        e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests)
        e.run()

        V = get_V(e)
        actual_total = 0
        for k, v in V.items():
            actual_total += v
        expected_total_min = -110
        expected_total_max = -40
        self.assertTrue(actual_total < expected_total_max)
        self.assertTrue(actual_total > expected_total_min) 
Developer: wulfebw, Project: hierarchical_rl, Lines of code: 28, Source: test_experiment.py

Example 8: test_sequence_value_string

# Required module: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_sequence_value_string(self):
        room_size = 3
        num_rooms = 3
        mdp = mdps.MazeMDP(room_size, num_rooms)
        mdp.compute_states()
        mdp.EXIT_REWARD = 1
        mdp.MOVE_REWARD = -0.1
        discount = 1
        sequence_length = 2
        batch_size = 10
        learning_rate = 1e-3
        freeze_interval = 10000
        num_hidden = 4
        eps = .5
        reg = 1e-8
        num_actions = len(mdp.get_actions(None))
        batch_size = 100
        network = recurrent_qnetwork.RecurrentQNetwork(input_shape=2 * room_size,
            sequence_length=sequence_length, batch_size=batch_size,
            num_actions=4, num_hidden=num_hidden, discount=discount,
            learning_rate=learning_rate, regularization=reg, update_rule='adam',
            freeze_interval=freeze_interval, network_type='single_layer_lstm', rng=None)
        num_epochs = 5
        epoch_length = 10
        test_epoch_length = 0
        max_steps = (room_size * num_rooms) ** 2
        epsilon_decay = (num_epochs * epoch_length * max_steps) / 2
        adapter = state_adapters.CoordinatesToSingleRoomRowColAdapter(room_size=room_size)
        p = policy.EpsilonGreedy(num_actions, eps, 0.05, epsilon_decay)
        rm = replay_memory.SequenceReplayMemory(input_shape=2 * room_size,
                sequence_length=sequence_length, batch_size=batch_size, capacity=50000)
        log = logger.NeuralLogger(agent_name='RecurrentQNetwork')
        a = agent.RecurrentNeuralAgent(network=network, policy=p, replay_memory=rm, log=log, state_adapter=adapter)
        run_tests = False
        e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, 
            max_steps, run_tests, value_logging=True)
        e.log_temporal_value_string() 
Developer: wulfebw, Project: hierarchical_rl, Lines of code: 39, Source: test_experiment.py

Example 9: test_qnetwork_solves_small_mdp

# Required module: import experiment [as alias]
# Or: from experiment import Experiment [as alias]
def test_qnetwork_solves_small_mdp(self):
        

        def run(learning_rate, freeze_interval, num_hidden, reg):
            room_size = 5
            num_rooms = 2
            mdp = mdps.MazeMDP(room_size, num_rooms)
            mdp.compute_states()
            mdp.EXIT_REWARD = 1
            mdp.MOVE_REWARD = -0.01
            discount = 1
            num_actions = len(mdp.get_actions(None))
            batch_size = 100
            print('building network...')
            network = qnetwork.QNetwork(input_shape=2 * room_size + num_rooms ** 2, batch_size=batch_size, num_hidden_layers=2, num_actions=4, num_hidden=num_hidden, discount=discount, learning_rate=learning_rate, regularization=reg, update_rule='adam', freeze_interval=freeze_interval, rng=None)
            num_epochs = 50
            epoch_length = 2
            test_epoch_length = 0
            max_steps = 4 * (room_size * num_rooms) ** 2 
            epsilon_decay = (num_epochs * epoch_length * max_steps) / 1.5
            print('building policy...')
            p = policy.EpsilonGreedy(num_actions, 0.5, 0.05, epsilon_decay)
            print('building memory...')
            rm = replay_memory.ReplayMemory(batch_size, capacity=50000)
            print('building logger...')
            log = logger.NeuralLogger(agent_name='QNetwork')
            print('building state adapter...')
            adapter = state_adapters.CoordinatesToRowColRoomAdapter(room_size=room_size, num_rooms=num_rooms)
            # adapter = state_adapters.CoordinatesToRowColAdapter(room_size=room_size, num_rooms=num_rooms)
            # adapter = state_adapters.CoordinatesToFlattenedGridAdapter(room_size=room_size, num_rooms=num_rooms)
            # adapter = state_adapters.IdentityAdapter(room_size=room_size, num_rooms=num_rooms)
            # adapter = state_adapters.CoordinatesToSingleRoomRowColAdapter(room_size=room_size)
            print('building agent...')
            a = agent.NeuralAgent(network=network, policy=p, replay_memory=rm, log=log, state_adapter=adapter)
            run_tests = False
            e = experiment.Experiment(mdp, a, num_epochs, epoch_length, test_epoch_length, max_steps, run_tests, value_logging=True)
            e.run()

            ak = file_utils.load_key('../access_key.key')
            sk = file_utils.load_key('../secret_key.key')
            bucket = 'hierarchical'
            try:
                aws_util = aws_s3_utility.S3Utility(ak, sk, bucket)
                aws_util.upload_directory(e.agent.logger.log_dir)
            except Exception as ex:
                print('error uploading to s3: {}'.format(ex))

        for idx in range(2):
            lr = random.choice([.007, .006, .005])  # learning rate
            fi = random.choice([200, 300, 400, 500, 600, 700, 800]) # freeze interval
            nh = random.choice([4]) # num hidden
            reg = random.choice([5e-4]) # regularization
            print('run number: {}'.format(idx))
            print(lr, fi, nh, reg)
            run(lr, fi, nh, reg) 
Developer: wulfebw, Project: hierarchical_rl, Lines of code: 57, Source: test_qnetwork.py


Note: The experiment.Experiment method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by various developers, and copyright of the source code belongs to the original authors; for distribution and use, please refer to the corresponding project's license. Do not repost without permission.