

Python dqn.DQN Code Examples

This article collects typical usage examples of dqn.DQN in Python. If you are wondering what dqn.DQN does, how to use it, or want to see it in real code, the curated examples below should help. You can also explore further usage examples from the containing dqn module.


The following shows 10 code examples of dqn.DQN, ordered by popularity by default.
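Before the examples, note that dqn.DQN is not part of a published library: each project defines its own DQN class in a local dqn.py, so constructor signatures differ from repository to repository. As a rough orientation only, here is a minimal sketch of the pattern shared by the TensorFlow-based examples below; the (sess, input_size, output_size, name) signature and the sizes are assumptions modeled on Examples 4 and 8.

# Minimal usage sketch, not taken from any of the cited repositories.
# The constructor signature mirrors Examples 4 and 8; sizes are hypothetical.
import numpy as np
import tensorflow as tf
import dqn  # each project ships its own dqn.py that defines the DQN class

with tf.Session() as sess:
    net = dqn.DQN(sess, input_size=4, output_size=2, name="main")
    sess.run(tf.global_variables_initializer())

    state = np.zeros(4, dtype=np.float32)      # placeholder observation
    q_values = net.predict(state)              # Q-value estimates for each action
    action = int(np.argmax(q_values))          # greedy action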

Example 1: main

# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def main(env, weight_path, epsilon):
    env = make_atari(env)
    q_function = DQN(env.action_space.n)
    q_function.load_state_dict(torch.load(weight_path))

    done = False
    state = env.reset()
    step = 1
    sleep(2)
    while not done:
        env.render()
        if random() <= epsilon:
            action = randrange(0, env.action_space.n)
        else:
            state = variable(to_tensor(state).unsqueeze(0))
            action = q_function(state).data.view(-1).max(dim=0)[1].sum()

        state, reward, done, info = env.step(action)
        print(f"[step: {step:>5}] [reward: {reward:>5}]")
        step += 1
    sleep(2) 
Author: moskomule, Project: pytorch.rl.learning, Lines: 23, Source: demo.py
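The demo above relies on project-specific helpers (make_atari, to_tensor, variable) that are not part of this excerpt. Purely as an illustration of the roles those names play, here is a hypothetical sketch; the real definitions in pytorch.rl.learning may differ.

# Hypothetical helper definitions, NOT taken from the cited repository;
# they only illustrate what the demo above expects from these names.
import torch
from torch.autograd import Variable

def to_tensor(state):
    # convert an HWC uint8 frame from the Atari env into a CHW float tensor
    return torch.from_numpy(state).float().permute(2, 0, 1)

def variable(t):
    # wrap a tensor for the pre-0.4 PyTorch Variable API used in the demo
    return Variable(t)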

Example 2: control_start

# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def control_start(self):
        import dqn
        with tf.Session() as sess:
            mainDQN = dqn.DQN(sess, self.input_size, self.output_size,
                              name="main", is_training=False)
            tf.global_variables_initializer().run()

            mainDQN.restore(100)

            for episode in range(self.max_episodes):
                done = False
                clear = False
                state = self.env.reset()

                while not done and not clear:
                    action = np.argmax(mainDQN.predict(state))
                    print(action)
                    next_state, reward, done, clear, max_x, _, _ = self.env.step(action)
                    state = next_state 
Author: JSDanielPark, Project: tensorflow_dqn_supermario, Lines: 21, Source: play.py

Example 3: simple_replay_train

# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def simple_replay_train(DQN, train_batch):
    x_stack = np.empty(0).reshape(0,DQN.input_size)
    y_stack = np.empty(0).reshape(0,DQN.output_size)

    # Get stored information from the buffer
    for state, action, reward, next_state, done in train_batch:
        Q = DQN.predict(state)

        # terminal
        if done:
            Q[0, action] = reward
        else:
            Q[0, action] = reward + dis * np.max(DQN.predict(next_state))
        y_stack = np.vstack([y_stack,Q])
        x_stack = np.vstack([x_stack, state])

    return DQN.update(x_stack,y_stack) 
Author: Wonjuseo, Project: Project101, Lines: 19, Source: 2-7. CartPole3.py
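For context, this helper is driven from a training loop like the one in Example 8: experience tuples are pushed into a deque-based replay buffer, a minibatch is sampled, and the value returned by DQN.update is the training loss. A short usage sketch follows; dis is assumed to be the module-level discount factor referenced inside the function.

# Usage sketch for simple_replay_train; it mirrors the loop in Example 8.
# `dis` is assumed to be a module-level discount factor, e.g. 0.9.
import random
from collections import deque

dis = 0.9
replay_buffer = deque(maxlen=50000)

# ... inside the episode loop, after env.step(action) ...
# replay_buffer.append((state, action, reward, next_state, done))

if len(replay_buffer) >= 10:
    minibatch = random.sample(replay_buffer, 10)
    loss, _ = simple_replay_train(mainDQN, minibatch)   # mainDQN is a dqn.DQN instance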

Example 4: run

# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def run(self):
        import dqn
        with tf.Session() as sess:
            self.sess = sess
            self.mainDQN = dqn.DQN(sess, self.input_size, self.output_size, name="main")
            self.targetDQN = dqn.DQN(sess, self.input_size, self.output_size, name="target")
            self.tempDQN = dqn.DQN(sess, self.input_size, self.output_size, name="temp")
            tf.global_variables_initializer().run()

            episode = 5100
            try:
                self.mainDQN.restore(episode)
                self.targetDQN.restore(episode)
                self.tempDQN.restore(episode)
            except NotFoundError:
                print "save file not found"

            self.copy_ops = self.get_copy_var_ops()
            self.copy_ops_temp = self.get_copy_var_ops(dest_scope_name="main", src_scope_name="temp")
            self.copy_ops_temp2 = self.get_copy_var_ops(dest_scope_name="temp", src_scope_name="main")
            sess.run(self.copy_ops)
            sess.run(self.copy_ops_temp2)

            predict_thread = threading.Thread(target=self.predict)
            train_thread = threading.Thread(target=self.train)
            predict_thread.start()
            train_thread.start()
            train_thread.join()
            predict_thread.join() 
Author: JSDanielPark, Project: tensorflow_dqn_supermario, Lines: 31, Source: main_async.py
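The get_copy_var_ops helper that synchronizes the main, target, and temp networks (called above with dest_scope_name/src_scope_name keyword arguments) is not included in this excerpt. A common TensorFlow 1.x implementation of such a scope-to-scope weight copy looks like the sketch below; treat it as an assumption about the project, not code from the cited repository.

# Assumed implementation of the weight-copy helper used above; this is the
# standard TF 1.x pattern, not code taken from tensorflow_dqn_supermario.
import tensorflow as tf

def get_copy_var_ops(dest_scope_name="target", src_scope_name="main"):
    # build assign ops that copy every trainable variable from src scope to dest scope
    op_holder = []
    src_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=src_scope_name)
    dest_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=dest_scope_name)
    for src_var, dest_var in zip(src_vars, dest_vars):
        op_holder.append(dest_var.assign(src_var.value()))
    return op_holder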

Example 5: main

# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def main(unusued_argv):
    parent_proc = psutil.Process()
    with tf.Session() as sess:
        mainDQN = dqn.DQN(sess, FLAGS.screen_size, FLAGS.minimap_size, output_size, FLAGS.learning_rate, name="main")
        targetDQN = dqn.DQN(sess, FLAGS.screen_size, FLAGS.minimap_size, output_size, FLAGS.learning_rate, name="target")

        copy_ops = get_copy_var_ops(dest_scope_name="target", src_scope_name="main")
        sess.run(copy_ops)
        print("memory before starting the iteration : %s (kb)"%(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))

        for episode in range(FLAGS.start_episode, FLAGS.num_episodes):
            e = 1.0 / ((episode / 50) + 2.0) # decaying exploration rate
            with sc2_env.SC2Env(
                    FLAGS.map_name,
                    screen_size_px=(FLAGS.screen_size, FLAGS.screen_size),
                    minimap_size_px=(FLAGS.minimap_size, FLAGS.minimap_size),
                    agent_race=FLAGS.agent_race,
                    bot_race=FLAGS.bot_race,
                    difficulty=FLAGS.difficulty,
                    visualize=FLAGS.visualize) as env:

                agent = minerva_agent.MinervaAgent(mainDQN)
                run_result = run_loop([agent], env, sess, e, mainDQN, targetDQN, copy_ops, 5000)
                agent.close()
                reward = run_result[0].reward
                if reward > 0:
                    env.save_replay("victory/")
                #else:
                #    env.save_replay("defeat/")

            children = parent_proc.children(recursive=True)
            for child in children:
                print("remaining child proc :", child)
            print("memory after exit %d'th sc2env : %s (kb)"%(episode, resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))

            mainDQN.saveWeight()
            print("networks were saved, %d'th game result :"%episode,reward) 
Author: phraust1612, Project: MinervaSc2, Lines: 39, Source: trainingRL.py

Example 6: main

# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def main(unused_argv):
    replay_list = []
    if FLAGS.replay:
        REPLAY_PATH = REPLAY_HOME + FLAGS.replay
    else:
        REPLAY_PATH = REPLAY_HOME

    for root, dirs, files in os.walk(REPLAY_PATH):
        for subdir in dirs:
            tmp = os.path.join(root, subdir)
            if tmp[-10:] == '.SC2Replay':
                replay_list.append(tmp)
        for file1 in files:
            tmp = os.path.join(root, file1)
            if tmp[-10:] == '.SC2Replay':
                replay_list.append(tmp)

    with tf.Session() as sess:
        mainDQN = dqn.DQN(sess, FLAGS.screen_size, FLAGS.minimap_size, output_size, FLAGS.learning_rate, name="main")

        for iter in range(FLAGS.repeat):
            for replay in replay_list:
                start_time = time.time()
                run_loop(replay, 1, mainDQN)
                run_loop(replay, 2, mainDQN)
                mainDQN.saveWeight()
                print("networks were updated / replay :",replay)
                elapsed_time = time.time() - start_time
                print("Took %.3f seconds... " % (elapsed_time)) 
Author: phraust1612, Project: MinervaSc2, Lines: 31, Source: trainingSL.py

Example 7: __init__

# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def __init__(self, env, sess, load_path = None):
        self.env = env
        self.sess = sess
        self.ep = 0.35
        self.state_size = 6
        self.batch_size = 128 
        self.start_train = 370100 
        self.train_freq = 10 
        self.action_size = self.env.action_size 
        self.qnet = DQN(self.sess, self.state_size, self.action_size, load_path)
        self.xrep = ExperienceReplay(self.state_size) 
        print('made agent')
Author: cheng-xie, Project: RobocupSSLSim, Lines: 14, Source: agent.py

Example 8: main

# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def main():
    max_episodes = 1000

    replay_buffer = deque()
    with tf.Session() as sess:
        mainDQN = dqn.DQN(sess,input_size,output_size)
        # Initialize variables
        init = tf.global_variables_initializer()
        sess.run(init)

        for i in range(max_episodes):
            # e- greedy
            e = 1./((i/10)+1)
            step_count = 0
            state = env.reset()
            done = False

            # Before fail
            while not done:
                # E-greedy
                if random.gauss(0,1) < e:
                    action = env.action_space.sample()
                else:
                    action = np.argmax(mainDQN.predict(state))
                # Get the observation results
                next_state, reward, done, _ = env.step(action)            
                # After fail
                if done:
                    if step_count >= 199:
                        reward = 200
                    else:
                        reward = -100
                
                replay_buffer.append((state,action,reward,next_state,done))
                if len(replay_buffer) >REPLAY_MEMORY:
                    replay_buffer.popleft()
                # Update state
                state = next_state
                step_count += 1
                if step_count > 199:
                    break

            print("Episode:",i,"step:",step_count)
            if step_count> 10000:
                pass
        
            # train every 10 episodes
            if i % 10 == 1: 
                # Get a random batch of experience
                for _ in range(50):
                    # Minibatch works better
                    minibatch = random.sample(replay_buffer,10)
                    loss, _ = simple_replay_train(mainDQN,minibatch)

                print("Loss:",loss)
        # Rendering
        bot_play(mainDQN)

# python start code 
Author: Wonjuseo, Project: Project101, Lines: 61, Source: 2-7. CartPole3.py
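bot_play is called at the end of main but is not part of this excerpt. A plausible definition, following the greedy-rollout pattern these CartPole tutorials usually use, is sketched below; it is hypothetical, and the real version in Project101 may differ.

# Hypothetical bot_play helper, not taken from the cited repository:
# it rolls out one greedy episode with the trained network and renders it.
# Assumes the module-level `env` and the numpy import used in Example 8.
def bot_play(mainDQN):
    state = env.reset()
    total_reward = 0
    done = False
    while not done:
        env.render()
        action = np.argmax(mainDQN.predict(state))
        state, reward, done, _ = env.step(action)
        total_reward += reward
    print("Total score:", total_reward)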

Example 9: batch_train

# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def batch_train(env, mainDQN, targetDQN, train_batch: list) -> float:
    """Trains `mainDQN` with target Q values given by `targetDQN`
    Args:
        mainDQN (dqn.DQN): Main DQN that will be trained
        targetDQN (dqn.DQN): Target DQN that will predict Q_target
        train_batch (list): Minibatch sampled from the stored buffer
            Each element is (s, a, args, r, s', done)
            [(state, action_id, action_args, reward, next_state, done), ...]
    Returns:
        float: After updating `mainDQN`, it returns a `loss`
    """
    states = np.vstack([x[0] for x in train_batch])
    actions_id = np.array([x[1] for x in train_batch])
    rewards = np.array([x[3] for x in train_batch])
    next_states = np.vstack([x[4] for x in train_batch])
    done = np.array([x[5] for x in train_batch])

    # actions_arg[i] : arguments whose id=i
    actions_arg = np.ones([13,FLAGS.batch_size],dtype=np.int32)
    actions_arg *= -1

    batch_index = 0
    for x in train_batch:
        action_id = x[1]
        arg_index = 0

        for arg in env.action_spec().functions[action_id].args:
            if arg.id in range(3):
                actions_arg[arg.id][batch_index] = coordinateToInt(x[2][arg_index])
            else:
                actions_arg[arg.id][batch_index] = (int) (x[2][arg_index][0])
            arg_index += 1
        batch_index += 1

    X = states

    Q_target = rewards + FLAGS.discount * np.max(targetDQN.predict(next_states), axis=1) * ~done
    spatial_Q_target = []
    spatial_predict = targetDQN.predictSpatial(next_states)
    for i in range(13):
        spatial_Q_target.append( rewards + FLAGS.discount * np.max(spatial_predict[i], axis=1) *~done )

    # y shape : [batch_size, output_size]
    y = mainDQN.predict(states)
    y[np.arange(len(X)), actions_id] = Q_target

    # ySpatial shape : [13, batch_size, arg_size(id)]
    ySpatial = mainDQN.predictSpatial(states)
    for j in range(13):
        for i in range(len(X)):
            if actions_arg[j][i] >= 0:
                ySpatial[j][i][actions_arg[j][i]] = spatial_Q_target[j][i]

    # Train our network using target and predicted Q values on each episode
    return mainDQN.update(X, y, ySpatial) 
Author: phraust1612, Project: MinervaSc2, Lines: 57, Source: trainingRL.py

Example 10: test

# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def test(session, hist_len=4, discount=0.99, act_rpt=4, upd_freq=4, min_sq_grad=0.01, epsilon=TEST_EPSILON, 
    no_op_max=30, num_tests=30, learning_rate=0.00025, momentum=0.95, sq_momentum=0.95):
    #Create ALE object
    if len(sys.argv) < 2:
      print('Usage:', sys.argv[0], 'rom_file')
      sys.exit()

    ale = ALEInterface()

    # Get & Set the desired settings
    ale.setInt('random_seed', 123)
    #Changes repeat action probability from default of 0.25
    ale.setFloat('repeat_action_probability', 0.0)
    # Set USE_SDL to true to display the screen. ALE must be compilied
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = False
    if USE_SDL:
      if sys.platform == 'darwin':
        import pygame
        pygame.init()
        ale.setBool('sound', False) # Sound doesn't work on OSX
      elif sys.platform.startswith('linux'):
        ale.setBool('sound', True)
      ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(sys.argv[1])

    # create DQN agent
    # learning_rate and momentum are unused parameters (but needed)
    agent = DQN(ale, session, epsilon, learning_rate, momentum, sq_momentum, hist_len, len(ale.getMinimalActionSet()), None, discount, rom_name(sys.argv[1]))
    
    #Store the most recent two images
    preprocess_stack = deque([], 2)

    num_episodes = 0
    while num_episodes < num_tests:
        #initialize sequence with initial image
        seq = list()
        perform_no_ops(ale, no_op_max, preprocess_stack, seq)
        total_reward = 0
        while not ale.game_over():
            state = get_state(seq, hist_len)
            action = agent.get_action_best_network(state, epsilon)
            #skip frames by repeating action
            reward = 0
            for i in range(act_rpt):
                reward = reward + ale.act(action)
                preprocess_stack.append(ale.getScreenRGB())
            seq.append(pp.preprocess(preprocess_stack[0], preprocess_stack[1]))
            total_reward += reward
        print('Episode ended with score: %d' % (total_reward))
        num_episodes = num_episodes + 1
        ale.reset_game() 
开发者ID:prabhatnagarajan,项目名称:dqn,代码行数:57,代码来源:test.py


Note: The dqn.DQN examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective developers, and copyright of the source code remains with the original authors. Please refer to each project's License before distributing or using the code; do not reproduce this article without permission.