This page collects typical usage examples of the dqn.DQN attribute in Python. If you are wondering what dqn.DQN is, how to use it, or are looking for working examples, the code samples selected below may help. You can also explore further usage examples of the containing module dqn.
Below, 10 code examples of the dqn.DQN attribute are shown, sorted by popularity by default.
Example 1: main
# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def main(env, weight_path, epsilon):
    env = make_atari(env)
    q_function = DQN(env.action_space.n)
    q_function.load_state_dict(torch.load(weight_path))
    done = False
    state = env.reset()
    step = 1
    sleep(2)
    while not done:
        env.render()
        if random() <= epsilon:
            # Explore: pick a random action
            action = randrange(0, env.action_space.n)
        else:
            # Exploit: pick the greedy action from the Q-network
            state = variable(to_tensor(state).unsqueeze(0))
            action = q_function(state).data.view(-1).max(dim=0)[1].sum()
        state, reward, done, info = env.step(action)
        print(f"[step: {step:>5}] [reward: {reward:>5}]")
        step += 1
    sleep(2)
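Example 1 expects an Atari environment id, a path to saved weights, and an exploration epsilon. The following is a minimal, hypothetical command-line wrapper for it; the argument names and default values are assumptions, not part of the original code.

from argparse import ArgumentParser

if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--env", default="PongNoFrameskip-v4")      # assumed environment id
    parser.add_argument("--weight-path", default="weights/dqn.pt")  # assumed checkpoint path
    parser.add_argument("--epsilon", type=float, default=0.05)      # evaluation-time exploration
    args = parser.parse_args()
    main(args.env, args.weight_path, args.epsilon)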
Example 2: control_start
# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def control_start(self):
    import dqn
    with tf.Session() as sess:
        mainDQN = dqn.DQN(sess, self.input_size, self.output_size,
                          name="main", is_training=False)
        tf.global_variables_initializer().run()
        mainDQN.restore(100)
        for episode in range(self.max_episodes):
            done = False
            clear = False
            state = self.env.reset()
            while not done and not clear:
                # Act greedily with the restored network
                action = np.argmax(mainDQN.predict(state))
                print(action)
                next_state, reward, done, clear, max_x, _, _ = self.env.step(action)
                state = next_state
Example 3: simple_replay_train
# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def simple_replay_train(DQN, train_batch):
    x_stack = np.empty(0).reshape(0, DQN.input_size)
    y_stack = np.empty(0).reshape(0, DQN.output_size)
    # Get stored information from the buffer
    for state, action, reward, next_state, done in train_batch:
        Q = DQN.predict(state)
        if done:
            # Terminal transition: no bootstrapping
            Q[0, action] = reward
        else:
            Q[0, action] = reward + dis * np.max(DQN.predict(next_state))
        y_stack = np.vstack([y_stack, Q])
        x_stack = np.vstack([x_stack, state])
    return DQN.update(x_stack, y_stack)
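Example 3 reads a module-level discount factor named dis and consumes a minibatch of (state, action, reward, next_state, done) transitions. A minimal sketch of how it might be driven is shown below; the buffer size, batch size, and discount value are assumptions, and mainDQN stands for a dqn.DQN instance constructed as in Examples 2 or 8.

import random
from collections import deque

dis = 0.9                            # assumed discount factor read by simple_replay_train
replay_buffer = deque(maxlen=50000)  # assumed capacity

# ... fill replay_buffer with (state, action, reward, next_state, done) tuples ...
if len(replay_buffer) >= 10:
    minibatch = random.sample(list(replay_buffer), 10)
    loss, _ = simple_replay_train(mainDQN, minibatch)
    print("loss:", loss)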
Example 4: run
# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def run(self):
    import dqn
    with tf.Session() as sess:
        self.sess = sess
        self.mainDQN = dqn.DQN(sess, self.input_size, self.output_size, name="main")
        self.targetDQN = dqn.DQN(sess, self.input_size, self.output_size, name="target")
        self.tempDQN = dqn.DQN(sess, self.input_size, self.output_size, name="temp")
        tf.global_variables_initializer().run()
        episode = 5100
        try:
            self.mainDQN.restore(episode)
            self.targetDQN.restore(episode)
            self.tempDQN.restore(episode)
        except NotFoundError:
            print("save file not found")
        self.copy_ops = self.get_copy_var_ops()
        self.copy_ops_temp = self.get_copy_var_ops(dest_scope_name="main", src_scope_name="temp")
        self.copy_ops_temp2 = self.get_copy_var_ops(dest_scope_name="temp", src_scope_name="main")
        sess.run(self.copy_ops)
        sess.run(self.copy_ops_temp2)
        # Run prediction and training concurrently
        predict_thread = threading.Thread(target=self.predict)
        train_thread = threading.Thread(target=self.train)
        predict_thread.start()
        train_thread.start()
        train_thread.join()
        predict_thread.join()
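Examples 4 and 5 rely on a get_copy_var_ops helper that is not shown on this page. A common TensorFlow 1.x implementation, sketched here as an assumption rather than the authors' exact code, builds assign ops that copy the trainable variables of one variable scope into another:

import tensorflow as tf

def get_copy_var_ops(dest_scope_name="target", src_scope_name="main"):
    # Copy the trainable variables of `src_scope_name` into `dest_scope_name`.
    op_holder = []
    src_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=src_scope_name)
    dest_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=dest_scope_name)
    for src_var, dest_var in zip(src_vars, dest_vars):
        op_holder.append(dest_var.assign(src_var.value()))
    return op_holder

The returned list of ops is then executed with sess.run(copy_ops), as both examples do.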
Example 5: main
# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def main(unused_argv):
    parent_proc = psutil.Process()
    with tf.Session() as sess:
        mainDQN = dqn.DQN(sess, FLAGS.screen_size, FLAGS.minimap_size, output_size, FLAGS.learning_rate, name="main")
        targetDQN = dqn.DQN(sess, FLAGS.screen_size, FLAGS.minimap_size, output_size, FLAGS.learning_rate, name="target")
        copy_ops = get_copy_var_ops(dest_scope_name="target", src_scope_name="main")
        sess.run(copy_ops)
        print("memory before starting the iteration : %s (kb)" % (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))
        for episode in range(FLAGS.start_episode, FLAGS.num_episodes):
            e = 1.0 / ((episode / 50) + 2.0)  # decaying exploration rate
            with sc2_env.SC2Env(
                    FLAGS.map_name,
                    screen_size_px=(FLAGS.screen_size, FLAGS.screen_size),
                    minimap_size_px=(FLAGS.minimap_size, FLAGS.minimap_size),
                    agent_race=FLAGS.agent_race,
                    bot_race=FLAGS.bot_race,
                    difficulty=FLAGS.difficulty,
                    visualize=FLAGS.visualize) as env:
                agent = minerva_agent.MinervaAgent(mainDQN)
                run_result = run_loop([agent], env, sess, e, mainDQN, targetDQN, copy_ops, 5000)
                agent.close()
                reward = run_result[0].reward
                if reward > 0:
                    env.save_replay("victory/")
                # else:
                #     env.save_replay("defeat/")
            children = parent_proc.children(recursive=True)
            for child in children:
                print("remaining child proc :", child)
            print("memory after exit %d'th sc2env : %s (kb)" % (episode, resource.getrusage(resource.RUSAGE_SELF).ru_maxrss))
            mainDQN.saveWeight()
            print("networks were saved, %d'th game result :" % episode, reward)
Example 6: main
# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def main(unused_argv):
    replay_list = []
    if FLAGS.replay:
        REPLAY_PATH = REPLAY_HOME + FLAGS.replay
    else:
        REPLAY_PATH = REPLAY_HOME
    # Collect every .SC2Replay file under REPLAY_PATH
    for root, dirs, files in os.walk(REPLAY_PATH):
        for subdir in dirs:
            tmp = os.path.join(root, subdir)
            if tmp[-10:] == '.SC2Replay':
                replay_list.append(tmp)
        for file1 in files:
            tmp = os.path.join(root, file1)
            if tmp[-10:] == '.SC2Replay':
                replay_list.append(tmp)
    with tf.Session() as sess:
        mainDQN = dqn.DQN(sess, FLAGS.screen_size, FLAGS.minimap_size, output_size, FLAGS.learning_rate, name="main")
        for iter in range(FLAGS.repeat):
            for replay in replay_list:
                start_time = time.time()
                # Learn from both players' perspectives
                run_loop(replay, 1, mainDQN)
                run_loop(replay, 2, mainDQN)
                mainDQN.saveWeight()
                print("networks were updated / replay :", replay)
                elapsed_time = time.time() - start_time
                print("Took %.3f seconds... " % (elapsed_time))
Example 7: __init__
# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def __init__(self, env, sess, load_path=None):
    self.env = env
    self.sess = sess
    self.ep = 0.35
    self.state_size = 6
    self.batch_size = 128
    self.start_train = 370100
    self.train_freq = 10
    self.action_size = self.env.action_size
    self.qnet = DQN(self.sess, self.state_size, self.action_size, load_path)
    self.xrep = ExperienceReplay(self.state_size)
    print('made agent')
Example 8: main
# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def main():
    max_episodes = 1000
    replay_buffer = deque()
    with tf.Session() as sess:
        mainDQN = dqn.DQN(sess, input_size, output_size)
        # Initialize variables
        init = tf.global_variables_initializer()
        sess.run(init)
        for i in range(max_episodes):
            # Decaying epsilon for e-greedy exploration
            e = 1. / ((i / 10) + 1)
            step_count = 0
            state = env.reset()
            done = False
            # Run one episode until failure
            while not done:
                # E-greedy action selection
                if random.gauss(0, 1) < e:
                    action = env.action_space.sample()
                else:
                    action = np.argmax(mainDQN.predict(state))
                # Get the observation results
                next_state, reward, done, _ = env.step(action)
                # Reward shaping once the episode ends
                if done:
                    if step_count >= 199:
                        reward = 200
                    else:
                        reward = -100
                replay_buffer.append((state, action, reward, next_state, done))
                if len(replay_buffer) > REPLAY_MEMORY:
                    replay_buffer.popleft()
                # Update state
                state = next_state
                step_count += 1
                if step_count > 199:
                    break
            print("Episode:", i, "step:", step_count)
            if step_count > 10000:
                pass
            # Train every 10 episodes
            if i % 10 == 1:
                # Get random batches of experience; minibatches work better
                for _ in range(50):
                    minibatch = random.sample(replay_buffer, 10)
                    loss, _ = simple_replay_train(mainDQN, minibatch)
                print("Loss:", loss)
        # Rendering with the trained network
        bot_play(mainDQN)
# python start code
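bot_play is called at the end of Example 8 but is not included on this page. A minimal sketch of what it might look like, assuming the same global env and Gym-style step API used above, is a greedy rollout with rendering:

import numpy as np

def bot_play(mainDQN):
    # Play one episode greedily with the trained network and report the score.
    state = env.reset()
    total_reward = 0
    done = False
    while not done:
        env.render()
        action = np.argmax(mainDQN.predict(state))
        state, reward, done, _ = env.step(action)
        total_reward += reward
    print("Total score:", total_reward)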
Example 9: batch_train
# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def batch_train(env, mainDQN, targetDQN, train_batch: list) -> float:
    """Trains `mainDQN` with target Q values given by `targetDQN`

    Args:
        mainDQN (dqn.DQN): Main DQN that will be trained
        targetDQN (dqn.DQN): Target DQN that will predict Q_target
        train_batch (list): Minibatch from the stored buffer
            Each element is (state, action_id, action_args, reward, next_state, done)

    Returns:
        float: After updating `mainDQN`, it returns a `loss`
    """
    states = np.vstack([x[0] for x in train_batch])
    actions_id = np.array([x[1] for x in train_batch])
    rewards = np.array([x[3] for x in train_batch])
    next_states = np.vstack([x[4] for x in train_batch])
    done = np.array([x[5] for x in train_batch])

    # actions_arg[i] : arguments whose id == i; -1 marks an unused argument slot
    actions_arg = np.ones([13, FLAGS.batch_size], dtype=np.int32)
    actions_arg *= -1
    batch_index = 0
    for x in train_batch:
        action_id = x[1]
        arg_index = 0
        for arg in env.action_spec().functions[action_id].args:
            if arg.id in range(3):
                # Spatial arguments (screen/minimap coordinates)
                actions_arg[arg.id][batch_index] = coordinateToInt(x[2][arg_index])
            else:
                actions_arg[arg.id][batch_index] = int(x[2][arg_index][0])
            arg_index += 1
        batch_index += 1

    X = states
    Q_target = rewards + FLAGS.discount * np.max(targetDQN.predict(next_states), axis=1) * ~done

    spatial_Q_target = []
    spatial_predict = targetDQN.predictSpatial(next_states)
    for i in range(13):
        spatial_Q_target.append(rewards + FLAGS.discount * np.max(spatial_predict[i], axis=1) * ~done)

    # y shape : [batch_size, output_size]
    y = mainDQN.predict(states)
    y[np.arange(len(X)), actions_id] = Q_target

    # ySpatial shape : [13, batch_size, arg_size(id)]
    ySpatial = mainDQN.predictSpatial(states)
    for j in range(13):
        for i in range(len(X)):
            if actions_arg[j][i] >= 0:
                ySpatial[j][i][actions_arg[j][i]] = spatial_Q_target[j][i]

    # Train our network using target and predicted Q values on each episode
    return mainDQN.update(X, y, ySpatial)
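coordinateToInt, used in Example 9 to encode spatial action arguments, is not shown on this page. Under the assumption that it flattens an (x, y) screen or minimap coordinate into a single index on a FLAGS.screen_size grid, it might look like:

def coordinateToInt(coord):
    # Flatten an (x, y) coordinate into a single integer index
    # on a FLAGS.screen_size x FLAGS.screen_size grid (assumed layout).
    return int(coord[1]) * FLAGS.screen_size + int(coord[0])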
Example 10: test
# Required import: import dqn [as alias]
# Or: from dqn import DQN [as alias]
def test(session, hist_len=4, discount=0.99, act_rpt=4, upd_freq=4, min_sq_grad=0.01, epsilon=TEST_EPSILON,
         no_op_max=30, num_tests=30, learning_rate=0.00025, momentum=0.95, sq_momentum=0.95):
    # Create the ALE object
    if len(sys.argv) < 2:
        print('Usage:', sys.argv[0], 'rom_file')
        sys.exit()
    ale = ALEInterface()

    # Get & set the desired settings
    ale.setInt('random_seed', 123)
    # Change repeat action probability from the default of 0.25
    ale.setFloat('repeat_action_probability', 0.0)

    # Set USE_SDL to True to display the screen. ALE must be compiled
    # with SDL enabled for this to work. On OSX, pygame init is used to
    # proxy-call SDL_main.
    USE_SDL = False
    if USE_SDL:
        if sys.platform == 'darwin':
            import pygame
            pygame.init()
            ale.setBool('sound', False)  # Sound doesn't work on OSX
        elif sys.platform.startswith('linux'):
            ale.setBool('sound', True)
        ale.setBool('display_screen', True)

    # Load the ROM file
    ale.loadROM(sys.argv[1])

    # Create the DQN agent
    # learning_rate and momentum are unused parameters (but needed)
    agent = DQN(ale, session, epsilon, learning_rate, momentum, sq_momentum, hist_len, len(ale.getMinimalActionSet()), None, discount, rom_name(sys.argv[1]))

    # Store the most recent two images
    preprocess_stack = deque([], 2)

    num_episodes = 0
    while num_episodes < num_tests:
        # Initialize the sequence with the initial image
        seq = list()
        perform_no_ops(ale, no_op_max, preprocess_stack, seq)
        total_reward = 0
        while not ale.game_over():
            state = get_state(seq, hist_len)
            action = agent.get_action_best_network(state, epsilon)
            # Skip frames by repeating the chosen action
            reward = 0
            for i in range(act_rpt):
                reward = reward + ale.act(action)
                preprocess_stack.append(ale.getScreenRGB())
            seq.append(pp.preprocess(preprocess_stack[0], preprocess_stack[1]))
            total_reward += reward
        print('Episode ended with score: %d' % (total_reward))
        num_episodes = num_episodes + 1
        ale.reset_game()
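perform_no_ops, get_state, and pp.preprocess in Example 10 come from the surrounding project and are not shown here. As an illustration only, a perform_no_ops helper that randomizes the episode start with no-op actions and then fills the two-frame preprocessing stack could be sketched as follows, reusing the pp.preprocess call from the example above:

import random

def perform_no_ops(ale, no_op_max, preprocess_stack, seq):
    # Advance the emulator by a random number of no-op (action 0) frames
    # so episodes do not always start from the identical state.
    for _ in range(random.randint(0, no_op_max)):
        ale.act(0)
    # Fill the two-frame preprocessing stack before the first real step.
    for _ in range(2):
        ale.act(0)
        preprocess_stack.append(ale.getScreenRGB())
    seq.append(pp.preprocess(preprocess_stack[0], preprocess_stack[1]))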