本文整理匯總了Python中SwingyMonkey.SwingyMonkey類的典型用法代碼示例。如果您正苦於以下問題:Python SwingyMonkey類的具體用法?Python SwingyMonkey怎麼用?Python SwingyMonkey使用的例子?那麼,這裡精選的類代碼示例或許可以為您提供幫助。
在下文中一共展示了SwingyMonkey類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: run_games
def run_games(learner, hist, iters = 100, t_len = 100):
    """
    Play `iters` consecutive games with `learner` controlling the monkey.

    Every game gets a fresh SwingyMonkey wired to the learner's action and
    reward callbacks; `t_len` is forwarded as the game's tick length.  When a
    game ends its score is appended to `hist` and `learner.reset()` is called.
    Returns None.
    """
    for epoch in range(iters):
        game = SwingyMonkey(
            sound=False,                 # keep the game silent
            text="Epoch %d" % epoch,     # on-screen epoch label
            tick_length=t_len,
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback,
        )

        # Keep ticking until game_loop() returns a false value, copying the
        # game's current state onto the learner after every tick.
        while game.game_loop():
            learner.last_state = game.get_state()

        hist.append(game.score)
        learner.reset()
示例2: evaluate
def evaluate(gamma=0.4, iters=100, chatter=True):
    """
    Train a TDValueLearner for `iters` games and return its negated mean score.

    Parameters:
        gamma   -- value assigned to `learner.gamma` before training.
        iters   -- number of games (epochs) to play.
        chatter -- when true, print "epoch score highscore average" after
                   every game.

    Returns:
        The negative of the running average score over all games
        (presumably so the result can be fed to a minimiser -- confirm with
        the caller).
    """
    learner = TDValueLearner()
    learner.gamma = gamma
    highscore = 0
    avgscore = 0.0
    for ii in range(iters):
        # Decay exploration as 1/(epoch+1).  The float literal forces true
        # division: on Python 2 the integer form 1/(ii+1) evaluates to 0 for
        # every epoch after the first, which silently disables exploration.
        learner.epsilon = 1.0 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,              # Don't play sounds.
                             text="Epoch %d" % (ii),   # Display the epoch on screen.
                             tick_length=1,
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until the game reports that it is over.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highscore = max(highscore, score)
        # Incremental mean: avgscore is a float, so this is true division.
        avgscore = (ii * avgscore + score) / (ii + 1)
        if chatter:
            # Single pre-formatted argument prints identically on Python 2 and 3.
            print("%s %s %s %s" % (ii, score, highscore, avgscore))

        # Reset the state of the learner.
        learner.reset()
    return -avgscore
示例3: run_games
def run_games(learner, hist, iters = 100, t_len = 100):
    """
    Run `iters` games back to back, letting `learner` drive the monkey.

    After each game the final score is appended to `hist` and
    `learner.reset()` is called.  `t_len` is passed through as the game's
    tick length.  Returns None.
    """
    for epoch in range(iters):
        monkey_game = SwingyMonkey(
            sound=False,
            text="Epoch %d" % epoch,
            tick_length=t_len,
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback,
        )

        # Hook point for building SARSA arrays through the learner's methods.
        # While a game is running the learner exposes:
        #   learner.last_action - False (0, glide) or True (1, jump)
        #   learner.last_state  - the most recent state
        #   learner.last_reward - 0, +1 for a pass, -5 for a hit,
        #                         -10 for falling off the screen
        # Gravity (an integer 1, 2, 3 or 4) can be inferred from the change in
        # monkey velocity between ticks on which the action is False.
        while monkey_game.game_loop():
            continue

        hist.append(monkey_game.score)
        learner.reset()
示例4: testgame
def testgame(iters=100,show=True):
    """
    Train a QLearner2 for `iters` games, logging progress to "record12.p".

    After every game the per-epoch statistics (epoch index, best score so
    far, running average, score, percentage of non-zero Q entries) are
    appended to a record dict, which is re-pickled to "record12.p".

    Parameters:
        iters -- number of games (epochs) to play.
        show  -- when true, print a progress line after every game.

    Returns:
        (avgscore, highestscore, score): the running average, the best score
        and the score of the last game.  All three are zero when iters is 0.
    """
    learner = QLearner2()
    highestscore = 0
    avgscore = 0.0   # float so the running mean is not truncated on Python 2
    score = 0        # defined up front so the return works when iters == 0
    record = {'epoch': [], 'highest': [], 'avg': [], 'score': [], 'q': []}

    for ii in range(iters):
        # Decay exploration as 1/(epoch+1); the float literal avoids the
        # Python 2 integer division that yields 0 after the first epoch.
        learner.epsilon = 1.0 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,              # Don't play sounds.
                             text="Epoch %d" % (ii),   # Display the epoch on screen.
                             tick_length=1,
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until the game reports that it is over.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highestscore = max(highestscore, score)
        avgscore = (ii * avgscore + score) / (ii + 1.0)
        # Percentage of Q entries that are non-zero, rounded to 3 decimals.
        q = round(float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)

        if show:
            # Single pre-formatted argument prints identically on Python 2 and 3.
            print("epoch: %s highest: %s current score: %s average: %s %% of Q mx filled: %s"
                  % (ii, highestscore, score, avgscore, q))

        record['epoch'].append(ii)
        record['highest'].append(highestscore)
        record['avg'].append(avgscore)
        record['score'].append(score)
        record['q'].append(q)

        # Checkpoint the record; the context manager closes the file handle
        # that the bare open() call used to leak on every epoch.
        with open("record12.p", "wb") as record_file:
            pickle.dump(record, record_file)

        # Reset the state of the learner.
        learner.reset()
    return avgscore,highestscore,score
示例5: run_games
def run_games(learner, hist, iters = 100, t_len = 100):
    """
    Play `iters` games and log every step of every game into `hist`.

    `hist` must provide 'state', 'action' and 'reward' containers that accept
    item assignment by epoch index, plus a 'score' list.  For epoch `k` the
    learner's last state, action and reward are recorded after each tick and
    once more after the game ends; the final score is appended to
    hist['score'].  `t_len` is accepted but not used by this variant.
    Returns None.
    """
    for epoch in range(iters):
        game = SwingyMonkey()

        # Fresh per-epoch logs.
        for key in ('state', 'action', 'reward'):
            hist[key][epoch] = []

        def log_step():
            # Snapshot what the learner saw and did most recently.
            hist['state'][epoch].append(learner.last_state)
            hist['action'][epoch].append(learner.last_action)
            hist['reward'][epoch].append(learner.last_reward)

        # This is where SARSA arrays can be built via the learner's methods:
        #   learner.last_action - False (0, glide) or True (1, jump)
        #   learner.last_state  - the most recent state
        #   learner.last_reward - 0, +1 for a pass, -5 for a hit,
        #                         -10 for falling off the screen
        # Gravity (an integer 1, 2, 3 or 4) can be inferred from the monkey's
        # velocity change between ticks on which the action is False.
        while game.game_loop():
            log_step()

        # One extra snapshot after the loop ends, to see how the monkey failed.
        log_step()

        hist['score'].append(game.score)
        learner.reset()
示例6: run_games
def run_games(learner, hist, iters = 100, t_len = 100):
    """
    Play a data-gathering phase followed by an exploitation phase.

    Epochs 0-29 always run and use `learner.explore_action_callback`; epochs
    30 .. iters-1 use `learner.action_callback`.  A warning is printed when
    `iters` is below 20, but the games are still played.  After every game
    the score is appended to `hist` and the learner is reset.  `t_len` is the
    tick length handed to each game.  Returns None.
    """
    def play_epoch(epoch, choose_action):
        # Run one complete game with the given action callback.
        game = SwingyMonkey(
            sound=False,
            text="Epoch %d" % epoch,
            tick_length=t_len,
            action_callback=choose_action,
            reward_callback=learner.reward_callback,
        )
        while game.game_loop():
            continue
        hist.append(game.score)
        learner.reset()

    if iters < 20:
        print("I can't learn that fast! Try more iterations.")

    # Data-gathering phase: fixed at 30 epochs regardless of `iters`.
    for epoch in range(30):
        play_epoch(epoch, learner.explore_action_callback)

    # Exploitation phase: whatever epochs remain beyond the first 30.
    for epoch in range(30, iters):
        play_epoch(epoch, learner.action_callback)
示例7: run_games
def run_games(learner, hist, iters=100, t_len=100):
    """
    Play `iters` games, print progress and a closing summary.

    Before each game the learner is told the screen dimensions through
    `update_specs`; after each game `update_terminal_transition` is called,
    the score is appended to `hist`, a progress line is printed and the
    learner is reset.  Once all games are done the learner's alpha, gamma and
    epsilon plus the score history, best and mean score are printed.

    Returns the maximum of `hist` (np.max is applied to `hist` as it stands
    after the games).
    """
    rule = '----------'

    for epoch in range(iters):
        game = SwingyMonkey(
            sound=False,
            text="Epoch %d" % epoch,
            tick_length=t_len,
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback,
        )

        # Hand the screen dimensions to the agent.
        learner.update_specs(game.screen_height, game.screen_width)

        while game.game_loop():
            continue

        # Let the agent record the transition into the terminal state.
        learner.update_terminal_transition()

        hist.append(game.score)
        print('Epoch %i: current score %i; best score %i' % (epoch, game.score, np.max(hist)))

        learner.reset()

    # Closing summary.  Each line is assembled into one string so that the
    # emitted text is the same under the print statement and the print function.
    print(rule)
    print('Parameters: %0.2f alpha; %0.2f gamma; %0.2f epsilon' % (learner.alpha, learner.gamma, learner.epsilon))
    print(' '.join(('Score history:', str(hist))))
    print(' '.join(('Best score:', str(np.max(hist)))))
    print(' '.join(('Average score:', str(np.mean(hist)))))
    print(rule)
    return np.max(hist)
示例8: run_games
def run_games(learner, hist, iters = 100, t_len = 100):
    """
    Play `iters` games, printing one tab-separated progress line per game.

    Each line holds: epoch, score, best score so far, running average score
    and the mean of `learner.Q`.  Scores are appended to `hist` and the
    learner is reset after each game.  When all games are done `learner.Q`
    and `learner.state_counts` are printed.  Returns None.
    """
    best = 0
    running_mean = 0

    for epoch in range(iters):
        game = SwingyMonkey(
            sound=False,
            text="Epoch %d" % epoch,
            tick_length=t_len,
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback,
        )

        while game.game_loop():
            continue

        latest = game.score
        best = latest if latest > best else best
        # Incremental mean; the 1.0 keeps the division in floating point.
        running_mean = (latest + epoch*running_mean)/(epoch+1.0)
        print("%i\t%i\t%i\t%s:\t%s"%(epoch,latest,best,running_mean,np.mean(learner.Q)))

        hist.append(game.score)
        learner.reset()

    print(learner.Q)
    print(learner.state_counts)
示例9: run_games
def run_games(learner, hist, policy="random", eps=0.9, gam=0.5, alph=0.75, iters=20, t_len=100):
    """
    Play `iters` games and add the collected transitions to `hist`.

    `eps`, `gam` and `alph` are stored on the learner, and its `num_actions`
    is set to 2.  With policy == "random" the games are driven by
    `learner.random_actions`; any other value uses `learner.action_callback`
    and marks the learner as fitted.  For every tick (and once more after the
    game ends) the state tuple, the action as 0/1 and the reward are logged.
    When `learner.learn_g` is set, gravity is re-inferred from the third tick
    onwards and written into the last slot of every logged state of the
    current game.

    After all games the logs are added to hist["state_history"],
    hist["action_history"], hist["reward_history"] and hist["score_history"].
    Returns None.
    """
    learner.eps = eps
    learner.gam = gam
    learner.alph = alph
    learner.num_actions = 2

    all_states, all_actions, all_rewards, all_scores = [], [], [], []

    for epoch in range(iters):
        use_random = policy == "random"
        if use_random:
            game = SwingyMonkey(
                sound=False,
                text="Random Epoch %d" % epoch,
                tick_length=t_len,
                action_callback=learner.random_actions,
                reward_callback=learner.reward_callback,
            )
        else:
            game = SwingyMonkey(
                sound=False,
                text="Learned Epoch %d" % epoch,
                tick_length=t_len,
                action_callback=learner.action_callback,
                reward_callback=learner.reward_callback,
            )
            learner.fitted = True

        # Logs for this game only.
        ep_states, ep_actions, ep_rewards = [], [], []

        def log_step():
            # Record the learner's latest state tuple, action (0/1) and reward.
            ep_states.append(learner.create_state_tuple(learner.last_state))
            ep_actions.append(int(learner.last_action == True))
            ep_rewards.append(learner.last_reward)

        ticks = 0
        while game.game_loop():
            log_step()
            if learner.learn_g & (ticks > 1):
                learner.infer_g(ep_states, ep_actions)
                # Back-fill the inferred gravity into every logged state.
                for logged in ep_states:
                    logged[-1] = learner.gravity
            ticks += 1

        # Final snapshot, to see how the monkey failed.
        log_step()

        all_scores.append(game.score)
        all_states.extend(ep_states)
        all_actions.extend(ep_actions)
        all_rewards.extend(ep_rewards)

        learner.reset()

    # The state history is rebound to a new list; the others grow in place.
    hist["state_history"] = hist["state_history"] + all_states
    hist["action_history"] += all_actions
    hist["reward_history"] += all_rewards
    hist["score_history"] += all_scores
示例10: reward_callback
return new_action
def reward_callback(self, reward):
    """Store the reward that was passed in as `self.last_reward`."""
    self.last_reward = reward
# Training script: play `iters` games with one Learner and keep a copy of the
# Q table from the best-scoring game.
iters = 10000
learner = Learner()
# NOTE(review): the visible loop never calls learner.reset() between epochs --
# confirm that this is intended.
for ii in xrange(iters):
    # Make a new monkey object.
    swing = SwingyMonkey(sound=False, # Don't play sounds.
                         tick_length=1, # Make game ticks super fast.
                         # Display the epoch on screen and % of Q matrix filled
                         text="Epoch %d " % (ii) + str(round(float(np.count_nonzero(learner.Q))*100/learner.Q.size,3)) + "%",
                         action_callback=learner.action_callback,
                         reward_callback=learner.reward_callback)
    # Loop until you hit something.
    while swing.game_loop():
        pass
    # Keep track of the score for that epoch.
    learner.scores.append(learner.last_state['score'])
    # On a new best score, remember it together with a copy of the current Q.
    if learner.last_state['score'] > learner.best_score:
        print 'New best Q'
        learner.best_score = learner.last_state['score']
        learner.bestQ = learner.Q.copy()
    # Report the score and the percentage of non-zero Q entries.
    print 'score %d' % learner.last_state['score'], str(round(float(np.count_nonzero(learner.Q))*100/learner.Q.size,3)) + "%"
示例11: reward_callback
def reward_callback(self, reward):
    """Keep the given reward on the instance as `last_reward`."""
    self.last_reward = reward
# Training script: play `iters` games with one Learner, collect the score of
# each game and plot the scores against the epoch number.
iters = 150
learner = Learner()
scores = []
for ii in xrange(iters):
    # Make a new monkey object.
    swing = SwingyMonkey(sound=False, # Don't play sounds.
                         text="Epoch %d" % (ii), # Display the epoch on screen.
                         tick_length=1, # Make game ticks super fast.
                         action_callback=learner.action_callback,
                         reward_callback=learner.reward_callback)
    # Loop until you hit something.
    while swing.game_loop():
        pass
    scores.append(swing.get_score())
    # Reset the state of the learner.
    learner.reset()
# X axis: epoch numbers 1..iters, one per collected score.
domain = np.arange(1, iters + 1, 1)
plt.plot(domain, scores)
plt.title("Scores over each Epoch (discount = " + str(learner.discount) + ")")
示例12: run_games
def run_games(learner, hist, eps=0.5, gam=0.5, alph=0.75, iters = 20, t_len = 100, test=False):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.

    eps, gam and alph are stored on the learner and its num_actions is set to 2.
    Each of the `iters` games logs the learner's state tuple, action (0/1) and
    reward per tick.  Unless `test` is true, an ExtraTreesRegressor is refit
    after every game on all transitions gathered so far and installed as
    learner.estimator.  The accumulated histories are written into `hist`.
    '''
    # Place alpha and epsilon values into learner
    learner.eps = eps
    learner.gam = gam
    learner.alph = alph
    learner.num_actions = 2
    # Histories accumulated over all epochs.
    total_states = []
    total_actions = []
    total_rewards = []
    total_scores = []
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(sound=False, # Don't play sounds.
                             text="Epoch %d" % (ii), # Display the epoch on screen.
                             tick_length = t_len, # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)
        # Initialize the history lists for iteration ii
        states = []
        actions = []
        rewards = []
        loop_counter = 0
        # Loop until you hit something.
        while swing.game_loop():
            states.append(learner.create_state_tuple(learner.last_state))
            actions.append(int(learner.last_action==True))
            rewards.append(learner.last_reward)
            # From the third tick on, re-infer gravity and back-fill it into
            # the last slot of every state logged for this game.
            if learner.learn_g & (loop_counter > 1):
                learner.infer_g(states,actions)
                for pp in range(len(states)):
                    states[pp][-1] = learner.gravity
            loop_counter += 1
        else: # Get final action,reward and state just to see how the monkey failed.
            states.append(learner.create_state_tuple(learner.last_state))
            actions.append(int(learner.last_action==True))
            rewards.append(learner.last_reward)
        # Append histories from most recent epoch, create training arrays
        total_scores.append(swing.score)
        total_states += states
        total_actions += actions
        total_rewards += rewards
        if not test:
            # Iteratively refine the optimal policy after each epoch
            if ii == 0:
                # Features are state + action; targets are the raw rewards.
                X_train = np.array([np.append(total_states[kk],total_actions[kk]) for kk in range(len(total_states))])
                y_train = np.array(total_rewards)
                #Build tree using first stage Q-learning
                extraTrees = ExtraTreesRegressor(n_estimators=50)
                extraTrees.fit(X_train, y_train)
            # Refit random forest estimator based on composite epochs
            else:
                # Generate new X(state,action) and y(reward) lists from newly run batch, based off of Q-estimator and using prior rewards a la Ernst '06'
                # The last transition is dropped because the target needs the next state.
                X_train = np.array([np.append(total_states[kk],total_actions[kk]) for kk in range(len(total_rewards)-1)])
                # Construct Bellman's equations to get expected rewards based on next proposed state
                # NOTE(review): `agent` is neither a parameter nor a local of this
                # function, so it must resolve to a module-level name -- presumably
                # the same object as `learner`; confirm.
                y_train = np.array([agent.estimator.predict(np.append(total_states[kk],total_actions[kk])) \
                    +agent.alph*(total_rewards[kk]+(agent.gam * np.max([agent.estimator.predict(np.append(total_states[kk+1]\
                    ,act)) for act in range(agent.num_actions)]))-agent.estimator.predict(np.append(total_states[kk],total_actions[kk])))\
                    for kk in range(len(total_states)-1)])
                # Re-fit regression to refine optimal policy according to expected reward.
                extraTrees = ExtraTreesRegressor(n_estimators=50)
                extraTrees.fit(X_train,y_train)
            # As we refine the policy, we should reduce the amount we explore.
            # NOTE(review): the code *increases* eps by 0.05 every 10th epoch;
            # whether that reduces exploration depends on how the learner
            # interprets eps -- confirm.
            if ii % 10 == 0:
                learner.eps += 0.05
            learner.estimator = extraTrees
            learner.fitted = True
        else:
            learner.fitted = True
        # Reset the state of the learner.
        learner.reset()
    # Place state, action, reward and score histories to be saved by wrapper.
    hist['state_history'] = total_states
    hist['action_history'] = total_actions
#.........這裏部分代碼省略.........
示例13: Learner
'''This gets called so you can see what reward you get.'''
self.last_reward = reward
# Training script: play `iters` games with one Learner and collect the final
# score of each game in `scorelist`.
iters = 10000
learner = Learner()
scorelist=[]
for ii in xrange(iters):
    # NOTE(review): this value does not depend on ii and is not handed to the
    # learner in the visible code; learning_rate_start is defined outside
    # this excerpt -- confirm how learning_rate is meant to be used.
    learning_rate=(learning_rate_start+.5)/(iters/100)
    # Make a new monkey object.
    swing = SwingyMonkey(sound=False, # Don't play sounds.
                         text="Epoch %d" % (ii), # Display the epoch on screen.
                         tick_length=1, # Make game ticks super fast.
                         action_callback=learner.action_callback,
                         reward_callback=learner.reward_callback)
    # Loop until you hit something.
    while swing.game_loop():
        pass
    #store all values for mins and max calcs -- only need to run once to get values for the find_state_bounds function which saves these values
    scorelist.append(swing.get_state()['score'])
    #print swing.get_state()
    # Reset the state of the learner.
    learner.reset()
#calculate avg score for this approach
示例14: Learner
# formal learning step
# Keeps starting new games until one of them ends with a score of 100 or more.
# NOTE(review): `iters` is not used by the visible code now that the for-loop
# below is commented out.
iters = 10000
learner = Learner()
reward = []      # last reward of every game
score = []       # final score of every game
score_cur = 0    # final score of the most recent game; drives the loop
ii = 0           # epoch counter, shown on screen
#for ii in xrange(iters):
while score_cur < 100:
    ii += 1
    # Make a new monkey object.
    swing = SwingyMonkey(sound=False, # Don't play sounds.
                         text="Epoch %d" % (ii), # Display the epoch on screen.
                         tick_length=0, # Make game ticks super fast.
                         action_callback=learner.action_callback,
                         reward_callback=learner.reward_callback)
    # Loop until you hit something.
    while swing.game_loop():
        pass
    reward.append(learner.last_reward)
    score_cur = swing.get_state()["score"]
    score.append(swing.get_state()["score"])
    print "################### Score = " + \
        str(swing.get_state()["score"]) + " ########################"
    # Reset the state of the learner.
    learner.reset()
示例15: xrange
# Training loop fragment: plays games until the epoch counter reaches 1e5 and
# logs per-game statistics.
# NOTE(review): learner, reward, score, state_grid and state_num are used here
# but defined before this excerpt; the loop body may also continue beyond it.
score_cur = 0
ii = 0
# for ii in xrange(iters):
# learner.Q = np.load("Qmat_manual.npy")
# learner.learnTime = np.load("Lmat_manual.npy")
# while score_cur < 5000:
while ii < 1e5:
    ii += 1
    # Make a new monkey object.
    swing = SwingyMonkey(
        sound=False, # Don't play sounds.
        text="Epoch %d" % (ii), # Display the epoch on screen.
        tick_length=0, # Make game ticks super fast.
        action_callback=learner.action_callback,
        reward_callback=learner.reward_callback,
    )
    # Loop until you hit something.
    while swing.game_loop():
        pass
    reward.append(learner.last_reward)
    score_cur = swing.get_state()["score"]
    # veloc_cur, result_cur and qnorm are computed here but not read again in
    # the visible part of the loop.
    veloc_cur = swing.get_state()["monkey"]["vel"]
    result_cur = learner.result_callback()
    qnorm = np.linalg.norm(learner.Q)
    score.append(score_cur)
    state_grid.append(learner.state_grid)
    state_num.append(learner.state_num)