This article collects typical usage examples of the Python method world.World.move: what World.move does, how to call it, and where it appears in real code. You can also read further about the class world.World that the method belongs to.
The following shows 9 code examples of World.move, sorted by popularity by default.
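Most of the examples below come from predator-prey grid-world experiments, where World.move takes a prey action and a predator action and returns a (prey_reward, pred_reward) tuple; Example 1 instead uses a pygame world whose move takes a key code. Below is a minimal sketch of the common predator-prey call pattern, assuming the World constructor and the allMoveList, singleMoveList and stopState methods behave as they do in the examples; the grid size and predator location are made up, not taken from the collected code.

# Sketch only: mirrors how the examples below call World.move.
import random
from world import World

world = World((5, 5), [(2, 2)])          # hypothetical 5x5 grid with one predator
while not world.stopState():
    prey_action = random.choice(world.singleMoveList())
    pred_action = random.choice(world.allMoveList())
    prey_reward, pred_reward = world.move(prey_action, pred_action)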
Example 1: __init__
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
class Game:
    def __init__(self, size):
        pygame.init()
        pygame.key.set_repeat(200, 5)
        self.screen = pygame.display.set_mode(size)
        guy_image = pygame.image.load("data/images/guy.png")
        guy_image = guy_image.convert()
        self.guy = AnimatedSprite([0, 0], guy_image, 3, 4, 10)
        self.guy_idle = self.guy.create_animation([0, 1, 2, 3, 3, 2, 1, 0])
        self.guy_waving = self.guy.create_animation([7, 8, 9, 10, 11, 11, 10, 9, 8, 7])
        self.guy.set_animation(self.guy_idle)
        world_image = pygame.image.load("data/images/world.png")
        world_image = world_image.convert()
        self.world = World([200, 200], world_image)
        self.running = True

    def main_loop(self):
        while self.running:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    self.running = False
                elif event.type == pygame.KEYDOWN:
                    self.key_handler(event.key)
            # wave at the world sprite when the guy is close enough to it
            if distance(self.guy, self.world) < 140:
                if self.guy.animation != self.guy_waving:
                    self.guy.set_animation(self.guy_waving)
            else:
                if self.guy.animation != self.guy_idle:
                    self.guy.set_animation(self.guy_idle)
            self.guy.update()
            self.screen.blit(self.world.image, self.world.rect)
            self.screen.blit(self.guy.image, self.guy.rect)
            pygame.display.flip()

    def key_handler(self, key):
        # arrow keys move the world; the screen is cleared so the old frame does not linger
        if (key == pygame.K_DOWN
                or key == pygame.K_UP
                or key == pygame.K_LEFT
                or key == pygame.K_RIGHT):
            self.world.move(key)
            self.screen.fill((0, 0, 0))
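A possible way to run this class, for illustration only; the window size is arbitrary, and AnimatedSprite and distance are assumed to be defined alongside Game in the original project:

if __name__ == "__main__":
    game = Game((640, 480))   # hypothetical window size
    game.main_loop()
    pygame.quit()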
Example 2: Qlearning
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
def Qlearning(episodes, initialState, policy, alpha_pred=0.2, alpha_prey=0.5):
    initValue = 0
    policyParam = 0.2
    discount = 0.7
    # world object (starting state is trivial)
    world = World((5,5), initialState)
    # Q value tables
    Q_pred = {}
    Q_prey = {}
    steps = [0]*episodes
    rewards = [0]*episodes
    for i in range(episodes):
        iterations = 0
        # initialize world
        world = World((5,5), initialState)
        while True:
            # world.prettyPrint()
            state = world.position
            # move the predator according to the policy with one parameter (epsilon for e-greedy or tau for softmax)
            pred_action = policy(state, world.allMoveList(), Q_pred, policyParam, initValue)
            prey_action = policy(state, world.singleMoveList(), Q_prey, policyParam, initValue)
            reward = world.move(prey_action, pred_action)
            iterations += 1
            if (state, pred_action) not in Q_pred:
                Q_pred[(state, pred_action)] = initValue
            if (state, prey_action) not in Q_prey:
                Q_prey[(state, prey_action)] = initValue
            # check if the predator caught the prey
            if world.stopState():
                # the Q(s,a) update rule (note that the next state is the absorbing state)
                Q_prey[state, prey_action] = Q_prey.get((state, prey_action), initValue) + alpha_prey * (reward[0] - Q_prey[state, prey_action])
                Q_pred[state, pred_action] = Q_pred.get((state, pred_action), initValue) + alpha_pred * (reward[1] - Q_pred[state, pred_action])
                break
            newState = world.position
            # the maximum value the agent can obtain after another move
            maxQ_pred = max([Q_pred.get((newState, nextAction), initValue) for nextAction in world.allMoveList()])
            maxQ_prey = max([Q_prey.get((newState, nextAction), initValue) for nextAction in world.singleMoveList()])
            # the Q(s,a) update rule (note that the immediate reward is zero)
            Q_pred[state, pred_action] = Q_pred[(state, pred_action)] + alpha_pred * (discount*maxQ_pred - Q_pred[state, pred_action])
            Q_prey[state, prey_action] = Q_prey[(state, prey_action)] + alpha_prey * (discount*maxQ_prey - Q_prey[state, prey_action])
        if i > 0 and i % 1000 == 0:
            print "Episode", i
        # record the number of steps the predator took
        steps[i] = iterations
        if reward[1] > 0:
            rewards[i] = 1
    return steps, rewards
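Qlearning expects a policy callable with the signature policy(state, moves, Q, param, initValue) used above. For illustration only, a possible epsilon-greedy stand-in and a call; the function name epsGreedy and the starting state are assumptions, not part of the collected code:

import random

def epsGreedy(state, moves, Q, epsilon, initValue):
    # explore with probability epsilon, otherwise pick a move with maximal Q value
    if random.random() < epsilon:
        return random.choice(moves)
    values = [(Q.get((state, m), initValue), m) for m in moves]
    best = max(v for v, m in values)
    return random.choice([m for v, m in values if v == best])

steps, rewards = Qlearning(10000, [(2, 2)], epsGreedy)   # hypothetical starting state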
Example 3: isOptimal
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
def isOptimal(self, state, move):
    world = World((0,0), (1,1))
    ourMove = 0
    bestMove = 0
    for nmove in world.moveList():
        world.setState(state)
        world.move(nmove)
        if world.position == (0,0):
            probSum = 10
        else:
            probSum = 0
        for nextState, prob in world.nextPreyStates():
            probSum += prob*self.discount*self.value[nextState]
        bestMove = max(bestMove, probSum)
        if nmove == move:
            ourMove = probSum
    # the move counts as optimal if it reaches at least 97% of the best achievable value
    return ourMove/bestMove > 0.97
Example 4: MCon
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
def MCon(episodes, initValue=15, epsilon=0.1, alpha=0.5, discount=0.9):
    # world object (starting state is trivial)
    world = World((0,0), (1,1))
    # initialize the Q value table and the return list for every (s,a)-pair
    Q = {}
    R = {}
    for state in world.allStates():
        for move in world.moveList():
            Q[state, move] = initValue   # some value
            R[state, move] = []          # empty list; return = cumulative discounted reward
    steps = [0]*episodes   # list counting the number of iterations
    for i in range(episodes):
        iterations = 0
        # initialize world
        world.setState((-5,-5))
        stateActionPairs = {}
        # generate an episode using the current policy
        while True:
            state = world.position
            # move the predator according to the policy
            action = epsGreedyPolicy(state, world, Q, epsilon)
            world.move(action)
            if not (state, action) in stateActionPairs:   # store the first occurrence
                stateActionPairs[(state, action)] = iterations   # will be used for discounting
            iterations += 1
            # check if the predator caught the prey
            if world.stopState():
                break
            # move the prey (stochastically)
            world.performPreyMove()
            newState = world.position
        steps[i] = iterations   # save the number of iterations needed to catch the prey
        # update Q and R
        for pair in stateActionPairs.keys():
            firstReturn = 10.0*discount**(iterations-stateActionPairs[pair])   # always zero except 10 when the episode ends
            R[pair].append(firstReturn)
            Q[pair] = np.mean(np.array(R[pair]))
        # the policy update is done in the epsilon-greedy policy code
    return steps
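A possible way to call MCon and plot the resulting learning curve; the pylab import mirrors Example 5 below, and the episode count is arbitrary:

import pylab as pl

steps = MCon(500, initValue=15, epsilon=0.1)
pl.plot(range(len(steps)), steps)
pl.xlabel("episode")
pl.ylabel("steps until the prey is caught")
pl.show()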
Example 5: range
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
# predatorLocations, preds, iters and prey are defined earlier in the original script
for no in range(len(predatorLocations)):
    world = World((5,5), predatorLocations[:no+1])
    allMoves = world.allMoveList()
    singleMoves = world.singleMoveList()
    runs = 1000
    totalCaughtPrey = 0
    totalIterations = 0
    for i in range(runs):
        world = World((5,5), predatorLocations[:no+1])
        iterations = 0
        while not world.stopState():
            preyMove = random.choice(singleMoves)
            predatorMoves = random.choice(allMoves)
            reward = world.move(preyMove, predatorMoves)
            iterations += 1
        if reward[0] < 0:
            totalCaughtPrey += 1
        totalIterations += iterations
    print "Number of predators", no+1,
    preds.append(no+1)
    print "Average Iterations", totalIterations/float(runs),
    iters.append(totalIterations/float(runs))
    print "chance of catching prey", totalCaughtPrey/float(runs)
    prey.append(totalCaughtPrey/float(runs))
pl.scatter(preds, prey)
pl.plot(preds, prey)
Example 6: policyHillClimbing
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
def policyHillClimbing(episodes, initial_state, gamma=0.5, delta=0.2, alpha_pred=0.4, alpha_prey=0.1):
    world = World((5,5), initial_state)
    # initialization might be too expensive
    Q_pred = {}
    Q_prey = {}
    seen_states = []   # keep track of seen states
    pi_pred = {}
    pi_prey = {}
    initValue = 0.0
    num_actions_prey = len(world.singleMoveList())
    num_actions_pred = len(world.allMoveList())
    steps = [0]*episodes
    rewards = [0]*episodes
    for i in range(episodes):
        # initialize world
        world = World((5,5), initial_state)
        iterations = 0
        state = world.position
        seen_states.append(state)
        seen_states.append((0,0))
        for action_p in world.singleMoveList():
            Q_prey[(state, action_p)] = initValue
            pi_prey[(state, action_p)] = 1/float(num_actions_prey)
            Q_prey[((0,0), action_p)] = 0
        for action_p in world.allMoveList():
            Q_pred[(state, action_p)] = initValue
            pi_pred[(state, action_p)] = 1/float(num_actions_pred)
            Q_pred[((0,0), action_p)] = 0
        while not world.stopState():
            state = world.position
            # choose actions
            action_pred = greedy_policy(pi_pred, state, world.allMoveList())
            action_prey = greedy_policy(pi_prey, state, world.singleMoveList())
            reward = world.move(action_prey, action_pred)
            new_state = world.position
            iterations += 1
            # update Q
            if new_state not in seen_states:
                seen_states.append(new_state)
                for action_p in world.singleMoveList():
                    Q_prey[(new_state, action_p)] = initValue
                    pi_prey[(new_state, action_p)] = 1/float(num_actions_prey)
                for action_p in world.allMoveList():
                    Q_pred[(new_state, action_p)] = initValue
                    pi_pred[(new_state, action_p)] = 1/float(num_actions_pred)
            best_Q_pred = max([Q_pred[(new_state, action)] for action in world.allMoveList()])
            best_Q_prey = max([Q_prey[(new_state, action)] for action in world.singleMoveList()])
            Q_pred[(state, action_pred)] = (1.0-alpha_pred)*Q_pred[(state, action_pred)] + alpha_pred*(reward[1] + gamma*best_Q_pred)
            Q_prey[(state, action_prey)] = (1.0-alpha_prey)*Q_prey[(state, action_prey)] + alpha_prey*(reward[0] + gamma*best_Q_prey)
            # update pi for the predator and the prey
            if Q_pred[(state, action_pred)] == max([Q_pred[(state, action)] for action in world.allMoveList()]):
                pi_pred[(state, action_pred)] += delta
            else:
                pi_pred[(state, action_pred)] -= delta/(num_actions_pred-1.0)
            if Q_prey[(state, action_prey)] == max([Q_prey[(state, action)] for action in world.singleMoveList()]):
                pi_prey[(state, action_prey)] += delta
            else:
                pi_prey[(state, action_prey)] -= delta/(num_actions_prey-1.0)
            # restrict to a probability distribution and make it epsilon-greedy (divide 0.1 over all actions)
            sum_value = sum([Q_pred[(state, action)] for action in world.allMoveList()])
            for action_p in world.allMoveList():
                if sum_value > 0:
                    pi_pred[(state, action_p)] /= sum_value
                pi_pred[(state, action_p)] *= 0.9
                pi_pred[(state, action_p)] += 0.1/num_actions_pred
            sum_value = sum([Q_prey[(state, action)] for action in world.singleMoveList()])
            for action_p in world.singleMoveList():
                if sum_value > 0:
                    pi_prey[(state, action_p)] /= sum_value
                pi_prey[(state, action_p)] *= 0.9
                pi_prey[(state, action_p)] += 0.1/num_actions_prey
            #alpha *= decay
        rewards[i] = reward[0]
        steps[i] = iterations
        print "Episode", i
    return steps, rewards
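As with Qlearning above, a run only needs an episode count and a starting state. A possible call, for illustration; the starting state is made up and greedy_policy is assumed to be defined elsewhere in the repository:

steps, rewards = policyHillClimbing(5000, [(2, 2)], gamma=0.5, delta=0.2)
print("average steps over the last 100 episodes: %.1f" % (sum(steps[-100:]) / 100.0))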
Example 7: minimax
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
def minimax(episodes, initial_state, epsilon, decay, gamma, alpha_pred=1.0, alpha_prey=1.0):
    # initialization might be too expensive
    Q_pred = dict()
    Q_prey = dict()
    V_pred = dict()
    V_prey = dict()
    pi_pred = dict()
    pi_prey = dict()
    initValue = 1.0
    # initialisation
    world = World((5,5), initial_state)
    for state in world.allStates():
        V_pred[state] = 1.0
        V_prey[state] = 1.0
        for action in world.allMoveList():
            pi_pred[(state, action)] = 1.0/len(world.allMoveList())
            for prey_move in world.singleMoveList():
                Q_pred[(state, action, prey_move)] = 1.0
                Q_prey[(state, action, prey_move)] = 1.0
        for action in world.singleMoveList():
            pi_prey[(state, action)] = 1.0/len(world.singleMoveList())
    # absorbing states
    terminal_state = tuple([(0,0)] * len(initial_state))
    V_pred[terminal_state] = 0.0
    V_prey[terminal_state] = 0.0
    steps = [0]*episodes
    rewards = [0]*episodes
    for epi in range(episodes):
        # initialize world
        world = World((5,5), initial_state)
        # print "Begin Pred", V_pred[world.position]
        # print "End Prey", V_prey[world.position]
        # for s in world.singleMoveList():
        #     print s, "Pred", V_pred[(s,)]
        #     print s, "Prey", V_pred[(s,)]
        #     for a in world.allMoveList():
        #         for a2 in world.singleMoveList():
        #             print s, "Q", a, a2, Q_pred[(state,a,a2)]
        iterations = 0
        while not world.stopState():
            state = world.position
            # choose actions
            action_pred = minimax_policy(epsilon, pi_pred, state, world.allMoveList())
            action_prey = minimax_policy(epsilon, pi_prey, state, world.singleMoveList())
            reward = world.move(action_prey, action_pred)
            iterations += 1
            new_state = world.position
            # update Q
            # if (state, action_prey) not in Q_prey:
            #     Q_prey[state, action_prey] = initValue
            # if (state, action_pred) not in Q_pred:
            #     Q_pred[state, action_pred] = initValue
            Q_pred[(state, action_pred, action_prey)] = (1.0-alpha_pred)*Q_pred[(state, action_pred, action_prey)] + alpha_pred*(reward[1] + gamma*V_pred[new_state])
            Q_prey[(state, action_pred, action_prey)] = (1.0-alpha_prey)*Q_prey[(state, action_pred, action_prey)] + alpha_prey*(reward[0] + gamma*V_prey[new_state])
            # update pi by solving a linear program
            # adapted from the example at http://abel.ee.ucla.edu/cvxopt/examples/tutorial/lp.html
            ## PREDATOR update
            # constraint to minimize w.r.t. the prey action
            minConstr = [[1.0] + [-Q_pred[(state, a_pred, a_prey)] for a_pred in world.allMoveList()] for a_prey in world.singleMoveList()]
            # constraint to keep every pi(a) positive
            posConstr = []
            for i in range(1, len(world.allMoveList())+1):
                new_row = [0.0] * (len(world.allMoveList())+1)
                new_row[i] = -1.0
                posConstr.append(new_row)
            normGreater = [0.0] + [1.0] * len(world.allMoveList())
            normSmaller = [0.0] + [-1.0] * len(world.allMoveList())
            A = matrix([normGreater, normSmaller] + minConstr + posConstr).trans()
            b = matrix([1.0, -1.0] + [0.0] * (len(world.singleMoveList()) + len(world.allMoveList())))
            # -1 for V and 0 for every pi(s,a)
            c = matrix([-1.0] + [0.0] * len(world.allMoveList()))
            sol = solvers.lp(c, A, b)
            V_pred[state] = sol['x'][0]
            for a_pred, x in zip(world.allMoveList(), sol['x'][1:]):
                pi_pred[(state, a_pred)] = x
            ## PREY update
            # constraint to minimize w.r.t. the predator action
            minConstr = [[1.0] + [-Q_prey[(state, a_pred, a_prey)] for a_prey in world.singleMoveList()] for a_pred in world.allMoveList()]
            # constraint to keep every pi(a) positive
            posConstr = []
            for i in range(1, len(world.singleMoveList())+1):
                new_row = [0.0] * (len(world.singleMoveList())+1)
                new_row[i] = -1.0
                posConstr.append(new_row)
            normGreater = [0.0] + [1.0] * len(world.singleMoveList())
            normSmaller = [0.0] + [-1.0] * len(world.singleMoveList())
#......... the rest of this example is omitted .........
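Each linear program above maximizes the value V of one agent's mixed strategy against the opponent's best response. The following self-contained sketch shows the same construction on a tiny made-up 2x2 zero-sum payoff matrix; it is not part of the collected code and only needs cvxopt:

from cvxopt import matrix, solvers

solvers.options['show_progress'] = False   # silence the solver's progress output

# hypothetical zero-sum payoffs Q[a_pred][a_prey] for 2 predator and 2 prey actions
Q = [[1.0, -1.0],
     [-0.5, 0.5]]
n = len(Q)

# variables x = [V, pi_1, ..., pi_n]; maximizing V means minimizing -V
c = matrix([-1.0] + [0.0] * n)
# inequality rows: sum(pi) <= 1 and sum(pi) >= 1 (together sum(pi) = 1),
# V <= sum_a pi_a * Q[a][a_prey] for every prey action, and every pi_a >= 0
normGreater = [0.0] + [1.0] * n
normSmaller = [0.0] + [-1.0] * n
minConstr = [[1.0] + [-Q[a][prey] for a in range(n)] for prey in range(n)]
posConstr = [[0.0] * i + [-1.0] + [0.0] * (n - i) for i in range(1, n + 1)]
G = matrix([normGreater, normSmaller] + minConstr + posConstr).trans()
h = matrix([1.0, -1.0] + [0.0] * (n + n))
sol = solvers.lp(c, G, h)

print("value of the game: %.3f" % sol['x'][0])
print("maximizing strategy: %s" % [round(p, 3) for p in sol['x'][1:]])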
Example 8: World
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
'''
Created on Jan 18, 2014

@author: anthony.lozano
'''
from world import World

if __name__ == '__main__':
    world = World(16, 16, 1, see_port="COM8")
    #world.carve_path()
    print world
    for i in range(20):
        world.communicate()
        input = raw_input()
        # w/s move forward/backward, a/d turn left/right
        if input == "w":
            world.move(True)
        elif input == "a":
            world.turn(False)
        elif input == "d":
            world.turn(True)
        elif input == "s":
            world.move(False)
Example 9: MCoff
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
def MCoff(episodes, behaPolicy, matches=[], initValue=15, discount=0.9):
    # behaPolicy = dictionary with keys (state, action) and value P(action|state)
    world = World((0,0), (1,1))
    movelist = world.moveList()

    def policy(world):
        return world.pickElementWithProbs([(move, behaPolicy[(world.position, move)]) for move in movelist])

    # initialize the Q value table and the return list for every (s,a)-pair
    Q = {}
    R = {}
    num = {}
    denum = {}
    for state in world.allStates():
        for move in world.moveList():
            num[state, move] = 0.0
            denum[state, move] = 0.0
            Q[state, move] = float(initValue)   # some value
            R[state, move] = []                 # empty list; return = cumulative discounted reward
    steps = [0]*episodes   # list counting the number of iterations
    for epi in range(episodes):
        time = 0
        totalTime = 0
        # initialize world
        world.setState((-5,-5))
        episode = []
        while True:
            action = policy(world)
            episode.append((world.position, action))
            if action == None:
                print action, state
            world.move(action)
            if world.stopState():
                break
            world.performPreyMove()
        # save the pairs that match, and their first occurrence
        matchingHistory = {}
        # last: index after which every move in the episode agrees with the greedy policy
        last = 0
        for i, (state, action) in enumerate(episode[::-1]):
            actionValues = [(Q[state, maction], maction) for maction in world.moveList()]
            bestActions = [actionValues[j][1] for j in maxIndices(actionValues)]
            matchingHistory[(state, action)] = len(episode)-i - 1
            if action not in bestActions:
                last = len(episode)-i
                break
        matches.append(len(episode)-last)
        for (state, action) in matchingHistory:
            if matchingHistory[(state, action)] >= last-1:
                w = np.prod([1.0/behaPolicy[episode[j]] for j in range(matchingHistory[(state, action)], len(episode))])
                num[(state, action)] += w * (10.0*discount**matchingHistory[(state, action)])   # return is gamma^{T-t}*10
                denum[(state, action)] += w
                Q[(state, action)] = num[(state, action)]/float(denum[(state, action)])
        world.setState((-5,-5))
        iterations = 0
        while True:
            iterations += 1
            # follow the greedy policy from the current position
            state = world.position
            actionValues = [(maction, Q[state, maction]) for maction in world.moveList()]
            bestAction = random.choice([actionValues[j][0] for j in maxIndices(actionValues)])
            world.move(bestAction)
            if world.stopState() or iterations > 2000:
                break
            world.performPreyMove()
        steps[epi] = iterations
    return steps
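MCoff expects behaPolicy to map every (state, action) pair to P(action|state). For illustration only, a uniform random behaviour policy built over the same (0,0)/(1,1) world used above, followed by a call with an arbitrary episode count:

world = World((0, 0), (1, 1))
moves = world.moveList()
behaPolicy = {}
for state in world.allStates():
    for move in moves:
        behaPolicy[(state, move)] = 1.0 / len(moves)

steps = MCoff(200, behaPolicy)
print("mean steps of the greedy evaluation: %.1f" % (sum(steps) / float(len(steps))))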