This article collects typical usage examples of the Python method world.World.move: what World.move does, how to call it, and where it appears in real code. You can also read further about the class world.World that the method belongs to.
The following shows 9 code examples of World.move, sorted by popularity by default.
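Most of the examples below come from predator-prey grid-world experiments, where World.move takes a prey action and a predator action and returns a (prey_reward, pred_reward) tuple; Example 1 instead uses a pygame world whose move takes a key code. Below is a minimal sketch of the common predator-prey call pattern, assuming the World constructor and the allMoveList, singleMoveList and stopState methods behave as they do in the examples; the grid size and predator location are made up, not taken from the collected code.

# Sketch only: mirrors how the examples below call World.move.
import random
from world import World

world = World((5, 5), [(2, 2)])          # hypothetical 5x5 grid with one predator
while not world.stopState():
    prey_action = random.choice(world.singleMoveList())
    pred_action = random.choice(world.allMoveList())
    prey_reward, pred_reward = world.move(prey_action, pred_action)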
Example 1: __init__
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
class Game:
    def __init__(self, size):
        pygame.init()
        pygame.key.set_repeat(200, 5)
        self.screen = pygame.display.set_mode(size)
        guy_image = pygame.image.load("data/images/guy.png")
        guy_image = guy_image.convert()
        self.guy = AnimatedSprite([0, 0], guy_image, 3, 4, 10)
        self.guy_idle = self.guy.create_animation([0, 1, 2, 3, 3, 2, 1, 0])
        self.guy_waving = self.guy.create_animation([7, 8, 9, 10, 11, 11, 10, 9, 8, 7])
        self.guy.set_animation(self.guy_idle)
        world_image = pygame.image.load("data/images/world.png")
        world_image = world_image.convert()
        self.world = World([200, 200], world_image)
        self.running = True

    def main_loop(self):
        while self.running:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    self.running = False
                elif event.type == pygame.KEYDOWN:
                    self.key_handler(event.key)
            # wave at the world sprite when the guy is close enough to it
            if distance(self.guy, self.world) < 140:
                if self.guy.animation != self.guy_waving:
                    self.guy.set_animation(self.guy_waving)
            else:
                if self.guy.animation != self.guy_idle:
                    self.guy.set_animation(self.guy_idle)
            self.guy.update()
            self.screen.blit(self.world.image, self.world.rect)
            self.screen.blit(self.guy.image, self.guy.rect)
            pygame.display.flip()

    def key_handler(self, key):
        # arrow keys move the world; the screen is cleared so the old frame does not linger
        if (key == pygame.K_DOWN
                or key == pygame.K_UP
                or key == pygame.K_LEFT
                or key == pygame.K_RIGHT):
            self.world.move(key)
            self.screen.fill((0, 0, 0))
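A possible way to run this class, for illustration only; the window size is arbitrary, and AnimatedSprite and distance are assumed to be defined alongside Game in the original project:

if __name__ == "__main__":
    game = Game((640, 480))   # hypothetical window size
    game.main_loop()
    pygame.quit()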
Example 2: Qlearning
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
def Qlearning(episodes, initialState, policy, alpha_pred=0.2, alpha_prey=0.5):
    initValue = 0
    policyParam = 0.2
    discount = 0.7
    # world object (starting state is trivial)
    world = World((5,5), initialState)
    # Q value tables
    Q_pred = {}
    Q_prey = {}
    steps = [0]*episodes
    rewards = [0]*episodes
    for i in range(episodes):
        iterations = 0
        # initialize world
        world = World((5,5), initialState)
        while True:
            # world.prettyPrint()
            state = world.position
            # move the predator according to the policy with one parameter (epsilon for e-greedy or tau for softmax)
            pred_action = policy(state, world.allMoveList(), Q_pred, policyParam, initValue)
            prey_action = policy(state, world.singleMoveList(), Q_prey, policyParam, initValue)
            reward = world.move(prey_action, pred_action)
            iterations += 1
            if (state, pred_action) not in Q_pred:
                Q_pred[(state, pred_action)] = initValue
            if (state, prey_action) not in Q_prey:
                Q_prey[(state, prey_action)] = initValue
            # check if the predator caught the prey
            if world.stopState():
                # the Q(s,a) update rule (note that the next state is the absorbing state)
                Q_prey[state, prey_action] = Q_prey.get((state, prey_action), initValue) + alpha_prey * (reward[0] - Q_prey[state, prey_action])
                Q_pred[state, pred_action] = Q_pred.get((state, pred_action), initValue) + alpha_pred * (reward[1] - Q_pred[state, pred_action])
                break
            newState = world.position
            # the maximum value the agent can obtain after another move
            maxQ_pred = max([Q_pred.get((newState, nextAction), initValue) for nextAction in world.allMoveList()])
            maxQ_prey = max([Q_prey.get((newState, nextAction), initValue) for nextAction in world.singleMoveList()])
            # the Q(s,a) update rule (note that the immediate reward is zero)
            Q_pred[state, pred_action] = Q_pred[(state, pred_action)] + alpha_pred * (discount*maxQ_pred - Q_pred[state, pred_action])
            Q_prey[state, prey_action] = Q_prey[(state, prey_action)] + alpha_prey * (discount*maxQ_prey - Q_prey[state, prey_action])
        if i > 0 and i % 1000 == 0:
            print "Episode", i
        # record the number of steps the predator took
        steps[i] = iterations
        if reward[1] > 0:
            rewards[i] = 1
    return steps, rewards
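Qlearning expects a policy callable with the signature policy(state, moves, Q, param, initValue) used above. For illustration only, a possible epsilon-greedy stand-in and a call; the function name epsGreedy and the starting state are assumptions, not part of the collected code:

import random

def epsGreedy(state, moves, Q, epsilon, initValue):
    # explore with probability epsilon, otherwise pick a move with maximal Q value
    if random.random() < epsilon:
        return random.choice(moves)
    values = [(Q.get((state, m), initValue), m) for m in moves]
    best = max(v for v, m in values)
    return random.choice([m for v, m in values if v == best])

steps, rewards = Qlearning(10000, [(2, 2)], epsGreedy)   # hypothetical starting state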
Example 3: isOptimal
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
def isOptimal(self, state, move):
    world = World((0,0), (1,1))
    ourMove = 0
    bestMove = 0
    for nmove in world.moveList():
        world.setState(state)
        world.move(nmove)
        if world.position == (0,0):
            probSum = 10
        else:
            probSum = 0
        for nextState, prob in world.nextPreyStates():
            probSum += prob*self.discount*self.value[nextState]
        bestMove = max(bestMove, probSum)
        if nmove == move:
            ourMove = probSum
    # the move counts as optimal if it reaches at least 97% of the best achievable value
    return ourMove/bestMove > 0.97
Example 4: MCon
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
def MCon(episodes, initValue=15, epsilon=0.1, alpha=0.5, discount=0.9):
    # world object (starting state is trivial)
    world = World((0,0), (1,1))
    # initialize the Q value table and the return list for every (s,a)-pair
    Q = {}
    R = {}
    for state in world.allStates():
        for move in world.moveList():
            Q[state, move] = initValue   # some value
            R[state, move] = []          # empty list; return = cumulative discounted reward
    steps = [0]*episodes   # list counting the number of iterations
    for i in range(episodes):
        iterations = 0
        # initialize world
        world.setState((-5,-5))
        stateActionPairs = {}
        # generate an episode using the current policy
        while True:
            state = world.position
            # move the predator according to the policy
            action = epsGreedyPolicy(state, world, Q, epsilon)
            world.move(action)
            if not (state, action) in stateActionPairs:   # store the first occurrence
                stateActionPairs[(state, action)] = iterations   # will be used for discounting
            iterations += 1
            # check if the predator caught the prey
            if world.stopState():
                break
            # move the prey (stochastically)
            world.performPreyMove()
            newState = world.position
        steps[i] = iterations   # save the number of iterations needed to catch the prey
        # update Q and R
        for pair in stateActionPairs.keys():
            firstReturn = 10.0*discount**(iterations-stateActionPairs[pair])   # always zero except 10 when the episode ends
            R[pair].append(firstReturn)
            Q[pair] = np.mean(np.array(R[pair]))
        # the policy update is done in the epsilon-greedy policy code
    return steps
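A possible way to call MCon and plot the resulting learning curve; the pylab import mirrors Example 5 below, and the episode count is arbitrary:

import pylab as pl

steps = MCon(500, initValue=15, epsilon=0.1)
pl.plot(range(len(steps)), steps)
pl.xlabel("episode")
pl.ylabel("steps until the prey is caught")
pl.show()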
Example 5: range
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
# predatorLocations, preds, iters and prey are defined earlier in the original script
for no in range(len(predatorLocations)):
    world = World((5,5), predatorLocations[:no+1])
    allMoves = world.allMoveList()
    singleMoves = world.singleMoveList()
    runs = 1000
    totalCaughtPrey = 0
    totalIterations = 0
    for i in range(runs):
        world = World((5,5), predatorLocations[:no+1])
        iterations = 0
        while not world.stopState():
            preyMove = random.choice(singleMoves)
            predatorMoves = random.choice(allMoves)
            reward = world.move(preyMove, predatorMoves)
            iterations += 1
        if reward[0] < 0:
            totalCaughtPrey += 1
        totalIterations += iterations
    print "Number of predators", no+1,
    preds.append(no+1)
    print "Average Iterations", totalIterations/float(runs),
    iters.append(totalIterations/float(runs))
    print "chance of catching prey", totalCaughtPrey/float(runs)
    prey.append(totalCaughtPrey/float(runs))
pl.scatter(preds, prey)
pl.plot(preds, prey)
Example 6: policyHillClimbing
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
def policyHillClimbing(episodes, initial_state, gamma=0.5, delta=0.2, alpha_pred=0.4, alpha_prey=0.1):
    world = World((5,5), initial_state)
    # initialization might be too expensive
    Q_pred = {}
    Q_prey = {}
    seen_states = []   # keep track of seen states
    pi_pred = {}
    pi_prey = {}
    initValue = 0.0
    num_actions_prey = len(world.singleMoveList())
    num_actions_pred = len(world.allMoveList())
    steps = [0]*episodes
    rewards = [0]*episodes
    for i in range(episodes):
        # initialize world
        world = World((5,5), initial_state)
        iterations = 0
        state = world.position
        seen_states.append(state)
        seen_states.append((0,0))
        for action_p in world.singleMoveList():
            Q_prey[(state, action_p)] = initValue
            pi_prey[(state, action_p)] = 1/float(num_actions_prey)
            Q_prey[((0,0), action_p)] = 0
        for action_p in world.allMoveList():
            Q_pred[(state, action_p)] = initValue
            pi_pred[(state, action_p)] = 1/float(num_actions_pred)
            Q_pred[((0,0), action_p)] = 0
        while not world.stopState():
            state = world.position
            # choose actions
            action_pred = greedy_policy(pi_pred, state, world.allMoveList())
            action_prey = greedy_policy(pi_prey, state, world.singleMoveList())
            reward = world.move(action_prey, action_pred)
            new_state = world.position
            iterations += 1
            # update Q
            if new_state not in seen_states:
                seen_states.append(new_state)
                for action_p in world.singleMoveList():
                    Q_prey[(new_state, action_p)] = initValue
                    pi_prey[(new_state, action_p)] = 1/float(num_actions_prey)
                for action_p in world.allMoveList():
                    Q_pred[(new_state, action_p)] = initValue
                    pi_pred[(new_state, action_p)] = 1/float(num_actions_pred)
            best_Q_pred = max([Q_pred[(new_state, action)] for action in world.allMoveList()])
            best_Q_prey = max([Q_prey[(new_state, action)] for action in world.singleMoveList()])
            Q_pred[(state, action_pred)] = (1.0-alpha_pred)*Q_pred[(state, action_pred)] + alpha_pred*(reward[1] + gamma*best_Q_pred)
            Q_prey[(state, action_prey)] = (1.0-alpha_prey)*Q_prey[(state, action_prey)] + alpha_prey*(reward[0] + gamma*best_Q_prey)
            # update pi for the predator and the prey
            if Q_pred[(state, action_pred)] == max([Q_pred[(state, action)] for action in world.allMoveList()]):
                pi_pred[(state, action_pred)] += delta
            else:
                pi_pred[(state, action_pred)] -= delta/(num_actions_pred-1.0)
            if Q_prey[(state, action_prey)] == max([Q_prey[(state, action)] for action in world.singleMoveList()]):
                pi_prey[(state, action_prey)] += delta
            else:
                pi_prey[(state, action_prey)] -= delta/(num_actions_prey-1.0)
            # restrict to a probability distribution and make it epsilon-greedy (divide 0.1 over all actions)
            sum_value = sum([Q_pred[(state, action)] for action in world.allMoveList()])
            for action_p in world.allMoveList():
                if sum_value > 0:
                    pi_pred[(state, action_p)] /= sum_value
                pi_pred[(state, action_p)] *= 0.9
                pi_pred[(state, action_p)] += 0.1/num_actions_pred
            sum_value = sum([Q_prey[(state, action)] for action in world.singleMoveList()])
            for action_p in world.singleMoveList():
                if sum_value > 0:
                    pi_prey[(state, action_p)] /= sum_value
                pi_prey[(state, action_p)] *= 0.9
                pi_prey[(state, action_p)] += 0.1/num_actions_prey
            #alpha *= decay
        rewards[i] = reward[0]
        steps[i] = iterations
        print "Episode", i
    return steps, rewards
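As with Qlearning above, a run only needs an episode count and a starting state. A possible call, for illustration; the starting state is made up and greedy_policy is assumed to be defined elsewhere in the repository:

steps, rewards = policyHillClimbing(5000, [(2, 2)], gamma=0.5, delta=0.2)
print("average steps over the last 100 episodes: %.1f" % (sum(steps[-100:]) / 100.0))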
Example 7: minimax
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
def minimax(episodes, initial_state, epsilon, decay, gamma, alpha_pred=1.0, alpha_prey=1.0):
    # initialization might be too expensive
    Q_pred = dict()
    Q_prey = dict()
    V_pred = dict()
    V_prey = dict()
    pi_pred = dict()
    pi_prey = dict()
    initValue = 1.0
    # initialisation
    world = World((5,5), initial_state)
    for state in world.allStates():
        V_pred[state] = 1.0
        V_prey[state] = 1.0
        for action in world.allMoveList():
            pi_pred[(state, action)] = 1.0/len(world.allMoveList())
            for prey_move in world.singleMoveList():
                Q_pred[(state, action, prey_move)] = 1.0
                Q_prey[(state, action, prey_move)] = 1.0
        for action in world.singleMoveList():
            pi_prey[(state, action)] = 1.0/len(world.singleMoveList())
    # absorbing states
    terminal_state = tuple([(0,0)] * len(initial_state))
    V_pred[terminal_state] = 0.0
    V_prey[terminal_state] = 0.0
    steps = [0]*episodes
    rewards = [0]*episodes
    for epi in range(episodes):
        # initialize world
        world = World((5,5), initial_state)
        # print "Begin Pred", V_pred[world.position]
        # print "End Prey", V_prey[world.position]
        # for s in world.singleMoveList():
        #     print s, "Pred", V_pred[(s,)]
        #     print s, "Prey", V_pred[(s,)]
        #     for a in world.allMoveList():
        #         for a2 in world.singleMoveList():
        #             print s, "Q", a, a2, Q_pred[(state,a,a2)]
        iterations = 0
        while not world.stopState():
            state = world.position
            # choose actions
            action_pred = minimax_policy(epsilon, pi_pred, state, world.allMoveList())
            action_prey = minimax_policy(epsilon, pi_prey, state, world.singleMoveList())
            reward = world.move(action_prey, action_pred)
            iterations += 1
            new_state = world.position
            # update Q
            # if (state, action_prey) not in Q_prey:
            #     Q_prey[state, action_prey] = initValue
            # if (state, action_pred) not in Q_pred:
            #     Q_pred[state, action_pred] = initValue
            Q_pred[(state, action_pred, action_prey)] = (1.0-alpha_pred)*Q_pred[(state, action_pred, action_prey)] + alpha_pred*(reward[1] + gamma*V_pred[new_state])
            Q_prey[(state, action_pred, action_prey)] = (1.0-alpha_prey)*Q_prey[(state, action_pred, action_prey)] + alpha_prey*(reward[0] + gamma*V_prey[new_state])
            # update pi by solving a linear program
            # adapted from the example at http://abel.ee.ucla.edu/cvxopt/examples/tutorial/lp.html
            ## PREDATOR update
            # constraint to minimize w.r.t. the prey action
            minConstr = [[1.0] + [-Q_pred[(state, a_pred, a_prey)] for a_pred in world.allMoveList()] for a_prey in world.singleMoveList()]
            # constraint to keep every pi(a) positive
            posConstr = []
            for i in range(1, len(world.allMoveList())+1):
                new_row = [0.0] * (len(world.allMoveList())+1)
                new_row[i] = -1.0
                posConstr.append(new_row)
            normGreater = [0.0] + [1.0] * len(world.allMoveList())
            normSmaller = [0.0] + [-1.0] * len(world.allMoveList())
            A = matrix([normGreater, normSmaller] + minConstr + posConstr).trans()
            b = matrix([1.0, -1.0] + [0.0] * (len(world.singleMoveList()) + len(world.allMoveList())))
            # -1 for V and 0 for every pi(s,a)
            c = matrix([-1.0] + [0.0] * len(world.allMoveList()))
            sol = solvers.lp(c, A, b)
            V_pred[state] = sol['x'][0]
            for a_pred, x in zip(world.allMoveList(), sol['x'][1:]):
                pi_pred[(state, a_pred)] = x
            ## PREY update
            # constraint to minimize w.r.t. the predator action
            minConstr = [[1.0] + [-Q_prey[(state, a_pred, a_prey)] for a_prey in world.singleMoveList()] for a_pred in world.allMoveList()]
            # constraint to keep every pi(a) positive
            posConstr = []
            for i in range(1, len(world.singleMoveList())+1):
                new_row = [0.0] * (len(world.singleMoveList())+1)
                new_row[i] = -1.0
                posConstr.append(new_row)
            normGreater = [0.0] + [1.0] * len(world.singleMoveList())
            normSmaller = [0.0] + [-1.0] * len(world.singleMoveList())
#......... the rest of this example is omitted .........
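Each linear program above maximizes the value V of one agent's mixed strategy against the opponent's best response. The following self-contained sketch shows the same construction on a tiny made-up 2x2 zero-sum payoff matrix; it is not part of the collected code and only needs cvxopt:

from cvxopt import matrix, solvers

solvers.options['show_progress'] = False   # silence the solver's progress output

# hypothetical zero-sum payoffs Q[a_pred][a_prey] for 2 predator and 2 prey actions
Q = [[1.0, -1.0],
     [-0.5, 0.5]]
n = len(Q)

# variables x = [V, pi_1, ..., pi_n]; maximizing V means minimizing -V
c = matrix([-1.0] + [0.0] * n)
# inequality rows: sum(pi) <= 1 and sum(pi) >= 1 (together sum(pi) = 1),
# V <= sum_a pi_a * Q[a][a_prey] for every prey action, and every pi_a >= 0
normGreater = [0.0] + [1.0] * n
normSmaller = [0.0] + [-1.0] * n
minConstr = [[1.0] + [-Q[a][prey] for a in range(n)] for prey in range(n)]
posConstr = [[0.0] * i + [-1.0] + [0.0] * (n - i) for i in range(1, n + 1)]
G = matrix([normGreater, normSmaller] + minConstr + posConstr).trans()
h = matrix([1.0, -1.0] + [0.0] * (n + n))
sol = solvers.lp(c, G, h)

print("value of the game: %.3f" % sol['x'][0])
print("maximizing strategy: %s" % [round(p, 3) for p in sol['x'][1:]])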
Example 8: World
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
'''
Created on Jan 18, 2014

@author: anthony.lozano
'''
from world import World

if __name__ == '__main__':
    world = World(16, 16, 1, see_port="COM8")
    #world.carve_path()
    print world
    for i in range(20):
        world.communicate()
        input = raw_input()
        # w/s move forward/backward, a/d turn left/right
        if input == "w":
            world.move(True)
        elif input == "a":
            world.turn(False)
        elif input == "d":
            world.turn(True)
        elif input == "s":
            world.move(False)
Example 9: MCoff
# Required import: from world import World [as alias]
# Or: from world.World import move [as alias]
def MCoff(episodes, behaPolicy, matches=[], initValue=15, discount=0.9):
    # behaPolicy = dictionary with keys (state, action) and value P(action|state)
    world = World((0,0), (1,1))
    movelist = world.moveList()

    def policy(world):
        return world.pickElementWithProbs([(move, behaPolicy[(world.position, move)]) for move in movelist])

    # initialize the Q value table and the return list for every (s,a)-pair
    Q = {}
    R = {}
    num = {}
    denum = {}
    for state in world.allStates():
        for move in world.moveList():
            num[state, move] = 0.0
            denum[state, move] = 0.0
            Q[state, move] = float(initValue)   # some value
            R[state, move] = []                 # empty list; return = cumulative discounted reward
    steps = [0]*episodes   # list counting the number of iterations
    for epi in range(episodes):
        time = 0
        totalTime = 0
        # initialize world
        world.setState((-5,-5))
        episode = []
        while True:
            action = policy(world)
            episode.append((world.position, action))
            if action == None:
                print action, state
            world.move(action)
            if world.stopState():
                break
            world.performPreyMove()
        # save the pairs that match, and their first occurrence
        matchingHistory = {}
        # last: index after which every move in the episode agrees with the greedy policy
        last = 0
        for i, (state, action) in enumerate(episode[::-1]):
            actionValues = [(Q[state, maction], maction) for maction in world.moveList()]
            bestActions = [actionValues[j][1] for j in maxIndices(actionValues)]
            matchingHistory[(state, action)] = len(episode)-i - 1
            if action not in bestActions:
                last = len(episode)-i
                break
        matches.append(len(episode)-last)
        for (state, action) in matchingHistory:
            if matchingHistory[(state, action)] >= last-1:
                w = np.prod([1.0/behaPolicy[episode[j]] for j in range(matchingHistory[(state, action)], len(episode))])
                num[(state, action)] += w * (10.0*discount**matchingHistory[(state, action)])   # return is gamma^{T-t}*10
                denum[(state, action)] += w
                Q[(state, action)] = num[(state, action)]/float(denum[(state, action)])
        world.setState((-5,-5))
        iterations = 0
        while True:
            iterations += 1
            # follow the greedy policy from the current position
            state = world.position
            actionValues = [(maction, Q[state, maction]) for maction in world.moveList()]
            bestAction = random.choice([actionValues[j][0] for j in maxIndices(actionValues)])
            world.move(bestAction)
            if world.stopState() or iterations > 2000:
                break
            world.performPreyMove()
        steps[epi] = iterations
    return steps
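MCoff expects behaPolicy to map every (state, action) pair to P(action|state). For illustration only, a uniform random behaviour policy built over the same (0,0)/(1,1) world used above, followed by a call with an arbitrary episode count:

world = World((0, 0), (1, 1))
moves = world.moveList()
behaPolicy = {}
for state in world.allStates():
    for move in moves:
        behaPolicy[(state, move)] = 1.0 / len(moves)

steps = MCoff(200, behaPolicy)
print("mean steps of the greedy evaluation: %.1f" % (sum(steps) / float(len(steps))))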