本文整理汇总了Python中agent.Agent.getMoveList方法的典型用法代码示例。如果您正苦于以下问题：Python Agent.getMoveList方法的具体用法？Python Agent.getMoveList怎么用？Python Agent.getMoveList使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类agent.Agent的用法示例。
在下文中一共展示了Agent.getMoveList方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: valueIteration
# 需要导入模块: from agent import Agent [as 别名]
# 或者: from agent.Agent import getMoveList [as 别名]
def valueIteration(discountFactor, epsilon=0.01):
    """Run value iteration for the predator on an 11x11 wrap-around grid.

    A state is a ``(predloc, preyloc)`` pair of distinct grid cells.  Each
    sweep recomputes every state's value from the previous sweep's values
    (synchronous update): a predator move that lands on the prey is worth
    10.0, any other move is worth the discounted, weighted sum of the values
    of the states produced by ``prey.expand(newPredloc)``.

    Args:
        discountFactor: Multiplier applied to the value of successor states.
        epsilon: Convergence threshold.  Sweeps stop once the largest
            absolute change of any state value in a sweep is ``<= epsilon``.
            At least one sweep is always performed.

    Returns:
        A tuple ``(numIt, values, policy)`` where ``numIt`` is the number of
        sweeps performed, ``values`` maps ``(predloc, preyloc)`` to the final
        state value, and ``policy(state)`` returns the best predator move
        recorded for ``state`` during the last sweep.  ``policy`` raises
        ``KeyError`` for states that were never evaluated (for example when
        predator and prey share a cell).
    """
    gridSize = 11
    # all locations in the grid
    cells = [(x, y) for x in range(gridSize) for y in range(gridSize)]
    # every state in which predator and prey occupy different cells
    states = [(predloc, preyloc)
              for predloc in cells
              for preyloc in cells
              if preyloc != predloc]

    # initialize values
    values = dict.fromkeys(states, 0)
    bestMoves = {}
    agent = Agent(0, 0)
    numIt = 0

    # Start above any finite threshold so the first sweep always runs.
    delta = float("inf")
    # perform value iteration according to the pseudo-code
    while delta > epsilon:
        delta = 0
        newValues = {}
        # loop over all states
        for predloc, preyloc in states:
            agent.setLocation(predloc)
            prey = Prey(*preyloc)

            # find the optimal value according to the current values
            bestVal = 0
            bestMove = (0, 0)
            # the first element of each move entry is not needed here
            for _, predMove in agent.getMoveList():
                # the grid wraps around in both dimensions
                newPredloc = ((predloc[0] + predMove[0]) % gridSize,
                              (predloc[1] + predMove[1]) % gridSize)
                if newPredloc == preyloc:
                    # the predator lands on the prey: immediate reward
                    moveVal = 10.0
                else:
                    # presumably expand() yields (probability, new prey
                    # location) pairs -- confirm against Prey.expand
                    moveVal = sum(
                        preyProb * discountFactor * values[(newPredloc, newPreyloc)]
                        for preyProb, newPreyloc in prey.expand(newPredloc))
                # "<=" means that, on ties, the move listed last wins
                if bestVal <= moveVal:
                    bestVal = moveVal
                    bestMove = predMove

            state = (predloc, preyloc)
            newValues[state] = bestVal
            bestMoves[state] = bestMove
            delta = max(delta, abs(bestVal - values[state]))
        values = newValues
        numIt += 1

    # greedy policy with respect to the values computed above
    def policy(state):
        predloc, preyloc = state
        return bestMoves[(predloc, preyloc)]

    return numIt, values, policy
示例2: valueIteration
# 需要导入模块: from agent import Agent [as 别名]
# 或者: from agent.Agent import getMoveList [as 别名]
def valueIteration(discountFactor=0.8, epsilon=0.01):
    """Run value iteration over predator/prey states encoded via rewriteStates.

    Every non-zero offset ``(dx, dy)`` with ``-5 <= dx, dy <= 5`` is turned
    into a concrete state by placing the predator at ``(5, 5)`` and the prey
    at ``(5 + dx, 5 + dy)``; ``rewriteStates(predloc, preyloc)`` supplies the
    dictionary key for that state.  Each sweep recomputes every state's value
    from the previous sweep's values: a predator move that lands on the prey
    is worth 10.0, any other move is worth the discounted, weighted sum of
    the values of the states produced by ``prey.expand(newPredloc)``.

    Args:
        discountFactor: Multiplier applied to the value of successor states.
        epsilon: Convergence threshold.  Sweeps stop once the largest
            absolute change of any state value in a sweep is ``<= epsilon``.
            At least one sweep is always performed.

    Returns:
        A function ``policy(state)`` that takes a ``(predloc, preyloc)`` pair
        and returns the best predator move recorded for the key
        ``rewriteStates(predloc, preyloc)`` during the last sweep.  It raises
        ``KeyError`` if that key was never evaluated.
    """
    # the relative positions vary from -5 up to 5, in both dimensions
    offsets = range(-5, 6)
    alldiffs = [(x, y) for x in offsets for y in offsets if (x, y) != (0, 0)]
    values = {(x, y): 0 for x in offsets for y in offsets}

    bestMoves = {}
    agent = Agent(0, 0)
    # we place the predator in the middle of the world,
    # we are allowed to do this, since the positions are encoded relatively
    predloc = (5, 5)

    # Start above any finite threshold so the first sweep always runs.
    delta = float("inf")
    while delta > epsilon:
        delta = 0
        newValues = {}
        for dx, dy in alldiffs:
            preyloc = (predloc[0] + dx, predloc[1] + dy)
            # presumably the relative encoding of the state -- confirm
            # against rewriteStates
            curKey = rewriteStates(predloc, preyloc)
            agent.setLocation(predloc)
            prey = Prey(*preyloc)

            bestVal = 0
            bestMove = (0, 0)
            # the first element of each move entry is not needed here
            for _, predMove in agent.getMoveList():
                newPredloc = agent.locAfterMove(predMove)
                if newPredloc == preyloc:
                    # the predator lands on the prey: immediate reward
                    moveVal = 10.0
                else:
                    # using rewriteStates we use relative positions
                    moveVal = sum(
                        preyProb * discountFactor
                        * values[rewriteStates(newPredloc, newPreyloc)]
                        for preyProb, newPreyloc in prey.expand(newPredloc))
                # "<=" means that, on ties, the move listed last wins
                if bestVal <= moveVal:
                    bestVal = moveVal
                    bestMove = predMove

            newValues[curKey] = bestVal
            bestMoves[curKey] = bestMove
            delta = max(delta, abs(bestVal - values[curKey]))
        values = newValues

    # greedy policy with respect to the values computed above
    def policy(state):
        predloc, preyloc = state
        return bestMoves[rewriteStates(predloc, preyloc)]

    return policy