本文整理汇总了 Python 中 mdp.getPossibleActions 函数的典型用法代码示例。如果您正苦于以下问题：Python getPossibleActions 函数的具体用法？Python getPossibleActions 怎么用？Python getPossibleActions 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 getPossibleActions 函数的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: __init__
def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
    Store the MDP settings and run batch value iteration.

    Each sweep writes into a copy of ``self.values`` and only swaps the
    copy in once every state has been processed, so all Q-values of a
    sweep are computed while ``self.values`` still holds the previous
    sweep's numbers.

    Args:
        mdp: object providing getStates(), getPossibleActions(state)
            and isTerminal(state).
        discount: discount factor, stored on ``self.discount``.
        iterations: number of sweeps to run.
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # A Counter is a dict with default 0

    for _ in range(iterations):
        # presumably getQValue reads self.values; updating a copy keeps
        # the sweep a batch update -- confirm against getQValue.
        currentValues = self.values.copy()
        for s in mdp.getStates():
            if mdp.isTerminal(s):
                continue
            qValues = [self.getQValue(s, a) for a in mdp.getPossibleActions(s)]
            # A non-terminal state without actions keeps its previous
            # value instead of crashing max() on an empty list.
            if qValues:
                currentValues[s] = max(qValues)
        self.values = currentValues
示例2: __init__
def __init__(self, mdp, discount=0.9, iterations=100):
    """
    Store the MDP settings and run ``iterations`` rounds of value iteration.

    Every round backs up each non-terminal state from a snapshot of the
    values taken at the start of that round; results are written into
    ``self.values`` as they are computed.

    mdp methods used here:
        mdp.getStates()
        mdp.getPossibleActions(state)
        mdp.getTransitionStatesAndProbs(state, action)
        mdp.getReward(state, action, nextState)
        mdp.isTerminal(state)
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # A Counter is a dict with default 0
    # defaultdict(None) has no default factory: missing keys raise KeyError.
    self.optimalActionInState = collections.defaultdict(None)

    def expected_return(state, action, snapshot):
        # Probability-weighted reward plus discounted snapshot value.
        expected = 0
        for successor, chance in self.mdp.getTransitionStatesAndProbs(state, action):
            payoff = self.mdp.getReward(state, action, successor)
            expected += chance * (payoff + self.discount * snapshot[successor])
        return expected

    for _ in range(iterations):
        snapshot = self.values.copy()
        for state in mdp.getStates():
            if self.mdp.isTerminal(state):
                continue
            actions = mdp.getPossibleActions(state)
            # With no actions the state's value is 0; otherwise start
            # below every possible Q-value.
            best = float("-inf") if actions else 0
            for action in actions:
                best = max(best, expected_return(state, action, snapshot))
            self.values[state] = best
示例3: __init__
def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
    Store the MDP settings and run batch value iteration.

    New state values are staged in a helper Counter during a sweep and
    copied into ``self.values`` only after the sweep finishes, so every
    backup within a sweep reads the previous sweep's values.

    Args:
        mdp: object providing getStates(), getPossibleActions(state),
            getTransitionStatesAndProbs(state, action),
            getReward(state, action, nextState) and isTerminal(state).
        discount: discount factor, stored on ``self.discount``.
        iterations: number of sweeps to run.
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # A Counter is a dict with default 0
    helper_vector = util.Counter() # Staging copy used for batch updating

    def q_value(state, action):
        # Expected immediate reward plus discounted value of the successor.
        return sum(
            prob * (mdp.getReward(state, action, next_state)
                    + self.discount * self.values[next_state])
            for next_state, prob in mdp.getTransitionStatesAndProbs(state, action)
        )

    for _ in range(self.iterations):
        for state in mdp.getStates():
            if mdp.isTerminal(state):
                continue
            actions = mdp.getPossibleActions(state)
            # States without actions are left untouched in the staging
            # vector, exactly as before.
            if actions:
                helper_vector[state] = max(q_value(state, a) for a in actions)
        for state in helper_vector:
            self.values[state] = helper_vector[state]
示例4: __init__
def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
    Build Q- and V-tables for ``mdp`` by running value iteration.

    Results are kept in ``self.qTable[state][action]`` and
    ``self.vTable[state]``. After the sweeps, the Q-table is recomputed
    once more against the final V-table.

    NOTE(review): ``self.values`` is created but never filled in by this
    constructor; presumably the class's accessors read the tables
    instead -- confirm.

    Args:
        mdp: object providing getStates(), getPossibleActions(state),
            getTransitionStatesAndProbs(state, action),
            getReward(state, action, nextState) and isTerminal(state).
        discount: discount factor, stored on ``self.discount``.
        iterations: number of sweeps; ``self.depth`` counts from 1 up to
            ``iterations + 1``.
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # A Counter is a dict with default 0
    self.depth = 1
    self.qTable = {}
    self.vTable = {}
    for state in mdp.getStates():
        self.vTable[state] = 0
        self.qTable[state] = {}
        for action in mdp.getPossibleActions(state):
            self.qTable[state][action] = 0

    def backup(state, action):
        # One-step lookahead for (state, action) against the current vTable.
        return sum(
            prob * (mdp.getReward(state, action, nextState)
                    + self.discount * self.vTable[nextState])
            for nextState, prob in mdp.getTransitionStatesAndProbs(state, action)
        )

    while self.depth < self.iterations + 1:
        # Fill a fresh table so every backup in this sweep reads the
        # previous sweep's vTable.
        newTable = {}
        for state in mdp.getStates():
            if mdp.isTerminal(state):
                newTable[state] = 0
                continue
            actions = mdp.getPossibleActions(state)
            for action in actions:
                self.qTable[state][action] = backup(state, action)
            # Take the true maximum; a -9999 sentinel would clip very
            # negative values and leak into states that have no actions.
            if actions:
                newTable[state] = max(self.qTable[state][a] for a in actions)
            else:
                newTable[state] = 0
        self.vTable = newTable
        self.depth += 1

    # Bring the Q-table in line with the final V-table.
    for state in mdp.getStates():
        for action in mdp.getPossibleActions(state):
            self.qTable[state][action] = backup(state, action)
示例5: __init__
def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
    Store the MDP settings and run ``iterations`` sweeps of value iteration.

    Each sweep fills a brand-new Counter from the values of the previous
    sweep and then replaces ``self.values`` with it. States that have no
    possible actions are assigned 0.

    mdp methods used here:
        mdp.getStates()
        mdp.getPossibleActions(state)
        mdp.getTransitionStatesAndProbs(state, action)
        mdp.getReward(state, action, nextState)
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # A Counter is a dict with default 0

    for _ in range(iterations):
        fresh = util.Counter()
        for state in mdp.getStates():
            candidates = []
            for action in mdp.getPossibleActions(state):
                expected = 0
                for successor, chance in mdp.getTransitionStatesAndProbs(state, action):
                    gain = mdp.getReward(state, action, successor)
                    expected = expected + (chance * (gain + (discount * self.values[successor])))
                candidates.append(expected)
            # One candidate per action, so an empty list means "no actions".
            fresh[state] = max(candidates) if candidates else 0
        self.values = fresh
示例6: __init__
def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
    Run batch value iteration, then derive a greedy policy.

    ``self.values`` is replaced after every sweep by a Counter holding
    the new value of each non-terminal state. Afterwards
    ``self.policy[state]`` is set to the first action with the highest
    ``self.getQValue(state, action)``, or ``None`` for terminal states
    and states without actions.

    ``self.iterations`` keeps the configured sweep count; it is no
    longer counted down to zero while iterating.

    Args:
        mdp: object providing getStates(), getPossibleActions(state),
            getTransitionStatesAndProbs(state, action),
            getReward(state, action, nextState) and isTerminal(state).
        discount: discount factor, stored on ``self.discount``.
        iterations: number of sweeps to run.
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # A Counter is a dict with default 0
    self.delta = 0

    for _ in range(self.iterations):
        batchValues = util.Counter()
        for state in mdp.getStates():
            if mdp.isTerminal(state):
                continue
            best = None
            for action in mdp.getPossibleActions(state):
                sumU = 0
                Rs = 0
                for nextState, prob in mdp.getTransitionStatesAndProbs(state, action):
                    sumU = sumU + self.values[nextState] * prob
                    Rs = Rs + mdp.getReward(state, action, nextState) * prob
                v = Rs + sumU * discount
                if best is None or v > best:
                    best = v
            # No sentinel: a state without actions is worth 0, and
            # arbitrarily negative Q-values are kept as they are.
            batchValues[state] = 0 if best is None else best
        self.values = batchValues

    self.policy = {}
    for state in mdp.getStates():
        actions = mdp.getPossibleActions(state)
        if mdp.isTerminal(state) or not actions:
            self.policy[state] = None
            continue
        # max() returns the first maximal item, matching the earlier
        # "index of the max Q-value" tie-breaking.
        self.policy[state] = max(actions, key=lambda a: self.getQValue(state, a))
示例7: __init__
def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
    Store the MDP settings and run ``iterations`` sweeps of value iteration.

    This variant uses a state-only reward: a state's new value is
    ``mdp.getReward(state)`` plus the discounted best expected successor
    value, where successor values come from a snapshot taken at the
    start of the sweep. States without possible actions are skipped.

    mdp methods used here:
        mdp.getStates()
        mdp.getPossibleActions(state)
        mdp.getTransitionStatesAndProbs(state, action)
        mdp.getReward(state)
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # A Counter is a dict with default 0

    for _ in range(iterations):
        previous = self.values.copy()
        for state in mdp.getStates():
            possibleActions = mdp.getPossibleActions(state)
            if not possibleActions:
                continue
            # Expected successor value for each action.
            results = [
                sum(prob * previous[nextState]
                    for (nextState, prob) in mdp.getTransitionStatesAndProbs(state, action))
                for action in possibleActions
            ]
            self.values[state] = mdp.getReward(state) + (discount * max(results))
示例8: __init__
def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
    Store the MDP settings and run ``iterations`` sweeps of value iteration.

    Per state, the expected return of every action is accumulated in a
    Counter keyed by action (using a copy of the values from the start
    of the sweep), and the entry selected by ``argMax()`` becomes the
    state's new value.

    mdp methods used here:
        mdp.getStates()
        mdp.getPossibleActions(state)
        mdp.getTransitionStatesAndProbs(state, action)
        mdp.getReward(state, action, nextState)
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # A Counter is a dict with default 0

    for _ in range(0, iterations):
        # Frozen copy so writes to self.values do not affect this sweep.
        frozen = self.values.copy()
        for state in mdp.getStates():
            perAction = util.Counter()
            # Lazily walk every (action, successor, probability) triple.
            outcomes = (
                (action, successor, chance)
                for action in mdp.getPossibleActions(state)
                for successor, chance in mdp.getTransitionStatesAndProbs(state, action)
            )
            for action, successor, chance in outcomes:
                perAction[action] += chance * (mdp.getReward( state, action, successor) + discount * frozen[successor])
            bestAction = perAction.argMax()
            self.values[state] = perAction[bestAction]
示例9: __init__
def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
    Store the MDP settings and run ``iterations`` sweeps of value iteration.

    For each state the per-action expected returns are summed into a
    Counter, reading successor values from a copy made at the start of
    the sweep; the entry picked by ``argMax()`` is stored as the state's
    value.

    mdp methods used here:
        mdp.getStates()
        mdp.getPossibleActions(state)
        mdp.getTransitionStatesAndProbs(state, action)
        mdp.getReward(state, action, nextState)
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # A Counter is a dict with default 0

    for _ in range(iterations):
        V = self.values.copy()
        for state in mdp.getStates():
            action_values = util.Counter()
            for action in mdp.getPossibleActions(state):
                transitions = mdp.getTransitionStatesAndProbs(state, action)
                for trans_state, prob in transitions:
                    reward = mdp.getReward( state, action, trans_state)
                    backed_up = reward + discount * V[trans_state]
                    # Accumulate in place: an action only gets an entry
                    # once it has contributed at least one transition.
                    action_values[action] = action_values[action] + prob * backed_up
            best_action = action_values.argMax()
            self.values[state] = action_values[best_action]
示例10: __init__
def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
    Store the MDP settings and run batch value iteration.

    Each sweep computes ``self.computeQValueFromValues(state, action)``
    for every action of every state, collects the new state values in a
    fresh Counter and swaps it into ``self.values`` once the sweep is
    complete.

    ``self.values`` stays a ``util.Counter`` throughout, so lookups of
    unseen states keep returning the default 0 (a plain dict would
    raise KeyError).

    Args:
        mdp: object providing getStates() and getPossibleActions(state).
        discount: discount factor, stored on ``self.discount``.
        iterations: number of sweeps to run.
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # A Counter is a dict with default 0

    states = mdp.getStates()
    for _ in range(iterations):
        newValues = util.Counter()
        for state in states:
            qValues = util.Counter()
            for action in mdp.getPossibleActions(state):
                qValues[action] = self.computeQValueFromValues(state, action)
            # NOTE(review): for a state without actions this relies on
            # argMax() of an empty Counter yielding a key whose lookup
            # gives the default 0 -- confirm against util.Counter.
            newValues[state] = qValues[qValues.argMax()]
        self.values = newValues
示例11: __init__
def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
    Store the MDP settings and run ``iterations`` sweeps of value iteration.

    ``self.vks`` holds a copy of the values taken at the start of the
    current sweep; every action's expected return is computed from it
    and the entry chosen by ``argMax()`` is written to ``self.values``.

    mdp methods used here:
        mdp.getStates()
        mdp.getPossibleActions(state)
        mdp.getTransitionStatesAndProbs(state, action)
        mdp.getReward(state, action, nextState)
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # A Counter is a dict with default 0
    self.vks = util.Counter()

    for _ in range(0, iterations):
        self.vks = self.values.copy()
        for s in mdp.getStates():
            qvals = util.Counter()
            for action in mdp.getPossibleActions(s):
                running = 0
                for ss, prob in self.mdp.getTransitionStatesAndProbs(s, action):
                    running = running + prob*(self.mdp.getReward(s,action,ss) + self.discount*(self.vks[ss]))
                # Every action gets an entry, even with no transitions.
                qvals[action] = running
            self.values[s] = qvals[qvals.argMax()]
示例12: __init__
def __init__(self, mdp, discount=0.9, iterations=100):
    """
    Store the MDP settings and run value iteration.

    Every sweep backs up each state that has at least one action, using
    a copy of the values taken at the start of the sweep. States without
    actions are skipped.

    ``self.iterations`` keeps the configured sweep count; it is no
    longer decremented to zero while iterating.

    Args:
        mdp: object providing getStates(), getPossibleActions(state),
            getTransitionStatesAndProbs(state, action) and
            getReward(state, action, nextState).
        discount: discount factor, stored on ``self.discount``.
        iterations: number of sweeps to run.
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # A Counter is a dict with default 0

    for _ in range(self.iterations):
        prev_values = self.values.copy()
        for state in mdp.getStates():
            actions = mdp.getPossibleActions(state)
            if not actions:
                continue
            # Best expected (reward + discounted previous value) over actions.
            self.values[state] = max(
                sum(prob * (mdp.getReward(state, act, state1) + discount * prev_values[state1])
                    for state1, prob in mdp.getTransitionStatesAndProbs(state, act))
                for act in actions
            )
示例13: __init__
def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
    Store the MDP settings and run value iteration with step-indexed values.

    Values are stored in ``self.values`` under ``(state, step)`` keys,
    where ``step`` is the current ``self.count`` (1 .. iterations).
    For the action named "exit" the value is the reward for moving to
    'TERMINAL_STATE'; otherwise it is the largest
    ``self.getQValue(state, action)``. If a state offers both, the
    largest Q-value wins because it is written last.

    Runs unchanged on Python 2 and Python 3 (no ``sys.maxint`` or
    ``dict.iteritems``).

    Author - Shandheap Shanmuganathan

    Args:
        mdp: object providing getStates(), getPossibleActions(state)
            and getReward(state, action, nextState).
        discount: discount factor, stored on ``self.discount``.
        iterations: number of steps to compute.
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # A Counter is a dict with default values as 0
    self.count = 1
    # NOTE(review): self.count is advanced on the instance, not in a
    # local, presumably because getQValue reads it to pick the previous
    # step's values -- confirm before restructuring this loop.
    while self.count <= iterations:
        for state in mdp.getStates():
            possibleActions = mdp.getPossibleActions(state)
            if not possibleActions:
                continue
            QValues = {}
            for action in possibleActions:
                if action == "exit":
                    self.values[state, self.count] = self.mdp.getReward(state, action, 'TERMINAL_STATE')
                else:
                    QValues[action] = self.getQValue(state, action)
            # Only overwrite when some non-exit action was evaluated.
            if QValues:
                self.values[state, self.count] = max(QValues.values())
        self.count += 1
示例14: __init__
def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
    Store the MDP settings and run ``iterations`` sweeps of value iteration.

    Each sweep works on a copy of ``self.values``: every state receives
    the largest ``self.getQValue(state, action)`` over its actions, or 0
    when it has none, and the copy replaces ``self.values`` at the end
    of the sweep.

    mdp methods used here:
        mdp.getStates()
        mdp.getPossibleActions(state)
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # A Counter is a dict with default 0
    self.qvalues = util.Counter()

    states = mdp.getStates()
    for _ in range(self.iterations):
        updated = self.values.copy()
        for state in states:
            scores = [self.getQValue(state, action)
                      for action in mdp.getPossibleActions(state)]
            updated[state] = max(scores) if scores else 0
        self.values = updated
示例15: __init__
def __init__(self, mdp, discount = 0.9, iterations = 100):
    """
    Store the MDP settings and run batch value iteration.

    Each sweep updates a copy of ``self.values`` with the largest
    ``self.getQValue(state, action)`` of every non-terminal state and
    swaps the copy in when the sweep is done.

    Runs unchanged on Python 2 and Python 3 (``range`` instead of
    ``xrange``).

    Args:
        mdp: object providing getStates(), getPossibleActions(state)
            and isTerminal(state).
        discount: discount factor, stored on ``self.discount``.
        iterations: number of sweeps to run.
    """
    self.mdp = mdp
    self.discount = discount
    self.iterations = iterations
    self.values = util.Counter() # value of each state; a Counter is a dict with default 0

    # run for desired number of iterations
    for _ in range(iterations):
        new_values = self.values.copy()
        for s in mdp.getStates():
            if mdp.isTerminal(s):
                continue
            q_values = [self.getQValue(s, a) for a in mdp.getPossibleActions(s)]
            # A non-terminal state without actions keeps its previous
            # value instead of crashing max() on an empty list.
            if q_values:
                new_values[s] = max(q_values)
        self.values = new_values