本文整理汇总了Python中agent.Agent.numValidTrt方法的典型用法代码示例。如果您正苦于以下问题：Python Agent.numValidTrt方法的具体用法？Python Agent.numValidTrt怎么用？Python Agent.numValidTrt使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类agent.Agent的用法示例。
在下文中一共展示了Agent.numValidTrt方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: solve
# 需要导入模块: from agent import Agent [as 别名]
# 或者: from agent.Agent import numValidTrt [as 别名]
def solve(system,initV = None, gamma = 0.9, maxIt = 1000, tol = 1e-8):
    """Iterate ValueIteration.operT until successive iterates agree.

    Args:
        system: object exposing ``system.network.numNodes``; it is passed
            through unchanged to ``ValueIteration.operT``.
        initV: starting value vector.  When None, a zero vector of length
            ``1 << numNodes`` is used.
        gamma: discount factor forwarded to ``ValueIteration.operT``.
        maxIt: maximum number of applications of ``operT``.
        tol: stop once the 2-norm of the difference between two successive
            iterates is below this threshold.

    Returns:
        The last iterate produced by ``operT``.

    Raises:
        ValueError: if ``maxIt`` iterations complete without the difference
            dropping below ``tol``.
    """
    if initV is None:
        initV = np.zeros((1 << system.network.numNodes,))

    v0 = initV
    for _ in range(maxIt):
        v1 = ValueIteration.operT(system,gamma,v0)
        converged = np.linalg.norm(v1 - v0,2) < tol
        v0 = v1
        if converged:
            # Return as soon as the tolerance is met, so that converging on
            # the very last permitted iteration is not reported as a failure.
            return v0

    raise ValueError("ValueIteration hit iteration limit")
示例2: operT
# 需要导入模块: from agent import Agent [as 别名]
# 或者: from agent.Agent import numValidTrt [as 别名]
def operT(system,gamma,v):
    """Apply one backup step to the value vector ``v``.

    For every treatment index in ``range(numValidTrt)`` the pair ``(P, R)``
    from ``ValueIteration.calcPAndR`` is used to form
    ``R + gamma * P.dot(v)``; the result is the row-wise maximum over those
    candidates.

    Args:
        system: object exposing ``system.network.numNodes``; also handed to
            ``Agent.numTrt`` and ``ValueIteration.calcPAndR``.
        gamma: multiplier applied to ``P.dot(v)``.
        v: current value vector (assumed to have length ``1 << numNodes`` so
            that ``P.dot(v)`` is well defined -- confirm against callers).

    Returns:
        Array of length ``1 << numNodes`` holding the per-row maximum.
    """
    nNodes = system.network.numNodes
    nActions = Agent.numValidTrt(nNodes,Agent.numTrt(system))

    # One column per treatment index, one row per state index.
    backups = np.zeros((1 << nNodes, nActions))
    for action in range(nActions):
        trans,rew = ValueIteration.calcPAndR(system,action)
        backups[:,action] = rew + gamma * trans.dot(v)

    return backups.max(axis = 1)
示例3: unitTest
# 需要导入模块: from agent import Agent [as 别名]
# 或者: from agent.Agent import numValidTrt [as 别名]
def unitTest(cls):
    """Print ValueIteration's values next to PolicyIteration's for comparison.

    Builds a System from a 3x3 grid network and a PJ model, solves it with
    both ValueIteration.solve and PolicyIteration.solve, reduces the
    unflattened Q result to its per-row maximum, and prints the two columns
    side by side.  Nothing is asserted; the output is for visual inspection.
    """
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("Testing ValueIteration")
    np.random.seed(0)

    from system import System
    from networks import genGridNetwork
    from model import PJ

    system = System(genGridNetwork((3,3)),PJ())

    numNodes = system.network.numNodes
    numTrt = Agent.numTrt(system)
    numValidTrt = Agent.numValidTrt(numNodes,numTrt)

    # Each solver gets its own dc(system) (presumably a deep copy -- confirm
    # against the file's imports) so neither run sees the other's changes.
    v = ValueIteration.solve(dc(system))
    q = PolicyIteration.solve(dc(system))
    q = util.unflattenQ(q,numNodes,numValidTrt)

    # Row-wise maximum of the unflattened Q, to compare against v.
    vChk = [max(row) for row in q]

    for pair in zip(v,vChk):
        print("% 12.6f % 10.6f" % pair)
示例4: calcPAndR
# 需要导入模块: from agent import Agent [as 别名]
# 或者: from agent.Agent import numValidTrt [as 别名]
def calcPAndR(system,trtInd):
    """Build the matrix P and vector R for one treatment index.

    The treatment combination for ``trtInd`` is obtained from
    ``util.ind2Combo`` and passed to ``system.trtCmb``.  For every state
    index ``s`` the function calls ``system.infCmb(cmb = s)`` and reads
    per-node probabilities from ``system.model.transProbs(system)``.

    ``P[s, sp]`` is the product over nodes ``i`` of ``probs[i]`` when bit
    ``i`` differs between ``s`` and ``sp`` and ``1 - probs[i]`` otherwise.
    The product is accumulated in log space, and any factor below 1e-13
    contributes a fixed log value of -30 instead of its true logarithm.
    ``R[s]`` is the sum over ``sp`` of
    ``P[s, sp] * reward(s, trtCmb, sp, numNodes)``.

    NOTE(review): ``system`` is changed through ``trtCmb`` / ``infCmb`` and
    is not restored afterwards -- callers presumably pass a copy; confirm.

    Args:
        system: object exposing ``network.numNodes``, ``trtCmb``, ``infCmb``
            and ``model.transProbs``.
        trtInd: treatment index forwarded to ``util.ind2Combo``.

    Returns:
        Tuple ``(P, R)`` with shapes ``(1 << numNodes, 1 << numNodes)`` and
        ``(1 << numNodes,)``.
    """
    numNodes = system.network.numNodes
    numTrt = Agent.numTrt(system)
    numStates = 1 << numNodes

    tiny = 1e-13     # factors below this are treated as (almost) zero
    logFloor = -30   # log value substituted for such factors

    P = np.zeros((numStates,numStates))
    R = np.zeros((numStates,))

    trtCmb = util.ind2Combo(trtInd,numNodes,numTrt)
    system.trtCmb(cmb = trtCmb)

    for s in range(numStates):
        system.infCmb(cmb = s)
        probs = system.model.transProbs(system)

        # The per-node log terms depend only on s, so compute them once here
        # instead of once for every successor state sp.
        logFlip = []
        logStay = []
        for i in range(numNodes):
            p = probs[i]
            logFlip.append(logFloor if p < tiny else np.log(p))
            logStay.append(logFloor if 1.0 - p < tiny else np.log(1.0 - p))

        for sp in range(numStates):
            changes = s ^ sp
            logProb = 0.0
            bit = 1
            for i in range(numNodes):
                # Bit i set in changes means node i differs between s and sp.
                logProb += logFlip[i] if changes & bit else logStay[i]
                bit <<= 1

            prob = np.exp(logProb)
            P[s,sp] = prob
            R[s] += prob * reward(s,trtCmb,sp,numNodes)

    return P,R