This page collects typical usage examples of the Python method net.Net.updateQ2. If you have been wondering how the Python Net.updateQ2 method is used in practice, or what calling it looks like, the curated code examples below may help. You can also explore further usage examples of the class it belongs to, net.Net.
One code example of the Net.updateQ2 method is shown below; by default, examples are sorted by popularity. You can upvote the examples you like or find useful, and your votes help the system recommend better Python code examples.
Example 1: __init__
# Required import: from net import Net [as alias]
# Or: from net.Net import updateQ2 [as alias]
import numpy as np               # imports used below; assumed to live in
import matplotlib.pyplot as plt  # the omitted portion of the original file
#......... part of the code is omitted here .........
    # Tail of a debugging/plotting method (its def is in the omitted part):
    # print the recent Q-values, show the last n frames in a 10x6 grid,
    # then plot the per-step maximum Q-value (random actions are stored
    # as plain ints in lastQs and are skipped).
    n = 60
    for i, v in enumerate(self.lastQs):
        print('#', i, ':', v)
    frames = self.data.getLastFrames(n)
    for p in range(n):
        plt.subplot(10, 6, p + 1)
        plt.imshow(frames[p, :, :], interpolation='none', cmap='gray')
    plt.show(block=False)
    d = []
    for i, q in enumerate(self.lastQs):
        if not isinstance(q, int):
            d.append(np.max(q, axis=1))
    plt.figure()
    plt.plot(d)
    plt.show(block=True)

def beginTest(self):
    # Enter evaluation mode and reset the test statistics.
    self.test = True
    self.episodeReward = 0
    self.rewardAcc = 0.0
    self.episodeNb = 0
    self.qValueAcc = 0.0
    self.qValueNb = 0

def endTest(self):
    # Leave evaluation mode and record the average Q-value and reward.
    self.test = False
    self.qValues.append(self.qValueAcc / self.qValueNb)
    self.rewards.append(self.rewardAcc / self.episodeNb)
    print('TEST :', ' qValue =', self.qValues[-1], ' reward = ', self.rewards[-1])

# Inputs: the results of an action (a frame and a reward).
def tick(self, frame, reward):
    self.data.addData(self.lastFrame, self.lastAction, reward, False)
    state = self.data.getLastState(frame)
    action = self.chooseAction(state)
    self.episodeReward += reward
    self.rewardAcc += reward
    # Train once the replay memory is warm enough, every learnFrequency
    # ticks, and never while evaluating.
    if self.data.getSize() > self.minReplaySize and self.tickCount % self.learnFrequency == 0 and not self.test:
        self.learn()
    self.lastFrame = frame
    self.lastAction = action
    self.tickCount += 1
    return action

def begin(self, frame):
    # Start an episode with a random action.
    self.lastFrame = frame
    self.lastAction = self.rng.randint(0, self.actionNb)
    self.episodeNb += 1
    return self.lastAction

def end(self, reward):
    # Store the terminal transition and track the best episode reward.
    self.data.addData(self.lastFrame, self.lastAction, reward, True)
    if self.episodeReward > self.maxReward:
        self.maxReward = self.episodeReward
        print('MAX REWARD :', self.maxReward)
    self.episodeReward = 0
    return

def chooseAction(self, state):
    # Epsilon-greedy action selection; epsilon decays linearly while
    # training and is fixed at testEpsilon during evaluation.
    epsilon = self.testEpsilon
    if not self.test:
        epsilon = self.epsilonStart - (self.epsilonStart - self.epsilonEnd) * self.tickCount / self.epsilonDecayLength
        epsilon = max(self.epsilonEnd, epsilon)
    if self.rng.rand() > epsilon:
        v = self.net.forward(state)
        self.lastQs.append(v)
        self.qValueNb += 1
        self.qValueAcc += np.max(v)
        return np.argmax(v)
    else:
        r = self.rng.randint(0, self.actionNb)
        self.lastQs.append(r)
        return r

def learn(self):
    # One training step on a replay minibatch; every
    # targetNetworkUpdateFrequency steps, sync the target network.
    self.learnCount += 1
    states, actions, rewards, terminals, states2 = self.data.getBatch(32)
    self.net.learn(states, states2, actions, rewards, terminals)
    if self.learnCount % self.targetNetworkUpdateFrequency == 0:
        self.net.updateQ2()

def showState(self, states, states2):
    # Show the four frames of a state (top row) and of its successor
    # state (bottom row) side by side.
    for p in range(4):
        plt.subplot(2, 4, p + 1)
        plt.imshow(states[0, p, :, :], interpolation='none', cmap='gray')
    for p in range(4):
        plt.subplot(2, 4, p + 5)
        plt.imshow(states2[0, p, :, :], interpolation='none', cmap='gray')
    plt.show()
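The example is the core loop of a DQN-style agent: net.forward produces Q-values, net.learn performs one gradient step on a replay minibatch, and updateQ2 is invoked every targetNetworkUpdateFrequency learning steps. The net module itself is not reproduced on this page, so the following is only a minimal sketch of what updateQ2 plausibly does, assuming Net keeps a second, periodically synced set of "target" weights; the attribute names q and q2 here are illustrative assumptions, not the actual API of the original net module.

import numpy as np

class Net:
    # Minimal sketch; the real net.Net is not shown on this page, and
    # the q / q2 attributes below are assumed names for illustration.
    def __init__(self, layer_shapes, seed=0):
        rng = np.random.default_rng(seed)
        # Online network weights, plus a duplicated "target" copy (Q2).
        self.q = [rng.standard_normal(s) * 0.01 for s in layer_shapes]
        self.q2 = [w.copy() for w in self.q]

    def updateQ2(self):
        # Sync the target network with the online network, DQN-style;
        # between these periodic syncs the target weights stay frozen.
        self.q2 = [w.copy() for w in self.q]

Keeping Q2 frozen between syncs stabilizes the bootstrap targets that learn() would compute from it (roughly rewards + gamma * max over actions of Q2(states2) for non-terminal transitions), which is the standard DQN target-network trick.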
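For completeness, here is one way the agent's begin / tick / end API above could be driven by an environment loop. The env object and its reset / act / terminal methods are hypothetical stand-ins added for illustration; they do not appear in the original code.

# Hypothetical driver loop for the agent shown above; `env` and its
# methods (reset, act, terminal) are illustrative stand-ins.
def run_episode(agent, env, max_steps=10000):
    frame = env.reset()
    action = agent.begin(frame)              # first action is random
    for _ in range(max_steps):
        frame, reward = env.act(action)
        if env.terminal():
            agent.end(reward)                # store terminal transition
            break
        action = agent.tick(frame, reward)   # store transition, maybe learn

A periodic evaluation pass would wrap a batch of such episodes between agent.beginTest() and agent.endTest(), so that rewards and Q-values are averaged without training steps interfering.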