Python Net.updateQ2 Method Code Examples

This article collects typical usage examples of the Python method net.Net.updateQ2. If you have been wondering what exactly Net.updateQ2 does, how to call it, or what it looks like in real code, the curated example below should help. You can also explore further usage examples of net.Net, the class this method belongs to.


One code example of the Net.updateQ2 method is shown below; examples are sorted by popularity by default.

Example 1: __init__

# Required import: from net import Net [as alias]
# Or: from net.Net import updateQ2 [as alias]

#......... part of the code is omitted here .........
		# Dump the recorded Q-values, then display the last n frames.
		n = 60
		for i, v in enumerate(self.lastQs):
			print('#', i, ':', v)
		frames = self.data.getLastFrames(n)
		for p in range(n):
			plt.subplot(10, 6, p+1)
			plt.imshow(frames[p,:,:], interpolation='none', cmap='gray')
		plt.show(block=False)
		
		# Collect the max Q-value of each greedy step; random actions were
		# recorded as plain ints and are skipped.
		d = []
		for q in self.lastQs:
			if not isinstance(q, int):
				d.append(np.max(q, axis=1))
		
		plt.figure()
		plt.plot(d)
		plt.show(block=True)
		
	def beginTest(self):
		self.test = True
		self.episodeReward = 0
		self.rewardAcc = 0.0
		self.episodeNb = 0
		self.qValueAcc = 0.0
		self.qValueNb = 0
		
		
	def endTest(self):
		self.test = False
		# Average the Q-values and per-episode rewards gathered during the test.
		self.qValues.append(self.qValueAcc / self.qValueNb)
		self.rewards.append(self.rewardAcc / self.episodeNb)
		
		print('TEST :  qValue =', self.qValues[-1], '  reward =', self.rewards[-1])
		
	# Inputs: the results of an action (a frame and a reward)
	def tick(self, frame, reward):
		# Store the transition completed by this reward, then pick the next action.
		self.data.addData(self.lastFrame, self.lastAction, reward, False)
		state = self.data.getLastState(frame)
		action = self.chooseAction(state)
		
		self.episodeReward += reward
		self.rewardAcc += reward
		
		# Train once the replay memory is large enough, at the configured
		# frequency, and never during a test phase.
		if self.data.getSize() > self.minReplaySize and self.tickCount % self.learnFrequency == 0 and not self.test:
			self.learn()
		
		self.lastFrame = frame
		self.lastAction = action
		
		self.tickCount += 1
		
		return action
			
		
	def begin(self, frame):
		# Start an episode; the first action is always random.
		self.lastFrame = frame
		self.lastAction = self.rng.randint(0, self.actionNb)
		self.episodeNb += 1
		return self.lastAction
		
	def end(self, reward):
		self.data.addData(self.lastFrame, self.lastAction, reward, True)
		if self.episodeReward > self.maxReward:
			self.maxReward = self.episodeReward
			print('MAX REWARD :', self.maxReward)
		self.episodeReward = 0
		return
		
	def chooseAction(self, state):
		# Epsilon-greedy: epsilon is fixed during tests and annealed linearly
		# from epsilonStart to epsilonEnd while training.
		epsilon = self.testEpsilon
		if not self.test:
			epsilon = self.epsilonStart - (self.epsilonStart - self.epsilonEnd) * self.tickCount / self.epsilonDecayLength
			epsilon = max(self.epsilonEnd, epsilon)
		if self.rng.rand() > epsilon:
			# Exploit: pick the action with the highest predicted Q-value.
			v = self.net.forward(state)
			self.lastQs.append(v)
			self.qValueNb += 1
			self.qValueAcc += np.max(v)
			return np.argmax(v)
		else:
			# Explore: pick a random action.
			r = self.rng.randint(0, self.actionNb)
			self.lastQs.append(r)
			return r
		
	def learn(self):
		self.learnCount += 1
		states, actions, rewards, terminals, states2 = self.data.getBatch(32)
		self.net.learn(states, states2, actions, rewards, terminals)
		# Periodically sync the target network with the online network.
		if self.learnCount % self.targetNetworkUpdateFrequency == 0:
			self.net.updateQ2()
			
	def showState(self, states, states2):
		# Show the four frames of the first state (top row) and of its
		# successor state (bottom row).
		import matplotlib.pyplot as plt
		for p in range(4):
			plt.subplot(2, 4, p+1)
			plt.imshow(states[0,p,:,:], interpolation='none', cmap='gray')
		for p in range(4):
			plt.subplot(2, 4, p+5)
			plt.imshow(states2[0,p,:,:], interpolation='none', cmap='gray')	
		plt.show()
Developer ID: Levoila, Project: CrappyAI, Lines of code: 104, Source file: learning_agent.py
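
The example calls updateQ2 but never shows its body. Given the call site, where it runs every targetNetworkUpdateFrequency learning steps right after a training batch, the name and usage suggest the standard DQN target-network sync: copying the online Q-network's parameters into the separate network Q2 that learn() bootstraps its targets from. Below is a minimal sketch of that idea, an assumption rather than the CrappyAI source; the parameter stores q_params and q2_params are hypothetical names.

# Hypothetical sketch of Net.updateQ2 as a DQN target-network sync.
# This is inferred from the call site above, not CrappyAI's actual code.
import copy

class Net:
	def __init__(self):
		# q_params: online Q-network parameters (hypothetical layout).
		self.q_params = {'W': [[0.1, -0.2], [0.3, 0.0]], 'b': [0.0, 0.0]}
		# q2_params: the target network starts as a copy of the online one.
		self.q2_params = copy.deepcopy(self.q_params)

	def updateQ2(self):
		# Overwrite the target network with the online parameters, freezing
		# the bootstrap targets until the next scheduled sync.
		self.q2_params = copy.deepcopy(self.q_params)

Keeping Q2 fixed between syncs stabilizes the regression targets r + gamma * max_a Q2(s', a) used by learn(), which is why the copy happens only every targetNetworkUpdateFrequency learning steps rather than after every batch.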


Note: The net.Net.updateQ2 example in this article was compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippet was selected from an open-source project contributed by its author; copyright of the source code remains with the original author, and any distribution or use should follow the corresponding project's License. Do not republish without permission.