This page collects typical usage examples of the Python method net.Net.learn. If you are wondering what Net.learn does, how to use it, or what real calls look like, the curated code examples below may help. You can also explore further usage examples of the class net.Net
that this method belongs to.
The following shows 1 code example of the Net.learn method. Examples are sorted by popularity by default; you can upvote the ones you like or find useful, and your ratings help the system recommend better Python code examples.
Example 1: __init__
# Required import: from net import Net [as alias]
# Or: from net.Net import learn [as alias]
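# The excerpt below also relies on modules that this listing does not show
# being imported. The following are inferred from usage (cPickle and the
# print statements imply Python 2); Data is assumed to be a project-specific
# replay-memory helper, so its import path here is a guess, not the
# project's actual layout.
# import os
# import collections
# import cPickle
# import numpy as np
# from data import Data  # assumed module name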
class LearningAgent:
    def __init__(self, args):
        # Epsilon-greedy exploration schedule
        self.epsilonStart = args.epsilonStart
        self.epsilonEnd = args.epsilonEnd
        self.epsilonDecayLength = args.epsilonDecayLength
        self.testEpsilon = args.testEpsilon
        # Replay memory and learning schedule
        self.replaySize = args.replaySize
        self.minReplaySize = args.minReplaySize
        self.framesPerState = args.framesPerState
        self.learnFrequency = args.learnFrequency
        self.targetNetworkUpdateFrequency = args.targetNetworkUpdateFrequency
        self.batchSize = args.batchSize
        self.actionNb = args.actionNb
        self.lastAction = 0
        self.lastFrame = None
        self.rng = np.random.RandomState(42)
        self.data = Data(self.replaySize, self.framesPerState, (100, 100))
        # Counters and running statistics
        self.tickCount = 0
        self.learnCount = 0
        self.rewardAcc = 0.0
        self.episodeNb = 0
        self.qValueAcc = 0.0
        self.qValueNb = 0
        self.maxReward = 0
        self.episodeReward = 0
        self.test = False
        self.lastQs = collections.deque(maxlen=60)
        self.net = Net(args)
        self.qValues = []
        self.rewards = []
        self.tickCount = 0
    def load(self, filename):
        if os.path.isfile(filename):
            print 'Using', filename
            # Binary mode so the pickled data reads back correctly on any platform
            f = open(filename, 'rb')
            data = cPickle.load(f)
            self.net = data['net']
            self.qValues = data['qValues']
            self.rewards = data['rewards']
            self.tickCount = data['tickCount']
            self.learnCount = data['learnCount']
            f.close()
    def save(self, filename):
        # Binary mode to match cPickle's highest protocol (-1)
        f = open(filename, 'wb')
        data = {'net': self.net, 'qValues': self.qValues, 'rewards': self.rewards,
                'tickCount': self.tickCount, 'learnCount': self.learnCount}
        cPickle.dump(data, f, -1)
        f.close()
    def printStuff(self):
        # Debug helper: display the last n frames and the recent Q-values
        import matplotlib.pyplot as plt
        n = 60
        for i, v in enumerate(self.lastQs):
            print '#', i, ':', v
        frames = self.data.getLastFrames(n)
        for p in range(n):
            plt.subplot(10, 6, p + 1)
            plt.imshow(frames[p, :, :], interpolation='none', cmap='gray')
        plt.show(block=False)
        # Plot the maximum Q-value of each stored prediction
        d = []
        for i, q in enumerate(self.lastQs):
            if type(q) is not int:
                d.append(np.max(q, axis=1))
        plt.figure()
        plt.plot(d)
        plt.show(block=True)
    def beginTest(self):
        self.test = True
        self.episodeReward = 0
        self.rewardAcc = 0.0
        self.episodeNb = 0
        self.qValueAcc = 0.0
        self.qValueNb = 0
    def endTest(self):
        self.test = False
        self.qValues.append(self.qValueAcc / self.qValueNb)
        self.rewards.append(self.rewardAcc / self.episodeNb)
        print 'TEST :', ' qValue =', self.qValues[-1], ' reward = ', self.rewards[-1]
    # Inputs: results of an action (a frame and a reward)
    def tick(self, frame, reward):
        self.data.addData(self.lastFrame, self.lastAction, reward, False)
        state = self.data.getLastState(frame)
        action = self.chooseAction(state)
        self.episodeReward += reward
        self.rewardAcc += reward
        # ......... remainder of this example omitted .........
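The truncated tick method above is where a DQN-style agent typically ends up invoking Net.learn, but the excerpt never reaches that call. The following is only a minimal sketch of a helper in the same spirit, assuming that Data exposes a size() method and a getBatch() sampler, and that Net.learn accepts a minibatch of transitions plus a target-network flag; all of these signatures are assumptions for illustration, not the project's actual API.

    # Hypothetical continuation, sketching how self.net.learn might be called.
    # size(), getBatch() and the learn() signature are assumed, not taken
    # from the original project.
    def learnIfNeeded(self):
        # Wait until the replay memory holds enough transitions
        if self.data.size() < self.minReplaySize:
            return
        # Only learn every learnFrequency ticks
        if self.tickCount % self.learnFrequency != 0:
            return
        # Sample a random minibatch of transitions (assumed Data API)
        states, actions, rewards, nextStates, terminals = \
            self.data.getBatch(self.batchSize, self.rng)
        # Periodically refresh the target network (assumed flag)
        updateTarget = (self.learnCount % self.targetNetworkUpdateFrequency == 0)
        # One gradient step on the Q-network (assumed Net.learn signature)
        self.net.learn(states, actions, rewards, nextStates, terminals, updateTarget)
        self.learnCount += 1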