This page collects typical usage examples of the Python method experiment.Experiment.doEpisode. If you are wondering how Experiment.doEpisode is used in practice, or looking for concrete examples of calling it, the curated samples here may help. You can also browse further usage examples for the containing class, experiment.Experiment.
Two code examples of Experiment.doEpisode are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
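Both examples follow the same calling pattern: build an environment and a list of agents, wrap them in an Experiment, and call doEpisode() once per game. Judging from the code below, doEpisode() plays one full episode and returns the winning player's name, though the source does not state this explicitly. A minimal sketch of that pattern (RunFastEnvironment, RunFastAgent, RunFastNetwork and PLAYER_LIST are project-local names taken from the examples and assumed to be importable):

from experiment import Experiment

# Sketch only: doEpisode() is assumed to run one full game and return the
# winner's name, as the two examples below suggest.
env = RunFastEnvironment()
agents = [RunFastAgent(name, RunFastNetwork(name, inputNum=192, hiddenNum=192, outNum=1))
          for name in PLAYER_LIST[:3]]
exp = Experiment(env, agents, type=1)
winner = exp.doEpisode()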
Example 1: trainQValueNetwork
# Required module: from experiment import Experiment [as alias]
# Or: from experiment.Experiment import doEpisode [as alias]
# The snippet also needs the standard library modules os and pickle, plus the
# project-local names RunFastNetwork, RunFastAgent, RunFastEnvironment and PLAYER_LIST.
import os
import pickle
def trainQValueNetwork(loopNum=10000, startTurn=0, history_filename='train_winners_nn', inputNum=192, type=1):
    '''
    Train a Q-value network by letting three agents play the game against
    each other. Each agent's network is saved as playeri, where the number
    indicates after how many training episodes the network was obtained;
    win/loss results are recorded in the train_winners history file.
    '''
    agents = []
    winners = {}
    # Resume from a previous run: reload the win counts and derive how many
    # episodes have already been played.
    if os.path.isfile(history_filename):
        with open(history_filename, 'rb') as f:
            winners = pickle.load(f)
        startTurn = sum(winners.values())
    # Build the three agents, each backed by its own Q-value network.
    for i in range(3):
        playerName = PLAYER_LIST[i]
        nw = RunFastNetwork(playerName, inputNum=inputNum, hiddenNum=inputNum, outNum=1)
        nw.loadNet(playerName, startTurn)
        rfa = RunFastAgent(playerName, nw)
        agents.append(rfa)
    env = RunFastEnvironment()
    exp = Experiment(env, agents, type=type)
    for i in range(startTurn, startTurn + loopNum):
        # Checkpoint the networks and the win history every 200 episodes.
        if i % 200 == 0:
            for agent in agents:
                agent.saveNet()
            with open(history_filename, 'wb') as f:
                pickle.dump(winners, f)
        # exp.setTurn(i)
        winner = exp.doEpisode()
        winners[winner] = winners.get(winner, 0) + 1
    # Final save once the training loop finishes.
    for agent in agents:
        agent.saveNet()
    with open(history_filename, 'wb') as f:
        pickle.dump(winners, f)
    print(winners)
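A hypothetical invocation of Example 1 (the loopNum value here is illustrative, not from the source); if the history file already exists, the function resumes from the recorded episode count:

if __name__ == '__main__':
    # Run 1000 more self-play episodes with the single-hidden-layer network.
    trainQValueNetwork(loopNum=1000, history_filename='train_winners_nn')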
Example 2: trainDeepNetwork
# Required module: from experiment import Experiment [as alias]
# Or: from experiment.Experiment import doEpisode [as alias]
# As in Example 1, this snippet also needs os and pickle, plus the project-local
# names RunFastDeepNetwork, RunFastAgent, RunFastEnvironment and PLAYER_LIST.
def trainDeepNetwork(loopNum=10000, startTurn=0, history_filename='train_winners_dn', type=1, inputNum=192):
    '''
    Train the Q-values with a deep network (three hidden layers) instead of
    the single-hidden-layer network of Example 1.
    '''
    agents = []
    winners = {}
    # Load the match history from a previous run, if any.
    if os.path.isfile(history_filename):
        with open(history_filename, 'rb') as f:
            winners = pickle.load(f)
        startTurn = sum(winners.values())
    # Build the agents, each backed by a deep network.
    for i in range(3):
        playerName = PLAYER_LIST[i]
        nw = RunFastDeepNetwork(playerName, inputNum=inputNum, hidden1Num=inputNum,
                                hidden2Num=inputNum, hidden3Num=inputNum, outNum=1)
        nw.loadNet(playerName, startTurn)
        rfa = RunFastAgent(playerName, nw)
        agents.append(rfa)
    env = RunFastEnvironment()
    exp = Experiment(env, agents, type=type)
    for i in range(startTurn, startTurn + loopNum):
        # Checkpoint the networks and the win history every 200 episodes.
        if i % 200 == 0:
            for agent in agents:
                agent.saveNet()
            with open(history_filename, 'wb') as f:
                pickle.dump(winners, f)
        # exp.setTurn(i)
        winner = exp.doEpisode()
        winners[winner] = winners.get(winner, 0) + 1
    # Final save once the training loop finishes.
    for agent in agents:
        agent.saveNet()
    with open(history_filename, 'wb') as f:
        pickle.dump(winners, f)
    print(winners)
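After either run, the persisted win history can be inspected by unpickling the history file; a small sketch (filename as used in Example 2):

import pickle

# Print the per-player win counts saved by the training loop, highest first.
with open('train_winners_dn', 'rb') as f:
    winners = pickle.load(f)
for player, wins in sorted(winners.items(), key=lambda kv: kv[1], reverse=True):
    print(player, wins)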