本文整理汇总了Python中pybrain.rl.learners.valuebased.ActionValueTable.getActionValues方法的典型用法代码示例。如果您正苦于以下问题:Python ActionValueTable.getActionValues方法的具体用法?Python ActionValueTable.getActionValues怎么用?Python ActionValueTable.getActionValues使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pybrain.rl.learners.valuebased.ActionValueTable的用法示例。
在下文中一共展示了ActionValueTable.getActionValues方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: run
# Required import: from pybrain.rl.learners.valuebased import ActionValueTable [as alias]
# Note: getActionValues is a method of ActionValueTable; it cannot be imported on its own and is called on an ActionValueTable instance.
def run():
    """Train a tabular Q-learning agent on blackjack and print its value table.

    Notes carried over from the original author:
        number of states: current value, 0-20
        number of actions: Stand=0, Hit=1

    The function takes no arguments and returns nothing.  It reads the
    module-level names MAX_VAL, MIN_VAL, Q_ALPHA, Q_GAMMA, VERBOSE and
    NB_ITERATION, and uses BlackjackEnv / BlackjackTask, none of which are
    defined in this snippet.  After training it prints a Markdown table with
    one row per state index in range(MAX_VAL).
    """
    # Define the action value table.
    # NOTE(review): PyBrain's ActionValueTable takes (numStates, numActions);
    # MIN_VAL is presumably the number of actions (2) -- confirm.
    av_table = ActionValueTable(MAX_VAL, MIN_VAL)
    av_table.initialize(0.)

    # Define the Q-learning agent; an epsilon of 0.0 is passed to the explorer.
    q_learner = Q(Q_ALPHA, Q_GAMMA)
    q_learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(av_table, q_learner)

    # Wire environment -> task -> experiment.
    env = BlackjackEnv()
    task = BlackjackTask(env, verbosity=VERBOSE)
    experiment = Experiment(task, agent)

    # Run one interaction at a time and learn only when a non-zero reward
    # was just observed.
    # NOTE(review): the listing this was recovered from had lost its
    # indentation; agent.learn() is assumed to belong to the reward check,
    # not to the VERBOSE check -- confirm against the original project.
    for _ in range(NB_ITERATION):
        experiment.doInteractions(1)
        if task.lastreward != 0:
            if VERBOSE:
                print("Agent learn")
            agent.learn()

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('|First State|Choice 0 (Stand)|Choice 1 (Hit)|Relative value of Standing over Hitting|')
    print('|:-------:|:-------|:-----|:-----|')
    for state in range(MAX_VAL):
        # Fetch the row once instead of four times per table line.
        values = av_table.getActionValues(state)
        stand_value = values[0]
        hit_value = values[1]
        # The first column shows the state index shifted by one.
        print('| %s | %s | %s | %s |' % (
            state + 1,
            stand_value,
            hit_value,
            stand_value - hit_value,
        ))
示例2: runMainProg
# Required import: from pybrain.rl.learners.valuebased import ActionValueTable [as alias]
# Note: getActionValues is a method of ActionValueTable; it cannot be imported on its own and is called on an ActionValueTable instance.
def runMainProg():
    """Train a Q-learning blackjack agent over 10000 games and print a report.

    Prints the 32-state action-value table before training, plays the games
    through playGame(), then prints the game counters and the table again.

    Takes no arguments and returns nothing.  Relies on names defined
    elsewhere in the module: playGame, BlackjackCardDeck, BlackjackEnv,
    BlackjackDealer, BlackjackTask and the counters GamesAgentWon,
    GamesDealerWon, GamesTied and TotalGames.
    """
    num_states = 32
    num_actions = 2
    num_games = 10000

    # Define the action value table and show its initial (all-zero) contents.
    av_table = ActionValueTable(num_states, num_actions)
    av_table.initialize(0.)
    _print_action_values(av_table, num_states)

    # Define the Q-learning agent (PyBrain's Q takes alpha, then gamma).
    learner = Q(0.5, 0.0)
    # NOTE(review): (0, 0) passes two positional arguments (in PyBrain:
    # epsilon and decay).  This may be a typo for a single 0.0; kept as
    # written so behavior is unchanged -- confirm.
    learner._setExplorer(EpsilonGreedyExplorer(0, 0))
    agent = LearningAgent(av_table, learner)

    # Define a blackjack deck; the same deck object is handed to both the
    # environment and the dealer.
    theDeck = BlackjackCardDeck()

    # Define the environment.
    env = BlackjackEnv(theDeck)
    env.createHand()

    # Define a dealer.
    dealer = BlackjackDealer(theDeck)

    # Define the task and the experiment.
    task = BlackjackTask(env)
    experiment = Experiment(task, agent)

    # Run the games.
    for _ in range(num_games):
        playGame(dealer, task, env, experiment, agent)

    # The counters are module-level names, presumably updated inside
    # playGame -- verify against its definition.  The double space after
    # each colon reproduces what the Python 2 statement
    # `print "label: ", value` emitted.
    print("Games Agent Won:  %s" % (GamesAgentWon,))
    print("Games Dealer won:  %s" % (GamesDealerWon,))
    print("Games Tied:  %s" % (GamesTied,))
    print("Total Games Played:  %s" % (TotalGames,))

    _print_action_values(av_table, num_states)


def _print_action_values(av_table, num_states):
    """Print one line per state index with that state's action values."""
    for state in range(num_states):
        # Same text the comma-separated Python 2 print statement produced,
        # including its doubled spaces, but valid on Python 3 as well.
        print("The AV Value At  %s  is:  %s" % (state, av_table.getActionValues(state)))