This article collects typical usage examples of the Python method pybrain.rl.experiments.EpisodicExperiment.doEpisodes. If you are wondering what EpisodicExperiment.doEpisodes does, or how to use it in practice, the curated code examples below may help. You can also look further into usage examples of the containing class, pybrain.rl.experiments.EpisodicExperiment.
The following presents 15 code examples of the EpisodicExperiment.doEpisodes method, sorted by popularity by default.
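Before the individual examples, a minimal sketch of the common workflow may help orient readers: doEpisodes(n) runs n full episodes and returns a list with one reward sequence per episode, and with a LearningAgent the caller typically alternates doEpisodes with agent.learn() and agent.reset(). The sketch below is not taken from the examples; it assumes the cart-pole modules bundled with PyBrain (mirroring the setup that appears, commented out, in Example 11), and the episode and batch counts are arbitrary.

from pybrain.rl.environments.cartpole import CartPoleEnvironment, DiscreteBalanceTask
from pybrain.rl.learners.valuebased import ActionValueNetwork, NFQ
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment

env = CartPoleEnvironment()
task = DiscreteBalanceTask(env, 100)        # one episode lasts at most 100 steps
module = ActionValueNetwork(4, 3)           # 4 state variables in, 3 discrete actions out
agent = LearningAgent(module, NFQ())
experiment = EpisodicExperiment(task, agent)

for batch in range(20):
    rewards = experiment.doEpisodes(5)      # list with one reward sequence per episode
    print(sum(map(sum, rewards)) / len(rewards))   # mean undiscounted return of this batch
    agent.learn()
    agent.reset()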
Example 1: train
# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def train():
    # Make the environment
    environment = TwentyFortyEightEnvironment()
    # The task is the game this time
    task = environment
    # Make the reinforcement learning agent (use a network because inputs are continuous)
    network = ActionValueNetwork(task.nSenses, task.nActions)
    # Use Q learning for updating the table (NFQ is for networks)
    learner = NFQ()
    learner.gamma = GAMMA
    agent = LearningAgent(network, learner)
    # Set up an experiment
    experiment = EpisodicExperiment(task, agent)
    # Train the Learner
    meanScores = []
    for i in xrange(LEARNING_EPOCHS):
        experiment.doEpisodes(GAMES_PER_EPOCH)
        print "Iteration ", i, " With mean score ", task.meanScore, "Max block achieved ", environment.maxGameBlock
        meanScores.append(task.meanScore)
        agent.learn()
        agent.reset()
    params = {"learningEpochs": LEARNING_EPOCHS, "gamesPerEpoch": GAMES_PER_EPOCH, "gamma": GAMMA}
    return meanScores, params, agent
Example 2: train
# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def train():
    # Make the environment
    environment = TwentyFortyEightEnvironment()
    # Store the environment as the task
    task = environment
    # Set up the Neural Network
    neuralNet = buildNetwork(task.nSenses, HIDDEN_NODES, task.nActions)
    # Use a Genetic Algorithm as the Trainer
    trainer = GA(populationSize=20, topProportion=0.2, elitism=False,
                 eliteProportion=0.25, mutationProb=0.1,
                 mutationStdDev=0.2, tournament=False,
                 tournamentSize=2)
    agent = OptimizationAgent(neuralNet, trainer)
    # Set up an experiment
    experiment = EpisodicExperiment(task, agent)
    # Train the network
    meanScores = []
    print "Starting HillClimberNN"
    for i in xrange(LEARNING_EPOCHS):
        experiment.doEpisodes(GAMES_PER_EPOCH)
        print "Training Iteration", i, "With mean score ", task.meanScore, "Max block achieved ", environment.maxGameBlock
        environment.maxGameBlock = 0
        meanScores.append(task.meanScore)
    params = {"learningEpochs": LEARNING_EPOCHS, "gamesPerEpoch": GAMES_PER_EPOCH, "hiddenNodes": HIDDEN_NODES}
    return meanScores, params, experiment
Example 3: train
# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def train(self, episodes, maxSteps):
    avgReward = 0

    # set up environment and task
    self.env = InfoMaxEnv(self.objectNames, self.actionNames, self.numCategories)
    self.task = InfoMaxTask(self.env, maxSteps=maxSteps,
                            do_decay_beliefs=True, uniformInitialBeliefs=True)

    # create neural net and learning agent
    self.params = buildNetwork(self.task.outdim, self.task.indim,
                               bias=True, outclass=SoftmaxLayer)
    if self._PGPE:
        self.agent = OptimizationAgent(self.params, PGPE(minimize=False, verbose=False))
    elif self._CMAES:
        self.agent = OptimizationAgent(self.params, CMAES(minimize=False, verbose=False))

    # init and perform experiment
    exp = EpisodicExperiment(self.task, self.agent)
    for i in range(episodes):
        exp.doEpisodes(1)
        avgReward += self.task.getTotalReward()
        print "reward episode ", i, self.task.getTotalReward()

    # print initial info
    print "\naverage reward over training = ", avgReward / episodes

    # save trained network
    self._saveWeights()
Example 4: __init__
# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
class BaggerBot:
    def __init__(self, host, port, net=None):
        self.conn = ServerConnection(host, port)
        self.env = self.conn.env
        self.conn.join()
        self.task = SurviveTask(self.env, self.conn)
        self.net = buildNetwork(self.env.outdim, 4, self.env.indim, outclass=TanhLayer)
        self.agent = OptimizationAgent(self.net, PGPE())
        self.experiment = EpisodicExperiment(self.task, self.agent)

    def wait_connected(self):
        self.conn.wait_connected()

    def train(self):
        '''
        Infinitely play the game. Figure out the next move(s), parse incoming
        data, discard all that, do stupid stuff and die :)
        '''
        while self.env.in_game:
            # Ask to be spawned
            logging.info('Requesting spawn...')
            self.conn.send_spawn()
            while not self.env.playing:
                self.conn.parse_pregame()
            while self.env.playing:
                self.experiment.doEpisodes(100)
Example 5: main
# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def main():
    """
    The task represents one full simulation. Therefore it is episodic.
    Each episode calls performAction after passing getObservation to the agent.
    Once isFinished is true, the reward is returned and one simulation is done.

    The net is the neural network. It has 7 input nodes, a hidden layer of 5
    nodes, and 2 output nodes. It is a feed-forward network using sigmoid
    activation functions.

    OptimizationAgent(module, learner)
    EpisodicExperiment.optimizer = learner
    learner.setEvaluator(task, module)
    optimizer.learn()
    """
    task = LanderTask(batchSize=1)
    net = buildNetwork(task.indim, 5, task.outdim)
    learner = StochasticHillClimber()
    agent = OptimizationAgent(net, learner)
    experiment = EpisodicExperiment(task, agent)
    experiment.doEpisodes(100000)

    tasks = [LanderTask(environment=Lander(acceleration=float(i)))
             for i in range(1, 4)]
    test_size = 1000
    for task in tasks:
        print("Running task with acceleration {}".format(task.env.acceleration))
        success = 0
        for _ in range(test_size):
            task.env.reset()
            while not task.isFinished():
                observation = task.getObservation()
                action = net.activate(observation)
                task.performAction(action)
            print("Finished a simulation with result {}".format(task.env.status))
            if task.env.status == 'landed':
                success += 1
        print("Succeeded {} times out of {}".format(success, test_size))
Example 6: someEpisodes
# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def someEpisodes(game_env, net, discountFactor=0.99, maxSteps=100, avgOver=1, returnEvents=False):
    """ Return the fitness value for one episode of play, given the policy defined by a neural network. """
    task = GameTask(game_env)
    game_env.recordingEnabled = True
    game_env.reset()
    net.reset()
    task.maxSteps = maxSteps
    agent = LearningAgent(net)
    agent.learning = False
    agent.logging = False
    exper = EpisodicExperiment(task, agent)
    fitness = 0
    for _ in range(avgOver):
        rs = exper.doEpisodes(1)
        # add a slight bonus for more exploration, if rewards are identical
        fitness += len(set(game_env._allEvents)) * 1e-6
        # the true, discounted reward
        fitness += sum([sum([v * discountFactor ** step for step, v in enumerate(r)]) for r in rs])
    fitness /= avgOver
    if returnEvents:
        return fitness, game_env._allEvents
    else:
        return fitness
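A note on Example 6: the nested comprehension depends on the shape of doEpisodes' return value, a list with one entry per episode, each entry holding that episode's per-step rewards. The small illustration below uses made-up reward values purely to show what the discounted sum computes.

discountFactor = 0.99
rs = [[1.0, 0.0, 2.0], [0.5, 0.5]]   # what doEpisodes(2) might return: two reward sequences

returns = [sum(v * discountFactor ** step for step, v in enumerate(r)) for r in rs]
# episode 0: 1.0 + 0.0 * 0.99 + 2.0 * 0.99**2 = 2.9602
# episode 1: 0.5 + 0.5 * 0.99 = 0.995
print(returns)
print(sum(returns))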
Example 7: run
# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def run(arg):
    task = arg[0]
    parameters = arg[1]
    #print "run with", parameters
    seed = parameters["seed"]
    process_id = hash(multiprocessing.current_process()._identity)
    numpy.random.seed(seed + process_id)

    render = False
    plot = False

    plt.ion()

    env = CartPoleEnvironment()
    if render:
        renderer = CartPoleRenderer()
        env.setRenderer(renderer)
        renderer.start()

    task_class = getattr(cp, task)
    task = task_class(env, parameters["MaxRunsPerEpisode"])
    testtask = task_class(env, parameters["MaxRunsPerEpisodeTest"], desiredValue=None)
    #print "dim: ", task.indim, task.outdim

    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.rl.agents import OptimizationAgent
    from pybrain.optimization import PGPE

    module = buildNetwork(task.outdim, task.indim, bias=False)

    # create agent with controller and learner (and its options)
    # % of random actions
    #learner.explorer.epsilon = parameters["ExplorerEpsilon"]
    agent = OptimizationAgent(module, PGPE(storeAllEvaluations=True, storeAllEvaluated=False,
                                           maxEvaluations=None, desiredEvaluation=1, verbose=False))
    # print agent
    # from pprint import pprint
    # pprint(vars(agent.learner))

    testagent = LearningAgent(module, None)
    experiment = EpisodicExperiment(task, agent)
    testexperiment = EpisodicExperiment(testtask, testagent)

    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)

    performance = []
    if plot:
        pf_fig = plt.figure()

    m = parameters["MaxTotalEpisodes"] / parameters["EpisodesPerLearn"]
    for episode in range(0, m):
        # one learning step after one episode of world-interaction
        experiment.doEpisodes(parameters["EpisodesPerLearn"])
        #agent.learn(1)
        #renderer.drawPlot()

        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True
        if (episode) % parameters["TestAfter"] == 0:
            #print "Evaluating at episode: ", episode
            #experiment.agent = testagent
            #r = mean([sum(x) for x in testexperiment.doEpisodes(parameters["TestWith"])])
            #for i in range(0, parameters["TestWith"]):
            #    y = testexperiment.doEpisodes(1)
            #    print (agent.learner._allEvaluated)
            #
            # from pprint import pprint
            # pprint(vars(task))
            l = parameters["TestWith"]
            task.N = parameters["MaxRunsPerEpisodeTest"]
            experiment.doEpisodes(l)
            task.N = parameters["MaxRunsPerEpisode"]
            resList = (agent.learner._allEvaluations)[-l:-1]
#......... (remaining code omitted) .........
Example 8: table
# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
# use Q learning for updating the table (NFQ is for networks)
learner = NFQ()
agent = LearningAgent(controller, learner)

# set up an experiment
experiment = EpisodicExperiment(task, agent)

meanscores = []
m = 0.0
for i in xrange(learning_eps):
    print i
    experiment.doEpisodes(games_per_ep)
    meanscores.append(task.meanscore)
    if meanscores[-1] > m:
        m = meanscores[-1]
        f = open("bestRL.pkl", 'w')
        pickle.dump(agent, f)
        f.close()
    agent.learn()
    agent.reset()

import matplotlib.pyplot as plt
plt.plot(meanscores)
plt.title("Mean Agent Score Per Batch")
plt.show()
Example 9: plotPerformance
# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def plotPerformance(values, fig):
    plt.figure(fig.number)
    plt.clf()
    plt.plot(values, 'o-')
    plt.gcf().canvas.draw()
    # Without the next line, the pyplot plot won't actually show up.
    plt.pause(0.001)

performance = []
if not render:
    pf_fig = plt.figure()

while True:
    # one learning step after one episode of world-interaction
    experiment.doEpisodes(1)
    agent.learn(1)

    # test performance (these real-world experiences are not used for training)
    if render:
        env.delay = True
    experiment.agent = testagent
    r = mean([sum(x) for x in experiment.doEpisodes(5)])
    env.delay = False
    testagent.reset()
    experiment.agent = agent

    performance.append(r)
    if not render:
        plotPerformance(performance, pf_fig)
Example 10: run
# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def run(arg):
    task = arg[0]
    parameters = arg[1]
    #print "run with", parameters
    seed = parameters["seed"]
    process_id = hash(multiprocessing.current_process()._identity)
    numpy.random.seed(seed + process_id)

    render = False
    plot = False

    plt.ion()

    env = CartPoleEnvironment()
    if render:
        renderer = CartPoleRenderer()
        env.setRenderer(renderer)
        renderer.start()

    task_class = getattr(cp, task)
    task = task_class(env, parameters["MaxRunsPerEpisode"])
    testtask = task_class(env, parameters["MaxRunsPerEpisodeTest"])
    #print "dim: ", task.indim, task.outdim

    # state observations in, the 4 action values out
    module = ActionValueNetwork(task.outdim, task.indim)
    learner = NFQ()
    # % of random actions
    learner.explorer.epsilon = parameters["ExplorerEpsilon"]

    agent = LearningAgent(module, learner)
    testagent = LearningAgent(module, None)
    experiment = EpisodicExperiment(task, agent)
    testexperiment = EpisodicExperiment(testtask, testagent)

    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)

    performance = []
    if plot:
        pf_fig = plt.figure()

    m = parameters["MaxTotalEpisodes"] / parameters["EpisodesPerLearn"]
    for episode in range(0, m):
        # one learning step after one episode of world-interaction
        experiment.doEpisodes(parameters["EpisodesPerLearn"])
        agent.learn(1)
        #renderer.drawPlot()

        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True
        if (episode) % parameters["TestAfter"] == 0:
            #print "Evaluating at episode: ", episode
            #experiment.agent = testagent
            r = mean([sum(x) for x in testexperiment.doEpisodes(parameters["TestWith"])])
            env.delay = False
            testagent.reset()
            #experiment.agent = agent

            performance.append(r)
            if plot:
                plotPerformance(performance, pf_fig)
            # print "reward avg", r
            # print "explorer epsilon", learner.explorer.epsilon
            # print "num episodes", agent.history.getNumSequences()
            # print "update step", len(performance)
    # print "done"
    return performance
    #print "network", json.dumps(module.bn.net.E, indent=2)
Example 11: run
# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
#......... (preceding code omitted) .........
# # switch this to True if you want to see the cart balancing the pole (slower)
# render = False
#
# plt.ion()
#
# env = CartPoleEnvironment()
# if render:
# renderer = CartPoleRenderer()
# env.setRenderer(renderer)
# renderer.start()
#
# module = ActionValueNetwork(4, 3)
#
# task = DiscreteBalanceTask(env, 100)
# learner = NFQ()
# learner.explorer.epsilon = 0.4
#
# agent = LearningAgent(module, learner)
# testagent = LearningAgent(module, None)
# experiment = EpisodicExperiment(task, agent)
#
# performance = []
#
# if not render:
# pf_fig = plt.figure()
count = 0
while True:
    # one learning step after one episode of world-interaction
    count += 1
    print "learning #", count
    experiment.agent = agent
    experiment.doOptimization = True
    erg = experiment.doEpisodes(1)
    print erg
    #experiment.doOptimization = False
    #print "agent learn"
    #agent.learner.learn(1)

    if count > 8:
        # test performance (these real-world experiences are not used for training)
        # if render:
        #     env.delay = True
        #experiment.agent = testagent
        print "testing"
        experiment.doOptimization = False
        erg = experiment.doEpisodes(1)
        summe = 0
        #print erg
        # for x in erg:
        #     summe = sum(x)
        #     print summe
        #r = mean([sum(x) for x in experiment.doEpisodes(5)])
        # env.delay = False
        # testagent.reset()
        # performance.append(r)
        # if not render:
        #     plotPerformance(performance, pf_fig)
        # print "reward avg", r
        # print "explorer epsilon", learner.explorer.epsilon
        # print "num episodes", agent.history.getNumSequences()
        # print "update step", len(performance)
Example 12: CCRLEnvironment
# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
__author__ = 'Stubborn'
from pybrain.rl.environments.ode import CCRLEnvironment
from pybrain.rl.environments.ode.tasks import CCRLGlasTask
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules.tanhlayer import TanhLayer
from pybrain.optimization import PGPE
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment
environment = CCRLEnvironment()
task = CCRLGlasTask(environment)
net = buildNetwork(len(task.getObservation()), 4, environment.indim, outclass=TanhLayer)
agent = OptimizationAgent(net, PGPE())
experiment = EpisodicExperiment(task, agent)
for updates in range(20000):
    experiment.doEpisodes(1)
Example 13: mazeEnv
# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
side = 9
goal = 3,2
env = mazeEnv(structure, goal) #use maze environment for now; note pos is Y,X
# our own task and environment for later
#env = policyEnv()
thetask = MDPMazeTaskEpisodic(env)
# create neural net; create and train agent
theparams = buildNetwork(thetask.outdim, thetask.indim, bias=False)
agent = OptimizationAgent(theparams, CMAES())
exp = EpisodicExperiment(thetask, agent)
# train agent
exp.doEpisodes(NUM_EPISODES)
print "\ntotal reward = ",thetask.getTotalReward()
#print "\n"
#print "initial weights: "; print theparams.params
print "\n"
print "NOTE positions below are (Y,X)"
print "\n"
print "getting observation 1"
print "robot = ",thetask.getObservation()
print "goal = ",goal
print "reward: ", thetask.getReward()
print "\n"
print "performing action 1"
Example 14: FiniteDifferenceAgent
# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
# create agent with controller and learner
agent = FiniteDifferenceAgent(net, SPLA())
# learning options
agent.learner.gd.alpha = 0.05
agent.learner.gdSig.alpha = 0.1
agent.learner.gd.momentum = 0.0
agent.learner.epsilon = 2.0
agent.learner.initSigmas()

sr = []
experiment = EpisodicExperiment(task, agent)
for updates in range(1000):
    # training step
    for i in range(5):
        experiment.doEpisodes(10)
    agent.learn()
    print "parameters:", agent.module.params
    agent.reset()

    # evaluation step (learning disabled)
    agent.disableLearning()
    experiment.doEpisodes(50)

    # append mean reward to sr array
    ret = []
    for n in range(agent.history.getNumSequences()):
        state, action, reward, _ = agent.history.getSequence(n)
        ret.append(sum(reward, 0).item())
    sr.append(mean(ret))
    agent.enableLearning()
Example 15: run_experiment
# Required module import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def run_experiment():
    # Create the controller network
    HIDDEN_NODES = 4
    RUNS = 2
    BATCHES = 1
    PRINTS = 1
    EPISODES = 500

    env = None
    start_state_net = None
    run_results = []

    # Set up plotting tools for the experiments
    tools = ExTools(BATCHES, PRINTS)

    # Run the experiment
    for run in range(RUNS):
        if run == 0:
            continue

        # If an environment already exists, shut it down
        if env:
            env.closeSocket()

        # Create the environment
        env = create_environment()

        # Create the task
        task = Pa10MovementTask(env)

        # Create the neural network. Only create the network once so it retains
        # the same starting values for each run.
        if start_state_net:
            net = start_state_net.copy()
        else:
            # Create the initial neural network
            net = create_network(
                in_nodes=env.obsLen,
                hidden_nodes=HIDDEN_NODES,
                out_nodes=env.actLen
            )
            start_state_net = net.copy()

        # Create the learning agent
        learner = HillClimber(storeAllEvaluations=True)
        agent = OptimizationAgent(net, learner)
        tools.agent = agent

        # Create the experiment
        experiment = EpisodicExperiment(task, agent)

        # Perform all episodes in the run
        for episode in range(EPISODES):
            experiment.doEpisodes(BATCHES)

        # Calculate results
        all_results = agent.learner._allEvaluations
        max_result = np.max(all_results)
        min_result = np.min(all_results)
        avg_result = np.sum(all_results) / len(all_results)
        run_results.append((run, max_result, min_result, avg_result))

        # Make the results directory if it does not exist
        if not os.path.exists(G_RESULTS_DIR):
            os.mkdir(G_RESULTS_DIR)

        # Write all results to the results file
        with open(os.path.join(G_RESULTS_DIR, 'run_%d.txt' % run), 'w+') as f:
            # Store the calculated max, min, avg
            f.write('RUN, MAX, MIN, AVG\n')
            f.write('%d, %f, %f, %f\n' % (run, max_result, min_result, avg_result))

            # Store all results from this run
            f.write('EPISODE, REWARD\n')
            for episode, result in enumerate(all_results):
                f.write('%d, %f\n' % (episode, result))

    return