

Python EpisodicExperiment.doEpisodes Method Code Examples

This article collects typical usage examples of the Python method pybrain.rl.experiments.EpisodicExperiment.doEpisodes. If you are wondering what EpisodicExperiment.doEpisodes does and how to call it in practice, the curated examples below should help. You can also look further into other usage examples of the containing class, pybrain.rl.experiments.EpisodicExperiment.


The following shows 15 code examples of the EpisodicExperiment.doEpisodes method, sorted by popularity by default.
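Before the individual examples, here is a minimal, self-contained sketch of the typical call pattern. It is assembled from the NFQ cart-pole snippets further below (notably the commented-out setup in Example 11); the CartPoleEnvironment/DiscreteBalanceTask classes and the exact import paths are assumptions about a standard PyBrain installation rather than part of any single example. With a LearningAgent, doEpisodes(n) runs n episodes against the task and returns one list of per-step rewards per episode; agent.learn() then trains on the collected experience.

from pybrain.rl.environments.cartpole import CartPoleEnvironment
from pybrain.rl.environments.cartpole.balancetask import DiscreteBalanceTask
from pybrain.rl.learners.valuebased import ActionValueNetwork, NFQ
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import EpisodicExperiment

# Cart-pole balancing: 4 observed state variables, 3 discrete actions
env = CartPoleEnvironment()
task = DiscreteBalanceTask(env, 100)        # each episode runs for at most 100 steps
module = ActionValueNetwork(4, 3)           # Q-value network: 4 inputs, 3 actions
agent = LearningAgent(module, NFQ())
experiment = EpisodicExperiment(task, agent)

for batch in range(10):
    rewards = experiment.doEpisodes(5)      # run 5 episodes; one per-step reward list per episode
    agent.learn()                           # fit NFQ to the episodes collected so far
    agent.reset()                           # clear the agent's history before the next batch

With an OptimizationAgent instead (PGPE, CMAES, or a hill climber, as in several examples below), EpisodicExperiment forwards doEpisodes(n) to the black-box optimizer, so there is no separate agent.learn() call.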

Example 1: train

# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def train():

    # Make the environment
    environment = TwentyFortyEightEnvironment()

    # The task is the game this time
    task = environment

    # Make the reinforcement learning agent (use a network because inputs are continuous)
    network = ActionValueNetwork(task.nSenses, task.nActions)

    # Use Q learning for updating the table (NFQ is for networks)
    learner = NFQ()
    learner.gamma = GAMMA

    agent = LearningAgent(network, learner)

    # Set up an experiment
    experiment = EpisodicExperiment(task, agent)

    # Train the Learner
    meanScores = []
    for i in xrange(LEARNING_EPOCHS):
        experiment.doEpisodes(GAMES_PER_EPOCH)
        print "Iteration ", i, " With mean score ", task.meanScore, "Max block achieved ", environment.maxGameBlock
        meanScores.append(task.meanScore)
        agent.learn()
        agent.reset()

    params = {"learningEpochs": LEARNING_EPOCHS, "gamesPerEpoch": GAMES_PER_EPOCH, "gamma": GAMMA }
    return meanScores, params, agent
Author: Aggregates, Project: MI_HW2, Lines: 33, Source: RLNFQ.py

Example 2: train

# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def train():

    # Make the environment
    environment = TwentyFortyEightEnvironment()

    # Store the environment as the task
    task = environment

    # Set up the Neural Network
    neuralNet = buildNetwork(task.nSenses, HIDDEN_NODES, task.nActions)

    # Use a Genetic Algorithm as the Trainer
    trainer = GA( populationSize=20, topProportion=0.2, elitism=False
                , eliteProportion=0.25, mutationProb=0.1
                , mutationStdDev=0.2, tournament=False
                , tournamentSize=2 )

    agent = OptimizationAgent(neuralNet, trainer)

    # Set up an experiment
    experiment = EpisodicExperiment(task, agent)

    # Train the network
    meanScores = []
    print "Starting HillClimberNN"
    for i in xrange(LEARNING_EPOCHS):
        experiment.doEpisodes(GAMES_PER_EPOCH)
        print "Training Iteration", i, "With mean score ", task.meanScore, "Max block achieved ", environment.maxGameBlock
        environment.maxGameBlock = 0
        meanScores.append(task.meanScore)

    params = {"learningEpochs": LEARNING_EPOCHS, "gamesPerEpoch": GAMES_PER_EPOCH, "hiddenNodes": HIDDEN_NODES }
    return meanScores, params, experiment
Author: Aggregates, Project: MI_HW2, Lines: 35, Source: hillclimberNN.py

Example 3: train

# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
	def train(self, episodes, maxSteps):
 	
		avgReward = 0

		# set up environment and task
		self.env = InfoMaxEnv(self.objectNames, self.actionNames, self.numCategories)
		self.task = InfoMaxTask(self.env, maxSteps=maxSteps, \
					do_decay_beliefs = True, uniformInitialBeliefs = True)

		# create neural net and learning agent
		self.params = buildNetwork(self.task.outdim, self.task.indim, \
						bias=True, outclass=SoftmaxLayer)

		if self._PGPE:
			self.agent = OptimizationAgent(self.params, PGPE(minimize=False,verbose=False))
		elif self._CMAES:
			self.agent = OptimizationAgent(self.params, CMAES(minimize=False,verbose=False))

		# init and perform experiment
		exp = EpisodicExperiment(self.task, self.agent)

		for i in range(episodes):        
			exp.doEpisodes(1)
			avgReward += self.task.getTotalReward()
			print "reward episode ",i,self.task.getTotalReward()

		# print initial info
		print "\naverage reward over training = ",avgReward/episodes

		# save trained network
		self._saveWeights()
Author: Kenkoko, Project: ua-ros-pkg, Lines: 33, Source: agent.py

Example 4: __init__

# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
class BaggerBot:
	def __init__(self, host, port, net=None):
		self.conn = ServerConnection(host, port)
		self.env = self.conn.env
		self.conn.join()
		self.task = SurviveTask(self.env, self.conn)
		self.net = buildNetwork(self.env.outdim, 4, self.env.indim, outclass=TanhLayer)
		self.agent = OptimizationAgent(self.net, PGPE())
		self.experiment = EpisodicExperiment(self.task, self.agent)

	def wait_connected(self):
		self.conn.wait_connected()

	def train(self):
		'''
		Infinitely play the game. Figure out the next move(s), parse incoming
		data, discard all that, do stupid stuff and die :)
		'''
		while self.env.in_game:
			# Ask to be spawned
			logging.info('Requesting spawn...')
			self.conn.send_spawn()
			while not self.env.playing:
				self.conn.parse_pregame()
			while self.env.playing:
				self.experiment.doEpisodes(100)
Author: Remboooo, Project: LoBotomy, Lines: 28, Source: baggerbot.py

Example 5: main

# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def main():
    """
    The task represents one full simulation. Therefore it is episodic.
    Each episode calls performAction after passing getObservation to the agent.
    Once isFinished is true, the reward is returned and one simulation is done.

    The net is the neural network. It has 7 input nodes, a hidden layer of 5
    nodes, and 2 output nodes. It is a feed-forward network using sigmoid
    activation functions.

    OptimizationAgent(module, learner)
    EpisodicExperiment.optimizer = learner
    learner.setEvaluator(task, module)
    optimizer.learn()
    """
    task = LanderTask(batchSize=1)
    net = buildNetwork(task.indim, 5, task.outdim)
    learner = StochasticHillClimber()
    agent = OptimizationAgent(net, learner)
    experiment = EpisodicExperiment(task, agent)
    experiment.doEpisodes(100000)

    tasks = [LanderTask(environment=Lander(acceleration=float(i)))
             for i in range(1, 4)]
    test_size = 1000
    for task in tasks:
        print("Running task with acceleration {}".format(task.env.acceleration))
        success = 0
        for _ in range(test_size):
            task.env.reset()
            while not task.isFinished():
                observation = task.getObservation()
                action = net.activate(observation)
                task.performAction(action)
            print("Finished a simulation with result {}".format(task.env.status))
            if task.env.status == 'landed':
                success += 1
        print("Succeeded {} times out of {}".format(success, test_size))
Author: andschwa, Project: uidaho-cs470-moonlander, Lines: 40, Source: main.py

Example 6: someEpisodes

# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def someEpisodes(game_env, net, discountFactor=0.99, maxSteps=100, avgOver=1, returnEvents=False):
    """ Return the fitness value for one episode of play, given the policy defined by a neural network. """
    task = GameTask(game_env)
    game_env.recordingEnabled = True        
    game_env.reset()        
    net.reset()
    task.maxSteps=maxSteps
    agent = LearningAgent(net)
    agent.learning = False
    agent.logging = False
    exper = EpisodicExperiment(task, agent)
    fitness = 0
    for _ in range(avgOver):
        rs = exper.doEpisodes(1)
        # add a slight bonus for more exploration, if rewards are identical
        fitness += len(set(game_env._allEvents)) * 1e-6
        # the true, discounted reward        
        fitness += sum([sum([v*discountFactor**step for step, v in enumerate(r)]) for r in rs])
    fitness /= avgOver
    if returnEvents:
        return fitness, game_env._allEvents
    else:
        return fitness
Author: sarobe, Project: VGDLEntityCreator, Lines: 25, Source: nomodel_pomdp.py

Example 7: run

# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def run(arg):
    task = arg[0]
    parameters = arg[1]
    #print "run with", parameters
    
    seed = parameters["seed"]
   

    process_id = hash(multiprocessing.current_process()._identity)
    numpy.random.seed(seed + process_id)


    
    
    render = False    
    plot = False
    
    plt.ion()
    
    env = CartPoleEnvironment()
    if render:
        renderer = CartPoleRenderer()
        env.setRenderer(renderer)
        renderer.start()
    
    task_class = getattr(cp, task)
    task = task_class(env, parameters["MaxRunsPerEpisode"])
    
    testtask = task_class(env, parameters["MaxRunsPerEpisodeTest"],desiredValue=None)

    #print "dim: ", task.indim, task.outdim

    from pybrain.tools.shortcuts import buildNetwork
    from pybrain.rl.agents import OptimizationAgent
    from pybrain.optimization import PGPE

    module = buildNetwork(task.outdim, task.indim, bias=False)
    # create agent with controller and learner (and its options)

    # % of random actions
    #learner.explorer.epsilon = parameters["ExplorerEpsilon"]
    
    
    agent = OptimizationAgent(module, PGPE(storeAllEvaluations = True,storeAllEvaluated=False, maxEvaluations=None,desiredEvaluation=1, verbose=False))
#
#    print agent
#    from pprint import pprint
#    pprint (vars(agent.learner))
    
    testagent = LearningAgent(module, None)
    experiment = EpisodicExperiment(task, agent)
    testexperiment = EpisodicExperiment(testtask, testagent)

    
    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)
    
    performance = []
    
    if plot:
        pf_fig = plt.figure()
    
    m = parameters["MaxTotalEpisodes"]/parameters["EpisodesPerLearn"]
    for episode in range(0,m):
    	# one learning step after one episode of world-interaction
        experiment.doEpisodes(parameters["EpisodesPerLearn"])
        #agent.learn(1)
    
        #renderer.drawPlot()
        
        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True
        
        if (episode) % parameters["TestAfter"] == 0:
            #print "Evaluating at episode: ", episode
            
            #experiment.agent = testagent
            #r = mean([sum(x) for x in testexperiment.doEpisodes(parameters["TestWith"])])
            #for i in range(0,parameters["TestWith"]):
#            y = testexperiment.doEpisodes(1)
#            print (agent.learner._allEvaluated)
#                
#            
#            from pprint import pprint
#            pprint (vars(task))
                
            l = parameters["TestWith"]
            
            task.N = parameters["MaxRunsPerEpisodeTest"]
            experiment.doEpisodes(l)
            task.N = parameters["MaxRunsPerEpisode"]

            resList = (agent.learner._allEvaluations)[-l:-1]
            
#.........part of the code omitted here.........
Author: nairboon, Project: bnrl, Lines: 103, Source: PGPE.py

Example 8: table

# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
#use Q learning for updating the table (NFQ is for networks)
learner = NFQ()

agent = LearningAgent(controller, learner)



#set up an experiment
experiment = EpisodicExperiment(task, agent)

meanscores = []
m = 0.0
for i in xrange(learning_eps):
    print i
    experiment.doEpisodes(games_per_ep)
    meanscores.append(task.meanscore)
    if meanscores[-1] > m:
        m = meanscores[-1]
        f = open("bestRL.pkl",'w')
        pickle.dump(agent,f)
        f.close()
    agent.learn()
    agent.reset()

import matplotlib.pyplot as plt
plt.plot(meanscores)

plt.title("Mean Agent Score Per Batch")
plt.show()
Author: jskye, Project: uon.2014.comp3330.hwa2.alevmy, Lines: 31, Source: runRL.py

Example 9: plotPerformance

# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def plotPerformance(values, fig):
    plt.figure(fig.number)
    plt.clf()
    plt.plot(values, 'o-')
    plt.gcf().canvas.draw()
    # Without the next line, the pyplot plot won't actually show up.
    plt.pause(0.001)

performance = []

if not render:
    pf_fig = plt.figure()

while(True):
	# one learning step after one episode of world-interaction
    experiment.doEpisodes(1)
    agent.learn(1)

    # test performance (these real-world experiences are not used for training)
    if render:
        env.delay = True
    experiment.agent = testagent
    r = mean([sum(x) for x in experiment.doEpisodes(5)])
    env.delay = False
    testagent.reset()
    experiment.agent = agent

    performance.append(r)
    if not render:
        plotPerformance(performance, pf_fig)
Author: vascobailao, Project: PYTHON, Lines: 32, Source: NFQ.py

Example 10: run

# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def run(arg):
    task = arg[0]
    parameters = arg[1]
    #print "run with", parameters
    
    seed = parameters["seed"]
   

    process_id = hash(multiprocessing.current_process()._identity)
    numpy.random.seed(seed + process_id)


    
    
    render = False    
    plot = False
    
    plt.ion()
    
    env = CartPoleEnvironment()
    if render:
        renderer = CartPoleRenderer()
        env.setRenderer(renderer)
        renderer.start()
    
    task_class = getattr(cp, task)
    task = task_class(env, parameters["MaxRunsPerEpisode"])
    testtask = task_class(env, parameters["MaxRunsPerEpisodeTest"])

    #print "dim: ", task.indim, task.outdim
    
    # to inputs state and 4 actions
    module = ActionValueNetwork(task.outdim, task.indim)
    

    learner = NFQ()
    # % of random actions
    learner.explorer.epsilon = parameters["ExplorerEpsilon"]
    
    
    agent = LearningAgent(module, learner)
    testagent = LearningAgent(module, None)
    experiment = EpisodicExperiment(task, agent)
    testexperiment = EpisodicExperiment(testtask, testagent)

    
    def plotPerformance(values, fig):
        plt.figure(fig.number)
        plt.clf()
        plt.plot(values, 'o-')
        plt.gcf().canvas.draw()
        # Without the next line, the pyplot plot won't actually show up.
        plt.pause(0.001)
    
    performance = []
    
    if plot:
        pf_fig = plt.figure()
    
    m = parameters["MaxTotalEpisodes"]/parameters["EpisodesPerLearn"]
    for episode in range(0,m):
    	# one learning step after one episode of world-interaction
        experiment.doEpisodes(parameters["EpisodesPerLearn"])
        agent.learn(1)
    
        #renderer.drawPlot()
        
        # test performance (these real-world experiences are not used for training)
        if plot:
            env.delay = True
        
        if (episode) % parameters["TestAfter"] == 0:
            #print "Evaluating at episode: ", episode
            
            #experiment.agent = testagent
            r = mean([sum(x) for x in testexperiment.doEpisodes(parameters["TestWith"])])
            
            env.delay = False
            testagent.reset()
            #experiment.agent = agent
        
            performance.append(r)
            if plot:
                plotPerformance(performance, pf_fig)
        
#            print "reward avg", r
#            print "explorer epsilon", learner.explorer.epsilon
#            print "num episodes", agent.history.getNumSequences()
#            print "update step", len(performance)
            
#    print "done"
    return performance
            
        #print "network",   json.dumps(module.bn.net.E, indent=2)
Author: nairboon, Project: bnrl, Lines: 96, Source: NFQ.py

Example 11: run

# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]

#.........part of the code omitted here.........
#    # switch this to True if you want to see the cart balancing the pole (slower)
#    render = False
#
#    plt.ion()
#
#    env = CartPoleEnvironment()
#    if render:
#        renderer = CartPoleRenderer()
#        env.setRenderer(renderer)
#        renderer.start()
#
#    module = ActionValueNetwork(4, 3)
#
#    task = DiscreteBalanceTask(env, 100)
#    learner = NFQ()
#    learner.explorer.epsilon = 0.4
#
#    agent = LearningAgent(module, learner)
#    testagent = LearningAgent(module, None)
#    experiment = EpisodicExperiment(task, agent)
#
#    performance = []
#
#    if not render:
#        pf_fig = plt.figure()

    count = 0
    while(True):
            # one learning step after one episode of world-interaction
        count += 1
        print "learning #",count
        experiment.agent = agent
        experiment.doOptimization = True
        erg = experiment.doEpisodes(1)
        print erg
        #experiment.doOptimization = False
        #print "agent learn"
        #agent.learner.learn(1)

        if count > 8:
        # test performance (these real-world experiences are not used for training)
#        if render:
#            env.delay = True
            #experiment.agent = testagent
            print "testing"
            experiment.doOptimization = False

            erg = experiment.doEpisodes(1)
            summe = 0
            #print erg
#            for x in erg:
#                summe = sum(x)
#            print summe
        #r = mean([sum(x) for x in experiment.doEpisodes(5)])
#        env.delay = False
#            testagent.reset()
        

#        performance.append(r)
#        if not render:
#            plotPerformance(performance, pf_fig)

#        print "reward avg", r
#        print "explorer epsilon", learner.explorer.epsilon
#        print "num episodes", agent.history.getNumSequences()
#        print "update step", len(performance)
Author: c0de2014, Project: nao-control, Lines: 70, Source: grabbingTest.py

Example 12: CCRLEnvironment

# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
__author__ = 'Stubborn'

from pybrain.rl.environments.ode import CCRLEnvironment
from pybrain.rl.environments.ode.tasks import CCRLGlasTask
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules.tanhlayer import TanhLayer
from pybrain.optimization import PGPE
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment

environment = CCRLEnvironment()
task = CCRLGlasTask(environment)

net = buildNetwork(len(task.getObservation()), 4, environment.indim, outclass=TanhLayer)

agent = OptimizationAgent(net, PGPE())

experiment = EpisodicExperiment(task, agent)

for updates in range(20000):
    experiment.doEpisodes(1)



Author: AkselBH, Project: QLearning, Lines: 23, Source: Pybrain+tutvideo+3+Robotarm.py

Example 13: mazeEnv

# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
  side = 9
  goal = 3,2

  env = mazeEnv(structure, goal)   #use maze environment for now; note pos is Y,X

  # our own task and environment for later
  #env = policyEnv()
  thetask = MDPMazeTaskEpisodic(env)

  # create neural net; create and train agent
  theparams = buildNetwork(thetask.outdim, thetask.indim, bias=False)
  agent = OptimizationAgent(theparams, CMAES())
  exp = EpisodicExperiment(thetask, agent)

  # train agent        
  exp.doEpisodes(NUM_EPISODES)
  print "\ntotal reward = ",thetask.getTotalReward()

  #print "\n"
  #print "initial weights: "; print theparams.params
  print "\n"
  print "NOTE positions below are (Y,X)"

  print "\n"
  print "getting observation 1"
  print "robot = ",thetask.getObservation()
  print "goal  = ",goal
  print "reward: ", thetask.getReward()

  print "\n"
  print "performing action 1"
Author: krylenko, Project: python, Lines: 33, Source: INFOMAX__policyWrapper.py

Example 14: FiniteDifferenceAgent

# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
# create agent with controller and learner
agent = FiniteDifferenceAgent(net, SPLA())
# learning options
agent.learner.gd.alpha = 0.05
agent.learner.gdSig.alpha = 0.1
agent.learner.gd.momentum = 0.0
agent.learner.epsilon = 2.0
agent.learner.initSigmas()

sr = []

experiment = EpisodicExperiment(task, agent)
for updates in range(1000):
    # training step
    for i in range(5):
        experiment.doEpisodes(10)
        agent.learn()
        print "parameters:", agent.module.params
        agent.reset()
        
    # learning step
    agent.disableLearning()
    experiment.doEpisodes(50)
    # append mean reward to sr array
    ret = []
    for n in range(agent.history.getNumSequences()):
        state, action, reward, _ = agent.history.getSequence(n)
        ret.append( sum(reward, 0).item() )
    sr.append(mean(ret))
        
    agent.enableLearning()
Author: HKou, Project: pybrain, Lines: 33, Source: flexCubeNAC.py

Example 15: run_experiment

# Required import: from pybrain.rl.experiments import EpisodicExperiment [as alias]
# Or: from pybrain.rl.experiments.EpisodicExperiment import doEpisodes [as alias]
def run_experiment():
    # Create the controller network
    HIDDEN_NODES = 4

    RUNS = 2
    BATCHES = 1
    PRINTS = 1
    EPISODES = 500

    env = None
    start_state_net = None

    run_results = []

    # Set up plotting tools for the experiments
    tools = ExTools(BATCHES, PRINTS)

    # Run the experiment
    for run in range(RUNS):
        if run == 0:
            continue

        # If an environment already exists, shut it down
        if env:
            env.closeSocket()

        # Create the environment
        env = create_environment()

        # Create the task
        task = Pa10MovementTask(env)

        # Create the neural network. Only create the network once so it retains
        # the same starting values for each run.
        if start_state_net:
            net = start_state_net.copy()
        else:
            # Create the initial neural network
            net = create_network(
                    in_nodes=env.obsLen,
                    hidden_nodes=HIDDEN_NODES,
                    out_nodes=env.actLen
            )
            start_state_net = net.copy()

        # Create the learning agent
        learner = HillClimber(storeAllEvaluations=True)
        agent = OptimizationAgent(net, learner)
        tools.agent = agent

        # Create the experiment
        experiment = EpisodicExperiment(task, agent)

        # Perform all episodes in the run
        for episode in range(EPISODES):
            experiment.doEpisodes(BATCHES)

        # Calculate results
        all_results = agent.learner._allEvaluations
        max_result = np.max(all_results)
        min_result = np.min(all_results)
        avg_result = np.sum(all_results) / len(all_results)
        run_results.append((run, max_result, min_result, avg_result))

        # Make the results directory if it does not exist
        if not os.path.exists(G_RESULTS_DIR):
            os.mkdir(G_RESULTS_DIR)

        # Write all results to the results file
        with open(os.path.join(G_RESULTS_DIR, 'run_%d.txt' % run), 'w+') as f:
            # Store the calculated max, min, avg
            f.write('RUN, MAX, MIN, AVG\n')
            f.write('%d, %f, %f, %f\n' % (run, max_result, min_result, avg_result))

            # Store all results from this run
            f.write('EPISODE, REWARD\n')
            for episode, result in enumerate(all_results):
                f.write('%d, %f\n' % (episode, result))

    return
Author: evansneath, Project: surgicalsim, Lines: 82, Source: start_environment.py


Note: The pybrain.rl.experiments.EpisodicExperiment.doEpisodes examples in this article were compiled by 纯净天空 from code hosted on open-source platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers; copyright of the source code remains with the original authors, and distribution and use should follow the license of the corresponding project. Do not reproduce without permission.