This article collects typical usage examples of the Python method pybrain.datasets.SequentialDataSet.clear. If you are wondering what SequentialDataSet.clear does, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also look further into usage examples of the containing class, pybrain.datasets.SequentialDataSet.
Two code examples of the SequentialDataSet.clear method are shown below, ordered by popularity by default.
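Before the examples, here is a minimal sketch of what clear() does on a SequentialDataSet. The dimensions (2 inputs, 1 target) and the sample values are arbitrary placeholders chosen for illustration:

from pybrain.datasets import SequentialDataSet

ds = SequentialDataSet(2, 1)         # 2-dimensional input, 1-dimensional target
ds.newSequence()                     # start a new sequence
ds.addSample((0.1, 0.2), (0.3,))     # append one (input, target) pair
print(len(ds))                       # -> 1 sample stored
ds.clear()                           # drop all stored samples so the dataset can be refilled
print(len(ds))                       # -> 0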
Example 1: RWR
# Required import: from pybrain.datasets import SequentialDataSet [as alias]
# Or: from pybrain.datasets.SequentialDataSet import clear [as alias]
class RWR(DirectSearchLearner):
    """ Reward-weighted regression.
    The algorithm is currently limited to discrete-action episodic tasks, subclasses of POMDPTasks.
    """

    # parameters
    batchSize = 20

    # feedback settings
    verbose = True
    greedyRuns = 20
    supervisedPlotting = False

    # settings for the supervised training
    learningRate = 0.005
    momentum = 0.9
    maxEpochs = 20
    validationProportion = 0.33
    continueEpochs = 2

    # parameters for the variation that uses a value function
    # TODO: split into 2 classes.
    valueLearningRate = None
    valueMomentum = None
    #valueTrainEpochs = 5
    resetAllWeights = False
    netweights = 0.01

    def __init__(self, net, task, valueNetwork=None, **args):
        self.net = net
        self.task = task
        self.setArgs(**args)
        if self.valueLearningRate == None:
            self.valueLearningRate = self.learningRate
        if self.valueMomentum == None:
            self.valueMomentum = self.momentum
        if self.supervisedPlotting:
            from pylab import ion
            ion()

        # adaptive temperature:
        self.tau = 1.

        # prepare the datasets to be used
        self.weightedDs = ImportanceDataSet(self.task.outdim, self.task.indim)
        self.rawDs = ReinforcementDataSet(self.task.outdim, self.task.indim)
        self.valueDs = SequentialDataSet(self.task.outdim, 1)

        # prepare the supervised trainers
        self.bp = BackpropTrainer(self.net, self.weightedDs, self.learningRate,
                                  self.momentum, verbose=False,
                                  batchlearning=True)

        # CHECKME: outsource
        self.vnet = valueNetwork
        if valueNetwork != None:
            self.vbp = BackpropTrainer(self.vnet, self.valueDs, self.valueLearningRate,
                                       self.valueMomentum, verbose=self.verbose)

        # keep information:
        self.totalSteps = 0
        self.totalEpisodes = 0

    def shapingFunction(self, R):
        return exp(self.tau * R)

    def updateTau(self, R, U):
        self.tau = sum(U) / dot((R - self.task.minReward), U)

    def reset(self):
        self.weightedDs.clear()
        self.valueDs.clear()
        self.rawDs.clear()
        self.bp.momentumvector *= 0.0
        if self.vnet != None:
            self.vbp.momentumvector *= 0.0
            if self.resetAllWeights:
                self.vnet.params[:] = randn(len(self.vnet.params)) * self.netweights

    def greedyEpisode(self):
        """ Run one episode with greedy decisions, return the list of rewards received. """
        rewards = []
        self.task.reset()
        self.net.reset()
        while not self.task.isFinished():
            obs = self.task.getObservation()
            act = self.net.activate(obs)
            chosen = argmax(act)
            self.task.performAction(chosen)
            reward = self.task.getReward()
            rewards.append(reward)
        return rewards

    def learn(self, batches):
        self.greedyAvg = []
        self.rewardAvg = []
        self.lengthAvg = []
        self.initr0Avg = []
        for b in range(batches):
#.........the rest of this code example is omitted.........
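The clear() calls in this example live in reset(): between batches, the weighted, value and raw datasets are all emptied before being refilled. As a rough, hypothetical calling pattern (assuming net is a pybrain network and task an episodic task built elsewhere; both names are placeholders):

learner = RWR(net, task, batchSize=10)   # extra keyword arguments are applied via setArgs
learner.learn(batches=5)                 # run a few batches of reward-weighted regression
learner.reset()                          # empties weightedDs, valueDs and rawDs via clear()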
Example 2: ModelExperiment
# Required import: from pybrain.datasets import SequentialDataSet [as alias]
# Or: from pybrain.datasets.SequentialDataSet import clear [as alias]
class ModelExperiment(EpisodicExperiment):
    """ An experiment that learns a model of its (action, state) pair
        with a Gaussian Process for each dimension of the state.
    """

    def __init__(self, task, agent):
        EpisodicExperiment.__init__(self, task, agent)

        # create model and training set (action dimension + 1 for time)
        self.modelds = SequentialDataSet(self.task.indim + 1, 1)
        self.model = [GaussianProcess(indim=self.modelds.getDimension('input'),
                                      start=(-10, -10, 0), stop=(10, 10, 300), step=(5, 5, 100))
                      for _ in range(self.task.outdim)]

        # change hyper parameters for all gps
        for m in self.model:
            m.hyper = (20, 2.0, 0.01)
            # m.autonoise = True

    def doEpisodes(self, number=1):
        """ Returns the rewards of each step as a list and learns
            the model for each rollout.
        """
        all_rewards = []

        for dummy in range(number):
            self.stepid = 0
            rewards = []
            # the agent is informed of the start of the episode
            self.agent.newEpisode()
            self.task.reset()
            while not self.task.isFinished():
                r = self._oneInteraction()
                rewards.append(r)
            all_rewards.append(rewards)

            # clear model dataset (to retrain it)
            self.modelds.clear()

            print("retrain gp")
            [m.trainOnDataset(self.modelds) for m in self.model]

            for i in range(self.agent.history.getNumSequences()):
                seq = self.agent.history.getSequence(i)
                state, action, dummy, dummy = seq

                l = len(action)
                index = list(map(lambda x: int(floor(x)), mgrid[0:l - 1:5j]))
                action = action[index, :]
                inp = c_[action, array([index]).T]
                self.modelds.setField('input', inp)

                # add training data to all gaussian processes
                for i, m in enumerate(self.model):
                    tar = state[index, i]
                    self.modelds.setField('target', array([tar]).T)
                    m.addDataset(self.modelds)

            # print "updating GPs..."
            # [m._calculate() for m in self.model]
            # print "done."

        return all_rewards

    def _oneInteraction(self):
        self.stepid += 1
        obs = self.task.getObservation()
        self.agent.integrateObservation(obs)
        action = self.agent.getAction()
        self.task.performAction(action)

        # predict with model
        # modelobs = array([0, 0, 0])
        # time dimension
        # if self.stepid < self.model[0].stop:
        #     steps = self.model[0].step
        #
        #     # linear interpolation between two adjacent gp states
        #     try:
        #         modelobs = [(1.0 - float(self.stepid % steps) / steps) * self.model[i].pred_mean[int(floor(float(self.stepid) / steps))] +
        #                     (float(self.stepid % steps) / steps) * self.model[i].pred_mean[int(ceil(float(self.stepid) / steps))]
        #                     for i in range(self.task.outdim)]
        #     except IndexError:
        action = r_[action, array([self.stepid])]
        action = reshape(action, (1, 3))
        modelobs = [self.model[i].testOnArray(action) for i in range(self.task.outdim)]

        # tell environment about model obs
        self.task.env.model = [modelobs]

        reward = self.task.getReward()
        self.agent.giveReward(reward)
        return reward
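The clear() call in doEpisodes() follows a clear-then-refill pattern: the model dataset is wiped after each rollout and then repopulated with setField before the Gaussian processes see it again. A minimal sketch of that pattern in isolation (the arrays below are made-up placeholders whose shapes match a 3-dimensional input, 1-dimensional target dataset):

from numpy import array
from pybrain.datasets import SequentialDataSet

modelds = SequentialDataSet(3, 1)    # e.g. 2 action dimensions + 1 time dimension
modelds.clear()                      # wipe whatever the previous rollout left behind
modelds.setField('input', array([[0.0, 0.0, 0.0], [1.0, 1.0, 5.0]]))
modelds.setField('target', array([[0.5], [0.7]]))
# the dataset can now be handed to each GaussianProcess, as addDataset does above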