This page collects typical code examples of the Python method pybrain.supervised.trainers.BackpropTrainer.setData. If you have been wondering what BackpropTrainer.setData does, how to use it, or where to find working examples of it, the curated samples below may help. You can also read more about the containing class, pybrain.supervised.trainers.BackpropTrainer.
The following shows 14 code examples of BackpropTrainer.setData, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
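Before the examples, here is a minimal, self-contained sketch of the pattern they all share: build a trainer (with or without a dataset), attach a dataset later through setData, then call one of the training methods. The network shape, XOR-style samples, and epoch count below are illustrative assumptions, not taken from any example on this page.

# Minimal sketch (assumed setup): attach data to a BackpropTrainer via setData.
from pybrain.tools.shortcuts import buildNetwork
from pybrain.datasets import SupervisedDataSet
from pybrain.supervised.trainers import BackpropTrainer

net = buildNetwork(2, 3, 1, bias=True)      # 2 inputs, 3 hidden units, 1 output (illustrative)
ds = SupervisedDataSet(2, 1)                # input dim 2, target dim 1
for inp, target in [((0, 0), 0), ((0, 1), 1), ((1, 0), 1), ((1, 1), 0)]:
    ds.addSample(inp, (target,))            # XOR-style toy data (illustrative)

trainer = BackpropTrainer(net)              # created without a dataset
trainer.setData(ds)                         # attach (or swap in) the dataset here
for epoch in range(100):                    # epoch count is arbitrary
    err = trainer.train()                   # one epoch per call; returns the epoch error
print(err)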
Example 1: main
# Required import: from pybrain.supervised.trainers import BackpropTrainer [as alias]
# Or: from pybrain.supervised.trainers.BackpropTrainer import setData [as alias]
def main():
    for stock in STOCK_TICKS:
        # Download data
        get_data(stock)
        # Import data
        days = extract_data(stock)
        today = days.pop(0)
        # Build the dataset
        data_set = ClassificationDataSet(INPUT_NUM, 1, nb_classes=2)
        for day in days:
            target = 0
            if day.change > 0:
                target = 1
            data_set.addSample(day.return_metrics(), [target])
        # Build the network
        network = buildNetwork(INPUT_NUM, MIDDLE_NUM, MIDDLE_NUM, OUTPUT_NUM)
        # Train the network
        trainer = BackpropTrainer(network)
        trainer.setData(data_set)
        trainer.trainUntilConvergence(maxEpochs=EPOCHS_MAX)
        # Activate the network on today's metrics
        prediction = network.activate(today.return_metrics())
        print(prediction)
Example 2: PHC_NN
# Required import: from pybrain.supervised.trainers import BackpropTrainer [as alias]
# Or: from pybrain.supervised.trainers.BackpropTrainer import setData [as alias]
class PHC_NN(PHC_FA):
    '''PHC with neural function approximation.'''
    delta = 0.1
    maxNumberofAverage = 30
    weightdecay = 0.001
    trainingEpochPerUpdateWight = 2

    def __init__(self, num_features, num_actions, indexOfAgent=None):
        PHC_FA.__init__(self, num_features, num_actions, indexOfAgent)
        self.linQ = buildNetwork(num_features + num_actions,
                                 (num_features + num_actions), 1,
                                 hiddenclass=SigmoidLayer, outclass=LinearLayer)
        self.linPolicy = buildNetwork(num_features,
                                      (num_features + num_actions), num_actions,
                                      hiddenclass=SigmoidLayer, outclass=SigmoidLayer)
        self.trainer4LinQ = BackpropTrainer(self.linQ, weightdecay=self.weightdecay)
        self.trainer4LinPolicy = BackpropTrainer(self.linPolicy, weightdecay=self.weightdecay)

    def _pi(self, state):
        """Given a state, compute probabilities for each action."""
        values = np.array(self.linPolicy.activate(r_[state]))
        z = np.sum(values)
        return (values / z).flatten()

    def _qValues(self, state):
        """Return a vector of Q-values for all actions, given the state(-features)."""
        values = np.array([self.linQ.activate(r_[state, one_to_n(i, self.num_actions)])
                           for i in range(self.num_actions)])
        return values.flatten()

    def _updateWeights(self, state, action, reward, next_state):
        """state and next_state are vectors; action is an integer."""
        # Update the Q-value function approximator
        target = reward + self.rewardDiscount * max(self._qValues(next_state))
        inp = r_[asarray(state), one_to_n(action, self.num_actions)]
        self.trainer4LinQ = BackpropTrainer(self.linQ, weightdecay=self.weightdecay)
        ds = SupervisedDataSet(self.num_features + self.num_actions, 1)
        ds.addSample(inp, target)
        self.trainer4LinQ.trainOnDataset(ds)
        # Update the policy
        bestAction = r_argmax(self._qValues(state))
        target = one_to_n(bestAction, self.num_actions)
        inp = r_[asarray(state)]
        ds = SupervisedDataSet(self.num_features, self.num_actions)
        ds.addSample(inp, target)
        self.trainer4LinPolicy = BackpropTrainer(self.linPolicy,
                                                 learningrate=self.delta,
                                                 weightdecay=self.weightdecay)
        self.trainer4LinPolicy.setData(ds)
        self.trainer4LinPolicy.trainEpochs(epochs=self.trainingEpochPerUpdateWight)
Example 3: outputUnits
# Required import: from pybrain.supervised.trainers import BackpropTrainer [as alias]
# Or: from pybrain.supervised.trainers.BackpropTrainer import setData [as alias]
outmatrix = outputUnits(entry)
lpos = 0
ds = SupervisedDataSet(NUMINPUTS, NUMOUTPUTS)
for letterContexts in wordstream(input_entries=(entry,)):
    #print("letterContexts", letterContexts)
    for inarray in convertToBinary(letterContexts):
        outarray = outmatrix[lpos]
        #print("inarray", inarray)
        #print("outarray", outarray)
        #print("inlen %d outlen %d" % (len(inarray), len(outarray)))
        ds.addSample(inarray, outarray)
        observed = net.activate(inarray)
        phoneme = entry.phonemes[lpos]
        observedPhoneme = closestByDotProduct(observed[:MINSTRESS], articFeatures)
        phonemeErrors.append(bool(phoneme != observedPhoneme))
        stress = entry.stress[lpos]
        observedStress = closestByDotProduct(observed[MINSTRESS:], stressFeatures)
        stressErrors.append(bool(stress != observedStress))
        lpos += 1
trainer.setData(ds)
#pdb.set_trace()
err = trainer.train()
#print(err, " ", entry)
print("accuracy: phonemes %.3f stresses %.3f" % (1 - np.mean(phonemeErrors), 1 - np.mean(stressErrors)))
# accuracy is a vector with one element in {0,1} for each letter
# that we have trained so far;
# make that two vectors, one for phoneme and one for stress.
Example 4: xrange
# Required import: from pybrain.supervised.trainers import BackpropTrainer [as alias]
# Or: from pybrain.supervised.trainers.BackpropTrainer import setData [as alias]
    else:
        input_rates.append(0)
    prev = exchange_rates[i][1]
for i in range(window_s + INPUT_LEN, window_s + INPUT_LEN + OUTPUT_LEN):
    output_rates.append(exchange_rates[i][1])
y_arr = np.array(output_rates)
angle = np.polyfit(x_arr, y_arr, 1)[0]
# print("learn_angle " + str(angle))
# print("add angle " + str(angle))
# print("add input_rates len " + str(len(input_rates)))
ds.addSample(input_rates, [angle])
trainer = BackpropTrainer(rnn_net, **parameters)
trainer.setData(ds)
trainer.train()
del ds  # release memory
# predict
rnn_net.reset()
dump_fd = open("./rnn_net.dump", "wb")  # binary mode, so pickle.dump works on Python 3
pickle.dump(rnn_net, dump_fd)
### training end
# frslt = open('../test/rnn_result8.csv', 'w')
# frslt.write(enroll_id_str + "," + str(result[0]) + "\n")
portfolio = 1000000
Example 5: Predictor
# Required import: from pybrain.supervised.trainers import BackpropTrainer [as alias]
# Or: from pybrain.supervised.trainers.BackpropTrainer import setData [as alias]
class Predictor():
    def __init__(self, inSize, outSize, LearningRate):
        self.learning_rate = LearningRate
        self.ds = SupervisedDataSet(inSize, outSize)
        self.net = buildNetwork(inSize, 10, outSize, hiddenclass=TanhLayer, bias=True)
        self.trainer = BackpropTrainer(self.net, self.ds, learningrate=self.learning_rate,
                                       verbose=False, weightdecay=WEIGHT_DECAY)
        self.prediction = [0] * outSize
        self.mse = 100
        self.age = 0
        # Specific to Mai's code. Make input and output masks.
        self.inputMask = [1 for i in range(inSize)]
        # self.outputMask = [random.randint(0, 1) for i in range(outSize)]
        self.outputMask = [0] * outSize
        r = random.randint(0, outSize - 1)
        self.outputMask[r] = 1
        self.error = 0
        self.errorHistory = []
        self.dErrorHistory = []
        self.slidingError = 0
        self.dError = 0
        self.fitness = 0
        self.problem = r
        self.previousData = []

    def getPrediction(self, input):
        out = self.net.activate(input)
        return out

    def trainPredictor(self):
        self.age += 1
        new_ds = deepcopy(self.ds)
        if FLAGS.sliding_training:
            if len(self.previousData) != 0:
                for sample, target in self.previousData:
                    new_ds.addSample(sample, target)
        self.trainer.setData(new_ds)
        for i in range(FLAGS.epochs):
            e = self.trainer.train()
        if FLAGS.sliding_training:
            self.previousData = deepcopy(self.ds)
        # Update possible fitness indicators.
        # Error now
        self.error = e
        # Entire error history
        if len(self.errorHistory) < 5:
            self.errorHistory.append(e)
        else:
            for i in range(len(self.errorHistory) - 1):
                self.errorHistory[i] = self.errorHistory[i + 1]
            self.errorHistory[-1] = e
        # Sliding-window error over approximately the last 10 episodes' characteristic time.
        self.slidingError = self.slidingError * 0.9 + self.error
        # Instantaneous difference in error between the last two episodes.
        if len(self.errorHistory) > 1:
            self.dError = self.errorHistory[-1] - self.errorHistory[-2]
        return e

    def getFitness(self, type):
        fit = 0
        # Fitness function 1: Chrisantha's attempt
        if type == 0:  # Simple "minimize prediction error" fitness function for predictors.
            # fit = -self.dError / (1.0 * self.error)
            fit = -self.error
        elif type == 1:
            # Fitness function 2: Mai's attempt (probably needs adaptive thresholds to be OK)
            if self.error > ERROR_THRESHOLD and self.dError > DERROR_THRESHOLD:
                fit = 0
            else:
                fit = 1
        self.fitness = fit
        return fit

    def storeDataPoint(self, inputA, targetA):
        self.ds.addSample(inputA, targetA)

    def predict(self, inputA):
        return self.net.activate(inputA)
Example 6: row_preprocess
# Required import: from pybrain.supervised.trainers import BackpropTrainer [as alias]
# Or: from pybrain.supervised.trainers.BackpropTrainer import setData [as alias]
from pybrain.tools.validation import CrossValidator, ModuleValidator

translation = {"x": 0, "o": 1, "b": 2}

def row_preprocess(row):
    return [translation[x] for x in row]

if __name__ == "__main__":
    raw_data = list(csv.reader(open("tic-tac-toe.data")))
    targets = [1 if x[-1] == "positive" else 0 for x in raw_data]
    inputs = [row_preprocess(x[:-1]) for x in raw_data]
    alldata = ClassificationDataSet(9, class_labels=["negative", "positive"])
    for (i, t) in zip(inputs, targets):
        alldata.addSample(i, [t])
    network = buildNetwork(9, 3, 1, hiddenclass=SigmoidLayer, outclass=LinearLayer)
    # validation_data, training_data = alldata.splitWithProportion(0.25)
    trainer = BackpropTrainer(network, verbose=True, weightdecay=0.001, learningrate=0.1)
    trainer.setData(alldata)
    trainer.trainUntilConvergence(maxEpochs=6000)
    validator = CrossValidator(trainer, alldata, n_folds=10, valfunc=ModuleValidator.MSE)
    ret = validator.validate()
    print(ret)
Example 7: len
# Required import: from pybrain.supervised.trainers import BackpropTrainer [as alias]
# Or: from pybrain.supervised.trainers.BackpropTrainer import setData [as alias]
print "errorrr!!!"
print len(fv1)
print len(fv2)
continue
fv_diff = numpy.atleast_2d(numpy.asarray(fv1) - numpy.asarray(fv2))
#print fv_diff.shape
#print len(fv_diff[0].tolist())
print count
#cPickle.dump([0,1],open('lol','wb'))
#cPickle.load(open('lol','rb'))
#####
# pybrain training
######
dataSet = SupervisedDataSet(67584, 2)
dataSet.addSample(fv_diff[0].tolist(),y)
trainer.setData(dataSet)
print "training"
trainer.train()
print "finished train"
res=net.activate(fv_diff[0].tolist())
if((res[0]>res[1]) & (y[0]>y[1])):
tp=tp+1
elif((res[0]<=res[1]) & (y[0]>y[1])):
fp=fp+1
elif((res[0]>res[1]) & (y[0]<y[1])):
fn=fn+1
########
# fann
########
if(count%interval == 0):
#ann.train(fv_diff[0].tolist(),y)
Example 8: SupervisedDataSet
# Required import: from pybrain.supervised.trainers import BackpropTrainer [as alias]
# Or: from pybrain.supervised.trainers.BackpropTrainer import setData [as alias]
# Oversample: append the same (events, truth) pair eight times before shuffling
for _ in range(8):
    t_ds_list.append((events_list, truth_val_list))
t_ds = SupervisedDataSet(events_len, 1)
random.shuffle(t_ds_list)
for data in t_ds_list:
    t_ds.addSample(data[0], data[1])
trainer = BackpropTrainer(rnn_net, **parameters)
trainer.setData(t_ds)
trainer.train()
del t_ds  # release memory
# predict
rnn_net.reset()
frslt = open('../test/rnn_result8.csv', 'w')
fts = open('../test/rnn_test.csv', 'r')
for tsline in fts:
    splited = tsline.split(",")
    enroll_id_str = str(int(splited[0]))
    rvsd = splited[1:]
    rvsd.reverse()
Example 9: _train_CV
# Required import: from pybrain.supervised.trainers import BackpropTrainer [as alias]
# Or: from pybrain.supervised.trainers.BackpropTrainer import setData [as alias]
def _train_CV(self, perms, n_folds=5, num_neuron=50, learning_rate_input=0.01,
              decay=0.01, maxEpochs_input=200, verbose_input=True):
    '''Call the class in model validators and do cross-validation.'''
    perf_tst = 0
    perf_trn = 0
    # Set up network
    # NetworkWriter.writeToFile(net, 'temp_net_this.xml')
    # Set up trainer
    # Note that the dataset has not been set here;
    # it is dealt with inside the CV loop.
    for i in range(n_folds):
        # shuffle out the indices of the training data and the test data
        train_perms_idxs = list(range(n_folds))
        train_perms_idxs.pop(i)
        temp_list = []
        for train_perms_idx in train_perms_idxs:
            temp_list.append(perms[train_perms_idx])
        # These are the indices for the training data
        train_idxs = np.concatenate(temp_list)
        # and this is the index for the test set:
        test_idxs = perms[i]
        print("Training on part:", i, '.......')
        trn_ds_ann, tst_ds_ann = self._set_dataset(train_idxs, test_idxs)
        # Initialize network and training object
        # net_this = NetworkReader.readFrom('temp_net_this.xml')
        net_this = buildNetwork(self.indim,
                                num_neuron,
                                self.outdim, bias=True,
                                hiddenclass=SigmoidLayer)
        trainer_this = BackpropTrainer(net_this, learningrate=learning_rate_input,
                                       weightdecay=decay,
                                       verbose=verbose_input)
        # set up training data for the trainer
        trainer_this.setData(trn_ds_ann)
        # train the requested number of times:
        trainer_this.trainEpochs(maxEpochs_input)
        for iter in range(self.max_Epoches):
            # trainer_this.trainUntilConvergence(verbose=False, maxEpochs=maxEpochs_input)
            trainer_this.trainEpochs(1)
            trn_error = self._net_performance(net_this, trn_ds_ann)
            tst_error = self._net_performance(net_this, tst_ds_ann)
            if verbose_input == True:
                print("Training", iter + 1, "times")
                print("the trn error is:", trn_error)
                print("the test error is:", tst_error)
        perf_this_tst = self._net_performance(net_this, tst_ds_ann)
        perf_this_trn = self._net_performance(net_this, trn_ds_ann)
        print('The performance of this fold on Test is:', perf_this_tst)
        print('The performance of this fold on Training is:', perf_this_trn)
        print('Number of neurons:', num_neuron)
        print('###')
        perf_tst = perf_tst + perf_this_tst
        perf_trn = perf_trn + perf_this_trn
    perf_tst /= n_folds
    perf_trn /= n_folds
    print(perf_tst, perf_trn)
    return perf_tst, perf_trn
Example 10: ANN_edge_analysis
# Required import: from pybrain.supervised.trainers import BackpropTrainer [as alias]
# Or: from pybrain.supervised.trainers.BackpropTrainer import setData [as alias]
def ANN_edge_analysis(a_network, a_gene, a_dataset, boot_val):
    """Creates and trains a network built to reflect the structure of the hypothesized network."""
    regulatory_network = FeedForwardNetwork()
    # retrieving needed parameters from the input network
    data_node_list = get_sub_list_from_network(a_network, a_gene, "gene,TF", 1)
    # Need to add +1 node to the input layer that represents the "other" control variables
    # describing network modules to be used
    inLayer = LinearLayer(len(data_node_list) - 1)
    #hiddenLayer = LinearLayer(len(data_node_list) - 1)
    outLayer = LinearLayer(1)
    # Adding layers to the network
    regulatory_network.addInputModule(inLayer)
    #regulatory_network.addModule(hiddenLayer)
    regulatory_network.addOutputModule(outLayer)
    # Adding connections between layers
    #in_to_hidden = LinearConnection(inLayer, hiddenLayer)
    #hidden_to_out = FullConnection(hiddenLayer, outLayer)
    in_to_out = FullConnection(inLayer, outLayer)
    #regulatory_network.addConnection(in_to_hidden)
    #regulatory_network.addConnection(hidden_to_out)
    regulatory_network.addConnection(in_to_out)
    get_nn_details(regulatory_network)
    # Finalize the network topology
    regulatory_network.sortModules()
    # Formatting the dataset
    input_dimention = len(data_node_list) - 1
    print("in_dimention =", input_dimention)
    DS = SupervisedDataSet(input_dimention, 1)
    # Adding data; there may be an ordering problem here where TFs are not always the same... seems OK though
    for experiment in a_dataset:
        tf_list = []
        gene_list = []
        tf_labels = []
        for TF in data_node_list:
            if TF != a_gene:
                #print TF, "<---"
                tf_list.append(experiment[TF])
                tf_labels.append(TF)
            else:
                #print TF, "<---gene"
                gene_list.append(experiment[TF])
        print(tf_list)
        print(gene_list)
        if (check_missing_experiments(tf_list) == True) and (check_missing_experiments(gene_list) == True):
            float_tf_list = [float(i) for i in tf_list]
            float_gene_list = [float(i) for i in gene_list]
            DS.appendLinked(float_tf_list, float_gene_list)
    print("......")
    print(DS)
    # Training
    trainer = BackpropTrainer(regulatory_network, momentum=0.1, verbose=True, weightdecay=0.01)
    trainer.setData(DS)
    result_list = []
    boot_count = 0
    while boot_count < boot_val:
        #trainer.trainEpochs(1000)
        trainer.trainUntilConvergence(validationProportion=0.25)
        print(regulatory_network)
        this = get_nn_details(regulatory_network)
        result_list.append(this)
        regulatory_network.reset()
        boot_count += 1
    print(tf_labels)
    print(regulatory_network.params)
    print(in_to_out.params)
    print(inLayer)
    pesos_conexiones(regulatory_network)
    NetworkWriter.writeToFile(regulatory_network, 'trained_net.xml')
#......... (part of the code omitted here) .........
Example 11: train_the_nn
# Required import: from pybrain.supervised.trainers import BackpropTrainer [as alias]
# Or: from pybrain.supervised.trainers.BackpropTrainer import setData [as alias]
def train_the_nn(max_iterations, iterations_between_reports, train_percent_of_dataset, layerDims):
    print('creating total dict')
    fv_dict = this_create_total_dict(0, train_percent_of_dataset)
    print('finished creating dict')
    ######
    # pybrain
    ###########
    print("building network")
    net = buildNetwork(*layerDims, hiddenclass=SigmoidLayer, outclass=SoftmaxLayer)
    print("finished building network")
    #net.addInputModule(LinearLayer(67584, 'visible'))
    trainer = BackpropTrainer(net)
    #trainer = pybrain.supervised.trainers.BackpropTrainer(net, ds, learningrate=0.001, momentum=0.99)
    ##########
    # fann
    #########
    #ann = libfann.neural_net()
    #ann.create_standard_array((67584, 300, 100, 2))
    #ann.set_learning_rate(learning_rate)
    #ann.set_activation_function_output(libfann.SIGMOID_SYMMETRIC_STEPWISE)
    fn = 0
    fp = 0
    tp = 0
    for i in range(max_iterations):
        (fv_diff, y) = get_diff_of_fvs(fv_dict, iterations_between_reports, i)
        #print fv_diff.shape
        #print len(fv_diff[0].tolist())
        #cPickle.dump([0, 1], open('lol', 'wb'))
        #cPickle.load(open('lol', 'rb'))
        #####
        # pybrain training
        ######
        dataSet = SupervisedDataSet(67584, 2)
        dataSet.addSample(fv_diff[0].tolist(), y)
        trainer.setData(dataSet)
        #print "training"
        trainer.train()
        #print "finished train"
        res = net.activate(fv_diff[0].tolist())
        #print "finished test"
        if (res[0] > res[1]) & (y[0] > y[1]):
            tp = tp + 1
        elif (res[0] <= res[1]) & (y[0] > y[1]):
            fp = fp + 1
        elif (res[0] > res[1]) & (y[0] < y[1]):
            fn = fn + 1
        ########
        # fann
        ########
        if i % iterations_between_reports == 0:
            #ann.train(fv_diff[0].tolist(), y)
            #print "testing"
            print(i)
            print(net.activate(fv_diff[0].tolist()))
            print(y)
            #print ann.run(fv_diff[0].tolist())
            #print y
            print("prec")
            print(tp / (1e-4 + fp + tp))
            print("recall")
            print(tp / (1e-4 + fn + tp))
            tp = 0
            fp = 0
            fn = 0
    return net
Example 12: predict_ball
# Required import: from pybrain.supervised.trainers import BackpropTrainer [as alias]
# Or: from pybrain.supervised.trainers.BackpropTrainer import setData [as alias]
def predict_ball(hidden_nodes, is_elman=True, training_data=16, epoch=-1, parameters={}, predict_count=16):
    # build rnn
    n = construct_network(hidden_nodes, is_elman)
    # make training data
    ep = 1 if epoch < 0 else epoch
    initial_p = [9., 7.]
    initial_v = [1., 1.]
    # initial_v = ball_data.gen_velocity(BOX_SIZE)
    data_set = ball_data.bounce_ball((training_data + 1) * ep, BOX_SIZE, initial_p=initial_p, initial_v=initial_v)
    total_avg = np.average(data_set, axis=0)
    total_std = np.std(data_set, axis=0)
    total_std[2] = 1.
    total_std[3] = 1.
    # initial_p = data_set[np.random.choice(range(training_data))][:2]
    training_ds = []
    print("data_set = {}".format(data_set))
    normalized_d = __normalize(data_set, total_avg, total_std)
    # print("normalized_d = {}".format(normalized_d))
    for e_index in range(ep):
        t_ds = SequentialDataSet(4, 4)
        t_ds.newSequence()
        e_begin = e_index * training_data
        for j in range(e_begin, e_begin + training_data):
            # from the current sample, predict the next
            p_in = normalized_d[0].tolist()
            p_out = normalized_d[j + 1].tolist()
            t_ds.addSample(p_in, p_out)
        training_ds.append(t_ds)
    # train the network
    err1 = 0
    if epoch < 0:
        trainer = BackpropTrainer(n, training_ds[0], learningrate=3e-4, weightdecay=1e-2, verbose=True)
        err1 = trainer.trainEpochs(20000)
    else:
        trainer = BackpropTrainer(n, **parameters)
        epoch_errs = []
        for ds in training_ds:
            trainer.setData(ds)
            epoch_errs.append(trainer.train())
        err1 = max(epoch_errs)
    # predict
    predict = None
    next_pv = np.hstack((initial_p, initial_v))
    n.reset()
    for i in range(predict_count):
        predict = next_pv if predict is None else np.vstack((predict, next_pv))
        # print("predict = {}".format(predict))
        p_normalized = (data_set[0] - total_avg) / total_std
        next_pv = n.activate(p_normalized.tolist())
        restored = np.array(next_pv) * total_std + total_avg
        next_pv = restored
        print("restored, answer = {}, {}".format(restored, data_set[i + 1]))
    real = ball_data.bounce_ball(predict_count, BOX_SIZE, initial_p, initial_v)
    err_matrix = (predict - real) ** 2
    err_distance = np.sqrt(np.sum(err_matrix[:, 0:2], axis=1)).reshape((predict_count, 1))
    err_velocity = np.sum(np.sqrt(err_matrix[:, 2:4]), axis=1).reshape((predict_count, 1))
    err2 = np.hstack((err_distance, err_velocity))
    return predict, real, err1, err2
Example 13: PHC_WoLF_NN
# Required import: from pybrain.supervised.trainers import BackpropTrainer [as alias]
# Or: from pybrain.supervised.trainers.BackpropTrainer import setData [as alias]
class PHC_WoLF_NN(PHC_FA):
    '''PHC-WoLF with neural function approximation.'''
    deltaW = 0.05
    deltaL = 0.2
    maxNumberofAverage = 30
    weightdecay = 0.001
    trainingEpochPerUpdateWight = 1

    def __init__(self, num_features, num_actions, indexOfAgent=None):
        PHC_FA.__init__(self, num_features, num_actions, indexOfAgent)
        self.linQ = buildNetwork(num_features + num_actions,
                                 (num_features + num_actions), 1,
                                 hiddenclass=SigmoidLayer, outclass=LinearLayer)
        self.linPolicy = buildNetwork(num_features,
                                      (num_features + num_actions), num_actions,
                                      hiddenclass=SigmoidLayer, outclass=SigmoidLayer)
        self.averagePolicy = []
        self.trainer4LinQ = BackpropTrainer(self.linQ, weightdecay=self.weightdecay)
        self.trainer4LinPolicy = BackpropTrainer(self.linPolicy, weightdecay=self.weightdecay)

    def _pi(self, state):
        """Given a state, compute softmax probability for each action."""
        values = np.array(self.linPolicy.activate(r_[state]))
        z = np.sum(values)
        return (values / z).flatten()

    def _qValues(self, state):
        """Return a vector of Q-values for all actions, given the state(-features)."""
        values = np.array([self.linQ.activate(r_[state, one_to_n(i, self.num_actions)])
                           for i in range(self.num_actions)])
        return values.flatten()

    def _piAvr(self, state):
        pi = np.zeros(self.num_actions)
        for elem in self.averagePolicy:
            values = np.array(elem.activate(r_[state]))
            pi = np.add(pi.flatten(), values.flatten())
        z = np.sum(pi)
        pi = pi / z
        return pi.flatten()

    def _updateWeights(self, state, action, reward, next_state):
        """state and next_state are vectors; action is an integer."""
        # Update the Q-value function approximator
        target = reward + self.rewardDiscount * max(self._qValues(next_state))
        inp = r_[asarray(state), one_to_n(action, self.num_actions)]
        self.trainer4LinQ = BackpropTrainer(self.linQ, weightdecay=self.weightdecay)
        ds = SupervisedDataSet(self.num_features + self.num_actions, 1)
        ds.addSample(inp, target)
        self.trainer4LinQ.trainOnDataset(ds)
        # Update the estimate of the average policy
        self.averagePolicy.append(copy.deepcopy(self.linPolicy))
        if len(self.averagePolicy) > self.maxNumberofAverage:
            self.averagePolicy.pop(np.random.randint(len(self.averagePolicy)))
        # Choose the learning rate: small deltaW when winning, large deltaL when losing
        delta = None
        cumRewardOfCurrentPolicy = 0.0
        values = self._qValues(state)
        pi = self._pi(state)
        for elem_action in range(self.num_actions):
            cumRewardOfCurrentPolicy += pi[elem_action] * values[elem_action]  # accumulate (original used '=')
        cumRewardOfAveragePolicy = 0.0
        api = self._piAvr(state)
        for elem_action in range(self.num_actions):
            cumRewardOfAveragePolicy += api[elem_action] * values[elem_action]  # accumulate (original used '=')
        if cumRewardOfCurrentPolicy > cumRewardOfAveragePolicy:
            delta = self.deltaW
        else:
            delta = self.deltaL
        # Update the policy function approximator
        bestAction = r_argmax(self._qValues(state))
        target = one_to_n(bestAction, self.num_actions)
        inp = r_[asarray(state)]
        ds = SupervisedDataSet(self.num_features, self.num_actions)
        ds.addSample(inp, target)
        self.trainer4LinPolicy = BackpropTrainer(self.linPolicy,
                                                 learningrate=delta,
                                                 weightdecay=self.weightdecay)
        self.trainer4LinPolicy.setData(ds)
        self.trainer4LinPolicy.trainEpochs(epochs=self.trainingEpochPerUpdateWight)
Example 14: predict_ball
# Required import: from pybrain.supervised.trainers import BackpropTrainer [as alias]
# Or: from pybrain.supervised.trainers.BackpropTrainer import setData [as alias]
def predict_ball(hidden_nodes, is_elman=True, training_data=5000, epoch=-1, parameters={}, predict_count=128):
    # build rnn
    n = construct_network(hidden_nodes, is_elman)
    # make training data
    ep = 1 if epoch < 0 else epoch
    initial_v = ball_data.gen_velocity(BOX_SIZE)
    data_set = ball_data.bounce_ball((training_data + 1) * ep, BOX_SIZE, None, initial_v=initial_v)
    total_avg = np.average(data_set, axis=0)
    total_std = np.std(data_set, axis=0)
    # initial_p = data_set[np.random.choice(range(training_data))][:2]
    training_ds = []
    normalized_d = __normalize(data_set)
    for e_index in range(ep):
        t_ds = SupervisedDataSet(4, 4)
        e_begin = e_index * training_data
        for j in range(e_begin, e_begin + training_data):
            # from the current sample, predict the next
            p_in = normalized_d[j].tolist()
            p_out = normalized_d[j + 1].tolist()
            t_ds.addSample(p_in, p_out)
        training_ds.append(t_ds)
    del data_set  # release memory
    # train the network
    err1 = 0
    if epoch < 0:
        trainer = BackpropTrainer(n, training_ds[0], **parameters)
        err1 = trainer.train()
    else:
        trainer = BackpropTrainer(n, **parameters)
        epoch_errs = []
        for ds in training_ds:
            trainer.setData(ds)
            epoch_errs.append(trainer.train())
        err1 = max(epoch_errs)
    del training_ds  # release memory
    # predict
    initial_p = ball_data.gen_position(BOX_SIZE)
    predict = None
    next_pv = np.hstack((initial_p, initial_v))
    n.reset()
    for i in range(predict_count):
        predict = next_pv if predict is None else np.vstack((predict, next_pv))
        p_normalized = (next_pv - total_avg) / total_std
        next_pv = n.activate(p_normalized.tolist())
        restored = np.array(next_pv) * total_std + total_avg
        next_pv = restored
    real = ball_data.bounce_ball(predict_count, BOX_SIZE, initial_p, initial_v)
    err_matrix = (predict - real) ** 2
    err_distance = np.sqrt(np.sum(err_matrix[:, 0:2], axis=1)).reshape((predict_count, 1))
    err_velocity = np.sum(np.sqrt(err_matrix[:, 2:4]), axis=1).reshape((predict_count, 1))
    err2 = np.hstack((err_distance, err_velocity))
    return predict, real, err1, err2