This article collects typical usage examples of the Python method Model.Model.buildRewardFunction. If you are wondering what Model.buildRewardFunction does and how to use it, the curated code examples below may help; you can also explore the containing class Model.Model
for further usage.
Two code examples of the Model.buildRewardFunction method are shown below, sorted by popularity by default.
Example 1: test1
# Required import: from Model import Model [as alias]
# Or: from Model.Model import buildRewardFunction [as alias]
# This snippet also assumes the project's DiscModel, caus_ent_backward and
# generate_test_statistic helpers are importable from the surrounding codebase.
import numpy as np
def test1(transition_samples):
    def momentum(current, previous, decay):
        # Blend the current gradient with a decayed copy of the previous one.
        return current + decay * previous

    # Ground-truth weights that the test tries to recover.
    w_init = np.array([-1.3, -1.2, -1, -0.8, -0.8, -1.4, -1.5, -3.0, -2.0, -1.0, -0.3, -0.5, -8.0, -3.0])
    # w_init /= np.linalg.norm(w_init)
    steps = 10
    diff = []
    m = DiscModel()
    model = Model(m, w_init)
    initial_transition = model.transition_f
    # Reference policy and expected state statistics under the true weights.
    policy = caus_ent_backward(model.transition, model.reward_f, 3, steps, conv=0.1, z_states=None)
    start_states = [400, 45, 65, 67, 87, 98, 12, 34, 54, 67, 54, 32, 34, 56, 80, 200, 100, 150]
    # statistics = [generate_test_statistic(policy, model, start_state, steps) for start_state in start_states]
    statistics, dt_states_base = generate_test_statistic(policy, model, start_states, steps)
    # Start the learner from perturbed weights and rebuild the reward function.
    model.w = np.array([-1, -1.2, -1, -0.8, -0.8, -4.4, -2, -2.0, -3.0, -1.0, -2.3, -1.5, -4.0, -3.0])
    # model.w = [-2., -0.6, -4., -4., -3., -5., -2., -0.5, -4., -0.8, -4., -3., -5.]
    # model.w /= np.linalg.norm(model.w)
    model.buildRewardFunction()
    if transition_samples != 1:
        model.buildTransitionFunction(transition_samples, learn=False)
    transition_diff = np.sum(np.absolute(initial_transition - model.transition_f))
    initial_transition = 0
    gamma = 0.04
    iterations = 110
    for i in range(iterations):
        policy2 = caus_ent_backward(model.transition, model.reward_f, 1, steps, conv=0.1, z_states=None)
        # gradients = np.array([(statistics[j] - generate_test_statistic(policy, model, start_state, steps)) for j, start_state in enumerate(start_states)])
        state_freq, dt_states_train = generate_test_statistic(policy2, model, start_states, steps)
        gradients = statistics - state_freq
        if i == 0:
            image = np.absolute(dt_states_train - dt_states_base)
            gradient = gradients
        else:
            gradient = momentum(gradients, prev, 0.8)
            image = np.append(image, np.absolute(dt_states_train - dt_states_base), axis=1)
        # Exponentiated-gradient step: multiplying by exp(...) preserves each weight's sign.
        model.w = model.w * np.exp(-gamma * gradient)
        # model.w /= np.linalg.norm(model.w)
        prev = gradient
        gamma = gamma * 1.04  # slowly grow the step size
        model.buildRewardFunction()
        print("Iteration", i)
        print("Gradient", gradient)
        print("New Weights", model.w)
        print("Real weights", w_init)
        print("Policy Difference", np.sum(np.sum(np.absolute(policy - policy2))))
        diff.append(np.sum(np.sum(np.absolute(policy - policy2))))
    policy_diff = np.sum(np.sum(np.absolute(policy - policy2)))
    w_diff = np.absolute(w_init - model.w)
    grad = np.sum(np.absolute(gradient))
    return image, diff, grad, w_diff, transition_diff
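
test1 returns the stacked per-iteration statistic differences (image), the per-iteration policy differences (diff), the final gradient magnitude, the per-weight error, and the transition-model difference. A minimal driver might look like the sketch below; the plotting calls are illustrative additions assuming matplotlib is installed, not part of the original repository.

import matplotlib.pyplot as plt

# Run the weight-recovery test with the exact transition model (transition_samples=1).
image, diff, grad, w_diff, t_diff = test1(transition_samples=1)

plt.plot(diff)  # L1 policy difference per iteration
plt.xlabel("iteration")
plt.ylabel("|policy - policy2|")
plt.show()

print("final gradient magnitude:", grad)
print("per-weight error:", w_diff)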
Example 2: extract_info
# Required import: from Model import Model [as alias]
# Or: from Model.Model import buildRewardFunction [as alias]
# This fragment also assumes disc_model, model, w and the project's
# extract_info, getFolds, trajectoryCompare, Learner and plot_result
# helpers are already defined in the surrounding script.
# Load data -------------------------------------------------------------------
print("Loading data")
steps = 70
examples_good = extract_info(disc_model, steps, examples_type="good")
# examples_bad = extract_info(disc_model, steps, examples_type="bad")
# Other settings ---------------------------------------------------------------
iterations = 10
gamma = 0.01
fol = 1  # examples held out per fold
examples_good = examples_good[:2]
for idx in range(0, len(examples_good) // fol):
    train_g, test_g = getFolds(examples_good, fol, idx)
    # Bad examples are disabled above, so the good folds stand in for both.
    train_b, test_b = getFolds(examples_good, fol, idx)
    # Reset the weights and rebuild the reward function for this fold.
    model.w = w
    model.buildRewardFunction()
    n1 = "Fold %s init" % idx
    trajectoryCompare(train_g, steps, model, n1)
    # n1 = "Fold %s init bad" % idx
    # trajectoryCompare(train_b, steps, model, n1)
    learner = Learner(model, train_g, test_g, train_b, test_b)
    learner(iterations, gamma, 0.4, examples_type="good")
    # name = "Fold %s bad" % idx
    # plot_result(learner.results_b.train_error, learner.results_b.test_error, learner.results_b.train_lik, learner.results_b.test_lik, name)
    # n1 = name + "train"; n2 = name + "test"
    # trajectoryCompare(train_b, steps, model, n1)
    # trajectoryCompare(test_b, steps, model, n2)
    name = "Fold %s good" % idx
    # plot_result(learner.results_g.train_error, learner.results_g.test_error, learner.results_g.train_lik, learner.results_g.test_lik, name)
    plot_result(learner.results.train_error, learner.results.test_error, learner.results.train_lik, learner.results.test_lik, name)
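
The fragment leans on helpers from the surrounding project that are not shown here. getFolds in particular appears to split the example list into a held-out test fold and the remaining training examples; the sketch below is a hypothetical reconstruction under that assumption, not the original implementation.

def getFolds(examples, fold_size, idx):
    # Hypothetical sketch, not the original helper: hold out fold `idx`
    # (fold_size consecutive examples) as the test set, train on the rest.
    start = idx * fold_size
    test = examples[start:start + fold_size]
    train = examples[:start] + examples[start + fold_size:]
    return train, test

With fol = 1 and examples_good trimmed to two entries, the loop above then amounts to two leave-one-out folds.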