This page collects typical usage examples of the Python method chainer.FunctionSet.l4. If you are wondering what FunctionSet.l4 does, how to call it, or want to see it used in real code, the curated examples below should help. You can also browse further usage examples of the containing class, chainer.FunctionSet.
Eight code examples of FunctionSet.l4 are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps surface better Python code examples.
Example 1: Replay
# Required import: from chainer import FunctionSet [as alias]
# Or: from chainer.FunctionSet import l4 [as alias]
class DN_class:
# Hyper-Parameters
gamma = 0.99 # Discount factor
initial_exploration = 100  # 10**4 # Initial exploration. original: 5x10^4
replay_size = 32 # Replay (batch) size
target_model_update_freq = 10**4 # Target update frequency. original: 10^4
data_size = 10**5 # Data size of history. original: 10^6
def __init__(self, enable_controller=[0, 1, 3, 4]):
self.num_of_actions = len(enable_controller)
self.enable_controller = enable_controller # Default setting : "Breakout"
print "Initializing DN..."
# Initialization of Chainer 1.1.0 or older.
# print "CUDA init"
# cuda.init()
print "Model Building"
self.model = FunctionSet(
l1=F.Convolution2D(4, 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)),
l2=F.Convolution2D(32, 64, ksize=4, stride=2, nobias=False, wscale=np.sqrt(2)),
l3=F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)),
l4=F.Linear(3136, 256, wscale=np.sqrt(2)),
l5=F.Linear(3136, 256, wscale=np.sqrt(2)),
l6=F.Linear(256, 1, initialW=np.zeros((1, 256), dtype=np.float32)),
l7=F.Linear(256, self.num_of_actions, initialW=np.zeros((self.num_of_actions, 256),
dtype=np.float32)),
q_value=DN_out.DN_out(1, self.num_of_actions, self.num_of_actions, nobias = True)
).to_gpu()
if args.resumemodel:
# load saved model
serializers.load_npz(args.resumemodel, self.model)
print "load model from resume.model"
self.model_target = copy.deepcopy(self.model)
print "Initizlizing Optimizer"
self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
self.optimizer.setup(self.model.collect_parameters())
# History Data : D=[s, a, r, s_dash, end_episode_flag]
if args.resumeD1 and args.resumeD2:
# load saved D1 and D2
npz_tmp1 = np.load(args.resumeD1)
print "finished loading half of D data"
npz_tmp2 = np.load(args.resumeD2)
self.D = [npz_tmp1['D0'],
npz_tmp1['D1'],
npz_tmp1['D2'],
npz_tmp2['D3'],
npz_tmp2['D4']]
npz_tmp1.close()
npz_tmp2.close()
print "loaded stored all D data"
else:
self.D = [np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
np.zeros(self.data_size, dtype=np.uint8),
np.zeros((self.data_size, 1), dtype=np.int8),
np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
np.zeros((self.data_size, 1), dtype=np.bool)]
print "initialized D data"
def forward(self, state, action, Reward, state_dash, episode_end):
num_of_batch = state.shape[0]
s = Variable(state)
s_dash = Variable(state_dash)
Q = self.Q_func(s) # Get Q-value
# Generate Target Signals
tmp2 = self.Q_func(s_dash)
tmp2 = list(map(np.argmax, tmp2.data.get())) # argmaxQ(s',a)
tmp = self.Q_func_target(s_dash) # Q'(s',*)
tmp = list(tmp.data.get())
# select Q'(s',*) due to argmaxQ(s',a)
res1 = []
for i in range(num_of_batch):
res1.append(tmp[i][tmp2[i]])
#max_Q_dash = np.asanyarray(tmp, dtype=np.float32)
max_Q_dash = np.asanyarray(res1, dtype=np.float32)
target = np.asanyarray(Q.data.get(), dtype=np.float32)
for i in xrange(num_of_batch):
if not episode_end[i][0]:
tmp_ = np.sign(Reward[i]) + self.gamma * max_Q_dash[i]
else:
tmp_ = np.sign(Reward[i])
action_index = self.action_to_index(action[i])
target[i, action_index] = tmp_
# TD-error clipping
td = Variable(cuda.to_gpu(target)) - Q # TD error
td_tmp = td.data + 1000.0 * (abs(td.data) <= 1) # Avoid zero division
td_clip = td * (abs(td.data) <= 1) + td/abs(td_tmp) * (abs(td.data) > 1)
zero_val = Variable(cuda.to_gpu(np.zeros((self.replay_size, self.num_of_actions), dtype=np.float32)))
loss = F.mean_squared_error(td_clip, zero_val)
return loss, Q
#......... rest of the code omitted .........
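Note: the `td_tmp` / `td_clip` pair above is the standard DQN error clipping written without branching: inside [-1, 1] the raw TD error is kept, outside that range only its sign is used, and the `+ 1000.0` offset keeps the division defined where the clipped branch is masked out anyway. A minimal, self-contained NumPy sketch of just that arithmetic (the sample values are illustrative, not from the excerpt):

import numpy as np

# Illustrative TD errors covering both branches of the clipping rule.
td = np.array([-2.5, -0.4, 0.0, 0.7, 3.0], dtype=np.float32)

# Offset the denominator wherever |td| <= 1 so td / abs(td_tmp) never divides by zero;
# those entries are masked out by (abs(td) > 1) in the second term anyway.
td_tmp = td + 1000.0 * (np.abs(td) <= 1)
td_clip = td * (np.abs(td) <= 1) + td / np.abs(td_tmp) * (np.abs(td) > 1)

print(td_clip)  # [-1.  -0.4  0.   0.7  1. ]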
Example 2: Replay
# Required import: from chainer import FunctionSet [as alias]
# Or: from chainer.FunctionSet import l4 [as alias]
class DQN_class:
gamma = 0.99
initial_exploration = 10**2
replay_size = 32 # Replay (batch) size
target_model_update_freq = 10**4 # Target update frequency. original: 10^4
data_size = 10**2
def __init__(self, enable_controller=[0, 1, 2, 3, 4, 5, 6, 7, 8]):
# """ [ 0, 0],
# [ 0, 1],
# [ 0,-1],
# [ 1, 0],
# [ 1, 1],
# [ 1,-1],
# [-1, 0],
# [-1, 1],
# [-1,-1]]):"""
self.num_of_actions = len(enable_controller)
self.enable_controller = enable_controller
print "Initializing DQN..."
print "CUDA init"
#cuda.init()
print "Model Building"
self.model = FunctionSet(
l1 = F.Linear(INPUT_SIZE, 5000), # input map[100, 100] + v[2] + w[1] + wp[2]
l2 = F.Linear(5000, 1000),
l3 = F.Linear(1000, 100),
l4 = F.Linear(100, self.num_of_actions,
initialW=np.zeros((self.num_of_actions, 100), dtype=np.float32))
).to_gpu()
self.model_target = copy.deepcopy(self.model)
print "Initizlizing Optimizer"
self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001) ### Important!!!! RMSProp!!
self.optimizer.setup(self.model.collect_parameters())
# History Data : D=[s, a, r, s_dash, end_episode_flag]
self.D = [np.zeros((self.data_size, INPUT_SIZE), dtype=np.float32),
np.zeros(self.data_size, dtype=np.uint8),
np.zeros((self.data_size, 1), dtype=np.float32),
np.zeros((self.data_size, INPUT_SIZE), dtype=np.float32),
np.zeros((self.data_size, 1), dtype=np.bool)]
#self.D = [np.zeros((self.data_size, INPUT_SIZE), dtype=np.uint8),
# np.zeros(self.data_size, dtype=np.uint8),
# np.zeros((self.data_size, 1), dtype=np.int8),
# np.zeros((self.data_size, INPUT_SIZE), dtype=np.uint8),
# np.zeros((self.data_size, 1), dtype=np.bool)]
def forward(self, state, action, Reward, state_dash, episode_end):
num_of_batch = state.shape[0]
s = Variable(state)
s_dash = Variable(state_dash)
Q = self.Q_func(s) # Get Q-value
# Generate Target Signals
tmp = self.Q_func_target(s_dash) # Q(s',*)
tmp = list(map(np.max, tmp.data.get())) # max_a Q(s',a)
max_Q_dash = np.asanyarray(tmp, dtype=np.float32)
target = np.asanyarray(Q.data.get(), dtype=np.float32)
for i in xrange(num_of_batch):
if not episode_end[i][0]:
tmp_ = np.sign(Reward[i]) + self.gamma * max_Q_dash[i]
else:
tmp_ = np.sign(Reward[i])
#action_index = self.action_to_index(action[i])
#target[i, action_index] = tmp_
target[i, action[i]] = tmp_
# TD-error clipping
td = Variable(cuda.to_gpu(target)) - Q # TD error
#print "td-error"
print "np.max(td.data) : ",
print np.max(td.data.get())
# Unclear what this is for; it effectively ends up as td = td_clip
td_tmp = td.data + 1000.0 * (abs(td.data) <= 1) # Avoid zero division
td_clip = td * (abs(td.data) <= 1) + td/abs(td_tmp) * (abs(td.data) > 1)
#print "td_clip.data :",
#print td_clip.data
zero_val = Variable(cuda.to_gpu(np.zeros((self.replay_size, self.num_of_actions))).astype(np.float32))
#zero_val = Variable(cuda.to_gpu(np.zeros((self.replay_size, self.num_of_actions))))
loss = F.mean_squared_error(td_clip, zero_val)
return loss, Q
# Save the data
def stockExperience(self, time,
state, action, reward, state_dash,
episode_end_flag):
data_index = time % self.data_size
if episode_end_flag is True:
self.D[0][data_index] = state
self.D[1][data_index] = action
self.D[2][data_index] = reward
#......... rest of the code omitted .........
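Note: the excerpt calls `self.Q_func(s)` and `self.Q_func_target(s_dash)` but their bodies are cut off. A plausible reconstruction for this four-layer head, assuming ReLU activations between the hidden layers (the activations are an assumption, not shown in the source):

import chainer.functions as F

def Q_func(self, state):
    h1 = F.relu(self.model.l1(state))   # INPUT_SIZE -> 5000
    h2 = F.relu(self.model.l2(h1))      # 5000 -> 1000
    h3 = F.relu(self.model.l3(h2))      # 1000 -> 100
    return self.model.l4(h3)            # 100 -> num_of_actions (linear Q head)

def Q_func_target(self, state):
    h1 = F.relu(self.model_target.l1(state))
    h2 = F.relu(self.model_target.l2(h1))
    h3 = F.relu(self.model_target.l3(h2))
    return self.model_target.l4(h3)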
Example 3: Replay
# Required import: from chainer import FunctionSet [as alias]
# Or: from chainer.FunctionSet import l4 [as alias]
class QNet:
# Hyper-Parameters
gamma = 0.99 # Discount factor
initial_exploration = 10**3 # Initial exploration. original: 5x10^4
replay_size = 32 # Replay (batch) size
target_model_update_freq = 10**4 # Target update frequency. original: 10^4
data_size = 10**5 # Data size of history. original: 10^6
hist_size = 1 #original: 4
def __init__(self, use_gpu, enable_controller, dim):
self.use_gpu = use_gpu
self.num_of_actions = len(enable_controller)
self.enable_controller = enable_controller
self.dim = dim
print("Initializing Q-Network...")
hidden_dim = 256
self.model = FunctionSet(
l4=F.Linear(self.dim*self.hist_size, hidden_dim, wscale=np.sqrt(2)),
q_value=F.Linear(hidden_dim, self.num_of_actions,
initialW=np.zeros((self.num_of_actions, hidden_dim),
dtype=np.float32))
)
if self.use_gpu >= 0:
self.model.to_gpu()
self.model_target = copy.deepcopy(self.model)
self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
self.optimizer.setup(self.model.collect_parameters())
# History Data : D=[s, a, r, s_dash, end_episode_flag]
self.d = [np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
np.zeros(self.data_size, dtype=np.uint8),
np.zeros((self.data_size, 1), dtype=np.int8),
np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
np.zeros((self.data_size, 1), dtype=np.bool)]
def forward(self, state, action, reward, state_dash, episode_end):
num_of_batch = state.shape[0]
s = Variable(state)
s_dash = Variable(state_dash)
q = self.q_func(s) # Get Q-value
# Generate Target Signals
tmp = self.q_func_target(s_dash) # Q(s',*)
if self.use_gpu >= 0:
tmp = list(map(np.max, tmp.data.get())) # max_a Q(s',a)
else:
tmp = list(map(np.max, tmp.data)) # max_a Q(s',a)
max_q_dash = np.asanyarray(tmp, dtype=np.float32)
if self.use_gpu >= 0:
target = np.asanyarray(q.data.get(), dtype=np.float32)
else:
# make new array
target = np.array(q.data, dtype=np.float32)
for i in xrange(num_of_batch):
if not episode_end[i][0]:
tmp_ = reward[i] + self.gamma * max_q_dash[i]
else:
tmp_ = reward[i]
action_index = self.action_to_index(action[i])
target[i, action_index] = tmp_
# TD-error clipping
if self.use_gpu >= 0:
target = cuda.to_gpu(target)
td = Variable(target) - q # TD error
td_tmp = td.data + 1000.0 * (abs(td.data) <= 1) # Avoid zero division
td_clip = td * (abs(td.data) <= 1) + td/abs(td_tmp) * (abs(td.data) > 1)
zero_val = np.zeros((self.replay_size, self.num_of_actions), dtype=np.float32)
if self.use_gpu >= 0:
zero_val = cuda.to_gpu(zero_val)
zero_val = Variable(zero_val)
loss = F.mean_squared_error(td_clip, zero_val)
return loss, q
def stock_experience(self, time,
state, action, reward, state_dash,
episode_end_flag):
data_index = time % self.data_size
if episode_end_flag is True:
self.d[0][data_index] = state
self.d[1][data_index] = action
self.d[2][data_index] = reward
else:
self.d[0][data_index] = state
self.d[1][data_index] = action
self.d[2][data_index] = reward
self.d[3][data_index] = state_dash
self.d[4][data_index] = episode_end_flag
def experience_replay(self, time):
#......... rest of the code omitted .........
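Note: `q_func` and `q_func_target` are used in `forward` but omitted from the excerpt. A plausible sketch for the single-hidden-layer model above, assuming a ReLU on `l4` (an assumption):

import chainer.functions as F

def q_func(self, state):
    h4 = F.relu(self.model.l4(state))        # dim * hist_size -> hidden_dim
    return self.model.q_value(h4)            # hidden_dim -> num_of_actions

def q_func_target(self, state):
    h4 = F.relu(self.model_target.l4(state))
    return self.model_target.q_value(h4)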
Example 4: Replay
# Required import: from chainer import FunctionSet [as alias]
# Or: from chainer.FunctionSet import l4 [as alias]
class DQN_class:
# Hyper-Parameters
gamma = 0.99 # Discount factor
initial_exploration = 100  # 10**4 # Initial exploration. original: 5x10^4
replay_size = 32 # Replay (batch) size
target_model_update_freq = 10**4 # Target update frequency. original: 10^4
data_size = 10**5 #10**5 # Data size of history. original: 10^6
def __init__(self, enable_controller=[0, 3, 4]):
self.num_of_actions = len(enable_controller)
self.enable_controller = enable_controller # Default setting : "Pong"
print "Initializing DQN..."
print "Model Building"
self.CNN_model = FunctionSet(
l1=F.Convolution2D(4, 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)),
l2=F.Convolution2D(32, 64, ksize=4, stride=2, nobias=False, wscale=np.sqrt(2)),
l3=F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)),
)
self.model = FunctionSet(
l4=F.Linear(3136, 512, wscale=np.sqrt(2)),
q_value=F.Linear(512, self.num_of_actions,
initialW=np.zeros((self.num_of_actions, 512),
dtype=np.float32))
).to_gpu()
d = 'elite/'
self.CNN_model.l1.W.data = np.load(d+'l1_W.npy')#.astype(np.float32)
self.CNN_model.l1.b.data = np.load(d+'l1_b.npy')#.astype(np.float32)
self.CNN_model.l2.W.data = np.load(d+'l2_W.npy')#.astype(np.float32)
self.CNN_model.l2.b.data = np.load(d+'l2_b.npy')#.astype(np.float32)
self.CNN_model.l3.W.data = np.load(d+'l3_W.npy')#.astype(np.float32)
self.CNN_model.l3.b.data = np.load(d+'l3_b.npy')#.astype(np.float32)
self.CNN_model = self.CNN_model.to_gpu()
self.CNN_model_target = copy.deepcopy(self.CNN_model)
self.model_target = copy.deepcopy(self.model)
print "Initizlizing Optimizer"
self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.0001)
self.optimizer.setup(self.model.collect_parameters())
# History Data : D=[s, a, r, s_dash, end_episode_flag]
self.D = [np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
np.zeros(self.data_size, dtype=np.uint8),
np.zeros((self.data_size, 1), dtype=np.int8),
np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
np.zeros((self.data_size, 1), dtype=np.bool),
np.zeros((self.data_size, 1), dtype=np.uint8)]
def forward(self, state, action, Reward, state_dash, episode_end):
num_of_batch = state.shape[0]
s = Variable(state)
s_dash = Variable(state_dash)
Q = self.Q_func(s) # Get Q-value
# Generate Target Signals
tmp = self.Q_func_target(s_dash) # Q(s',*)
tmp = list(map(np.max, tmp.data.get())) # max_a Q(s',a)
max_Q_dash = np.asanyarray(tmp, dtype=np.float32)
target = np.asanyarray(Q.data.get(), dtype=np.float32)
for i in xrange(num_of_batch):
if not episode_end[i][0]:
tmp_ = np.sign(Reward[i]) + self.gamma * max_Q_dash[i]
else:
tmp_ = np.sign(Reward[i])
action_index = self.action_to_index(action[i])
target[i, action_index] = tmp_
# TD-error clipping
td = Variable(cuda.to_gpu(target)) - Q # TD error
td_tmp = td.data + 1000.0 * (abs(td.data) <= 1) # Avoid zero division
td_clip = td * (abs(td.data) <= 1) + td/abs(td_tmp) * (abs(td.data) > 1)
zero_val = Variable(cuda.to_gpu(np.zeros((self.replay_size, self.num_of_actions), dtype=np.float32)))
loss = F.mean_squared_error(td_clip, zero_val)
return loss, Q
def stockExperience(self, time,
state, action, lstm_reward, state_dash,
episode_end_flag, ale_reward):
data_index = time % self.data_size
if episode_end_flag is True:
self.D[0][data_index] = state
self.D[1][data_index] = action
self.D[2][data_index] = lstm_reward
self.D[5][data_index] = ale_reward
else:
self.D[0][data_index] = state
#......... rest of the code omitted .........
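Note: here `l4` lives in a separate trainable head (`self.model`) on top of a pre-loaded convolutional stack (`self.CNN_model`); only the head's parameters are handed to the optimizer. The omitted `Q_func` plausibly chains the two, assuming ReLU activations throughout (an assumption):

import chainer.functions as F

def Q_func(self, state):
    h1 = F.relu(self.CNN_model.l1(state))    # 4 x 84 x 84 -> 32 feature maps
    h2 = F.relu(self.CNN_model.l2(h1))
    h3 = F.relu(self.CNN_model.l3(h2))       # flattened to 3136 features by l4
    h4 = F.relu(self.model.l4(h3))           # 3136 -> 512
    return self.model.q_value(h4)            # 512 -> num_of_actions

def Q_func_target(self, state):
    h1 = F.relu(self.CNN_model_target.l1(state))
    h2 = F.relu(self.CNN_model_target.l2(h1))
    h3 = F.relu(self.CNN_model_target.l3(h2))
    h4 = F.relu(self.model_target.l4(h3))
    return self.model_target.q_value(h4)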
Example 5: Replay
# Required import: from chainer import FunctionSet [as alias]
# Or: from chainer.FunctionSet import l4 [as alias]
class DQN_class:
# Hyper-Parameters
gamma = 0.99 # Discount factor
initial_exploration = 5*10**4 # 10**4 # Initial exploration. original: 5x10^4
replay_size = 32 # Replay (batch) size
target_model_update_freq = 10**4 # Target update frequency. original: 10^4
data_size = 10**6 # Data size of history. original: 10^6
num_of_actions = 2 # Action dimension
num_of_states = 12 # State dimension
def __init__(self):
print "Initializing DQN..."
# Initialization of Chainer 1.1.0 or older.
# print "CUDA init"
# cuda.init()
print "Model Building"
# self.model = FunctionSet(
# l1=F.Convolution2D(4, 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)),
# l2=F.Convolution2D(32, 64, ksize=4, stride=2, nobias=False, wscale=np.sqrt(2)),
# l3=F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)),
# l4=F.Linear(3136, 512, wscale=np.sqrt(2)),
# q_value=F.Linear(512, self.num_of_actions,
# initialW=np.zeros((self.num_of_actions, 512),
# dtype=np.float32))
# ).to_gpu()
# self.critic = FunctionSet(
# l1=F.Linear(self.num_of_actions+self.num_of_states,512),
# l2=F.Linear(512,256),
# l3=F.Linear(256,128),
# q_value=F.Linear(128,1,initialW=np.zeros((1,128),dtype=np.float32))
# ).to_gpu()
#
# self.actor = FunctionSet(
# l1=F.Linear(self.num_of_states,512),
# l2=F.Linear(512,256),
# l3=F.Linear(256,128),
# a_value=F.Linear(128,self.num_of_actions,initialW=np.zeros((1,128),dtype=np.float32))
# ).to_gpu()
self.critic = FunctionSet(
l1=F.Linear(self.num_of_actions+self.num_of_states,1024),
l2=F.Linear(1024,512),
l3=F.Linear(512,256),
l4=F.Linear(256,128),
q_value=F.Linear(128,1,initialW=np.zeros((1,128),dtype=np.float32))
).to_gpu()
self.actor = FunctionSet(
l1=F.Linear(self.num_of_states,1024),
l2=F.Linear(1024,512),
l3=F.Linear(512,256),
l4=F.Linear(256,128),
a_value=F.Linear(128,self.num_of_actions,initialW=np.zeros((1,128),dtype=np.float32))
).to_gpu()
# self.critic = FunctionSet(
# l1=F.Linear(self.num_of_actions+self.num_of_states,1024,wscale=0.01*math.sqrt(self.num_of_actions+self.num_of_states)),
# l2=F.Linear(1024,512,wscale=0.01*math.sqrt(1024)),
# l3=F.Linear(512,256,wscale=0.01*math.sqrt(512)),
# l4=F.Linear(256,128,wscale=0.01*math.sqrt(256)),
# q_value=F.Linear(128,1,wscale=0.01*math.sqrt(128))
# ).to_gpu()
#
# self.actor = FunctionSet(
# l1=F.Linear(self.num_of_states,1024,wscale=0.01*math.sqrt(self.num_of_states)),
# l2=F.Linear(1024,512,wscale=0.01*math.sqrt(1024)),
# l3=F.Linear(512,256,wscale=0.01*math.sqrt(512)),
# l4=F.Linear(256,128,wscale=0.01*math.sqrt(256)),
# a_value=F.Linear(128,self.num_of_actions,wscale=0.01*math.sqrt(128))
# ).to_gpu()
self.critic_target = copy.deepcopy(self.critic)
self.actor_target = copy.deepcopy(self.actor)
print "Initizlizing Optimizer"
#self.optim_critic = optimizers.RMSpropGraves(lr=0.0001, alpha=0.95, momentum=0.95, eps=0.0001)
#self.optim_actor = optimizers.RMSpropGraves(lr=0.0001, alpha=0.95, momentum=0.95, eps=0.0001)
self.optim_critic = optimizers.Adam(alpha=0.00001)
self.optim_actor = optimizers.Adam(alpha=0.00001)
self.optim_critic.setup(self.critic)
self.optim_actor.setup(self.actor)
# self.optim_critic.add_hook(chainer.optimizer.WeightDecay(0.00001))
# self.optim_critic.add_hook(chainer.optimizer.GradientClipping(10))
# self.optim_actor.add_hook(chainer.optimizer.WeightDecay(0.00001))
# self.optim_actor.add_hook(chainer.optimizer.GradientClipping(10))
# History Data : D=[s, a, r, s_dash, end_episode_flag]
self.D = [np.zeros((self.data_size, self.num_of_states), dtype=np.float32),
np.zeros((self.data_size, self.num_of_actions), dtype=np.float32),
np.zeros((self.data_size, 1), dtype=np.float32),
np.zeros((self.data_size, self.num_of_states), dtype=np.float32),
np.zeros((self.data_size, 1), dtype=np.bool)]
# with open('dqn_dump.json', 'a') as f:
# json.dump(datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S'), f)
# f.write('\n')
#......... rest of the code omitted .........
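Note: in this actor-critic (DDPG-style) setup `l4` is simply the last hidden layer of both networks, and the excerpt ends before any forward pass. The sketch below is hypothetical (the names `Q_func` and `A_func` are not in the source), assuming ReLU hidden activations and a concatenated (state, action) input for the critic, which matches its `l1` input width:

import chainer.functions as F

def Q_func(self, state, action):
    h = F.relu(self.critic.l1(F.concat((state, action), axis=1)))
    h = F.relu(self.critic.l2(h))
    h = F.relu(self.critic.l3(h))
    h = F.relu(self.critic.l4(h))            # 256 -> 128
    return self.critic.q_value(h)            # scalar Q(s, a)

def A_func(self, state):
    h = F.relu(self.actor.l1(state))
    h = F.relu(self.actor.l2(h))
    h = F.relu(self.actor.l3(h))
    h = F.relu(self.actor.l4(h))             # 256 -> 128
    return self.actor.a_value(h)             # deterministic action output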
Example 6: SDA
# Required import: from chainer import FunctionSet [as alias]
# Or: from chainer.FunctionSet import l4 [as alias]
class SDA(object):
def __init__(
self,
rng,
data,
target,
n_inputs=784,
n_hidden=[784,784,784],
n_outputs=10,
corruption_levels=[0.1,0.2,0.3],
gpu=-1):
self.model = FunctionSet(
l1=F.Linear(n_inputs, n_hidden[0]),
l2=F.Linear(n_hidden[0], n_hidden[1]),
l3=F.Linear(n_hidden[1], n_hidden[2]),
l4=F.Linear(n_hidden[2], n_outputs)
)
if gpu >= 0:
self.model.to_gpu()
self.rng = rng
self.gpu = gpu
self.data = data
self.target = target
self.x_train, self.x_test = data
self.y_train, self.y_test = target
self.n_train = len(self.y_train)
self.n_test = len(self.y_test)
self.corruption_levels = corruption_levels
self.n_inputs = n_inputs
self.n_hidden = n_hidden
self.n_outputs = n_outputs
self.dae1 = None
self.dae2 = None
self.dae3 = None
self.optimizer = None
self.setup_optimizer()
self.train_accuracies = []
self.train_losses = []
self.test_accuracies = []
self.test_losses = []
def setup_optimizer(self):
self.optimizer = optimizers.AdaDelta()
self.optimizer.setup(self.model)
@property
def xp(self):
return cuda.cupy if self.gpu >= 0 else numpy
def pre_train(self, n_epoch=20, batchsize=100):
first_inputs = self.data
# initialize first dAE
self.dae1 = DA(self.rng,
data=first_inputs,
n_inputs=self.n_inputs,
n_hidden=self.n_hidden[0],
corruption_level=self.corruption_levels[0],
gpu=self.gpu)
# train first dAE
logging.info("--------First DA training has started!--------")
self.dae1.train_and_test(n_epoch=n_epoch, batchsize=batchsize)
self.dae1.to_cpu()
# compute second inputs for second dAE
tmp1 = self.dae1.compute_hidden(first_inputs[0])
tmp2 = self.dae1.compute_hidden(first_inputs[1])
if self.gpu >= 0:
self.dae1.to_gpu()
second_inputs = [tmp1, tmp2]
# initialize second dAE
self.dae2 = DA(
self.rng,
data=second_inputs,
n_inputs=self.n_hidden[0],
n_hidden=self.n_hidden[1],
corruption_level=self.corruption_levels[1],
gpu=self.gpu
)
# train second dAE
logging.info("--------Second DA training has started!--------")
self.dae2.train_and_test(n_epoch=n_epoch, batchsize=batchsize)
self.dae2.to_cpu()
# compute third inputs for third dAE
tmp1 = self.dae2.compute_hidden(second_inputs[0])
tmp2 = self.dae2.compute_hidden(second_inputs[1])
if self.gpu >= 0:
self.dae2.to_gpu()
third_inputs = [tmp1, tmp2]
# initialize third dAE
#......... rest of the code omitted .........
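Note: after the layer-wise pre-training shown above, the excerpt ends before the supervised fine-tuning pass that finally uses `l4`. A plausible sketch of that forward step, assuming ReLU hidden activations, dropout, and a softmax cross-entropy loss on `l4` (all three are assumptions, written against the Chainer 1.x API used throughout this page):

import chainer.functions as F
from chainer import Variable

def forward(self, x_data, y_data, train=True):
    x, t = Variable(x_data), Variable(y_data)
    h1 = F.dropout(F.relu(self.model.l1(x)), train=train)   # Chainer 1.x-style dropout flag
    h2 = F.dropout(F.relu(self.model.l2(h1)), train=train)
    h3 = F.dropout(F.relu(self.model.l3(h2)), train=train)
    y = self.model.l4(h3)                                    # n_hidden[2] -> n_outputs (logits)
    return F.softmax_cross_entropy(y, t), F.accuracy(y, t)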
Example 7: ChainerAgent
# Required import: from chainer import FunctionSet [as alias]
# Or: from chainer.FunctionSet import l4 [as alias]
class ChainerAgent(Agent):
def __init__(self, epsilon=1.0, frames_per_action=4):
super(ChainerAgent, self).__init__()
cuda.init()
self.epsilon = epsilon
self.gamma = 0.99
self.iterations = 0
self.model = FunctionSet(
l1 = F.Linear(9 * frames_per_action, 256),
l2 = F.Linear(256, 256),
l3 = F.Linear(256, 256),
l4 = F.Linear(256, 2),
).to_gpu()
self.optimizer = optimizers.RMSprop(lr=1e-5)
self.optimizer.setup(self.model)
self.update_target()
self.num_frames = 0
self.frames_per_action = frames_per_action
self.prev_reward = 0.0
self.history = ChainHistory(state_len=(9 * frames_per_action))
def forward(self, state, action, reward, new_state, is_terminal):
q = self.get_q(Variable(state))
q_target = self.get_target_q(Variable(new_state))
max_target_q = cp.max(q_target.data, axis=1)
target = cp.copy(q.data)
for i in xrange(target.shape[0]):
curr_action = int(action[i])
if is_terminal[i]:
target[i, curr_action] = reward[i]
else:
target[i, curr_action] = reward[i] + self.gamma * max_target_q[i]
loss = F.mean_squared_error(Variable(target), q)
return loss, 0.0 #cp.mean(q.data[:, action[i]])
def get_q(self, state):
h1 = F.relu(self.model.l1(state))
h2 = F.relu(self.model.l2(h1))
h3 = F.relu(self.model.l3(h2))
return self.model.l4(h3)
def get_target_q(self, state):
h1 = F.relu(self.target_model.l1(state))
h2 = F.relu(self.target_model.l2(h1))
h3 = F.relu(self.target_model.l3(h2))
return self.target_model.l4(h3)
def accept_reward(self, state, action, reward, new_state, is_terminal):
self.prev_reward += reward
if not (is_terminal or self.num_frames % self.frames_per_action == 0):
return
if self.num_frames == self.frames_per_action:
self.prev_reward = 0.0
self.prev_action = action
return
self.history.add((self.prev_state, self.prev_action, self.prev_reward,
self.curr_state, is_terminal))
self.prev_reward = 0.0
self.prev_action = action
self.iterations += 1
if self.iterations % 10000 == 0:
print '*** UPDATING TARGET NETWORK ***'
self.update_target()
state, action, reward, new_state, is_terminal = self.history.get(num=32)
state = cuda.to_gpu(state)
action = cuda.to_gpu(action)
new_state = cuda.to_gpu(new_state)
reward = cuda.to_gpu(reward)
loss, q = self.forward(state, action, reward, new_state, is_terminal)
self.optimizer.zero_grads()
loss.backward()
self.optimizer.update()
def update_state_vector(self, state):
if self.num_frames < self.frames_per_action:
if self.num_frames == 0:
self.curr_state = state
else:
self.curr_state = np.hstack((self.curr_state, state))
else:
if self.num_frames < 2 * self.frames_per_action:
if self.num_frames == self.frames_per_action:
self.prev_state = np.copy(self.curr_state[:, :9])
else:
self.prev_state = np.hstack((self.prev_state, self.curr_state[:, :9]))
#......... rest of the code omitted .........
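Note: `update_target` and the ε-greedy action choice are called but not shown in this excerpt. Minimal hypothetical sketches consistent with the two-action `l4` head above (Example 8 shows an `update_target` of exactly this shape; the `act` body is an assumption):

import copy
import random
import numpy as np
from chainer import cuda, Variable

def update_target(self):
    # Copy the online network into the frozen target network.
    self.target_model = copy.deepcopy(self.model).to_gpu()

def act(self, state):
    # With probability epsilon take a random action, otherwise the greedy one.
    if random.random() < self.epsilon:
        return random.random() < 0.5
    q = self.get_q(Variable(cuda.to_gpu(state)))
    return bool(np.argmax(cuda.to_cpu(q.data)))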
Example 8: ConvQAgent
# Required import: from chainer import FunctionSet [as alias]
# Or: from chainer.FunctionSet import l4 [as alias]
class ConvQAgent(Agent):
def __init__(self, frames_per_action=4):
super(ConvQAgent, self).__init__()
cuda.init()
self.epsilon = 1.0
self.gamma = 0.99
self.iterations = 0
self.model = FunctionSet(
l1 = F.Convolution2D(frames_per_action, 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)),
l2 = F.Convolution2D(32, 64, ksize=4, stride=2, nobias=False, wscale=np.sqrt(2)),
l3 = F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)),
l4 = F.Linear(64 * 7 * 7, 512),
l5 = F.Linear(512, 2)
).to_gpu()
self.optimizer = optimizers.RMSprop(lr=1e-5)
self.optimizer.setup(self.model)
self.update_target()
self.num_frames = 0
self.frames_per_action = frames_per_action
self.prev_reward = 0.0
self.history = ConvHistory((frames_per_action, 84, 84))
def update_target(self):
self.target_model = copy.deepcopy(self.model)
self.target_model = self.target_model.to_gpu()
def act(self, state):
self.update_state_vector(state)
if self.num_frames < self.frames_per_action - 1 or self.num_frames % self.frames_per_action != 0:
return None
if random.random() < 0.001:
print 'Epsilon: {}'.format(self.epsilon)
if self.epsilon > 0.05:
self.epsilon -= (0.95 / 300000)
if random.random() < self.epsilon:
return random.random() > 0.375
q = self.get_q(Variable(cuda.to_gpu(self.curr_state[np.newaxis, :, :, :])))
if random.random() < 0.01:
if q.data[0,1] > q.data[0,0]:
print 'On: {}'.format(q.data)
else:
print 'Off: {}'.format(q.data)
return q.data[0,1] > q.data[0,0]
def update_state_vector(self, state):
if self.num_frames < self.frames_per_action:
if self.num_frames == 0:
self.curr_state = np.zeros((self.frames_per_action, 84, 84), dtype=np.float32)
self.curr_state[self.num_frames, :, :] = state
else:
if self.num_frames == self.frames_per_action:
self.prev_state = np.zeros((self.frames_per_action, 84, 84), dtype=np.float32)
self.prev_state[1:, :, :] = self.prev_state[:-1, :, :]
self.prev_state[0, :, :] = self.curr_state[-1, :, :]
self.curr_state[1:, :, :] = self.curr_state[:-1, :, :]
self.curr_state[0, :, :] = state
self.num_frames += 1
def accept_reward(self, state, action, reward, new_state, is_terminal):
self.prev_reward += reward
if not (is_terminal or self.num_frames % self.frames_per_action == 0):
return
if self.num_frames == self.frames_per_action:
self.prev_reward = 0.0
self.prev_action = action
return
self.history.add((self.prev_state, self.prev_action, self.prev_reward,
self.curr_state, is_terminal))
self.prev_reward = 0.0
self.prev_action = action
self.iterations += 1
if self.iterations % 10000 == 0:
print '*** UPDATING TARGET NETWORK ***'
self.update_target()
state, action, reward, new_state, is_terminal = self.history.get(num=32)
state = cuda.to_gpu(state)
action = cuda.to_gpu(action)
new_state = cuda.to_gpu(new_state)
reward = cuda.to_gpu(reward)
loss, q = self.forward(state, action, reward, new_state, is_terminal)
#......... rest of the code omitted .........
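Note: the excerpt cuts off before `get_q` for this network. A plausible sketch, assuming ReLU activations throughout (an assumption); the spatial sizes in the comments follow from the kernel/stride settings above:

import chainer.functions as F

def get_q(self, state):
    h1 = F.relu(self.model.l1(state))    # frames x 84 x 84 -> 32 x 20 x 20
    h2 = F.relu(self.model.l2(h1))       # -> 64 x 9 x 9
    h3 = F.relu(self.model.l3(h2))       # -> 64 x 7 x 7
    h4 = F.relu(self.model.l4(h3))       # 64*7*7 = 3136 -> 512
    return self.model.l5(h4)             # 512 -> 2 action values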