本文整理汇总了Python中chainer.Variable.unchain_backward方法的典型用法代码示例。如果您正苦于以下问题:Python Variable.unchain_backward方法的具体用法?Python Variable.unchain_backward怎么用?Python Variable.unchain_backward使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 chainer.Variable 的用法示例。
在下文中一共展示了Variable.unchain_backward方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: optimizeCRNN
# 需要导入模块: from chainer import Variable [as 别名]
# 或者: from chainer.Variable import unchain_backward [as 别名]
def optimizeCRNN(iterNum, maxIndex, indicies):
    """Train an EvalCRNN model with truncated BPTT and per-epoch LR decay.

    Parameters
    ----------
    iterNum : int
        Number of minibatches processed per epoch.
    maxIndex : int
        Vocabulary size handed to ``EvalCRNN`` and ``computeEntropy``.
    indicies : sequence of int
        Flat token-index stream, consumed in slices of 1000.

    Returns the trained inner ``cRNN`` link.
    """
    batchSize = 1000
    model = EvalCRNN(maxIndex, 500)
    print(len(indicies), computeEntropy(maxIndex, indicies))
    learningRate = 0.001
    epoch = 3
    for j in range(epoch):
        # A fresh optimizer every epoch picks up the decayed learning rate.
        my_optimizer = optimizers.RMSpropGraves(lr=learningRate)
        my_optimizer.setup(model)
        my_optimizer.add_hook(optimizer.GradientClipping(1))
        model.cRNN.reset()
        # Placeholder so the first unchain_backward() below is a harmless no-op.
        # FIX: use float32 zeros instead of the original int array [[0]] — same
        # role, but a valid dtype for a Chainer loss Variable.
        loss = Variable(np.zeros((1, 1), dtype=np.float32))
        for i in range(iterNum):
            # FIX: time.clock() was deprecated since 3.3 and removed in
            # Python 3.8; perf_counter() is the drop-in wall-clock timer.
            t1 = time.perf_counter()
            model.zerograds()
            # Truncated BPTT: cut the history of the previous step's loss.
            loss.unchain_backward()
            loss = model(indicies[batchSize * i:batchSize * (i + 1)], iterNum * batchSize)
            loss.backward()
            t2 = time.perf_counter()
            msg = "iter: " + str(i + iterNum * j + 1) + "/" + str(iterNum * epoch)
            msgLoss = "loss: " + str(loss.data/batchSize)
            msgNorm = "grad: " + str(my_optimizer.compute_grads_norm())
            msgTime = "time: " + str(t2 - t1) + " seconds"
            print(msgLoss, msgNorm, msg, msgTime)
            my_optimizer.update()
        # Halve the learning rate after each epoch.
        learningRate *= 0.50
    # NOTE(review): this evaluation call passes only one argument while the
    # training call above passes two — confirm EvalCRNN.__call__'s signature.
    print(model(indicies[batchSize*(iterNum):batchSize*(iterNum+10)]).data/(batchSize*10))
    return model.cRNN
示例2: update_core
# 需要导入模块: from chainer import Variable [as 别名]
# 或者: from chainer.Variable import unchain_backward [as 别名]
# One pix2pix-style training step: fetch a batch, run encoder -> decoder ->
# discriminator, then let each optimizer call its own loss closure.
# NOTE: the snippet's indentation was lost during extraction; lines are flat.
def update_core(self):
enc_optimizer = self.get_optimizer('enc')
dec_optimizer = self.get_optimizer('dec')
dis_optimizer = self.get_optimizer('dis')
enc, dec, dis = self.enc, self.dec, self.dis
xp = enc.xp
batch = self.get_iterator('main').next()
batchsize = len(batch)
# Channel count of the input image (first element of each example pair).
in_ch = batch[0][0].shape[0]
""" Edit g """
#print("Batch size", len(batch))
#print("Batch all", batch)
#print("Batch -1[0]", batch[-1][0])
#print("Batch -1[1]", batch[-1][1])
#print("Batch -1[0][0]", batch[-1][0][0])
# (JP comment below: "access the last index and pull the extra info out")
""" 最後のインデックスにアクセスして、情報を取り出す """
# (JP comment below: "careful — this only works when the batch size is 1")
""" これは、バッチサイズが1のときのみ有効であるからして、気をつけること """
#path_through1 = []
#for in_contain in batch[-1][0][-1]:
#print("IN_CONTAIN", in_contain)
# for c in in_contain:
# path_through1.append(c)
#print("path-through len", len(path_through1))
# (JP comment below: "end of that section")
""" ここまで """
# Channel count of the target image (second element of the pair).
out_ch = batch[0][1].shape[0]
# NOTE(review): images are assumed to be 256x256 — confirm against the dataset.
w_in = 256
w_out = 256
x_in = xp.zeros((batchsize, in_ch, w_in, w_in)).astype("f")
t_out = xp.zeros((batchsize, out_ch, w_out, w_out)).astype("f")
# Stack the examples into (N, C, H, W) float32 arrays.
for i in range(batchsize):
x_in[i,:] = xp.asarray(batch[i][0])
t_out[i,:] = xp.asarray(batch[i][1])
x_in = Variable(x_in)
z = enc(x_in, test=False)
# (JP comment below: "by perturbing this z vector you could steer the output
# in an arbitrary direction")
""" このzベクトルを変化させれば、任意の方向性に持っていくことができる """
#print("z", z)
# (JP comment below: "editing z directly is risky; better to add an argument
# to dec and handle it there")
""" Zを直接編集するのは危険なので、decの引数を増やして対処したほうが良さそう """
#x_out = dec(z, path_through1, test=False)
x_out = dec(z, test=False)
y_fake = dis(x_in, x_out, test=False)
y_real = dis(x_in, t_out, test=False)
enc_optimizer.update(self.loss_enc, enc, x_out, t_out, y_fake)
# Cut the graph at the encoder features so the decoder/discriminator updates
# below do not backprop through the encoder a second time.
for z_ in z:
z_.unchain_backward()
dec_optimizer.update(self.loss_dec, dec, x_out, t_out, y_fake)
x_in.unchain_backward()
x_out.unchain_backward()
dis_optimizer.update(self.loss_dis, dis, y_real, y_fake)
示例3: train
# 需要导入模块: from chainer import Variable [as 别名]
# 或者: from chainer.Variable import unchain_backward [as 别名]
# One combined training step for an encoder/decoder + LSGAN setup:
# (1) reconstruction update for encoder+decoder, (2) discriminator update on a
# detached code h, (3) generator update. Indentation was lost in extraction.
def train(self, x):
# Encoder/Decoder
h = self.encoder(x)
xp = cuda.get_array_module(x)
# Latent z drawn uniform in [0, 1); self.dim is the latent width.
z = Variable(cuda.to_gpu(xp.random.rand(x.shape[0], self.dim).astype(xp.float32), self.device))
hz = self.generator0(z)
x_rec = self.decoder(h, hz)
l_rec = self.recon_loss(x, x_rec)
self.cleargrads()
l_rec.backward()
self.optimizer_enc.update()
self.optimizer_dec.update()
# Discriminator
h = Variable(h.data) # disconnect
# h was just re-wrapped from raw data, so it is already a graph root;
# this unchain_backward() is a belt-and-braces no-op.
h.unchain_backward()
xp = cuda.get_array_module(x)
z = Variable(cuda.to_gpu(xp.random.rand(x.shape[0], self.dim).astype(xp.float32), self.device))
hz = self.generator0(z)
x_gen = self.generator(h, hz)
d_x_gen = self.discriminator(x_gen, h)
d_x_real = self.discriminator(x, h)
l_dis = self.lsgan_loss(d_x_gen, d_x_real)
self.cleargrads()
l_dis.backward()
self.optimizer_dis.update()
# Generator
xp = cuda.get_array_module(x)
z = Variable(cuda.to_gpu(xp.random.rand(x.shape[0], self.dim).astype(xp.float32), self.device))
hz = self.generator0(z)
x_gen = self.generator(h, hz)
d_x_gen = self.discriminator(x_gen, h)
# NOTE(review): h_gen is computed but never used below — dead code or a
# missing feature-matching loss term; confirm the intent.
h_gen = self.encoder(x_gen)
l_gen = self.lsgan_loss(d_x_gen)
self.cleargrads()
l_gen.backward()
self.optimizer_dec.update()
self.optimizer_gen.update()
示例4: update_core
# 需要导入模块: from chainer import Variable [as 别名]
# 或者: from chainer.Variable import unchain_backward [as 别名]
def update_core(self):
    """Run one encoder/decoder/discriminator update (pix2pix-style).

    Fetches a minibatch from the 'main' iterator, stacks it into
    256x256 float32 input/target arrays, runs enc -> dec -> dis, and
    invokes each optimizer with its corresponding loss closure.
    """
    opt_enc = self.get_optimizer('enc')
    opt_dec = self.get_optimizer('dec')
    opt_dis = self.get_optimizer('dis')
    enc, dec, dis = self.enc, self.dec, self.dis
    xp = enc.xp

    minibatch = self.get_iterator('main').next()
    n_examples = len(minibatch)
    side = 256  # images are 256x256
    ch_in = minibatch[0][0].shape[0]
    ch_out = minibatch[0][1].shape[0]

    # Stack the example pairs into (N, C, H, W) float32 arrays.
    src = xp.zeros((n_examples, ch_in, side, side)).astype("f")
    tgt = xp.zeros((n_examples, ch_out, side, side)).astype("f")
    for idx, example in enumerate(minibatch):
        src[idx, :] = xp.asarray(example[0])
        tgt[idx, :] = xp.asarray(example[1])

    x_in = Variable(src)
    z = enc(x_in)
    x_out = dec(z)
    y_fake = dis(x_in, x_out)
    y_real = dis(x_in, tgt)

    opt_enc.update(self.loss_enc, enc, x_out, tgt, y_fake)
    # Cut the graph at the encoder features so the remaining updates
    # do not backprop into the encoder again.
    for feature in z:
        feature.unchain_backward()
    opt_dec.update(self.loss_dec, dec, x_out, tgt, y_fake)
    x_in.unchain_backward()
    x_out.unchain_backward()
    opt_dis.update(self.loss_dis, dis, y_real, y_fake)
示例5: print
# 需要导入模块: from chainer import Variable [as 别名]
# 或者: from chainer.Variable import unchain_backward [as 别名]
# Fragment of a char-RNN training loop (the enclosing `for i ...` header is not
# part of this snippet; indentation was lost during extraction). Runs one
# forward step, accumulates the loss, and every `bprop_len` steps performs
# truncated BPTT, checkpointing the model every 10000 iterations.
if args.gpu >=0:
x_batch = cuda.to_gpu(x_batch)
y_batch = cuda.to_gpu(y_batch)
state, loss_i = model.forward_one_step(x_batch, y_batch, state, dropout_ratio=args.dropout)
accum_loss += loss_i
if (i + 1) % bprop_len == 0: # Run truncated BPTT
now = time.time()
print('{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/bprop_len, jump, accum_loss.data / bprop_len, now-cur_at))
cur_at = now
optimizer.zero_grads()
accum_loss.backward()
# Cut the computational graph so memory does not grow across windows.
accum_loss.unchain_backward() # truncate
# Start a fresh scalar accumulator on the right device.
if args.gpu >= 0:
accum_loss = Variable(cuda.zeros(()))
else:
accum_loss = Variable(np.zeros((), dtype=np.float32))
optimizer.clip_grads(grad_clip)
optimizer.update()
# Periodic checkpoint via pickling a CPU copy of the model.
if (i + 1) % 10000 == 0:
fn = ('%s/charrnn_epoch_%.2f.chainermodel' % (args.checkpoint_dir, float(i)/jump))
pickle.dump(copy.deepcopy(model).to_cpu(), open(fn, 'wb'))
if (i + 1) % jump == 0:
epoch += 1
示例6: main
# 需要导入模块: from chainer import Variable [as 别名]
# 或者: from chainer.Variable import unchain_backward [as 别名]
# Driver for a word-level RNN language model: loads train/dev/test corpora,
# trains with truncated BPTT (window `bp_len`, a module-level constant), and
# reports perplexities. Indentation was lost during extraction; lines are flat.
def main():
args = parse_args()
init_program_state(args)
vocab = make_vocab()
data, batched_data = load_data(args.train, vocab, args.batch_size)
dev , batched_dev = load_data(args.dev, vocab, 1)
test, batched_test = load_data(args.test, vocab, 1)
model = init_model(input_size = len(vocab),
embed_size = args.embed_size,
hidden_size = args.hidden_size,
output_size = len(vocab))
optimizer = optimizers.SGD(lr=args.lr)
# Begin Training
UF.init_model_parameters(model)
model = UF.convert_to_GPU(USE_GPU, model)
optimizer.setup(model)
batchsize = args.batch_size
epoch = args.epoch
# Zero-dimensional loss accumulator for truncated BPTT.
accum_loss = Variable(xp.zeros((), dtype=np.float32))
counter = 0
# For each epoch..
for ep in range(epoch):
UF.trace("Training Epoch %d" % ep)
total_tokens = 0
log_ppl = 0.0
# For each batch, do forward & backward computations
for i, batch in enumerate(batched_data):
loss, nwords = forward(model, batch)
accum_loss += loss
log_ppl += loss.data.reshape(())
# Tracing...
total_tokens += nwords
# UF.trace(' %d/%d = %.5f' % (min(i*batchsize, len(data)), len(data), loss.data.reshape(())*batchsize))
# Counting
# Every bp_len batches: backprop, truncate the graph, clip, update.
if (counter+1) % bp_len == 0:
optimizer.zero_grads()
accum_loss.backward()
accum_loss.unchain_backward()
accum_loss = Variable(xp.zeros((), dtype=np.float32))
optimizer.clip_grads(grad_clip)
optimizer.update()
counter += 1
# Counting Perplexity
log_ppl /= total_tokens
UF.trace(" PPL (Train) = %.10f" % math.exp(UF.to_cpu(USE_GPU, log_ppl)))
dev_ppl = evaluate(model, batched_dev)
UF.trace(" PPL (Dev) = %.10f" % math.exp(UF.to_cpu(USE_GPU, dev_ppl)))
# Reducing learning rate
# NOTE(review): decay starts only after epoch 6 and is unconditional after
# that — no dev-loss plateau check; confirm that is intended.
if ep > 6:
optimizer.lr /= 1.2
UF.trace("Reducing LR:", optimizer.lr)
# Begin Testing
UF.trace("Begin Testing...")
test_ppl = evaluate(model, batched_test)
UF.trace(" log(PPL) = %.10f" % test_ppl)
UF.trace(" PPL = %.10f" % math.exp(UF.to_cpu(USE_GPU, test_ppl)))
示例7: Variable
# 需要导入模块: from chainer import Variable [as 别名]
# 或者: from chainer.Variable import unchain_backward [as 别名]
# Python 2 script fragment (print statements, xrange): trains `net` on a
# character corpus with truncated BPTT, then samples two continuations.
# The definitions of `net`, `optimizer` and `corpus` precede this snippet.
# Indentation was lost during extraction; lines are flat.
n_epochs = 50
batch_size = 100
bprop_len = 50
train_data = corpus.train_data
whole_len = train_data.shape[0]
# NOTE(review): Py2 integer division; under Py3 this becomes a float and the
# loop bound below would need int()/// — confirm target interpreter.
jump = whole_len / batch_size
accum_loss = Variable(np.zeros((), dtype=np.float32))
for i in xrange(jump * n_epochs):
# Each of the batch_size streams reads from its own stride through the corpus.
x_batch = np.array([train_data[(jump * j + i) % whole_len] for j in xrange(batch_size)])
y_batch = np.array([train_data[(jump * j + i + 1) % whole_len] for j in xrange(batch_size)])
print x_batch
loss = net.trainOneStep(x_batch, y_batch)
accum_loss += loss
# Truncated BPTT every bprop_len steps.
if (i + 1) % bprop_len == 0:
print "i is %d / %d, loss is %f" % (i + 1, jump * n_epochs, accum_loss.data / bprop_len)
optimizer.zero_grads()
accum_loss.backward()
accum_loss.unchain_backward()
accum_loss = Variable(np.zeros((), dtype=np.float32))
optimizer.clip_grads(5.0)
optimizer.update()
# Sample 100 characters from two seed prompts.
print corpus.decode(net.predict([corpus.encode("Before we proceed")], num=100)[0])
print corpus.decode(net.predict([corpus.encode("My lord")], num=100)[0])
# serializers.save_npz("model/shakespeare.mod", net)
示例8: train_dcgan_labeled
# 需要导入模块: from chainer import Variable [as 别名]
# 或者: from chainer.Variable import unchain_backward [as 别名]
#.........这里部分代码省略.........
# Truncated fragment of train_dcgan_labeled (the source page elided the
# beginning of the function; indentation was lost). It scores an evolver and a
# projector against a discriminator, updates all three in 'hard' mode, then
# unchains every Variable to free the graph between steps.
yl_train = dis(movie_in,movie_out)
L_dis += F.softmax_cross_entropy(yl_train, Variable(xp.zeros(batchsize, dtype=np.int32)))
elif args.norm == 'CA':
L_evol = d_norm(0, dis, movie_out, movie_out_predict_before)
L_proj = d_norm(0, dis, movie_out, movie_out_predict)
L_dis = d_norm(1, dis, movie_out, movie_out_predict_before)
# L_dis += d_norm(1, dis, movie_out, movie_out_predict)
L_dis += d_norm(0, dis, movie_out, movie_other)
# L_dis += d_norm(0, dis, movie_other, movie_out)
else:
# Fallback: plain mean-squared error between prediction and target frames.
L2norm = (movie_out - movie_out_predict)**2
yl = F.sum(L2norm) / L2norm.data.size
L_evol = yl
evol_scores[difficulty] += [L_evol.data.get()] # np.average(F.softmax(yl).data.get()[:,0])
proj_scores[difficulty] += [L_proj.data.get()] # np.average(F.softmax(yl).data.get()[:,0])
# stop learning on normal mode.
if difficulty == 'hard':
o_evol.zero_grads()
L_evol.backward()
o_evol.update()
o_dis.zero_grads()
L_dis.backward()
o_dis.update()
o_proj.zero_grads()
L_proj.backward()
o_proj.update()
# Release the computational graph so memory does not accumulate.
movie_in.unchain_backward()
movie_out_predict.unchain_backward()
movie_out_predict_before.unchain_backward()
movie_other.unchain_backward()
L_evol.unchain_backward()
if args.norm == 'dcgan' or args.norm == 'CA':
L_dis.unchain_backward()
sys.stdout.write('%d %6s %s: %f -> %f, %f -> %f shuzo:%s\r'%(train_offset,difficulty, args.norm,
np.average(evol_scores['normal']), np.average(proj_scores['normal']),
np.average(evol_scores['hard']), np.average(proj_scores['hard']),
str(shuzo_evoke_timestep[-10:])))
sys.stdout.flush()
# update the prediction as results of learning.
prediction_movie[train_offset+n_timeseries-1] = evolve_image(evol,proj,prediction_movie[train_offset: train_offset+n_timeseries-1])
# prevent too much learning from noisy prediction.
# if len(evol_scores['hard'])>=10 and np.average(evol_scores['hard'][-5:-1]) > 5 * np.average(evol_scores['normal']):
# Periodically reset the 'hard' score history at randomized offsets.
if train_offset == next_shuzo_offset:
next_shuzo_offset = train_offset + 1 + abs(int(round(np.random.normal(scale=next_shuzo_scale))))
# Zettaini, akiramennna yo!
# matsuoka_shuzo['hard'] = False
shuzo_evoke_timestep += [train_offset]
evol_scores['hard']=[0.0]
proj_scores['hard']=[0.0]
# Re-seed the prediction window with ground-truth frames where available.
for t in range(train_offset, train_offset+n_timeseries):
if current_movie[t] is not None:
prediction_movie[t]=current_movie[t]
print
def visualize_vis_kit(vis_kit):
示例9: _train
# 需要导入模块: from chainer import Variable [as 别名]
# 或者: from chainer.Variable import unchain_backward [as 别名]
# Trains self.model (a char-RNN) over self.dataset with truncated BPTT.
# All hyperparameters arrive via **kwargs with defaults; the model is pickled
# every 10000 iterations and at the end. Indentation was lost in extraction.
def _train(self, **kwargs):
# kwargs-with-default idiom: each setting falls back if the key is absent.
gpu = -1 if "gpu" not in kwargs else kwargs["gpu"]
lr = 2e-3 if "lr" not in kwargs else kwargs["lr"]
lr_decay = 0.97 if "lr_decay" not in kwargs else kwargs["lr_decay"]
lr_decay_after=10 if "lr_decay_after" not in kwargs else kwargs["lr_decay_after"]
decay_rate = 0.95 if "decay_rate" not in kwargs else kwargs["decay_rate"]
dropout = 0.0 if "dropout" not in kwargs else kwargs["dropout"]
bprop_len = 50 if "bprop_len" not in kwargs else kwargs["bprop_len"]
batchsize = 50 if "batchsize" not in kwargs else kwargs["batchsize"]
grad_clip = 5 if "grad_clip" not in kwargs else kwargs["grad_clip"]
n_epochs = 5 if "epochs" not in kwargs else kwargs["epochs"]
if gpu >= 0:
cuda.get_device(gpu).use()
self.model.to_gpu()
optimizer = optimizers.RMSprop(lr=lr, alpha=decay_rate, eps=1e-8)
optimizer.setup(self.model)
train_data = self.dataset
whole_len = train_data.shape[0]
jump = whole_len // batchsize
epoch = 0
start_at = time.time()
cur_at = start_at
state = self.model.make_initial_state(batchsize=batchsize)
# Zero-dimensional loss accumulator on the right device; RNN state moves too.
if gpu >= 0:
accum_loss = Variable(cuda.zeros(()))
for key, value in state.items():
value.data = cuda.to_gpu(value.data)#plist
else:
accum_loss = Variable(np.zeros((), dtype=np.float32))
print ('going to train {} iterations'.format(jump * n_epochs))
for i in range(jump * n_epochs):
# batchsize parallel streams, each offset by `jump` positions in the corpus.
x_batch = np.array([train_data[(jump * j + i) % whole_len]
for j in range(batchsize)])
y_batch = np.array([train_data[(jump * j + i + 1) % whole_len]
for j in range(batchsize)])
if gpu >=0:
x_batch = cuda.to_gpu(x_batch)
y_batch = cuda.to_gpu(y_batch)
state, loss_i = self.model.forward_one_step(x_batch, y_batch, state, dropout_ratio=dropout)
accum_loss += loss_i
if (i + 1) % bprop_len == 0: # Run truncated BPTT
now = time.time()
sys.stderr.write('\r{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)//bprop_len,(jump*n_epochs)//bprop_len, accum_loss.data / bprop_len, now-cur_at))
sys.stderr.flush()
cur_at = now
optimizer.zero_grads()
accum_loss.backward()
# Cut the graph so memory stays bounded across BPTT windows.
accum_loss.unchain_backward() # truncate
if gpu >= 0:
accum_loss = Variable(cuda.zeros(()))
else:
accum_loss = Variable(np.zeros((), dtype=np.float32))
optimizer.clip_grads(grad_clip)
optimizer.update()
# Periodic checkpoint: pickle a CPU copy of the model.
if (i + 1) % 10000 == 0:
pickle.dump(copy.deepcopy(self.model).to_cpu(), open(self.model_path, 'wb'))
if (i + 1) % jump == 0:
epoch += 1
# Geometric LR decay once past lr_decay_after epochs.
if epoch >= lr_decay_after:
optimizer.lr *= lr_decay
print ('decayed learning rate by a factor {} to {}'.format(lr_decay, optimizer.lr))
sys.stdout.flush()
pickle.dump(copy.deepcopy(self.model).to_cpu(), open(self.model_path, 'wb'))
示例10: RNNCharEstimator
# 需要导入模块: from chainer import Variable [as 别名]
# 或者: from chainer.Variable import unchain_backward [as 别名]
class RNNCharEstimator(ChainerClassifier):
    """Character-level RNN estimator built on ChainerClassifier.

    Wraps a CharLSTM or CharIRNN network and trains it with truncated
    BPTT, flushing the accumulated loss every ``seq_size`` minibatches.
    """

    def __init__(self, net_type='lstm', net_hidden=100,
                 vocab_size=1000, dropout_ratio=0.0, seq_size=70, grad_clip=100.0,
                 **params):
        ChainerClassifier.__init__(self, **params)
        self.net_hidden = net_hidden          # hidden-layer width
        self.net_type = net_type              # 'lstm' or 'irnn'
        self.vocab_size = vocab_size          # input/output alphabet size
        self.dropout_ratio = dropout_ratio    # dropout used during training
        self.seq_size = seq_size              # BPTT truncation length
        self.grad_clip = grad_clip            # gradient clipping threshold
        self.param_names.append('vocab_size')
        self.param_names.append('net_type')
        self.param_names.append('net_hidden')
        self.param_names.append('dropout_ratio')

    def setup_network(self, n_features):
        # Build the requested recurrent network variant.
        if self.net_type == 'lstm':
            self.network = CharLSTM(self.vocab_size, self.net_hidden, self.batch_size)
        elif self.net_type == 'irnn':
            self.network = CharIRNN(self.vocab_size, self.net_hidden, self.batch_size)
        else:
            error("Unknown net_type")
        self.reset_accum_loss()

    def reset_accum_loss(self):
        # Fresh zero-dimensional accumulator for the truncated-BPTT loss.
        # NOTE(review): the CPU branch omits a dtype (defaults to float64)
        # while sibling code uses float32 — confirm intended.
        if self.gpu >= 0:
            self.accum_loss = Variable(cuda.zeros(()))
        else:
            self.accum_loss = Variable(np.zeros(()))

    def forward_train(self, x, t):
        return self.network.train(x, t, dropout_ratio=self.dropout_ratio)

    def predict(self, x_data):
        # Predict one class index per input row, concatenating batch outputs.
        self.network.reset_state(1)
        if self.gpu >= 0:
            self.network.to_gpu()
            x_data = cuda.to_gpu(x_data)
        results = None
        for i in xrange(x_data.shape[0]):
            x = Variable(x_data[i,:])
            y = self.network.predict(x)
            # FIX: compare to None with `is`, not `==` (PEP 8; `==` may invoke
            # elementwise __eq__ on array-like results).
            if results is None:
                results = cuda.to_cpu(y.data)
            else:
                results = np.concatenate([results, cuda.to_cpu(y.data)])
        results = results.argmax(1)
        return results

    def fit_update(self, loss, batch_id):
        self.accum_loss += loss
        if ((batch_id + 1) % self.seq_size) == 0:  # Run Truncated BPTT
            self.optimizer.zero_grads()
            self.accum_loss.backward()
            self.accum_loss.unchain_backward()  # truncate the graph
            self.optimizer.clip_grads(self.grad_clip)
            self.optimizer.update()
            self.reset_accum_loss()

    def make_batch(self, x_data, y_data, batch_id):
        # Each of batch_size parallel streams reads a strided slice of the data.
        # NOTE(review): `/` is Py2 integer division here (this class uses
        # xrange, i.e. Python 2); under Py3 this would need `//`.
        batch_num = self.n_samples / self.batch_size
        x_batch = np.array([x_data[(batch_id + batch_num * j) % self.n_samples]
                            for j in xrange(self.batch_size)]).reshape(self.batch_size)
        y_batch = np.array([y_data[(batch_id + batch_num * j) % self.n_samples]
                            for j in xrange(self.batch_size)])
        return x_batch, y_batch
示例11: str
# 需要导入模块: from chainer import Variable [as 别名]
# 或者: from chainer.Variable import unchain_backward [as 别名]
# Python 2 training-loop fragment (the enclosing `for i ...` header and the
# validation loop's tail are outside this snippet; indentation was lost).
# Accumulates the per-sequence loss, backprops it whole, and saves the weight
# matrices as .npy files on the final iteration.
if args.gpu >= 0:
x_t = cuda.to_gpu(x_t)
y_t = cuda.to_gpu(y_t)
state, loss_i = model.forward_one_step(x_t, y_t, state, dropout_ratio=args.dropout)
loss += loss_i
now = time.time()
end_time += now - cur_at
iterations_count += 1
print "loss_all=" + str(loss.data)
print "{}, train_loss = {}, time = {:.4f}".format(
iterations_count, loss.data / (len(train_data[i % whole_len]) - 1), now - cur_at
)
cur_at = now
optimizer.zero_grads()
loss.backward()
# Release the computational graph after the update.
loss.unchain_backward()
optimizer.clip_grads(grad_clip)
optimizer.update()
# On the very last iteration, dump each layer's weights/biases to .npy.
if (i + 1) == (whole_len * n_epochs):
cuda.cupy.save("l1_x_W.npy", model.l1_x.W)
cuda.cupy.save("l1_x_b.npy", model.l1_x.b)
cuda.cupy.save("l1_h_W.npy", model.l1_h.W)
cuda.cupy.save("l1_h_b.npy", model.l1_h.b)
cuda.cupy.save("l6_W.npy", model.l6.W)
cuda.cupy.save("l6_b.npy", model.l6.b)
if ((i + 1) % whole_len) == 0:
epoch += 1
train_loss_all.append(loss.data.get() / len(train_data[i % whole_len]))
# Validation pass begins here (truncated by the source page).
for k in xrange(whole_val_len):
val_state = make_initial_state(n_units)
for key, value in val_state.items():
示例12: train_dcgan_labeled
# 需要导入模块: from chainer import Variable [as 别名]
# 或者: from chainer.Variable import unchain_backward [as 别名]
#.........这里部分代码省略.........
# Truncated fragment of a second train_dcgan_labeled variant (Python 2;
# leading and trailing code elided by the source page, indentation lost).
# A "retoucher" network refines generated images; generator, retoucher and
# discriminator are each updated, then every Variable is unchained.
x3=retou(x_retouch_motif) # let the retoucher make the generated image better
yl1st = dis(x3) # and try deceive the discriminator
# retoucher want their image to look like those from dataset(zeros),
# while discriminators want to classify them as from noise(ones)
L_retou = F.softmax_cross_entropy(yl1st, Variable(xp.zeros(batchsize, dtype=np.int32)))
L_dis += F.softmax_cross_entropy(yl1st, Variable(xp.ones(batchsize, dtype=np.int32)))
o_gen.zero_grads()
L_gen.backward()
o_gen.update()
o_retou.zero_grads()
L_retou.backward()
o_retou.update()
o_dis.zero_grads()
L_dis.backward()
o_dis.update()
retouch_loss = float(str((L_retou).data))
# Track consecutive non-improvements of the retoucher loss.
if retouch_loss >= last_retouch_loss:
retouch_fail_count += 1
last_retouch_loss = min(retouch_loss,last_retouch_loss)
#print "backward done"
sum_l_gen += L_gen.data.get()
sum_l_dis += L_dis.data.get()
# Release all graphs so the next retouch round starts from raw data.
x.unchain_backward()
x_train.unchain_backward()
x3.unchain_backward()
x_retouch_motif = x3
L_gen.unchain_backward()
L_retou.unchain_backward()
L_dis.unchain_backward()
print "epoch:",epoch,"iter:",i,"softmax:",average_softmax, "retouch:",retouch_fail_count, retouch_loss
# Periodically render a grid of samples; half the z batch is resampled.
if i%image_save_interval==0:
n_retou=2
plt.rcParams['figure.figsize'] = (16.0,16.0*n_retou)
plt.close('all')
vissize = 100
z = zvis
z[50:,:] = (xp.random.uniform(-1, 1, (50, nz), dtype=np.float32))
z = Variable(z)
x = gen(z, test=True)
x_data = x.data.get()
imgfn = '%s/vis_%d_%d.png'%(out_image_dir, epoch,i)
x_split = F.split_axis(x,vissize,0)
# (truncated by the source page: mktitle's body continues beyond this snippet)
def mktitle(x1):
d1 = F.softmax(dis(x1,test=True))
示例13: train
# 需要导入模块: from chainer import Variable [as 别名]
# 或者: from chainer.Variable import unchain_backward [as 别名]
def train(self, words, steps, batchsize=100, sequence_length=10):
""" Train the Predictor's model on words for steps number of steps. """
whole_len = len(words)
train_data = np.ndarray(whole_len, dtype=np.int32)
jumps = steps * sequence_length
# Initialize training data and maybe vocab.
if self.vocab is None:
vocab_initializing = True
self.vocab = {}
for i, word in enumerate(words):
if vocab_initializing:
if word not in self.vocab:
self.vocab[word] = len(self.vocab)
train_data[i] = self.vocab[word]
vocab_initializing = False
print 'corpus length:', len(words)
print 'self.vocab size:', len(self.vocab)
# Initialize base model (if we need to)
if self.model is None:
self.model = BaseRNN(len(self.vocab), self.units)
if self.gpu >= 0:
cuda.get_device(self.gpu).use()
self.model.to_self.gpu()
optimizer = optimizers.RMSprop(lr=self.settings.learning_rate,
alpha=self.settings.decay_rate,
eps=1e-8)
optimizer.setup(self.model)
jumpsPerEpoch = whole_len / batchsize
epoch = 0
start_at = time.time()
cur_at = start_at
state = make_initial_state(self.units, batchsize=batchsize)
if self.gpu >= 0:
accum_loss = Variable(cuda.zeros(()))
for _, value in state.items():
value.data = cuda.to_self.gpu(value.data)
else:
accum_loss = Variable(np.zeros((), dtype=np.float32))
print 'going to train {} iterations'.format(steps)
for i in xrange(jumps):
x_batch = np.array([train_data[(jumpsPerEpoch * j + i) % whole_len]
for j in xrange(batchsize)])
y_batch = np.array([train_data[(jumpsPerEpoch * j + i + 1) % whole_len]
for j in xrange(batchsize)])
if self.gpu >= 0:
x_batch = cuda.to_self.gpu(x_batch)
y_batch = cuda.to_self.gpu(y_batch)
state, loss_i = self.model.forward_one_step(x_batch,
y_batch,
state,
dropout_ratio=self.settings.dropout)
accum_loss += loss_i
if (i + 1) % sequence_length == 0:
now = time.time()
print '{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/sequence_length, steps, accum_loss.data / sequence_length, now-cur_at)
cur_at = now
optimizer.zero_grads()
accum_loss.backward()
accum_loss.unchain_backward() # truncate
if self.gpu >= 0:
accum_loss = Variable(cuda.zeros(()))
else:
accum_loss = Variable(np.zeros((), dtype=np.float32))
optimizer.clip_grads(self.settings.grad_clip)
optimizer.update()
if (i + 1) % jumpsPerEpoch == 0:
epoch += 1
if epoch >= self.settings.learning_rate_decay_after:
optimizer.lr *= self.settings.learning_rate_decay
print 'decayed self.settings.learning rate by a factor {} to {}'.format(self.settings.learning_rate_decay, optimizer.lr)