This article collects and summarizes typical usage examples of the Python method chainer.Variable.backward. If you have been wondering what Variable.backward does, how to call it, or where to find working examples, the hand-picked code samples below may help. You can also read more about its containing class, chainer.Variable.
The following shows 13 code examples of Variable.backward, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
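All of the examples below follow the same basic pattern: build a computation graph by applying functions to a Variable, call backward() on a scalar loss, and read gradients from the .grad attribute of the inputs. As a minimal, self-contained sketch (written against a recent Chainer; the collected examples target older releases, so surrounding APIs such as volatile or zero_grads differ):

import numpy as np
from chainer import Variable
import chainer.functions as F

x = Variable(np.array([[1.0, 2.0, 3.0]], dtype=np.float32))
loss = F.sum(x ** 2)   # scalar loss: 1 + 4 + 9 = 14
loss.backward()        # backpropagate through the graph rooted at loss
print(x.grad)          # d(loss)/dx = 2*x -> [[2. 4. 6.]]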
Example 1: optimizeCRNN
# Required import: from chainer import Variable [as alias]
# Alternative: from chainer.Variable import backward [as alias]
def optimizeCRNN(iterNum, maxIndex, indicies):
    batchSize = 1000
    model = EvalCRNN(maxIndex, 500)
    print(len(indicies), computeEntropy(maxIndex, indicies))
    learningRate = 0.001
    epoch = 3
    for j in range(epoch):
        my_optimizer = optimizers.RMSpropGraves(lr=learningRate)
        my_optimizer.setup(model)
        my_optimizer.add_hook(optimizer.GradientClipping(1))
        model.cRNN.reset()
        loss = Variable(np.array([[0]]))
        for i in range(iterNum):
            t1 = time.clock()
            model.zerograds()
            loss.unchain_backward()
            loss = model(indicies[batchSize*i:batchSize*(i+1)], iterNum*batchSize)
            loss.backward()
            t2 = time.clock()
            msg = "iter: " + str(i + iterNum * j + 1) + "/" + str(iterNum * epoch)
            msgLoss = "loss: " + str(loss.data / batchSize)
            msgNorm = "grad: " + str(my_optimizer.compute_grads_norm())
            msgTime = "time: " + str(t2 - t1) + " seconds"
            print(msgLoss, msgNorm, msg, msgTime)
            my_optimizer.update()
        learningRate *= 0.50
    print(model(indicies[batchSize*(iterNum):batchSize*(iterNum+10)]).data / (batchSize*10))
    return model.cRNN
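Note the order of operations in the inner loop of Example 1: unchain_backward() is called on the previous iteration's loss before the new loss is computed, which cuts the computation graph at that point so backward() never propagates further back than one batch (a form of truncated backpropagation through time). Examples 5, 8, and 12 below use the more common variant, where a loss is accumulated over bprop_len steps, backward() runs once, and the accumulated loss itself is then unchained.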
Example 2: generate_image
# Required import: from chainer import Variable [as alias]
# Alternative: from chainer.Variable import backward [as alias]
def generate_image(img_orig, img_style, width, nw, nh, max_iter, lr, img_gen=None):
    batch_size = img_orig.shape[0]
    mid_orig = nn.forward(Variable(img_orig, volatile=True))
    style_mats = [get_matrix(y) for y in nn.forward(Variable(img_style, volatile=True))]
    if img_gen is None:
        if args.gpu >= 0:
            img_gen_ = xp.random.uniform(-20, 20, (3, width, width), dtype=np.float32)
            img_gen = xp.random.uniform(-20, 20, (batch_size, 3, width, width), dtype=np.float32)
            img_gen[:, :, :, :] = img_gen_
        else:
            img_gen_ = np.random.uniform(-20, 20, (3, width, width)).astype(np.float32)
            img_gen = np.random.uniform(-20, 20, (batch_size, 3, width, width)).astype(np.float32)
            img_gen[:, :, :, :] = img_gen_
    x = Variable(img_gen)
    xg = xp.zeros_like(x.data)
    optimizer = optimizers.Adam(alpha=lr)
    optimizer.setup((img_gen, xg))
    for i in range(max_iter):
        x = Variable(img_gen)
        y = nn.forward(x)
        optimizer.zero_grads()
        L = Variable(xp.zeros((), dtype=np.float32))
        for l in range(len(y)):
            gogh_matrix = get_matrix(y[l])
            L1 = np.float32(args.lam) * np.float32(nn.alpha[l]) * F.mean_squared_error(y[l], Variable(mid_orig[l].data))
            L2 = np.float32(nn.beta[l]) * F.mean_squared_error(gogh_matrix, Variable(style_mats[l].data)) / np.float32(len(y))
            L += L1 + L2
            if i % 100 == 0:
                print(i, l, L1.data, L2.data)
        L.backward()
        xg += x.grad
        optimizer.update()
        tmp_shape = img_gen.shape
        if args.gpu >= 0:
            img_gen += Clip().forward(img_gen).reshape(tmp_shape) - img_gen
        else:
            def clip(x):
                return -120 if x < -120 else (136 if x > 136 else x)
            img_gen += np.vectorize(clip)(img_gen).reshape(tmp_shape) - img_gen
        if i % 50 == 0:
            for j in range(img_gen.shape[0]):
                save_image(img_gen[j], W, nw[j], nh[j], args.out_dir + "_%d/im_%05d.png" % (j, i))
    for j in range(img_gen.shape[0]):
        save_image(img_gen[j], W, nw[j], nh[j], args.out_dir + "_last/im_%d.png" % (j))
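get_matrix is not defined in this excerpt, but Example 3 below inlines the same computation: the Gram matrix of a layer's feature maps, i.e. the channel-by-channel inner products of the flattened activations, normalized by the layer size. A hypothetical reconstruction for the single-image case (a sketch inferred from Example 3, not the original helper):

import numpy as np
import chainer.functions as F

def get_matrix(y):
    ch = y.data.shape[1]   # number of channels
    wd = y.data.shape[2]   # spatial size (feature maps assumed square)
    y_flat = F.reshape(y, (ch, wd ** 2))
    # (ch x ch) Gram matrix, normalized by ch * wd^2
    return F.matmul(y_flat, y_flat, transb=True) / np.float32(ch * wd ** 2)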
Example 3: generate_image
# Required import: from chainer import Variable [as alias]
# Alternative: from chainer.Variable import backward [as alias]
def generate_image(img_orig, img_style, width, nw, nh, max_iter, lr, img_gen=None):
    mid_orig = nn.forward(Variable(img_orig, volatile=True))
    style_mats = [get_matrix(y) for y in nn.forward(Variable(img_style, volatile=True))]
    if img_gen is None:
        if args.gpu >= 0:
            img_gen = xp.random.uniform(-20, 20, (1, 3, width, width), dtype=np.float32)
        else:
            img_gen = np.random.uniform(-20, 20, (1, 3, width, width)).astype(np.float32)
    x = Variable(img_gen)
    xg = xp.zeros_like(x.data)
    optimizer = optimizers.Adam(alpha=lr)
    optimizer.setup((img_gen, xg))
    for i in range(max_iter):
        x = Variable(img_gen)
        y = nn.forward(x)
        optimizer.zero_grads()
        L = Variable(xp.zeros((), dtype=np.float32))
        for l in range(len(y)):
            ch = y[l].data.shape[1]
            wd = y[l].data.shape[2]
            gogh_y = F.reshape(y[l], (ch, wd ** 2))
            gogh_matrix = F.matmul(gogh_y, gogh_y, transb=True) / np.float32(ch * wd ** 2)
            L1 = np.float32(args.lam) * np.float32(nn.alpha[l]) * F.mean_squared_error(y[l], Variable(mid_orig[l].data))
            L2 = np.float32(nn.beta[l]) * F.mean_squared_error(gogh_matrix, Variable(style_mats[l].data)) / np.float32(len(y))
            L += L1 + L2
            if i % 100 == 0:
                print(i, l, L1.data, L2.data)
        L.backward()
        xg += x.grad
        optimizer.update()
        tmp_shape = img_gen.shape
        if args.gpu >= 0:
            img_gen += Clip().forward(img_gen).reshape(tmp_shape) - img_gen
        else:
            def clip(x):
                return -120 if x < -120 else (136 if x > 136 else x)
            img_gen += np.vectorize(clip)(img_gen).reshape(tmp_shape) - img_gen
        if i % 3000 == 0:
            save_image(img_gen, W, nw, nh, i)
Example 4: step
# Required import: from chainer import Variable [as alias]
# Alternative: from chainer.Variable import backward [as alias]
def step(self, perm, batch_index, mode, epoch):
    if mode == 'train':
        data, first_words, label = self.read_batch(perm, batch_index, self.train_data, mode)
        train = True
    else:
        data, first_words, label = self.read_batch(perm, batch_index, self.test_data, mode)
        train = False
    data = Variable(cuda.to_gpu(data))
    state = {name: Variable(self.xp.zeros((self.batchsize, 1024), dtype=self.xp.float32)) for name in ('c1', 'h1')}
    loss = Variable(cuda.cupy.asarray(0.0).astype(np.float32))
    acc = 0.0
    ### image encoder ###
    h = self.enc(data, train=train, test=not train)
    h = h.data
    h = Variable(h)  # wrapping the raw array in a new Variable cuts the graph between encoder and decoder
    ### first LSTM step ###
    state, _ = self.dec(h, state, train=train, test=not train, image=True)
    ### input <SOS> ###
    state, y = self.dec(Variable(cuda.to_gpu(first_words)), state, train=train, test=not train)
    loss += F.softmax_cross_entropy(y, Variable(cuda.to_gpu(label.T[1])))
    acc += F.accuracy(y, Variable(cuda.to_gpu(label.T[1])), ignore_label=-1).data.get()
    for cur_word, next_word in zip(label.T[1:-1], label.T[2:]):
        state, y = self.dec(Variable(cuda.to_gpu(cur_word)), state, train=train, test=not train)
        loss += F.softmax_cross_entropy(y, Variable(cuda.to_gpu(next_word)))
        acc += F.accuracy(y, Variable(cuda.to_gpu(next_word)), ignore_label=-1).data.get()
    if mode == 'train':
        self.dec.cleargrads()
        loss.backward()
        self.o_dec.update()
    return {"prediction": 0,
            "current_loss": loss.data.get() / (label.T.shape[0]),
            "current_accuracy": acc / (label.T.shape[0]),
            }
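Example 4 trains the decoder with teacher forcing: at every step the ground-truth previous word (cur_word) is fed to the decoder, and its output is scored against the true next word with softmax_cross_entropy, while ignore_label=-1 keeps padded positions out of the accuracy. Because loss is the sum over all time steps, the single loss.backward() call at the end backpropagates through the entire unrolled sequence at once.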
Example 5: range
# Required import: from chainer import Variable [as alias]
# Alternative: from chainer.Variable import backward [as alias]
                        for j in range(batchsize)])
    if args.gpu >= 0:
        x_batch = cuda.to_gpu(x_batch)
        y_batch = cuda.to_gpu(y_batch)
    state, loss_i = model.forward_one_step(x_batch, y_batch, state, dropout_ratio=args.dropout)
    accum_loss += loss_i
    if (i + 1) % bprop_len == 0:  # Run truncated BPTT
        now = time.time()
        print('{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/bprop_len, jump, accum_loss.data / bprop_len, now-cur_at))
        cur_at = now
        optimizer.zero_grads()
        accum_loss.backward()
        accum_loss.unchain_backward()  # truncate
        if args.gpu >= 0:
            accum_loss = Variable(cuda.zeros(()))
        else:
            accum_loss = Variable(np.zeros((), dtype=np.float32))
        optimizer.clip_grads(grad_clip)
        optimizer.update()
    if (i + 1) % 10000 == 0:
        fn = ('%s/charrnn_epoch_%.2f.chainermodel' % (args.checkpoint_dir, float(i)/jump))
        pickle.dump(copy.deepcopy(model).to_cpu(), open(fn, 'wb'))
    if (i + 1) % jump == 0:
        epoch += 1
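Examples 5, 8, and 12 are variants of the same character-RNN training loop, so the shared truncated-BPTT skeleton is worth isolating. A hedged sketch (model.forward_one_step, state, the batch arrays, bprop_len, and grad_clip are placeholders taken from these excerpts, and zero_grads/clip_grads belong to the old pre-Link Chainer optimizer API):

import numpy as np
from chainer import Variable

accum_loss = Variable(np.zeros((), dtype=np.float32))
for i in range(n_iterations):
    state, loss_i = model.forward_one_step(x_batches[i], y_batches[i], state)
    accum_loss += loss_i                 # keep growing the graph...
    if (i + 1) % bprop_len == 0:         # ...until bprop_len steps have passed
        optimizer.zero_grads()
        accum_loss.backward()            # backprop through the last bprop_len steps only
        accum_loss.unchain_backward()    # cut the graph so earlier steps can be freed
        accum_loss = Variable(np.zeros((), dtype=np.float32))
        optimizer.clip_grads(grad_clip)
        optimizer.update()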
Example 6: main
# Required import: from chainer import Variable [as alias]
# Alternative: from chainer.Variable import backward [as alias]
def main():
    args = parse_args()
    init_program_state(args)
    vocab = make_vocab()
    data, batched_data = load_data(args.train, vocab, args.batch_size)
    dev, batched_dev = load_data(args.dev, vocab, 1)
    test, batched_test = load_data(args.test, vocab, 1)
    model = init_model(input_size=len(vocab),
                       embed_size=args.embed_size,
                       hidden_size=args.hidden_size,
                       output_size=len(vocab))
    optimizer = optimizers.SGD(lr=args.lr)
    # Begin Training
    UF.init_model_parameters(model)
    model = UF.convert_to_GPU(USE_GPU, model)
    optimizer.setup(model)
    batchsize = args.batch_size
    epoch = args.epoch
    accum_loss = Variable(xp.zeros((), dtype=np.float32))
    counter = 0
    # For each epoch...
    for ep in range(epoch):
        UF.trace("Training Epoch %d" % ep)
        total_tokens = 0
        log_ppl = 0.0
        # For each batch, do forward & backward computations
        for i, batch in enumerate(batched_data):
            loss, nwords = forward(model, batch)
            accum_loss += loss
            log_ppl += loss.data.reshape(())
            # Tracing...
            total_tokens += nwords
            # UF.trace(' %d/%d = %.5f' % (min(i*batchsize, len(data)), len(data), loss.data.reshape(())*batchsize))
            # Counting
            if (counter + 1) % bp_len == 0:
                optimizer.zero_grads()
                accum_loss.backward()
                accum_loss.unchain_backward()
                accum_loss = Variable(xp.zeros((), dtype=np.float32))
                optimizer.clip_grads(grad_clip)
                optimizer.update()
            counter += 1
        # Counting Perplexity
        log_ppl /= total_tokens
        UF.trace(" PPL (Train) = %.10f" % math.exp(UF.to_cpu(USE_GPU, log_ppl)))
        dev_ppl = evaluate(model, batched_dev)
        UF.trace(" PPL (Dev) = %.10f" % math.exp(UF.to_cpu(USE_GPU, dev_ppl)))
        # Reducing learning rate
        if ep > 6:
            optimizer.lr /= 1.2
            UF.trace("Reducing LR:", optimizer.lr)
    # Begin Testing
    UF.trace("Begin Testing...")
    test_ppl = evaluate(model, batched_test)
    UF.trace(" log(PPL) = %.10f" % test_ppl)
    UF.trace(" PPL = %.10f" % math.exp(UF.to_cpu(USE_GPU, test_ppl)))
Example 7: Variable
# Required import: from chainer import Variable [as alias]
# Alternative: from chainer.Variable import backward [as alias]
pg0 = get_batch6.GET_BATCH6(band_num=BAND_BUNKATU0, seq_num=SEQUENCE_LEN0, n_delay=NDELAY, npoint=NPOINT0, fs0=FS0, fc0=FC1, gain0=GAIN1, q0=Q1)
a1 = pg0.a1
b1 = pg0.b1
loss = Variable(np.zeros((), dtype=np.float32))
losses = []
NUMBER_ITERATION = 501
for i in range(NUMBER_ITERATION):
    x, y = pg0.get1()  # get training data
    loss, state = rnn.compute_loss(model, x, y, state)  # process one sequence over the batch of bands
    model.cleargrads()
    loss.backward()
    optimizer.update()
    losses.append(loss.data / (SEQUENCE_LEN0 * 1.0))  # total loss over one BAND_BUNKATU0
    state = rnn.make_initial_state(batchsize=BAND_BUNKATU0)  # clear state for the next batch of sequences
    if i % 20 == 0:
        plt.plot(losses, "b")
        plt.yscale('log')
        plt.title('loss')
        plt.pause(1.0)
        print("loss.data (%06d)=" % i, loss.data / (SEQUENCE_LEN0 * 1.0))
    ## if i % 100 == 0:  # save model parameters into the directory model20 every 100 iterations
    ##     serializers.save_npz('model20/%06d_my.model.npz' % i, model)
Example 8: _train
# Required import: from chainer import Variable [as alias]
# Alternative: from chainer.Variable import backward [as alias]
def _train(self, **kwargs):
    gpu = -1 if "gpu" not in kwargs else kwargs["gpu"]
    lr = 2e-3 if "lr" not in kwargs else kwargs["lr"]
    lr_decay = 0.97 if "lr_decay" not in kwargs else kwargs["lr_decay"]
    lr_decay_after = 10 if "lr_decay_after" not in kwargs else kwargs["lr_decay_after"]
    decay_rate = 0.95 if "decay_rate" not in kwargs else kwargs["decay_rate"]
    dropout = 0.0 if "dropout" not in kwargs else kwargs["dropout"]
    bprop_len = 50 if "bprop_len" not in kwargs else kwargs["bprop_len"]
    batchsize = 50 if "batchsize" not in kwargs else kwargs["batchsize"]
    grad_clip = 5 if "grad_clip" not in kwargs else kwargs["grad_clip"]
    n_epochs = 5 if "epochs" not in kwargs else kwargs["epochs"]
    if gpu >= 0:
        cuda.get_device(gpu).use()
        self.model.to_gpu()
    optimizer = optimizers.RMSprop(lr=lr, alpha=decay_rate, eps=1e-8)
    optimizer.setup(self.model)
    train_data = self.dataset
    whole_len = train_data.shape[0]
    jump = whole_len // batchsize
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    state = self.model.make_initial_state(batchsize=batchsize)
    if gpu >= 0:
        accum_loss = Variable(cuda.zeros(()))
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)
    else:
        accum_loss = Variable(np.zeros((), dtype=np.float32))
    print('going to train {} iterations'.format(jump * n_epochs))
    for i in range(jump * n_epochs):
        x_batch = np.array([train_data[(jump * j + i) % whole_len]
                            for j in range(batchsize)])
        y_batch = np.array([train_data[(jump * j + i + 1) % whole_len]
                            for j in range(batchsize)])
        if gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)
        state, loss_i = self.model.forward_one_step(x_batch, y_batch, state, dropout_ratio=dropout)
        accum_loss += loss_i
        if (i + 1) % bprop_len == 0:  # Run truncated BPTT
            now = time.time()
            sys.stderr.write('\r{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)//bprop_len, (jump*n_epochs)//bprop_len, accum_loss.data / bprop_len, now-cur_at))
            sys.stderr.flush()
            cur_at = now
            optimizer.zero_grads()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate
            if gpu >= 0:
                accum_loss = Variable(cuda.zeros(()))
            else:
                accum_loss = Variable(np.zeros((), dtype=np.float32))
            optimizer.clip_grads(grad_clip)
            optimizer.update()
        if (i + 1) % 10000 == 0:
            pickle.dump(copy.deepcopy(self.model).to_cpu(), open(self.model_path, 'wb'))
        if (i + 1) % jump == 0:
            epoch += 1
            if epoch >= lr_decay_after:
                optimizer.lr *= lr_decay
                print('decayed learning rate by a factor {} to {}'.format(lr_decay, optimizer.lr))
            sys.stdout.flush()
    pickle.dump(copy.deepcopy(self.model).to_cpu(), open(self.model_path, 'wb'))
Example 9: RNNCharEstimator
# Required import: from chainer import Variable [as alias]
# Alternative: from chainer.Variable import backward [as alias]
class RNNCharEstimator(ChainerClassifier):
    def __init__(self, net_type='lstm', net_hidden=100,
                 vocab_size=1000, dropout_ratio=0.0, seq_size=70, grad_clip=100.0,
                 **params):
        ChainerClassifier.__init__(self, **params)
        self.net_hidden = net_hidden
        self.net_type = net_type
        self.vocab_size = vocab_size
        self.dropout_ratio = dropout_ratio
        self.seq_size = seq_size
        self.grad_clip = grad_clip
        self.param_names.append('vocab_size')
        self.param_names.append('net_type')
        self.param_names.append('net_hidden')
        self.param_names.append('dropout_ratio')

    def setup_network(self, n_features):
        if self.net_type == 'lstm':
            self.network = CharLSTM(self.vocab_size, self.net_hidden, self.batch_size)
        elif self.net_type == 'irnn':
            self.network = CharIRNN(self.vocab_size, self.net_hidden, self.batch_size)
        else:
            error("Unknown net_type")
        self.reset_accum_loss()

    def reset_accum_loss(self):
        if self.gpu >= 0:
            self.accum_loss = Variable(cuda.zeros(()))
        else:
            self.accum_loss = Variable(np.zeros(()))

    def forward_train(self, x, t):
        return self.network.train(x, t, dropout_ratio=self.dropout_ratio)

    def predict(self, x_data):
        self.network.reset_state(1)
        if self.gpu >= 0:
            self.network.to_gpu()
            x_data = cuda.to_gpu(x_data)
        results = None
        for i in range(x_data.shape[0]):
            x = Variable(x_data[i, :])
            y = self.network.predict(x)
            if results is None:
                results = cuda.to_cpu(y.data)
            else:
                results = np.concatenate([results, cuda.to_cpu(y.data)])
        results = results.argmax(1)
        return results

    def fit_update(self, loss, batch_id):
        self.accum_loss += loss
        if ((batch_id + 1) % self.seq_size) == 0:  # Run truncated BPTT
            self.optimizer.zero_grads()
            self.accum_loss.backward()
            self.accum_loss.unchain_backward()  # truncate
            self.optimizer.clip_grads(self.grad_clip)
            self.optimizer.update()
            self.reset_accum_loss()

    def make_batch(self, x_data, y_data, batch_id):
        batch_num = self.n_samples // self.batch_size
        x_batch = np.array([x_data[(batch_id + batch_num * j) % self.n_samples]
                            for j in range(self.batch_size)]).reshape(self.batch_size)
        y_batch = np.array([y_data[(batch_id + batch_num * j) % self.n_samples]
                            for j in range(self.batch_size)])
        return x_batch, y_batch
Example 10: train_encoder_decoder
# Required import: from chainer import Variable [as alias]
# Alternative: from chainer.Variable import backward [as alias]
def train_encoder_decoder(
        model,
        dictionary: corpora.Dictionary,
        conversation_file: str,
        decoder_model_dir: str,
        epoch_size: int=100,
        batch_size: int=30,
        dropout: bool=False,
        gpu: int=-1
) -> None:
    if gpu >= 0:
        model.to_gpu()
    print(model.xp)
    # set up the optimizer
    # opt = optimizers.SGD()
    opt = optimizers.Adam()
    opt.setup(model)
    # optimizer hooks
    clip_threshold = 5.0
    print("set optimizer clip threshold: {}".format(clip_threshold))
    opt.add_hook(chainer.optimizer.GradientClipping(clip_threshold))
    # load conversation sentences
    conversation = load_conversation(conversation_file, dictionary)
    data_size = len(conversation)
    print("data size: {}".format(data_size))
    for epoch in range(epoch_size):
        print("running epoch {}".format(epoch))
        indexes = np.random.permutation(range(data_size))
        epoch_loss = 0
        for bat_i in range(0, data_size, batch_size):
            forward_start_time = datetime.now()
            batch_loss = Variable(model.xp.zeros((), dtype=np.float32))
            for index in indexes[bat_i:bat_i + batch_size]:
                pair_words = conversation[index]
                # encoder input words
                orig_words = pair_words[0][:-1]  # remove END_SYMBOL
                reply_words = pair_words[1]
                if orig_words:
                    assert orig_words[-1] is not config.END_SYMBOL
                input_words_with_s = tokens2ids(orig_words, dictionary)
                ys, state = model.predictor.forward(
                    [Variable(
                        model.xp.array(
                            [word],
                            dtype=model.xp.int32
                        )
                    ) for word in input_words_with_s],
                    state=None,
                    dropout=dropout,
                    train=True
                )
                # decode
                assert reply_words[0] == config.END_SYMBOL
                assert reply_words[-1] == config.END_SYMBOL
                output_words_with_s = tokens2ids(reply_words, dictionary)
                try:
                    new_loss = model(
                        output_words_with_s,
                        state=state,  # use the encoder's final state as the decoder's initial state
                        dropout=dropout,
                        train=True
                    )
                    batch_loss += new_loss
                except Exception:
                    print(index, input_words_with_s)
                    import traceback
                    traceback.print_exc()
            # average over the batch
            batch_size_array = model.xp.array(
                batch_size,
                dtype=model.xp.float32
            )
            batch_loss = batch_loss / Variable(batch_size_array)
            epoch_loss += batch_loss.data
            # timing
            forward_end_time = datetime.now()
            # optimization
            opt_start_time = datetime.now()
            model.zerograds()
            batch_loss.backward()
            opt.update()
            opt_end_time = datetime.now()
            forward_delta = forward_end_time - forward_start_time
            opt_delta = opt_end_time - opt_start_time
            # print(
            #     ("decoder epoch {} batch {}: loss {}, "
            #      "forward {}, optimizer {},").format(
            #         epoch,
            #         int(bat_i / batch_size),
# ... the rest of this code is omitted ...
Example 11: train_encoder
# Required import: from chainer import Variable [as alias]
# Alternative: from chainer.Variable import backward [as alias]
def train_encoder(
        model,
        dictionary: corpora.Dictionary,
        sentence_file: str,
        model_dir: str,
        epoch_size: int=100,
        batch_size: int=30,
        dropout: bool=True,
        gpu: int=-1
) -> None:
    if gpu >= 0:
        model.to_gpu()
    print(model.xp)
    # set up the SGD optimizer
    opt = optimizers.SGD()
    opt.setup(model)
    # optimizer hooks
    clip_threshold = 5.0
    print("set optimizer clip threshold: {}".format(clip_threshold))
    opt.add_hook(chainer.optimizer.GradientClipping(clip_threshold))
    # load sentences
    sentences = load_sentence(sentence_file)
    data_size = len(sentences)
    print("data size: {}".format(data_size))
    for epoch in range(epoch_size):
        print("epoch {}".format(epoch))
        indexes = np.random.permutation(data_size)
        epoch_loss = 0
        for bat_i in range(0, data_size, batch_size):
            forward_start_time = datetime.now()
            batch_loss = Variable(model.xp.zeros((), dtype=model.xp.float32))
            for index in indexes[bat_i:bat_i + batch_size]:
                input_words = sentences[index]
                # convert tokens to a list of ids
                input_words_with_s = tokens2ids(
                    input_words,
                    dictionary,
                    verbose=False
                )
                # forward pass
                try:
                    new_loss = model(
                        input_words_with_s,
                        dropout=dropout,
                        state=None,
                        train=True
                    )
                    if model.xp.isnan(new_loss.data):
                        sys.exit(1)
                    batch_loss += new_loss
                except Exception:
                    print(index, input_words_with_s)
                    import traceback
                    traceback.print_exc()
            # average over the batch
            batch_size_array = model.xp.array(
                batch_size,
                dtype=model.xp.float32
            )
            # if gpu:
            #     batch_size_array = cuda.to_gpu(batch_size_array)
            batch_loss = batch_loss / Variable(batch_size_array)
            epoch_loss += batch_loss.data
            # timing
            forward_end_time = datetime.now()
            # optimization
            opt_start_time = datetime.now()
            model.zerograds()
            batch_loss.backward()
            opt.update()
            opt_end_time = datetime.now()
            forward_delta = forward_end_time - forward_start_time
            opt_delta = opt_end_time - opt_start_time
            print_fmt = (
                "epoch {} batch {}: "
                "loss {}, grad L2 norm: {}, forward {}, optimizer {}"
            )
            print(print_fmt.format(
                epoch,
                int(bat_i / batch_size),
                batch_loss.data,
                opt.compute_grads_norm(),
                forward_delta,
                opt_delta,
            ))
        # save
# ... the rest of this code is omitted ...
Example 12: train
# Required import: from chainer import Variable [as alias]
# Alternative: from chainer.Variable import backward [as alias]
def train(self, words, steps, batchsize=100, sequence_length=10):
    """ Train the Predictor's model on words for steps number of steps. """
    whole_len = len(words)
    train_data = np.ndarray(whole_len, dtype=np.int32)
    jumps = steps * sequence_length
    # Initialize training data and maybe vocab.
    vocab_initializing = False
    if self.vocab is None:
        vocab_initializing = True
        self.vocab = {}
    for i, word in enumerate(words):
        if vocab_initializing:
            if word not in self.vocab:
                self.vocab[word] = len(self.vocab)
        train_data[i] = self.vocab[word]
    vocab_initializing = False
    print('corpus length:', len(words))
    print('self.vocab size:', len(self.vocab))
    # Initialize base model (if we need to)
    if self.model is None:
        self.model = BaseRNN(len(self.vocab), self.units)
    if self.gpu >= 0:
        cuda.get_device(self.gpu).use()
        self.model.to_gpu()
    optimizer = optimizers.RMSprop(lr=self.settings.learning_rate,
                                   alpha=self.settings.decay_rate,
                                   eps=1e-8)
    optimizer.setup(self.model)
    jumpsPerEpoch = whole_len // batchsize
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    state = make_initial_state(self.units, batchsize=batchsize)
    if self.gpu >= 0:
        accum_loss = Variable(cuda.zeros(()))
        for _, value in state.items():
            value.data = cuda.to_gpu(value.data)
    else:
        accum_loss = Variable(np.zeros((), dtype=np.float32))
    print('going to train {} iterations'.format(steps))
    for i in range(jumps):
        x_batch = np.array([train_data[(jumpsPerEpoch * j + i) % whole_len]
                            for j in range(batchsize)])
        y_batch = np.array([train_data[(jumpsPerEpoch * j + i + 1) % whole_len]
                            for j in range(batchsize)])
        if self.gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)
        state, loss_i = self.model.forward_one_step(x_batch,
                                                    y_batch,
                                                    state,
                                                    dropout_ratio=self.settings.dropout)
        accum_loss += loss_i
        if (i + 1) % sequence_length == 0:
            now = time.time()
            print('{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)//sequence_length, steps, accum_loss.data / sequence_length, now-cur_at))
            cur_at = now
            optimizer.zero_grads()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate
            if self.gpu >= 0:
                accum_loss = Variable(cuda.zeros(()))
            else:
                accum_loss = Variable(np.zeros((), dtype=np.float32))
            optimizer.clip_grads(self.settings.grad_clip)
            optimizer.update()
        if (i + 1) % jumpsPerEpoch == 0:
            epoch += 1
            if epoch >= self.settings.learning_rate_decay_after:
                optimizer.lr *= self.settings.learning_rate_decay
                print('decayed learning rate by a factor {} to {}'.format(self.settings.learning_rate_decay, optimizer.lr))
Example 13: SGD_Embedid
# Required import: from chainer import Variable [as alias]
# Alternative: from chainer.Variable import backward [as alias]
list_sentences = [np.array(row, np.int32) for row in list_sentences]
opt1 = SGD_Embedid()  # stochastic gradient descent over embedding rows
opt2 = SGD()          # stochastic gradient descent
opt1.setup(model1)    # initialize the optimizer
opt2.setup(model2)    # initialize the optimizer
opt1.tuples[0][1].fill(0)
opt2.zero_grads()
random.shuffle(list_sentences)
list_minibatch = []
for i, sentence in enumerate(list_sentences):
    list_minibatch.append(sentence)
    if len(list_minibatch) == BATCH_SIZE:
        accum_loss_total = Variable(np.zeros((), dtype=np.float32))  # initial accumulated loss
        uniq_sentence = np.zeros((), np.int32)
        for batch_sentence in list_minibatch:
            accum_loss_total += forward(batch_sentence)  # compute the loss
            uniq_sentence = np.append(uniq_sentence, batch_sentence)
        accum_loss_total.backward()  # backpropagate the error
        opt1.clip_grads(10)  # clip overly large gradients
        opt2.clip_grads(10)  # clip overly large gradients
        uniq_sentence = np.unique(uniq_sentence)
        opt1.update(uniq_sentence)  # update only the embedding rows touched in this batch
        opt2.update()               # update parameters
        opt1.zero_grads(uniq_sentence)  # reset gradients
        opt2.zero_grads()               # reset gradients
        list_minibatch = []
    if i % 1000 == 999:
        break