

Python Variable.backward Method Code Examples

This article compiles typical usage examples of the Python method chainer.Variable.backward. If you have been wondering how Variable.backward is used in Python, what it does, or what real code that calls it looks like, the curated examples below should help. You can also explore further usage examples of chainer.Variable, the class this method belongs to.


The following presents 13 code examples of the Variable.backward method, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
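
Before diving into the examples, here is the core behavior they all rely on: calling backward() on a Variable runs backpropagation through the computation graph that produced it and fills the .grad array of each input Variable. The following is a minimal, self-contained sketch of just that (it assumes a working Chainer installation; the toy data and the squared-sum loss are purely illustrative):

import numpy as np
import chainer.functions as F
from chainer import Variable

x = Variable(np.array([1.0, 2.0, 3.0], dtype=np.float32))
y = F.sum(x * x)   # scalar loss: sum of squares
y.backward()       # backprop through the graph; populates x.grad
print(x.grad)      # -> [2. 4. 6.], i.e. dy/dx = 2x

Each example below builds some loss Variable in a forward pass, calls backward() on it, and lets an optimizer consume the resulting gradients.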

Example 1: optimizeCRNN

# Required import: from chainer import Variable [as alias]
# Or: from chainer.Variable import backward [as alias]
def optimizeCRNN(iterNum,maxIndex,indicies):
    batchSize = 1000
    model = EvalCRNN(maxIndex,500)
    print(len(indicies),computeEntropy(maxIndex,indicies))
    learningRate = 0.001
    epoch = 3 
    for j in range(epoch):
        
        my_optimizer = optimizers.RMSpropGraves(lr = learningRate)
        my_optimizer.setup(model) 
        my_optimizer.add_hook(optimizer.GradientClipping(1))
        
        model.cRNN.reset()
        
        loss = Variable(np.array([[0]]))
        for i in range(iterNum):
            t1 = time.clock()
            model.zerograds()
            loss.unchain_backward()
            loss = model(indicies[batchSize*i:batchSize*(i+1)],iterNum*batchSize)
            loss.backward()
            t2 = time.clock()
            
            msg = "iter: " + str(i + iterNum * j + 1) + "/" + str(iterNum * epoch) 
            msgLoss = "loss: " + str(loss.data/batchSize)
            msgNorm = "grad: " + str(my_optimizer.compute_grads_norm())
            msgTime = "time: " + str(t2 - t1) + " seconds"
            print(msgLoss,msgNorm,msg,msgTime)
            my_optimizer.update()

        learningRate *= 0.50

    print(model(indicies[batchSize*(iterNum):batchSize*(iterNum+10)]).data/(batchSize*10))
    return model.cRNN
Developer: CurtisHuebner, Project: GPNN, Lines of code: 36, Source: mobyDick.py

Example 2: generate_image

# Required import: from chainer import Variable [as alias]
# Or: from chainer.Variable import backward [as alias]
def generate_image(img_orig, img_style, width, nw, nh, max_iter, lr, img_gen=None):
    batch_size = img_orig.shape[0]
    mid_orig = nn.forward(Variable(img_orig, volatile=True))
    style_mats = [get_matrix(y) for y in nn.forward(Variable(img_style, volatile=True))]

    if img_gen is None:
        if args.gpu >= 0:
            img_gen_ = xp.random.uniform(-20,20,(3,width,width),dtype=np.float32)
            img_gen = xp.random.uniform(-20,20,(batch_size,3,width,width),dtype=np.float32)
            img_gen[:,:,:,:] = img_gen_
        else:
            img_gen_ = np.random.uniform(-20,20,(3,width,width)).astype(np.float32)
            img_gen = np.random.uniform(-20,20,(batch_size,3,width,width)).astype(np.float32)
            img_gen[:,:,:,:] = img_gen_
    x = Variable(img_gen)
    xg = xp.zeros_like(x.data)
    optimizer = optimizers.Adam(alpha=lr)
    optimizer.setup((img_gen,xg))
    for i in range(max_iter):

        x = Variable(img_gen)
        y = nn.forward(x)

        optimizer.zero_grads()
        L = Variable(xp.zeros((), dtype=np.float32))
        for l in range(len(y)):
            gogh_matrix = get_matrix(y[l])

            L1 = np.float32(args.lam) * np.float32(nn.alpha[l])*F.mean_squared_error(y[l], Variable(mid_orig[l].data))
            L2 = np.float32(nn.beta[l])*F.mean_squared_error(gogh_matrix, Variable(style_mats[l].data))/np.float32(len(y))
            L += L1+L2

            if i%100==0:
                print(i, l, L1.data, L2.data)

        L.backward()
        xg += x.grad
        optimizer.update()

        tmp_shape = img_gen.shape
        if args.gpu >= 0:
            img_gen += Clip().forward(img_gen).reshape(tmp_shape) - img_gen
        else:
            def clip(x):
                return -120 if x<-120 else (136 if x>136 else x)
            img_gen += np.vectorize(clip)(img_gen).reshape(tmp_shape) - img_gen

        if i%50==0:
            for j in range(img_gen.shape[0]):
                save_image(img_gen[j], W, nw[j], nh[j], args.out_dir+"_%d/im_%05d.png"%(j,i))

    for j in range(img_gen.shape[0]):
        save_image(img_gen[j], W, nw[j], nh[j], args.out_dir+"_last/im_%d.png"%(j))
Developer: woodrush, Project: chainer-gogh, Lines of code: 55, Source: chainer-gogh-multi.py
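
Examples 2 and 3 (two variants of the chainer-gogh style-transfer script) use backward() to optimize the generated image itself rather than any network weight: after L.backward(), the gradient is read off x.grad and applied directly to the raw pixel array. Here is a minimal sketch of that input-optimization pattern, with a toy target standing in for the content and style losses (target, x_data, and the 0.5 step size are illustrative assumptions, not part of the original scripts):

import numpy as np
import chainer.functions as F
from chainer import Variable

target = Variable(np.array([1.0, -2.0, 3.0], dtype=np.float32))
x_data = np.zeros(3, dtype=np.float32)    # the "image" being optimized
for i in range(200):
    x = Variable(x_data)                  # wrap a fresh Variable each step, as the examples do
    loss = F.mean_squared_error(x, target)
    loss.backward()                       # fills x.grad; no model parameter is involved
    x_data -= 0.5 * x.grad                # gradient step on the input itself
print(x_data)                             # converges toward [1. -2. 3.]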

Example 3: generate_image

# Required import: from chainer import Variable [as alias]
# Or: from chainer.Variable import backward [as alias]
def generate_image(img_orig, img_style, width, nw, nh, max_iter, lr, img_gen=None):
    mid_orig = nn.forward(Variable(img_orig, volatile=True))
    style_mats = [get_matrix(y) for y in nn.forward(Variable(img_style, volatile=True))]

    if img_gen is None:
        if args.gpu >= 0:
            img_gen = xp.random.uniform(-20,20,(1,3,width,width),dtype=np.float32)
        else:
            img_gen = np.random.uniform(-20,20,(1,3,width,width)).astype(np.float32)
    x = Variable(img_gen)
    xg = xp.zeros_like(x.data)
    optimizer = optimizers.Adam(alpha=lr)
    optimizer.setup((img_gen,xg))
    for i in range(max_iter):

        x = Variable(img_gen)
        y = nn.forward(x)

        optimizer.zero_grads()
        L = Variable(xp.zeros((), dtype=np.float32))
        for l in range(len(y)):
            ch = y[l].data.shape[1]
            wd = y[l].data.shape[2]
            gogh_y = F.reshape(y[l], (ch,wd**2))
            gogh_matrix = F.matmul(gogh_y, gogh_y, transb=True)/np.float32(ch*wd**2)

            L1 = np.float32(args.lam) * np.float32(nn.alpha[l])*F.mean_squared_error(y[l], Variable(mid_orig[l].data))
            L2 = np.float32(nn.beta[l])*F.mean_squared_error(gogh_matrix, Variable(style_mats[l].data))/np.float32(len(y))
            L += L1+L2

            if i%100==0:
                print(i, l, L1.data, L2.data)

        L.backward()
        xg += x.grad
        optimizer.update()

        tmp_shape = img_gen.shape
        if args.gpu >= 0:
            img_gen += Clip().forward(img_gen).reshape(tmp_shape) - img_gen
        else:
            def clip(x):
                return -120 if x<-120 else (136 if x>136 else x)
            img_gen += np.vectorize(clip)(img_gen).reshape(tmp_shape) - img_gen

        if i%3000==0:
            save_image(img_gen, W, nw, nh, i)
Developer: TakuTsuzuki, Project: Hackathon2015, Lines of code: 49, Source: chainer-gogh.py

Example 4: step

# Required import: from chainer import Variable [as alias]
# Or: from chainer.Variable import backward [as alias]
    def step(self,perm,batch_index,mode,epoch): 
        if mode=='train':
            data, first_words, label=self.read_batch(perm,batch_index,self.train_data,mode)
            train = True
        else :
            data, first_words, label=self.read_batch(perm,batch_index,self.test_data,mode)
            train = False

        data = Variable(cuda.to_gpu(data))
        state = {name: Variable(self.xp.zeros((self.batchsize, 1024),dtype=self.xp.float32)) for name in ('c1', 'h1')}
        loss=Variable(cuda.cupy.asarray(0.0).astype(np.float32))
        acc=0.0

        ### image-encoder ###
        h = self.enc(data, train=train, test=not train)
        h=h.data
        h=Variable(h)


        ### first LSTM ###
        state,_ = self.dec(h, state,train=train, test=not train, image=True)
        ### input <SOS> ###
        state,y = self.dec(Variable(cuda.to_gpu(first_words)), state,train=train, test=not train)
        loss += F.softmax_cross_entropy(y, Variable(cuda.to_gpu(label.T[1])))
        acc += F.accuracy(y, Variable(cuda.to_gpu(label.T[1])), ignore_label=-1).data.get()

        for cur_word,next_word in zip(label.T[1:-1],label.T[2:]):
            state,y = self.dec(Variable(cuda.to_gpu(cur_word)), state,train=train, test=not train)
            loss += F.softmax_cross_entropy(y, Variable(cuda.to_gpu(next_word)))
            acc += F.accuracy(y, Variable(cuda.to_gpu(next_word)), ignore_label=-1).data.get()

        if mode=='train':
            self.dec.cleargrads()    
            loss.backward()
            self.o_dec.update()


        return {"prediction": 0,
                "current_loss": loss.data.get()/(label.T.shape[0]),
                "current_accuracy": acc/(label.T.shape[0]),
                }
Developer: rkuga, Project: ImageCaptioning, Lines of code: 43, Source: captioning.py

Example 5: range

# Required import: from chainer import Variable [as alias]
# Or: from chainer.Variable import backward [as alias]
                        for j in range(batchsize)])

    if args.gpu >=0:
        x_batch = cuda.to_gpu(x_batch)
        y_batch = cuda.to_gpu(y_batch)

    state, loss_i = model.forward_one_step(x_batch, y_batch, state, dropout_ratio=args.dropout)
    accum_loss   += loss_i

    if (i + 1) % bprop_len == 0:  # Run truncated BPTT
        now = time.time()
        print('{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/bprop_len, jump, accum_loss.data / bprop_len, now-cur_at))
        cur_at = now

        optimizer.zero_grads()
        accum_loss.backward()
        accum_loss.unchain_backward()  # truncate
        if args.gpu >= 0:
            accum_loss = Variable(cuda.zeros(()))
        else:
            accum_loss = Variable(np.zeros((), dtype=np.float32))

        optimizer.clip_grads(grad_clip)
        optimizer.update()

    if (i + 1) % 10000 == 0:
        fn = ('%s/charrnn_epoch_%.2f.chainermodel' % (args.checkpoint_dir, float(i)/jump))
        pickle.dump(copy.deepcopy(model).to_cpu(), open(fn, 'wb'))

    if (i + 1) % jump == 0:
        epoch += 1
Developer: ykakihara, Project: experiments, Lines of code: 33, Source: train.py
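
Examples 5, 6, 8, 9, and 12 all share the truncated-BPTT idiom visible above: accumulate the per-step losses into one Variable, call backward() every bprop_len steps, then call unchain_backward() to cut off the history so the graph (and memory use) stays bounded. Below is a self-contained toy version of the pattern; the scalar "recurrence" h = h * w and the hand-rolled SGD step are illustrative only, and Variable.cleargrad() assumes Chainer v2 or later (the v1 API used in these examples spelled it zerograd):

import numpy as np
import chainer.functions as F
from chainer import Variable

w = Variable(np.array([0.9], dtype=np.float32))   # the only "parameter"
h = Variable(np.array([0.5], dtype=np.float32))   # recurrent state
bprop_len, lr = 5, 0.01
accum_loss = Variable(np.zeros((), dtype=np.float32))

for t in range(20):
    h = h * w                                     # one recurrent step
    accum_loss += F.sum((h - 1.0) ** 2)           # accumulate a per-step loss
    if (t + 1) % bprop_len == 0:                  # truncation boundary
        w.cleargrad()
        accum_loss.backward()                     # gradients for the last bprop_len steps only
        accum_loss.unchain_backward()             # cut the graph behind this point
        w.data -= lr * w.grad                     # plain SGD update on the parameter
        accum_loss = Variable(np.zeros((), dtype=np.float32))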

Example 6: main

# Required import: from chainer import Variable [as alias]
# Or: from chainer.Variable import backward [as alias]
def main():
    args      = parse_args()
    init_program_state(args)
    vocab     = make_vocab()
    data, batched_data = load_data(args.train, vocab, args.batch_size)
    dev , batched_dev  = load_data(args.dev, vocab, 1)
    test, batched_test = load_data(args.test, vocab, 1)
    model = init_model(input_size = len(vocab),
            embed_size   = args.embed_size,
            hidden_size  = args.hidden_size,
            output_size  = len(vocab))
    optimizer = optimizers.SGD(lr=args.lr)
    
    # Begin Training
    UF.init_model_parameters(model)
    model = UF.convert_to_GPU(USE_GPU, model)
    optimizer.setup(model)
    
    batchsize  = args.batch_size
    epoch      = args.epoch
    accum_loss = Variable(xp.zeros((), dtype=np.float32))
    counter    = 0
    # For each epoch..
    for ep in range(epoch):
        UF.trace("Training Epoch %d" % ep)
        total_tokens = 0
        log_ppl      = 0.0
        
        # For each batch, do forward & backward computations
        for i, batch in enumerate(batched_data):
            loss, nwords  = forward(model, batch)
            accum_loss   += loss
            log_ppl      += loss.data.reshape(())
            # Tracing...
            total_tokens += nwords
#            UF.trace('  %d/%d = %.5f' % (min(i*batchsize, len(data)), len(data), loss.data.reshape(())*batchsize))
            # Counting
            if (counter+1) % bp_len == 0:
                optimizer.zero_grads()
                accum_loss.backward()
                accum_loss.unchain_backward()
                accum_loss = Variable(xp.zeros((), dtype=np.float32))
                
                optimizer.clip_grads(grad_clip)
                optimizer.update()
            counter += 1
        # Counting Perplexity
        log_ppl /= total_tokens
        UF.trace("  PPL (Train)  = %.10f" % math.exp(UF.to_cpu(USE_GPU, log_ppl)))
        dev_ppl = evaluate(model, batched_dev)
        UF.trace("  PPL (Dev)    = %.10f" % math.exp(UF.to_cpu(USE_GPU, dev_ppl)))

        # Reducing learning rate
        if ep > 6:
            optimizer.lr /= 1.2
            UF.trace("Reducing LR:", optimizer.lr)

    # Begin Testing
    UF.trace("Begin Testing...")
    test_ppl = evaluate(model, batched_test)
    UF.trace("  log(PPL) = %.10f" % test_ppl)
    UF.trace("  PPL      = %.10f" % math.exp(UF.to_cpu(USE_GPU, test_ppl)))
Developer: philip30, Project: chainn, Lines of code: 64, Source: lm.py

Example 7: Variable

# Required import: from chainer import Variable [as alias]
# Or: from chainer.Variable import backward [as alias]
pg0=get_batch6.GET_BATCH6(band_num=BAND_BUNKATU0, seq_num=SEQUENCE_LEN0,n_delay=NDELAY,npoint=NPOINT0,fs0=FS0,fc0=FC1, gain0=GAIN1, q0=Q1)
a1=pg0.a1
b1=pg0.b1

loss   = Variable(np.zeros((), dtype=np.float32))
losses =[]

NUMBER_ITERATION=501

for i in range(NUMBER_ITERATION):
	
	x,y = pg0.get1()  # get train data
	loss, state =  rnn.compute_loss(model, x, y, state)  # do one sequence while batch bands
	model.cleargrads()
	loss.backward()
	optimizer.update()
	
	losses.append(loss.data /(SEQUENCE_LEN0 * 1.0))  # total loss  while one BAND_BUNKATU0
	
	state = rnn.make_initial_state( batchsize=BAND_BUNKATU0 )  	# clear for next batch-sequence-input
	
	if i%20==0:
		plt.plot(losses,"b")
		plt.yscale('log')
		plt.title('loss')
		plt.pause(1.0)
		print "loss.data (%06d)="%i, loss.data / (SEQUENCE_LEN0 * 1.0)
	##if i%100==0:  # save model parameter in the directory model20  every 100 
	##	serializers.save_npz('model20/%06d_my.model.npz'%i, model)
Developer: shun60s, Project: chainer-notch-filter, Lines of code: 31, Source: main1.py

Example 8: _train

# Required import: from chainer import Variable [as alias]
# Or: from chainer.Variable import backward [as alias]
    def _train(self, **kwargs):
            gpu = -1 if "gpu" not in kwargs else kwargs["gpu"]
            lr = 2e-3 if "lr" not in kwargs else kwargs["lr"]
            lr_decay = 0.97 if "lr_decay" not in kwargs else kwargs["lr_decay"]
            lr_decay_after=10 if "lr_decay_after" not in kwargs else kwargs["lr_decay_after"]
            decay_rate = 0.95 if "decay_rate" not in kwargs else kwargs["decay_rate"]
            dropout = 0.0 if "dropout" not in kwargs else kwargs["dropout"]
            bprop_len   = 50 if "bprop_len" not in kwargs else kwargs["bprop_len"]
            batchsize   = 50 if "batchsize" not in kwargs else kwargs["batchsize"]
            grad_clip   = 5 if "grad_clip" not in kwargs else kwargs["grad_clip"]
            n_epochs  = 5 if "epochs" not in kwargs else kwargs["epochs"]
            if gpu >= 0:
                cuda.get_device(gpu).use()
                self.model.to_gpu()

            optimizer = optimizers.RMSprop(lr=lr, alpha=decay_rate, eps=1e-8)
            optimizer.setup(self.model)

            train_data = self.dataset
            whole_len    = train_data.shape[0]
            jump         = whole_len // batchsize
            epoch        = 0
            start_at     = time.time()
            cur_at       = start_at
            state        = self.model.make_initial_state(batchsize=batchsize)

            if gpu >= 0:
                accum_loss   = Variable(cuda.zeros(()))
                for key, value in state.items():
                    value.data = cuda.to_gpu(value.data)#plist
            else:
                accum_loss   = Variable(np.zeros((), dtype=np.float32))

            print ('going to train {} iterations'.format(jump * n_epochs))

            for i in range(jump * n_epochs):
                x_batch = np.array([train_data[(jump * j + i) % whole_len]
                                    for j in range(batchsize)])
                y_batch = np.array([train_data[(jump * j + i + 1) % whole_len]
                                    for j in range(batchsize)])

                if gpu >=0:
                    x_batch = cuda.to_gpu(x_batch)
                    y_batch = cuda.to_gpu(y_batch)

                state, loss_i = self.model.forward_one_step(x_batch, y_batch, state, dropout_ratio=dropout)
                accum_loss   += loss_i

                if (i + 1) % bprop_len == 0:  # Run truncated BPTT
                    now = time.time()
                    sys.stderr.write('\r{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)//bprop_len,(jump*n_epochs)//bprop_len, accum_loss.data / bprop_len, now-cur_at))
                    sys.stderr.flush()
                    cur_at = now

                    optimizer.zero_grads()
                    accum_loss.backward()
                    accum_loss.unchain_backward()  # truncate

                    if gpu >= 0:
                        accum_loss = Variable(cuda.zeros(()))
                    else:
                        accum_loss = Variable(np.zeros((), dtype=np.float32))

                    optimizer.clip_grads(grad_clip)
                    optimizer.update()

                    if (i + 1) % 10000 == 0:
                        pickle.dump(copy.deepcopy(self.model).to_cpu(), open(self.model_path, 'wb'))

                    if (i + 1) % jump == 0:
                        epoch += 1

                    if epoch >= lr_decay_after:
                        optimizer.lr *= lr_decay
                        print ('decayed learning rate by a factor {} to {}'.format(lr_decay, optimizer.lr))
                sys.stdout.flush()

            pickle.dump(copy.deepcopy(self.model).to_cpu(), open(self.model_path, 'wb'))
Developer: ninhydrin, Project: clone, Lines of code: 80, Source: RCCout.py

Example 9: RNNCharEstimator

# Required import: from chainer import Variable [as alias]
# Or: from chainer.Variable import backward [as alias]
class RNNCharEstimator(ChainerClassifier):
    def __init__(self, net_type='lstm', net_hidden=100,
                       vocab_size=1000, dropout_ratio=0.0, seq_size=70, grad_clip=100.0,
                       **params):
        ChainerClassifier.__init__(self, **params)
        self.net_hidden    = net_hidden
        self.net_type      = net_type
        self.vocab_size    = vocab_size
        self.dropout_ratio = dropout_ratio
        self.seq_size      = seq_size
        self.grad_clip     = grad_clip
        self.param_names.append('vocab_size')
        self.param_names.append('net_type')
        self.param_names.append('net_hidden')
        self.param_names.append('dropout_ratio')

    def setup_network(self, n_features):
        if self.net_type == 'lstm':
            self.network = CharLSTM(self.vocab_size, self.net_hidden, self.batch_size)
        elif self.net_type == 'irnn':
            self.network = CharIRNN(self.vocab_size, self.net_hidden, self.batch_size)
        else:
            error("Unknown net_type")
        self.reset_accum_loss()

    def reset_accum_loss(self):
        if self.gpu >= 0:
            self.accum_loss = Variable(cuda.zeros(()))
        else:
            self.accum_loss = Variable(np.zeros(()))

    def forward_train(self, x, t):
        return self.network.train(x, t, dropout_ratio=self.dropout_ratio)

    def predict(self, x_data):
        self.network.reset_state(1)
        if self.gpu >= 0:
            self.network.to_gpu()
            x_data = cuda.to_gpu(x_data)
        results = None
        for i in range(x_data.shape[0]):
            x = Variable(x_data[i, :])
            y = self.network.predict(x)
            if results is None:
                results = cuda.to_cpu(y.data)
            else:
                results = np.concatenate([results, cuda.to_cpu(y.data)])
        results = results.argmax(1)
        return results

    def fit_update(self, loss, batch_id):
        self.accum_loss += loss
        if ((batch_id + 1) % self.seq_size) == 0: # Run Truncated BPTT
            self.optimizer.zero_grads()
            self.accum_loss.backward()
            self.accum_loss.unchain_backward()  # truncate
            self.optimizer.clip_grads(self.grad_clip)
            self.optimizer.update()
            self.reset_accum_loss()

    def make_batch(self, x_data, y_data, batch_id):
        batch_num = self.n_samples // self.batch_size
        x_batch = np.array([x_data[(batch_id + batch_num * j) % self.n_samples]
                            for j in range(self.batch_size)]).reshape(self.batch_size)
        y_batch = np.array([y_data[(batch_id + batch_num * j) % self.n_samples]
                            for j in range(self.batch_size)])
        return x_batch, y_batch
Developer: ttakamura, Project: chainer-sandbox, Lines of code: 69, Source: skchainer.py

Example 10: train_encoder_decoder

# Required import: from chainer import Variable [as alias]
# Or: from chainer.Variable import backward [as alias]
def train_encoder_decoder(
    model,
    dictionary: corpora.Dictionary,
    conversation_file: str,
    decoder_model_dir: str,
    epoch_size: int=100,
    batch_size: int=30,
    dropout: bool=False,
    gpu: bool=False
) -> None:
    if gpu >= 0:
        model.to_gpu()
        print(model.xp)

    # setup SGD optimizer
    # opt = optimizers.SGD()
    opt = optimizers.Adam()
    opt.setup(model)

    # optimizer hooks
    clip_threshold = 5.0
    print("set optimizer clip threshold: {}".format(clip_threshold))
    opt.add_hook(chainer.optimizer.GradientClipping(clip_threshold))

    # load conversation sentences
    conversation = load_conversation(conversation_file, dictionary)
    data_size = len(conversation)

    print("data size: {}".format(data_size))
    for epoch in range(epoch_size):
        print("running epoch {}".format(epoch))
        indexes = np.random.permutation(range(data_size))
        epoch_loss = 0  # int

        for bat_i in range(0, data_size, batch_size):
            forward_start_time = datetime.now()

            for index in indexes[bat_i:bat_i + batch_size]:
                pair_words = conversation[index]

                # encoder input words
                orig_words = pair_words[0][:-1]  # remove END_SYMBOL
                reply_words = pair_words[1]
                if orig_words:
                    assert orig_words[-1] is not config.END_SYMBOL
                input_words_with_s = tokens2ids(orig_words, dictionary)
                ys, state = model.predictor.forward(
                    [Variable(
                        model.xp.array(
                            [word],
                            dtype=model.xp.int32
                        )
                    ) for word in input_words_with_s],
                    state=None,
                    dropout=dropout,
                    train=True
                )

                # decode
                assert reply_words[0] == config.END_SYMBOL
                assert reply_words[-1] == config.END_SYMBOL
                output_words_with_s = tokens2ids(reply_words, dictionary)
                batch_loss = Variable(model.xp.zeros((), dtype=np.float32))
                try:
                    new_loss = model(
                        output_words_with_s,
                        state=state,  # use the encoder state as the decoder's initial state
                        dropout=dropout,
                        train=True
                    )
                    batch_loss += new_loss
                except Exception:
                    print(index, input_words_with_s)
                    import traceback
                    traceback.print_exc()
            # average over the batch
            batch_size_array = model.xp.array(
                batch_size,
                dtype=model.xp.float32
            )
            batch_loss = batch_loss / Variable(batch_size_array)
            epoch_loss += batch_loss.data

            # timing
            forward_end_time = datetime.now()

            # optimization
            opt_start_time = datetime.now()
            model.zerograds()
            batch_loss.backward()
            opt.update()
            opt_end_time = datetime.now()

            forward_delta = forward_end_time - forward_start_time
            opt_delta = opt_end_time - opt_start_time
            # print(
            #     ("decoder epoch {} batch {}: loss {}, "
            #      "forward {}, optimizer {},").format(
            #         epoch,
            #         int(bat_i / batch_size),
#......... remainder of the code omitted .........
Developer: YinghanWang, Project: seq2seq, Lines of code: 103, Source: seq2seq.py

Example 11: train_encoder

# Required import: from chainer import Variable [as alias]
# Or: from chainer.Variable import backward [as alias]
def train_encoder(
    model,
    dictionary: corpora.Dictionary,
    sentence_file: str,
    model_dir: str,
    epoch_size: int=100,
    batch_size: int=30,
    dropout: bool=True,
    gpu: bool=False
) -> None:
    if gpu >= 0:
        model.to_gpu()
        print(model.xp)

    # setup SGD optimizer
    opt = optimizers.SGD()
    opt.setup(model)

    # optimizer hooks
    clip_threshold = 5.0
    print("set optimizer clip threshold: {}".format(clip_threshold))
    opt.add_hook(chainer.optimizer.GradientClipping(clip_threshold))

    # load conversation sentences
    sentences = load_sentence(sentence_file)
    data_size = len(sentences)

    print("data size: {}".format(data_size))
    for epoch in range(epoch_size):
        print("epoch {}".format(epoch))

        indexes = np.random.permutation(data_size)
        epoch_loss = 0  # int

        for bat_i in range(0, data_size, batch_size):
            forward_start_time = datetime.now()
            batch_loss = Variable(model.xp.zeros((), dtype=model.xp.float32))

            for index in indexes[bat_i:bat_i + batch_size]:
                input_words = sentences[index]
                # convert the tokens to a list of ids
                input_words_with_s = tokens2ids(
                    input_words,
                    dictionary,
                    verbose=False
                )

                # forward pass
                try:
                    new_loss = model(
                        input_words_with_s,
                        dropout=dropout,
                        state=None,
                        train=True
                    )
                    if model.xp.isnan(new_loss.data):
                        sys.exit(1)

                    batch_loss += new_loss
                except Exception:
                    print(index, input_words_with_s)
                    import traceback
                    traceback.print_exc()

            # average over the batch
            batch_size_array = model.xp.array(
                batch_size,
                dtype=model.xp.float32
            )
            # if gpu:
            #     batch_size_array = cuda.to_gpu(batch_size_array)
            batch_loss = batch_loss / Variable(batch_size_array)
            epoch_loss += batch_loss.data

            # timing
            forward_end_time = datetime.now()

            # optimization
            opt_start_time = datetime.now()
            model.zerograds()
            batch_loss.backward()
            opt.update()
            opt_end_time = datetime.now()

            forward_delta = forward_end_time - forward_start_time
            opt_delta = opt_end_time - opt_start_time

            print_fmt = (
                "epoch {} batch {}: "
                "loss {}, grad L2 norm: {}, forward {}, optimizer {}"
            )
            print(print_fmt.format(
                epoch,
                int(bat_i / batch_size),
                batch_loss.data,
                opt.compute_grads_norm(),
                forward_delta,
                opt_delta,
            ))
            # save
#......... remainder of the code omitted .........
Developer: YinghanWang, Project: seq2seq, Lines of code: 103, Source: seq2seq.py

Example 12: train

# Required import: from chainer import Variable [as alias]
# Or: from chainer.Variable import backward [as alias]
    def train(self, words, steps, batchsize=100, sequence_length=10):
        """ Train the Predictor's model on words for steps number of steps. """

        whole_len = len(words)
        train_data = np.ndarray(whole_len, dtype=np.int32)
        jumps = steps * sequence_length

        # Initialize training data and maybe vocab.
        if self.vocab is None:
            vocab_initializing = True
            self.vocab = {}
        for i, word in enumerate(words):
            if vocab_initializing:
                if word not in self.vocab:
                    self.vocab[word] = len(self.vocab)
            train_data[i] = self.vocab[word]
        vocab_initializing = False


        print('corpus length:', len(words))
        print('self.vocab size:', len(self.vocab))

        # Initialize base model (if we need to)
        if self.model is None:
            self.model = BaseRNN(len(self.vocab), self.units)

        if self.gpu >= 0:
            cuda.get_device(self.gpu).use()
            self.model.to_gpu()

        optimizer = optimizers.RMSprop(lr=self.settings.learning_rate,
                                       alpha=self.settings.decay_rate,
                                       eps=1e-8)
        optimizer.setup(self.model)

        jumpsPerEpoch = whole_len // batchsize
        epoch = 0
        start_at = time.time()
        cur_at = start_at
        state = make_initial_state(self.units, batchsize=batchsize)

        if self.gpu >= 0:
            accum_loss = Variable(cuda.zeros(()))
            for _, value in state.items():
                value.data = cuda.to_gpu(value.data)
        else:
            accum_loss = Variable(np.zeros((), dtype=np.float32))

        print('going to train {} iterations'.format(steps))
        for i in range(jumps):
            x_batch = np.array([train_data[(jumpsPerEpoch * j + i) % whole_len]
                                for j in range(batchsize)])
            y_batch = np.array([train_data[(jumpsPerEpoch * j + i + 1) % whole_len]
                                for j in range(batchsize)])

            if self.gpu >= 0:
                x_batch = cuda.to_gpu(x_batch)
                y_batch = cuda.to_gpu(y_batch)


            state, loss_i = self.model.forward_one_step(x_batch,
                                                        y_batch,
                                                        state,
                                                        dropout_ratio=self.settings.dropout)
            accum_loss += loss_i

            if (i + 1) % sequence_length == 0:
                now = time.time()
                print('{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)//sequence_length, steps, accum_loss.data / sequence_length, now-cur_at))
                cur_at = now

                optimizer.zero_grads()
                accum_loss.backward()
                accum_loss.unchain_backward()  # truncate
                if self.gpu >= 0:
                    accum_loss = Variable(cuda.zeros(()))
                else:
                    accum_loss = Variable(np.zeros((), dtype=np.float32))


                optimizer.clip_grads(self.settings.grad_clip)
                optimizer.update()

            if (i + 1) % jumpsPerEpoch == 0:
                epoch += 1

                if epoch >= self.settings.learning_rate_decay_after:
                    optimizer.lr *= self.settings.learning_rate_decay
                    print('decayed learning rate by a factor {} to {}'.format(self.settings.learning_rate_decay, optimizer.lr))
Developer: ekatek, Project: nanowrinet, Lines of code: 91, Source: Predictor.py

Example 13: SGD_Embedid

# Required import: from chainer import Variable [as alias]
# Or: from chainer.Variable import backward [as alias]
list_sentences = [np.array(row, np.int32) for row in list_sentences]


opt1 = SGD_Embedid() # use stochastic gradient descent
opt2 = SGD() # use stochastic gradient descent
opt1.setup(model1) # initialize the optimizer
opt2.setup(model2) # initialize the optimizer
opt1.tuples[0][1].fill(0)
opt2.zero_grads()
random.shuffle(list_sentences)
list_minibatch = []
for i, sentence in enumerate(list_sentences):
    list_minibatch.append(sentence)
    if len(list_minibatch) == BATCH_SIZE:
        accum_loss_total = Variable(np.zeros((), dtype=np.float32)) # initial value of the accumulated loss
        uniq_sentence = np.zeros((), np.int32)
        for batch_sentence in list_minibatch:
            accum_loss_total += forward(batch_sentence) # compute the loss
            uniq_sentence = np.append(uniq_sentence, batch_sentence)
        accum_loss_total.backward() # backpropagate the error
        opt1.clip_grads(10) # clip overly large gradients
        opt2.clip_grads(10) # clip overly large gradients
        uniq_sentence = np.unique(uniq_sentence)
        opt1.update(uniq_sentence) # update the parameters
        opt2.update() # update the parameters
        opt1.zero_grads(uniq_sentence) # reset the gradients
        opt2.zero_grads() # reset the gradients
        list_minibatch = []
    if i % 1000 == 999:
        break
Developer: kenchin110100, Project: machine_learning, Lines of code: 32, Source: sampleRNN.py


Note: The chainer.Variable.backward method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors, and distribution and use must follow the corresponding project's license. Do not repost without permission.