This article collects typical usage examples of the Python method model.zero_grad. If you have been wondering exactly what model.zero_grad does, how to call it, or what it looks like in real code, the hand-picked examples below may help. You can also explore further usage examples from the enclosing model module.
The following shows 6 code examples of the model.zero_grad method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
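Before the examples, here is a minimal, self-contained sketch of where model.zero_grad() sits in an ordinary PyTorch training step. Everything in it (the toy model, data, and optimizer) is made up for illustration; the examples below follow the same clear-gradients / backward / update pattern, but perform the update by hand instead of through an optimizer.

import torch
import torch.nn as nn

# Hypothetical toy model and data, only to show the call sequence.
model = nn.Linear(10, 2)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

inputs = torch.randn(8, 10)
targets = torch.randint(0, 2, (8,))

model.zero_grad()                      # clear gradients left over from earlier backward() calls
loss = criterion(model(inputs), targets)
loss.backward()                        # populate p.grad for every parameter
optimizer.step()                       # apply the update (the examples below do this manually)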
Example 1: train

# Required import: import model [as alias]
# Or alternatively: from model import zero_grad [as alias]
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

# Loop over epochs.
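All of the examples on this page call repackage_hidden, which is defined elsewhere in their respective projects. For reference, a typical implementation, assuming the hidden state is a Tensor or a (possibly nested) tuple of Tensors as in the standard PyTorch language-model example, simply detaches the state so that backpropagation stops at the batch boundary:

import torch

def repackage_hidden(h):
    """Wrap hidden states in new Tensors, detaching them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)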
Example 2: train

# Required import: import model [as alias]
# Or alternatively: from model import zero_grad [as alias]
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if args.model != 'Transformer':
        hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        model.zero_grad()
        if args.model == 'Transformer':
            output = model(data)
            output = output.view(-1, ntokens)
        else:
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
        loss = criterion(output, targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        if args.dry_run:
            break
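The get_batch helper used in every example is also defined outside the snippets shown here. A sketch of the usual implementation, assuming source is the (sequence length x batch size) tensor produced by the batchify step of the PyTorch word-language-model example and args.bptt is the back-propagation-through-time window:

def get_batch(source, i):
    # Take a chunk of length bptt starting at position i; the targets are the
    # same chunk shifted by one token and flattened for the loss function.
    seq_len = min(args.bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]
    target = source[i + 1:i + 1 + seq_len].view(-1)
    return data, target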
Example 3: train

# Required import: import model [as alias]
# Or alternatively: from model import zero_grad [as alias]
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if args.model != 'Transformer':
        hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        model.zero_grad()
        if args.model == 'Transformer':
            output = model(data)
        else:
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
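Note that Examples 1, 4, and 5 call torch.nn.utils.clip_grad_norm, while Examples 2, 3, and 6 call clip_grad_norm_ with a trailing underscore. The underscore form is the current name (the clipping is done in place); the older spelling was deprecated around PyTorch 0.4. On a recent installation the call would be:

# Clip the global gradient norm in place before the manual parameter update.
torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)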
Example 4: train

# Required import: import model [as alias]
# Or alternatively: from model import zero_grad [as alias]
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

# Loop over epochs.
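Examples 1 and 4 end with a stray "# Loop over epochs." comment; in the scripts they come from, that comment introduces the driver loop that calls train() once per epoch and anneals the learning rate when the validation loss stops improving. A sketch of that loop, assuming an evaluate function and a val_data split exist alongside train() as in the standard PyTorch example:

best_val_loss = None
try:
    for epoch in range(1, args.epochs + 1):
        train()
        val_loss = evaluate(val_data)
        # Save the model if the validation loss is the best seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            with open(args.save, 'wb') as f:
                torch.save(model, f)
            best_val_loss = val_loss
        else:
            # Anneal the learning rate if no improvement was seen on the validation set.
            lr /= 4.0
except KeyboardInterrupt:
    print('Exiting from training early')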
Example 5: train

# Required import: import model [as alias]
# Or alternatively: from model import zero_grad [as alias]
def train():
    global lr, best_val_loss
    # Turn on training mode which enables dropout.
    model.train()
    total_loss, nbatches = 0, 0
    start_time = time.time()
    ntokens = len(corpus.dictionary.idx2word)
    hidden = model.init_hidden(args.batch_size)
    for b, batch in enumerate(corpus.iter('train', args.batch_size, args.bptt, use_cuda=args.cuda)):
        model.train()
        source, target = batch
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        model.softmax.set_target(target.data.view(-1))
        output, hidden = model(source, hidden)
        loss = criterion(output, target.view(-1))
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        for p in model.parameters():
            if p.grad is not None:
                p.data.add_(-lr, p.grad.data)

        total_loss += loss.data.cpu()

        if b % args.log_interval == 0 and b > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            val_loss = evaluate('valid')
            print('| epoch {:3d} | batch {:5d} | lr {:02.5f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f} | valid loss {:5.2f} | valid ppl {:8.2f}'.format(
                epoch, b, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss),
                val_loss, math.exp(val_loss)))

            # Save the model if the validation loss is the best we've seen so far.
            if not best_val_loss or val_loss < best_val_loss:
                with open(args.save, 'wb') as f:
                    torch.save(model, f)
                best_val_loss = val_loss
            else:
                # Anneal the learning rate if no improvement has been seen in the validation dataset.
                lr *= args.ar

            total_loss = 0
            start_time = time.time()

# At any point you can hit Ctrl + C to break out of training early.
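All six examples update parameters by hand with p.data.add_(-lr, p.grad.data) instead of calling an optimizer. That two-argument form of add_ is the older PyTorch API; on recent versions the scalar multiplier is passed as the alpha keyword, and an equivalent update loop (a sketch, not taken from these projects) would be:

with torch.no_grad():
    for p in model.parameters():
        if p.grad is not None:
            p.add_(p.grad, alpha=-lr)   # plain SGD step: p <- p - lr * grad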
Example 6: train

# Required import: import model [as alias]
# Or alternatively: from model import zero_grad [as alias]
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if (not args.single) and (torch.cuda.device_count() > 1):
        # "module" is necessary when using DataParallel
        hidden = model.module.init_hidden(args.batch_size)
    else:
        hidden = model.init_hidden(args.batch_size)

    # UNCOMMENT FOR DEBUGGING
    # random.seed(10)
    order = list(enumerate(range(0, train_lm_data.size(0) + train_ccg_data.size(0) - 1, args.bptt)))
    random.shuffle(order)
    for batch, i in order:
        # TAG
        if i > train_lm_data.size(0):
            data, targets = get_batch(train_ccg_data, i - train_lm_data.size(0))
        # LM
        else:
            data, targets = get_batch(train_lm_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            # total_loss holds a Python float here (from loss.item()), so it is used
            # directly rather than being indexed as in the older examples above.
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, (len(train_lm_data) + len(train_ccg_data)) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

# Loop over epochs.
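Finally, model.zero_grad() (defined on nn.Module) and optimizer.zero_grad() are interchangeable whenever the optimizer holds all of the model's parameters; these examples use the module-level call because they update parameters manually. On recent PyTorch versions both variants accept set_to_none, which resets gradients to None rather than to zero-filled tensors:

# Either call clears gradients before the next backward(); set_to_none avoids
# allocating zero tensors and is the default on newer PyTorch releases.
model.zero_grad(set_to_none=True)
# optimizer.zero_grad(set_to_none=True)   # when an optimizer owns the parameters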