本文整理汇总了 Python 中 model.reset 方法的典型用法代码示例。如果您正苦于以下问题:Python model.reset 方法的具体用法?Python model.reset 怎么用?Python model.reset 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 model 的用法示例。
在下文中一共展示了model.reset方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: evaluate
# 需要导入模块: import model [as 别名]
# 或者: from model import reset [as 别名]
def evaluate(data_source, source_sampler, target_sampler, batch_size=10):
    """Compute the average loss of the global ``model`` over sampled batches.

    Args:
        data_source: Indexable collection of tensors; every index produced by
            the samplers is looked up as ``data_source[i]`` and the results
            are stacked into a batch.
        source_sampler: Iterable yielding index collections for the inputs.
        target_sampler: Iterable yielding index collections for the targets;
            consumed in lockstep with ``source_sampler``.
        batch_size: Batch size handed to ``model.init_hidden``.

    Returns:
        The sum of ``len(data) * loss`` over all batches divided by
        ``len(data_source)``.
    """
    # Evaluation mode disables dropout. The mode is set once here and is not
    # switched back to training inside the loop; doing so would make the
    # reported loss depend on dropout noise.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    hidden = model.init_hidden(batch_size)
    for source_sample, target_sample in zip(source_sampler, target_sampler):
        data = torch.stack([data_source[i] for i in source_sample])
        targets = torch.stack([data_source[i] for i in target_sample]).view(-1)
        # No gradients are needed while scoring.
        with torch.no_grad():
            output, hidden = model(data, hidden)
            batch_loss = criterion(model.decoder.weight, model.decoder.bias, output,
                                   targets).item()
        total_loss += len(data) * batch_loss
        # Detach the hidden state from the graph that produced it.
        hidden = repackage_hidden(hidden)
    return total_loss / len(data_source)
示例2: evaluate
# 需要导入模块: import model [as 别名]
# 或者: from model import reset [as 别名]
def evaluate(data_source, batch_size=10):
    """Return the average loss of the global ``model`` over ``data_source``.

    The data is walked in steps of ``args.bptt``; each batch loss is weighted
    by ``len(data)`` and the weighted sum is divided by ``len(data_source)``.

    Args:
        data_source: Tensor-like object supporting ``size(0)`` and ``len``.
        batch_size: Batch size handed to ``model.init_hidden``.
    """
    # For QRNN models the model's reset() hook is invoked before switching modes.
    if args.model == 'QRNN':
        model.reset()
    # Evaluation mode disables dropout.
    model.eval()
    vocab_size = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    total_loss = 0
    for offset in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, offset, args, evaluation=True)
        output, hidden = model(data, hidden)
        flat_output = output.view(-1, vocab_size)
        batch_loss = criterion(flat_output, targets).data
        total_loss += len(data) * batch_loss
        # Detach the hidden state before it is fed to the next batch.
        hidden = repackage_hidden(hidden)
    # NOTE(review): ``[0]`` assumes the accumulated loss is a 1-element tensor;
    # confirm against the PyTorch version this script targets.
    return total_loss[0] / len(data_source)
示例3: evaluate
# 需要导入模块: import model [as 别名]
# 或者: from model import reset [as 别名]
def evaluate(data_source, batch_size=10):
    """Score the global ``model`` on ``data_source`` and return the mean loss.

    Args:
        data_source: Tensor-like object supporting ``size(0)`` and ``len``.
        batch_size: Batch size handed to ``model.init_hidden``.

    Returns:
        Sum of ``len(data) * loss`` over all ``args.bptt``-strided batches,
        divided by ``len(data_source)``.
    """
    # Evaluation mode disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    n_vocab = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    batch_starts = range(0, data_source.size(0) - 1, args.bptt)
    for start in batch_starts:
        data, targets = get_batch(data_source, start, args, evaluation=True)
        output, hidden = model(data, hidden)
        # Flatten to (N, vocabulary) before handing the scores to the criterion.
        scores = output.view(-1, n_vocab)
        total_loss += len(data) * criterion(scores, targets).data
        hidden = repackage_hidden(hidden)
    # NOTE(review): ``[0]`` assumes a 1-element loss tensor; confirm against
    # the PyTorch version in use.
    return total_loss[0] / len(data_source)
示例4: evaluate
# 需要导入模块: import model [as 别名]
# 或者: from model import reset [as 别名]
def evaluate(data_source, batch_size=10):
    """Return the mean loss of the global ``model`` over ``data_source``.

    The criterion is called with the decoder weight and bias together with the
    raw model output, so no explicit projection to the vocabulary happens here.

    Args:
        data_source: Tensor-like object supporting ``size(0)`` and ``len``.
        batch_size: Batch size handed to ``model.init_hidden``.
    """
    # Evaluation mode disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)  # computed by the original code but never read
    hidden = model.init_hidden(batch_size)
    for offset in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, offset, args, evaluation=True)
        output, hidden = model(data, hidden)
        decoder = model.decoder
        batch_loss = criterion(decoder.weight, decoder.bias, output, targets).data
        total_loss += len(data) * batch_loss
        # Detach the hidden state before the next batch.
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
示例5: evaluate
# 需要导入模块: import model [as 别名]
# 或者: from model import reset [as 别名]
def evaluate(data_source, corpus, batch_size=10, ood=False):
    """Evaluate the global ``model`` and collect a per-batch anomaly score.

    Args:
        data_source: Tensor-like object supporting ``size(0)`` and ``len``.
        corpus: Object exposing a ``dictionary`` attribute.
        batch_size: Batch size handed to ``model.init_hidden``.
        ood: When exactly ``True``, the loop stops once the batch offset
            reaches ``ood_num_examples // test_batch_size``.

    Returns:
        A tuple ``(mean_loss, scores)``: the accumulated loss divided by
        ``len(data_source) // args.bptt``, and the list of per-batch scores.
    """
    # Evaluation mode disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    loss_accum = 0
    losses = []
    ntokens = len(corpus.dictionary)  # computed by the original code but never read
    for offset in range(0, data_source.size(0) - 1, args.bptt):
        past_ood_limit = offset >= ood_num_examples // test_batch_size
        if past_ood_limit and (ood is True):
            break

        # A fresh hidden state is created for every batch.
        hidden = repackage_hidden(model.init_hidden(batch_size))

        data, targets = get_batch(data_source, offset, args, evaluation=True)
        output, hidden = model(data, hidden)
        logits = model.decoder(output)
        # Subtract the row-wise maximum before the softmax.
        shifted = logits - torch.max(logits, dim=1, keepdim=True)[0]
        smaxes = F.softmax(shifted, dim=1)

        # Probability assigned to each target, then the mean log-probability
        # (original author's note: divided by seq len, i.e. negative nats per char).
        target_probs = smaxes[range(targets.size(0)), targets]
        log_prob = torch.log(target_probs).mean(0)
        loss_accum += -log_prob.data.cpu().numpy()[0]

        # Experimental score: log-softmax weighted by ``uniform_base_rates``
        # (original author's note: negative KL to uniform). A negative
        # max-softmax-probability variant was left disabled by the author.
        weighted_log = (smaxes).add(1e-18).log() * uniform_base_rates.unsqueeze(0)
        anomaly_score = weighted_log.sum(1).mean(0)
        losses.append(anomaly_score.data.cpu().numpy()[0])

    return loss_accum / (len(data_source) // args.bptt), losses
# Run on test data.
示例6: evaluate
# 需要导入模块: import model [as 别名]
# 或者: from model import reset [as 别名]
def evaluate(data_source, batch_size=10, test=False):
    """Evaluate the global ``model`` on in-distribution and outlier batches.

    Each in-distribution batch is paired with a batch drawn at the same offset
    from ``oe_val_dataset``; both are concatenated along dimension 1 and run
    through the model in a single forward pass.

    Args:
        data_source: Tensor-like object supporting ``size(0)``.
        batch_size: Accepted for interface compatibility; the hidden state is
            sized from ``test_batch_size`` / ``eval_batch_size`` instead.
        test: Selects ``test_batch_size`` when true, else ``eval_batch_size``.

    Returns:
        A tuple ``(mean_loss, mean_oe_loss)`` averaged over the batches that
        were actually evaluated.
    """
    # Evaluation mode disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    total_oe_loss = 0
    num_batches = 0
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        data_oe, _ = get_batch(oe_val_dataset, i, args, evaluation=True)
        if data.dim() == 1:  # happens for test set?
            # ``unsqueeze`` returns a new tensor, so the result has to be
            # assigned back; otherwise the batches stay 1-D and the
            # concatenation along dim=1 below cannot work.
            data = data.unsqueeze(-1)
            data_oe = data_oe.unsqueeze(-1)
        # Skip pairs whose first dimensions disagree.
        if data.size(0) != data_oe.size(0):
            continue

        bs = test_batch_size if test else eval_batch_size
        # The hidden state covers both halves of the concatenated batch.
        hidden = model.init_hidden(2 * bs)
        hidden = repackage_hidden(hidden)

        output, hidden, rnn_hs, dropped_rnn_hs = model(
            torch.cat([data, data_oe], dim=1), hidden, return_h=True)
        # Split the last layer's output back into the two halves.
        output, output_oe = torch.chunk(dropped_rnn_hs[-1], dim=1, chunks=2)
        output, output_oe = output.contiguous(), output_oe.contiguous()
        output = output.view(output.size(0) * output.size(1), output.size(2))

        loss = criterion(model.decoder.weight, model.decoder.bias, output, targets).data

        # OE loss: mean negative log-softmax over the outlier half.
        logits_oe = model.decoder(output_oe)
        smaxes_oe = F.softmax(logits_oe - torch.max(logits_oe, dim=-1, keepdim=True)[0], dim=-1)
        loss_oe = -smaxes_oe.log().mean(-1)
        loss_oe = loss_oe.mean().data

        total_loss += loss
        total_oe_loss += loss_oe
        num_batches += 1

    # NOTE(review): ``[0]`` assumes 1-element loss tensors and at least one
    # evaluated batch; confirm against the PyTorch version in use.
    return total_loss[0] / num_batches, total_oe_loss[0] / num_batches
示例7: train
# 需要导入模块: import model [as 别名]
# 或者: from model import reset [as 别名]
def train():
    """Run one pass over the global ``train_data`` with variable-length batches.

    Sequence lengths are drawn at random around ``args.bptt``, the learning
    rate is rescaled in proportion to the drawn length for that step only, and
    progress is printed every ``args.log_interval`` batches. Returns ``None``.
    """
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 2:
        # Mostly use the full bptt; occasionally halve it.
        if np.random.random() < 0.95:
            bptt = args.bptt
        else:
            bptt = args.bptt / 2.
        # Lower bound avoids tiny or negative lengths; upper bound guards
        # against a rare, very long draw that could exhaust memory.
        seq_len = min(max(5, int(np.random.normal(bptt, 5))), args.bptt + 10)

        # Temporarily rescale the learning rate by the relative sequence length.
        param_group = optimizer.param_groups[0]
        lr2 = param_group['lr']
        param_group['lr'] = lr2 * seq_len / args.bptt

        # Training mode enables dropout.
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Detach the hidden state so backpropagation stops at this batch.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(output.view(-1, ntokens), targets)

        # Activation regularization on the last layer's dropped outputs.
        ar_term = sum(args.alpha * h.pow(2).mean() for h in dropped_rnn_hs[-1:])
        # Temporal activation regularization (slowness) on the last layer.
        tar_term = sum(args.beta * (h[1:] - h[:-1]).pow(2).mean() for h in rnn_hs[-1:])
        loss = raw_loss + ar_term + tar_term
        loss.backward()

        # Gradient clipping helps prevent exploding gradients in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        # Restore the unscaled learning rate.
        optimizer.param_groups[0]['lr'] = lr2

        if batch % args.log_interval == 0 and batch > 0:
            # NOTE(review): ``[0]`` assumes a 1-element loss tensor; confirm
            # against the PyTorch version in use.
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            ms_per_batch = elapsed * 1000 / args.log_interval
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                      epoch, batch, len(train_data) // args.bptt,
                      optimizer.param_groups[0]['lr'],
                      ms_per_batch, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

        batch += 1
        i += seq_len
# Load the best saved model.
示例8: train
# 需要导入模块: import model [as 别名]
# 或者: from model import reset [as 别名]
def train():
    """Train the global ``model`` for one pass over ``train_data``.

    Batches have a randomly drawn sequence length centred on ``args.bptt`` (no
    upper cap is applied in this variant). The learning rate is scaled by
    ``seq_len / args.bptt`` for each step and restored afterwards. A progress
    line is printed every ``args.log_interval`` batches. Returns ``None``.
    """
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch = 0
    i = 0
    last_start = train_data.size(0) - 1 - 1
    while i < last_start:
        use_full_bptt = np.random.random() < 0.95
        bptt = args.bptt if use_full_bptt else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths. The upper
        # cap present in sibling variants is disabled here, so a rare very
        # long draw is possible.
        drawn_len = int(np.random.normal(bptt, 5))
        seq_len = max(5, drawn_len)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        # Training mode enables dropout.
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)
        # Cut the graph at the batch boundary so backpropagation does not
        # reach back to the start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        flat_output = output.view(-1, ntokens)
        raw_loss = criterion(flat_output, targets)
        loss = raw_loss
        # Activation regularization.
        loss = loss + sum(
            args.alpha * dropped.pow(2).mean() for dropped in dropped_rnn_hs[-1:])
        # Temporal activation regularization (slowness).
        loss = loss + sum(
            args.beta * (raw[1:] - raw[:-1]).pow(2).mean() for raw in rnn_hs[-1:])
        loss.backward()

        # Gradient clipping helps prevent exploding gradients in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            # NOTE(review): ``[0]`` assumes a 1-element loss tensor; confirm
            # against the PyTorch version in use.
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                      epoch,
                      batch,
                      len(train_data) // args.bptt,
                      optimizer.param_groups[0]['lr'],
                      elapsed * 1000 / args.log_interval,
                      cur_loss,
                      math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        batch += 1
        i += seq_len
# Loop over epochs.
示例9: train
# 需要导入模块: import model [as 别名]
# 或者: from model import reset [as 别名]
def train():
    """Train the global ``model`` on batches drawn by the global samplers.

    ``train_source_sampler`` and ``train_target_sampler`` are consumed in
    lockstep; each yields indices into ``train_data``. Loss, perplexity and
    bits-per-character are printed every ``args.log_interval`` batches.
    Returns ``None``.
    """
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    start_time = time.time()
    hidden = model.init_hidden(args.batch_size)
    sample_pairs = zip(train_source_sampler, train_target_sampler)
    for batch, (source_sample, target_sample) in enumerate(sample_pairs):
        # Training mode enables dropout.
        model.train()
        source_rows = [train_data[i] for i in source_sample]
        target_rows = [train_data[i] for i in target_sample]
        # Stack, then transpose in place and make the result contiguous.
        data = torch.stack(source_rows).t_().contiguous()
        targets = torch.stack(target_rows).t_().contiguous().view(-1)

        # Detach the hidden state so backpropagation stops at this batch.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        decoder = model.decoder
        raw_loss = criterion(decoder.weight, decoder.bias, output, targets)

        loss = raw_loss
        # Activation regularization (skipped when args.alpha is falsy).
        if args.alpha:
            ar_term = sum(args.alpha * h.pow(2).mean() for h in dropped_rnn_hs[-1:])
            loss = loss + ar_term
        # Temporal activation regularization (skipped when args.beta is falsy).
        if args.beta:
            tar_term = sum(args.beta * (h[1:] - h[:-1]).pow(2).mean() for h in rnn_hs[-1:])
            loss = loss + tar_term
        loss.backward()

        # Gradient clipping helps prevent exploding gradients in RNNs / LSTMs.
        if args.clip:
            torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.item()
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            ms_per_batch = elapsed * 1000 / args.log_interval
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                      epoch, batch,
                      len(train_source_sampler) // args.bptt,
                      optimizer.param_groups[0]['lr'], ms_per_batch, cur_loss,
                      math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
# Loop over epochs.
示例10: train
# 需要导入模块: import model [as 别名]
# 或者: from model import reset [as 别名]
def train():
    """Run one training pass over the global ``train_data``.

    Sequence lengths are sampled around ``args.bptt`` (without an upper cap),
    the learning rate is scaled by ``seq_len / args.bptt`` for the step and
    then restored, and the criterion receives the decoder weight and bias
    directly. Loss, perplexity and bits-per-character are printed every
    ``args.log_interval`` batches. Returns ``None``.
    """
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)  # computed by the original code but never read
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        if np.random.random() < 0.95:
            bptt = args.bptt
        else:
            bptt = args.bptt / 2.
        # Prevent excessively small or negative sequence lengths; no upper
        # bound is enforced in this variant.
        seq_len = max(5, int(np.random.normal(bptt, 5)))

        group = optimizer.param_groups[0]
        lr2 = group['lr']
        group['lr'] = lr2 * seq_len / args.bptt
        # Training mode enables dropout.
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Detach the hidden state so backpropagation stops at this batch.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)

        loss = raw_loss
        # Activation regularization (skipped when args.alpha is falsy).
        if args.alpha:
            loss = loss + sum(
                args.alpha * dropped.pow(2).mean() for dropped in dropped_rnn_hs[-1:])
        # Temporal activation regularization (skipped when args.beta is falsy).
        if args.beta:
            loss = loss + sum(
                args.beta * (raw[1:] - raw[:-1]).pow(2).mean() for raw in rnn_hs[-1:])
        loss.backward()

        # Gradient clipping helps prevent exploding gradients in RNNs / LSTMs.
        if args.clip:
            torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        # Restore the unscaled learning rate.
        optimizer.param_groups[0]['lr'] = lr2

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                      epoch,
                      batch,
                      len(train_data) // args.bptt,
                      optimizer.param_groups[0]['lr'],
                      elapsed * 1000 / args.log_interval,
                      cur_loss,
                      math.exp(cur_loss),
                      cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()

        batch += 1
        i += seq_len
# Loop over epochs.
示例11: evaluate
# 需要导入模块: import model [as 别名]
# 或者: from model import reset [as 别名]
def evaluate(data_source, batch_size=10, window=args.window):
    """Evaluate the global ``model`` with a pointer mixed into the softmax.

    For each position, once more than ``window`` earlier positions are
    available, the vocabulary distribution is interpolated with a distribution
    built from the last ``window`` stored outputs and their one-hot next words.

    Args:
        data_source: Tensor-like object supporting ``size(0)`` and ``len``.
        batch_size: Batch size handed to ``model.init_hidden``; also divides
            each batch's summed loss.
        window: Number of past positions kept in the pointer history. The
            default is read from ``args.window`` when the function is defined.

    Returns:
        The accumulated loss divided by ``len(data_source)``.
    """
    if args.model == 'QRNN':
        model.reset()
    # Evaluation mode disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        # Running progress report.
        if i > 0:
            print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        output, hidden, rnn_outs, _ = model(data, hidden, return_h=True)
        rnn_out = rnn_outs[-1].squeeze()
        output_flat = output.view(-1, ntokens)

        # Fill the pointer history. ``start_idx`` is the history length
        # before this batch is appended.
        start_idx = len(next_word_history) if next_word_history is not None else 0
        if next_word_history is None:
            next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets])
        else:
            batch_one_hots = torch.cat([one_hot(t.data[0], ntokens) for t in targets])
            next_word_history = torch.cat([next_word_history, batch_one_hots])
        if pointer_history is None:
            pointer_history = Variable(rnn_out.data)
        else:
            pointer_history = torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)

        # Pointer manual cross entropy.
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            position = start_idx + idx
            p = vocab_loss
            if position > window:
                lo = position - window
                valid_next_word = next_word_history[lo:position]
                valid_pointer_history = pointer_history[lo:position]
                # Score the current output against each stored output.
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                ptr_attn = torch.nn.functional.softmax(args.theta * logits).view(-1, 1)
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                # Interpolate pointer and vocabulary distributions.
                lambdah = args.lambdasm
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data[0]
        total_loss += loss / batch_size

        # Detach the hidden state and keep only the most recent ``window``
        # history entries.
        hidden = repackage_hidden(hidden)
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)
# Load the best saved model.