This article collects typical usage examples of the Python method pytorch_pretrained_bert.optimization.BertAdam. If you are wondering how optimization.BertAdam is used in practice, the curated code examples below may help. You can also explore further usage examples from the containing module, pytorch_pretrained_bert.optimization.
The following shows 14 code examples of the optimization.BertAdam method, sorted by popularity by default.
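Nearly all of the examples below follow the same recipe: split the model parameters into two groups so that biases and LayerNorm weights are excluded from weight decay, then construct a BertAdam with a warmup proportion and the total number of training steps. The following is a minimal standalone sketch of that recipe (the model, learning_rate, warmup_proportion, and num_train_steps names are placeholders, not taken from any specific example):

from pytorch_pretrained_bert.optimization import BertAdam

def make_bert_adam(model, learning_rate, warmup_proportion, num_train_steps):
    # Exclude biases and LayerNorm parameters from L2 weight decay.
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    named_params = list(model.named_parameters())
    optimizer_grouped_parameters = [
        {"params": [p for n, p in named_params if not any(nd in n for nd in no_decay)],
         "weight_decay": 0.01},
        {"params": [p for n, p in named_params if any(nd in n for nd in no_decay)],
         "weight_decay": 0.0},
    ]
    # With the default schedule, BertAdam warms the learning rate up linearly for the
    # first warmup_proportion * num_train_steps updates, then decays it linearly.
    return BertAdam(optimizer_grouped_parameters,
                    lr=learning_rate,
                    warmup=warmup_proportion,
                    t_total=num_train_steps)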
Example 1: load_model
# Required module import: from pytorch_pretrained_bert import optimization [as alias]
# Or: from pytorch_pretrained_bert.optimization import BertAdam [as alias]
def load_model(config, num_train_steps, label_list):
    # device = torch.device(torch.cuda.is_available())
    device = torch.device("cuda")
    n_gpu = torch.cuda.device_count()
    model = BertTagger(config, num_labels=len(label_list))
    # model = BertForTagger.from_pretrained(config.bert_model, num_labels=13)
    model.to(device)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)
    # prepare optimizer
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {"params": [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], "weight_decay": 0.01},
        {"params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], "weight_decay": 0.0}]
    # optimizer = Adam(optimizer_grouped_parameters, lr=config.learning_rate)
    optimizer = BertAdam(optimizer_grouped_parameters, lr=config.learning_rate, warmup=config.warmup_proportion, t_total=num_train_steps, max_grad_norm=config.clip_grad)
    return model, optimizer, device, n_gpu
Example 2: load_model
# Required module import: from pytorch_pretrained_bert import optimization [as alias]
# Or: from pytorch_pretrained_bert.optimization import BertAdam [as alias]
def load_model(config, num_train_steps, label_list):
    device = torch.device("cuda")
    n_gpu = torch.cuda.device_count()
    model = BertMRCNER(config, )
    model.to(device)
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)
    # prepare optimizer
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {"params": [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], "weight_decay": 0.01},
        {"params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], "weight_decay": 0.0}]
    # optimizer = Adam(optimizer_grouped_parameters, lr=config.learning_rate)
    optimizer = BertAdam(optimizer_grouped_parameters, lr=config.learning_rate, warmup=config.warmup_proportion, t_total=num_train_steps, max_grad_norm=config.clip_grad)
    return model, optimizer, device, n_gpu
Example 3: get_opt
# Required module import: from pytorch_pretrained_bert import optimization [as alias]
# Or: from pytorch_pretrained_bert.optimization import BertAdam [as alias]
def get_opt(param_optimizer, num_train_optimization_steps, args):
    """
    Hack to remove the pooler, which is not used.
    Otherwise it produces a None grad that breaks apex.
    """
    param_optimizer = [n for n in param_optimizer if 'pooler' not in n[0]]
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    return BertAdam(optimizer_grouped_parameters,
                    lr=args.lr,
                    warmup=args.warmup_proportion,
                    t_total=num_train_optimization_steps)
Example 4: build_optimizer
# Required module import: from pytorch_pretrained_bert import optimization [as alias]
# Or: from pytorch_pretrained_bert.optimization import BertAdam [as alias]
def build_optimizer(model, num_train_steps, learning_rate):
    global ARGS
    if ARGS.tagger_from_debiaser:
        parameters = list(model.cls_classifier.parameters()) + list(
            model.tok_classifier.parameters())
        parameters = list(filter(lambda p: p.requires_grad, parameters))
        return optim.Adam(parameters, lr=ARGS.learning_rate)
    else:
        param_optimizer = list(model.named_parameters())
        param_optimizer = list(filter(lambda name_param: name_param[1].requires_grad, param_optimizer))
        no_decay = ['bias', 'gamma', 'beta']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.0}
        ]
        return BertAdam(optimizer_grouped_parameters,
                        lr=learning_rate,
                        warmup=0.1,
                        t_total=num_train_steps)
Example 5: reset_bert_optimizer
# Required module import: from pytorch_pretrained_bert import optimization [as alias]
# Or: from pytorch_pretrained_bert.optimization import BertAdam [as alias]
def reset_bert_optimizer(self):
    # Prepare optimizer
    if self.setting.fp16:
        model_named_parameters = [(n, param.clone().detach().to('cpu').float().requires_grad_())
                                  for n, param in self.model.named_parameters()]
    elif self.setting.optimize_on_cpu:
        model_named_parameters = [(n, param.clone().detach().to('cpu').requires_grad_())
                                  for n, param in self.model.named_parameters()]
    else:
        model_named_parameters = list(self.model.named_parameters())
    no_decay = ['bias', 'gamma', 'beta']
    optimizer_grouped_parameters = [
        {
            'params': [p for n, p in model_named_parameters if n not in no_decay],
            'weight_decay_rate': 0.01
        },
        {
            'params': [p for n, p in model_named_parameters if n in no_decay],
            'weight_decay_rate': 0.0
        }
    ]
    num_train_steps = int(len(self.train_examples)
                          / self.setting.train_batch_size
                          / self.setting.gradient_accumulation_steps
                          * self.setting.num_train_epochs)
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=self.setting.learning_rate,
                         warmup=self.setting.warmup_proportion,
                         t_total=num_train_steps)
    return optimizer, num_train_steps, model_named_parameters
Example 6: train_model
# Required module import: from pytorch_pretrained_bert import optimization [as alias]
# Or: from pytorch_pretrained_bert.optimization import BertAdam [as alias]
def train_model(model, args, trainset_reader, validset_reader):
    save_path = args.outdir + '/model'
    best_acc = 0.0
    running_loss = 0.0
    t_total = int(
        trainset_reader.total_num / args.train_batch_size / args.gradient_accumulation_steps * args.num_train_epochs)
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=args.learning_rate,
                         warmup=args.warmup_proportion,
                         t_total=t_total)
    global_step = 0
    crit = nn.CrossEntropyLoss()
    for epoch in range(int(args.num_train_epochs)):
        optimizer.zero_grad()
        for inp_tensor, msk_tensor, seg_tensor, label_tensor in trainset_reader:
            model.train()
            prob = model(inp_tensor, msk_tensor, seg_tensor)
            loss = crit(prob, label_tensor)
            running_loss += loss.item()
            loss.backward()
            global_step += 1
            if global_step % args.gradient_accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
                logger.info('Epoch: {0}, Step: {1}, Loss: {2}'.format(epoch, global_step, (running_loss / global_step)))
            if global_step % (args.eval_step * args.gradient_accumulation_steps) == 0:
                logger.info('Start eval!')
                eval_acc = eval_model(model, validset_reader)
                logger.info('Dev acc: {0}'.format(eval_acc))
                if eval_acc >= best_acc:
                    best_acc = eval_acc
                    torch.save({'epoch': epoch,
                                'model': model.state_dict()}, save_path + ".best.pt")
                    logger.info("Saved best epoch {0}, best acc {1}".format(epoch, best_acc))
Example 7: create_optimizer
# Required module import: from pytorch_pretrained_bert import optimization [as alias]
# Or: from pytorch_pretrained_bert.optimization import BertAdam [as alias]
def create_optimizer(model, learning_rate, t_total, loss_scale, fp16, warmup_proportion, state_dict):
    # Prepare optimizer
    param_optimizer = list(model.named_parameters())
    no_decay = [
        'bias', 'LayerNorm.bias', 'LayerNorm.weight',
        'adapter.down_project.weight', 'adapter.up_project.weight',
    ]
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    if fp16:
        try:
            from apex.optimizers import FP16_Optimizer
            from apex.optimizers import FusedAdam
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex "
                              "to use distributed and fp16 training.")
        optimizer = FusedAdam(optimizer_grouped_parameters,
                              lr=learning_rate,
                              bias_correction=False,
                              max_grad_norm=1.0)
        if loss_scale == 0:
            optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
        else:
            optimizer = FP16_Optimizer(optimizer, static_loss_scale=loss_scale)
    else:
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=learning_rate,
                             warmup=warmup_proportion,
                             t_total=t_total)
    if state_dict is not None:
        optimizer.load_state_dict(state_dict)
    return optimizer
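When the fp16 branch of Example 7 is taken, the FusedAdam/FP16_Optimizer combination does not apply the warmup/t_total schedule that BertAdam handles internally. The original pytorch_pretrained_bert example scripts compensate by rescaling the learning rate manually on each update. Below is a minimal sketch of that pattern, mirroring those scripts rather than this repository; learning_rate, warmup_proportion, global_step, and t_total are assumed to be available in the training loop.

def warmup_linear(x, warmup=0.002):
    # Linear warmup followed by linear decay, as in older pytorch_pretrained_bert releases.
    return x / warmup if x < warmup else 1.0 - x

if fp16:
    lr_this_step = learning_rate * warmup_linear(global_step / t_total, warmup_proportion)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr_this_step
optimizer.step()
optimizer.zero_grad()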
Example 8: __init__
# Required module import: from pytorch_pretrained_bert import optimization [as alias]
# Or: from pytorch_pretrained_bert.optimization import BertAdam [as alias]
def __init__(self, args, model, train_examples, use_gpu):
    self.use_gpu = use_gpu
    self.model = model
    self.epochs = args.epochs
    self.best_f1 = -1
    self.min_loss = 100
    self.save_dir = args.save_dir
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    self.lr = args.lr
    self.warmup_proportion = args.warmup_proportion
    self.t_total = int(train_examples / args.batch_size / 1 * args.epochs)
    self.optimizer = BertAdam(optimizer_grouped_parameters,
                              lr=args.lr,
                              warmup=args.warmup_proportion,
                              t_total=self.t_total)
    if self.use_gpu:
        self.loss_func = nn.CrossEntropyLoss(weight=torch.FloatTensor([1.0, args.weight]).cuda())
    else:
        self.loss_func = nn.CrossEntropyLoss(weight=torch.FloatTensor([1.0, args.weight]))
Example 9: build_optimizer
# Required module import: from pytorch_pretrained_bert import optimization [as alias]
# Or: from pytorch_pretrained_bert.optimization import BertAdam [as alias]
def build_optimizer(model, num_train_steps=None):
    global ARGS
    if ARGS.bert_encoder:
        assert num_train_steps
        param_optimizer = list(model.named_parameters())
        param_optimizer = list(filter(lambda name_param: name_param[1].requires_grad, param_optimizer))
        no_decay = ['bias', 'gamma', 'beta']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.01},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.0}
        ]
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=5e-5,
                             warmup=0.1,
                             t_total=num_train_steps)
    else:
        params = list(model.parameters())
        params = list(filter(lambda p: p.requires_grad, params))
        optimizer = optim.Adam(params, lr=ARGS.learning_rate)
    return optimizer
Example 10: _get_optimizer
# Required module import: from pytorch_pretrained_bert import optimization [as alias]
# Or: from pytorch_pretrained_bert.optimization import BertAdam [as alias]
def _get_optimizer(self, learning_rate, num_train_optimization_steps, warmup_proportion):
    """
    Initializes the optimizer and configures which parameters weight
    decay is applied to.
    """
    param_optimizer = list(self.model.named_parameters())
    no_decay_params = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
    params_weight_decay = 0.01
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in param_optimizer if not any(nd in n for nd in no_decay_params)
            ],
            "weight_decay": params_weight_decay,
        },
        {
            "params": [p for n, p in param_optimizer if any(nd in n for nd in no_decay_params)],
            "weight_decay": 0.0,
        },
    ]
    if warmup_proportion is None:
        optimizer = BertAdam(optimizer_grouped_parameters, lr=learning_rate)
    else:
        optimizer = BertAdam(
            optimizer_grouped_parameters,
            lr=learning_rate,
            t_total=num_train_optimization_steps,
            warmup=warmup_proportion,
        )
    return optimizer
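Note that when warmup_proportion is None, Example 10 constructs BertAdam without t_total. With BertAdam's default t_total=-1 no warmup/decay schedule is applied, so the optimizer runs at a constant learning rate (weight decay still applies). A minimal sketch contrasting the two configurations, reusing optimizer_grouped_parameters from the example above with illustrative values:

# Constant learning rate: no schedule because t_total defaults to -1.
optimizer = BertAdam(optimizer_grouped_parameters, lr=2e-5)

# Linear warmup over the first 10% of 10,000 steps, then linear decay.
optimizer = BertAdam(optimizer_grouped_parameters, lr=2e-5,
                     warmup=0.1, t_total=10000)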
Example 11: train_model
# Required module import: from pytorch_pretrained_bert import optimization [as alias]
# Or: from pytorch_pretrained_bert.optimization import BertAdam [as alias]
def train_model(model, ori_model, args, trainset_reader, validset_reader):
    save_path = args.outdir + '/model'
    best_accuracy = 0.0
    running_loss = 0.0
    t_total = int(
        trainset_reader.total_num / args.train_batch_size / args.gradient_accumulation_steps * args.num_train_epochs)
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=args.learning_rate,
                         warmup=args.warmup_proportion,
                         t_total=t_total)
    # optimizer = optim.Adam(model.parameters(),
    #                        lr=args.learning_rate)
    global_step = 0
    for epoch in range(int(args.num_train_epochs)):
        model.train()
        optimizer.zero_grad()
        for index, data in enumerate(trainset_reader):
            inputs, lab_tensor = data
            prob = model(inputs)
            loss = F.nll_loss(prob, lab_tensor)
            running_loss += loss.item()
            # if args.gradient_accumulation_steps > 1:
            #     loss = loss / args.gradient_accumulation_steps
            loss.backward()
            global_step += 1
            if global_step % args.gradient_accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
                logger.info('Epoch: {0}, Step: {1}, Loss: {2}'.format(epoch, global_step, (running_loss / global_step)))
            if global_step % (args.eval_step * args.gradient_accumulation_steps) == 0:
                logger.info('Start eval!')
                with torch.no_grad():
                    dev_accuracy = eval_model(model, validset_reader)
                    logger.info('Dev total acc: {0}'.format(dev_accuracy))
                    if dev_accuracy > best_accuracy:
                        best_accuracy = dev_accuracy
                        torch.save({'epoch': epoch,
                                    'model': ori_model.state_dict(),
                                    'best_accuracy': best_accuracy}, save_path + ".best.pt")
                        logger.info("Saved best epoch {0}, best accuracy {1}".format(epoch, best_accuracy))
Example 12: train_model
# Required module import: from pytorch_pretrained_bert import optimization [as alias]
# Or: from pytorch_pretrained_bert.optimization import BertAdam [as alias]
def train_model(model, args, trainset_reader, validset_reader):
    save_path = args.outdir + '/model'
    best_acc = 0.0
    running_loss = 0.0
    t_total = int(
        trainset_reader.total_num / args.train_batch_size / args.gradient_accumulation_steps * args.num_train_epochs)
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=args.learning_rate,
                         warmup=args.warmup_proportion,
                         t_total=t_total)
    global_step = 0
    crit = nn.MarginRankingLoss(margin=1)
    for epoch in range(int(args.num_train_epochs)):
        optimizer.zero_grad()
        for inp_tensor_pos, msk_tensor_pos, seg_tensor_pos, inp_tensor_neg, msk_tensor_neg, seg_tensor_neg in trainset_reader:
            model.train()
            score_pos = model(inp_tensor_pos, msk_tensor_pos, seg_tensor_pos)
            score_neg = model(inp_tensor_neg, msk_tensor_neg, seg_tensor_neg)
            label = torch.ones(score_pos.size())
            if args.cuda:
                label = label.cuda()
            loss = crit(score_pos, score_neg, Variable(label, requires_grad=False))
            running_loss += loss.item()
            loss.backward()
            global_step += 1
            if global_step % args.gradient_accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
                logger.info('Epoch: {0}, Step: {1}, Loss: {2}'.format(epoch, global_step, (running_loss / global_step)))
            if global_step % (args.eval_step * args.gradient_accumulation_steps) == 0:
                logger.info('Start eval!')
                eval_acc = eval_model(model, validset_reader)
                logger.info('Dev acc: {0}'.format(eval_acc))
                if eval_acc >= best_acc:
                    best_acc = eval_acc
                    torch.save({'epoch': epoch,
                                'model': model.state_dict()}, save_path + ".best.pt")
                    logger.info("Saved best epoch {0}, best acc {1}".format(epoch, best_acc))
Example 13: train
# Required module import: from pytorch_pretrained_bert import optimization [as alias]
# Or: from pytorch_pretrained_bert.optimization import BertAdam [as alias]
def train(config, model, train_iter, dev_iter, test_iter):
    start_time = time.time()
    model.train()
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}]
    # optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=config.learning_rate,
                         warmup=0.05,
                         t_total=len(train_iter) * config.num_epochs)
    total_batch = 0  # number of batches processed so far
    dev_best_loss = float('inf')
    last_improve = 0  # batch index at which the dev loss last improved
    flag = False  # whether training has gone a long time without improvement
    model.train()
    for epoch in range(config.num_epochs):
        print('Epoch [{}/{}]'.format(epoch + 1, config.num_epochs))
        for i, (trains, labels) in enumerate(train_iter):
            outputs = model(trains)
            model.zero_grad()
            loss = F.cross_entropy(outputs, labels)
            loss.backward()
            optimizer.step()
            if total_batch % 100 == 0:
                # report performance on the train and dev sets every 100 batches
                true = labels.data.cpu()
                predic = torch.max(outputs.data, 1)[1].cpu()
                train_acc = metrics.accuracy_score(true, predic)
                dev_acc, dev_loss = evaluate(config, model, dev_iter)
                if dev_loss < dev_best_loss:
                    dev_best_loss = dev_loss
                    torch.save(model.state_dict(), config.save_path)
                    improve = '*'
                    last_improve = total_batch
                else:
                    improve = ''
                time_dif = get_time_dif(start_time)
                msg = 'Iter: {0:>6}, Train Loss: {1:>5.2}, Train Acc: {2:>6.2%}, Val Loss: {3:>5.2}, Val Acc: {4:>6.2%}, Time: {5} {6}'
                print(msg.format(total_batch, loss.item(), train_acc, dev_loss, dev_acc, time_dif, improve))
                model.train()
            total_batch += 1
            if total_batch - last_improve > config.require_improvement:
                # stop training if the dev loss has not improved for config.require_improvement batches
                print("No optimization for a long time, auto-stopping...")
                flag = True
                break
        if flag:
            break
    test(config, model, test_iter)
Example 14: create_optimizer
# Required module import: from pytorch_pretrained_bert import optimization [as alias]
# Or: from pytorch_pretrained_bert.optimization import BertAdam [as alias]
def create_optimizer(
    self,
    num_train_optimization_steps,
    lr=2e-5,
    fp16_allreduce=False,
    warmup_proportion=None,
):
    """
    Method to create a BERT optimizer based on the inputs from the user.

    Args:
        num_train_optimization_steps (int): Number of optimization steps.
        lr (float): Learning rate of the Adam optimizer. Defaults to 2e-5.
        warmup_proportion (float, optional): Proportion of training to
            perform linear learning rate warmup for. E.g., 0.1 = 10% of
            training. Defaults to None.
        fp16_allreduce (bool, optional): If True, use fp16 compression
            during allreduce.

    Returns:
        pytorch_pretrained_bert.optimization.BertAdam: A BertAdam optimizer
        with the user-specified configuration.
    """
    if self.use_distributed:
        lr = lr * hvd.size()
    if warmup_proportion is None:
        optimizer = BertAdam(self.optimizer_params, lr=lr)
    else:
        optimizer = BertAdam(
            self.optimizer_params,
            lr=lr,
            t_total=num_train_optimization_steps,
            warmup=warmup_proportion,
        )
    if self.use_distributed:
        compression = (
            hvd.Compression.fp16 if fp16_allreduce else hvd.Compression.none
        )
        optimizer = hvd.DistributedOptimizer(
            optimizer,
            named_parameters=self.model.named_parameters(),
            compression=compression,
        )
    return optimizer
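Example 14 assumes a Horovod (hvd) environment when self.use_distributed is true. As a rough usage sketch, each worker would initialize Horovod, pin its GPU, create the optimizer, and broadcast the initial model state from rank 0. The trainer object and argument values below are hypothetical, not taken from the source:

import horovod.torch as hvd
import torch

hvd.init()
torch.cuda.set_device(hvd.local_rank())
optimizer = trainer.create_optimizer(num_train_optimization_steps=1000,
                                     lr=2e-5,
                                     fp16_allreduce=True,
                                     warmup_proportion=0.1)
# Make sure every worker starts from the same weights.
hvd.broadcast_parameters(trainer.model.state_dict(), root_rank=0)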