

Python optim.AdamW Method Code Examples

This article collects and summarizes typical usage examples of the torch.optim.AdamW method in Python. If you are unsure how to use optim.AdamW, how to call it, or what real-world usage looks like, the curated code examples below may help. You can also explore further usage examples for the containing module, torch.optim.


The following 13 code examples demonstrate the optim.AdamW method; by default they are sorted by popularity.
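Before the collected examples, here is a minimal, self-contained sketch of the basic torch.optim.AdamW workflow: construct the optimizer over a model's parameters, then run the usual zero_grad / backward / step loop. The toy model, data, and hyperparameter values are illustrative placeholders, not taken from any of the projects below.

import torch
from torch import nn, optim

# Toy model and random data, used only to illustrate the AdamW training loop.
model = nn.Linear(10, 1)
optimizer = optim.AdamW(model.parameters(), lr=3e-4, betas=(0.9, 0.999),
                        eps=1e-8, weight_decay=1e-2)

inputs = torch.randn(32, 10)
targets = torch.randn(32, 1)
loss_fn = nn.MSELoss()

for step in range(100):
    optimizer.zero_grad()                   # clear gradients from the previous step
    loss = loss_fn(model(inputs), targets)
    loss.backward()                         # backpropagate
    optimizer.step()                        # AdamW update with decoupled weight decay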

Example 1: test_adam_equiv

# Required import: from torch import optim [as alias]
# Or: from torch.optim import AdamW [as alias]
def test_adam_equiv():
    lr = 3e-4
    betas = (0.9, 0.999)
    weight_decay = 0.5e-4
    eps = 1e-8

    def adam_ctor(params):
        return Adam(params, lr=lr, betas=betas, weight_decay=weight_decay, eps=eps)

    def qhadam_ctor(params):
        return QHAdam(params, lr=lr, betas=betas, weight_decay=weight_decay, nus=(1.0, 1.0), eps=eps)

    def adamw_ctor(params):
        return AdamW(params, lr=lr, betas=betas, weight_decay=weight_decay, eps=eps)

    def qhadamw_ctor(params):
        return QHAdamW(params, lr=lr, betas=betas, weight_decay=weight_decay, nus=(1.0, 1.0), eps=eps)

    assert_optimizers_equal(adam_ctor, qhadam_ctor)
    assert_optimizers_equal(adamw_ctor, qhadamw_ctor)
Developer ID: facebookresearch, Project: qhoptim, Lines of code: 21, Source file: test_qhadam.py

Example 2: build_optimizer

# Required import: from torch import optim [as alias]
# Or: from torch.optim import AdamW [as alias]
def build_optimizer(params, opt):
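    # positional arguments after params: learning rate first, then optimizer-specific hyperparameters (alpha/betas, eps)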
    if opt.optim == 'rmsprop':
        return optim.RMSprop(params, opt.learning_rate, opt.optim_alpha, opt.optim_epsilon, weight_decay=opt.weight_decay)
    elif opt.optim == 'adagrad':
        return optim.Adagrad(params, opt.learning_rate, weight_decay=opt.weight_decay)
    elif opt.optim == 'sgd':
        return optim.SGD(params, opt.learning_rate, weight_decay=opt.weight_decay)
    elif opt.optim == 'sgdm':
        return optim.SGD(params, opt.learning_rate, opt.optim_alpha, weight_decay=opt.weight_decay)
    elif opt.optim == 'sgdmom':
        return optim.SGD(params, opt.learning_rate, opt.optim_alpha, weight_decay=opt.weight_decay, nesterov=True)
    elif opt.optim == 'adam':
        return optim.Adam(params, opt.learning_rate, (opt.optim_alpha, opt.optim_beta), opt.optim_epsilon, weight_decay=opt.weight_decay)
    elif opt.optim == 'adamw':
        return optim.AdamW(params, opt.learning_rate, (opt.optim_alpha, opt.optim_beta), opt.optim_epsilon, weight_decay=opt.weight_decay)
    else:
        raise Exception("bad option opt.optim: {}".format(opt.optim)) 
Developer ID: ruotianluo, Project: self-critical.pytorch, Lines of code: 19, Source file: misc.py

Example 3: _get_optimizer

# Required import: from torch import optim [as alias]
# Or: from torch.optim import AdamW [as alias]
def _get_optimizer(self, params, config):
        return optim.AdamW(params=params, lr=config['learning_rate'], weight_decay=config['weight_decay']) 
Developer ID: automl, Project: Auto-PyTorch, Lines of code: 4, Source file: optimizer.py

Example 4: configure_optimizers

# Required import: from torch import optim [as alias]
# Or: from torch.optim import AdamW [as alias]
def configure_optimizers(self):
        "Prepare optimizer"

        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [
                    p
                    for n, p in self.model.named_parameters()
                    if not any(nd in n for nd in no_decay)
                ],
                "weight_decay": self.hparams["weight_decay"],
            },
            {
                "params": [
                    p
                    for n, p in self.model.named_parameters()
                    if any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.0,
            },
        ]
        optimizer = AdamW(
            optimizer_grouped_parameters,
            lr=self.hparams["learning_rate"],
            eps=self.hparams["adam_epsilon"],
        )

        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=self.hparams["warmup_steps"],
            num_training_steps=self.hparams["num_steps"],
        )

        return [optimizer], [scheduler] 
Developer ID: minimaxir, Project: aitextgen, Lines of code: 37, Source file: train.py

Example 5: __init__

# Required import: from torch import optim [as alias]
# Or: from torch.optim import AdamW [as alias]
def __init__(self, args):
        self.model = BaseModel(
            args.screen_size[0]*args.frame_num, args.button_num, args.variable_num, args.frame_num, args.batch_size
        ).to(device)
        if args.load is not None:
            # load weights
            state_dict = torch.load(args.load)
            self.model.load_state_dict(state_dict)

        self.discount = args.episode_discount
        self.steps = []
        self.rewards = []
        self.non_terminals = []
        self.non_terminal = torch.ones(args.batch_size, 1)

        self.cells = Cells(2, self.model.screen_feature_num, args.batch_size)
        self.init_cells = self.cells.clone()

        self.optimizer = optim.AdamW(self.model.parameters(), lr=args.learning_rate, weight_decay=1e-6, amsgrad=True)
        '''
        if args.load is not None and os.path.isfile(args.load + '_optimizer.pth'):
            optimizer_dict = torch.load(args.load+'_optimizer.pth')
            optimizer.load_state_dict(optimizer_dict)
        '''
        self.optimizer.zero_grad()
        self.args = args 
Developer ID: akolishchak, Project: doom-net-pytorch, Lines of code: 28, Source file: ppo_map.py

Example 6: create_optimizer

# Required import: from torch import optim [as alias]
# Or: from torch.optim import AdamW [as alias]
def create_optimizer(args, model_params):
    if args.optim == 'sgd':
        return optim.SGD(model_params, args.lr, momentum=args.momentum,
                         weight_decay=args.weight_decay)
    elif args.optim == 'adam':
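        # note: this project constructs torch.optim.AdamW even for the 'adam' option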
        return optim.AdamW(model_params, args.lr, betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay)
    elif args.optim == 'adamod':
        return AdaMod(model_params, args.lr, betas=(args.beta1, args.beta2),
                      beta3=args.beta3, weight_decay=args.weight_decay) 
Developer ID: lancopku, Project: AdaMod, Lines of code: 12, Source file: main.py

Example 7: get_optimizer

# Required import: from torch import optim [as alias]
# Or: from torch.optim import AdamW [as alias]
def get_optimizer(params, optimizer, lr, weight_decay=0.0):
    if optimizer == "adam":
        return optim.Adam(params, lr=lr, weight_decay=weight_decay)
    elif optimizer == "adamw":
        return optim.AdamW(params, lr=lr, weight_decay=weight_decay)
    elif optimizer == "sgd":
        return optim.SGD(params, lr=lr, weight_decay=weight_decay, momentum=0.9)
    elif optimizer == "sgd_nesterov":
        return optim.SGD(params, lr=lr, weight_decay=weight_decay, momentum=0.9, nesterov=True)

    raise NotImplementedError 
Developer ID: arthurdouillard, Project: incremental_learning.pytorch, Lines of code: 13, Source file: factory.py

Example 8: get_std_opt

# Required import: from torch import optim [as alias]
# Or: from torch.optim import AdamW [as alias]
def get_std_opt(model, optim_func='adam', factor=1, warmup=2000):
    # return NoamOpt(model.tgt_embed[0].d_model, 2, 4000,
    #         torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))
    optim_func = dict(adam=torch.optim.Adam,
                      adamw=torch.optim.AdamW)[optim_func]
    return NoamOpt(model.d_model, factor, warmup,
            optim_func(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9)) 
Developer ID: ruotianluo, Project: self-critical.pytorch, Lines of code: 9, Source file: misc.py

Example 9: get_optimizer

# Required import: from torch import optim [as alias]
# Or: from torch.optim import AdamW [as alias]
def get_optimizer(args, model):
    logger = get_logger(args.log_name)
    args.warmup_steps = math.ceil(args.warmup_prop * args.max_train_steps)
    if args.optimizer == 'adamw-bertology':
        if args.different_lr:
            optimizer_grouped_parameters = _get_bertology_different_lr_grouped_parameters(args, model)
        else:
            optimizer_grouped_parameters = _get_bertology_optimizer_grouped_parameters(args, model)
        optimizer = huggingfaceOptim.AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon,
                                           betas=(args.beta1, args.beta2))
        scheduler = huggingfaceOptim.WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps,
                                                          t_total=args.max_train_steps)
        if args.local_rank in [-1, 0]:
            logger.info('Use Huggingface\'s AdamW Optimizer')
    elif args.optimizer == 'adamw-torch':
        try:
            from torch.optim import AdamW
        except ImportError as e:
            debug_print(f'torch version: {torch.__version__}')
            raise e
        if args.different_lr:
            optimizer_grouped_parameters = _get_bertology_different_lr_grouped_parameters(args, model)
        else:
            optimizer_grouped_parameters = _get_bertology_optimizer_grouped_parameters(args, model)
        optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon,
                          betas=(args.beta1, args.beta2))
        scheduler = huggingfaceOptim.WarmupLinearSchedule(optimizer,
                                                          warmup_steps=args.warmup_steps,
                                                          t_total=args.max_train_steps)
    elif args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate)
        scheduler = None
    elif args.optimizer == 'adagrad':
        optimizer = torch.optim.Adagrad(model.parameters(), lr=args.learning_rate)
        scheduler = None
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=args.betas, eps=args.eps,
                                     weight_decay=args.weight_decay)
        scheduler = None
    elif args.rnn_optimizer == 'adamax':
        optimizer = torch.optim.Adamax(model.parameters())  # use default lr
        scheduler = None
    else:
        raise Exception("Unsupported optimizer: {}".format(args.optimizer))
    return optimizer, scheduler 
Developer ID: NLPInBLCU, Project: BiaffineDependencyParsing, Lines of code: 47, Source file: get_optimizer.py

Example 10: _set_optimizer

# Required import: from torch import optim [as alias]
# Or: from torch.optim import AdamW [as alias]
def _set_optimizer(self, model: EmmentalModel) -> None:
        """Set optimizer for learning process.

        Args:
          model: The model to set up the optimizer.
        """
        optimizer_config = Meta.config["learner_config"]["optimizer_config"]
        opt = optimizer_config["optimizer"]

        # If Meta.config["learner_config"]["optimizer_config"]["parameters"] is None,
        # create a parameter group with all parameters in the model, else load user
        # specified parameter groups.
        if optimizer_config["parameters"] is None:
            parameters = filter(lambda p: p.requires_grad, model.parameters())
        else:
            parameters = optimizer_config["parameters"](model)

        optim_dict = {
            # PyTorch optimizer
            "asgd": optim.ASGD,  # type: ignore
            "adadelta": optim.Adadelta,  # type: ignore
            "adagrad": optim.Adagrad,  # type: ignore
            "adam": optim.Adam,  # type: ignore
            "adamw": optim.AdamW,  # type: ignore
            "adamax": optim.Adamax,  # type: ignore
            "lbfgs": optim.LBFGS,  # type: ignore
            "rms_prop": optim.RMSprop,  # type: ignore
            "r_prop": optim.Rprop,  # type: ignore
            "sgd": optim.SGD,  # type: ignore
            "sparse_adam": optim.SparseAdam,  # type: ignore
            # Customize optimizer
            "bert_adam": BertAdam,
        }

        if opt in ["lbfgs", "r_prop", "sparse_adam"]:
            optimizer = optim_dict[opt](
                parameters,
                lr=optimizer_config["lr"],
                **optimizer_config[f"{opt}_config"],
            )
        elif opt in optim_dict.keys():
            optimizer = optim_dict[opt](
                parameters,
                lr=optimizer_config["lr"],
                weight_decay=optimizer_config["l2"],
                **optimizer_config[f"{opt}_config"],
            )
        elif isinstance(opt, optim.Optimizer):  # type: ignore
            optimizer = opt(parameters)
        else:
            raise ValueError(f"Unrecognized optimizer option '{opt}'")

        self.optimizer = optimizer

        if Meta.config["meta_config"]["verbose"]:
            logger.info(f"Using optimizer {self.optimizer}") 
Developer ID: SenWu, Project: emmental, Lines of code: 58, Source file: learner.py

Example 11: train_model

# Required import: from torch import optim [as alias]
# Or: from torch.optim import AdamW [as alias]
def train_model(self, model, args, epoch_num=10):
        dataset = MCTSDataset(args)
        training_data_loader = DataLoader(dataset=dataset, num_workers=1, batch_size=args.batch_size, shuffle=True)

        model.train()
        optimizer = optim.AdamW(model.parameters(), lr=5e-3, weight_decay=1e-4, amsgrad=True)

        mean_value_loss = 0
        mean_policy_loss = 0
        mean_accuracy = 0
        updates = 0

        batch_time = time.time()
        for epoch in range(epoch_num):
            for batch, (state, target_action, target_value) in enumerate(training_data_loader):
                state, target_action, target_value = state.to(device), target_action.to(device), target_value.to(device)

                optimizer.zero_grad()
                value, log_action = model(state)
                value_loss = F.mse_loss(value, target_value[:, None])
                policy_loss = F.nll_loss(log_action, target_action)
                loss = value_loss + policy_loss

                loss.backward()
                optimizer.step()

                grads = []
                weights = []
                for p in model.parameters():
                    if p.grad is not None:
                        grads.append(p.grad.data.view(-1))
                        weights.append(p.data.view(-1))
                grads = torch.cat(grads, 0)
                weights = torch.cat(weights, 0)
                grads_norm = grads.norm()
                weights_norm = weights.norm()

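                # NaN guard: the gradient norm compares unequal to itself only if it is NaN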
                assert grads_norm == grads_norm

                _, pred_action = log_action.max(1)
                accuracy = (pred_action == target_action.data).float().mean()

                if epoch == epoch_num - 1:
                    mean_value_loss += value_loss.item()
                    mean_policy_loss += policy_loss.item()
                    mean_accuracy += accuracy
                    updates += 1

        mean_value_loss /= updates
        mean_policy_loss /= updates
        mean_accuracy /= updates

        print(
            "value_loss = {:f} policy_loss = {:f} accuracy = {:f}, train_time = {:.3f}".format(mean_value_loss,
                                                                                               mean_policy_loss,
                                                                                               mean_accuracy,
                                                                                               time.time() - batch_time))

        torch.save(model.state_dict(), args.checkpoint_file)
        torch.save(optimizer.state_dict(), args.checkpoint_file + '_optimizer.pth') 
Developer ID: akolishchak, Project: doom-net-pytorch, Lines of code: 62, Source file: mcts_base.py

Example 12: train

# Required import: from torch import optim [as alias]
# Or: from torch.optim import AdamW [as alias]
def train(args):

    train_set = DoomDataset(args.h5_path)
    np.save('action_set', train_set.action_sets)
    training_data_loader = DataLoader(dataset=train_set, num_workers=2, batch_size=100, shuffle=True)

    model = BaseModel(train_set.input_shape[0], len(train_set.action_sets), 3, args.frame_num).to(device)

    if args.load is not None and os.path.isfile(args.load):
        print("loading model parameters {}".format(args.load))
        source_model = torch.load(args.load)
        model.load_state_dict(source_model.state_dict())
        del source_model

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=5e-4)

    for epoch in range(1500000):
        running_loss = 0
        running_accuracy = 0
        batch_time = time.time()
        for batch, (screens, variables, labels) in enumerate(training_data_loader):
            screens, variables, labels = screens.to(device), variables.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(screens, variables)[0]
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            _, pred = outputs.max(1)
            accuracy = (pred == labels).float().mean()
            running_accuracy += accuracy

            batches_per_print = 10
            if batch % batches_per_print == batches_per_print-1:  # print every batches_per_print mini-batches
                print(
                    '[{:d}, {:5d}] loss: {:.3f}, accuracy: {:.3f}, time: {:.6f}'.format(
                    epoch + 1, batch + 1, running_loss/batches_per_print, running_accuracy/batches_per_print, (time.time()-batch_time)/batches_per_print
                    )
                )
                running_loss = 0
                running_accuracy = 0
                batch_time = time.time()

        if epoch % args.checkpoint_rate == args.checkpoint_rate - 1:
            torch.save(model, args.checkpoint_file) 
Developer ID: akolishchak, Project: doom-net-pytorch, Lines of code: 52, Source file: imitation.py

Example 13: train

# Required import: from torch import optim [as alias]
# Or: from torch.optim import AdamW [as alias]
def train(args):

    data_file = h5py.File(args.h5_path, 'r')
    screens = data_file['screens']
    variables = data_file['variables']
    labels = data_file['action_labels']
    print('Dataset size =', len(screens))
    action_sets = data_file['action_sets'][:]
    episodes = data_file['episodes'][:]
    input_shape = screens[0].shape
    train_generator = data_generator(args, screens, variables, labels, episodes, args.skiprate)

    np.save('action_set', action_sets)

    model = BaseModel(input_shape[0]*args.frame_num, len(action_sets), variables.shape[1], args.frame_num).to(device)

    if args.load is not None and os.path.isfile(args.load):
        print("loading model parameters {}".format(args.load))
        source_model = torch.load(args.load)
        model.load_state_dict(source_model.state_dict())
        del source_model

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=5e-4)
    optimizer.zero_grad()
    running_loss = 0
    running_accuracy = 0
    batch_time = time.time()

    for batch, (screens, variables, labels, terminals) in enumerate(train_generator):
        labels = labels.to(device)
        outputs, _ = model(*model.transform_input(screens, variables))
        loss = criterion(outputs, labels)
        model.set_terminal(terminals)

        running_loss += loss.item()
        _, pred = outputs.max(1)
        accuracy = (pred == labels).float().mean()
        running_accuracy += accuracy

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % args.episode_length == args.episode_length - 1:
            running_loss /= args.episode_length
            running_accuracy /= args.episode_length

            print(
                '[{:d}] loss: {:.3f}, accuracy: {:.3f}, time: {:.6f}'.format(
                    batch + 1, running_loss, running_accuracy, time.time()-batch_time
                )
            )
            running_loss = 0
            running_accuracy = 0
            batch_time = time.time()

        if batch % args.checkpoint_rate == args.checkpoint_rate - 1:
            torch.save(model, args.checkpoint_file) 
Developer ID: akolishchak, Project: doom-net-pytorch, Lines of code: 61, Source file: imitation_frames.py


Note: The torch.optim.AdamW method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by their respective authors; copyright of the source code belongs to the original authors. For redistribution and use, please refer to the license of the corresponding project; do not reproduce without permission.