

Python Variable.float Method Code Examples

This article collects typical usage examples of the Python method torch.autograd.Variable.float. If you are unsure what Variable.float does, how to use it, or what it is good for, the curated examples below should help. You can also explore further usage examples of torch.autograd.Variable, the class this method belongs to.


The following presents 15 code examples of the Variable.float method, ordered by popularity. Note that all of them use the legacy (pre-0.4) PyTorch API, in which Variable wraps a Tensor to enable autograd.
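
Before the collected examples, here is a minimal, self-contained sketch of what Variable.float does: it returns a copy of the wrapped tensor cast to 32-bit floats (torch.FloatTensor), leaving the original unchanged. This snippet is illustrative only and assumes the legacy pre-0.4 API used throughout this page:

import torch
from torch.autograd import Variable

# Wrap a double-precision tensor in a Variable (legacy pre-0.4 PyTorch).
x = Variable(torch.randn(2, 3).double(), requires_grad=False)

# .float() returns a new Variable whose data is cast to 32-bit floats;
# the original Variable keeps its dtype.
y = x.float()

print(x.data.type())  # torch.DoubleTensor
print(y.data.type())  # torch.FloatTensor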

Example 1: eval

# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
    def eval(self, epoch, save_score=False, loader_name=['test']):
        self.model.eval()
        self.print_log('Eval epoch: {}'.format(epoch + 1))
        for ln in loader_name:
            loss_value = []
            score_frag = []
            for batch_idx, (data, label) in enumerate(self.data_loader[ln]):
                data = Variable(
                    data.float().cuda(self.output_device),
                    requires_grad=False,
                    volatile=True)
                label = Variable(
                    label.long().cuda(self.output_device),
                    requires_grad=False,
                    volatile=True)
                output = self.model(data)
                loss = self.loss(output, label)
                score_frag.append(output.data.cpu().numpy())
                loss_value.append(loss.data[0])
            score = np.concatenate(score_frag)
            score_dict = dict(
                zip(self.data_loader[ln].dataset.sample_name, score))
            self.print_log('\tMean {} loss of {} batches: {}.'.format(
                ln, len(self.data_loader[ln]), np.mean(loss_value)))
            for k in self.arg.show_topk:
                self.print_log('\tTop{}: {:.2f}%'.format(
                    k, 100 * self.data_loader[ln].dataset.top_k(score, k)))

            if save_score:
                with open('{}/epoch{}_{}_score.pkl'.format(
                        self.arg.work_dir, epoch + 1, ln), 'wb') as f:  # binary mode for pickle
                    pickle.dump(score_dict, f)
Author: sharpstill | Project: st-gcn | Lines: 34 | Source: main.py
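
Side note on this example: volatile=True was the pre-0.4 way to disable autograd for inference; it was removed when Variable and Tensor merged in PyTorch 0.4. Purely as a hedged sketch (model, loss_fn, and output_device stand in for the attributes used above), the modern equivalent of the eval-time data handling would be:

# PyTorch >= 0.4 equivalent of the Variable(..., volatile=True) pattern:
with torch.no_grad():
    data = data.float().cuda(output_device)
    label = label.long().cuda(output_device)
    output = model(data)
    loss_value.append(loss_fn(output, label).item())  # .item() replaces loss.data[0]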

Example 2: test_module_cast

# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
    def test_module_cast(self):
        """Compiled modules can be casted to other data types"""
        @torch.jit.compile(nderivs=0)
        class Adder(nn.Module):
            def __init__(self):
                super(Adder, self).__init__()
                self.y = nn.Parameter(torch.randn(2, 2))

            def forward(self, x):
                return x + self.y

        x = Variable(torch.randn(2, 2).float())
        # Wrap it in a sequential to make sure it works for submodules
        a = nn.Sequential(Adder()).float()

        def check_type(caster):
            caster(a)
            a(caster(x))
            with self.assertCompiled(a[0]):
                a(caster(x))

        check_type(lambda x: x)
        check_type(lambda x: x.double())
        if torch.cuda.is_available():
            check_type(lambda x: x.float().cuda())
            check_type(lambda x: x.double().cuda())
        self.assertEqual(a[0].hits, 4 if torch.cuda.is_available() else 2)
Author: bhuWenDongchao | Project: pytorch | Lines: 29 | Source: test_jit.py

Example 3: masked_cross_entropy

# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def masked_cross_entropy(logits, target, length):
    """
    Args:
        logits: A Variable containing a FloatTensor of size
            (batch, max_len, num_classes) which contains the
            unnormalized probability for each class.
        target: A Variable containing a LongTensor of size
            (batch, max_len) which contains the index of the true
            class for each corresponding step.
        length: A Variable containing a LongTensor of size (batch,)
            which contains the length of each sequence in the batch.

    Returns:
        loss: An average loss value masked by the length.
    """
    length = Variable(torch.LongTensor(length)).cuda()

    # logits_flat: (batch * max_len, num_classes)
    logits_flat = logits.view(-1, logits.size(-1))
    # log_probs_flat: (batch * max_len, num_classes)
    log_probs_flat = functional.log_softmax(logits_flat)
    # target_flat: (batch * max_len, 1)
    target_flat = target.view(-1, 1)
    # losses_flat: (batch * max_len, 1)
    losses_flat = -torch.gather(log_probs_flat, dim=1, index=target_flat)
    # losses: (batch, max_len)
    losses = losses_flat.view(*target.size())
    # mask: (batch, max_len)
    mask = sequence_mask(sequence_length=length, max_len=target.size(1))
    losses = losses * mask.float()
    loss = losses.sum() / length.float().sum()
    return loss
Author: Cadene | Project: practical-pytorch | Lines: 35 | Source: masked_cross_entropy.py
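
To make the expected shapes concrete, here is a hypothetical call to masked_cross_entropy (a CUDA device is assumed, since the function moves length to the GPU, and sequence_mask is the helper the function references):

batch, max_len, num_classes = 2, 4, 5
logits = Variable(torch.randn(batch, max_len, num_classes).cuda())
target = Variable(torch.zeros(batch, max_len).long().cuda())
length = [4, 2]  # true length of each sequence; later positions are masked out
loss = masked_cross_entropy(logits, target, length)  # scalar Variable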

Example 4: compute_stuff

# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def compute_stuff(mask_chosen, scores, weights, volumes):
    bs = weights.size(0)
    mask_chosen = Variable(mask_chosen.float())
    # clamp probabilities into (0, 1) for numerical stability of the logs below
    probs = 1e-6 + (1-2e-6) * F.softmax(scores)
    # Bernoulli log-likelihood of the chosen mask under probs
    lgp = (torch.log(probs) * mask_chosen + torch.log(1-probs) * (1-mask_chosen)).sum(1)
    # total weight and volume selected by the mask
    w = (weights * mask_chosen).sum(1)
    v = (volumes * mask_chosen).sum(1)
    return lgp, w, v
Author: ParsonsZeng | Project: DiCoNet | Lines: 10 | Source: knapsack.py

Example 5: train

# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
    def train(self, epoch, save_model=False):
        self.model.train()
        self.print_log('Training epoch: {}'.format(epoch + 1))
        loader = self.data_loader['train']
        lr = self.adjust_learning_rate(epoch)
        loss_value = []

        self.record_time()
        timer = dict(dataloader=0.001, model=0.001, statistics=0.001)
        for batch_idx, (data, label) in enumerate(loader):

            # get data
            data = Variable(
                data.float().cuda(self.output_device), requires_grad=False)
            label = Variable(
                label.long().cuda(self.output_device), requires_grad=False)
            timer['dataloader'] += self.split_time()

            # forward
            output = self.model(data)
            loss = self.loss(output, label)

            # backward
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            loss_value.append(loss.data[0])
            timer['model'] += self.split_time()

            # statistics
            if batch_idx % self.arg.log_interval == 0:
                self.print_log(
                    '\tBatch({}/{}) done. Loss: {:.4f}  lr:{:.6f}'.format(
                        batch_idx, len(loader), loss.data[0], lr))
            timer['statistics'] += self.split_time()

        # statistics of time consumption and loss
        proportion = {
            k: '{:02d}%'.format(int(round(v * 100 / sum(timer.values()))))
            for k, v in timer.items()
        }
        self.print_log(
            '\tMean training loss: {:.4f}.'.format(np.mean(loss_value)))
        self.print_log(
            '\tTime consumption: [Data]{dataloader}, [Network]{model}'.format(
                **proportion))

        if save_model:
            model_path = '{}/epoch{}_model.pt'.format(self.arg.work_dir,
                                                      epoch + 1)
            state_dict = self.model.state_dict()
            weights = OrderedDict([[k.split('module.')[-1],
                                    v.cpu()] for k, v in state_dict.items()])
            torch.save(weights, model_path)
Author: sharpstill | Project: st-gcn | Lines: 56 | Source: main.py

Example 6: theta_to_sampling_grid

# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def theta_to_sampling_grid(out_h,out_w,theta_aff=None,theta_tps=None,theta_aff_tps=None,use_cuda=True,tps_reg_factor=0):
    affTnf = GeometricTnf(out_h=out_h,out_w=out_w,geometric_model='affine',use_cuda=use_cuda)
    tpsTnf = GeometricTnf(out_h=out_h,out_w=out_w,geometric_model='tps',use_cuda=use_cuda,tps_reg_factor=tps_reg_factor)

    if theta_aff is not None:
        sampling_grid_aff = affTnf(image_batch=None,
                                               theta_batch=theta_aff.view(1,2,3),
                                               return_sampling_grid=True,
                                               return_warped_image=False)
    else:
        sampling_grid_aff=None
    
    if theta_tps is not None:
        sampling_grid_tps = tpsTnf(image_batch=None,
                                               theta_batch=theta_tps.view(1,-1),
                                               return_sampling_grid=True,
                                               return_warped_image=False)
    else:
        sampling_grid_tps=None
        
    if theta_aff is not None and theta_aff_tps is not None:
        sampling_grid_aff_tps = tpsTnf(image_batch=None,
                                   theta_batch=theta_aff_tps.view(1,-1),
                                   return_sampling_grid=True,
                                   return_warped_image=False)
        
        # set regions out of bounds of sampling_grid_aff to -1e10
        sampling_grid_aff = sampling_grid_aff.clone()
        in_bound_mask_aff=Variable((sampling_grid_aff.data[:,:,:,0]>-1) & (sampling_grid_aff.data[:,:,:,0]<1) & (sampling_grid_aff.data[:,:,:,1]>-1) & (sampling_grid_aff.data[:,:,:,1]<1)).unsqueeze(3)
        in_bound_mask_aff=in_bound_mask_aff.expand_as(sampling_grid_aff)
        sampling_grid_aff = torch.add((in_bound_mask_aff.float()-1)*(1e10),torch.mul(in_bound_mask_aff.float(),sampling_grid_aff))       
        # set regions out of bounds of the composed grid sampling_grid_aff_tps_comp to -1e10
        sampling_grid_aff_tps_comp = F.grid_sample(sampling_grid_aff.transpose(2,3).transpose(1,2), sampling_grid_aff_tps).transpose(1,2).transpose(2,3)
        in_bound_mask_aff_tps=Variable((sampling_grid_aff_tps.data[:,:,:,0]>-1) & (sampling_grid_aff_tps.data[:,:,:,0]<1) & (sampling_grid_aff_tps.data[:,:,:,1]>-1) & (sampling_grid_aff_tps.data[:,:,:,1]<1)).unsqueeze(3)
        in_bound_mask_aff_tps=in_bound_mask_aff_tps.expand_as(sampling_grid_aff_tps_comp)
        sampling_grid_aff_tps_comp = torch.add((in_bound_mask_aff_tps.float()-1)*(1e10),torch.mul(in_bound_mask_aff_tps.float(),sampling_grid_aff_tps_comp))       
    else:
        sampling_grid_aff_tps_comp = None

    return (sampling_grid_aff,sampling_grid_tps,sampling_grid_aff_tps_comp) 
Author: codealphago | Project: weakalign | Lines: 42 | Source: eval_util.py
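
The masking expression in this example deserves a note: for a {0, 1} in-bound mask m and a grid g, (m - 1) * 1e10 + m * g keeps in-bound entries equal to g and drives out-of-bound entries to -1e10, so downstream consumers treat them as invalid. A tiny standalone illustration of the same idiom:

g = torch.FloatTensor([0.5, 1.7, -0.2, -3.0])  # sample grid coordinates
m = ((g > -1) & (g < 1)).float()               # 1.0 where within [-1, 1], else 0.0
masked = (m - 1) * 1e10 + m * g
print(masked)  # 0.5, -1e10, -0.2, -1e10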

Example 7: _score_candidates

# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
    def _score_candidates(self, cands, xe, encoder_output, hidden):
        # score each candidate separately

        # cands are exs_with_cands x cands_per_ex x words_per_cand
        # cview is total_cands x words_per_cand
        cview = cands.view(-1, cands.size(2))
        cands_xes = xe.expand(xe.size(0), cview.size(0), xe.size(2))
        sz = hidden.size()
        cands_hn = (
            hidden.view(sz[0], sz[1], 1, sz[2])
            .expand(sz[0], sz[1], cands.size(1), sz[2])
            .contiguous()
            .view(sz[0], -1, sz[2])
        )

        sz = encoder_output.size()
        cands_encoder_output = (
            encoder_output.contiguous()
            .view(sz[0], 1, sz[1], sz[2])
            .expand(sz[0], cands.size(1), sz[1], sz[2])
            .contiguous()
            .view(-1, sz[1], sz[2])
        )

        cand_scores = Variable(
                    self.cand_scores.resize_(cview.size(0)).fill_(0))
        cand_lengths = Variable(
                    self.cand_lengths.resize_(cview.size(0)).fill_(0))

        for i in range(cview.size(1)):
            output = self._apply_attention(cands_xes, cands_encoder_output, cands_hn) \
                    if self.use_attention else cands_xes

            output, cands_hn = self.decoder(output, cands_hn)
            preds, scores = self.hidden_to_idx(output, dropout=False)
            cs = cview.select(1, i)
            non_nulls = cs.ne(self.NULL_IDX)
            cand_lengths += non_nulls.long()
            score_per_cand = torch.gather(scores, 1, cs.unsqueeze(1))
            cand_scores += score_per_cand.squeeze() * non_nulls.float()
            cands_xes = self.lt2dec(self.lt(cs).unsqueeze(0))

        # set empty scores to -1, so when divided by 0 they become -inf
        cand_scores -= cand_lengths.eq(0).float()
        # average the scores per token
        cand_scores /= cand_lengths.float()

        cand_scores = cand_scores.view(cands.size(0), cands.size(1))
        srtd_scores, text_cand_inds = cand_scores.sort(1, True)
        text_cand_inds = text_cand_inds.data

        return text_cand_inds
Author: jojonki | Project: ParlAI | Lines: 54 | Source: seq2seq.py

Example 8: main

# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('envname', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts', type=int, default=20,
                        help='Number of expert roll outs')
    parser.add_argument('--num_epochs', type=int, default=10, help='Number of epochs for training')
    # need the number of training epochs
    args = parser.parse_args()
    
    print('loading expert policy data for training')
    with open(args.expert_policy_file, 'rb') as handle:
        expert_data = pickle.load(handle)
    o_expert=expert_data['observations']
    a_expert=expert_data['actions']
    env = gym.make(args.envname)
    max_steps = args.max_timesteps or env.spec.timestep_limit
    rollout_list=list(range(args.num_rollouts))
    
    net=CNN()
    
    # TODO: initialize network parameters
    
    import torch.optim as optim
    optimizer=optim.Adam(net.parameters(),lr=5e-4, weight_decay=5e-7)
    criterion=nn.CrossEntropyLoss()
    loss_history=[]
    for j in range(args.num_epochs):
        random.shuffle(rollout_list)
        for i in rollout_list:
            print("epoch %i iteration %i"%(j,i))
            for k in range(max_steps):
                index=i*max_steps+k
                o=Variable(torch.from_numpy(o_expert[index]).reshape(1,1,376))
                o=o.float()
                a_out=net.forward(o)
                a_label=torch.from_numpy(a_expert[index])
                a_label=a_label.long()
                loss=criterion(a_out, torch.max(a_label,1)[1])
                optimizer.zero_grad()  # clear gradients accumulated by the previous step
                loss.backward()
                loss_history.append(loss.data[0])  # store a scalar, not the Variable
                optimizer.step()
            print(loss)
    
    plt.plot(loss_history, '-o')
    plt.xlabel('iteration')
    plt.ylabel('loss')
Author: kevin5naug | Project: CS294_Deep_RL | Lines: 52 | Source: 2bcloneCNN.py

Example 9: train

# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def train(train_loader, model, criterion, optimizer, epoch, opt):
    """
    train for one epoch on the training set
    """
    # training mode
    model.train() 

    for i, (input_points, _labels, segs) in enumerate(train_loader):
        # bz x 2048 x 3 
        input_points = Variable(input_points)
        input_points = input_points.transpose(2, 1)
        ###############
        ##
        ###############
        _labels = _labels.long() 
        segs = segs.long() 
        labels_onehot = utils.labels_batch2one_hot_batch(_labels, opt.num_classes)
        labels_onehot = Variable(labels_onehot) # we do not calculate gradients here
        # labels_onehot.requires_grad = True
        segs = Variable(segs) 

        if opt.cuda:
            input_points = input_points.cuda() 
            segs = segs.cuda() # must be long cuda tensor 
            labels_onehot = labels_onehot.float().cuda()  # this will be fed into the network
        
        optimizer.zero_grad()
        # forward, backward optimize 
        # pred, _ = model(input_points, labels_onehot)
        pred, _, _ = model(input_points, labels_onehot)
        pred = pred.view(-1, opt.num_seg_classes)
        segs = segs.view(-1, 1)[:, 0] 
        # debug_here() 
        loss = criterion(pred, segs) 
        loss.backward() 
        ##############################
        # gradient clip stuff 
        ##############################
        utils.clip_gradient(optimizer, opt.gradient_clip)
        optimizer.step() 
        pred_choice = pred.data.max(1)[1]
        correct = pred_choice.eq(segs.data).cpu().sum()

        if i % opt.print_freq == 0:
            print('[%d: %d] train loss: %f accuracy: %f' %(i, len(train_loader), loss.data[0], correct/float(opt.batch_size * opt.num_points)))
Author: ShichaoJin | Project: pointnet2.pytorch | Lines: 47 | Source: main_part_seg.py

Example 10: validate

# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def validate(val_loader, model, criterion, epoch, opt):
    """Perform validation on the validation set"""
    # switch to evaluate mode
    model.eval()

    top1 = utils.AverageMeter()

    for i, (input_points, _labels, segs) in enumerate(val_loader):
        # bz x 2048 x 3 
        input_points = Variable(input_points, volatile=True)
        input_points = input_points.transpose(2, 1)
        _labels = _labels.long() # this will be fed to the network
        segs = segs.long()
        labels_onehot = utils.labels_batch2one_hot_batch(_labels, opt.num_classes)
        segs = Variable(segs, volatile=True) 
        labels_onehot = Variable(labels_onehot, volatile=True)

        if opt.cuda:
            input_points = input_points.cuda() 
            segs = segs.cuda() # must be long cuda tensor  
            labels_onehot = labels_onehot.float().cuda() # this will be fed into the network
        
        # forward, backward optimize 
        pred, _, _ = model(input_points, labels_onehot)
        pred = pred.view(-1, opt.num_seg_classes)
        segs = segs.view(-1, 1)[:, 0]  # min is already 0
        # debug_here() 
        loss = criterion(pred, segs) 

        pred_choice = pred.data.max(1)[1]
        correct = pred_choice.eq(segs.data).cpu().sum()

        acc = correct/float(opt.batch_size * opt.num_points)
        top1.update(acc, input_points.size(0))

        if i % opt.print_freq == 0:
            print('[%d: %d] val loss: %f accuracy: %f' %(i, len(val_loader), loss.data[0], acc))
            # print(tested_samples)
    return top1.avg
Author: ShichaoJin | Project: pointnet2.pytorch | Lines: 41 | Source: main_part_seg.py

Example 11: fit_model

# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def fit_model(model, loss_op, optim_op, train_gen, val_gen, epochs,
              checkpoint_path, patience):
    """ Analog to Keras fit_generator function.

    # Arguments:
        model: Model to be finetuned.
        loss_op: loss operation (BCEWithLogitsLoss or CrossEntropy for e.g.)
        optim_op: optimization operation (Adam e.g.)
        train_gen: Training data iterator (DataLoader)
        val_gen: Validation data iterator (DataLoader)
        epochs: Number of epochs.
        checkpoint_path: Filepath where weights will be checkpointed to
            during training. This file will be rewritten by the function.
        patience: Number of epochs without validation-loss improvement
            before training stops early.

    # Returns:
        None. The best weights (by validation loss) are checkpointed to
            checkpoint_path.
    """
    # Save original checkpoint
    torch.save(model.state_dict(), checkpoint_path)

    model.eval()
    best_loss = np.mean([loss_op(model(Variable(xv)).squeeze(), Variable(yv.float()).squeeze()).data.cpu().numpy()[0] for xv, yv in val_gen])
    print("original val loss", best_loss)

    epoch_without_impr = 0
    for epoch in range(epochs):
        for i, data in enumerate(train_gen):
            X_train, y_train = data
            X_train = Variable(X_train, requires_grad=False)
            y_train = Variable(y_train, requires_grad=False)
            model.train()
            optim_op.zero_grad()
            output = model(X_train)
            loss = loss_op(output, y_train.float())
            loss.backward()
            clip_grad_norm(model.parameters(), 1)
            optim_op.step()

            acc = evaluate_using_acc(model, [(X_train.data, y_train.data)])
            print("== Epoch", epoch, "step", i, "train loss", loss.data.cpu().numpy()[0], "train acc", acc)

        model.eval()
        acc = evaluate_using_acc(model, val_gen)
        print("val acc", acc)

        val_loss = np.mean([loss_op(model(Variable(xv)).squeeze(), Variable(yv.float()).squeeze()).data.cpu().numpy()[0] for xv, yv in val_gen])
        print("val loss", val_loss)
        if best_loss is not None and val_loss >= best_loss:
            epoch_without_impr += 1
            print('No improvement over previous best loss: ', best_loss)

        # Save checkpoint
        if best_loss is None or val_loss < best_loss:
            best_loss = val_loss
            torch.save(model.state_dict(), checkpoint_path)
            print('Saving model at', checkpoint_path)

        # Early stopping
        if epoch_without_impr >= patience:
            break
Author: cclauss | Project: torchMoji | Lines: 64 | Source: finetuning.py
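
For orientation, a hypothetical invocation of fit_model might look as follows; every name here is a placeholder (the DataLoaders are assumed to yield (inputs, labels) batches, as the loops above expect):

model = MyFinetunedModel()  # placeholder nn.Module
loss_op = nn.BCEWithLogitsLoss()
optim_op = optim.Adam(model.parameters(), lr=1e-4)
fit_model(model, loss_op, optim_op, train_gen=train_loader, val_gen=val_loader,
          epochs=20, checkpoint_path='checkpoint.pt', patience=3)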

Example 12: Variable

# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
                    np.copyto(input_img[j], data_loader.input_buff[image_buff_read_index])
                    data_loader.buff_status[image_buff_read_index] = 'empty'
                    image_buff_read_index = image_buff_read_index + 1

                    if image_buff_read_index >= data_loader.image_buffer_size:
                        image_buff_read_index = 0
                '''

                inputs = inputs['image']
                # random noise z
                #noise_z = torch.randn(inputs.shape[0], 3, 4, 4)
                noise_z = torch.randn(inputs.shape[0], 100)

                if is_gpu_mode:
                    #inputs = Variable(torch.from_numpy(inputs).float().cuda())
                    inputs = Variable(inputs.float().cuda())
                    noise_z = Variable(noise_z.cuda())
                else:
                    inputs = Variable(inputs.float())  # CPU path: wrap inputs as a Variable too
                    noise_z = Variable(noise_z)

                # feedforward the inputs. generator
                outputs_gen = gen_model(noise_z)

                # pseudo zero-center
                inputs = inputs - MEAN_VALUE_FOR_ZERO_CENTERED
                outputs_gen = outputs_gen - MEAN_VALUE_FOR_ZERO_CENTERED

                # feedforward the inputs. discriminator
                output_disc_real = disc_model(inputs)
                output_disc_fake = disc_model(outputs_gen)
Author: TheIllusion | Project: TheIllusionsLibraries | Lines: 33 | Source: wgan_gp_with_new_loader.py

Example 13: main

# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('expert_policy_data', type=str)
    parser.add_argument('envname', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts', type=int, default=20,
                        help='Number of expert roll outs')
    parser.add_argument('--num_epochs', type=int, default=50, help='Number of epochs for training')
    # need the number of training epochs
    args = parser.parse_args()
    
    print('loading expert policy data for training')
    with open(args.expert_policy_data, 'rb') as handle:
        expert_data = pickle.load(handle)
    
    #train the network
    torch.manual_seed(25)
    o_expert=expert_data['observations']
    (N,N_step,N_obs)=o_expert.shape
    a_expert=expert_data['actions']
    (N,N_step,_,N_action)=a_expert.shape
    import gym
    env = gym.make(args.envname)
    max_steps = args.max_timesteps or env.spec.timestep_limit
    net=CNN(N_obs, N_action)
    
    # TODO: initialize network parameters
    net.apply(init_weights)

    import torch.optim as optim
    optimizer=optim.Adam(net.parameters(),lr=1e-3, weight_decay=5e-9)
    criterion=nn.MSELoss()
    loss_history=[]
    reward_mean_history=[]
    reward_std_history=[]
    for j in range(args.num_epochs):
        print("epoch %i"%j)
        net.train()
        (N,N_step,N_obs)=o_expert.shape
        (N,N_step,_,N_action)=a_expert.shape
        for k in range(max_steps):
            optimizer.zero_grad()
            index=k
            o=Variable(torch.from_numpy(o_expert[:,index,:]).reshape(N,1,N_obs))
            o=o.float()
            a_out=net.forward(o)
            a_label=torch.from_numpy(a_expert[:,index,:].reshape(N,N_action,1))
            loss=criterion(a_out.float(), a_label.float())
            loss.backward()
            optimizer.step()
        print("No DAGGER")
        print(loss/N)
        loss_history.append(loss/N)
        
        #test the network
        with tf.Session():
            tf_util.initialize()

            import gym
            env = gym.make(args.envname)
            max_steps = args.max_timesteps or env.spec.timestep_limit
            net.eval()

            r_new=[]
            for i in range (int(args.num_rollouts)//4):
                totalr=0
                obs=env.reset()
                done=False
                steps=0
                while not done:
                    obs=Variable(torch.Tensor(obs).reshape(1,1,N_obs))
                    action_new=net.forward(obs).detach().numpy()
                    obs,r,done,_=env.step(action_new.reshape(N_action))
                    totalr+=r
                    steps+=1
                    if steps >= max_steps:
                        break
                r_new.append(totalr)
            u=np.average(np.array(r_new))
            sigma=np.std(np.array(r_new))
            reward_mean_history.append(u)
            reward_std_history.append(sigma)
            print('current reward mean', u)
            print('current reward std', sigma)
    fig0=plt.figure(0)
    plt.plot(loss_history, '-o')
    plt.xlabel('iteration')
    plt.ylabel('loss')
    fig0.savefig('/Users/joker/imitation_learning/hopper.png')
    
    reward_mean_history=np.array(reward_mean_history)
    reward_std_history=np.array(reward_std_history)
    #print(reward_mean_history.shape)
    #print(reward_std_history.shape)
    print('mean:', reward_mean_history)
    print('std:', reward_std_history)
    
#......... part of this code omitted .........
Author: kevin5naug | Project: CS294_Deep_RL | Lines: 103 | Source: bcloneCNN_hopper.py

Example 14: main

# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('envname', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts', type=int, default=20,
                        help='Number of expert roll outs')
    parser.add_argument('--num_epochs', type=int, default=5, help='Number of epochs for training')
    args = parser.parse_args()

    print('loading and building expert policy')
    policy_fn = load_policy.load_policy(args.expert_policy_file)
    print('loaded and built')

    with tf.Session():
        tf_util.initialize()

        import gym
        env = gym.make(args.envname)
        max_steps = args.max_timesteps or env.spec.timestep_limit

        returns = []
        observations = []
        actions = []
        for i in range(args.num_rollouts):
            print('iter', i)
            this_obs=[]
            this_act=[]
            obs = env.reset()
            done = False
            totalr = 0.
            steps = 0
            while not done:
                action = policy_fn(obs[None,:])
                this_obs.append(obs)
                this_act.append(action)
                obs, r, done, _ = env.step(action)
                totalr += r
                steps += 1
                if args.render:
                    env.render()
                if steps % 100 == 0: print("%i/%i"%(steps, max_steps))
                if steps >= max_steps:
                    break
            returns.append(totalr)
            observations.append(this_obs)
            actions.append(this_act)

        print('returns', returns)
        print('mean return', np.mean(returns))
        print('std of return', np.std(returns))
        print( (np.array(observations)).shape)
        print( (np.array(actions)).shape)
        expert_data = {'observations': np.array(observations),
                       'actions': np.array(actions)}
    
    #train the network
    o_expert=expert_data['observations']
    (N,N_step,N_obs)=o_expert.shape
    a_expert=expert_data['actions']
    (N,N_step,_,N_action)=a_expert.shape
    net=CNN(N_obs, N_action)
    
    # TODO: initialize network parameters
    net.apply(init_weights)

    import torch.optim as optim
    optimizer=optim.Adam(net.parameters(),lr=1e-3, weight_decay=5e-12)
    criterion=nn.MSELoss()
    loss_history=[]
    for j in range(args.num_epochs):
        print("epoch %i"%j)
        (N,N_step,N_obs)=o_expert.shape
        (N,N_step,_,N_action)=a_expert.shape
        for k in range(max_steps):
            index=k
            o=Variable(torch.from_numpy(o_expert[:,index,:]).reshape(N,1,N_obs))
            o=o.float()
            a_out=net.forward(o)
            a_label=torch.from_numpy(a_expert[:,index,:].reshape(N,N_action,1))
            loss=criterion(a_out.float(), a_label.float())
            optimizer.zero_grad()  # clear gradients accumulated by the previous step
            loss.backward()
            loss_history.append(loss.data[0])  # store a scalar, not the Variable
            optimizer.step()
        print("before DAGGER")
        print(loss) 

        #implement dagger
        with tf.Session():
            tf_util.initialize()

            import gym
            env = gym.make(args.envname)
            max_steps = args.max_timesteps or env.spec.timestep_limit

            o_new_expert=[]
            a_new_expert=[]
            for i in range (int(args.num_rollouts)//2):
#......... part of this code omitted .........
Author: kevin5naug | Project: CS294_Deep_RL | Lines: 103 | Source: humanoid.py

Example 15: predict

# Required import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def predict(model, test_loader,color_map, opt):
    ##################################################
    # switch to evaluate mode
    ##################################################
    model.eval()
    ##################################################
    ## log file 
    ##################################################
    # debug_here()
    flog = open(os.path.join(opt.test_results_dir, 'log.txt'), 'w')

    ################Note############################## 
    # each sample may have different number of points 
    # so just use batch of size 1
    ##################################################
    # debug_here() 
    total_acc = 0.0 
    total_seen = 0 
    total_acc_iou = 0.0 
    total_per_label_acc = np.zeros(opt.num_labels).astype(np.float32)
    total_per_label_iou = np.zeros(opt.num_labels).astype(np.float32)
    total_per_label_seen = np.zeros(opt.num_labels).astype(np.int32)
    # currently only support batch size equal to 1 
    for shape_idx, (points_data, _labels, _seg_data) in enumerate(test_loader):
        if shape_idx%10 == 0:
            print('{0}/{1}'.format(shape_idx, len(test_loader)))

        points_data = Variable(points_data, volatile=True)
        points_data = points_data.transpose(2, 1)
        _labels = _labels.long() 
        _seg_data = _seg_data.long() 
        labels_onehot = utils.labels_batch2one_hot_batch(_labels, opt.num_labels)
        labels_onehot = Variable(labels_onehot, volatile=True) # we do not calculate gradients here

        _seg_data = Variable(_seg_data, volatile=True) 
        ##################################################
        ##
        ##################################################

        cur_gt_label = _labels[0][0] 
        cur_label_one_hot = np.zeros((1, opt.num_labels), dtype=np.float32)
        cur_label_one_hot[0, cur_gt_label] = 1
        # ex: [12, 13, 14, 15]
        iou_pids = opt.label_id2pid_set[opt.label_ids[cur_gt_label]]
        # [0, 1, .., 11, 16, ..., 49]
        non_part_labels = list(set(np.arange(opt.num_seg_classes)).difference(set(iou_pids)))
        
        if opt.cuda:
            points_data = points_data.cuda() 
            labels_onehot = labels_onehot.float().cuda()
            _seg_data = _seg_data.cuda() # must be long cuda tensor  
        
        pred_seg, _, _ = model(points_data, labels_onehot)
        pred_seg = pred_seg.view(-1, opt.num_seg_classes)
        mini = np.min(pred_seg.data.numpy())
        # debug_here()
        pred_seg[:, torch.from_numpy(np.array(non_part_labels))] = mini - 1000
        pred_seg_choice = pred_seg.data.max(1)[1]

        ##################################################################
        ## groundtruth segment mask 
        ##################################################################
        _seg_data = _seg_data.view(-1, 1)[:, 0]  # min is already 0
        
        seg_acc = np.mean(pred_seg_choice.numpy() == _seg_data.data.long().numpy())
        total_acc = seg_acc + total_acc

        total_seen += 1

        total_per_label_seen[cur_gt_label] += 1
        total_per_label_acc[cur_gt_label] += seg_acc
        ############################################
        ##
        ############################################
        mask = np.int32(pred_seg_choice.numpy() == _seg_data.data.long().numpy())
        total_iou = 0.0
        iou_log = ''

        for pid in iou_pids:
            n_pred = np.sum(pred_seg_choice.numpy() == pid)
            n_gt = np.sum(_seg_data.data.long().numpy() == pid)
            n_intersect = np.sum(np.int32(_seg_data.data.long().numpy() == pid) * mask)
            n_union = n_pred + n_gt - n_intersect
            iou_log += '_' + str(n_pred)+'_'+str(n_gt)+'_'+str(n_intersect)+'_'+str(n_union)+'_'
            if n_union == 0:
                total_iou += 1
                iou_log += '_1\n'
            else:
                total_iou += n_intersect * 1.0 / n_union
                iou_log += '_'+str(n_intersect * 1.0 / n_union)+'\n'



        avg_iou = total_iou / len(iou_pids)
        total_acc_iou += avg_iou
        total_per_label_iou[cur_gt_label] += avg_iou
        # debug_here()
        ########################################
        ## transpose data 
        ########################################
#......... part of this code omitted .........
Author: ShichaoJin | Project: pointnet2.pytorch | Lines: 103 | Source: eval_part_seg_h5.py


Note: The torch.autograd.Variable.float method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. For redistribution and use, please refer to each project's License. Do not repost without permission.