This article collects typical usage examples of the Python method torch.autograd.Variable.float. If you have been wondering what Variable.float does, how to use it, or where to find examples of it, the curated code samples below may help. You can also explore further examples of the containing class, torch.autograd.Variable.

The sections below show 15 code examples of Variable.float, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
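Before the collected examples, here is a quick orientation snippet (not one of the collected examples): Variable.float returns a copy of the wrapped tensor cast to torch.FloatTensor. The sketch below assumes the legacy (pre-0.4) torch.autograd.Variable API that the examples use; on modern PyTorch, Variable is a thin alias for Tensor and .float() behaves the same way.

import torch
from torch.autograd import Variable

# Wrap a double-precision tensor in a Variable (legacy API).
x = Variable(torch.randn(2, 3).double(), requires_grad=False)

# .float() returns a new Variable whose data is cast to torch.FloatTensor.
y = x.float()
print(x.data.type())  # torch.DoubleTensor
print(y.data.type())  # torch.FloatTensor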
Example 1: eval

# Required module import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def eval(self, epoch, save_score=False, loader_name=['test']):
    self.model.eval()
    self.print_log('Eval epoch: {}'.format(epoch + 1))
    for ln in loader_name:
        loss_value = []
        score_frag = []
        for batch_idx, (data, label) in enumerate(self.data_loader[ln]):
            data = Variable(
                data.float().cuda(self.output_device),
                requires_grad=False,
                volatile=True)
            label = Variable(
                label.long().cuda(self.output_device),
                requires_grad=False,
                volatile=True)
            output = self.model(data)
            loss = self.loss(output, label)
            score_frag.append(output.data.cpu().numpy())
            loss_value.append(loss.data[0])
        score = np.concatenate(score_frag)
        score_dict = dict(
            zip(self.data_loader[ln].dataset.sample_name, score))
        self.print_log('\tMean {} loss of {} batches: {}.'.format(
            ln, len(self.data_loader[ln]), np.mean(loss_value)))
        for k in self.arg.show_topk:
            self.print_log('\tTop{}: {:.2f}%'.format(
                k, 100 * self.data_loader[ln].dataset.top_k(score, k)))
        if save_score:
            with open('{}/epoch{}_{}_score.pkl'.format(
                    self.arg.work_dir, epoch + 1, ln), 'w') as f:
                pickle.dump(score_dict, f)
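Note that volatile=True and loss.data[0] in the example above are pre-0.4 PyTorch idioms. On PyTorch 0.4 and later, volatile was removed in favour of the torch.no_grad() context manager, and scalar values are read with .item(). A minimal sketch of the same evaluation loop body under that assumption (the names data_loader, model, criterion, output_device, loss_value and score_frag are placeholders mirroring the example):

# Sketch only: assumes PyTorch >= 0.4, where Variable/volatile are deprecated.
with torch.no_grad():
    for data, label in data_loader:
        data = data.float().cuda(output_device)
        label = label.long().cuda(output_device)
        output = model(data)
        loss = criterion(output, label)
        loss_value.append(loss.item())           # replaces loss.data[0]
        score_frag.append(output.cpu().numpy())  # .data no longer needed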
Example 2: test_module_cast

# Required module import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def test_module_cast(self):
    """Compiled modules can be cast to other data types"""
    @torch.jit.compile(nderivs=0)
    class Adder(nn.Module):
        def __init__(self):
            super(Adder, self).__init__()
            self.y = nn.Parameter(torch.randn(2, 2))

        def forward(self, x):
            return x + self.y

    x = Variable(torch.randn(2, 2).float())
    # Wrap it in a Sequential to make sure it works for submodules
    a = nn.Sequential(Adder()).float()

    def check_type(caster):
        caster(a)
        a(caster(x))
        with self.assertCompiled(a[0]):
            a(caster(x))

    check_type(lambda x: x)
    check_type(lambda x: x.double())
    if torch.cuda.is_available():
        check_type(lambda x: x.float().cuda())
        check_type(lambda x: x.double().cuda())
    self.assertEqual(a[0].hits, 4 if torch.cuda.is_available() else 2)
Example 3: masked_cross_entropy

# Required module import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def masked_cross_entropy(logits, target, length):
    length = Variable(torch.LongTensor(length)).cuda()
    """
    Args:
        logits: A Variable containing a FloatTensor of size
            (batch, max_len, num_classes) which contains the
            unnormalized probability for each class.
        target: A Variable containing a LongTensor of size
            (batch, max_len) which contains the index of the true
            class for each corresponding step.
        length: A Variable containing a LongTensor of size (batch,)
            which contains the length of each data in a batch.
    Returns:
        loss: An average loss value masked by the length.
    """
    # logits_flat: (batch * max_len, num_classes)
    logits_flat = logits.view(-1, logits.size(-1))
    # log_probs_flat: (batch * max_len, num_classes)
    log_probs_flat = functional.log_softmax(logits_flat)
    # target_flat: (batch * max_len, 1)
    target_flat = target.view(-1, 1)
    # losses_flat: (batch * max_len, 1)
    losses_flat = -torch.gather(log_probs_flat, dim=1, index=target_flat)
    # losses: (batch, max_len)
    losses = losses_flat.view(*target.size())
    # mask: (batch, max_len)
    mask = sequence_mask(sequence_length=length, max_len=target.size(1))
    losses = losses * mask.float()
    loss = losses.sum() / length.float().sum()
    return loss
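The helper sequence_mask is not shown in this excerpt. A common implementation, given here only as a hedged sketch of what the example likely relies on (the exact helper in the source repository may differ), builds a (batch, max_len) boolean mask in which entry (i, j) is true when j < length[i]:

def sequence_mask(sequence_length, max_len=None):
    # sequence_length: LongTensor Variable of shape (batch,)
    if max_len is None:
        max_len = sequence_length.data.max()
    batch_size = sequence_length.size(0)
    # (batch, max_len) matrix of column indices 0 .. max_len-1
    seq_range = torch.arange(0, max_len).long()
    seq_range_expand = seq_range.unsqueeze(0).expand(batch_size, max_len)
    seq_range_expand = Variable(seq_range_expand)
    if sequence_length.is_cuda:
        seq_range_expand = seq_range_expand.cuda()
    # Compare each column index against the row's length.
    seq_length_expand = sequence_length.unsqueeze(1).expand_as(seq_range_expand)
    return seq_range_expand < seq_length_expand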
Example 4: compute_stuff

# Required module import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def compute_stuff(mask_chosen, scores, weights, volumes):
    bs = weights.size(0)
    mask_chosen = Variable(mask_chosen.float())
    probs = 1e-6 + (1 - 2e-6) * F.softmax(scores)
    lgp = (torch.log(probs) * mask_chosen + torch.log(1 - probs) * (1 - mask_chosen)).sum(1)
    w = (weights * mask_chosen).sum(1)
    v = (volumes * mask_chosen).sum(1)
    return lgp, w, v
Example 5: train

# Required module import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def train(self, epoch, save_model=False):
    self.model.train()
    self.print_log('Training epoch: {}'.format(epoch + 1))
    loader = self.data_loader['train']
    lr = self.adjust_learning_rate(epoch)
    loss_value = []
    self.record_time()
    timer = dict(dataloader=0.001, model=0.001, statistics=0.001)
    for batch_idx, (data, label) in enumerate(loader):
        # get data
        data = Variable(
            data.float().cuda(self.output_device), requires_grad=False)
        label = Variable(
            label.long().cuda(self.output_device), requires_grad=False)
        timer['dataloader'] += self.split_time()

        # forward
        output = self.model(data)
        loss = self.loss(output, label)

        # backward
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        loss_value.append(loss.data[0])
        timer['model'] += self.split_time()

        # statistics
        if batch_idx % self.arg.log_interval == 0:
            self.print_log(
                '\tBatch({}/{}) done. Loss: {:.4f} lr:{:.6f}'.format(
                    batch_idx, len(loader), loss.data[0], lr))
        timer['statistics'] += self.split_time()

    # statistics of time consumption and loss
    proportion = {
        k: '{:02d}%'.format(int(round(v * 100 / sum(timer.values()))))
        for k, v in timer.items()
    }
    self.print_log(
        '\tMean training loss: {:.4f}.'.format(np.mean(loss_value)))
    self.print_log(
        '\tTime consumption: [Data]{dataloader}, [Network]{model}'.format(
            **proportion))

    if save_model:
        model_path = '{}/epoch{}_model.pt'.format(self.arg.work_dir,
                                                  epoch + 1)
        state_dict = self.model.state_dict()
        weights = OrderedDict([[k.split('module.')[-1],
                                v.cpu()] for k, v in state_dict.items()])
        torch.save(weights, model_path)
Example 6: theta_to_sampling_grid

# Required module import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def theta_to_sampling_grid(out_h, out_w, theta_aff=None, theta_tps=None, theta_aff_tps=None, use_cuda=True, tps_reg_factor=0):
    affTnf = GeometricTnf(out_h=out_h, out_w=out_w, geometric_model='affine', use_cuda=use_cuda)
    tpsTnf = GeometricTnf(out_h=out_h, out_w=out_w, geometric_model='tps', use_cuda=use_cuda, tps_reg_factor=tps_reg_factor)

    if theta_aff is not None:
        sampling_grid_aff = affTnf(image_batch=None,
                                   theta_batch=theta_aff.view(1, 2, 3),
                                   return_sampling_grid=True,
                                   return_warped_image=False)
    else:
        sampling_grid_aff = None

    if theta_tps is not None:
        sampling_grid_tps = tpsTnf(image_batch=None,
                                   theta_batch=theta_tps.view(1, -1),
                                   return_sampling_grid=True,
                                   return_warped_image=False)
    else:
        sampling_grid_tps = None

    if theta_aff is not None and theta_aff_tps is not None:
        sampling_grid_aff_tps = tpsTnf(image_batch=None,
                                       theta_batch=theta_aff_tps.view(1, -1),
                                       return_sampling_grid=True,
                                       return_warped_image=False)
        # put 1e10 value in region out of bounds of sampling_grid_aff
        sampling_grid_aff = sampling_grid_aff.clone()
        in_bound_mask_aff = Variable((sampling_grid_aff.data[:, :, :, 0] > -1) & (sampling_grid_aff.data[:, :, :, 0] < 1) & (sampling_grid_aff.data[:, :, :, 1] > -1) & (sampling_grid_aff.data[:, :, :, 1] < 1)).unsqueeze(3)
        in_bound_mask_aff = in_bound_mask_aff.expand_as(sampling_grid_aff)
        sampling_grid_aff = torch.add((in_bound_mask_aff.float() - 1) * (1e10), torch.mul(in_bound_mask_aff.float(), sampling_grid_aff))
        # put 1e10 value in region out of bounds of sampling_grid_aff_tps_comp
        sampling_grid_aff_tps_comp = F.grid_sample(sampling_grid_aff.transpose(2, 3).transpose(1, 2), sampling_grid_aff_tps).transpose(1, 2).transpose(2, 3)
        in_bound_mask_aff_tps = Variable((sampling_grid_aff_tps.data[:, :, :, 0] > -1) & (sampling_grid_aff_tps.data[:, :, :, 0] < 1) & (sampling_grid_aff_tps.data[:, :, :, 1] > -1) & (sampling_grid_aff_tps.data[:, :, :, 1] < 1)).unsqueeze(3)
        in_bound_mask_aff_tps = in_bound_mask_aff_tps.expand_as(sampling_grid_aff_tps_comp)
        sampling_grid_aff_tps_comp = torch.add((in_bound_mask_aff_tps.float() - 1) * (1e10), torch.mul(in_bound_mask_aff_tps.float(), sampling_grid_aff_tps_comp))
    else:
        sampling_grid_aff_tps_comp = None

    return (sampling_grid_aff, sampling_grid_tps, sampling_grid_aff_tps_comp)
Example 7: _score_candidates

# Required module import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def _score_candidates(self, cands, xe, encoder_output, hidden):
    # score each candidate separately
    # cands are exs_with_cands x cands_per_ex x words_per_cand
    # cview is total_cands x words_per_cand
    cview = cands.view(-1, cands.size(2))
    cands_xes = xe.expand(xe.size(0), cview.size(0), xe.size(2))
    sz = hidden.size()
    cands_hn = (
        hidden.view(sz[0], sz[1], 1, sz[2])
        .expand(sz[0], sz[1], cands.size(1), sz[2])
        .contiguous()
        .view(sz[0], -1, sz[2])
    )
    sz = encoder_output.size()
    cands_encoder_output = (
        encoder_output.contiguous()
        .view(sz[0], 1, sz[1], sz[2])
        .expand(sz[0], cands.size(1), sz[1], sz[2])
        .contiguous()
        .view(-1, sz[1], sz[2])
    )
    cand_scores = Variable(
        self.cand_scores.resize_(cview.size(0)).fill_(0))
    cand_lengths = Variable(
        self.cand_lengths.resize_(cview.size(0)).fill_(0))

    for i in range(cview.size(1)):
        output = self._apply_attention(cands_xes, cands_encoder_output, cands_hn) \
            if self.use_attention else cands_xes
        output, cands_hn = self.decoder(output, cands_hn)
        preds, scores = self.hidden_to_idx(output, dropout=False)
        cs = cview.select(1, i)
        non_nulls = cs.ne(self.NULL_IDX)
        cand_lengths += non_nulls.long()
        score_per_cand = torch.gather(scores, 1, cs.unsqueeze(1))
        cand_scores += score_per_cand.squeeze() * non_nulls.float()
        cands_xes = self.lt2dec(self.lt(cs).unsqueeze(0))

    # set empty scores to -1, so when divided by 0 they become -inf
    cand_scores -= cand_lengths.eq(0).float()
    # average the scores per token
    cand_scores /= cand_lengths.float()

    cand_scores = cand_scores.view(cands.size(0), cands.size(1))
    srtd_scores, text_cand_inds = cand_scores.sort(1, True)
    text_cand_inds = text_cand_inds.data
    return text_cand_inds
Example 8: main

# Required module import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('envname', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts', type=int, default=20,
                        help='Number of expert roll outs')
    parser.add_argument('--num_epochs', type=int, default=10, help='Number of epochs for training')
    # need number of epochs
    args = parser.parse_args()

    print('loading expert policy data for training')
    with open(args.expert_policy_file, 'rb') as handle:
        expert_data = pickle.load(handle)
    o_expert = expert_data['observations']
    a_expert = expert_data['actions']

    env = gym.make(args.envname)
    max_steps = args.max_timesteps or env.spec.timestep_limit
    rollout_list = list(range(args.num_rollouts))

    net = CNN()
    # TODO: initialize network parameters
    import torch.optim as optim
    optimizer = optim.Adam(net.parameters(), lr=5e-4, weight_decay=5e-7)
    criterion = nn.CrossEntropyLoss()
    loss_history = []
    for j in range(args.num_epochs):
        random.shuffle(rollout_list)
        for i in rollout_list:
            print("epoch %i iteration %i" % (j, i))
            for k in range(max_steps):
                index = i * max_steps + k
                o = Variable(torch.from_numpy(o_expert[index]).reshape(1, 1, 376))
                o = o.float()
                a_out = net.forward(o)
                a_label = torch.from_numpy(a_expert[index])
                a_label = a_label.long()
                loss = criterion(a_out, torch.max(a_label, 1)[1])
                loss.backward()
                loss_history.append(loss)
                optimizer.step()
            print(loss)

    plt.plot(loss_history, '-o')
    plt.xlabel('iteration')
    plt.ylabel('loss')
Example 9: train

# Required module import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def train(train_loader, model, criterion, optimizer, epoch, opt):
    """
    train for one epoch on the training set
    """
    # training mode
    model.train()

    for i, (input_points, _labels, segs) in enumerate(train_loader):
        # bz x 2048 x 3
        input_points = Variable(input_points)
        input_points = input_points.transpose(2, 1)
        ###############
        ##
        ###############
        _labels = _labels.long()
        segs = segs.long()
        labels_onehot = utils.labels_batch2one_hot_batch(_labels, opt.num_classes)
        labels_onehot = Variable(labels_onehot)  # we do not calculate the gradients here
        # labels_onehot.requires_grad = True
        segs = Variable(segs)

        if opt.cuda:
            input_points = input_points.cuda()
            segs = segs.cuda()  # must be a long cuda tensor
            labels_onehot = labels_onehot.float().cuda()  # this will be fed into the network

        optimizer.zero_grad()
        # forward, backward optimize
        # pred, _ = model(input_points, labels_onehot)
        pred, _, _ = model(input_points, labels_onehot)
        pred = pred.view(-1, opt.num_seg_classes)
        segs = segs.view(-1, 1)[:, 0]
        # debug_here()
        loss = criterion(pred, segs)
        loss.backward()

        ##############################
        # gradient clip stuff
        ##############################
        utils.clip_gradient(optimizer, opt.gradient_clip)

        optimizer.step()
        pred_choice = pred.data.max(1)[1]
        correct = pred_choice.eq(segs.data).cpu().sum()

        if i % opt.print_freq == 0:
            print('[%d: %d] train loss: %f accuracy: %f' % (i, len(train_loader), loss.data[0], correct / float(opt.batch_size * opt.num_points)))
Example 10: validate

# Required module import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def validate(val_loader, model, criterion, epoch, opt):
    """Perform validation on the validation set"""
    # switch to evaluate mode
    model.eval()

    top1 = utils.AverageMeter()

    for i, (input_points, _labels, segs) in enumerate(val_loader):
        # bz x 2048 x 3
        input_points = Variable(input_points, volatile=True)
        input_points = input_points.transpose(2, 1)
        _labels = _labels.long()  # this will be fed to the network
        segs = segs.long()
        labels_onehot = utils.labels_batch2one_hot_batch(_labels, opt.num_classes)
        segs = Variable(segs, volatile=True)
        labels_onehot = Variable(labels_onehot, volatile=True)

        if opt.cuda:
            input_points = input_points.cuda()
            segs = segs.cuda()  # must be a long cuda tensor
            labels_onehot = labels_onehot.float().cuda()  # this will be fed into the network

        # forward, backward optimize
        pred, _, _ = model(input_points, labels_onehot)
        pred = pred.view(-1, opt.num_seg_classes)
        segs = segs.view(-1, 1)[:, 0]  # min is already 0
        # debug_here()
        loss = criterion(pred, segs)

        pred_choice = pred.data.max(1)[1]
        correct = pred_choice.eq(segs.data).cpu().sum()
        acc = correct / float(opt.batch_size * opt.num_points)
        top1.update(acc, input_points.size(0))

        if i % opt.print_freq == 0:
            print('[%d: %d] val loss: %f accuracy: %f' % (i, len(val_loader), loss.data[0], acc))
            # print(tested_samples)
    return top1.avg
Example 11: fit_model

# Required module import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def fit_model(model, loss_op, optim_op, train_gen, val_gen, epochs,
              checkpoint_path, patience):
    """ Analog to the Keras fit_generator function.

    # Arguments:
        model: Model to be finetuned.
        loss_op: loss operation (e.g. BCEWithLogitsLoss or CrossEntropy)
        optim_op: optimization operation (e.g. Adam)
        train_gen: Training data iterator (DataLoader)
        val_gen: Validation data iterator (DataLoader)
        epochs: Number of epochs.
        checkpoint_path: Filepath where weights will be checkpointed to
            during training. This file will be rewritten by the function.
        patience: Patience for callback methods.
        verbose: Verbosity flag.

    # Returns:
        Accuracy of the trained model, ONLY if 'evaluate' is set.
    """
    # Save original checkpoint
    torch.save(model.state_dict(), checkpoint_path)

    model.eval()
    best_loss = np.mean([loss_op(model(Variable(xv)).squeeze(), Variable(yv.float()).squeeze()).data.cpu().numpy()[0] for xv, yv in val_gen])
    print("original val loss", best_loss)

    epoch_without_impr = 0
    for epoch in range(epochs):
        for i, data in enumerate(train_gen):
            X_train, y_train = data
            X_train = Variable(X_train, requires_grad=False)
            y_train = Variable(y_train, requires_grad=False)
            model.train()
            optim_op.zero_grad()
            output = model(X_train)
            loss = loss_op(output, y_train.float())
            loss.backward()
            clip_grad_norm(model.parameters(), 1)
            optim_op.step()

            acc = evaluate_using_acc(model, [(X_train.data, y_train.data)])
            print("== Epoch", epoch, "step", i, "train loss", loss.data.cpu().numpy()[0], "train acc", acc)

        model.eval()
        acc = evaluate_using_acc(model, val_gen)
        print("val acc", acc)

        val_loss = np.mean([loss_op(model(Variable(xv)).squeeze(), Variable(yv.float()).squeeze()).data.cpu().numpy()[0] for xv, yv in val_gen])
        print("val loss", val_loss)

        if best_loss is not None and val_loss >= best_loss:
            epoch_without_impr += 1
            print('No improvement over previous best loss: ', best_loss)

        # Save checkpoint
        if best_loss is None or val_loss < best_loss:
            best_loss = val_loss
            torch.save(model.state_dict(), checkpoint_path)
            print('Saving model at', checkpoint_path)

        # Early stopping
        if epoch_without_impr >= patience:
            break
Example 12: Variable

# Required module import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
np.copyto(input_img[j], data_loader.input_buff[image_buff_read_index])
data_loader.buff_status[image_buff_read_index] = 'empty'
image_buff_read_index = image_buff_read_index + 1
if image_buff_read_index >= data_loader.image_buffer_size:
    image_buff_read_index = 0
'''
# random noise z
inputs = inputs['image']
# noise_z = torch.randn(inputs.shape[0], 3, 4, 4)
noise_z = torch.randn(inputs.shape[0], 100)

if is_gpu_mode:
    # inputs = Variable(torch.from_numpy(inputs).float().cuda())
    inputs = Variable(inputs.float().cuda())
    noise_z = Variable(noise_z.cuda())
else:
    # inputs = Variable(torch.from_numpy(inputs).float())
    noise_z = Variable(noise_z)

# feedforward the inputs. generator
outputs_gen = gen_model(noise_z)

# pseudo zero-center
inputs = inputs - MEAN_VALUE_FOR_ZERO_CENTERED
outputs_gen = outputs_gen - MEAN_VALUE_FOR_ZERO_CENTERED

# feedforward the inputs. discriminator
output_disc_real = disc_model(inputs)
output_disc_fake = disc_model(outputs_gen)
Example 13: main

# Required module import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('expert_policy_data', type=str)
    parser.add_argument('envname', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts', type=int, default=20,
                        help='Number of expert roll outs')
    parser.add_argument('--num_epochs', type=int, default=50, help='Number of epochs for training')
    # need number of epochs
    args = parser.parse_args()

    print('loading expert policy data for training')
    with open(args.expert_policy_data, 'rb') as handle:
        expert_data = pickle.load(handle)

    # train the network
    torch.manual_seed(25)
    o_expert = expert_data['observations']
    (N, N_step, N_obs) = o_expert.shape
    a_expert = expert_data['actions']
    (N, N_step, _, N_action) = a_expert.shape

    import gym
    env = gym.make(args.envname)
    max_steps = args.max_timesteps or env.spec.timestep_limit

    net = CNN(N_obs, N_action)
    # TODO: initialize network parameters
    net.apply(init_weights)
    import torch.optim as optim
    optimizer = optim.Adam(net.parameters(), lr=1e-3, weight_decay=5e-9)
    criterion = nn.MSELoss()
    loss_history = []
    reward_mean_history = []
    reward_std_history = []
    for j in range(args.num_epochs):
        print("epoch %i" % j)
        net.train()
        (N, N_step, N_obs) = o_expert.shape
        (N, N_step, _, N_action) = a_expert.shape
        for k in range(max_steps):
            optimizer.zero_grad()
            index = k
            o = Variable(torch.from_numpy(o_expert[:, index, :]).reshape(N, 1, N_obs))
            o = o.float()
            a_out = net.forward(o)
            a_label = torch.from_numpy(a_expert[:, index, :].reshape(N, N_action, 1))
            loss = criterion(a_out.float(), a_label.float())
            loss.backward()
            optimizer.step()
        print("No DAGGER")
        print(loss / N)
        loss_history.append(loss / N)

        # test the network
        with tf.Session():
            tf_util.initialize()
            import gym
            env = gym.make(args.envname)
            max_steps = args.max_timesteps or env.spec.timestep_limit
            net.eval()
            r_new = []
            for i in range(int(args.num_rollouts) // 4):
                totalr = 0
                obs = env.reset()
                done = False
                steps = 0
                while not done:
                    obs = Variable(torch.Tensor(obs).reshape(1, 1, N_obs))
                    action_new = net.forward(obs).detach().numpy()
                    obs, r, done, _ = env.step(action_new.reshape(N_action))
                    totalr += r
                    steps += 1
                    if steps >= max_steps:
                        break
                r_new.append(totalr)
            u = np.average(np.array(r_new))
            sigma = np.std(np.array(r_new))
            reward_mean_history.append(u)
            reward_std_history.append(sigma)
            print('current reward mean', u)
            print('current reward std', sigma)

    fig0 = plt.figure(0)
    plt.plot(loss_history, '-o')
    plt.xlabel('iteration')
    plt.ylabel('loss')
    fig0.savefig('/Users/joker/imitation_learning/hopper.png')
    reward_mean_history = np.array(reward_mean_history)
    reward_std_history = np.array(reward_std_history)
    # print(reward_mean_history.shape)
    # print(reward_std_history.shape)
    print('mean:', reward_mean_history)
    print('std:', reward_std_history)
#.........part of the code omitted here.........
Example 14: main

# Required module import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('envname', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts', type=int, default=20,
                        help='Number of expert roll outs')
    parser.add_argument('--num_epochs', type=int, default=5, help='Number of epochs for training')
    args = parser.parse_args()

    print('loading and building expert policy')
    policy_fn = load_policy.load_policy(args.expert_policy_file)
    print('loaded and built')

    with tf.Session():
        tf_util.initialize()
        import gym
        env = gym.make(args.envname)
        max_steps = args.max_timesteps or env.spec.timestep_limit

        returns = []
        observations = []
        actions = []
        for i in range(args.num_rollouts):
            print('iter', i)
            this_obs = []
            this_act = []
            obs = env.reset()
            done = False
            totalr = 0.
            steps = 0
            while not done:
                action = policy_fn(obs[None, :])
                this_obs.append(obs)
                this_act.append(action)
                obs, r, done, _ = env.step(action)
                totalr += r
                steps += 1
                if args.render:
                    env.render()
                if steps % 100 == 0: print("%i/%i" % (steps, max_steps))
                if steps >= max_steps:
                    break
            returns.append(totalr)
            observations.append(this_obs)
            actions.append(this_act)

        print('returns', returns)
        print('mean return', np.mean(returns))
        print('std of return', np.std(returns))
        print((np.array(observations)).shape)
        print((np.array(actions)).shape)

        expert_data = {'observations': np.array(observations),
                       'actions': np.array(actions)}

    # train the network
    o_expert = expert_data['observations']
    (N, N_step, N_obs) = o_expert.shape
    a_expert = expert_data['actions']
    (N, N_step, _, N_action) = a_expert.shape
    net = CNN(N_obs, N_action)
    # TODO: initialize network parameters
    net.apply(init_weights)
    import torch.optim as optim
    optimizer = optim.Adam(net.parameters(), lr=1e-3, weight_decay=5e-12)
    criterion = nn.MSELoss()
    loss_history = []
    for j in range(args.num_epochs):
        print("epoch %i" % j)
        (N, N_step, N_obs) = o_expert.shape
        (N, N_step, _, N_action) = a_expert.shape
        for k in range(max_steps):
            index = k
            o = Variable(torch.from_numpy(o_expert[:, index, :]).reshape(N, 1, N_obs))
            o = o.float()
            a_out = net.forward(o)
            a_label = torch.from_numpy(a_expert[:, index, :].reshape(N, N_action, 1))
            loss = criterion(a_out.float(), a_label.float())
            loss.backward()
            loss_history.append(loss)
            optimizer.step()
        print("before DAGGER")
        print(loss)

    # implement DAgger
    with tf.Session():
        tf_util.initialize()
        import gym
        env = gym.make(args.envname)
        max_steps = args.max_timesteps or env.spec.timestep_limit
        o_new_expert = []
        a_new_expert = []
        for i in range(int(args.num_rollouts) // 2):
#.........part of the code omitted here.........
Example 15: predict

# Required module import: from torch.autograd import Variable [as alias]
# Or: from torch.autograd.Variable import float [as alias]
def predict(model, test_loader, color_map, opt):
    ##################################################
    # switch to evaluate mode
    ##################################################
    model.eval()

    ##################################################
    ## log file
    ##################################################
    # debug_here()
    flog = open(os.path.join(opt.test_results_dir, 'log.txt'), 'w')

    ################ Note ############################
    # each sample may have a different number of points,
    # so just use a batch size of 1
    ##################################################
    # debug_here()
    total_acc = 0.0
    total_seen = 0
    total_acc_iou = 0.0

    total_per_label_acc = np.zeros(opt.num_labels).astype(np.float32)
    total_per_label_iou = np.zeros(opt.num_labels).astype(np.float32)
    total_per_label_seen = np.zeros(opt.num_labels).astype(np.int32)

    # currently only support batch size equal to 1
    for shape_idx, (points_data, _labels, _seg_data) in enumerate(test_loader):
        if shape_idx % 10 == 0:
            print('{0}/{1}'.format(shape_idx, len(test_loader)))
        points_data = Variable(points_data, volatile=True)
        points_data = points_data.transpose(2, 1)
        _labels = _labels.long()
        _seg_data = _seg_data.long()

        labels_onehot = utils.labels_batch2one_hot_batch(_labels, opt.num_labels)
        labels_onehot = Variable(labels_onehot, volatile=True)  # we do not calculate the gradients here
        _seg_data = Variable(_seg_data, volatile=True)

        ##################################################
        ##
        ##################################################
        cur_gt_label = _labels[0][0]
        cur_label_one_hot = np.zeros((1, opt.num_labels), dtype=np.float32)
        cur_label_one_hot[0, cur_gt_label] = 1
        # ex: [12, 13, 14, 15]
        iou_pids = opt.label_id2pid_set[opt.label_ids[cur_gt_label]]
        # [0, 1, .., 11, 16, ..., 49]
        non_part_labels = list(set(np.arange(opt.num_seg_classes)).difference(set(iou_pids)))

        if opt.cuda:
            points_data = points_data.cuda()
            labels_onehot = labels_onehot.float().cuda()
            _seg_data = _seg_data.cuda()  # must be a long cuda tensor

        pred_seg, _, _ = model(points_data, labels_onehot)
        pred_seg = pred_seg.view(-1, opt.num_seg_classes)

        mini = np.min(pred_seg.data.numpy())
        # debug_here()
        pred_seg[:, torch.from_numpy(np.array(non_part_labels))] = mini - 1000

        pred_seg_choice = pred_seg.data.max(1)[1]

        ##################################################################
        ## groundtruth segment mask
        ##################################################################
        _seg_data = _seg_data.view(-1, 1)[:, 0]  # min is already 0

        seg_acc = np.mean(pred_seg_choice.numpy() == _seg_data.data.long().numpy())
        total_acc = seg_acc + total_acc

        total_seen += 1
        total_per_label_seen[cur_gt_label] += 1
        total_per_label_acc[cur_gt_label] += seg_acc

        ############################################
        ##
        ############################################
        mask = np.int32(pred_seg_choice.numpy() == _seg_data.data.long().numpy())
        total_iou = 0.0
        iou_log = ''
        for pid in iou_pids:
            n_pred = np.sum(pred_seg_choice.numpy() == pid)
            n_gt = np.sum(_seg_data.data.long().numpy() == pid)
            n_intersect = np.sum(np.int32(_seg_data.data.long().numpy() == pid) * mask)
            n_union = n_pred + n_gt - n_intersect
            iou_log += '_' + str(n_pred) + '_' + str(n_gt) + '_' + str(n_intersect) + '_' + str(n_union) + '_'
            if n_union == 0:
                total_iou += 1
                iou_log += '_1\n'
            else:
                total_iou += n_intersect * 1.0 / n_union
                iou_log += '_' + str(n_intersect * 1.0 / n_union) + '\n'

        avg_iou = total_iou / len(iou_pids)
        total_acc_iou += avg_iou
        total_per_label_iou[cur_gt_label] += avg_iou

        # debug_here()
        ########################################
        ## transpose data
        ########################################
#.........part of the code omitted here.........