This article collects typical usage examples of Python's torch.optim.Adam.zero_grad method. If you are wondering what Adam.zero_grad does, how to call it, or where to find examples of it, the hand-picked code samples below may help. You can also read further about its containing class, torch.optim.Adam.
Shown below are 9 code examples of Adam.zero_grad, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
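Before the examples, here is a minimal, self-contained sketch of the pattern they all share: zero_grad() clears the gradients left over from the previous step (PyTorch accumulates gradients by default), backward() computes fresh gradients, and step() applies the Adam update. The model, data, and hyperparameters below are hypothetical placeholders rather than code taken from any example on this page.

import torch
from torch.nn import Linear, MSELoss
from torch.optim import Adam

model = Linear(10, 1)                            # toy one-layer model (placeholder)
optimizer = Adam(model.parameters(), lr=1e-3)
loss_fn = MSELoss()
inputs = torch.randn(32, 10)                     # dummy batch of 32 samples
targets = torch.randn(32, 1)

for step in range(100):
    optimizer.zero_grad()                        # clear gradients accumulated by the previous iteration
    loss = loss_fn(model(inputs), targets)
    loss.backward()                              # populate .grad on every parameter
    optimizer.step()                             # update the parameters with Adam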
Example 1: learn
# Required import: from torch.optim import Adam [as alias]
# Or: from torch.optim.Adam import zero_grad [as alias]
def learn(learning_rate, iterations, x, y, validation=None, stop_early=False, run_comment=''):
    # Define a neural network using high-level modules.
    writer = SummaryWriter(comment=run_comment)
    model = Sequential(
        Linear(len(x[0]), len(y[0]), bias=True)  # n inputs -> 1 output
    )
    loss_fn = BCEWithLogitsLoss(reduction='sum')  # reduction=mean converges slower.
    # TODO: Add an option to twiddle pos_weight, which lets us trade off precision and recall. Maybe also graph using add_pr_curve(), which can show how that tradeoff is going.
    optimizer = Adam(model.parameters(), lr=learning_rate)

    if validation:
        validation_ins, validation_outs = validation
    previous_validation_loss = None

    with progressbar(range(iterations)) as bar:
        for t in bar:
            y_pred = model(x)  # Make predictions.
            loss = loss_fn(y_pred, y)
            writer.add_scalar('loss', loss, t)
            if validation:
                validation_loss = loss_fn(model(validation_ins), validation_outs)
                if stop_early:
                    if previous_validation_loss is not None and previous_validation_loss < validation_loss:
                        print('Stopping early at iteration {t} because validation error rose.'.format(t=t))
                        model.load_state_dict(previous_model)
                        break
                    else:
                        previous_validation_loss = validation_loss
                        previous_model = model.state_dict()
                writer.add_scalar('validation_loss', validation_loss, t)
            writer.add_scalar('training_accuracy_per_tag', accuracy_per_tag(model, x, y), t)
            optimizer.zero_grad()  # Zero the gradients.
            loss.backward()  # Compute gradients.
            optimizer.step()

    # Horizontal axis is what confidence. Vertical is how many samples were that confidence.
    writer.add_histogram('confidence', confidences(model, x), t)
    writer.close()
    return model
Example 2: train
# Required import: from torch.optim import Adam [as alias]
# Or: from torch.optim.Adam import zero_grad [as alias]
def train(args):
    device = torch.device("cuda" if args.cuda else "cpu")

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    train_dataset = datasets.ImageFolder(args.dataset, transform)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size)

    transformer = TransformerNet().to(device)
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    vgg = Vgg16(requires_grad=False).to(device)
    style_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])
    style = utils.load_image(args.style_image, size=args.style_size)
    style = style_transform(style)
    style = style.repeat(args.batch_size, 1, 1, 1).to(device)

    features_style = vgg(utils.normalize_batch(style))
    gram_style = [utils.gram_matrix(y) for y in features_style]

    for e in range(args.epochs):
        transformer.train()
        agg_content_loss = 0.
        agg_style_loss = 0.
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            optimizer.zero_grad()

            x = x.to(device)
            y = transformer(x)

            y = utils.normalize_batch(y)
            x = utils.normalize_batch(x)

            features_y = vgg(y)
            features_x = vgg(x)

            content_loss = args.content_weight * mse_loss(features_y.relu2_2, features_x.relu2_2)

            style_loss = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                gm_y = utils.gram_matrix(ft_y)
                style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :])
            style_loss *= args.style_weight

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            agg_content_loss += content_loss.item()
            agg_style_loss += style_loss.item()

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "{}\tEpoch {}:\t[{}/{}]\tcontent: {:.6f}\tstyle: {:.6f}\ttotal: {:.6f}".format(
                    time.ctime(), e + 1, count, len(train_dataset),
                    agg_content_loss / (batch_id + 1),
                    agg_style_loss / (batch_id + 1),
                    (agg_content_loss + agg_style_loss) / (batch_id + 1)
                )
                print(mesg)

            if args.checkpoint_model_dir is not None and (batch_id + 1) % args.checkpoint_interval == 0:
                transformer.eval().cpu()
                ckpt_model_filename = "ckpt_epoch_" + str(e) + "_batch_id_" + str(batch_id + 1) + ".pth"
                ckpt_model_path = os.path.join(args.checkpoint_model_dir, ckpt_model_filename)
                torch.save(transformer.state_dict(), ckpt_model_path)
                transformer.to(device).train()

    # save model
    transformer.eval().cpu()
    save_model_filename = "epoch_" + str(args.epochs) + "_" + str(time.ctime()).replace(' ', '_') + "_" + str(
        args.content_weight) + "_" + str(args.style_weight) + ".model"
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)

    print("\nDone, trained model saved at", save_model_path)
Example 3: range
# Required import: from torch.optim import Adam [as alias]
# Or: from torch.optim.Adam import zero_grad [as alias]
if opt.use_gpu:
    netd.cuda()
    netg.cuda()
    criterion.cuda()
    true_labels, fake_labels = true_labels.cuda(), fake_labels.cuda()
    fix_noises, noises = fix_noises.cuda(), noises.cuda()

for epoch in range(opt.max_epoch):
    for ii, (img, _) in enumerate(dataloader):
        real_img = Variable(img)
        if opt.use_gpu:
            real_img = real_img.cuda()

        if (ii + 1) % opt.d_every == 0:
            optimizer_d.zero_grad()
            output = netd(real_img)
            error_d_real = criterion(output, true_labels)
            error_d_real.backward()

            noises.data.copy_(torch.randn(opt.batch_size, opt.nz, 1, 1))
            fake_img = netg(noises).detach()
            fake_output = netd(fake_img)
            error_d_fake = criterion(fake_output, fake_labels)
            error_d_fake.backward()

            error = error_d_real + error_d_fake
            print('error_d:', error.data[0])
            writer.add_scalar('data/error_d', error_d_fake.data[0], ii)
Example 4: DDPG
# Required import: from torch.optim import Adam [as alias]
# Or: from torch.optim.Adam import zero_grad [as alias]
class DDPG(object):
    def __init__(self, gamma, tau, hidden_size, num_inputs, action_space):
        self.num_inputs = num_inputs
        self.action_space = action_space

        self.actor = Actor(hidden_size, self.num_inputs, self.action_space)
        self.actor_target = Actor(hidden_size, self.num_inputs, self.action_space)
        self.actor_perturbed = Actor(hidden_size, self.num_inputs, self.action_space)
        self.actor_optim = Adam(self.actor.parameters(), lr=1e-4)

        self.critic = Critic(hidden_size, self.num_inputs, self.action_space)
        self.critic_target = Critic(hidden_size, self.num_inputs, self.action_space)
        self.critic_optim = Adam(self.critic.parameters(), lr=1e-3)

        self.gamma = gamma
        self.tau = tau

        hard_update(self.actor_target, self.actor)  # Make sure target starts with the same weights
        hard_update(self.critic_target, self.critic)

    def select_action(self, state, action_noise=None, param_noise=None):
        self.actor.eval()
        if param_noise is not None:
            mu = self.actor_perturbed((Variable(state)))
        else:
            mu = self.actor((Variable(state)))
        self.actor.train()
        mu = mu.data

        if action_noise is not None:
            mu += torch.Tensor(action_noise.noise())

        return mu.clamp(-1, 1)

    def update_parameters(self, batch):
        state_batch = Variable(torch.cat(batch.state))
        action_batch = Variable(torch.cat(batch.action))
        reward_batch = Variable(torch.cat(batch.reward))
        mask_batch = Variable(torch.cat(batch.mask))
        next_state_batch = Variable(torch.cat(batch.next_state))

        next_action_batch = self.actor_target(next_state_batch)
        next_state_action_values = self.critic_target(next_state_batch, next_action_batch)

        reward_batch = reward_batch.unsqueeze(1)
        mask_batch = mask_batch.unsqueeze(1)
        expected_state_action_batch = reward_batch + (self.gamma * mask_batch * next_state_action_values)

        self.critic_optim.zero_grad()
        state_action_batch = self.critic((state_batch), (action_batch))
        value_loss = F.mse_loss(state_action_batch, expected_state_action_batch)
        value_loss.backward()
        self.critic_optim.step()

        self.actor_optim.zero_grad()
        policy_loss = -self.critic((state_batch), self.actor((state_batch)))
        policy_loss = policy_loss.mean()
        policy_loss.backward()
        self.actor_optim.step()

        soft_update(self.actor_target, self.actor, self.tau)
        soft_update(self.critic_target, self.critic, self.tau)

        return value_loss.item(), policy_loss.item()

    def perturb_actor_parameters(self, param_noise):
        """Apply parameter noise to actor model, for exploration"""
        hard_update(self.actor_perturbed, self.actor)
        params = self.actor_perturbed.state_dict()
        for name in params:
            if 'ln' in name:
                pass
            param = params[name]
            param += torch.randn(param.shape) * param_noise.current_stddev

    def save_model(self, env_name, suffix="", actor_path=None, critic_path=None):
        if not os.path.exists('models/'):
            os.makedirs('models/')

        if actor_path is None:
            actor_path = "models/ddpg_actor_{}_{}".format(env_name, suffix)
        if critic_path is None:
            critic_path = "models/ddpg_critic_{}_{}".format(env_name, suffix)
        print('Saving models to {} and {}'.format(actor_path, critic_path))
        torch.save(self.actor.state_dict(), actor_path)
        torch.save(self.critic.state_dict(), critic_path)

    def load_model(self, actor_path, critic_path):
        print('Loading models from {} and {}'.format(actor_path, critic_path))
        if actor_path is not None:
            self.actor.load_state_dict(torch.load(actor_path))
        if critic_path is not None:
            # ... the rest of this example's code is omitted ...
Example 5: __init__
# Required import: from torch.optim import Adam [as alias]
# Or: from torch.optim.Adam import zero_grad [as alias]
        # ... the beginning of the code is omitted here ...
        values = values.expand(self.args.batch_size, self.args.actions, self.args.atoms)
        # print(values)
        q_value_gammas = q_value_gammas.view(self.args.batch_size, 1, 1)
        q_value_gammas = q_value_gammas.expand(self.args.batch_size, self.args.actions, self.args.atoms)
        # print(q_value_gammas)
        gamma_values = q_value_gammas * values
        # print(gamma_values)
        rewards = rewards.view(self.args.batch_size, 1, 1)
        rewards = rewards.expand(self.args.batch_size, self.args.actions, self.args.atoms)
        # print(rewards)
        operator_q_values = rewards + gamma_values
        # print(operator_q_values)

        clipped_operator_q_values = torch.clamp(operator_q_values, self.args.v_min, self.args.v_max)

        delta_z = (self.args.v_max - self.args.v_min) / (self.args.atoms - 1)
        # Using the notation from the categorical paper
        b_j = (clipped_operator_q_values - self.args.v_min) / delta_z
        # print(b_j)
        lower_bounds = torch.floor(b_j)
        upper_bounds = torch.ceil(b_j)

        # Work out the max action
        atom_values = Variable(torch.linspace(self.args.v_min, self.args.v_max, steps=self.args.atoms))
        atom_values = atom_values.view(1, 1, self.args.atoms)
        atom_values = atom_values.expand(self.args.batch_size, self.args.actions, self.args.atoms)

        # Sum over the atoms dimension
        target_expected_qvalues = torch.sum(target_dqn_qvals_data * atom_values, dim=2)
        # Get the maximum actions index across the batch size
        max_actions = target_expected_qvalues.max(dim=1)[1].view(-1)

        # Project back onto the original support for the max actions
        q_value_distribution_targets = torch.zeros(self.args.batch_size, self.args.atoms)

        # Distributions for the max actions
        # print(target_dqn_qvals_data, max_actions)
        q_value_max_actions_distribs = target_dqn_qvals_data.index_select(dim=1, index=max_actions)[:, 0, :]
        # print(q_value_max_actions_distribs)

        # Lower_bounds_actions
        lower_bounds_actions = lower_bounds.index_select(dim=1, index=max_actions)[:, 0, :]
        upper_bounds_actions = upper_bounds.index_select(dim=1, index=max_actions)[:, 0, :]
        b_j_actions = b_j.index_select(dim=1, index=max_actions)[:, 0, :]

        lower_bound_values_to_add = q_value_max_actions_distribs * (upper_bounds_actions - b_j_actions)
        upper_bound_values_to_add = q_value_max_actions_distribs * (b_j_actions - lower_bounds_actions)
        # print(lower_bounds_actions)
        # print(lower_bound_values_to_add)

        # Naive looping
        for b in range(self.args.batch_size):
            for l, pj in zip(lower_bounds_actions.data.type(torch.LongTensor)[b], lower_bound_values_to_add[b].data):
                q_value_distribution_targets[b][l] += pj
            for u, pj in zip(upper_bounds_actions.data.type(torch.LongTensor)[b], upper_bound_values_to_add[b].data):
                q_value_distribution_targets[b][u] += pj

        self.dqn.train()
        if self.args.gpu:
            actions = actions.cuda()
            # q_value_targets = q_value_targets.cuda()
            q_value_distribution_targets = q_value_distribution_targets.cuda()
        model_predictions = self.dqn(states).index_select(1, actions.view(-1))[:, 0, :]
        q_value_distribution_targets = Variable(q_value_distribution_targets)
        # print(q_value_distribution_targets)
        # print(model_predictions)

        # Cross entropy loss
        ce_loss = -torch.sum(q_value_distribution_targets * torch.log(model_predictions), dim=1)
        ce_batch_loss = ce_loss.mean()

        info = {}

        self.log("DQN/X_Entropy_Loss", ce_batch_loss.data[0], step=self.T)

        # Update
        self.optimizer.zero_grad()
        ce_batch_loss.backward()

        # Taken from pytorch clip_grad_norm
        # Remove once the pip version is up to date with source
        gradient_norm = clip_grad_norm(self.dqn.parameters(), self.args.clip_value)
        if gradient_norm is not None:
            info["Norm"] = gradient_norm

        self.optimizer.step()

        if "States" in info:
            states_trained = info["States"]
            info["States"] = states_trained + columns[0]
        else:
            info["States"] = columns[0]

        # Pad out the states to be of size batch_size
        if len(info["States"]) < self.args.batch_size:
            old_states = info["States"]
            new_states = old_states[0] * (self.args.batch_size - len(old_states))
            info["States"] = new_states

        return info
Example 6: DDPG
# Required import: from torch.optim import Adam [as alias]
# Or: from torch.optim.Adam import zero_grad [as alias]
class DDPG(object):
    def __init__(self, gamma, tau, hidden_size, num_inputs, action_space):
        self.num_inputs = num_inputs
        self.action_space = action_space

        self.actor = Actor(hidden_size, self.num_inputs, self.action_space)
        self.actor_target = Actor(hidden_size, self.num_inputs, self.action_space)
        self.actor_optim = Adam(self.actor.parameters(), lr=1e-4)

        self.critic = Critic(hidden_size, self.num_inputs, self.action_space)
        self.critic_target = Critic(hidden_size, self.num_inputs, self.action_space)
        self.critic_optim = Adam(self.critic.parameters(), lr=1e-3)

        self.gamma = gamma
        self.tau = tau

        hard_update(self.actor_target, self.actor)  # Make sure target starts with the same weights
        hard_update(self.critic_target, self.critic)

    def select_action(self, state, exploration=None):
        self.actor.eval()
        mu = self.actor((Variable(state, volatile=True)))
        self.actor.train()
        mu = mu.data
        if exploration is not None:
            mu += torch.Tensor(exploration.noise())

        return mu.clamp(-1, 1)

    def update_parameters(self, batch):
        state_batch = Variable(torch.cat(batch.state))
        next_state_batch = Variable(torch.cat(batch.next_state), volatile=True)
        action_batch = Variable(torch.cat(batch.action))
        reward_batch = Variable(torch.cat(batch.reward))
        mask_batch = Variable(torch.cat(batch.mask))

        next_action_batch = self.actor_target(next_state_batch)
        next_state_action_values = self.critic_target(next_state_batch, next_action_batch)

        reward_batch = torch.unsqueeze(reward_batch, 1)
        expected_state_action_batch = reward_batch + (self.gamma * next_state_action_values)

        self.critic_optim.zero_grad()
        state_action_batch = self.critic((state_batch), (action_batch))
        value_loss = MSELoss(state_action_batch, expected_state_action_batch)
        value_loss.backward()
        self.critic_optim.step()

        self.actor_optim.zero_grad()
        policy_loss = -self.critic((state_batch), self.actor((state_batch)))
        policy_loss = policy_loss.mean()
        policy_loss.backward()
        self.actor_optim.step()

        soft_update(self.actor_target, self.actor, self.tau)
        soft_update(self.critic_target, self.critic, self.tau)
Example 7: __init__
# Required import: from torch.optim import Adam [as alias]
# Or: from torch.optim.Adam import zero_grad [as alias]
        # ... the beginning of the code is omitted here ...
        r = self.prev_exp[2]
        sn = self.prev_exp[3]
        an = action
        steps = 1
        tt = terminated
        pr = self.prev_exp[4]
        self.replay.Add_Exp(s, a, r, sn, an, steps, tt, pr, 1)
        self.prev_exp = (state, action, reward, state_next, pseudo_reward)

    def end_of_trajectory(self):
        self.replay.end_of_trajectory()
        self.prev_exp = None
        # self.replay.Clear()

    def train(self):
        if self.T - self.target_sync_T > self.args.target:
            self.sync_target_network()
            self.target_sync_T = self.T

        info = {}

        if self.T - self.train_T >= self.args.sarsa_train:
            for _ in range(self.args.sarsa_train):
                self.train_T = self.T
                self.dqn.eval()

                # TODO: Use a named tuple for experience replay
                n_step_sample = self.args.n_step
                batch, indices, is_weights = self.replay.Sample_N(self.args.batch_size, n_step_sample, self.args.gamma)
                columns = list(zip(*batch))

                states = Variable(torch.from_numpy(np.array(columns[0])).float().transpose_(1, 3))
                actions = Variable(torch.LongTensor(columns[1]))
                terminal_states = Variable(torch.FloatTensor(columns[5]))
                rewards = Variable(torch.FloatTensor(columns[2]))
                actions_next = Variable(torch.LongTensor(columns[6]))
                # Have to clip rewards for DQN
                rewards = torch.clamp(rewards, -1, 1)
                steps = Variable(torch.FloatTensor(columns[4]))
                new_states = Variable(torch.from_numpy(np.array(columns[3])).float().transpose_(1, 3))

                target_dqn_qvals = self.target_dqn(new_states).cpu()
                # Make a new variable with those values so that these are treated as constants
                target_dqn_qvals_data = Variable(target_dqn_qvals.data)

                q_value_targets = (Variable(torch.ones(terminal_states.size()[0])) - terminal_states)
                inter = Variable(torch.ones(terminal_states.size()[0]) * self.args.gamma)
                # print(steps)
                q_value_targets = q_value_targets * torch.pow(inter, steps)
                q_value_targets = q_value_targets * target_dqn_qvals_data.gather(1, actions_next.view(-1, 1))
                q_value_targets = q_value_targets + rewards

                self.dqn.train()
                if self.args.gpu:
                    actions = actions.cuda()
                    q_value_targets = q_value_targets.cuda()
                model_predictions = self.dqn(states).gather(1, actions.view(-1, 1))

                # info = {}
                td_error = model_predictions - q_value_targets
                info["TD_Error"] = td_error.mean().data[0]

                # Update the priorities
                if not self.args.density_priority:
                    self.replay.Update_Indices(indices, td_error.cpu().data.numpy(), no_pseudo_in_priority=self.args.count_td_priority)

                # If using prioritised we need to weight the td_error
                if self.args.prioritized and self.args.prioritized_is:
                    # print(td_error)
                    weights_tensor = torch.from_numpy(is_weights).float()
                    weights_tensor = Variable(weights_tensor)
                    if self.args.gpu:
                        weights_tensor = weights_tensor.cuda()
                    # print(weights_tensor)
                    td_error = td_error * weights_tensor

                l2_loss = (td_error).pow(2).mean()
                info["Loss"] = l2_loss.data[0]

                # Update
                self.optimizer.zero_grad()
                l2_loss.backward()

                # Taken from pytorch clip_grad_norm
                # Remove once the pip version is up to date with source
                gradient_norm = clip_grad_norm(self.dqn.parameters(), self.args.clip_value)
                if gradient_norm is not None:
                    info["Norm"] = gradient_norm

                self.optimizer.step()

                if "States" in info:
                    states_trained = info["States"]
                    info["States"] = states_trained + columns[0]
                else:
                    info["States"] = columns[0]

            self.replay.Clear()

        return info
Example 8: run
# Required import: from torch.optim import Adam [as alias]
# Or: from torch.optim.Adam import zero_grad [as alias]
def run(setting='discrete_discrete'):
    if setting == 'discrete_discrete':
        y, wy = make_circle(radius=4, n_samples=n_target_samples)
        x, wx = make_circle(radius=2, n_samples=n_target_samples)
        x = torch.from_numpy(x).float()
        y = torch.from_numpy(y).float()
        wy = torch.from_numpy(wy).float()
        wx = torch.from_numpy(wx).float()

        x = MultivariateNormal(torch.zeros(2), torch.eye(2) / 4)
        x = x.sample((n_target_samples, ))
        wx = np.full(len(x), 1 / len(x))
        wx = torch.from_numpy(wx).float()

        ot_plan = OTPlan(source_type='discrete', target_type='discrete',
                         target_length=len(y), source_length=len(x))
    elif setting == 'continuous_discrete':
        x = MultivariateNormal(torch.zeros(2), torch.eye(2) / 4)
        y, wy = make_circle(radius=4, n_samples=n_target_samples)
        y = torch.from_numpy(y).float()
        wy = torch.from_numpy(wy).float()
        ot_plan = OTPlan(source_type='continuous', target_type='discrete',
                         target_length=len(y), source_dim=2)
    else:
        raise ValueError

    mapping = Mapping(ot_plan, dim=2)

    optimizer = Adam(ot_plan.parameters(), amsgrad=True, lr=lr)
    # optimizer = SGD(ot_plan.parameters(), lr=lr)
    plan_objectives = []
    map_objectives = []

    print('Learning OT plan')
    for i in range(n_plan_iter):
        optimizer.zero_grad()
        if setting == 'discrete_discrete':
            this_yidx = torch.multinomial(wy, batch_size)
            this_y = y[this_yidx]
            this_xidx = torch.multinomial(wx, batch_size)
            this_x = x[this_xidx]
        else:
            this_x = x.sample((batch_size,))
            this_yidx = torch.multinomial(wy, batch_size)
            this_y = y[this_yidx]
            this_xidx = None
        loss = ot_plan.loss(this_x, this_y, yidx=this_yidx, xidx=this_xidx)
        loss.backward()
        optimizer.step()
        plan_objectives.append(-loss.item())
        if i % 100 == 0:
            print(f'Iter {i}, loss {-loss.item():.3f}')

    optimizer = Adam(mapping.parameters(), amsgrad=True, lr=lr)
    # optimizer = SGD(mapping.parameters(), lr=1e-5)
    print('Learning barycentric mapping')
    for i in range(n_map_iter):
        optimizer.zero_grad()
        if setting == 'discrete_discrete':
            this_yidx = torch.multinomial(wy, batch_size)
            this_y = y[this_yidx]
            this_xidx = torch.multinomial(wx, batch_size)
            this_x = x[this_xidx]
        else:
            this_x = x.sample((batch_size,))
            this_yidx = torch.multinomial(wy, batch_size)
            this_y = y[this_yidx]
            this_xidx = None
        loss = mapping.loss(this_x, this_y, yidx=this_yidx, xidx=this_xidx)
        loss.backward()
        optimizer.step()
        map_objectives.append(loss.item())
        if i % 100 == 0:
            print(f'Iter {i}, loss {loss.item():.3f}')

    if setting == 'continuous_discrete':
        x = x.sample((len(y),))
    with torch.no_grad():
        mapped = mapping(x)

    x = x.numpy()
    y = y.numpy()
    mapped = mapped.numpy()
    return x, y, mapped, plan_objectives, map_objectives
Example 9: __init__
# Required import: from torch.optim import Adam [as alias]
# Or: from torch.optim.Adam import zero_grad [as alias]
class NAF:
    def __init__(self, gamma, tau, hidden_size, num_inputs, action_space):
        self.action_space = action_space
        self.num_inputs = num_inputs

        self.model = Policy(hidden_size, num_inputs, action_space)
        self.target_model = Policy(hidden_size, num_inputs, action_space)
        self.optimizer = Adam(self.model.parameters(), lr=1e-3)

        self.gamma = gamma
        self.tau = tau

        hard_update(self.target_model, self.model)

    def select_action(self, state, action_noise=None, param_noise=None):
        self.model.eval()
        mu, _, _ = self.model((Variable(state), None))
        self.model.train()
        mu = mu.data
        if action_noise is not None:
            mu += torch.Tensor(action_noise.noise())

        return mu.clamp(-1, 1)

    def update_parameters(self, batch):
        state_batch = Variable(torch.cat(batch.state))
        action_batch = Variable(torch.cat(batch.action))
        reward_batch = Variable(torch.cat(batch.reward))
        mask_batch = Variable(torch.cat(batch.mask))
        next_state_batch = Variable(torch.cat(batch.next_state))

        _, _, next_state_values = self.target_model((next_state_batch, None))

        reward_batch = reward_batch.unsqueeze(1)
        mask_batch = mask_batch.unsqueeze(1)
        expected_state_action_values = reward_batch + (self.gamma * mask_batch * next_state_values)

        _, state_action_values, _ = self.model((state_batch, action_batch))

        loss = MSELoss(state_action_values, expected_state_action_values)

        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm(self.model.parameters(), 1)
        self.optimizer.step()

        soft_update(self.target_model, self.model, self.tau)

        return loss.item(), 0

    def save_model(self, env_name, suffix="", model_path=None):
        if not os.path.exists('models/'):
            os.makedirs('models/')

        if model_path is None:
            model_path = "models/naf_{}_{}".format(env_name, suffix)
        print('Saving model to {}'.format(model_path))
        torch.save(self.model.state_dict(), model_path)

    def load_model(self, model_path):
        print('Loading model from {}'.format(model_path))
        self.model.load_state_dict(torch.load(model_path))