本文整理汇总了Python中baselines.logger.info方法的典型用法代码示例。如果您正苦于以下问题:Python logger.info方法的具体用法?Python logger.info怎么用?Python logger.info使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类baselines.logger
的用法示例。
在下文中一共展示了logger.info方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: configure_her
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import info [as 别名]
def configure_her(params):
env = cached_make_env(params['make_env'])
env.reset()
def reward_fun(ag_2, g, info): # vectorized
return env.compute_reward(achieved_goal=ag_2, desired_goal=g, info=info)
# Prepare configuration for HER.
her_params = {
'reward_fun': reward_fun,
}
for name in ['replay_strategy', 'replay_k']:
her_params[name] = params[name]
params['_' + name] = her_params[name]
del params[name]
sample_her_transitions = make_sample_her_transitions(**her_params)
return sample_her_transitions
示例2: configure_dims
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import info [as 别名]
def configure_dims(params):
env = cached_make_env(params['make_env'])
env.reset()
obs, _, _, info = env.step(env.action_space.sample())
dims = {
'o': obs['observation'].shape[0],
'u': env.action_space.shape[0],
'g': obs['desired_goal'].shape[0],
}
for key, value in info.items():
value = np.array(value)
if value.ndim == 0:
value = value.reshape(1)
dims['info_{}'.format(key)] = value.shape[0]
return dims
示例3: setup_param_noise
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import info [as 别名]
def setup_param_noise(self, normalized_obs0):
assert self.param_noise is not None
# Configure perturbed actor.
param_noise_actor = copy(self.actor)
param_noise_actor.name = 'param_noise_actor'
self.perturbed_actor_tf = param_noise_actor(normalized_obs0)
logger.info('setting up param noise')
self.perturb_policy_ops = get_perturbed_actor_updates(self.actor, param_noise_actor, self.param_noise_stddev)
# Configure separate copy for stddev adoption.
adaptive_param_noise_actor = copy(self.actor)
adaptive_param_noise_actor.name = 'adaptive_param_noise_actor'
adaptive_actor_tf = adaptive_param_noise_actor(normalized_obs0)
self.perturb_adaptive_policy_ops = get_perturbed_actor_updates(self.actor, adaptive_param_noise_actor, self.param_noise_stddev)
self.adaptive_policy_distance = tf.sqrt(tf.reduce_mean(tf.square(self.actor_tf - adaptive_actor_tf)))
示例4: setup_critic_optimizer
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import info [as 别名]
def setup_critic_optimizer(self):
logger.info('setting up critic optimizer')
normalized_critic_target_tf = tf.clip_by_value(normalize(self.critic_target, self.ret_rms), self.return_range[0], self.return_range[1])
self.critic_loss = tf.reduce_mean(tf.square(self.normalized_critic_tf - normalized_critic_target_tf))
if self.critic_l2_reg > 0.:
critic_reg_vars = [var for var in self.critic.trainable_vars if 'kernel' in var.name and 'output' not in var.name]
for var in critic_reg_vars:
logger.info(' regularizing: {}'.format(var.name))
logger.info(' applying l2 regularization with {}'.format(self.critic_l2_reg))
critic_reg = tc.layers.apply_regularization(
tc.layers.l2_regularizer(self.critic_l2_reg),
weights_list=critic_reg_vars
)
self.critic_loss += critic_reg
critic_shapes = [var.get_shape().as_list() for var in self.critic.trainable_vars]
critic_nb_params = sum([reduce(lambda x, y: x * y, shape) for shape in critic_shapes])
logger.info(' critic shapes: {}'.format(critic_shapes))
logger.info(' critic params: {}'.format(critic_nb_params))
self.critic_grads = U.flatgrad(self.critic_loss, self.critic.trainable_vars, clip_norm=self.clip_norm)
self.critic_optimizer = MpiAdam(var_list=self.critic.trainable_vars,
beta1=0.9, beta2=0.999, epsilon=1e-08)
示例5: configure_her
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import info [as 别名]
def configure_her(params):
env = cached_make_env(params['make_env'])
env.reset()
def reward_fun(ag_2, g, info): # vectorized
return env.compute_reward(achieved_goal=ag_2, desired_goal=g, info=info)
# Prepare configuration for HER.
her_params = {
'reward_fun': reward_fun,
}
for name in ['replay_strategy', 'replay_k']:
her_params[name] = params[name]
params['_' + name] = her_params[name]
del params[name]
sample_her_transitions = make_sample_her_transitions(**her_params)
return sample_her_transitions
示例6: log_params
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import info [as 别名]
def log_params(params, logger=logger):
for key in sorted(params.keys()):
logger.info('{}: {}'.format(key, params[key]))
示例7: configure_ddpg
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import info [as 别名]
def configure_ddpg(dims, params, reuse=False, use_mpi=True, clip_return=True):
sample_her_transitions = configure_her(params)
# Extract relevant parameters.
gamma = params['gamma']
rollout_batch_size = params['rollout_batch_size']
ddpg_params = params['ddpg_params']
input_dims = dims.copy()
# DDPG agent
env = cached_make_env(params['make_env'])
env.reset()
ddpg_params.update({'input_dims': input_dims, # agent takes an input observations
'T': params['T'],
'clip_pos_returns': True, # clip positive returns
'clip_return': (1. / (1. - gamma)) if clip_return else np.inf, # max abs of return
'rollout_batch_size': rollout_batch_size,
'subtract_goals': simple_goal_subtract,
'sample_transitions': sample_her_transitions,
'gamma': gamma,
})
ddpg_params['info'] = {
'env_name': params['env_name'],
}
policy = DDPG(reuse=reuse, **ddpg_params, use_mpi=use_mpi)
return policy
示例8: get_target_updates
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import info [as 别名]
def get_target_updates(vars, target_vars, tau):
logger.info('setting up target updates ...')
soft_updates = []
init_updates = []
assert len(vars) == len(target_vars)
for var, target_var in zip(vars, target_vars):
logger.info(' {} <- {}'.format(target_var.name, var.name))
init_updates.append(tf.assign(target_var, var))
soft_updates.append(tf.assign(target_var, (1. - tau) * target_var + tau * var))
assert len(init_updates) == len(vars)
assert len(soft_updates) == len(vars)
return tf.group(*init_updates), tf.group(*soft_updates)
示例9: get_perturbed_actor_updates
# 需要导入模块: from baselines import logger [as 别名]
# 或者: from baselines.logger import info [as 别名]
def get_perturbed_actor_updates(actor, perturbed_actor, param_noise_stddev):
assert len(actor.vars) == len(perturbed_actor.vars)
assert len(actor.perturbable_vars) == len(perturbed_actor.perturbable_vars)
updates = []
for var, perturbed_var in zip(actor.vars, perturbed_actor.vars):
if var in actor.perturbable_vars:
logger.info(' {} <- {} + noise'.format(perturbed_var.name, var.name))
updates.append(tf.assign(perturbed_var, var + tf.random_normal(tf.shape(var), mean=0., stddev=param_noise_stddev)))
else:
logger.info(' {} <- {}'.format(perturbed_var.name, var.name))
updates.append(tf.assign(perturbed_var, var))
assert len(updates) == len(actor.vars)
return tf.group(*updates)