This article collects typical usage examples of the Python attribute ddpg.DDPG. If you are wondering what ddpg.DDPG does or how to use it, the curated examples below may help; you can also read further about the ddpg module in which this attribute is defined.
Three code examples of ddpg.DDPG are shown below, sorted by popularity by default.
Example 1: prepare_params
# Required import: import ddpg [as alias]
# Or: from ddpg import DDPG [as alias]
def prepare_params(kwargs):
    # DDPG params
    ddpg_params = dict()
    env_name = kwargs['env_name']

    def make_env():
        return gym.make(env_name)
    kwargs['make_env'] = make_env
    tmp_env = cached_make_env(kwargs['make_env'])
    assert hasattr(tmp_env, '_max_episode_steps')
    kwargs['T'] = tmp_env._max_episode_steps
    tmp_env.reset()
    kwargs['max_u'] = np.array(kwargs['max_u']) if isinstance(kwargs['max_u'], list) else kwargs['max_u']
    kwargs['gamma'] = 1. - 1. / kwargs['T']
    if 'lr' in kwargs:
        kwargs['pi_lr'] = kwargs['lr']
        kwargs['Q_lr'] = kwargs['lr']
        del kwargs['lr']
    for name in ['buffer_size', 'hidden', 'layers',
                 'network_class',
                 'polyak',
                 'batch_size', 'Q_lr', 'pi_lr',
                 'norm_eps', 'norm_clip', 'max_u',
                 'action_l2', 'clip_obs', 'scope', 'relative_goals']:
        ddpg_params[name] = kwargs[name]
        kwargs['_' + name] = kwargs[name]
        del kwargs[name]
    kwargs['ddpg_params'] = ddpg_params
    return kwargs
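For orientation, here is a minimal usage sketch for prepare_params. It is not part of the original example: the environment name and every parameter value below are illustrative assumptions, and the snippet presumes prepare_params and its helper cached_make_env are importable from the surrounding config module (with gym and numpy installed).

# Hedged usage sketch -- all values are placeholders chosen only to satisfy
# the keys that prepare_params reads; they are not recommended hyperparameters.
params = dict(
    env_name='Pendulum-v0',            # assumed: any gym env exposing _max_episode_steps
    max_u=1.,                          # maximum action magnitude
    lr=1e-3,                           # expanded into pi_lr and Q_lr by prepare_params
    buffer_size=int(1e6),
    hidden=256,
    layers=3,
    network_class='actor_critic:ActorCritic',
    polyak=0.95,
    batch_size=256,
    norm_eps=0.01,
    norm_clip=5,
    action_l2=1.0,
    clip_obs=200.,
    scope='ddpg',
    relative_goals=False,
    rollout_batch_size=2,              # kept in kwargs for the configure_ddpg step in Example 2
)
params = prepare_params(params)
print(params['T'], params['gamma'], sorted(params['ddpg_params']))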
Example 2: configure_ddpg
# Required import: import ddpg [as alias]
# Or: from ddpg import DDPG [as alias]
def configure_ddpg(dims, params, reuse=False, use_mpi=True, clip_return=True):
    sample_her_transitions = configure_her(params)
    # Extract relevant parameters.
    gamma = params['gamma']
    rollout_batch_size = params['rollout_batch_size']
    ddpg_params = params['ddpg_params']
    input_dims = dims.copy()

    # DDPG agent
    env = cached_make_env(params['make_env'])
    env.reset()
    ddpg_params.update({'input_dims': input_dims,  # agent takes an input observations
                        'T': params['T'],
                        'clip_pos_returns': True,  # clip positive returns
                        'clip_return': (1. / (1. - gamma)) if clip_return else np.inf,  # max abs of return
                        'rollout_batch_size': rollout_batch_size,
                        'subtract_goals': simple_goal_subtract,
                        'sample_transitions': sample_her_transitions,
                        'gamma': gamma,
                        'bc_loss': params['bc_loss'],
                        'q_filter': params['q_filter'],
                        'num_demo': params['num_demo'],
                        })
    ddpg_params['info'] = {
        'env_name': params['env_name'],
    }
    policy = DDPG(reuse=reuse, **ddpg_params, use_mpi=use_mpi)
    return policy
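To show how the two configuration steps compose, the hedged sketch below feeds the output of prepare_params (Example 1) into configure_ddpg. The dims dictionary and the extra keys (bc_loss, q_filter, num_demo, replay_strategy, replay_k) are assumptions about what the surrounding HER-style training script supplies; configure_her is not shown on this page, so the HER-related keys in particular may differ in your codebase.

# Hypothetical glue code -- not taken from the original page.
dims = {'o': 10, 'u': 4, 'g': 3}       # observation / action / goal sizes (illustrative)

params = prepare_params(params)        # params dict as sketched after Example 1
params.update(
    bc_loss=0,                 # behaviour-cloning loss disabled (assumed)
    q_filter=0,                # Q-filter disabled (assumed)
    num_demo=0,                # no demonstration episodes (assumed)
    replay_strategy='future',  # assumed key consumed by configure_her
    replay_k=4,                # assumed key consumed by configure_her
)
policy = configure_ddpg(dims=dims, params=params, clip_return=True)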
Example 3: main
# Required import: import ddpg [as alias]
# Or: from ddpg import DDPG [as alias]
def main():
    experiment = 'InvertedPendulum-v1'  # specify environment here
    env = gym.make(experiment)
    steps = env.spec.timestep_limit  # steps per episode
    assert isinstance(env.observation_space, Box), "observation space must be continuous"
    assert isinstance(env.action_space, Box), "action space must be continuous"

    # Randomly initialize critic, actor, target critic, target actor network and replay buffer
    agent = DDPG(env, is_batch_norm)
    exploration_noise = OUNoise(env.action_space.shape[0])
    counter = 0
    reward_per_episode = 0
    total_reward = 0
    num_states = env.observation_space.shape[0]
    num_actions = env.action_space.shape[0]
    print("Number of States:", num_states)
    print("Number of Actions:", num_actions)
    print("Number of Steps per episode:", steps)
    # saving reward:
    reward_st = np.array([0])

    for i in range(episodes):
        print("==== Starting episode no:", i, "====", "\n")
        observation = env.reset()
        reward_per_episode = 0
        for t in range(steps):
            # rendering environment (optional)
            env.render()
            x = observation
            action = agent.evaluate_actor(np.reshape(x, [1, num_states]))
            noise = exploration_noise.noise()
            action = action[0] + noise  # Select action according to current policy and exploration noise
            print("Action at step", t, ":", action, "\n")
            observation, reward, done, info = env.step(action)
            # add s_t, s_t+1, action, reward to experience memory
            agent.add_experience(x, observation, action, reward, done)
            # train critic and actor network
            if counter > 64:
                agent.train()
            reward_per_episode += reward
            counter += 1
            # check if episode ends:
            if done or (t == steps - 1):
                print('EPISODE:', i, 'Steps:', t, 'Total Reward:', reward_per_episode)
                print("Printing reward to file")
                exploration_noise.reset()  # reinitializing random noise for action exploration
                reward_st = np.append(reward_st, reward_per_episode)
                np.savetxt('episode_reward.txt', reward_st, newline="\n")
                print('\n\n')
                break
        total_reward += reward_per_episode  # accumulate per-episode return inside the episode loop
    print("Average reward per episode {}".format(total_reward / episodes))