本文整理匯總了Python中rllab.baselines.linear_feature_baseline.LinearFeatureBaseline方法的典型用法代碼示例。如果您正苦於以下問題:Python linear_feature_baseline.LinearFeatureBaseline方法的具體用法?Python linear_feature_baseline.LinearFeatureBaseline怎麽用?Python linear_feature_baseline.LinearFeatureBaseline使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類rllab.baselines.linear_feature_baseline的用法示例。
在下文中一共展示了linear_feature_baseline.LinearFeatureBaseline方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: main
# 需要導入模塊: from rllab.baselines import linear_feature_baseline [as 別名]
# 或者: from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline [as 別名]
def main():
    """Train TRPO on ``Pendulum-v0`` inside a ``logdir`` context.

    Builds the Gym environment, a ``GaussianMLPPolicy`` with hidden sizes
    ``(100, 100)`` and a ``LinearFeatureBaseline``, then calls
    ``algo.train()`` under ``logdir(..., dirname='data/irl/pendulum')``.
    """
    env = GymEnv('Pendulum-v0', record_video=False, force_reset=True)

    # Collapse the (possibly multi-dimensional) space shapes to flat sizes.
    policy = GaussianMLPPolicy(
        observation_dim=np.prod(env.observation_space.shape),
        action_dim=np.prod(env.action_space.shape),
        hidden_sizes=(100, 100),
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)
    trpo_settings = dict(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=10000,
        max_path_length=env.horizon,
        n_itr=100,
        discount=0.99,
        store_paths=True,
        step_size=0.01,
    )
    algo = TRPO(**trpo_settings)

    with logdir(algo=algo, dirname='data/irl/pendulum'):
        algo.train()
示例2: main
# 需要導入模塊: from rllab.baselines import linear_feature_baseline [as 別名]
# 或者: from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline [as 別名]
def main(exp_name=None, ent_wt=1.0):
    """Run TRPO on ``CustomAnt-v0`` inside a TensorFlow session.

    Args:
        exp_name: Passed to ``TRPO`` as ``exp_name`` and interpolated into
            the log directory ``data/ant_data_collect/<exp_name>``.
        ent_wt: Passed to ``TRPO`` as ``entropy_weight``.
    """
    tf.reset_default_graph()

    gym_env = CustomGymEnv(
        'CustomAnt-v0',
        record_video=False,
        record_log=False,
        force_reset=False,
    )
    env = TfEnv(gym_env)
    policy = GaussianMLPPolicy(
        name='policy',
        env_spec=env.spec,
        hidden_sizes=(32, 32),
    )

    with tf.Session(config=get_session_config()) as sess:
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = TRPO(
            env=env,
            sess=sess,
            policy=policy,
            n_itr=2000,
            batch_size=20000,
            max_path_length=500,
            discount=0.99,
            store_paths=True,
            entropy_weight=ent_wt,
            baseline=baseline,
            exp_name=exp_name,
        )

        # Each run logs under a sub-directory named after exp_name.
        log_dir = 'data/ant_data_collect/%s' % exp_name
        with rllab_logdir(algo=algo, dirname=log_dir):
            algo.train()
示例3: main
# 需要導入模塊: from rllab.baselines import linear_feature_baseline [as 別名]
# 或者: from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline [as 別名]
def main(exp_name, ent_wt=1.0):
    """Run TRPO on ``CustomAnt-v0`` for 1500 iterations in a TF session.

    Args:
        exp_name: Passed to ``TRPO`` as ``exp_name`` and interpolated into
            the log directory ``data/ant_data_collect/<exp_name>``.
        ent_wt: Passed to ``TRPO`` as ``entropy_weight``.
    """
    tf.reset_default_graph()

    gym_env = CustomGymEnv('CustomAnt-v0', record_video=False, record_log=False)
    env = TfEnv(gym_env)
    policy = GaussianMLPPolicy(
        name='policy',
        env_spec=env.spec,
        hidden_sizes=(32, 32),
    )

    with tf.Session(config=get_session_config()) as sess:
        trpo_settings = dict(
            env=env,
            sess=sess,
            policy=policy,
            n_itr=1500,
            batch_size=20000,
            max_path_length=500,
            discount=0.99,
            store_paths=True,
            entropy_weight=ent_wt,
            baseline=LinearFeatureBaseline(env_spec=env.spec),
            exp_name=exp_name,
        )
        algo = TRPO(**trpo_settings)

        log_dir = 'data/ant_data_collect/%s' % exp_name
        with rllab_logdir(algo=algo, dirname=log_dir):
            algo.train()
示例4: main
# 需要導入模塊: from rllab.baselines import linear_feature_baseline [as 別名]
# 或者: from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline [as 別名]
def main():
    """Train TRPO on a ``TfEnv``-wrapped ``Pendulum-v0`` environment.

    Training runs inside ``rllab_logdir(..., dirname='data/pendulum')``.
    """
    gym_env = GymEnv('Pendulum-v0', record_video=False, record_log=False)
    env = TfEnv(gym_env)

    policy = GaussianMLPPolicy(
        name='policy',
        env_spec=env.spec,
        hidden_sizes=(32, 32),
    )
    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        n_itr=200,
        batch_size=1000,
        max_path_length=100,
        discount=0.99,
        store_paths=True,
        baseline=baseline,
    )

    with rllab_logdir(algo=algo, dirname='data/pendulum'):
        algo.train()
示例5: main
# 需要導入模塊: from rllab.baselines import linear_feature_baseline [as 別名]
# 或者: from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline [as 別名]
def main():
    """Run ``IRLTRPO`` with an ``AIRL`` reward model on ``Pendulum-v0``.

    Expert trajectories are obtained from
    ``load_latest_experts('data/irl/pendulum', n=5)``. The environment
    reward is zeroed (``zero_environment_reward=True``) and the IRL model
    is weighted by ``irl_model_wt=1.0``.
    """
    env = GymEnv('Pendulum-v0', record_video=False, record_log=False)
    experts = load_latest_experts('data/irl/pendulum', n=5)
    irl_model = AIRL(env_spec=env.spec, expert_trajs=experts)

    policy = GaussianMLPPolicy(
        observation_dim=np.prod(env.observation_space.shape),
        action_dim=np.prod(env.action_space.shape),
        # Two hidden layers of 32 units each.
        hidden_sizes=(32, 32),
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = IRLTRPO(
        env=env,
        policy=policy,
        irl_model=irl_model,
        n_itr=200,
        batch_size=4000,
        max_path_length=100,
        discount=0.99,
        store_paths=True,
        discrim_train_itrs=50,
        irl_model_wt=1.0,
        # TODO (ewei): adding the policy entropy term seems to worsen the
        # result compared with using no entropy at all. The value should be
        # 1.0, but 0.1 seems to work better.
        entropy_weight=0.1,
        zero_environment_reward=True,
        baseline=baseline,
    )
    algo.train()
示例6: run_task
# 需要導入模塊: from rllab.baselines import linear_feature_baseline [as 別名]
# 或者: from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline [as 別名]
def run_task(*_):
    """Run a single TRPO iteration on ``CartPole-v0``.

    Positional arguments are accepted and ignored. The policy is a
    ``CategoricalMLPPolicy`` sized from the environment's observation shape
    and its discrete action count (``env.action_space.n``).
    """
    env = GymEnv("CartPole-v0", record_video=False, force_reset=True)

    policy = CategoricalMLPPolicy(
        observation_dim=np.prod(env.observation_space.shape),
        num_actions=env.action_space.n,
        # Two hidden layers of 32 units each.
        hidden_sizes=(32, 32),
    )

    trpo_settings = dict(
        env=env,
        policy=policy,
        baseline=LinearFeatureBaseline(env_spec=env.spec),
        batch_size=4000,
        max_path_length=env.horizon,
        n_itr=1,
        discount=0.99,
        step_size=0.01,
        use_finite_diff_hvp=True,
        symmetric_finite_diff=False,
        # Uncomment both lines (this and the plot parameter below) to enable plotting
        # plot=True,
    )
    algo = TRPO(**trpo_settings)
    algo.train()
# if __name__ == '__main__':
# run_task()
示例7: run_task
# 需要導入模塊: from rllab.baselines import linear_feature_baseline [as 別名]
# 或者: from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline [as 別名]
def run_task(*_):
    """Train TRPO on a normalized ``CartPole-v0`` for 50 iterations.

    Positional arguments are accepted and ignored. The optimizer is a
    ``ConjugateGradientOptimizer`` using a ``FiniteDifferenceHvp`` with
    ``base_eps=1e-5`` and ``symmetric=False``.
    """
    # Different action spaces may need different policies: a Discrete action
    # space works with a CategoricalMLPPolicy, whereas a Box action space may
    # need a GaussianMLPPolicy (see the trpo_gym_pendulum.py example).
    gym_env = GymEnv("CartPole-v0", record_video=False, force_reset=True)
    env = normalize(gym_env)

    policy = CategoricalMLPPolicy(
        env_spec=env.spec,
        # Two hidden layers of 32 units each.
        hidden_sizes=(32, 32),
    )
    baseline = LinearFeatureBaseline(env_spec=env.spec)

    hvp = FiniteDifferenceHvp(base_eps=1e-5, symmetric=False)
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=env.horizon,
        n_itr=50,
        discount=0.99,
        step_size=0.01,
        optimizer=ConjugateGradientOptimizer(hvp_approach=hvp),
        # Uncomment both lines (this and the plot parameter below) to enable plotting
        # plot=True,
    )
    algo.train()
示例8: run_task
# 需要導入模塊: from rllab.baselines import linear_feature_baseline [as 別名]
# 或者: from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline [as 別名]
def run_task(*_):
    """Train TRPO on ``Pendulum-v0`` for 50 iterations.

    Positional arguments are accepted and ignored. The Gaussian MLP policy
    is sized from the flattened observation and action space shapes.
    """
    env = GymEnv('Pendulum-v0', record_video=False, force_reset=True)

    policy = GaussianMLPPolicy(
        observation_dim=np.prod(env.observation_space.shape),
        action_dim=np.prod(env.action_space.shape),
        # Two hidden layers of 32 units each.
        hidden_sizes=(32, 32),
    )

    trpo_settings = dict(
        env=env,
        policy=policy,
        baseline=LinearFeatureBaseline(env_spec=env.spec),
        batch_size=10000,
        max_path_length=env.horizon,
        n_itr=50,
        discount=0.99,
        step_size=0.01,
        # Uncomment both lines (this and the plot parameter below) to enable plotting
        # plot=True,
    )
    algo = TRPO(**trpo_settings)
    algo.train()
示例9: run_task
# 需要導入模塊: from rllab.baselines import linear_feature_baseline [as 別名]
# 或者: from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline [as 別名]
def run_task(*_):
    """Train TRPO on a normalized ``Pendulum-v0`` for 50 iterations.

    Positional arguments are accepted and ignored. The optimizer is a
    ``ConjugateGradientOptimizer`` using a ``FiniteDifferenceHvp`` with
    ``base_eps=1e-5`` and ``symmetric=False``.
    """
    # Different action spaces may need different policies: a Box action space
    # works with a GaussianMLPPolicy, whereas a Discrete action space may need
    # a CategoricalMLPPolicy (see the trpo_gym_cartpole.py example).
    gym_env = GymEnv("Pendulum-v0", record_video=False, force_reset=True)
    env = normalize(gym_env)

    policy = GaussianMLPPolicy(
        env_spec=env.spec,
        # Two hidden layers of 32 units each.
        hidden_sizes=(32, 32),
    )
    baseline = LinearFeatureBaseline(env_spec=env.spec)

    hvp = FiniteDifferenceHvp(base_eps=1e-5, symmetric=False)
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=env.horizon,
        n_itr=50,
        discount=0.99,
        step_size=0.01,
        optimizer=ConjugateGradientOptimizer(hvp_approach=hvp),
        # Uncomment both lines (this and the plot parameter below) to enable plotting
        # plot=True,
    )
    algo.train()
示例10: main
# 需要導入模塊: from rllab.baselines import linear_feature_baseline [as 別名]
# 或者: from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline [as 別名]
def main():
    """Run ``IRLTRPO`` with a ``GAIL`` model on ``Pendulum-v0``.

    Expert trajectories are obtained from
    ``load_latest_experts('data/irl/pendulum2', n=5)``. The environment
    reward is zeroed and the entropy weight is set to ``0.0``.
    """
    env = GymEnv('Pendulum-v0', record_video=False, record_log=False)
    experts = load_latest_experts('data/irl/pendulum2', n=5)
    irl_model = GAIL(env_spec=env.spec, expert_trajs=experts)

    policy = GaussianMLPPolicy(
        observation_dim=np.prod(env.observation_space.shape),
        action_dim=np.prod(env.action_space.shape),
        # Two hidden layers of 32 units each.
        hidden_sizes=(32, 32),
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = IRLTRPO(
        env=env,
        policy=policy,
        irl_model=irl_model,
        n_itr=200,
        batch_size=4000,
        max_path_length=100,
        discount=0.99,
        store_paths=True,
        discrim_train_itrs=50,
        irl_model_wt=1.0,
        # GAIL should not use entropy unless for exploration.
        entropy_weight=0.0,
        zero_environment_reward=True,
        baseline=baseline,
    )
    algo.train()
示例11: main
# 需要導入模塊: from rllab.baselines import linear_feature_baseline [as 別名]
# 或者: from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline [as 別名]
def main(exp_name=None, fusion=True):
    """Run ``IRLTRPO`` with an ``EAIRL`` model on ``CustomAnt-v0``.

    Args:
        exp_name: Currently unused; the log directory is fixed to
            ``data/ant_state_irl``. For multiple runs, a per-run directory
            ``'data/ant_state_irl/%s' % exp_name`` could be used instead.
        fusion: Passed to ``EAIRL`` only. ``Qvar`` and the first
            ``Empowerment`` model, as well as the ``'t_efn'`` one, are
            constructed with ``fusion=True`` regardless of this argument.
    """
    env = TfEnv(CustomGymEnv('CustomAnt-v0', record_video=False, record_log=False))

    # Load ~2 iterations worth of data from each forward RL experiment as demos.
    # (Single-run alternative: load_latest_experts('data/ant_data_collect', n=5).)
    experts = load_latest_experts_multiple_runs('data/ant_data_collect', n=2)

    # qvar: inverse model q(a|s,s')
    qvar = GaussianMLPInversePolicy(
        name='qvar_model',
        env_spec=env.spec,
        hidden_sizes=(32, 32),
    )
    qvar_model = Qvar(
        env=env,
        qvar=qvar,
        expert_trajs=experts,
        fusion=True,
        max_itrs=10,
    )

    # Empowerment-based Adversarial Inverse Reinforcement Learning;
    # score_discrim is set to True.
    irl_model = EAIRL(
        env=env,
        expert_trajs=experts,
        state_only=False,
        fusion=fusion,
        max_itrs=10,
        score_discrim=True,
    )

    # Empowerment-based potential functions gamma * Phi(s') - Phi(s)
    empw_model = Empowerment(env=env, fusion=True, max_itrs=4)
    t_empw_model = Empowerment(
        env=env,
        scope='t_efn',
        fusion=True,
        max_itrs=2,
        name='empowerment2',
    )

    policy = GaussianMLPPolicy(
        name='policy',
        env_spec=env.spec,
        hidden_sizes=(32, 32),
    )
    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = IRLTRPO(
        env=env,
        policy=policy,
        empw=empw_model,
        tempw=t_empw_model,
        qvar_model=qvar_model,
        irl_model=irl_model,
        n_itr=130,
        batch_size=20000,
        max_path_length=500,
        discount=0.99,
        store_paths=True,
        target_empw_update=5,
        irl_model_wt=1.0,
        entropy_weight=0.1,
        lambda_i=1.0,
        zero_environment_reward=True,
        baseline=baseline,
    )

    with rllab_logdir(algo=algo, dirname='data/ant_state_irl'), tf.Session():
        algo.train()
示例12: main
# 需要導入模塊: from rllab.baselines import linear_feature_baseline [as 別名]
# 或者: from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline [as 別名]
def main(exp_name=None, fusion=False):
    """Run ``IRLTRPO`` with a state-only ``AIRL`` model on ``CustomAnt-v0``.

    Args:
        exp_name: Interpolated into the log directory
            ``data/ant_state_irl/<exp_name>``.
        fusion: Passed to ``AIRL`` as ``fusion``.
    """
    env = TfEnv(CustomGymEnv('CustomAnt-v0', record_video=False, record_log=False))

    # Load ~2 iterations worth of data from each forward RL experiment as demos.
    experts = load_latest_experts_multiple_runs('data/ant_data_collect', n=2)

    irl_model = AIRL(
        env=env,
        expert_trajs=experts,
        state_only=True,
        fusion=fusion,
        max_itrs=10,
    )
    policy = GaussianMLPPolicy(
        name='policy',
        env_spec=env.spec,
        hidden_sizes=(32, 32),
    )

    irl_settings = dict(
        env=env,
        policy=policy,
        irl_model=irl_model,
        n_itr=1000,
        batch_size=10000,
        max_path_length=500,
        discount=0.99,
        store_paths=True,
        irl_model_wt=1.0,
        entropy_weight=0.1,
        zero_environment_reward=True,
        baseline=LinearFeatureBaseline(env_spec=env.spec),
    )
    algo = IRLTRPO(**irl_settings)

    log_dir = 'data/ant_state_irl/%s' % exp_name
    with rllab_logdir(algo=algo, dirname=log_dir), tf.Session():
        algo.train()
示例13: main
# 需要導入模塊: from rllab.baselines import linear_feature_baseline [as 別名]
# 或者: from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline [as 別名]
def main(exp_name, params_folder=None):
    """Train a policy on ``DisabledAnt-v0`` using previously saved IRL params.

    The IRL parameters are loaded with ``load_prior_params`` from
    ``DATA_DIR/<params_folder>/itr_100.pkl`` and handed to ``IRLTRPO`` as
    ``init_irl_params``; the IRL model itself is not trained
    (``train_irl=False``).

    Args:
        exp_name: Passed to ``IRLTRPO`` as ``log_experiment_name`` and
            interpolated into the log directory
            ``data/ant_transfer/<exp_name>``.
        params_folder: Sub-directory of ``DATA_DIR`` holding the saved
            ``itr_<n>.pkl`` files; also passed as ``log_params_folder``.
            Despite the ``None`` default (kept for signature compatibility)
            a value is required.

    Raises:
        ValueError: If ``params_folder`` is ``None``. Previously ``None``
            was formatted into the path as the literal ``'None'`` and the
            failure only surfaced later, when loading the parameters.
    """
    # Fail fast, before building the environment, instead of composing the
    # bogus path '<DATA_DIR>/None/itr_100.pkl'.
    if params_folder is None:
        raise ValueError(
            'params_folder is required: it names the sub-directory of '
            'DATA_DIR that holds the saved IRL parameters'
        )

    env = TfEnv(CustomGymEnv('DisabledAnt-v0', record_video=False, record_log=False))

    irl_itr = 100  # earlier IRL iterations overfit less; 100 seems to work well.
    params_file = os.path.join(DATA_DIR, '%s/itr_%d.pkl' % (params_folder, irl_itr))
    prior_params = load_prior_params(params_file)

    irl_model = AIRL(env=env, expert_trajs=None, state_only=True)
    policy = GaussianMLPPolicy(name='policy', env_spec=env.spec, hidden_sizes=(32, 32))
    algo = IRLTRPO(
        init_irl_params=prior_params,
        env=env,
        policy=policy,
        irl_model=irl_model,
        n_itr=1000,
        batch_size=10000,
        max_path_length=500,
        discount=0.99,
        store_paths=False,
        train_irl=False,
        irl_model_wt=1.0,
        entropy_weight=0.1,
        zero_environment_reward=True,
        baseline=LinearFeatureBaseline(env_spec=env.spec),
        log_params_folder=params_folder,
        log_experiment_name=exp_name,
    )
    with rllab_logdir(algo=algo, dirname='data/ant_transfer/%s' % exp_name):
        with tf.Session():
            algo.train()
示例14: main
# 需要導入模塊: from rllab.baselines import linear_feature_baseline [as 別名]
# 或者: from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline [as 別名]
def main():
    """Run ``IRLTRPO`` with a ``GAIL`` model on a ``TfEnv`` Pendulum env.

    Expert trajectories are obtained from
    ``load_latest_experts('data/pendulum', n=5)``; training runs inside
    ``rllab_logdir(..., dirname='data/pendulum_gail')`` and a
    ``tf.Session``.
    """
    gym_env = GymEnv('Pendulum-v0', record_video=False, record_log=False)
    env = TfEnv(gym_env)

    experts = load_latest_experts('data/pendulum', n=5)
    irl_model = GAIL(env_spec=env.spec, expert_trajs=experts)
    policy = GaussianMLPPolicy(
        name='policy',
        env_spec=env.spec,
        hidden_sizes=(32, 32),
    )

    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = IRLTRPO(
        env=env,
        policy=policy,
        irl_model=irl_model,
        n_itr=200,
        batch_size=1000,
        max_path_length=100,
        discount=0.99,
        store_paths=True,
        discrim_train_itrs=50,
        irl_model_wt=1.0,
        # GAIL should not use entropy unless for exploration.
        entropy_weight=0.0,
        zero_environment_reward=True,
        baseline=baseline,
    )

    with rllab_logdir(algo=algo, dirname='data/pendulum_gail'), tf.Session():
        algo.train()
示例15: main
# 需要導入模塊: from rllab.baselines import linear_feature_baseline [as 別名]
# 或者: from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline [as 別名]
def main(exp_name=None, params_folder='data/ant_state_irl'):
    """Train a policy on ``DisabledAnt-v0`` from saved EAIRL IRL parameters.

    The parameter file is ``DATA_DIR/itr_90.pkl``; only its ``'irl_params'``
    entry is passed to ``IRLTRPO``. The empowerment, qvar and policy initial
    parameters are passed as ``None``, and training of the IRL, empowerment
    and qvar models is switched off.

    Args:
        exp_name: Passed to ``IRLTRPO`` as ``log_experiment_name``. It is not
            part of the log directory, which is fixed to
            ``data/ant_transfer``.
        params_folder: Passed to ``IRLTRPO`` as ``log_params_folder``. It is
            not used to locate the parameter file.
    """
    # Alternative environment that was tried:
    # CustomGymEnv('PointMazeLeft-v0', record_video=True, record_log=True, force_reset=True)
    gym_env = CustomGymEnv(
        'DisabledAnt-v0',
        record_video=False,
        record_log=False,
        force_reset=False,
    )
    env = TfEnv(gym_env)

    # Earlier IRL iterations overfit less; either 80 or 90 seems to work well.
    # The author usually searches through 60, 65, 70, 75, ... up to 100.
    irl_itr = 90
    # Per-folder alternative:
    # os.path.join(DATA_DIR, '%s/itr_%d.pkl' % (params_folder, irl_itr))
    params_file = os.path.join(DATA_DIR, 'itr_%d.pkl' % (irl_itr))
    prior_params = load_prior_params(params_file)

    # Disabled: loading a separate set of prior parameters for q.
    # q_itr = 400  # earlier IRL iterations overfit less; 100 seems to work well.
    # params_file = os.path.join(DATA_DIR, 'itr_%d.pkl' % (q_itr))
    # prior_params_q = load_prior_params(params_file)

    qvar = GaussianMLPInversePolicy(
        name='qvar_model',
        env_spec=env.spec,
        hidden_sizes=(32, 32),
    )
    qvar_model = Qvar(env=env, qvar=qvar, expert_trajs=None, max_itrs=10)
    irl_model = EAIRL(
        env=env,
        expert_trajs=None,
        state_only=False,
        score_discrim=False,
    )
    empw_model = Empowerment(env=env, max_itrs=1)
    t_empw_model = Empowerment(
        env=env,
        scope='t_efn',
        max_itrs=2,
        name='empowerment2',
    )
    policy = GaussianMLPPolicy(
        name='policy',
        env_spec=env.spec,
        hidden_sizes=(32, 32),
    )

    irl_params = prior_params['irl_params']
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = IRLTRPO(
        init_irl_params=irl_params,
        # Alternatives left disabled: prior_params['empw_params'],
        # prior_params['qvar_params'], prior_params['policy_params'].
        init_empw_params=None,
        init_qvar_params=None,
        init_policy_params=None,
        env=env,
        policy=policy,
        empw=empw_model,
        tempw=t_empw_model,
        qvar_model=qvar_model,
        irl_model=irl_model,
        n_itr=2000,
        batch_size=20000,
        max_path_length=500,
        discount=0.99,
        store_paths=False,
        train_irl=False,
        train_empw=False,
        train_qvar=False,
        irl_model_wt=1.0,
        entropy_weight=0.1,
        zero_environment_reward=True,
        baseline=baseline,
        log_params_folder=params_folder,
        log_experiment_name=exp_name,
    )

    # Per-run alternative: dirname='data/ant_transfer%s' % exp_name
    with rllab_logdir(algo=algo, dirname='data/ant_transfer'), tf.Session():
        algo.train()