本文整理匯總了Python中baselines.gail.dataset.mujoco_dset.Mujoco_Dset方法的典型用法代碼示例。如果您正苦於以下問題:Python mujoco_dset.Mujoco_Dset方法的具體用法?Python mujoco_dset.Mujoco_Dset怎麽用?Python mujoco_dset.Mujoco_Dset使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類baselines.gail.dataset.mujoco_dset
的用法示例。
在下文中一共展示了mujoco_dset.Mujoco_Dset方法的3個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: load_dataset
# 需要導入模塊: from baselines.gail.dataset import mujoco_dset [as 別名]
# 或者: from baselines.gail.dataset.mujoco_dset import Mujoco_Dset [as 別名]
def load_dataset(expert_path):
dataset = Mujoco_Dset(expert_path=expert_path)
return dataset
示例2: main
# 需要導入模塊: from baselines.gail.dataset import mujoco_dset [as 別名]
# 或者: from baselines.gail.dataset.mujoco_dset import Mujoco_Dset [as 別名]
def main(args):
U.make_session(num_cpu=1).__enter__()
set_global_seeds(args.seed)
env = gym.make(args.env_id)
def policy_fn(name, ob_space, ac_space, reuse=False):
return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
reuse=reuse, hid_size=args.policy_hidden_size, num_hid_layers=2)
env = bench.Monitor(env, logger.get_dir() and
osp.join(logger.get_dir(), "monitor.json"))
env.seed(args.seed)
gym.logger.setLevel(logging.WARN)
task_name = get_task_name(args)
args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
args.log_dir = osp.join(args.log_dir, task_name)
dataset = Mujoco_Dset(expert_path=args.expert_path, traj_limitation=args.traj_limitation)
savedir_fname = learn(env,
policy_fn,
dataset,
max_iters=args.BC_max_iter,
ckpt_dir=args.checkpoint_dir,
log_dir=args.log_dir,
task_name=task_name,
verbose=True)
avg_len, avg_ret = runner(env,
policy_fn,
savedir_fname,
timesteps_per_batch=1024,
number_trajs=10,
stochastic_policy=args.stochastic_policy,
save=args.save_sample,
reuse=True)
示例3: main
# 需要導入模塊: from baselines.gail.dataset import mujoco_dset [as 別名]
# 或者: from baselines.gail.dataset.mujoco_dset import Mujoco_Dset [as 別名]
def main(args):
U.make_session(num_cpu=1).__enter__()
set_global_seeds(args.seed)
env = gym.make(args.env_id)
def policy_fn(name, ob_space, ac_space, reuse=False):
return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
reuse=reuse, hid_size=args.policy_hidden_size, num_hid_layers=2)
env = bench.Monitor(env, logger.get_dir() and
osp.join(logger.get_dir(), "monitor.json"))
env.seed(args.seed)
gym.logger.setLevel(logging.WARN)
task_name = get_task_name(args)
args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
args.log_dir = osp.join(args.log_dir, task_name)
if args.task == 'train':
dataset = Mujoco_Dset(expert_path=args.expert_path, traj_limitation=args.traj_limitation)
reward_giver = TransitionClassifier(env, args.adversary_hidden_size, entcoeff=args.adversary_entcoeff)
train(env,
args.seed,
policy_fn,
reward_giver,
dataset,
args.algo,
args.g_step,
args.d_step,
args.policy_entcoeff,
args.num_timesteps,
args.save_per_iter,
args.checkpoint_dir,
args.log_dir,
args.pretrained,
args.BC_max_iter,
task_name
)
elif args.task == 'evaluate':
runner(env,
policy_fn,
args.load_model_path,
timesteps_per_batch=1024,
number_trajs=10,
stochastic_policy=args.stochastic_policy,
save=args.save_sample
)
else:
raise NotImplementedError
env.close()