This page collects typical usage examples of the Java method burlap.behavior.policy.Policy.evaluateBehavior. If you are unsure what Policy.evaluateBehavior does, how to call it, or what a working invocation looks like, the curated examples here should help. You can also read more about the containing class, burlap.behavior.policy.Policy.
The six code examples of Policy.evaluateBehavior below are sorted by popularity by default.
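Before the examples, here is a minimal sketch of the two evaluateBehavior overloads they exercise. The method and class names come from the examples below; the import paths assume BURLAP 2.x, and the wrapper class and method names are illustrative only:

import burlap.behavior.policy.Policy;
import burlap.behavior.singleagent.EpisodeAnalysis;
import burlap.oomdp.core.TerminalFunction;
import burlap.oomdp.core.states.State;
import burlap.oomdp.singleagent.RewardFunction;
import burlap.oomdp.singleagent.environment.Environment;

public class EvaluateBehaviorSketch {

    //roll the policy out from a fixed start state, stopping at a terminal state or after maxSteps
    static EpisodeAnalysis rolloutFromState(Policy p, State s, RewardFunction rf,
            TerminalFunction tf, int maxSteps){
        return p.evaluateBehavior(s, rf, tf, maxSteps);
    }

    //roll the policy out by acting in an Environment until it reaches a terminal state
    static EpisodeAnalysis rolloutInEnvironment(Policy p, Environment env){
        return p.evaluateBehavior(env);
    }
}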
Example 1: IPSS
import burlap.behavior.policy.Policy; //import the class that declares the method
public static void IPSS(){

    InvertedPendulum ip = new InvertedPendulum();
    ip.physParams.actionNoise = 0.;
    Domain domain = ip.generateDomain();

    //reward and termination are both defined by keeping the pole within pi/8 radians of vertical
    RewardFunction rf = new InvertedPendulum.InvertedPendulumRewardFunction(Math.PI/8.);
    TerminalFunction tf = new InvertedPendulum.InvertedPendulumTerminalFunction(Math.PI/8.);
    State initialState = InvertedPendulum.getInitialState(domain);

    //sparse sampling with no discounting, search height 10, and 1 transition sample per state-action
    SparseSampling ss = new SparseSampling(domain, rf, tf, 1, new SimpleHashableStateFactory(), 10, 1);
    ss.setForgetPreviousPlanResults(true);
    ss.toggleDebugPrinting(false);
    Policy p = new GreedyQPolicy(ss);

    //roll the greedy policy out from the initial state for at most 500 steps
    EpisodeAnalysis ea = p.evaluateBehavior(initialState, rf, tf, 500);
    System.out.println("Num steps: " + ea.maxTimeStep());

    Visualizer v = InvertedPendulumVisualizer.getInvertedPendulumVisualizer();
    new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea));
}
Example 2: main
import burlap.behavior.policy.Policy; //import the class that declares the method
public static void main(String[] args){

    GridWorldDomain gwd = new GridWorldDomain(11, 11);
    gwd.setMapToFourRooms();

    //only go in the intended direction 80% of the time
    gwd.setProbSucceedTransitionDynamics(0.8);

    Domain domain = gwd.generateDomain();

    //get an initial state with the agent in cell (0,0)
    State s = GridWorldDomain.getOneAgentNoLocationState(domain);
    GridWorldDomain.setAgent(s, 0, 0);

    //all transitions return -1
    RewardFunction rf = new UniformCostRF();

    //terminate in the top-right corner
    TerminalFunction tf = new GridWorldTerminalFunction(10, 10);

    //set up VI with a 0.99 discount factor, a value function initialization
    //that initializes all states to value 0, and 30 iterations over the state space
    VITutorial vi = new VITutorial(domain, rf, tf, 0.99, new SimpleHashableStateFactory(),
            new ValueFunctionInitialization.ConstantValueFunctionInitialization(0.0), 30);

    //run planning from our initial state
    Policy p = vi.planFromState(s);

    //evaluate the policy with one roll-out and visualize the trajectory
    EpisodeAnalysis ea = p.evaluateBehavior(s, rf, tf);

    Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
    new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea));
}
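Note the overload used here: unlike example 1, this call passes no step cap, so the roll-out runs until the terminal function is satisfied (the agent reaches cell 10,10). That is fine for a planned policy on this small grid world; in domains where the policy might never reach a terminal state, the four-argument form with a maximum step count is the safer choice.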
Example 3: MCLSPIFB
import burlap.behavior.policy.Policy; //import the class that declares the method
public static void MCLSPIFB(){

    MountainCar mcGen = new MountainCar();
    Domain domain = mcGen.generateDomain();
    TerminalFunction tf = new MountainCar.ClassicMCTF();
    RewardFunction rf = new GoalBasedRF(tf, 100);

    //collect 5000 SARS transitions, at most 20 steps each, from random start states
    StateGenerator rStateGen = new MCRandomStateGenerator(domain);
    SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
    SARSData dataset = collector.collectNInstances(rStateGen, rf, 5000, 20, tf, null);

    //order-4 Fourier basis over the agent's state variables
    ConcatenatedObjectFeatureVectorGenerator featureVectorGenerator = new ConcatenatedObjectFeatureVectorGenerator(true, MountainCar.CLASSAGENT);
    FourierBasis fb = new FourierBasis(featureVectorGenerator, 4);

    //run LSPI for at most 30 policy iterations, or until the weight change falls below 1e-6
    LSPI lspi = new LSPI(domain, 0.99, fb, dataset);
    Policy p = lspi.runPolicyIteration(30, 1e-6);

    Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
    VisualActionObserver vob = new VisualActionObserver(domain, v);
    vob.initGUI();

    SimulatedEnvironment env = new SimulatedEnvironment(domain, rf, tf, MountainCar.getCleanState(domain, mcGen.physParams));
    EnvironmentServer envServ = new EnvironmentServer(env, vob);

    //run and visualize 5 episodes of the learned policy
    for(int i = 0; i < 5; i++){
        p.evaluateBehavior(envServ);
        envServ.resetEnvironment();
    }

    System.out.println("Finished");
}
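Here evaluateBehavior takes an Environment (the EnvironmentServer wrapping the SimulatedEnvironment) rather than a start state: the policy selects and executes actions through the environment, which forwards each interaction to the attached VisualActionObserver, until a terminal state is reached; resetEnvironment() then prepares the next episode. Examples 4 and 5 follow the same pattern.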
Example 4: main
import burlap.behavior.policy.Policy; //import the class that declares the method
public static void main(String[] args) {

    MountainCar mcGen = new MountainCar();
    Domain domain = mcGen.generateDomain();
    TerminalFunction tf = new MountainCar.ClassicMCTF();
    RewardFunction rf = new GoalBasedRF(tf, 100);

    StateGenerator rStateGen = new MCRandomStateGenerator(domain);
    SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
    SARSData dataset = collector.collectNInstances(rStateGen, rf, 5000, 20, tf, null);

    ConcatenatedObjectFeatureVectorGenerator fvGen = new ConcatenatedObjectFeatureVectorGenerator(true,
            MountainCar.CLASSAGENT);
    FourierBasis fb = new FourierBasis(fvGen, 4);

    LSPI lspi = new LSPI(domain, 0.99, fb, dataset);
    Policy p = lspi.runPolicyIteration(30, 1e-6);

    Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
    VisualActionObserver vob = new VisualActionObserver(domain, v);
    vob.initGUI();

    SimulatedEnvironment env = new SimulatedEnvironment(domain, rf, tf,
            MountainCar.getCleanState(domain, mcGen.physParams));
    EnvironmentServer envServ = new EnvironmentServer(env, vob);

    //same setup as example 3, but run and visualize 100 episodes
    for(int i = 0; i < 100; i++){
        p.evaluateBehavior(envServ);
        envServ.resetEnvironment();
    }

    System.out.println("Finished");
}
Example 5: MCLSPIRBF
import burlap.behavior.policy.Policy; //import the class that declares the method
public static void MCLSPIRBF(){

    MountainCar mcGen = new MountainCar();
    Domain domain = mcGen.generateDomain();
    TerminalFunction tf = new MountainCar.ClassicMCTF();
    RewardFunction rf = new GoalBasedRF(tf, 100);
    State s = MountainCar.getCleanState(domain, mcGen.physParams);

    StateGenerator rStateGen = new MCRandomStateGenerator(domain);
    SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
    SARSData dataset = collector.collectNInstances(rStateGen, rf, 5000, 20, tf, null);

    //place a Gaussian RBF (bandwidth 0.2) on each node of a grid with 5 points per state dimension
    RBFFeatureDatabase rbf = new RBFFeatureDatabase(true);
    StateGridder gridder = new StateGridder();
    gridder.gridEntireDomainSpace(domain, 5);
    List<State> griddedStates = gridder.gridInputState(s);
    DistanceMetric metric = new EuclideanDistance(
            new ConcatenatedObjectFeatureVectorGenerator(true, MountainCar.CLASSAGENT));
    for(State g : griddedStates){
        rbf.addRBF(new GaussianRBF(g, metric, .2));
    }

    LSPI lspi = new LSPI(domain, 0.99, rbf, dataset);
    Policy p = lspi.runPolicyIteration(30, 1e-6);

    Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
    VisualActionObserver vob = new VisualActionObserver(domain, v);
    vob.initGUI();

    SimulatedEnvironment env = new SimulatedEnvironment(domain, rf, tf, s);
    EnvironmentServer envServ = new EnvironmentServer(env, vob);

    for(int i = 0; i < 5; i++){
        p.evaluateBehavior(envServ);
        envServ.resetEnvironment();
    }

    System.out.println("Finished");
}
Example 6: main
import burlap.behavior.policy.Policy; //import the class that declares the method
public static void main(String[] args) {

    GridWorldDomain gwd = new GridWorldDomain(11, 11);
    Domain domain = gwd.generateDomain();
    State s = GridWorldDomain.getOneAgentNoLocationState(domain, 1, 3);

    //roll out a random policy for 30 steps with no rewards and no terminal states
    Policy p = new RandomPolicy(domain);
    EpisodeAnalysis ea = p.evaluateBehavior(s, new NullRewardFunction(), new NullTermination(), 30);

    //serialize the episode to YAML, then parse it back and inspect the result
    String yamlOut = ea.serialize();
    System.out.println(yamlOut);
    System.out.println("\n\n");

    EpisodeAnalysis read = EpisodeAnalysis.parseEpisode(domain, yamlOut);
    System.out.println(read.getActionSequenceString());
    System.out.println(read.getState(0).toString());
    System.out.println(read.actionSequence.size());
    System.out.println(read.stateSequence.size());
}
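Because NullTermination never ends an episode, this roll-out always runs the full 30 steps. The last two printouts verify the round trip: if parsing succeeded, the recovered state sequence should be one element longer than the action sequence, since the episode stores the final state as well.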