This page collects typical usage examples of the Java class burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling. If you are unsure what SparseSampling does or how to use it, the curated code examples below should help.
The SparseSampling class belongs to the burlap.behavior.singleagent.planning.stochastic.sparsesampling package. Fifteen code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps surface better Java code examples.
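Most of the snippets below follow the same pattern: construct SparseSampling with a domain, a discount factor, a hashing factory, a planning horizon h, and a per-state-action sample count c, then wrap the planner in a GreedyQPolicy and roll it out. The following minimal sketch distills that pattern from Example 2; the import paths, the helper method name, and the parameter values are assumptions based on BURLAP 3's package layout, so adapt them to your version.

import burlap.behavior.policy.GreedyQPolicy;
import burlap.behavior.policy.Policy;
import burlap.behavior.policy.PolicyUtils;
import burlap.behavior.singleagent.Episode;
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.SADomain;
import burlap.statehashing.simple.SimpleHashableStateFactory;

// Minimal sketch (assumed helper): plan with sparse sampling and roll out the greedy policy.
public static Episode planAndRollout(SADomain domain, State initialState){
    // gamma = 0.99; h = 4 is the tree height; c = 10 transition samples per state-action node
    SparseSampling ss = new SparseSampling(domain, 0.99, new SimpleHashableStateFactory(), 4, 10);
    ss.setForgetPreviousPlanResults(true); // discard the planning tree after each query
    ss.toggleDebugPrinting(false);

    // SparseSampling plans lazily: the greedy policy triggers planning at each queried state
    Policy p = new GreedyQPolicy(ss);
    return PolicyUtils.rollout(p, initialState, domain.getModel(), 200);
}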
Example 1: IPSS
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; // import the dependency package/class
public static void IPSS(){
    InvertedPendulum ip = new InvertedPendulum();
    ip.physParams.actionNoise = 0.;
    Domain domain = ip.generateDomain();
    RewardFunction rf = new InvertedPendulum.InvertedPendulumRewardFunction(Math.PI/8.);
    TerminalFunction tf = new InvertedPendulum.InvertedPendulumTerminalFunction(Math.PI/8.);
    State initialState = InvertedPendulum.getInitialState(domain);

    // sparse sampling planner: gamma = 1, horizon h = 10, c = 1 transition sample per state-action
    SparseSampling ss = new SparseSampling(domain, rf, tf, 1, new SimpleHashableStateFactory(), 10, 1);
    ss.setForgetPreviousPlanResults(true);
    ss.toggleDebugPrinting(false);

    // greedy policy over the planner's Q-values; planning happens lazily at each queried state
    Policy p = new GreedyQPolicy(ss);

    EpisodeAnalysis ea = p.evaluateBehavior(initialState, rf, tf, 500);
    System.out.println("Num steps: " + ea.maxTimeStep());

    Visualizer v = InvertedPendulumVisualizer.getInvertedPendulumVisualizer();
    new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea));
}
Example 2: IPSS
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; // import the dependency package/class
public static void IPSS(){
    InvertedPendulum ip = new InvertedPendulum();
    ip.physParams.actionNoise = 0.;
    RewardFunction rf = new InvertedPendulum.InvertedPendulumRewardFunction(Math.PI/8.);
    TerminalFunction tf = new InvertedPendulum.InvertedPendulumTerminalFunction(Math.PI/8.);
    ip.setRf(rf);
    ip.setTf(tf);
    SADomain domain = ip.generateDomain();
    State initialState = new InvertedPendulumState();

    // sparse sampling planner: gamma = 1, horizon h = 10, c = 1 transition sample per state-action
    SparseSampling ss = new SparseSampling(domain, 1, new SimpleHashableStateFactory(), 10, 1);
    ss.setForgetPreviousPlanResults(true);
    ss.toggleDebugPrinting(false);

    Policy p = new GreedyQPolicy(ss);

    Episode e = PolicyUtils.rollout(p, initialState, domain.getModel(), 500);
    System.out.println("Num steps: " + e.maxTimeStep());

    Visualizer v = CartPoleVisualizer.getCartPoleVisualizer();
    new EpisodeSequenceVisualizer(v, domain, Arrays.asList(e));
}
Example 3: DifferentiableSparseSampling
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; // import the dependency package/class
/**
 * Initializes. The model of this planner will automatically be set to a {@link CustomRewardModel} using the provided reward function.
 * @param domain the problem domain
 * @param rf the differentiable reward function
 * @param gamma the discount factor
 * @param hashingFactory the hashing factory used to compare state equality
 * @param h the planning horizon
 * @param c how many samples from the transition dynamics to use. Set to -1 to use the full (unsampled) transition dynamics.
 * @param boltzBeta the Boltzmann beta parameter for the differentiable Boltzmann (softmax) backup equation. The larger the value, the more deterministic the backup; the closer to zero, the softer.
 */
public DifferentiableSparseSampling(SADomain domain, DifferentiableRF rf, double gamma, HashableStateFactory hashingFactory, int h, int c, double boltzBeta){
    this.solverInit(domain, gamma, hashingFactory);
    this.h = h;
    this.c = c;
    this.rf = rf;
    this.boltzBeta = boltzBeta;
    this.nodesByHeight = new HashMap<SparseSampling.HashedHeightState, DiffStateNode>();
    this.rootLevelQValues = new HashMap<HashableState, DifferentiableSparseSampling.QAndQGradient>();
    this.rfDim = rf.numParameters();
    this.vinit = new VanillaDiffVinit(new ConstantValueFunction(), rf);
    this.model = new CustomRewardModel(domain.getModel(), rf);
    this.operator = new DifferentiableSoftmaxOperator(boltzBeta);
    this.debugCode = 6368290;
}
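As a point of reference, here is a hedged sketch of how this constructor might be invoked. Only the constructor signature is taken from the example above; the wrapper method, variable names, and parameter values are hypothetical, and imports are omitted as in the other examples on this page.

// Hypothetical helper showing one way to call the constructor above; imports omitted.
public static DifferentiableSparseSampling buildDiffPlanner(SADomain domain, DifferentiableRF myDiffRF){
    DifferentiableSparseSampling dss = new DifferentiableSparseSampling(
            domain,                            // the problem domain
            myDiffRF,                          // the differentiable reward function being learned
            0.99,                              // gamma: discount factor
            new SimpleHashableStateFactory(),  // hashing factory for state equality
            5,                                 // h: planning horizon
            -1,                                // c: -1 uses the full transition dynamics
            10.0);                             // boltzBeta: larger values give a more deterministic backup
    dss.toggleDebugPrinting(false);
    return dss;
}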
Example 4: RewardValueProjection
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; // import the dependency package/class
/**
 * Initializes.
 * @param rf the input {@link burlap.oomdp.singleagent.RewardFunction} to project for one step.
 * @param projectionType the type of reward projection to use.
 * @param domain the {@link burlap.oomdp.core.Domain} in which the {@link burlap.oomdp.singleagent.RewardFunction} is evaluated.
 */
public RewardValueProjection(RewardFunction rf, RewardProjectionType projectionType, Domain domain){
    this.rf = rf;
    this.projectionType = projectionType;
    this.domain = domain;
    if(this.projectionType == RewardProjectionType.ONESTEP){
        this.oneStepBellmanPlanner = new SparseSampling(domain, rf, new NullTermination(), 1., new SimpleHashableStateFactory(), 1, -1);
        this.oneStepBellmanPlanner.toggleDebugPrinting(false);
        this.oneStepBellmanPlanner.setForgetPreviousPlanResults(true);
    }
}
Example 5: DifferentiableSparseSampling
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; // import the dependency package/class
/**
 * Initializes.
 * @param domain the problem domain
 * @param rf the differentiable reward function
 * @param tf the terminal function
 * @param gamma the discount factor
 * @param hashingFactory the hashing factory used to compare state equality
 * @param h the planning horizon
 * @param c how many samples from the transition dynamics to use. Set to -1 to use the full (unsampled) transition dynamics.
 * @param boltzBeta the Boltzmann beta parameter for the differentiable Boltzmann (softmax) backup equation. The larger the value, the more deterministic the backup; the closer to zero, the softer.
 */
public DifferentiableSparseSampling(Domain domain, DifferentiableRF rf, TerminalFunction tf, double gamma, HashableStateFactory hashingFactory, int h, int c, double boltzBeta){
    this.solverInit(domain, rf, tf, gamma, hashingFactory);
    this.h = h;
    this.c = c;
    this.boltzBeta = boltzBeta;
    this.nodesByHeight = new HashMap<SparseSampling.HashedHeightState, DiffStateNode>();
    this.rootLevelQValues = new HashMap<HashableState, DifferentiableSparseSampling.QAndQGradient>();
    this.rfDim = rf.getParameterDimension();
    this.vinit = new VanillaDiffVinit(new ValueFunctionInitialization.ConstantValueFunctionInitialization(), rf);
    this.debugCode = 6368290;
}
Example 6: getStateNode
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; // import the dependency package/class
/**
 * Returns the state node for the given state at the given height in the tree, creating and indexing it first if it does not already exist.
 * @param s the state
 * @param height the height (distance from leaf node) of the node.
 * @return the state node for the given state at the given height in the tree
 */
protected DiffStateNode getStateNode(State s, int height){
    HashableState sh = this.hashingFactory.hashState(s);
    SparseSampling.HashedHeightState hhs = new SparseSampling.HashedHeightState(sh, height);
    DiffStateNode sn = this.nodesByHeight.get(hhs);
    if(sn == null){
        sn = new DiffStateNode(sh, height);
        this.nodesByHeight.put(hhs, sn);
    }
    return sn;
}
Example 7: runIteration
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; // import the dependency package/class
/**
 * Runs a single iteration of value iteration. Note that if the state samples have not been set, it will throw a runtime exception.
 * @return the maximum change in the value function.
 */
public double runIteration(){
    if(this.samples == null){
        throw new RuntimeException("FittedVI cannot run value iteration because the state samples have not been set. Use the setSamples method or the constructor to set them.");
    }

    SparseSampling ss = new SparseSampling(this.domain, this.rf, this.tf, this.gamma, this.hashingFactory, this.planningDepth, this.transitionSamples);
    ss.setValueForLeafNodes(this.leafNodeInit);
    ss.toggleDebugPrinting(false);

    List<SupervisedVFA.SupervisedVFAInstance> instances = new ArrayList<SupervisedVFA.SupervisedVFAInstance>(this.samples.size());
    List<Double> oldVs = new ArrayList<Double>(this.samples.size());
    for(State s : this.samples){
        oldVs.add(this.valueFunction.value(s));
        instances.add(new SupervisedVFA.SupervisedVFAInstance(s, QFunctionHelper.getOptimalValue(ss, s)));
    }

    this.valueFunction = this.valueFunctionTrainer.train(instances);

    double maxDiff = 0.;
    for(int i = 0; i < this.samples.size(); i++){
        double newV = this.valueFunction.value(this.samples.get(i));
        double diff = Math.abs(newV - oldVs.get(i));
        maxDiff = Math.max(maxDiff, diff);
    }

    return maxDiff;
}
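Because runIteration returns the maximum change in the value function, a simple convergence loop is natural. Below is a hypothetical convenience method, written as it could sit next to runIteration in the same class; it is a sketch, not part of the excerpted source, and it relies only on the runIteration method shown above.

/**
 * Hypothetical helper (sketch): runs value iteration until the maximum change in the
 * value function drops below maxDelta or maxIterations iterations have been performed.
 */
public void runUntilConvergence(double maxDelta, int maxIterations){
    for(int i = 0; i < maxIterations; i++){
        double change = this.runIteration();
        if(change < maxDelta){
            break; // the value function has approximately converged on the sample states
        }
    }
}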
Example 8: getQs
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; // import the dependency package/class
@Override
public List<QValue> getQs(State s) {
    SparseSampling ss = new SparseSampling(this.domain, this.rf, this.tf, this.gamma, this.hashingFactory, this.controlDepth, this.transitionSamples);
    ss.setValueForLeafNodes(this.leafNodeInit);
    ss.toggleDebugPrinting(false);
    return ss.getQs(s);
}
Example 9: getQ
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; // import the dependency package/class
@Override
public QValue getQ(State s, AbstractGroundedAction a) {
    SparseSampling ss = new SparseSampling(this.domain, this.rf, this.tf, this.gamma, this.hashingFactory, this.controlDepth, this.transitionSamples);
    ss.setValueForLeafNodes(this.leafNodeInit);
    ss.toggleDebugPrinting(false);
    return ss.getQ(s, a);
}
Example 10: BeliefSparseSampling
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; // import the dependency package/class
/**
 * Initializes the planner.
 * @param domain the POMDP domain
 * @param rf the POMDP reward function
 * @param discount the discount factor
 * @param hashingFactory the Belief MDP {@link burlap.oomdp.statehashing.HashableStateFactory} that {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} will use.
 * @param h the height of the {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} tree.
 * @param c the number of samples {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} will use. Set to -1 to use the full Belief MDP transition dynamics.
 */
public BeliefSparseSampling(PODomain domain, RewardFunction rf, double discount, HashableStateFactory hashingFactory, int h, int c){
    this.solverInit(domain, rf, null, discount, hashingFactory);
    BeliefMDPGenerator bdgen = new BeliefMDPGenerator(domain);
    this.beliefMDP = (SADomain)bdgen.generateDomain();
    this.beliefRF = new BeliefMDPGenerator.BeliefRF(domain, rf);

    this.mdpPlanner = new SparseSampling(this.beliefMDP, this.beliefRF, new NullTermination(), discount, hashingFactory, h, Math.max(1, c));
    if(c < 1){
        this.mdpPlanner.setComputeExactValueFunction(true);
    }
}
Example 11: RewardValueProjection
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; // import the dependency package/class
/**
 * Initializes.
 * @param rf the input {@link RewardFunction} to project for one step.
 * @param projectionType the type of reward projection to use.
 * @param domain the {@link burlap.mdp.core.Domain} in which the {@link RewardFunction} is evaluated.
 */
public RewardValueProjection(RewardFunction rf, RewardProjectionType projectionType, SADomain domain){
    this.rf = rf;
    this.projectionType = projectionType;
    this.domain = domain;
    if(this.projectionType == RewardProjectionType.ONESTEP){
        this.oneStepBellmanPlanner = new SparseSampling(domain, 1., new SimpleHashableStateFactory(), 1, -1);
        this.oneStepBellmanPlanner.setModel(new CustomRewardNoTermModel(domain.getModel(), rf));
        this.oneStepBellmanPlanner.toggleDebugPrinting(false);
        this.oneStepBellmanPlanner.setForgetPreviousPlanResults(true);
    }
}
Example 12: runIteration
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; // import the dependency package/class
/**
 * Runs a single iteration of value iteration. Note that if the state samples have not been set, it will throw a runtime exception.
 * @return the maximum change in the value function.
 */
public double runIteration(){
    if(this.samples == null){
        throw new RuntimeException("FittedVI cannot run value iteration because the state samples have not been set. Use the setSamples method or the constructor to set them.");
    }

    SparseSampling ss = new SparseSampling(this.domain, this.gamma, this.hashingFactory, this.planningDepth, this.transitionSamples);
    ss.setModel(this.model);
    ss.setValueForLeafNodes(this.leafNodeInit);
    ss.toggleDebugPrinting(false);

    List<SupervisedVFA.SupervisedVFAInstance> instances = new ArrayList<SupervisedVFA.SupervisedVFAInstance>(this.samples.size());
    List<Double> oldVs = new ArrayList<Double>(this.samples.size());
    for(State s : this.samples){
        oldVs.add(this.valueFunction.value(s));
        instances.add(new SupervisedVFA.SupervisedVFAInstance(s, Helper.maxQ(ss, s)));
    }

    this.valueFunction = this.valueFunctionTrainer.train(instances);

    double maxDiff = 0.;
    for(int i = 0; i < this.samples.size(); i++){
        double newV = this.valueFunction.value(this.samples.get(i));
        double diff = Math.abs(newV - oldVs.get(i));
        maxDiff = Math.max(maxDiff, diff);
    }

    return maxDiff;
}
Example 13: qValues
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; // import the dependency package/class
@Override
public List<QValue> qValues(State s) {
    SparseSampling ss = new SparseSampling(this.domain, this.gamma, this.hashingFactory, this.controlDepth, this.transitionSamples);
    ss.setModel(model);
    ss.setValueForLeafNodes(this.leafNodeInit);
    ss.toggleDebugPrinting(false);
    return ss.qValues(s);
}
Example 14: qValue
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; // import the dependency package/class
@Override
public double qValue(State s, Action a) {
    SparseSampling ss = new SparseSampling(this.domain, this.gamma, this.hashingFactory, this.controlDepth, this.transitionSamples);
    ss.setModel(model);
    ss.setValueForLeafNodes(this.leafNodeInit);
    ss.toggleDebugPrinting(false);
    return ss.qValue(s, a);
}
Example 15: BeliefSparseSampling
import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; // import the dependency package/class
/**
 * Initializes the planner.
 * @param domain the POMDP domain
 * @param discount the discount factor
 * @param hashingFactory the Belief MDP {@link burlap.statehashing.HashableStateFactory} that {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} will use.
 * @param h the height of the {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} tree.
 * @param c the number of samples {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} will use. Set to -1 to use the full Belief MDP transition dynamics.
 */
public BeliefSparseSampling(PODomain domain, double discount, HashableStateFactory hashingFactory, int h, int c){
    this.solverInit(domain, discount, hashingFactory);
    BeliefMDPGenerator bdgen = new BeliefMDPGenerator(domain);
    this.beliefMDP = (SADomain)bdgen.generateDomain();

    this.mdpPlanner = new SparseSampling(this.beliefMDP, discount, hashingFactory, h, Math.max(1, c));
    if(c < 1){
        this.mdpPlanner.setComputeExactValueFunction(true);
    }
}