

Java SparseSampling Class Code Examples

This article collects typical usage examples of the Java class burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling. If you have been wondering what the SparseSampling class is for, how to use it, or what working examples look like, the curated code examples below should help.


The SparseSampling class belongs to the burlap.behavior.singleagent.planning.stochastic.sparsesampling package. Fifteen code examples of the class are presented below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Java code examples.
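Before the individual examples, here is a minimal sketch of the usage pattern most of them follow. It mirrors Example 2 below and uses the newer BURLAP constructor (domain, discount factor, hashing factory, tree height h, sample count c); the variables domain and initialState are assumed to be an already-built SADomain and State, and the supporting classes (GreedyQPolicy, PolicyUtils, Episode, SimpleHashableStateFactory) are the ones used in Example 2.

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; //import the required package/class

// Sparse-sampling planner: discount 1.0, tree height h = 10, c = 1 transition sample per action.
SparseSampling ss = new SparseSampling(domain, 1, new SimpleHashableStateFactory(), 10, 1);
ss.setForgetPreviousPlanResults(true);  // discard the previous tree so each queried state is planned from scratch
ss.toggleDebugPrinting(false);

// Act greedily with respect to the Q-values the planner computes at each visited state.
Policy p = new GreedyQPolicy(ss);
Episode e = PolicyUtils.rollout(p, initialState, domain.getModel(), 500);
System.out.println("Num steps: " + e.maxTimeStep());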

Example 1: IPSS

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; //import the required package/class
public static void IPSS(){

		InvertedPendulum ip = new InvertedPendulum();
		ip.physParams.actionNoise = 0.;
		Domain domain = ip.generateDomain();
		RewardFunction rf = new InvertedPendulum.InvertedPendulumRewardFunction(Math.PI/8.);
		TerminalFunction tf = new InvertedPendulum.InvertedPendulumTerminalFunction(Math.PI/8.);
		State initialState = InvertedPendulum.getInitialState(domain);

		SparseSampling ss = new SparseSampling(domain, rf, tf, 1, new SimpleHashableStateFactory(), 10, 1);
		ss.setForgetPreviousPlanResults(true);
		ss.toggleDebugPrinting(false);
		Policy p = new GreedyQPolicy(ss);

		EpisodeAnalysis ea = p.evaluateBehavior(initialState, rf, tf, 500);
		System.out.println("Num steps: " + ea.maxTimeStep());
		Visualizer v = InvertedPendulumVisualizer.getInvertedPendulumVisualizer();
		new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea));

	}
 
Developer: f-leno, Project: DOO-Q_BRACIS2016, Lines of code: 21, Source file: ContinuousDomainTutorial.java

Example 2: IPSS

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; //import the required package/class
public static void IPSS(){

		InvertedPendulum ip = new InvertedPendulum();
		ip.physParams.actionNoise = 0.;
		RewardFunction rf = new InvertedPendulum.InvertedPendulumRewardFunction(Math.PI/8.);
		TerminalFunction tf = new InvertedPendulum.InvertedPendulumTerminalFunction(Math.PI/8.);
		ip.setRf(rf);
		ip.setTf(tf);
		SADomain domain = ip.generateDomain();

		State initialState = new InvertedPendulumState();

		SparseSampling ss = new SparseSampling(domain, 1, new SimpleHashableStateFactory(), 10, 1);
		ss.setForgetPreviousPlanResults(true);
		ss.toggleDebugPrinting(false);
		Policy p = new GreedyQPolicy(ss);

		Episode e = PolicyUtils.rollout(p, initialState, domain.getModel(), 500);
		System.out.println("Num steps: " + e.maxTimeStep());
		Visualizer v = CartPoleVisualizer.getCartPoleVisualizer();
		new EpisodeSequenceVisualizer(v, domain, Arrays.asList(e));

	}
 
Developer: jmacglashan, Project: burlap_examples, Lines of code: 24, Source file: ContinuousDomainTutorial.java
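The two IPSS variants above differ only in which BURLAP API they target. Example 1 (older API) passes the reward and terminal functions directly to the SparseSampling constructor, while Example 2 (newer API) attaches them to the domain via setRf/setTf and lets the planner read the model from the domain. The two constructor calls from the examples above are repeated here side by side for comparison:

// Older API (Example 1): reward and terminal functions are constructor arguments.
SparseSampling ssOld = new SparseSampling(domain, rf, tf, 1, new SimpleHashableStateFactory(), 10, 1);

// Newer API (Example 2): the domain carries the model, so only the discount, hashing factory, h, and c are passed.
SparseSampling ssNew = new SparseSampling(domain, 1, new SimpleHashableStateFactory(), 10, 1);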

Example 3: DifferentiableSparseSampling

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; //import the required package/class
/**
 * Initializes. The model of this planner will automatically be set to a {@link CustomRewardModel} using the provided reward function.
 * @param domain the problem domain
 * @param rf the differentiable reward function
 * @param gamma the discount factor
 * @param hashingFactory the hashing factory used to compare state equality
 * @param h the planning horizon
 * @param c how many samples from the transition dynamics to use. Set to -1 to use the full (unsampled) transition dynamics.
 * @param boltzBeta the Boltzmann beta parameter for the differentiable Boltzmann (softmax) backup equation. The larger the value, the more deterministic the backup; the closer to zero, the softer (more uniform).
 */
public DifferentiableSparseSampling(SADomain domain, DifferentiableRF rf, double gamma, HashableStateFactory hashingFactory, int h, int c, double boltzBeta){
	this.solverInit(domain, gamma, hashingFactory);
	this.h = h;
	this.c = c;
	this.rf = rf;
	this.boltzBeta = boltzBeta;
	this.nodesByHeight = new HashMap<SparseSampling.HashedHeightState, DiffStateNode>();
	this.rootLevelQValues = new HashMap<HashableState, DifferentiableSparseSampling.QAndQGradient>();
	this.rfDim = rf.numParameters();

	this.vinit = new VanillaDiffVinit(new ConstantValueFunction(), rf);

	this.model = new CustomRewardModel(domain.getModel(), rf);

	this.operator = new DifferentiableSoftmaxOperator(boltzBeta);

	this.debugCode = 6368290;
}
 
Developer: jmacglashan, Project: burlap, Lines of code: 29, Source file: DifferentiableSparseSampling.java

Example 4: RewardValueProjection

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; //import the required package/class
/**
 * Initializes.
 * @param rf the input {@link burlap.oomdp.singleagent.RewardFunction} to project for one step.
 * @param projectionType the type of reward projection to use.
 * @param domain the {@link burlap.oomdp.core.Domain} in which the {@link burlap.oomdp.singleagent.RewardFunction} is evaluated.
 */
public RewardValueProjection(RewardFunction rf, RewardProjectionType projectionType, Domain domain){
	this.rf = rf;
	this.projectionType = projectionType;
	this.domain = domain;
	if(this.projectionType == RewardProjectionType.ONESTEP){
		this.oneStepBellmanPlanner = new SparseSampling(domain, rf, new NullTermination(), 1., new SimpleHashableStateFactory(), 1, -1);
		this.oneStepBellmanPlanner.toggleDebugPrinting(false);
		this.oneStepBellmanPlanner.setForgetPreviousPlanResults(true);
	}
}
 
Developer: f-leno, Project: DOO-Q_BRACIS2016, Lines of code: 17, Source file: RewardValueProjection.java

Example 5: DifferentiableSparseSampling

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; //import the required package/class
/**
 * Initializes.
 * @param domain the problem domain
 * @param rf the differentiable reward function
 * @param tf the terminal function
 * @param gamma the discount factor
 * @param hashingFactory the hashing factory used to compare state equality
 * @param h the planning horizon
 * @param c how many samples from the transition dynamics to use. Set to -1 to use the full (unsampled) transition dynamics.
 * @param boltzBeta the Boltzmann beta parameter for the differentiable Boltzmann (softmax) backup equation. The larger the value, the more deterministic the backup; the closer to zero, the softer (more uniform).
 */
public DifferentiableSparseSampling(Domain domain, DifferentiableRF rf, TerminalFunction tf, double gamma, HashableStateFactory hashingFactory, int h, int c, double boltzBeta){
	this.solverInit(domain, rf, tf, gamma, hashingFactory);
	this.h = h;
	this.c = c;
	this.boltzBeta = boltzBeta;
	this.nodesByHeight = new HashMap<SparseSampling.HashedHeightState, DiffStateNode>();
	this.rootLevelQValues = new HashMap<HashableState, DifferentiableSparseSampling.QAndQGradient>();
	this.rfDim = rf.getParameterDimension();

	this.vinit = new VanillaDiffVinit(new ValueFunctionInitialization.ConstantValueFunctionInitialization(), rf);

	this.debugCode = 6368290;
}
 
Developer: f-leno, Project: DOO-Q_BRACIS2016, Lines of code: 25, Source file: DifferentiableSparseSampling.java

Example 6: getStateNode

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; //import the required package/class
/**
 * Either returns, or creates, indexes, and returns, the state node for the given state at the given height in the tree
 * @param s the state
 * @param height the height (distance from leaf node) of the node.
 * @return the state node for the given state at the given height in the tree
 */
protected DiffStateNode getStateNode(State s, int height){
	HashableState sh = this.hashingFactory.hashState(s);
	SparseSampling.HashedHeightState hhs = new SparseSampling.HashedHeightState(sh, height);
	DiffStateNode sn = this.nodesByHeight.get(hhs);
	if(sn == null){
		sn = new DiffStateNode(sh, height);
		this.nodesByHeight.put(hhs, sn);
	}

	return sn;
}
 
Developer: f-leno, Project: DOO-Q_BRACIS2016, Lines of code: 18, Source file: DifferentiableSparseSampling.java

Example 7: runIteration

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; //import the required package/class
/**
 * Runs a single iteration of value iteration. Note that if the state samples have not been set, it will throw a runtime exception.
 * @return the maximum change in the value function.
 */
public double runIteration(){

	if(this.samples == null){
		throw new RuntimeException("FittedVI cannot run value iteration because the state samples have not been set. Use the setSamples method or the constructor to set them.");
	}

	SparseSampling ss = new SparseSampling(this.domain, this.rf, this.tf, this.gamma, this.hashingFactory, this.planningDepth, this.transitionSamples);
	ss.setValueForLeafNodes(this.leafNodeInit);
	ss.toggleDebugPrinting(false);

	List <SupervisedVFA.SupervisedVFAInstance> instances = new ArrayList<SupervisedVFA.SupervisedVFAInstance>(this.samples.size());
	List <Double> oldVs = new ArrayList<Double>(this.samples.size());
	for(State s : this.samples){
		oldVs.add(this.valueFunction.value(s));
		instances.add(new SupervisedVFA.SupervisedVFAInstance(s, QFunctionHelper.getOptimalValue(ss, s)));
	}

	this.valueFunction = this.valueFunctionTrainer.train(instances);

	double maxDiff = 0.;
	for(int i = 0; i < this.samples.size(); i++){
		double newV = this.valueFunction.value(this.samples.get(i));
		double diff = Math.abs(newV - oldVs.get(i));
		maxDiff = Math.max(maxDiff, diff);
	}

	return maxDiff;

}
 
Developer: f-leno, Project: DOO-Q_BRACIS2016, Lines of code: 34, Source file: FittedVI.java

Example 8: getQs

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; //import the required package/class
@Override
public List<QValue> getQs(State s) {
	SparseSampling ss = new SparseSampling(this.domain, this.rf, this.tf, this.gamma, this.hashingFactory, this.controlDepth, this.transitionSamples);
	ss.setValueForLeafNodes(this.leafNodeInit);
	ss.toggleDebugPrinting(false);
	return ss.getQs(s);
}
 
Developer: f-leno, Project: DOO-Q_BRACIS2016, Lines of code: 8, Source file: FittedVI.java

Example 9: getQ

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; //import the required package/class
@Override
public QValue getQ(State s, AbstractGroundedAction a) {
	SparseSampling ss = new SparseSampling(this.domain, this.rf, this.tf, this.gamma, this.hashingFactory, this.controlDepth, this.transitionSamples);
	ss.setValueForLeafNodes(this.leafNodeInit);
	ss.toggleDebugPrinting(false);
	return ss.getQ(s, a);
}
 
Developer: f-leno, Project: DOO-Q_BRACIS2016, Lines of code: 8, Source file: FittedVI.java

Example 10: BeliefSparseSampling

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; //import the required package/class
/**
 * Initializes the planner.
 * @param domain the POMDP domain
 * @param rf the POMDP reward function
 * @param discount the discount factor
 * @param hashingFactory the Belief MDP {@link burlap.oomdp.statehashing.HashableStateFactory} that {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} will use.
 * @param h the height of the {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} tree.
 * @param c the number of samples {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} will use. Set to -1 to use the full BeliefMDP transition dynamics.
 */
public BeliefSparseSampling(PODomain domain, RewardFunction rf, double discount, HashableStateFactory hashingFactory, int h, int c){

	this.solverInit(domain, rf, null, discount, hashingFactory);
	BeliefMDPGenerator bdgen = new BeliefMDPGenerator(domain);
	this.beliefMDP = (SADomain)bdgen.generateDomain();
	this.beliefRF = new BeliefMDPGenerator.BeliefRF(domain, rf);
	
	this.mdpPlanner = new SparseSampling(this.beliefMDP, this.beliefRF, new NullTermination(), discount, hashingFactory, h, Math.max(1, c));
	if(c < 1){
		this.mdpPlanner.setComputeExactValueFunction(true);
	}
	
}
 
Developer: f-leno, Project: DOO-Q_BRACIS2016, Lines of code: 23, Source file: BeliefSparseSampling.java

Example 11: RewardValueProjection

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; //import the required package/class
/**
 * Initializes.
 * @param rf the input {@link RewardFunction} to project for one step.
 * @param projectionType the type of reward projection to use.
 * @param domain the {@link burlap.mdp.core.Domain} in which the {@link RewardFunction} is evaluated.
 */
public RewardValueProjection(RewardFunction rf, RewardProjectionType projectionType, SADomain domain){
	this.rf = rf;
	this.projectionType = projectionType;
	this.domain = domain;
	if(this.projectionType == RewardProjectionType.ONESTEP){
		this.oneStepBellmanPlanner = new SparseSampling(domain, 1., new SimpleHashableStateFactory(), 1, -1);
		this.oneStepBellmanPlanner.setModel(new CustomRewardNoTermModel(domain.getModel(), rf));
		this.oneStepBellmanPlanner.toggleDebugPrinting(false);
		this.oneStepBellmanPlanner.setForgetPreviousPlanResults(true);
	}
}
 
Developer: jmacglashan, Project: burlap, Lines of code: 18, Source file: RewardValueProjection.java

Example 12: runIteration

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; //import the required package/class
/**
 * Runs a single iteration of value iteration. Note that if the state samples have not been set, it will throw a runtime exception.
 * @return the maximum change in the value function.
 */
public double runIteration(){

	if(this.samples == null){
		throw new RuntimeException("FittedVI cannot run value iteration because the state samples have not been set. Use the setSamples method or the constructor to set them.");
	}

	SparseSampling ss = new SparseSampling(this.domain, this.gamma, this.hashingFactory, this.planningDepth, this.transitionSamples);
	ss.setModel(this.model);
	ss.setValueForLeafNodes(this.leafNodeInit);
	ss.toggleDebugPrinting(false);

	List <SupervisedVFA.SupervisedVFAInstance> instances = new ArrayList<SupervisedVFA.SupervisedVFAInstance>(this.samples.size());
	List <Double> oldVs = new ArrayList<Double>(this.samples.size());
	for(State s : this.samples){
		oldVs.add(this.valueFunction.value(s));
		instances.add(new SupervisedVFA.SupervisedVFAInstance(s, Helper.maxQ(ss, s)));
	}

	this.valueFunction = this.valueFunctionTrainer.train(instances);

	double maxDiff = 0.;
	for(int i = 0; i < this.samples.size(); i++){
		double newV = this.valueFunction.value(this.samples.get(i));
		double diff = Math.abs(newV - oldVs.get(i));
		maxDiff = Math.max(maxDiff, diff);
	}

	return maxDiff;

}
 
Developer: jmacglashan, Project: burlap, Lines of code: 35, Source file: FittedVI.java

Example 13: qValues

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; //import the required package/class
@Override
public List<QValue> qValues(State s) {
	SparseSampling ss = new SparseSampling(this.domain, this.gamma, this.hashingFactory, this.controlDepth, this.transitionSamples);
	ss.setModel(model);
	ss.setValueForLeafNodes(this.leafNodeInit);
	ss.toggleDebugPrinting(false);
	return ss.qValues(s);
}
 
Developer: jmacglashan, Project: burlap, Lines of code: 9, Source file: FittedVI.java

Example 14: qValue

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; //import the required package/class
@Override
public double qValue(State s, Action a) {
	SparseSampling ss = new SparseSampling(this.domain, this.gamma, this.hashingFactory, this.controlDepth, this.transitionSamples);
	ss.setModel(model);
	ss.setValueForLeafNodes(this.leafNodeInit);
	ss.toggleDebugPrinting(false);
	return ss.qValue(s, a);
}
 
Developer: jmacglashan, Project: burlap, Lines of code: 9, Source file: FittedVI.java

Example 15: BeliefSparseSampling

import burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling; //import the required package/class
/**
 * Initializes the planner.
 * @param domain the POMDP domain
 * @param discount the discount factor
 * @param hashingFactory the Belief MDP {@link burlap.statehashing.HashableStateFactory} that {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} will use.
 * @param h the height of the {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} tree.
 * @param c the number of samples {@link burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling} will use. Set to -1 to use the full BeliefMDP transition dynamics.
 */
public BeliefSparseSampling(PODomain domain, double discount, HashableStateFactory hashingFactory, int h, int c){

	this.solverInit(domain, discount, hashingFactory);
	BeliefMDPGenerator bdgen = new BeliefMDPGenerator(domain);
	this.beliefMDP = (SADomain)bdgen.generateDomain();
	
	this.mdpPlanner = new SparseSampling(this.beliefMDP, discount, hashingFactory, h, Math.max(1, c));
	if(c < 1){
		this.mdpPlanner.setComputeExactValueFunction(true);
	}
	
}
 
Developer: jmacglashan, Project: burlap, Lines of code: 21, Source file: BeliefSparseSampling.java


Note: The burlap.behavior.singleagent.planning.stochastic.sparsesampling.SparseSampling class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective developers, and copyright in the source code remains with the original authors. Please consult each project's license before distributing or using the code, and do not reproduce this article without permission.