This article collects typical usage examples of the Java class burlap.behavior.valuefunction.QFunction: what the class is for and how it is used in practice.
The QFunction class belongs to the burlap.behavior.valuefunction package. Fifteen code examples are shown below, sorted by popularity by default.
Example 1: MAVIPlannerFactory
import burlap.behavior.valuefunction.QFunction; //import the required package/class
/**
* Initializes.
* @param domain the domain in which to perform planning
* @param jointModel the joint action model
* @param jointRewardFunction the joint reward function
* @param terminalFunction the terminal state function
* @param discount the discount
* @param hashingFactory the hashing factory to use for storing states
* @param qInit the q-value initialization function to use.
* @param backupOperator the backup operator that defines the solution concept being solved
* @param maxDelta the maximum Q-value change threshold; VI terminates when the largest Q-value change is less than this value
* @param maxIterations the maximum number of iterations allowed
*/
public MAVIPlannerFactory(SGDomain domain, JointModel jointModel, JointRewardFunction jointRewardFunction, TerminalFunction terminalFunction,
double discount, HashableStateFactory hashingFactory, QFunction qInit, SGBackupOperator backupOperator, double maxDelta, int maxIterations){
this.domain = domain;
this.jointModel = jointModel;
this.jointRewardFunction = jointRewardFunction;
this.terminalFunction = terminalFunction;
this.discount = discount;
this.hashingFactory = hashingFactory;
this.qInit = qInit;
this.backupOperator = backupOperator;
this.maxDelta = maxDelta;
this.maxIterations = maxIterations;
}
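As a hedged usage note (not part of the original example), the qInit parameter accepts any QFunction, and a constant initializer is the common choice. The sketch below assumes BURLAP's ValueFunctionInitialization.ConstantValueFunctionInitialization and MaxQ backup operator (class names may differ between BURLAP versions); the remaining arguments are placeholders.
// assumed wiring; domain, jointModel, jointReward, tf, and hashingFactory are placeholders
QFunction qInit = new ValueFunctionInitialization.ConstantValueFunctionInitialization(0.);
MAVIPlannerFactory factory = new MAVIPlannerFactory(domain, jointModel, jointReward, tf,
		0.99, hashingFactory, qInit, new MaxQ(), 0.01, 100);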
Example 2: QLInit
import burlap.behavior.valuefunction.QFunction; //import the required package/class
/**
* Initializes the algorithm. By default the agent will only save the last learning episode and a call to the {@link #planFromState(State)} method
* will cause the algorithm to use only one episode for planning; this should probably be changed to a much larger value if you plan on using this
* algorithm as a planning algorithm.
* @param domain the domain in which to learn
* @param gamma the discount factor
* @param hashingFactory the state hashing factory to use for Q-lookups
* @param qInitFunction a {@link burlap.behavior.valuefunction.QFunction} object that can be used to initialize the Q-values.
* @param learningRate the learning rate
* @param learningPolicy the learning policy to follow during a learning episode.
* @param maxEpisodeSize the maximum number of steps the agent will take in a learning episode before the episode is terminated.
*/
protected void QLInit(SADomain domain, double gamma, HashableStateFactory hashingFactory,
QFunction qInitFunction, double learningRate, Policy learningPolicy, int maxEpisodeSize){
this.solverInit(domain, gamma, hashingFactory);
this.qFunction = new HashMap<HashableState, QLearningStateNode>();
this.learningRate = new ConstantLR(learningRate);
this.learningPolicy = learningPolicy;
this.maxEpisodeSize = maxEpisodeSize;
this.qInitFunction = qInitFunction;
numEpisodesForPlanning = 1;
maxQChangeForPlanningTermination = 0.;
}
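For context, a concrete public constructor would typically delegate to QLInit, wrapping a scalar initial Q-value in a constant QFunction and defaulting to an epsilon-greedy policy. This is only a sketch of that pattern; the constant-initializer class name is assumed from BURLAP 2 and may differ in other versions.
// sketch of a delegating constructor; not part of the listed example
public QLearning(SADomain domain, double gamma, HashableStateFactory hashingFactory,
		double qInit, double learningRate){
	this.QLInit(domain, gamma, hashingFactory,
			new ValueFunctionInitialization.ConstantValueFunctionInitialization(qInit),
			learningRate, new EpsilonGreedy(this, 0.1), Integer.MAX_VALUE);
}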
Example 3: setSolver
import burlap.behavior.valuefunction.QFunction; //import the required package/class
@Override
public void setSolver(MDPSolverInterface solver){
if(!(solver instanceof QFunction)){
throw new RuntimeErrorException(new Error("Planner is not a QComputablePlanner"));
}
this.qplanner = (QFunction) solver;
}
Example 4: setSolver
import burlap.behavior.valuefunction.QFunction; //import the required package/class
@Override
public void setSolver(MDPSolverInterface solver) {
if(!(solver instanceof QFunction)){
throw new RuntimeErrorException(new Error("Planner is not a QComputablePlanner"));
}
this.qplanner = (QFunction) solver;
}
Example 5: logLikelihoodOfTrajectory
import burlap.behavior.valuefunction.QFunction; //import the required package/class
/**
* Computes and returns the log-likelihood of the given trajectory under the current reward function parameters, weighted by the given weight.
* @param ea the trajectory
* @param weight the weight to assign the trajectory
* @return the log-likelihood of the given trajectory under the current reward function parameters, weighted by the given weight.
*/
public double logLikelihoodOfTrajectory(EpisodeAnalysis ea, double weight){
double logLike = 0.;
Policy p = new BoltzmannQPolicy((QFunction)this.request.getPlanner(), 1./this.request.getBoltzmannBeta());
for(int i = 0; i < ea.numTimeSteps()-1; i++){
this.request.getPlanner().planFromState(ea.getState(i));
double actProb = p.getProbOfAction(ea.getState(i), ea.getAction(i));
logLike += Math.log(actProb);
}
logLike *= weight;
return logLike;
}
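In other words, the method computes weight * sum_t log pi(a_t | s_t), where pi is the Boltzmann (softmax) policy over the planner's Q-values with temperature 1/beta. A hedged usage sketch, assuming a List<EpisodeAnalysis> of demonstrations collected elsewhere:
// total weighted log-likelihood over a set of demonstration trajectories (uniform weights here)
double totalLogLike = 0.;
for(EpisodeAnalysis demo : demonstrations){
	totalLogLike += this.logLikelihoodOfTrajectory(demo, 1.);
}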
Example 6: logPolicyGrad
import burlap.behavior.valuefunction.QFunction; //import the required package/class
/**
* Computes and returns the gradient of the log of the Boltzmann policy for the given state and action
* (by the chain rule, this is the policy gradient scaled by 1/pi(a|s)).
* @param s the state in which the policy is queried
* @param ga the action for which the policy is queried.
* @return the gradient of the log of the Boltzmann policy for the given state and action.
*/
public double [] logPolicyGrad(State s, GroundedAction ga){
Policy p = new BoltzmannQPolicy((QFunction)this.request.getPlanner(), 1./this.request.getBoltzmannBeta());
double invActProb = 1./p.getProbOfAction(s, ga);
double [] gradient = BoltzmannPolicyGradient.computeBoltzmannPolicyGradient(s, ga, (QGradientPlanner)this.request.getPlanner(), this.request.getBoltzmannBeta());
for(int f = 0; f < gradient.length; f++){
gradient[f] *= invActProb;
}
return gradient;
}
Example 7: value
import burlap.behavior.valuefunction.QFunction; //import the required package/class
@Override
public double value(State s) {
if(this.tf != null) {
return QFunction.QFunctionHelper.getOptimalValue(this, s, this.tf);
}
else{
return QFunction.QFunctionHelper.getOptimalValue(this, s);
}
}
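For reference, QFunctionHelper.getOptimalValue returns the optimal state value implied by the Q-values, i.e. V(s) = max_a Q(s,a), returning 0 for terminal states when a TerminalFunction is supplied. A minimal sketch of the same computation against the QFunction interface:
// hedged sketch of what the helper computes: V(s) = max over this.getQs(s)
double v = Double.NEGATIVE_INFINITY;
for(QValue q : this.getQs(s)){
	v = Math.max(v, q.q);
}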
Example 8: DOOQPolicy
import burlap.behavior.valuefunction.QFunction; //import the required package/class
/**
* Default constructor for the DOO-Q policy. Exploration is turned on, and the Random object used for epsilon-greedy exploration is initialized
* with a default seed. Exploration can be turned off with the setExploration() method, and setRandom() can be used to change the default seed.
* @param domain the domain in which the policy operates
* @param qSource the source of Q-values; usually a DistributedQTable is used for this policy
* @param epsilon the epsilon parameter for epsilon-greedy exploration
* @param hashFactory the hashing factory used to index states
*
*/
public DOOQPolicy(SGDomain domain, QFunction qSource, double epsilon, HashableStateFactory hashFactory){
this.qSource = qSource;
this.epsilon = epsilon;
this.rand = new Random();
this.exploring = true;
this.hashFactory = hashFactory;
this.domain = domain;
this.policyMemory = new HashMap<HashableState,AbstractGroundedAction>(100000);
//this.useAbstractActions = useAbstractActions;
}
Example 9: QLTutorial
import burlap.behavior.valuefunction.QFunction; //import the required package/class
public QLTutorial(SADomain domain, double gamma, HashableStateFactory hashingFactory,
QFunction qinit, double learningRate, double epsilon){
this.solverInit(domain, gamma, hashingFactory);
this.qinit = qinit;
this.learningRate = learningRate;
this.qValues = new HashMap<HashableState, List<QValue>>();
this.learningPolicy = new EpsilonGreedy(this, epsilon);
}
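For completeness (not shown in the listed constructor), the learning step such a class performs in its learning episode is the standard tabular Q-learning update. The sketch below uses assumed helper names for the Q-value lookup and for the max over the next state's Q-values:
// hypothetical update step: Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
QValue q = this.storedQ(curState, action);                 // assumed lookup into this.qValues
double maxQNext = isTerminal ? 0. : this.maxQ(nextState);  // assumed helper over the next state's Q-values
q.q = q.q + this.learningRate * (reward + this.gamma * maxQNext - q.q);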
Example 10: SGNaiveQLAgent
import burlap.behavior.valuefunction.QFunction; //import the required package/class
/**
* Initializes with a default 0.1 epsilon greedy policy/strategy
* @param d the domain in which the agent will act
* @param discount the discount factor
* @param learningRate the learning rate
* @param qInitizalizer the Q-value initialization method
* @param hashFactory the state hashing factory
*/
public SGNaiveQLAgent(SGDomain d, double discount, double learningRate, QFunction qInitizalizer, HashableStateFactory hashFactory) {
this.init(d);
this.discount = discount;
this.learningRate = new ConstantLR(learningRate);
this.hashFactory = hashFactory;
this.qInit = qInitizalizer;
this.qMap = new HashMap<HashableState, List<QValue>>();
stateRepresentations = new HashMap<HashableState, State>();
this.policy = new EpsilonGreedy(this, 0.1);
this.storedMapAbstraction = new ShallowIdentityStateMapping();
}
Example 11: init
import burlap.behavior.valuefunction.QFunction; //import the required package/class
/**
* Initializes. The policy will default to an epsilon-greedy max welfare policy.
* @param d the domain in which to perform learning
* @param discount the discount factor
* @param learningRate the learning rate function
* @param hashFactory the hashing factory used to index states and Q-values
* @param qInit the Q-value initialization function
* @param backupOperator the backup operator to use that defines the solution concept being learned
* @param queryOtherAgentsForTheirQValues if true, then the agent uses the Q-values for other agents that are stored by them; if false, then the agent stores a Q-value for each other agent in the world.
* @param learningPolicy the learningPolicy to follow
*/
public void init(SGDomain d, double discount, LearningRate learningRate, HashableStateFactory hashFactory, QFunction qInit, SGBackupOperator backupOperator, boolean queryOtherAgentsForTheirQValues, PolicyFromJointPolicy learningPolicy){
this.domain = d;
this.discount = discount;
this.learningRate = learningRate;
this.hashingFactory = hashFactory;
this.qInit = qInit;
this.backupOperator = backupOperator;
this.queryOtherAgentsQSource = queryOtherAgentsForTheirQValues;
this.learningPolicy = learningPolicy;
}
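A hedged sketch of the default mentioned above, assuming BURLAP's EGreedyMaxWellfare joint policy and PolicyFromJointPolicy wrapper (exact constructors may vary by version):
// epsilon-greedy max-welfare default that the agent could fall back to when no policy is supplied
PolicyFromJointPolicy learningPolicy = new PolicyFromJointPolicy(new EGreedyMaxWellfare(0.1));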
Example 12: qLearningExample
import burlap.behavior.valuefunction.QFunction; //import the required package/class
public void qLearningExample(String outputPath){
LearningAgent agent = new QLearning(domain, 0.99, hashingFactory, 0., 1.);
//run learning for 50 episodes
for(int i = 0; i < 50; i++){
EpisodeAnalysis ea = agent.runLearningEpisode(env);
ea.writeToFile(outputPath + "ql_" + i);
System.out.println(i + ": " + ea.maxTimeStep());
//reset environment for next learning episode
env.resetEnvironment();
}
simpleValueFunctionVis((ValueFunction)agent, new GreedyQPolicy((QFunction)agent));
}
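As a hedged usage note, this method comes from a tutorial-style class in which domain, hashingFactory, and env are fields initialized elsewhere; a minimal driver might look like the following (the class name and setup are assumptions, not part of the original example).
// hypothetical driver for the tutorial class that defines qLearningExample
BasicBehavior example = new BasicBehavior();
example.qLearningExample("output/");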
Example 13: GreedyQPolicy
import burlap.behavior.valuefunction.QFunction; //import the required package/class
/**
* Initializes with a QFunction (formerly QComputablePlanner) source of Q-values
* @param planner the QFunction to use
*/
public GreedyQPolicy(QFunction planner){
qplanner = planner;
rand = RandomFactory.getMapped(0);
}
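A typical pairing (hedged; BURLAP 2 style class names, not part of the original snippet) is to hand this constructor a value-based planner such as ValueIteration, which implements QFunction, after planning has run:
// assumed wiring; domain, rf, tf, hashingFactory, and initialState are placeholders
ValueIteration vi = new ValueIteration(domain, rf, tf, 0.99, hashingFactory, 0.001, 100);
vi.planFromState(initialState);
Policy greedy = new GreedyQPolicy(vi);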
Example 14: EpsilonGreedy
import burlap.behavior.valuefunction.QFunction; //import the required package/class
/**
* Initializes with the QFunction (formerly QComputablePlanner) to use and the value of epsilon, where epsilon is the probability of taking a random action.
* @param planner the QFunction to use
* @param epsilon the probability of taking a random action.
*/
public EpsilonGreedy(QFunction planner, double epsilon) {
qplanner = planner;
this.epsilon = epsilon;
rand = RandomFactory.getMapped(0);
}
Example 15: value
import burlap.behavior.valuefunction.QFunction; //import the required package/class
@Override
public double value(State s) {
return QFunction.QFunctionHelper.getOptimalValue(this, s);
}