本文整理匯總了Java中burlap.behavior.stochasticgames.madynamicprogramming.policies.EGreedyMaxWellfare類的典型用法代碼示例。如果您正苦於以下問題:Java EGreedyMaxWellfare類的具體用法?Java EGreedyMaxWellfare怎麼用?Java EGreedyMaxWellfare使用的例子?那麼, 這裡精選的類代碼示例或許可以為您提供幫助。
EGreedyMaxWellfare類屬於burlap.behavior.stochasticgames.madynamicprogramming.policies包,在下文中一共展示了EGreedyMaxWellfare類的7個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: MultiAgentQLearning
import burlap.behavior.stochasticgames.madynamicprogramming.policies.EGreedyMaxWellfare; //導入依賴的package包/類
/**
* Initializes this Q-learning agent. This agent's Q-source will use a {@link HashBackedQSource} q-source and the learning policy is defaulted
* to an epsilon = 0.1 maximum wellfare ({@link EGreedyMaxWellfare}) derived policy. If queryOtherAgentsForTheirQValues is set to true, then this agent will
* only store its own Q-values and will use the other agent's stored Q-values to determine theirs.
* @param d the domain in which to perform learing
* @param discount the discount factor
* @param learningRate the constant learning rate
* @param hashFactory the hashing factory used to index states and Q-values
* @param qInit the default Q-value to which all initial Q-values will be initialized
* @param backupOperator the backup operator to use that defines the solution concept being learned
* @param queryOtherAgentsForTheirQValues it true, then the agent uses the Q-values for other agents that are stored by them; if false then the agent stores a Q-value for each other agent in the world.
*/
public MultiAgentQLearning(SGDomain d, double discount, double learningRate, HashableStateFactory hashFactory, double qInit, SGBackupOperator backupOperator, boolean queryOtherAgentsForTheirQValues){
this.init(d);
this.discount = discount;
this.learningRate = new ConstantLR(learningRate);
this.hashingFactory = hashFactory;
this.qInit = new ValueFunctionInitialization.ConstantValueFunctionInitialization(qInit);
this.backupOperator = backupOperator;
this.queryOtherAgentsQSource = queryOtherAgentsForTheirQValues;
this.myQSource = new QSourceForSingleAgent.HashBackedQSource(this.hashingFactory, this.qInit);
this.learningPolicy = new PolicyFromJointPolicy(new EGreedyMaxWellfare(this, 0.1));
}
示例2: MAQLFactory
import burlap.behavior.stochasticgames.madynamicprogramming.policies.EGreedyMaxWellfare; //導入依賴的package包/類
/**
* Initializes. The policy will be defaulted to a epsilon-greedy max welfare policy.
* @param d the domain in which to perform learing
* @param discount the discount factor
* @param learningRate the constant learning rate
* @param hashFactory the hashing factory used to index states and Q-values
* @param qInit the default Q-value to which all initial Q-values will be initialized
* @param backupOperator the backup operator to use that defines the solution concept being learned
* @param queryOtherAgentsForTheirQValues it true, then the agent uses the Q-values for other agents that are stored by them; if false then the agent stores a Q-value for each other agent in the world.
*/
public MAQLFactory(SGDomain d, double discount, double learningRate, HashableStateFactory hashFactory, double qInit, SGBackupOperator backupOperator, boolean queryOtherAgentsForTheirQValues){
this.domain = d;
this.discount = discount;
this.learningRate = new ConstantLR(learningRate);
this.hashingFactory = hashFactory;
this.qInit = new ValueFunctionInitialization.ConstantValueFunctionInitialization(qInit);
this.backupOperator = backupOperator;
this.queryOtherAgentsQSource = queryOtherAgentsForTheirQValues;
this.learningPolicy = new PolicyFromJointPolicy(new EGreedyMaxWellfare(0.1));
}
示例3: MAQLFactory
import burlap.behavior.stochasticgames.madynamicprogramming.policies.EGreedyMaxWellfare; //導入依賴的package包/類
/**
* Initializes. The policy will be defaulted to a epsilon-greedy max welfare policy.
* @param d the domain in which to perform learing
* @param discount the discount factor
* @param learningRate the constant learning rate
* @param hashFactory the hashing factory used to index states and Q-values
* @param qInit the default Q-value to which all initial Q-values will be initialized
* @param backupOperator the backup operator to use that defines the solution concept being learned
* @param queryOtherAgentsForTheirQValues it true, then the agent uses the Q-values for other agents that are stored by them; if false then the agent stores a Q-value for each other agent in the world.
*/
public MAQLFactory(SGDomain d, double discount, double learningRate, HashableStateFactory hashFactory, double qInit, SGBackupOperator backupOperator, boolean queryOtherAgentsForTheirQValues){
this.domain = d;
this.discount = discount;
this.learningRate = new ConstantLR(learningRate);
this.hashingFactory = hashFactory;
this.qInit = new ConstantValueFunction(qInit);
this.backupOperator = backupOperator;
this.queryOtherAgentsQSource = queryOtherAgentsForTheirQValues;
this.learningPolicy = new PolicyFromJointPolicy(new EGreedyMaxWellfare(0.1));
}
示例4: CoCoQLearningFactory
import burlap.behavior.stochasticgames.madynamicprogramming.policies.EGreedyMaxWellfare; //導入依賴的package包/類
public CoCoQLearningFactory(SGDomain d, double discount, LearningRate learningRate, HashableStateFactory hashFactory, ValueFunctionInitialization qInit, boolean queryOtherAgentsForTheirQValues, double epsilon){
this.init(d, discount, learningRate, hashFactory, qInit, new CoCoQ(), queryOtherAgentsForTheirQValues, new PolicyFromJointPolicy(new EGreedyMaxWellfare(epsilon)));
}
示例5: VICoCoTest
import burlap.behavior.stochasticgames.madynamicprogramming.policies.EGreedyMaxWellfare; //導入依賴的package包/類
/**
 * Sets up the grid game version of the prisoner's dilemma, creates a
 * {@link MAValueIteration} planner that uses a {@link CoCoQ} backup operator, has two
 * {@link MultiAgentDPPlanningAgent}s follow a shared greedy {@link EGreedyMaxWellfare}
 * joint policy for three games, and then opens a {@link GameSequenceVisualizer}
 * on the last game played.
 */
public static void VICoCoTest(){

    // Domain and state hashing for the grid game.
    GridGame game = new GridGame();
    final OOSGDomain domain = game.generateDomain();
    final HashableStateFactory hashingFactory = new SimpleHashableStateFactory();

    // Start from the grid game's prisoner's dilemma layout.
    final State initialState = GridGame.getPrisonersDilemmaInitialState();

    // Joint reward function and termination conditions for this game.
    JointRewardFunction jointReward = new GridGame.GGJointRewardFunction(domain, -1, 100, false);
    TerminalFunction terminal = new GridGame.GGTerminalFunction(domain);

    // Both agents share the standard grid game agent type.
    SGAgentType agentType = GridGame.getStandardGridGameAgentType(domain);

    // Multi-agent planner with a CoCo-Q backup operator.
    MAValueIteration planner = new MAValueIteration(domain, jointReward, terminal, 0.99, hashingFactory, 0., new CoCoQ(), 0.00015, 50);

    // World in which the agents will play.
    World world = new World(domain, jointReward, terminal, initialState);

    // Epsilon = 0 makes the joint policy purely greedy; ties are not broken randomly.
    EGreedyMaxWellfare greedyJointPolicy = new EGreedyMaxWellfare(0.);
    greedyJointPolicy.setBreakTiesRandomly(false);

    // Each agent follows its own end (index 0 or 1) of the shared joint policy.
    MultiAgentDPPlanningAgent firstAgent = new MultiAgentDPPlanningAgent(domain, planner, new PolicyFromJointPolicy(0, greedyJointPolicy), "agent0", agentType);
    MultiAgentDPPlanningAgent secondAgent = new MultiAgentDPPlanningAgent(domain, planner, new PolicyFromJointPolicy(1, greedyJointPolicy), "agent1", agentType);

    world.join(firstAgent);
    world.join(secondAgent);

    // Play three games; each result overwrites the previous one, so only the last is kept.
    GameEpisode lastGame = null;
    int gamesRemaining = 3;
    while(gamesRemaining > 0){
        lastGame = world.runGame();
        gamesRemaining--;
    }

    // Visualize the last game on a grid game visualizer.
    Visualizer visualizer = GGVisualizer.getVisualizer(9, 9);
    new GameSequenceVisualizer(visualizer, domain, Arrays.asList(lastGame));
}
示例6: CoCoQLearningFactory
import burlap.behavior.stochasticgames.madynamicprogramming.policies.EGreedyMaxWellfare; //導入依賴的package包/類
public CoCoQLearningFactory(SGDomain d, double discount, LearningRate learningRate, HashableStateFactory hashFactory, QFunction qInit, boolean queryOtherAgentsForTheirQValues, double epsilon){
this.init(d, discount, learningRate, hashFactory, qInit, new CoCoQ(), queryOtherAgentsForTheirQValues, new PolicyFromJointPolicy(new EGreedyMaxWellfare(epsilon)));
}
示例7: MultiAgentQLearning
import burlap.behavior.stochasticgames.madynamicprogramming.policies.EGreedyMaxWellfare; //導入依賴的package包/類
/**
* Initializes this Q-learning agent. This agent's Q-source will use a {@link HashBackedQSource} q-source and the learning policy is defaulted
* to an epsilon = 0.1 maximum wellfare ({@link EGreedyMaxWellfare}) derived policy. If queryOtherAgentsForTheirQValues is set to true, then this agent will
* only store its own Q-values and will use the other agent's stored Q-values to determine theirs.
* @param d the domain in which to perform learing
* @param discount the discount factor
* @param learningRate the constant learning rate
* @param hashFactory the hashing factory used to index states and Q-values
* @param qInit the default Q-value to which all initial Q-values will be initialized
* @param backupOperator the backup operator to use that defines the solution concept being learned
* @param queryOtherAgentsForTheirQValues it true, then the agent uses the Q-values for other agents that are stored by them; if false then the agent stores a Q-value for each other agent in the world.
* @param agentName the name of the agent
* @param agentType the {@link SGAgentType} for the agent defining its action space
*/
public MultiAgentQLearning(SGDomain d, double discount, double learningRate, HashableStateFactory hashFactory, double qInit, SGBackupOperator backupOperator, boolean queryOtherAgentsForTheirQValues, String agentName, SGAgentType agentType){
this.init(d, agentName, agentType);
this.discount = discount;
this.learningRate = new ConstantLR(learningRate);
this.hashingFactory = hashFactory;
this.qInit = new ConstantValueFunction(qInit);
this.backupOperator = backupOperator;
this.queryOtherAgentsQSource = queryOtherAgentsForTheirQValues;
this.myQSource = new QSourceForSingleAgent.HashBackedQSource(this.hashingFactory, this.qInit);
this.learningPolicy = new PolicyFromJointPolicy(new EGreedyMaxWellfare(this, 0.1));
}