本文整理汇总了Java中burlap.mdp.core.action.Action类的典型用法代码示例。如果您正苦于以下问题:Java Action类的具体用法?Java Action怎么用?Java Action使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Action类属于burlap.mdp.core.action包,在下文中一共展示了Action类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: sample
import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public DecisionState sample(State state, Action action) {
List<StateTransitionProb> reachableStates;
try {
reachableStates = stateTransitions(state, action);
} catch (NullPointerException e) {
reachableStates = Collections.singletonList(new StateTransitionProb(deadEnd, 1.0));
}
Collections.shuffle(reachableStates);
//sample random roll
double randomThreshold = Math.random(), sumOfProbability = 0;
for (StateTransitionProb reachableState : reachableStates) {
sumOfProbability = sumOfProbability + reachableState.p;
if (randomThreshold <= sumOfProbability) {
return ((DecisionState) reachableState.s).copy();
}
}
throw new IndexOutOfBoundsException("No state found!");
}
示例2: qValue
import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public double qValue(State s, Action a) {
if(this.model.terminal(s)){
return 0.;
}
//what are the possible outcomes?
List<TransitionProb> tps = ((FullModel)this.model).transitions(s, a);
//aggregate over each possible outcome
double q = 0.;
for(TransitionProb tp : tps){
//what is reward for this transition?
double r = tp.eo.r;
//what is the value for the next state?
double vp = this.valueFunction.get(this.hashingFactory.hashState(tp.eo.op));
//add contribution weighted by transition probability and
//discounting the next state
q += tp.p * (r + this.gamma * vp);
}
return q;
}
示例3: qValues
import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public List<QValue> qValues(State s) {
//first get hashed state
HashableState sh = this.hashingFactory.hashState(s);
//check if we already have stored values
List<QValue> qs = this.qValues.get(sh);
//create and add initialized Q-values if we don't have them stored for this state
if(qs == null){
List<Action> actions = this.applicableActions(s);
qs = new ArrayList<QValue>(actions.size());
//create a Q-value for each action
for(Action a : actions){
//add q with initialized value
qs.add(new QValue(s, a, this.qinit.qValue(s, a)));
}
//store this for later
this.qValues.put(sh, qs);
}
return qs;
}
示例4: actionDir
import burlap.mdp.core.action.Action; //导入依赖的package包/类
/**
 * Maps a movement action to its direction index:
 * 0 = north, 1 = south, 2 = east, 3 = west, or -1 if the action is not a move.
 */
protected int actionDir(Action a){
	String name = a.actionName();
	if(name.equals(ACTION_NORTH)){
		return 0;
	}
	if(name.equals(ACTION_SOUTH)){
		return 1;
	}
	if(name.equals(ACTION_EAST)){
		return 2;
	}
	if(name.equals(ACTION_WEST)){
		return 3;
	}
	return -1;
}
示例5: executeAction
import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public EnvironmentOutcome executeAction(Action a) {
State startState = this.currentObservation();
ActionController ac = this.actionControllerMap.get(a.actionName());
int delay = ac.executeAction(a);
if (delay > 0) {
try {
Thread.sleep(delay);
} catch(InterruptedException e) {
e.printStackTrace();
}
}
State finalState = this.currentObservation();
this.lastReward = this.rewardFunction.reward(startState, a, finalState);
EnvironmentOutcome eo = new EnvironmentOutcome(startState, a, finalState, this.lastReward, this.isInTerminalState());
return eo;
}
示例6: allApplicableActions
import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public List<Action> allApplicableActions(State s) {
BCAgent a = (BCAgent)((GenericOOState)s).object(CLASS_AGENT);
List<ObjectInstance> blocks = ((OOState)s).objectsOfClass(HelperNameSpace.CLASS_BLOCK);
for (ObjectInstance block : blocks) {
if (HelperActions.blockIsOneOf(Block.getBlockById(((BCBlock)block).type), HelperActions.dangerBlocks)) {
int dangerX = ((BCBlock)block).x;
int dangerY = ((BCBlock)block).y;
int dangerZ = ((BCBlock)block).z;
if ((a.x == dangerX) && (a.y - 1 == dangerY) && (a.z == dangerZ) || (a.x == dangerX) && (a.y == dangerY) && (a.z == dangerZ)) {
return new ArrayList<Action>();
}
}
}
//otherwise we pass check
return Arrays.<Action>asList(new SimpleAction(typeName));
}
示例7: publishAction
import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public int publishAction(Action a) {
Timer timer = new Timer();
PublishTask pt = new PublishTask();
timer.schedule(pt, 0, this.period);
if(this.synchronous){
synchronized(pt) {
while(!pt.finished()) {
try {
pt.wait();
} catch(InterruptedException e) {
e.printStackTrace();
}
}
}
}
return this.delayTime;
}
示例8: action
import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public Action action(State s) {
if(this.dp == null){
throw new RuntimeException("The valueFunction used by this Policy is not defined; therefore, the policy is undefined.");
}
if(this.dp.hasCachedPlanForState(s)){
Action ga = this.dp.querySelectedActionForState(s);
//the surrounding if condition will probably be sufficient for null cases, but doing double check just to make sure.
if(ga == null){
throw new PolicyUndefinedException();
}
return ga;
}
throw new PolicyUndefinedException();
}
示例9: collectDataFrom
import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public SARSData collectDataFrom(State s, SampleModel model, int maxSteps, SARSData intoDataset) {
if(intoDataset == null){
intoDataset = new SARSData();
}
State curState = s;
int nsteps = 0;
boolean terminated = model.terminal(s);
while(!terminated && nsteps < maxSteps){
List<Action> gas = ActionUtils.allApplicableActionsForTypes(this.actionTypes, curState);
Action ga = gas.get(RandomFactory.getMapped(0).nextInt(gas.size()));
EnvironmentOutcome eo = model.sample(curState, ga);
intoDataset.add(curState, ga, eo.r, eo.op);
curState = eo.op;
terminated = eo.terminated;
nsteps++;
}
return intoDataset;
}
示例10: estimateQs
import burlap.mdp.core.action.Action; //导入依赖的package包/类
/**
 * Estimates and returns the Q-values for this node. Q-values and used state samples are forgotten after this call completes.
 * @return a {@link List} of the estimated Q-values for each action.
 */
public List<QValue> estimateQs(){
	State nodeState = this.sh.s();
	List<Action> actions = SparseSampling.this.applicableActions(nodeState);
	List<QValue> estimates = new ArrayList<QValue>(actions.size());
	for(Action action : actions){
		double q;
		if(this.height <= 0){
			// Leaf of the sampling tree: fall back to the value-function initialization.
			q = SparseSampling.this.vinit.value(nodeState);
		}
		else if(SparseSampling.this.computeExactValueFunction){
			// Full-width backup over all transitions.
			q = this.exactQValue(action);
		}
		else{
			// Standard sparse-sampling Monte Carlo estimate.
			q = this.sampledQEstimate(action);
		}
		estimates.add(new QValue(nodeState, action, q));
	}
	return estimates;
}
示例11: allApplicableActions
import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public List<Action> allApplicableActions(State s) {
List <Action> res = new ArrayList<Action>();
if(!(s instanceof OOState)){
throw new RuntimeException("Cannot get object-parameterized grounded actions in state, because " + s.getClass().getName() + " does not implement OOState");
}
//otherwise need to do parameter binding
List <List <String>> bindings = OOStateUtilities.getPossibleBindingsGivenParamOrderGroups((OOState)s, this.getParameterClasses(), this.getParameterOrderGroups());
for(List <String> params : bindings){
String [] aprams = params.toArray(new String[params.size()]);
ObjectParameterizedAction ga = this.generateAction(aprams);
if(this.applicableInState(s, ga)) {
res.add(ga);
}
}
return res;
}
示例12: action
import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public Action action(State s) {
synchronized(this){
while(this.nextAction == null){
try {
this.wait();
} catch(InterruptedException e) {
e.printStackTrace();
}
}
}
Action toTake = this.nextAction;
this.nextAction = null;
return toTake;
}
示例13: UCTStateNode
import burlap.mdp.core.action.Action; //导入依赖的package包/类
/**
 * Initializes the UCT state node.
 * @param s the state that this node wraps
 * @param d the depth of the node
 * @param actionTypes the possible OO-MDP actions that can be taken
 * @param constructor a {@link UCTActionNode} factory that can be used to create ActionNodes for each of the actions.
 */
public UCTStateNode(HashableState s, int d, List <ActionType> actionTypes, UCTActionConstructor constructor){
	state = s;
	depth = d;
	n = 0;
	actionNodes = new ArrayList<UCTActionNode>();
	// Build one action node for every action applicable in the wrapped state.
	for(Action applicable : ActionUtils.allApplicableActionsForTypes(actionTypes, s.s())){
		actionNodes.add(constructor.generate(applicable));
	}
}
示例14: getAgentSynchronizedActionSelection
import burlap.mdp.core.action.Action; //导入依赖的package包/类
/**
 * This method returns the action for a single agent by a synchronized sampling of this joint policy,
 * which enables multiple agents to query this policy object and act according to the same selected joint
 * actions from it. This is useful when decisions are made from a "referee" who selects the joint action
 * that dictates the behavior of each agent. The synchronization is implemented by selecting a joint action.
 * Each time an agent queries for their action, it is drawn from the previously sampled joint action.
 * A new joint action is only selected after each agent defined in this objects {@link #agentsInJointPolicy} member
 * has queried this method for their action or until an action for a different state is queried (that is, *either* condition
 * will cause the joint action to be resampled).
 * @param agentNum the agent whose action in this joint policy is being queried
 * @param s the state in which the action is to be selected.
 * @return the single agent action to be taken according to the synchronized joint action that was selected.
 */
public Action getAgentSynchronizedActionSelection(int agentNum, State s){
	if(this.lastSyncedState == null || !this.lastSyncedState.equals(s)){
		// First query for this state (or the state changed): reset the
		// synchronization round and sample a fresh joint action.
		this.lastSyncedState = s;
		this.agentsSynchronizedSoFar.clear();
		this.lastSynchronizedJointAction = (JointAction)this.action(s);
	}
	// Every agent in the current round reads its component of the SAME joint action.
	Action a = this.lastSynchronizedJointAction.action(agentNum);
	this.agentsSynchronizedSoFar.add(agentNum);
	if(this.agentsSynchronizedSoFar.size() == this.agentsInJointPolicy.size()){
		// All agents have drawn their action for this round; clearing the synced
		// state forces the next query to resample a new joint action.
		// NOTE(review): no locking here — if agents query from multiple threads,
		// this bookkeeping is racy; presumably queries are single-threaded — confirm.
		this.lastSyncedState = null;
		this.agentsSynchronizedSoFar.clear();
	}
	return a;
}
示例15: policyDistribution
import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public List<ActionProb> policyDistribution(State s) {
if(!(this.sourcePolicy instanceof EnumerablePolicy)){
throw new RuntimeException("Cannot return policy distribution because source policy does not implement EnumerablePolicy");
}
List<Action> unmodeled = KWIKModel.Helper.unmodeledActions(model, allActionTypes, s);
if(!unmodeled.isEmpty()){
List<ActionProb> aps = new ArrayList<ActionProb>(unmodeled.size());
double p = 1./(double)unmodeled.size();
for(Action ga : unmodeled){
aps.add(new ActionProb(ga, p));
}
return aps;
}
return ((EnumerablePolicy)this.sourcePolicy).policyDistribution(s);
}