当前位置: 首页>>代码示例>>Java>>正文


Java Action类代码示例

本文整理汇总了Java中burlap.mdp.core.action.Action的典型用法代码示例。如果您正苦于以下问题:Java Action类的具体用法?Java Action怎么用?Java Action使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


Action类属于burlap.mdp.core.action包,在下文中一共展示了Action类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: sample

import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public DecisionState sample(State state, Action action) {
    // Sample a successor state for (state, action) from the transition distribution.
    List<StateTransitionProb> reachableStates;
    try {
        reachableStates = stateTransitions(state, action);
    } catch (NullPointerException e) {
        // No transitions are defined for this (state, action); treat it as a
        // dead end with probability 1.
        reachableStates = Collections.singletonList(new StateTransitionProb(deadEnd, 1.0));
    }
    Collections.shuffle(reachableStates);

    // Roulette-wheel selection: walk the cumulative probability mass until the
    // uniformly random threshold is crossed.
    double randomThreshold = Math.random(), sumOfProbability = 0;
    for (StateTransitionProb reachableState : reachableStates) {
        sumOfProbability = sumOfProbability + reachableState.p;
        if (randomThreshold <= sumOfProbability) {
            return ((DecisionState) reachableState.s).copy();
        }
    }
    // Floating-point rounding can leave the cumulative sum fractionally below
    // 1.0, so the loop may fall through even for a valid distribution. Fall
    // back to the last outcome instead of failing.
    if (!reachableStates.isEmpty()) {
        int last = reachableStates.size() - 1;
        return ((DecisionState) reachableStates.get(last).s).copy();
    }
    throw new IndexOutOfBoundsException("No state found!");
}
 
开发者ID:honzaMaly,项目名称:kusanagi,代码行数:21,代码来源:DecisionModel.java

示例2: qValue

import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public double qValue(State s, Action a) {

	// Terminal states have no future return.
	if(this.model.terminal(s)){
		return 0.;
	}

	// Enumerate every stochastic outcome of applying a in s.
	List<TransitionProb> outcomes = ((FullModel)this.model).transitions(s, a);

	// Bellman backup: expected immediate reward plus the discounted value of
	// the successor state, weighted by each outcome's probability.
	double expectedReturn = 0.;
	for(TransitionProb outcome : outcomes){
		double reward = outcome.eo.r;
		double successorValue = this.valueFunction.get(this.hashingFactory.hashState(outcome.eo.op));
		expectedReturn += outcome.p * (reward + this.gamma * successorValue);
	}

	return expectedReturn;
}
 
开发者ID:jmacglashan,项目名称:burlap_examples,代码行数:27,代码来源:VITutorial.java

示例3: qValues

import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public List<QValue> qValues(State s) {
	// Q-values are cached per hashed state; hash once up front.
	HashableState sh = this.hashingFactory.hashState(s);

	// Fast path: return previously stored values.
	List<QValue> storedValues = this.qValues.get(sh);
	if(storedValues != null){
		return storedValues;
	}

	// First visit to this state: seed one Q-value per applicable action using
	// the Q-value initializer.
	List<Action> applicable = this.applicableActions(s);
	storedValues = new ArrayList<QValue>(applicable.size());
	for(Action action : applicable){
		storedValues.add(new QValue(s, action, this.qinit.qValue(s, action)));
	}

	// Cache for subsequent queries.
	this.qValues.put(sh, storedValues);
	return storedValues;
}
 
开发者ID:jmacglashan,项目名称:burlap_examples,代码行数:24,代码来源:QLTutorial.java

示例4: actionDir

import burlap.mdp.core.action.Action; //导入依赖的package包/类
protected int actionDir(Action a){
	// Translate a movement action's name into a compact direction code:
	// 0 = north, 1 = south, 2 = east, 3 = west; -1 for any other action.
	String name = a.actionName();
	if(name.equals(ACTION_NORTH)){
		return 0;
	}
	if(name.equals(ACTION_SOUTH)){
		return 1;
	}
	if(name.equals(ACTION_EAST)){
		return 2;
	}
	if(name.equals(ACTION_WEST)){
		return 3;
	}
	return -1;
}
 
开发者ID:jmacglashan,项目名称:burlap_examples,代码行数:17,代码来源:ExampleOOGridWorld.java

示例5: executeAction

import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public EnvironmentOutcome executeAction(Action a) {
	// Capture the pre-action observation so the outcome records the full transition.
	State startState = this.currentObservation();
	
	// Dispatch to the controller registered for this action's name.
	ActionController ac = this.actionControllerMap.get(a.actionName());
	int delay = ac.executeAction(a);
	if (delay > 0) {
		try {
			// Give the environment time to settle before observing the result.
			Thread.sleep(delay);
		} catch(InterruptedException e) {
			// Restore the interrupt status so callers can observe the
			// interruption; the original swallowed it entirely.
			Thread.currentThread().interrupt();
			e.printStackTrace();
		}
	}
	
	State finalState = this.currentObservation();
	
	this.lastReward = this.rewardFunction.reward(startState, a, finalState);
	
	EnvironmentOutcome eo = new EnvironmentOutcome(startState, a, finalState, this.lastReward, this.isInTerminalState());
	
	return eo;
}
 
开发者ID:h2r,项目名称:burlapcraft,代码行数:23,代码来源:MinecraftEnvironment.java

示例6: allApplicableActions

import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public List<Action> allApplicableActions(State s) {
	// Produces the single action of this type unless the agent is in danger,
	// in which case the empty list is returned (no actions applicable).
	BCAgent a = (BCAgent)((GenericOOState)s).object(CLASS_AGENT);

	List<ObjectInstance> blocks = ((OOState)s).objectsOfClass(HelperNameSpace.CLASS_BLOCK);
	for (ObjectInstance block : blocks) {
		// Is this block one of the hazardous block types listed in
		// HelperActions.dangerBlocks? (exact hazard set defined elsewhere)
		if (HelperActions.blockIsOneOf(Block.getBlockById(((BCBlock)block).type), HelperActions.dangerBlocks)) {
			int dangerX = ((BCBlock)block).x;
			int dangerY = ((BCBlock)block).y;
			int dangerZ = ((BCBlock)block).z;
			// The agent is "in danger" when the danger block is either directly
			// beneath it (y - 1) or at the same position (same y) on the same
			// x/z column.
			if ((a.x == dangerX) && (a.y - 1 == dangerY) && (a.z == dangerZ) || (a.x == dangerX) && (a.y == dangerY) && (a.z == dangerZ)) {
				return new ArrayList<Action>();
			}
		}
	}

	//otherwise we pass check
	return Arrays.<Action>asList(new SimpleAction(typeName));
}
 
开发者ID:h2r,项目名称:burlapcraft,代码行数:20,代码来源:MinecraftActionType.java

示例7: publishAction

import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public int publishAction(Action a) {
	// Schedule the publish task to fire immediately and then repeat at the
	// configured period.
	// NOTE(review): the Timer is never cancelled here; presumably PublishTask
	// cancels itself when finished -- confirm to rule out a thread leak.
	Timer timer = new Timer();
	PublishTask pt = new PublishTask();
	timer.schedule(pt, 0, this.period);
	if(this.synchronous){
		// Block until the task reports completion. The wait() is guarded by a
		// loop to tolerate spurious wakeups.
		boolean interrupted = false;
		synchronized(pt) {
			while(!pt.finished()) {
				try {
					pt.wait();
				} catch(InterruptedException e) {
					// Record the interruption but keep waiting so behavior
					// (return only after pt finishes) is preserved. Do not
					// re-interrupt inside the loop: that would make the next
					// wait() throw immediately and busy-spin.
					interrupted = true;
					e.printStackTrace();
				}
			}
		}
		// Restore the interrupt status for callers; the original swallowed it.
		if(interrupted){
			Thread.currentThread().interrupt();
		}
	}

	return this.delayTime;
}
 
开发者ID:h2r,项目名称:burlap_rosbridge,代码行数:20,代码来源:RepeatingActionPublisher.java

示例8: action

import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public Action action(State s) {
	
	// A planner must be attached before this policy can be queried.
	if(this.dp == null){
		throw new RuntimeException("The valueFunction used by this Policy is not defined; therefore, the policy is undefined.");
	}
	
	// The policy is only defined for states the planner has already planned for.
	if(!this.dp.hasCachedPlanForState(s)){
		throw new PolicyUndefinedException();
	}
	
	Action selected = this.dp.querySelectedActionForState(s);
	// Defensive: the cached-plan check should rule out null, but verify anyway.
	if(selected == null){
		throw new PolicyUndefinedException();
	}
	return selected;
}
 
开发者ID:jmacglashan,项目名称:burlap,代码行数:18,代码来源:SDPlannerPolicy.java

示例9: collectDataFrom

import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public SARSData collectDataFrom(State s, SampleModel model, int maxSteps, SARSData intoDataset) {
	
	// Create a fresh dataset when the caller did not supply one to append to.
	if(intoDataset == null){
		intoDataset = new SARSData();
	}
	
	State cur = s;
	boolean done = model.terminal(s);
	
	// Random walk: at each step pick a uniformly random applicable action and
	// record the (s, a, r, s') tuple, until termination or the step budget
	// is exhausted.
	for(int step = 0; !done && step < maxSteps; step++){
		List<Action> candidates = ActionUtils.allApplicableActionsForTypes(this.actionTypes, cur);
		Action chosen = candidates.get(RandomFactory.getMapped(0).nextInt(candidates.size()));
		EnvironmentOutcome outcome = model.sample(cur, chosen);
		intoDataset.add(cur, chosen, outcome.r, outcome.op);
		cur = outcome.op;
		done = outcome.terminated;
	}
	
	return intoDataset;
	
}
 
开发者ID:jmacglashan,项目名称:burlap,代码行数:27,代码来源:SARSCollector.java

示例10: estimateQs

import burlap.mdp.core.action.Action; //导入依赖的package包/类
/**
 * Estimates and returns the Q-values for this node. Q-values and used state samples are forgotten after this call completes.
 * @return a {@link List} of the estimated Q-values for each action.
 */
public List<QValue> estimateQs(){
	List<Action> applicable = SparseSampling.this.applicableActions(this.sh.s());
	List<QValue> estimates = new ArrayList<QValue>(applicable.size());
	for(Action action : applicable){
		// Leaf node (height exhausted): fall back to the value-function initializer.
		if(this.height <= 0){
			estimates.add(new QValue(this.sh.s(), action, SparseSampling.this.vinit.value(this.sh.s())));
			continue;
		}
		// Interior node: either an exact Q backup or a sampled estimate,
		// depending on the planner's configuration.
		double q = SparseSampling.this.computeExactValueFunction
				? this.exactQValue(action)
				: this.sampledQEstimate(action);
		estimates.add(new QValue(this.sh.s(), action, q));
	}
	
	return estimates;
}
 
开发者ID:jmacglashan,项目名称:burlap,代码行数:27,代码来源:SparseSampling.java

示例11: allApplicableActions

import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public List<Action> allApplicableActions(State s) {

	// Object-parameterized actions only make sense for OO-MDP states.
	if(!(s instanceof OOState)){
		throw new RuntimeException("Cannot get object-parameterized grounded actions in state, because " + s.getClass().getName() + " does not implement OOState");
	}

	List<Action> grounded = new ArrayList<Action>();

	// Enumerate every legal binding of state objects to this action type's
	// parameters, respecting the declared parameter classes and order groups.
	List<List<String>> bindings = OOStateUtilities.getPossibleBindingsGivenParamOrderGroups((OOState)s, this.getParameterClasses(), this.getParameterOrderGroups());

	for(List<String> binding : bindings){
		String[] params = binding.toArray(new String[binding.size()]);
		ObjectParameterizedAction candidate = this.generateAction(params);
		// Keep only the groundings whose preconditions hold in s.
		if(this.applicableInState(s, candidate)){
			grounded.add(candidate);
		}
	}

	return grounded;

}
 
开发者ID:jmacglashan,项目名称:burlap,代码行数:25,代码来源:ObjectParameterizedActionType.java

示例12: action

import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public Action action(State s) {

	// Block until another thread supplies the next action, then consume it.
	synchronized(this){
		while(this.nextAction == null){
			try {
				this.wait();
			} catch(InterruptedException e) {
				// Deliberate best-effort: keep waiting for the action rather
				// than aborting. (Re-interrupting here would make the next
				// wait() throw immediately and busy-spin.)
				e.printStackTrace();
			}
		}
		// Read and clear under the lock. In the original these two steps
		// happened after releasing the monitor, so a racing producer could
		// overwrite nextAction between the read and the clear (losing an
		// action) or another consumer could take the same action twice.
		Action toTake = this.nextAction;
		this.nextAction = null;
		return toTake;
	}
}
 
开发者ID:jmacglashan,项目名称:burlap,代码行数:17,代码来源:ManualAgentsCommands.java

示例13: UCTStateNode

import burlap.mdp.core.action.Action; //导入依赖的package包/类
/**
 * Initializes the UCT state node.
 * @param s the state that this node wraps
 * @param d the depth of the node
 * @param actionTypes the possible OO-MDP actions that can be taken
 * @param constructor a {@link UCTActionNode} factory that can be used to create ActionNodes for each of the actions.
 */
public UCTStateNode(HashableState s, int d, List <ActionType> actionTypes, UCTActionConstructor constructor){
	
	this.state = s;
	this.depth = d;
	this.n = 0;
	this.actionNodes = new ArrayList<UCTActionNode>();

	// Create one action node, via the supplied factory, for every action
	// applicable in the wrapped state.
	List<Action> applicable = ActionUtils.allApplicableActionsForTypes(actionTypes, s.s());
	for(Action action : applicable){
		this.actionNodes.add(constructor.generate(action));
	}

}
 
开发者ID:jmacglashan,项目名称:burlap,代码行数:24,代码来源:UCTStateNode.java

示例14: getAgentSynchronizedActionSelection

import burlap.mdp.core.action.Action; //导入依赖的package包/类
/**
 * This method returns the action for a single agent by a synchronized sampling of this joint policy,
 * which enables multiple agents to query this policy object and act according to the same selected joint
 * actions from it. This is useful when decisions are made from a "referee" who selects the joint action
 * that dictates the behavior of each agent. The synchronization is implemented by selecting a joint action.
 * Each time an agent queries for their action, it is drawn from the previously sampled joint action.
 * A new joint action is only selected after each agent defined in this objects {@link #agentsInJointPolicy} member 
 * has queried this method for their action or until an action for a different state is queried (that is, *either* condition
 * will cause the joint action to be resampled).
 * @param agentNum the agent whose action in this joint policy is being queried
 * @param s the state in which the action is to be selected.
 * @return the single agent action to be taken according to the synchronized joint action that was selected.
 */
public Action getAgentSynchronizedActionSelection(int agentNum, State s){
	
	// A query for a new (or first) state invalidates the previously sampled
	// joint action: resample and restart the round of per-agent queries.
	if(this.lastSyncedState == null || !this.lastSyncedState.equals(s)){
		//then reset synchronization
		this.lastSyncedState = s;
		this.agentsSynchronizedSoFar.clear();
		this.lastSynchronizedJointAction = (JointAction)this.action(s);
	}
	
	// Draw this agent's component from the shared joint action and record that
	// the agent has been served this round.
	// NOTE(review): a repeat query by the same agent before the round completes
	// re-adds to the set without effect, so it does not advance the round.
	Action a = this.lastSynchronizedJointAction.action(agentNum);
	this.agentsSynchronizedSoFar.add(agentNum);
	if(this.agentsSynchronizedSoFar.size() == this.agentsInJointPolicy.size()){
		//then we're finished getting the actions for all agents and enable the next query
		this.lastSyncedState = null;
		this.agentsSynchronizedSoFar.clear();
	}
	
	return a;
	
}
 
开发者ID:jmacglashan,项目名称:burlap,代码行数:34,代码来源:JointPolicy.java

示例15: policyDistribution

import burlap.mdp.core.action.Action; //导入依赖的package包/类
@Override
public List<ActionProb> policyDistribution(State s) {

	// Distribution queries require the wrapped policy to be enumerable.
	if(!(this.sourcePolicy instanceof EnumerablePolicy)){
		throw new RuntimeException("Cannot return policy distribution because source policy does not implement EnumerablePolicy");
	}

	List<Action> unmodeled = KWIKModel.Helper.unmodeledActions(model, allActionTypes, s);

	// When every action is modeled, defer entirely to the source policy.
	if(unmodeled.isEmpty()){
		return ((EnumerablePolicy)this.sourcePolicy).policyDistribution(s);
	}

	// Otherwise favor the unmodeled actions with a uniform distribution to
	// drive exploration of them.
	double uniform = 1./(double)unmodeled.size();
	List<ActionProb> distribution = new ArrayList<ActionProb>(unmodeled.size());
	for(Action candidate : unmodeled){
		distribution.add(new ActionProb(candidate, uniform));
	}
	return distribution;
}
 
开发者ID:jmacglashan,项目名称:burlap,代码行数:21,代码来源:UnmodeledFavoredPolicy.java


注:本文中的burlap.mdp.core.action.Action类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。