

Java EnvironmentOutcome Class Code Examples

This article collects and summarizes typical usage examples of the Java class burlap.mdp.singleagent.environment.EnvironmentOutcome. If you are struggling with questions such as: what exactly is EnvironmentOutcome for, how is it used, and where can I find examples of it, then the curated class code examples below may help.


The EnvironmentOutcome class belongs to the burlap.mdp.singleagent.environment package. A total of 15 code examples of the class are shown below, ordered by popularity.
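Before the examples, here is a minimal, hypothetical sketch (not drawn from any of the projects below) of what an EnvironmentOutcome carries. The constructor and public fields used here match the ones exercised in the examples; previousState, nextState, and chosenAction are placeholder variables for whatever State and Action instances your domain produces.

import burlap.mdp.core.action.Action;
import burlap.mdp.core.state.State;
import burlap.mdp.singleagent.environment.EnvironmentOutcome;

// Constructing an outcome: (state acted in, action, resulting state, reward, terminal flag)
EnvironmentOutcome eo = new EnvironmentOutcome(previousState, chosenAction, nextState, 1.0, false);

State o = eo.o;                 // state the action was taken in
Action a = eo.a;                // action that was executed
State op = eo.op;               // resulting ("prime") state
double r = eo.r;                // reward received for the transition
boolean done = eo.terminated;   // whether op is a terminal state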

Example 1: addExperience

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public void addExperience(EnvironmentOutcome eo) {
    // If this is the first frame of the episode, add the frame from the initial observation (eo.o).
    if (currentFrameHistory.historyLength == 0) {
        currentFrameHistory = addFrame(((ALEState)eo.o).getScreen());
    }

    // If this experience ends in a terminal state,
    // the terminal frame will never be used, so don't add it.
    FrameHistory op;
    if (eo.terminated) {
        op = new FrameHistory(currentFrameHistory.index, 0);
    } else {
        op = addFrame(((ALEState)eo.op).getScreen());
    }

    experiences[next] = new FrameExperience(currentFrameHistory, actionSet.map(eo.a), op, eo.r, eo.terminated);
    next = (next+1) % experiences.length;
    size = Math.min(size+1, experiences.length);

    currentFrameHistory = op;
}
 
Developer: h2r, Project: burlap_caffe, Lines of code: 23, Source file: FrameExperienceMemory.java

Example 2: updateQFunction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public void updateQFunction(List<EnvironmentOutcome> samples) {

    // fill up experience replay
    if (runningRandomPolicy) {
        if (totalSteps >= replayStartSize) {
            System.out.println("Replay sufficiently filled. Beginning training...");

            setLearningPolicy(trainingPolicy);
            runningRandomPolicy = false;

            // reset stale update timer
            this.stepsSinceStale = 1;
        }

        return;
    }

    // only update every updateFreq steps
    if (totalSteps % updateFreq == 0) {
        ((DQN)vfa).updateQFunction(samples, (DQN)staleVfa);
    }
}
 
Developer: h2r, Project: burlap_caffe, Lines of code: 24, Source file: DeepQLearner.java

Example 3: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public EnvironmentOutcome executeAction(Action a) {
	State startState = this.currentObservation();
	
	ActionController ac = this.actionControllerMap.get(a.actionName());
	int delay = ac.executeAction(a);
	if (delay > 0) {
		try {
			Thread.sleep(delay);
		} catch(InterruptedException e) {
			e.printStackTrace();
		}
	}
	
	State finalState = this.currentObservation();
	
	this.lastReward = this.rewardFunction.reward(startState, a, finalState);
	
	EnvironmentOutcome eo = new EnvironmentOutcome(startState, a, finalState, this.lastReward, this.isInTerminalState());
	
	return eo;
}
 
Developer: h2r, Project: burlapcraft, Lines of code: 23, Source file: MinecraftEnvironment.java

Example 4: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public EnvironmentOutcome executeAction(Action ga) {

	State prevState = this.currentState;
	synchronized(this.nextAction){
		this.nextAction.val = ga;
		this.nextAction.notifyAll();
	}


	synchronized(this.nextState){
		while(this.nextState.val == null){
			try{
				nextState.wait();
			} catch(InterruptedException ex){
				ex.printStackTrace();
			}
		}
		this.nextState.val = null;
	}

	EnvironmentOutcome eo = new EnvironmentOutcome(prevState, ga, this.currentState, this.lastReward, this.curStateIsTerminal);

	return eo;
}
 
Developer: jmacglashan, Project: burlap, Lines of code: 26, Source file: LearningAgentToSGAgentInterface.java

Example 5: collectDataFrom

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public SARSData collectDataFrom(State s, SampleModel model, int maxSteps, SARSData intoDataset) {
	
	if(intoDataset == null){
		intoDataset = new SARSData();
	}
	
	State curState = s;
	int nsteps = 0;
	boolean terminated = model.terminal(s);
	while(!terminated && nsteps < maxSteps){
		
		List<Action> gas = ActionUtils.allApplicableActionsForTypes(this.actionTypes, curState);
		Action ga = gas.get(RandomFactory.getMapped(0).nextInt(gas.size()));
		EnvironmentOutcome eo = model.sample(curState, ga);
		intoDataset.add(curState, ga, eo.r, eo.op);
		curState = eo.op;
		terminated = eo.terminated;
		nsteps++;
		
	}
	
	
	return intoDataset;
	
}
 
Developer: jmacglashan, Project: burlap, Lines of code: 27, Source file: SARSCollector.java

Example 6: computeF

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
public double computeF(PrioritizedSearchNode parentNode, Action generatingAction, HashableState successorState, EnvironmentOutcome eo) {
	double cumR = 0.;
	int d = 0;
	if(parentNode != null){
		double pCumR = cumulatedRewardMap.get(parentNode.s);
		cumR = pCumR + eo.r;
		
		int pD = depthMap.get(parentNode.s);
		if(!(generatingAction instanceof Option)){
			d = pD + 1;
		}
		else{
			d = pD + ((EnvironmentOptionOutcome)eo).numSteps();
		}
	}
	
	double H  = heuristic.h(successorState.s());
	lastComputedCumR = cumR;
	lastComputedDepth = d;
	double weightedE = this.epsilon * this.epsilonWeight(d);
	double F = cumR + ((1. + weightedE)*H);
	
	return F;
}
 
Developer: jmacglashan, Project: burlap, Lines of code: 25, Source file: DynamicWeightedAStar.java

Example 7: transitions

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public List<TransitionProb> transitions(State s, Action a) {

	if(!(this.stateModel instanceof FullStateModel)){
		throw new RuntimeException("Factored Model cannot enumerate transition distribution, because the state model does not implement FullStateModel");
	}

	List<StateTransitionProb> stps = ((FullStateModel)this.stateModel).stateTransitions(s, a);
	List<TransitionProb> tps = new ArrayList<TransitionProb>(stps.size());
	for(StateTransitionProb stp : stps){
		double r = this.rf.reward(s, a, stp.s);
		boolean t = this.tf.isTerminal(stp.s);
		TransitionProb tp = new TransitionProb(stp.p, new EnvironmentOutcome(s, a, stp.s, r, t));
		tps.add(tp);
	}

	return tps;
}
 
Developer: jmacglashan, Project: burlap, Lines of code: 19, Source file: FactoredModel.java
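As a usage note (not part of the BURLAP source above): each returned TransitionProb pairs a probability p with an EnvironmentOutcome eo, so a one-step expected value (a Bellman backup) can be computed by iterating over the list. In the hedged sketch below, model is any model exposing transitions(s, a) such as FactoredModel, and v is a hypothetical state-value estimate; gamma is the discount factor.

// Sketch: expected discounted value of taking action a in state s under the model.
double gamma = 0.99;
double q = 0.;
for (TransitionProb tp : model.transitions(s, a)) {
	EnvironmentOutcome eo = tp.eo;
	double future = eo.terminated ? 0. : v.value(eo.op);  // v is a hypothetical value function
	q += tp.p * (eo.r + gamma * future);
}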

Example 8: actUntilTerminalOrMaxSteps

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
/**
 * Causes the agent to act for some fixed number of steps. The agent's belief is automatically
 * updated by this method using the specified {@link BeliefUpdate}.
 * The agent's action selection for the current belief state is defined by
 * the {@link #getAction(burlap.mdp.singleagent.pomdp.beliefstate.BeliefState)} method. The observation, action, and reward
 * sequence is saved in an {@link Episode} object and returned.
 * @param maxSteps the maximum number of steps to take in the environment
 * @return an {@link Episode} that records the observation, action, and reward sequence.
 */
public Episode actUntilTerminalOrMaxSteps(int maxSteps){
	Episode ea = new Episode();
	ea.initializeInState(this.environment.currentObservation());
	int c = 0;
	while(!this.environment.isInTerminalState() && c < maxSteps){
		Action ga = this.getAction(this.curBelief);
		EnvironmentOutcome eo = environment.executeAction(ga);
		ea.transition(ga, eo.op, eo.r);

		//update our belief
		this.curBelief = this.updater.update(this.curBelief, eo.op, eo.a);
		
		c++;
		
	}
	
	return ea;
}
 
Developer: jmacglashan, Project: burlap, Lines of code: 28, Source file: BeliefAgent.java

Example 9: sampleExperiences

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public List<EnvironmentOutcome> sampleExperiences(int n) {
    List<FrameExperience> samples = sampleFrameExperiences(n);

    List<EnvironmentOutcome> sampleOutcomes = new ArrayList<>(samples.size());
    for (FrameExperience exp : samples) {
        sampleOutcomes.add(new EnvironmentOutcome(exp.o, actionSet.get(exp.a), exp.op, exp.r, exp.terminated));
    }

    return sampleOutcomes;
}
 
Developer: h2r, Project: burlap_caffe, Lines of code: 12, Source file: FrameExperienceMemory.java
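Read together with Example 2, a replay memory like this is typically consumed by sampling a minibatch of outcomes and handing it to the learner's update. The sketch below is hypothetical and only assumes objects named memory and learner that expose the two methods shown in Examples 9 and 2, respectively.

// One experience-replay update step (the minibatch size of 32 is illustrative).
List<EnvironmentOutcome> batch = memory.sampleExperiences(32);
learner.updateQFunction(batch);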

Example 10: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
public EnvironmentOutcome executeAction(ALEAction a) {
    // save start state
    State startState = currentState;

    // perform action
    boolean closed = io.act(a.aleCode);
    if (closed) {
        // the FIFO stream was closed
        throw new RuntimeException("ALE FIFO stream closed");
    }

    // Obtain the screen matrix
    Mat screen = io.getScreen();

    // Get RLData
    RLData rlData = io.getRLData();

    // Update Environment State
    lastReward = rlData.reward;
    isTerminal = rlData.isTerminal;
    currentState = new ALEState(screen);

    if (terminateOnEndLife) {
        if (rlData.isTerminal) {
            isTerminal = true;
            currentLives = 0;
        } else if (rlData.lives != currentLives) {
            isTerminal = true;
            currentLives = rlData.lives;
        }
    } else {
        isTerminal = rlData.isTerminal;
        currentLives = rlData.lives;
    }

    return new EnvironmentOutcome(startState, a, currentState, lastReward, isTerminal);
}
 
Developer: h2r, Project: burlap_ale, Lines of code: 38, Source file: ALEEnvironment.java

Example 11: runLearningEpisode

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {
	//initialize our episode object with the initial state of the environment
	Episode e = new Episode(env.currentObservation());

	//behave until a terminal state or max steps is reached
	State curState = env.currentObservation();
	int steps = 0;
	while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){

		//select an action
		Action a = this.learningPolicy.action(curState);

		//take the action and observe outcome
		EnvironmentOutcome eo = env.executeAction(a);

		//record result
		e.transition(eo);

		//get the max Q value of the resulting state if it's not terminal, 0 otherwise
		double maxQ = eo.terminated ? 0. : this.value(eo.op);

		//update the old Q-value
		QValue oldQ = this.storedQ(curState, a);
		oldQ.q = oldQ.q + this.learningRate * (eo.r + this.gamma * maxQ - oldQ.q);


		//update state pointer to next environment state observed
		curState = eo.op;
		steps++;

	}

	return e;
}
 
Developer: jmacglashan, Project: burlap_examples, Lines of code: 36, Source file: QLTutorial.java
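To exercise a learner like the one above, learning episodes are usually run against an Environment such as BURLAP's SimulatedEnvironment. The following is a rough sketch, assuming a LearningAgent named agent (for example, the Q-learner above) plus a domain and initialState constructed elsewhere.

// Run repeated learning episodes against a simulated environment.
SimulatedEnvironment env = new SimulatedEnvironment(domain, initialState);
for (int i = 0; i < 200; i++) {
	Episode e = agent.runLearningEpisode(env, 500);   // cap each episode at 500 steps
	System.out.println("episode " + i + ": " + e.maxTimeStep() + " steps");
	env.resetEnvironment();                           // reset to the initial state for the next episode
}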

Example 12: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
/**
     * Executes the specified action in this environment
     *
     * @param a the Action that is to be performed in this environment.
     * @return the resulting observation and reward transition from applying the given GroundedAction in this environment.
     */
    @Override
    public EnvironmentOutcome executeAction(Action a) {

        ShodanStateOil currentState =  currentObservation();


        if(a.actionName().equals(ACTION_OPEN))
            shodan.setOpen(true);
        else {
            assert a.actionName().equals(ACTION_CLOSE);
            shodan.setOpen(false);
        }
        // run the model for another 30 days
        for(int day=0; day<30; day++)
            state.schedule.step(state);

        /*
        System.out.println(a.actionName() + "  " + state.getFishers().get(0).getRegulation().allowedAtSea(null,state) +
                                   "   " + state.getMap().getPorts().iterator().next().getGasPricePerLiter()
        );
*/

        ShodanStateOil newState =  currentObservation();


        return new EnvironmentOutcome(
                currentState,
                a,
                newState,
                lastReward(),
                isInTerminalState()
        );

    }
 
Developer: CarrKnight, Project: POSEIDON, Lines of code: 41, Source file: ShodanEnvironment.java

Example 13: executeAction

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public EnvironmentOutcome executeAction(Action a) {

	State startState = this.currentObservation();

	ActionPublisher ap = this.actionPublishers.get(a.actionName());
	if(ap == null){
		throw new RuntimeException("AbstractRosEnvironment has no ActionPublisher available to handle action " + a.toString());
	}

	int delay = ap.publishAction(a);
	if(delay > 0){
		try {
			Thread.sleep(delay);
		} catch (InterruptedException e) {
			e.printStackTrace();
		}
	}

	State finalState = this.currentObservation();

	this.lastReward = this.getMostRecentRewardSignal(startState, a, finalState);

	EnvironmentOutcome eo = new EnvironmentOutcome(startState, a, finalState, this.lastReward, this.isInTerminalState());

	if(this.isInTerminalState()){
		this.handleEnterTerminalState();
	}

	return eo;
}
 
Developer: h2r, Project: burlap_rosbridge, Lines of code: 32, Source file: AbstractRosEnvironment.java

Example 14: sample

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
@Override
public EnvironmentOutcome sample(State s, Action a) {
	if(!(a instanceof Option)){
		return model.sample(s, a);
	}

	Option o = (Option)a;

	SimulatedEnvironment env = new SimulatedEnvironment(model, s);
	return o.control(env, discount);
}
 
Developer: jmacglashan, Project: burlap, Lines of code: 12, Source file: BFSMarkovOptionModel.java

Example 15: control

import burlap.mdp.singleagent.environment.EnvironmentOutcome; // import the required package/class
public static EnvironmentOptionOutcome control(Option o, Environment env, double discount){
	Random rand = RandomFactory.getMapped(0);
	State initial = env.currentObservation();
	State cur = initial;

	Episode episode = new Episode(cur);
	Episode history = new Episode(cur);
	double roll;
	double pT;
	int nsteps = 0;
	double r = 0.;
	double cd = 1.;
	do{
		Action a = o.policy(cur, history);
		EnvironmentOutcome eo = env.executeAction(a);
		nsteps++;
		r += cd*eo.r;
		cur = eo.op;
		cd *= discount;


		history.transition(a, eo.op, eo.r);

		AnnotatedAction annotatedAction = new AnnotatedAction(a, o.toString() + "(" + nsteps + ")");
		episode.transition(annotatedAction, eo.op, r);


		pT = o.probabilityOfTermination(eo.op, history);
		roll = rand.nextDouble();

	}while(roll > pT && !env.isInTerminalState());

	EnvironmentOptionOutcome eoo = new EnvironmentOptionOutcome(initial, o, cur, r, env.isInTerminalState(), discount, episode);

	return eoo;

}
 
Developer: jmacglashan, Project: burlap, Lines of code: 38, Source file: Option.java


Note: The burlap.mdp.singleagent.environment.EnvironmentOutcome class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are drawn from open-source projects contributed by their respective authors; copyright remains with the original authors, and distribution and use should follow the license of the corresponding project. Do not repost without permission.