本文整理汇总了Java中burlap.mdp.singleagent.environment.Environment类的典型用法代码示例。如果您正苦于以下问题:Java Environment类的具体用法?Java Environment怎么用?Java Environment使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
Environment类属于burlap.mdp.singleagent.environment包,在下文中一共展示了Environment类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: runLearningEpisode
import burlap.mdp.singleagent.environment.Environment; //导入依赖的package包/类
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {
    // Roll out the current learned policy in the environment; -1 means "no step cap".
    Episode episode;
    if (maxSteps == -1) {
        episode = PolicyUtils.rollout(this.learningPolicy, env);
    } else {
        episode = PolicyUtils.rollout(this.learningPolicy, env, maxSteps);
    }

    // Fold the newly observed transitions into the training dataset.
    this.updateDatasetWithLearningEpisode(episode);

    if (this.shouldRereunPolicyIteration(episode)) {
        // Enough new experience has accumulated: re-plan and reset the step counter.
        this.runPolicyIteration(this.maxNumPlanningIterations, this.maxChange);
        this.numStepsSinceLastLearningPI = 0;
    } else {
        // numTimeSteps counts states, so the number of steps is one less.
        this.numStepsSinceLastLearningPI += episode.numTimeSteps() - 1;
    }

    // Keep only the most recent numEpisodesToStore episodes in history.
    if (episodeHistory.size() >= numEpisodesToStore) {
        episodeHistory.poll();
    }
    episodeHistory.offer(episode);

    return episode;
}
示例2: collectNInstances
import burlap.mdp.singleagent.environment.Environment; //导入依赖的package包/类
/**
 * Collects nSamples SARS tuples from the given {@link burlap.mdp.singleagent.environment.Environment}
 * and accumulates them into a {@link burlap.behavior.singleagent.learning.lspi.SARSData} object.
 * Samples are gathered in episodes of at most maxEpisodeSteps steps via
 * {@link #collectDataFrom(burlap.mdp.singleagent.environment.Environment, int, SARSData)}; after each
 * episode the environment is reset with
 * {@link burlap.mdp.singleagent.environment.Environment#resetEnvironment()}.
 * @param env the {@link burlap.mdp.singleagent.environment.Environment} to sample from.
 * @param nSamples how many SARS samples to collect.
 * @param maxEpisodeSteps the cap on steps taken from any initial state of the environment.
 * @param intoDataset the dataset receiving the samples; if null a new one is allocated.
 * @return the dataset that received the samples (newly created when intoDataset was null).
 */
public SARSData collectNInstances(Environment env, int nSamples, int maxEpisodeSteps, SARSData intoDataset){
    SARSData dataset = (intoDataset != null) ? intoDataset : new SARSData(nSamples);
    int remaining = nSamples;
    while(remaining > 0 && !env.isInTerminalState()){
        // Never ask for more steps than samples still needed.
        int episodeCap = Math.min(remaining, maxEpisodeSteps);
        int sizeBefore = dataset.size();
        this.collectDataFrom(env, episodeCap, dataset);
        // Deduct however many samples the episode actually produced.
        remaining -= dataset.size() - sizeBefore;
        env.resetEnvironment();
    }
    return dataset;
}
示例3: collectDataFrom
import burlap.mdp.singleagent.environment.Environment; //导入依赖的package包/类
@Override
public SARSData collectDataFrom(Environment env, int maxSteps, SARSData intoDataset) {
    // Lazily allocate the output dataset when the caller did not supply one.
    SARSData dataset = (intoDataset != null) ? intoDataset : new SARSData();

    // Walk the environment with uniformly random applicable actions until a
    // terminal state is reached or the step budget is exhausted.
    for(int step = 0; !env.isInTerminalState() && step < maxSteps; step++){
        List<Action> applicable = ActionUtils.allApplicableActionsForTypes(this.actionTypes, env.currentObservation());
        Action chosen = applicable.get(RandomFactory.getMapped(0).nextInt(applicable.size()));
        EnvironmentOutcome outcome = env.executeAction(chosen);
        dataset.add(outcome.o, outcome.a, outcome.r, outcome.op);
    }
    return dataset;
}
示例4: EnvironmentShell
import burlap.mdp.singleagent.environment.Environment; //导入依赖的package包/类
/**
 * Initializes the interactive shell around the given environment.
 * @param domain the domain the shell operates in.
 * @param env the environment the shell's commands act upon.
 * @param is the input stream from which user commands are read.
 * @param os the output stream to which shell output is printed.
 */
public EnvironmentShell(Domain domain, Environment env, InputStream is, PrintStream os) {
    super(domain, is, os);

    // Banner shown when the shell starts.
    this.welcomeMessage = "Welcome to the BURLAP agent environment shell. Type the command 'help' to bring " +
            "up additional information about using this shell.";

    // Text displayed by the 'help' command.
    this.helpText = "Use the command help to bring up this message again. " +
            "Here is a list of standard reserved commands:\n" +
            "cmds - list all known commands.\n" +
            "aliases - list all known command aliases.\n" +
            "alias - set an alias for a command.\n" +
            "quit - terminate this shell.\n\n" +
            "Other useful, but non-reserved, commands are:\n" +
            "obs - print the current observation of the environment\n" +
            "ex - execute an action\n\n" +
            "Usually, you can get help on an individual command by passing it the -h option.";

    this.env = env;
}
示例5: AtariDQN
import burlap.mdp.singleagent.environment.Environment; //导入依赖的package包/类
/**
 * Initializes the Atari DQN harness.
 * @param learner the deep Q-learning agent used for training.
 * @param tester the tester used to evaluate the learned policy.
 * @param vfa the deep Q-network value function approximator.
 * @param actionSet the set of actions available to the agent.
 * @param env the environment (ALE) in which training/testing occurs.
 * @param trainingMemory the frame experience replay memory used during training.
 * @param testMemory the frame experience replay memory used during testing.
 */
public AtariDQN(DeepQLearner learner, DeepQTester tester, DQN vfa, ActionSet actionSet, Environment env,
                FrameExperienceMemory trainingMemory,
                FrameExperienceMemory testMemory) {
    super(learner, tester, vfa, actionSet, env);
    this.testMemory = testMemory;
    this.trainingMemory = trainingMemory;
}
示例6: TrainingHelper
import burlap.mdp.singleagent.environment.Environment; //导入依赖的package包/类
/**
 * Initializes the training helper with counters starting at zero.
 * @param learner the deep Q-learning agent to train.
 * @param tester the tester used to evaluate the agent.
 * @param vfa the deep Q-network value function approximator.
 * @param actionSet the set of actions available to the agent.
 * @param env the environment in which training occurs.
 */
public TrainingHelper(DeepQLearner learner, Tester tester, DQN vfa, ActionSet actionSet, Environment env) {
    this.env = env;
    this.actionSet = actionSet;
    this.vfa = vfa;
    this.tester = tester;
    this.learner = learner;

    // Both progress counters start fresh.
    this.stepCounter = 0;
    this.episodeCounter = 0;
}
示例7: ALEVisualExplorer
import burlap.mdp.singleagent.environment.Environment; //导入依赖的package包/类
/**
 * Initializes the visual explorer for the ALE, sizing the window to the
 * scaled Atari screen dimensions. Keyboard control is enabled when human is true.
 * @param domain the domain being explored.
 * @param env the environment rendered and acted upon.
 * @param painter the visualizer used to draw observations.
 * @param human whether to enable human keyboard input.
 */
public ALEVisualExplorer(SADomain domain, Environment env, Visualizer painter, boolean human) {
    // super(...) must be the first statement, so the scaled window
    // dimensions are computed inline.
    super(domain, env, painter,
            (int) (ALEDomainConstants.ALEScreenWidth * widthRatio),
            (int) (ALEDomainConstants.ALEScreenHeight * heightRatio));

    if (human) {
        enableHumanInput();
    }
}
示例8: runLearningEpisode
import burlap.mdp.singleagent.environment.Environment; //导入依赖的package包/类
@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {
    // Record the episode starting from the environment's current state.
    Episode episode = new Episode(env.currentObservation());

    State currentState = env.currentObservation();
    int stepCount = 0;

    // Act until a terminal state, or until the step budget runs out
    // (a maxSteps of -1 means no budget).
    while (!env.isInTerminalState() && (maxSteps == -1 || stepCount < maxSteps)) {
        // Choose an action from the learning policy and apply it.
        Action chosen = this.learningPolicy.action(currentState);
        EnvironmentOutcome outcome = env.executeAction(chosen);
        episode.transition(outcome);

        // Bootstrap target: max Q of the next state, or 0 if it is terminal.
        double bootstrap = outcome.terminated ? 0. : this.value(outcome.op);

        // Standard Q-learning update on the stored Q-value.
        QValue stored = this.storedQ(currentState, chosen);
        stored.q += this.learningRate * (outcome.r + this.gamma * bootstrap - stored.q);

        // Advance to the observed next state.
        currentState = outcome.op;
        stepCount++;
    }

    return episode;
}
示例9: RelativeRewardEnvironmentDecorator
import burlap.mdp.singleagent.environment.Environment; //导入依赖的package包/类
/**
 * Initializes the decorator that rewrites rewards relative to a baseline.
 * @param provider the Q-value provider used to compute relative rewards.
 * @param delegate the wrapped environment whose rewards are adjusted.
 * @param baselineState the state used as the reward baseline.
 * @param baselineAction the action used as the reward baseline.
 */
public RelativeRewardEnvironmentDecorator(
        QProvider provider, Environment delegate,
        State baselineState, Action baselineAction) {
    this.baselineAction = baselineAction;
    this.baselineState = baselineState;
    this.delegate = delegate;
    this.provider = provider;
}
示例10: rollout
import burlap.mdp.singleagent.environment.Environment; //导入依赖的package包/类
/**
 * Follows the policy in the given {@link burlap.mdp.singleagent.environment.Environment},
 * stopping only once the environment reaches a terminal state. At least one policy
 * step is always taken, even if the environment starts in a terminal state.
 * @param p the {@link Policy} to follow.
 * @param env the {@link burlap.mdp.singleagent.environment.Environment} in which the policy is evaluated.
 * @return an {@link Episode} recording the interaction with the environment.
 */
public static Episode rollout(Policy p, Environment env){
    Episode episode = new Episode(env.currentObservation());
    while(true){
        followAndRecordPolicy(p, env, episode);
        if(env.isInTerminalState()){
            break;
        }
    }
    return episode;
}
示例11: control
import burlap.mdp.singleagent.environment.Environment; //导入依赖的package包/类
/**
 * Executes the option o in the environment env until the option stochastically
 * elects to terminate or the environment reaches a terminal state, and returns
 * an EnvironmentOptionOutcome summarizing the whole option execution.
 * NOTE(review): at least one primitive action is always executed, even if the
 * environment starts in a terminal state — confirm this is intended.
 */
public static EnvironmentOptionOutcome control(Option o, Environment env, double discount){
Random rand = RandomFactory.getMapped(0);
//remember where the option started so the outcome can span the full execution
State initial = env.currentObservation();
State cur = initial;
//episode records annotated steps for the caller; history feeds the option's
//policy and termination queries
Episode episode = new Episode(cur);
Episode history = new Episode(cur);
double roll;
double pT;
int nsteps = 0;
//r accumulates the discounted return; cd is the current discount factor (gamma^t)
double r = 0.;
double cd = 1.;
do{
//query the option's internal policy for the next primitive action
Action a = o.policy(cur, history);
EnvironmentOutcome eo = env.executeAction(a);
nsteps++;
//accumulate the discounted reward, then decay the discount for the next step
r += cd*eo.r;
cur = eo.op;
cd *= discount;
//history gets the raw per-step reward; used for termination/policy queries
history.transition(a, eo.op, eo.r);
//episode gets the action annotated with the option name and step count,
//and the cumulative discounted return r (not the per-step reward)
AnnotatedAction annotatedAction = new AnnotatedAction(a, o.toString() + "(" + nsteps + ")");
episode.transition(annotatedAction, eo.op, r);
//sample whether the option terminates in the newly reached state
pT = o.probabilityOfTermination(eo.op, history);
roll = rand.nextDouble();
}while(roll > pT && !env.isInTerminalState());
//package the full option execution as a single environment outcome
EnvironmentOptionOutcome eoo = new EnvironmentOptionOutcome(initial, o, cur, r, env.isInTerminalState(), discount, episode);
return eoo;
}
示例12: LearningAlgorithmExperimenter
import burlap.mdp.singleagent.environment.Environment; //导入依赖的package包/类
/**
 * Initializes.
 * The trialLength is interpreted as a number of episodes by default, but may be
 * reinterpreted as a total number of steps per trial via
 * {@link #toggleTrialLengthInterpretation(boolean)}.
 * @param testEnvironment the test {@link burlap.mdp.singleagent.environment.Environment} in which experiments will be performed.
 * @param nTrials the number of trials.
 * @param trialLength the length of each trial (episodes by default; may be interpreted as a maximum step count).
 * @param agentFactories factories that generate the agents to be tested; at least one is required.
 */
public LearningAlgorithmExperimenter(Environment testEnvironment, int nTrials, int trialLength, LearningAgentFactory...agentFactories){
    // Guard clause: an experiment with no agents is meaningless.
    if(agentFactories.length == 0){
        throw new RuntimeException("Zero agent factories provided. At least one must be given for an experiment");
    }

    this.agentFactories = agentFactories;
    this.trialLength = trialLength;
    this.nTrials = nTrials;
    this.testEnvironment = testEnvironment;
}
示例13: constructServerOrAddObservers
import burlap.mdp.singleagent.environment.Environment; //导入依赖的package包/类
/**
 * Ensures the given {@link burlap.mdp.singleagent.environment.Environment} has the provided
 * observers attached. If env already implements {@link EnvironmentServerInterface}, the
 * observers are added to it directly and env itself is returned; otherwise a new
 * {@link EnvironmentServer} wrapping env with the observers is constructed.
 * @param env the {@link burlap.mdp.singleagent.environment.Environment} to attach observers to.
 * @param observers the {@link EnvironmentObserver} objects to add.
 * @return env itself (when it is already a server) or a new wrapping {@link EnvironmentServer}.
 */
public static EnvironmentServerInterface constructServerOrAddObservers(Environment env, EnvironmentObserver...observers){
    if(!(env instanceof EnvironmentServerInterface)){
        // Not a server yet: wrap it together with the observers.
        return constructor(env, observers);
    }
    EnvironmentServerInterface server = (EnvironmentServerInterface) env;
    server.addObservers(observers);
    return server;
}
示例14: getRootEnvironmentDelegate
import burlap.mdp.singleagent.environment.Environment; //导入依赖的package包/类
/**
 * Returns the root {@link burlap.mdp.singleagent.environment.Environment} delegate by
 * unwrapping chains of {@link EnvironmentDelegation} objects until a non-delegating
 * environment is found.
 * @param env the {@link EnvironmentDelegation} to unwrap.
 * @return the innermost, non-delegating {@link burlap.mdp.singleagent.environment.Environment}.
 */
public static Environment getRootEnvironmentDelegate(EnvironmentDelegation env){
    // Iteratively follow the delegate chain instead of recursing.
    Environment current = env.getEnvironmentDelegate();
    while(current instanceof EnvironmentDelegation){
        current = ((EnvironmentDelegation) current).getEnvironmentDelegate();
    }
    return current;
}
示例15: getDelegateImplementing
import burlap.mdp.singleagent.environment.Environment; //导入依赖的package包/类
/**
 * Returns the {@link burlap.mdp.singleagent.environment.Environment} — either env itself or
 * one of its {@link EnvironmentDelegation} delegates — that implements the given
 * class/interface type, or null if none in the chain do.
 * @param env the {@link burlap.mdp.singleagent.environment.Environment} to inspect.
 * @param type the class/interface type against which env and its delegates are compared.
 * @return the first environment in the delegation chain implementing type, or null if none do.
 */
public static Environment getDelegateImplementing(Environment env, Class<?> type){
    Environment current = env;
    while(true){
        if(type.isAssignableFrom(current.getClass())){
            return current;
        }
        if(!(current instanceof EnvironmentDelegation)){
            // Chain ended without a match.
            return null;
        }
        // Descend one level in the delegation chain.
        current = ((EnvironmentDelegation) current).getEnvironmentDelegate();
    }
}