This article collects typical usage examples of the Java method burlap.mdp.singleagent.environment.Environment.currentObservation. If you have been wondering what Environment.currentObservation does and how to use it, the curated examples below should help. You can also explore the enclosing class, burlap.mdp.singleagent.environment.Environment, for further usage examples.
Thirteen code examples of the Environment.currentObservation method are shown below, sorted by popularity by default.
Example 1: runLearningEpisode
import burlap.mdp.singleagent.environment.Environment; //import the package/class the method depends on

@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {
    //initialize our episode object with the initial state of the environment
    Episode e = new Episode(env.currentObservation());

    //behave until a terminal state or the max number of steps is reached
    State curState = env.currentObservation();
    int steps = 0;
    while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){

        //select an action
        Action a = this.learningPolicy.action(curState);

        //take the action and observe the outcome
        EnvironmentOutcome eo = env.executeAction(a);

        //record the result
        e.transition(eo);

        //get the max Q-value of the resulting state if it's not terminal, 0 otherwise
        double maxQ = eo.terminated ? 0. : this.value(eo.op);

        //update the old Q-value
        QValue oldQ = this.storedQ(curState, a);
        oldQ.q = oldQ.q + this.learningRate * (eo.r + this.gamma * maxQ - oldQ.q);

        //update the state pointer to the next environment state observed
        curState = eo.op;
        steps++;
    }

    return e;
}
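For context, here is a minimal, hedged sketch (not from the original page) of how a learning agent exposing runLearningEpisode can be driven. It assumes BURLAP's example grid world domain and uses BURLAP's built-in QLearning agent as a stand-in for whichever LearningAgent implementation hosts the method above.

import burlap.behavior.singleagent.Episode;
import burlap.behavior.singleagent.learning.LearningAgent;
import burlap.behavior.singleagent.learning.tdmethods.QLearning;
import burlap.domain.singleagent.gridworld.GridWorldDomain;
import burlap.domain.singleagent.gridworld.GridWorldTerminalFunction;
import burlap.domain.singleagent.gridworld.state.GridAgent;
import burlap.domain.singleagent.gridworld.state.GridWorldState;
import burlap.mdp.singleagent.SADomain;
import burlap.mdp.singleagent.environment.SimulatedEnvironment;
import burlap.statehashing.simple.SimpleHashableStateFactory;

public class LearningEpisodeSketch {
    public static void main(String[] args) {
        GridWorldDomain gwd = new GridWorldDomain(11, 11);
        gwd.setMapToFourRooms();
        gwd.setTf(new GridWorldTerminalFunction(10, 10)); //goal in the top-right corner
        SADomain domain = gwd.generateDomain();

        //the Environment whose currentObservation() seeds each Episode
        SimulatedEnvironment env = new SimulatedEnvironment(
                domain, new GridWorldState(new GridAgent(0, 0)));

        //stand-in agent; any LearningAgent with a runLearningEpisode like the one above works here
        LearningAgent agent = new QLearning(domain, 0.99,
                new SimpleHashableStateFactory(), 0., 0.1);

        for(int i = 0; i < 100; i++){
            Episode e = agent.runLearningEpisode(env, 500); //cap episodes at 500 steps
            System.out.println("episode " + i + ": " + e.maxTimeStep() + " steps");
            env.resetEnvironment(); //restart from the initial state for the next episode
        }
    }
}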
Example 2: rollout
import burlap.mdp.singleagent.environment.Environment; //import the package/class the method depends on

/**
 * Follows the policy in the given {@link burlap.mdp.singleagent.environment.Environment}. The policy will stop being followed once a terminal state
 * in the environment is reached.
 * @param p the {@link Policy}
 * @param env the {@link burlap.mdp.singleagent.environment.Environment} in which this policy is to be evaluated.
 * @return an {@link Episode} object specifying the interaction with the environment.
 */
public static Episode rollout(Policy p, Environment env){
    Episode ea = new Episode(env.currentObservation());

    do{
        followAndRecordPolicy(p, env, ea);
    }while(!env.isInTerminalState());

    return ea;
}
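As a usage note (a sketch, not from the original page): this is the method BURLAP exposes as PolicyUtils.rollout, typically handed a planner-derived policy. Beware that this overload only stops at a terminal state; BURLAP also provides an overload with a step cap for environments that may never terminate. Here `planner` is assumed to be any QProvider (e.g., a ValueIteration instance) and `env` an Environment as in the sketch after Example 1.

Policy p = new GreedyQPolicy(planner);        //act greedily w.r.t. the planner's Q-values
Episode e = PolicyUtils.rollout(p, env);      //follow p until a terminal state is reached
System.out.println(e.discountedReturn(0.99)); //discounted return of the recorded rollout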
Example 3: control
import burlap.mdp.singleagent.environment.Environment; //import the package/class the method depends on

public static EnvironmentOptionOutcome control(Option o, Environment env, double discount){
    Random rand = RandomFactory.getMapped(0);

    State initial = env.currentObservation();
    State cur = initial;

    Episode episode = new Episode(cur);
    Episode history = new Episode(cur);
    double roll;
    double pT;
    int nsteps = 0;
    double r = 0.;
    double cd = 1.;
    do{
        //query the option's policy and execute its selected action
        Action a = o.policy(cur, history);
        EnvironmentOutcome eo = env.executeAction(a);
        nsteps++;

        //accumulate the discounted return and advance the discount weight
        r += cd*eo.r;
        cur = eo.op;
        cd *= discount;

        history.transition(a, eo.op, eo.r);

        //annotate the recorded action with the option's name and step number
        AnnotatedAction annotatedAction = new AnnotatedAction(a, o.toString() + "(" + nsteps + ")");
        episode.transition(annotatedAction, eo.op, r);

        //sample whether the option terminates in the new state
        pT = o.probabilityOfTermination(eo.op, history);
        roll = rand.nextDouble();

    }while(roll > pT && !env.isInTerminalState());

    EnvironmentOptionOutcome eoo = new EnvironmentOptionOutcome(initial, o, cur, r, env.isInTerminalState(), discount, episode);

    return eoo;
}
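The loop above accumulates the option's discounted return incrementally: r = r1 + γ·r2 + γ²·r3 + …, with cd tracking γ^t. A tiny standalone sketch of just that bookkeeping, to make the update order explicit (the reward values are illustrative, not from the source):

double discount = 0.99;
double[] stepRewards = {1.0, 0.0, 5.0}; //illustrative per-step rewards
double r = 0.;  //running discounted return
double cd = 1.; //current discount weight, discount^t
for(double reward : stepRewards){
    r += cd * reward;  //add discount^t * r_{t+1}
    cd *= discount;    //advance to discount^(t+1)
}
//r == 1.0 + 0.99*0.0 + 0.9801*5.0 == 5.9005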
Example 4: call
import burlap.mdp.singleagent.environment.Environment; //import the package/class the method depends on

@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {
    Environment env = ((EnvironmentShell)shell).getEnv();
    OptionSet oset = this.parser.parse(argString.split(" "));
    List<String> args = (List<String>)oset.nonOptionArguments();
    if(oset.has("h")){
        os.println("[-v] objectName\nRemoves an OO-MDP object instance with name objectName " +
                "from the current state of the environment. The environment must implement StateSettableEnvironment " +
                "for this operation to work.\n\n" +
                "-v print the new Environment state after completion.");
        return 0;
    }

    StateSettableEnvironment senv = (StateSettableEnvironment)EnvironmentDelegation.Helper.getDelegateImplementing(env, StateSettableEnvironment.class);
    if(senv == null){
        os.println("Cannot remove object from environment state, because the environment does not implement StateSettableEnvironment");
        return 0;
    }

    if(args.size() != 1){
        return -1;
    }

    State s = env.currentObservation();
    if(!(s instanceof MutableOOState)){
        os.println("Cannot remove object from state, because the state is not a MutableOOState");
        return 0;
    }

    ((MutableOOState)s).removeObject(args.get(0));
    senv.setCurStateTo(s);

    if(oset.has("v")){
        os.println(env.currentObservation().toString());
    }

    return 1;
}
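A programmatic equivalent of what this shell command does, as a sketch using the same helpers the command itself calls. It assumes `env` is an Environment whose states are OO-MDP MutableOOState instances; the object name "agent" is a hypothetical example.

StateSettableEnvironment senv = (StateSettableEnvironment)
        EnvironmentDelegation.Helper.getDelegateImplementing(env, StateSettableEnvironment.class);
MutableOOState s = (MutableOOState)senv.currentObservation();
s.removeObject("agent"); //"agent" is a hypothetical object name
senv.setCurStateTo(s);   //push the modified state back into the environment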
Example 5: runLearningEpisode
import burlap.mdp.singleagent.environment.Environment; //import the package/class the method depends on

@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {
    State initialState = env.currentObservation();
    Episode e = new Episode(initialState);

    int eStepCounter = 0;
    while(!env.isInTerminalState() && (eStepCounter < maxSteps || maxSteps == -1)){

        //check state
        State curState = stateMapping.mapState(env.currentObservation());

        //select action
        Action a = this.learningPolicy.action(curState);

        //take action
        EnvironmentOutcome eo = env.executeAction(a);

        //save outcome in replay memory
        this.memory.addExperience(eo);

        //record transition and manage the option case
        int stepInc = eo instanceof EnvironmentOptionOutcome ? ((EnvironmentOptionOutcome)eo).numSteps() : 1;
        eStepCounter += stepInc;
        this.totalSteps += stepInc;
        e.transition(a, eo.op, eo.r);

        //perform learning on a sample of past experiences
        List<EnvironmentOutcome> samples = this.memory.sampleExperiences(this.numReplay);
        this.updateQFunction(samples);

        //update the stale (target) function if it is time to
        this.stepsSinceStale++;
        if(this.stepsSinceStale >= this.staleDuration){
            this.updateStaleFunction();
        }
    }

    this.totalEpisodes++;
    return e;
}
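The stale-function bookkeeping at the end of the loop is the DQN-style target refresh: Q-targets are computed from a frozen copy of the value function that is re-synced to the live one every staleDuration steps. A minimal sketch of that cadence (the field names liveQ and staleQ are hypothetical, not this class's actual internals):

//inside the per-step loop of a replay-based learner
stepsSinceStale++;
if(stepsSinceStale >= staleDuration){
    staleQ = liveQ.copy(); //hypothetical deep copy of the live value function
    stepsSinceStale = 0;   //presumably what updateStaleFunction() above does internally
}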
Example 6: call
import burlap.mdp.singleagent.environment.Environment; //import the package/class the method depends on

@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {
    Environment env = ((EnvironmentShell)shell).getEnv();
    OptionSet oset = this.parser.parse(argString.split(" "));
    if(oset.has("h")){
        os.println("[-n] [-s]\nCommand to list applicable and executable actions for the current environment observation.\n" +
                "-n: list the names of all known actions (no parameters specified), regardless of whether they are applicable in the current observation\n" +
                "-s: query applicable actions w.r.t. a POMDP hidden state, rather than the environment observation. Environment must extend SimulatedPOEnvironment");
        return 0;
    }

    if(oset.has("n")){
        for(ActionType a : ((SADomain)shell.getDomain()).getActionTypes()){
            os.println(a.typeName());
        }
        return 0;
    }

    State qs = env.currentObservation();
    if(oset.has("s")){
        if(!(env instanceof SimulatedPOEnvironment)){
            os.println("Cannot query applicable actions with respect to the POMDP hidden state, because the environment does not extend SimulatedPOEnvironment.");
            return 0;
        }
        qs = ((SimulatedPOEnvironment)env).getCurrentHiddenState();
    }

    List<Action> actions = ActionUtils.allApplicableActionsForTypes(((SADomain)shell.getDomain()).getActionTypes(), qs);
    for(Action ga : actions){
        os.println(ga.toString());
    }

    return 0;
}
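Outside the shell, the same query can be made directly with the helper the command uses. A sketch; it assumes `domain` is the SADomain and `env` the Environment from the earlier sketches.

//all grounded actions applicable in the current observation
List<Action> actions = ActionUtils.allApplicableActionsForTypes(
        domain.getActionTypes(), env.currentObservation());
for(Action a : actions){
    System.out.println(a);
}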
Example 7: call
import burlap.mdp.singleagent.environment.Environment; //import the package/class the method depends on

@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {
    Environment env = ((EnvironmentShell)shell).getEnv();
    OptionSet oset = this.parser.parse(argString.split(" "));
    List<String> args = (List<String>)oset.nonOptionArguments();
    if(oset.has("h")){
        os.println("[-v] [key value]+ \nSets the values for one or more state variables in an " +
                "environment state. Requires one or more key-value pairs. " +
                "The environment must implement StateSettableEnvironment and the states must be MutableState instances.\n\n" +
                "-v print the new Environment state after completion.");
        return 0;
    }

    StateSettableEnvironment senv = (StateSettableEnvironment)EnvironmentDelegation.Helper.getDelegateImplementing(env, StateSettableEnvironment.class);
    if(senv == null){
        os.println("Cannot set variable values for environment states, because the environment does not implement StateSettableEnvironment");
        return 0;
    }

    //require an even, non-zero number of arguments (one or more key-value pairs)
    if(args.size() % 2 != 0 || args.size() < 2){
        return -1;
    }

    State s = env.currentObservation();
    if(!(s instanceof MutableState)){
        os.println("Cannot modify state values, because the state does not implement MutableState");
        return 0;
    }

    for(int i = 0; i < args.size(); i+=2){
        try{
            ((MutableState)s).set(args.get(i), args.get(i+1));
        }catch(Exception e){
            os.println("Could not set key " + args.get(i) + " to value " + args.get(i+1) + ". Aborting.");
            return 0;
        }
    }

    senv.setCurStateTo(s);

    if(oset.has("v")){
        os.println(senv.currentObservation().toString());
    }

    return 1;
}
Example 8: call
import burlap.mdp.singleagent.environment.Environment; //import the package/class the method depends on

@Override
public int call(BurlapShell shell, String argString, Scanner is, PrintStream os) {
    Environment env = ((EnvironmentShell)shell).getEnv();
    OptionSet oset = this.parser.parse(argString.split(" "));
    if(oset.has("h")){
        os.println("[-f] [-n] [-s]\nCommand to list all true (or false) grounded propositional functions for the current environment observation.\n" +
                "-f: list false grounded propositional functions, rather than true ones\n" +
                "-n: list the names of all propositional functions, rather than grounded evaluations\n" +
                "-s: evaluate propositional functions on the POMDP environment hidden state, rather than the environment observation. Environment must extend SimulatedPOEnvironment");
        return 0;
    }

    if(!(shell.getDomain() instanceof OODomain)){
        os.println("Cannot query propositional functions because the domain is not an OODomain");
        return 0;
    }

    if(oset.has("n")){
        for(PropositionalFunction pf : ((OODomain)shell.getDomain()).propFunctions()){
            os.println(pf.getName());
        }
        return 0;
    }

    State qs = env.currentObservation();
    if(oset.has("s")){
        if(!(env instanceof SimulatedPOEnvironment)){
            os.println("Cannot query propositional functions with respect to the POMDP hidden state, because the environment does not extend SimulatedPOEnvironment.");
            return 0;
        }
        qs = ((SimulatedPOEnvironment)env).getCurrentHiddenState();
    }

    List<GroundedProp> gps = PropositionalFunction.allGroundingsFromList(((OODomain)shell.getDomain()).propFunctions(), (OOState)qs);
    for(GroundedProp gp : gps){
        if(gp.isTrue((OOState)qs) == !oset.has("f")){
            os.println(gp.toString());
        }
    }

    return 0;
}
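A direct, non-shell version of the command's default behavior, as a sketch using the same helpers. It assumes an OO-MDP `domain` and an `env` whose observations are OOState instances, as in the earlier sketches.

OOState s = (OOState)env.currentObservation();
List<GroundedProp> gps = PropositionalFunction.allGroundingsFromList(
        ((OODomain)domain).propFunctions(), s);
for(GroundedProp gp : gps){
    if(gp.isTrue(s)){
        System.out.println(gp); //print only the true groundings
    }
}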
Example 9: runTestEpisode
import burlap.mdp.singleagent.environment.Environment; //import the package/class the method depends on

@Override
public Episode runTestEpisode(Environment env, int maxSteps) {
    State initialState = env.currentObservation();
    Episode e = new Episode(initialState);

    int eStepCounter = 0;
    while(!env.isInTerminalState() && (eStepCounter < maxSteps || maxSteps == -1)){

        //check state
        State curState = stateMapping.mapState(env.currentObservation());

        //select action
        Action a = this.policy.action(curState);

        //take action
        EnvironmentOutcome eo = env.executeAction(a);

        //save outcome in replay memory
        this.memory.addExperience(eo);

        //record transition and manage the option case
        int stepInc = eo instanceof EnvironmentOptionOutcome ? ((EnvironmentOptionOutcome)eo).numSteps() : 1;
        eStepCounter += stepInc;
        e.transition(a, eo.op, eo.r);
    }

    return e;
}
Example 10: runLearningEpisode
import burlap.mdp.singleagent.environment.Environment; //import the package/class the method depends on

@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {
    State initialState = env.currentObservation();
    Episode ea = new Episode(initialState);

    State curState = initialState;

    //let the critic and actor prepare for a new episode
    this.critic.startEpisode(curState);
    this.actor.startEpisode(curState);

    int timeSteps = 0;
    while(!env.isInTerminalState() && (timeSteps < maxSteps || maxSteps == -1)){

        //the actor selects and executes the next action
        Action ga = this.actor.action(curState);
        EnvironmentOutcome eo = env.executeAction(ga);
        ea.transition(eo);

        //the critic scores the outcome; the actor updates its policy from that critique
        double critique = this.critic.critique(eo);
        this.actor.update(eo, critique);

        curState = env.currentObservation();
        timeSteps++;
    }

    this.critic.endEpisode();
    this.actor.endEpisode();

    //store the episode in a bounded history
    if(episodeHistory.size() >= numEpisodesToStore){
        episodeHistory.poll();
    }
    episodeHistory.offer(ea);

    return ea;
}
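The scalar critique in an actor-critic learner is conventionally the TD error δ = r + γV(s′) − V(s); the actor then shifts its action preferences in the direction of δ. A hedged sketch of that computation (valueOf is a hypothetical state-value lookup, not this class's API):

//TD(0) critique: delta = r + gamma * V(s') - V(s)
double tdCritique(EnvironmentOutcome eo, double gamma){
    double bootstrap = eo.terminated ? 0. : valueOf(eo.op); //no bootstrapping past terminal states
    return eo.r + gamma * bootstrap - valueOf(eo.o);        //eo.o is the previous state, eo.op the next
}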
Example 11: runLearningEpisode
import burlap.mdp.singleagent.environment.Environment; //import the package/class the method depends on

@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {
    State initialState = env.currentObservation();
    Episode ea = new Episode(initialState);

    State curState = initialState;
    int steps = 0;
    while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){
        //act, record the transition, and fold it into the learned model
        Action ga = policy.action(curState);
        EnvironmentOutcome eo = env.executeAction(ga);
        ea.transition(ga, eo.op, eo.r);
        this.model.updateModel(eo);

        //run a Bellman update on the state just left, under the updated model
        this.modelPlanner.performBellmanUpdateOn(eo.o);
        curState = env.currentObservation();
        steps++;
    }

    return ea;
}
Example 12: runLearningEpisode
import burlap.mdp.singleagent.environment.Environment; //import the package/class the method depends on

@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

    State initialState = env.currentObservation();

    Episode ea = new Episode(initialState);
    HashableState curState = this.stateHash(initialState);
    eStepCounter = 0;

    maxQChangeInLastEpisode = 0.;
    while(!env.isInTerminalState() && (eStepCounter < maxSteps || maxSteps == -1)){

        Action action = learningPolicy.action(curState.s());
        QValue curQ = this.getQ(curState, action);

        //execute the action; options are run to termination through their control method
        EnvironmentOutcome eo;
        if(!(action instanceof Option)){
            eo = env.executeAction(action);
        }
        else{
            eo = ((Option)action).control(env, this.gamma);
        }

        HashableState nextState = this.stateHash(eo.op);
        double maxQ = 0.;

        if(!eo.terminated){
            maxQ = this.getMaxQ(nextState);
        }

        //manage option specifics
        double r = eo.r;
        double discount = eo instanceof EnvironmentOptionOutcome ? ((EnvironmentOptionOutcome)eo).discount : this.gamma;
        int stepInc = eo instanceof EnvironmentOptionOutcome ? ((EnvironmentOptionOutcome)eo).numSteps() : 1;
        eStepCounter += stepInc;

        if(!(action instanceof Option) || !this.shouldDecomposeOptions){
            ea.transition(action, nextState.s(), r);
        }
        else{
            ea.appendAndMergeEpisodeAnalysis(((EnvironmentOptionOutcome)eo).episode);
        }

        double oldQ = curQ.q;

        //update the Q-value
        curQ.q = curQ.q + this.learningRate.pollLearningRate(this.totalNumberOfSteps, curState.s(), action) * (r + (discount * maxQ) - curQ.q);

        double deltaQ = Math.abs(oldQ - curQ.q);
        if(deltaQ > maxQChangeInLastEpisode){
            maxQChangeInLastEpisode = deltaQ;
        }

        //move on, polling the environment for its current state in case it changed during processing
        curState = this.stateHash(env.currentObservation());
        this.totalNumberOfSteps++;
    }

    return ea;
}
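A worked numeric instance of the update rule above, as a pure arithmetic sketch (the values are illustrative): starting from Q(s,a) = 0 with learning rate 0.1, reward 1, discount 0.99, and maxQ = 2,

double q = 0.0, alpha = 0.1, r = 1.0, discount = 0.99, maxQ = 2.0;
q = q + alpha * (r + discount * maxQ - q);
//q == 0.1 * (1.0 + 1.98 - 0.0) == 0.298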
Example 13: runLearningEpisode
import burlap.mdp.singleagent.environment.Environment; //import the package/class the method depends on

@Override
public Episode runLearningEpisode(Environment env, int maxSteps) {

    State initialState = env.currentObservation();
    this.modelPlanner.initializePlannerIn(initialState);

    Episode ea = new Episode(initialState);

    //a policy that favors actions whose transitions are not yet modeled
    Policy policy = this.createUnmodeledFavoredPolicy();

    State curState = initialState;

    int steps = 0;
    while(!env.isInTerminalState() && (steps < maxSteps || maxSteps == -1)){

        Action ga = policy.action(curState);
        EnvironmentOutcome eo = env.executeAction(ga);
        ea.transition(ga, eo.op, eo.r);

        boolean modeledTerminal = this.model.terminal(eo.op);

        //only update the model while this transition, or the next state's transitions, are unmodeled
        if(!this.model.transitionIsModeled(curState, ga)
                || (!KWIKModel.Helper.stateTransitionsModeled(model, this.getActionTypes(), eo.op) && !modeledTerminal)){
            this.model.updateModel(eo);

            //if the transition just became modeled, or the model's terminal prediction changed, replan
            if(this.model.transitionIsModeled(curState, ga) || (eo.terminated != modeledTerminal && modeledTerminal != this.model.terminal(eo.op))){
                this.modelPlanner.modelChanged(curState);
                policy = this.createUnmodeledFavoredPolicy();
            }
        }

        curState = env.currentObservation();
        steps++;
    }

    //store the episode in a bounded history
    if(episodeHistory.size() >= numEpisodesToStore){
        episodeHistory.poll();
    }
    episodeHistory.offer(ea);

    return ea;
}