本文整理汇总了Java中burlap.behavior.functionapproximation.dense.NormalizedVariableFeatures类的典型用法代码示例。如果您正苦于以下问题:Java NormalizedVariableFeatures类的具体用法?Java NormalizedVariableFeatures怎么用?Java NormalizedVariableFeatures使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
NormalizedVariableFeatures类属于burlap.behavior.functionapproximation.dense包,在下文中一共展示了NormalizedVariableFeatures类的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import burlap.behavior.functionapproximation.dense.NormalizedVariableFeatures; //导入依赖的package包/类
/**
 * Trains an LSPI policy on the Mountain Car domain using a Fourier basis over
 * normalized "x" and "v" state variables, then replays the learned policy
 * through a visual observer.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    MountainCar mountainCar = new MountainCar();
    SADomain mcDomain = mountainCar.generateDomain();

    // Gather the SARS samples that LSPI fits against, starting from randomly generated states.
    StateGenerator randomStates = new MCRandomStateGenerator(mountainCar.physParams);
    SARSCollector sampler = new SARSCollector.UniformRandomSARSCollector(mcDomain);
    SARSData samples = sampler.collectNInstances(randomStates, mcDomain.getModel(), 5000, 20, null);

    // Normalize position and velocity against the physical limits of the domain.
    VariableDomain positionRange = new VariableDomain(mountainCar.physParams.xmin, mountainCar.physParams.xmax);
    VariableDomain velocityRange = new VariableDomain(mountainCar.physParams.vmin, mountainCar.physParams.vmax);
    NormalizedVariableFeatures features = new NormalizedVariableFeatures()
            .variableDomain("x", positionRange)
            .variableDomain("v", velocityRange);

    // Order-4 Fourier basis, crossed with 3 actions.
    FourierBasis basis = new FourierBasis(features, 4);
    DenseCrossProductFeatures stateActionFeatures = new DenseCrossProductFeatures(basis, 3);
    LSPI learner = new LSPI(mcDomain, 0.99, stateActionFeatures, samples);
    Policy learnedPolicy = learner.runPolicyIteration(30, 1e-6);

    VisualActionObserver observer = new VisualActionObserver(MountainCarVisualizer.getVisualizer(mountainCar));
    observer.initGUI();

    // Every rollout starts at rest in the valley.
    MCState startState = new MCState(mountainCar.physParams.valleyPos(), 0);
    SimulatedEnvironment simulation = new SimulatedEnvironment(mcDomain, startState);
    EnvironmentServer observedEnv = new EnvironmentServer(simulation, observer);

    int rolloutsLeft = 100;
    while (rolloutsLeft > 0) {
        PolicyUtils.rollout(learnedPolicy, observedEnv);
        observedEnv.resetEnvironment();
        rolloutsLeft--;
    }

    System.out.println("Finished");
}
示例2: sarsaRunFourier
import burlap.behavior.functionapproximation.dense.NormalizedVariableFeatures; //导入依赖的package包/类
/**
 * Runs SARSA with a Fourier basis built over the supplied normalized features
 * and writes a YAML summary of the run to
 * {@code <containerPath>/results/<name>.yaml}.
 *
 * <p>The summary records the hyper-parameters passed in, the best fitness seen
 * and the number of episodes.
 *
 * @param discount            discount factor, recorded and forwarded to {@code runSarsa}
 * @param name                run name; also used as the YAML file name
 * @param order               order of the Fourier basis
 * @param learningRate        learning rate, recorded and forwarded to {@code runSarsa}
 * @param lambda              lambda value, recorded and forwarded to {@code runSarsa}
 * @param inputFeatures       normalized state features the Fourier basis is built on
 * @param scenario            scenario forwarded to {@code runSarsa}
 * @param containerPath       directory under which the {@code results} folder is resolved
 * @param additionalSteppable not used by this method
 * @param initialEpsilon      initial epsilon, recorded and forwarded to {@code runSarsa}
 * @param baseline            optional baseline forwarded to {@code runSarsa}; only its presence is recorded
 * @param featureNames        names recorded under the {@code "factors"} key
 * @throws IOException            if the YAML file cannot be opened, written or closed
 * @throws NoSuchFieldException   declared for callers; presumably propagated from {@code runSarsa}
 * @throws IllegalAccessException declared for callers; presumably propagated from {@code runSarsa}
 */
public static void sarsaRunFourier(
        final double discount, final String name, final int order, final double learningRate, final double lambda,
        NormalizedVariableFeatures inputFeatures, final PrototypeScenario scenario, final Path containerPath,
        final Steppable additionalSteppable, final double initialEpsilon, @Nullable Pair<ShodanStateOil,Action> baseline,
        String... featureNames) throws IOException, NoSuchFieldException, IllegalAccessException {
    // Collect everything that goes into the YAML summary.
    HashMap<String,Object> resultObject = new HashMap<>();
    resultObject.put("method","sarsa");
    resultObject.put("lambda",lambda);
    resultObject.put("discount",discount);
    resultObject.put("learning_rate",learningRate);
    resultObject.put("factors",featureNames);
    resultObject.put("name",name);
    resultObject.put("base","fourier");
    resultObject.put("order",order);
    resultObject.put("initial_epsilon", initialEpsilon);
    resultObject.put("normalized",true);
    resultObject.put("baseline", baseline != null);

    // Run SARSA; the return value is the fitness of the last evaluation.
    double fitness = runSarsa(new FourierBasis(inputFeatures, order), name, discount, learningRate, lambda,
            containerPath, scenario, baseline, resultObject, initialEpsilon);

    // runSarsa receives resultObject and may have stored a "fitness" entry in it;
    // keep whichever value is larger.
    double bestFitness = fitness;
    if (resultObject.containsKey("fitness")) {
        bestFitness = Math.max(bestFitness, (Double) resultObject.get("fitness"));
    }
    resultObject.put("fitness",bestFitness);
    resultObject.put("episodes",NUMBER_OF_EPISODES);

    // Write to file. try-with-resources guarantees the writer is flushed and closed
    // even if dumping fails, so the file handle is never leaked.
    File yamlFile = containerPath.resolve("results").resolve(name + ".yaml").toFile();
    Yaml yaml = new Yaml();
    try (FileWriter writer = new FileWriter(yamlFile)) {
        yaml.dump(resultObject, writer);
    }
}
示例3: MCLSPIRBF
import burlap.behavior.functionapproximation.dense.NormalizedVariableFeatures; //导入依赖的package包/类
/**
 * Trains an LSPI policy on the Mountain Car domain using Gaussian radial basis
 * functions centered on a 5x5 grid over the normalized "x" and "v" variables,
 * then replays the learned policy with a visual observer attached.
 */
public static void MCLSPIRBF(){
    MountainCar mountainCar = new MountainCar();
    SADomain mcDomain = mountainCar.generateDomain();

    // Start state at rest in the valley; also used as the seed state for gridding.
    MCState valleyState = new MCState(mountainCar.physParams.valleyPos(), 0.);

    // Normalize position and velocity against the physical limits of the domain.
    VariableDomain positionRange = new VariableDomain(mountainCar.physParams.xmin, mountainCar.physParams.xmax);
    VariableDomain velocityRange = new VariableDomain(mountainCar.physParams.vmin, mountainCar.physParams.vmax);
    NormalizedVariableFeatures inputFeatures = new NormalizedVariableFeatures()
            .variableDomain("x", positionRange)
            .variableDomain("v", velocityRange);

    // Gather the SARS samples that LSPI fits against.
    StateGenerator randomStates = new MCRandomStateGenerator(mountainCar.physParams);
    SARSCollector sampler = new SARSCollector.UniformRandomSARSCollector(mcDomain);
    SARSData samples = sampler.collectNInstances(randomStates, mcDomain.getModel(), 5000, 20, null);

    RBFFeatures rbfFeatures = new RBFFeatures(inputFeatures, true);

    // Lay out 5 grid points per dimension and put one Gaussian RBF on each resulting state.
    FlatStateGridder stateGridder = new FlatStateGridder()
            .gridDimension("x", mountainCar.physParams.xmin, mountainCar.physParams.xmax, 5)
            .gridDimension("v", mountainCar.physParams.vmin, mountainCar.physParams.vmax, 5);
    List<State> centers = stateGridder.gridState(valleyState);
    DistanceMetric euclidean = new EuclideanDistance();
    for(int idx = 0; idx < centers.size(); idx++){
        State center = centers.get(idx);
        GaussianRBF unit = new GaussianRBF(inputFeatures.features(center), euclidean, 0.2);
        rbfFeatures.addRBF(unit);
    }

    // RBF state features crossed with 3 actions.
    DenseCrossProductFeatures stateActionFeatures = new DenseCrossProductFeatures(rbfFeatures, 3);
    LSPI learner = new LSPI(mcDomain, 0.99, stateActionFeatures, samples);
    Policy learnedPolicy = learner.runPolicyIteration(30, 1e-6);

    VisualActionObserver observer = new VisualActionObserver(MountainCarVisualizer.getVisualizer(mountainCar));
    observer.initGUI();

    SimulatedEnvironment simulation = new SimulatedEnvironment(mcDomain, valleyState);
    simulation.addObservers(observer);

    int rolloutsLeft = 5;
    while(rolloutsLeft > 0){
        PolicyUtils.rollout(learnedPolicy, simulation);
        simulation.resetEnvironment();
        rolloutsLeft--;
    }

    System.out.println("Finished");
}
示例4: MCLSPIFB
import burlap.behavior.functionapproximation.dense.NormalizedVariableFeatures; //导入依赖的package包/类
/**
 * Trains an LSPI policy on the Mountain Car domain using an order-4 Fourier
 * basis over the normalized "x" and "v" variables, then replays the learned
 * policy with a visual observer attached.
 */
public static void MCLSPIFB(){
    MountainCar mountainCar = new MountainCar();
    SADomain mcDomain = mountainCar.generateDomain();

    // Gather the SARS samples that LSPI fits against.
    StateGenerator randomStates = new MCRandomStateGenerator(mountainCar.physParams);
    SARSCollector sampler = new SARSCollector.UniformRandomSARSCollector(mcDomain);
    SARSData samples = sampler.collectNInstances(randomStates, mcDomain.getModel(), 5000, 20, null);

    // Normalize position and velocity against the physical limits of the domain.
    VariableDomain positionRange = new VariableDomain(mountainCar.physParams.xmin, mountainCar.physParams.xmax);
    VariableDomain velocityRange = new VariableDomain(mountainCar.physParams.vmin, mountainCar.physParams.vmax);
    NormalizedVariableFeatures inputFeatures = new NormalizedVariableFeatures()
            .variableDomain("x", positionRange)
            .variableDomain("v", velocityRange);

    // Fourier state features crossed with 3 actions.
    FourierBasis basis = new FourierBasis(inputFeatures, 4);
    DenseCrossProductFeatures stateActionFeatures = new DenseCrossProductFeatures(basis, 3);
    LSPI learner = new LSPI(mcDomain, 0.99, stateActionFeatures, samples);
    Policy learnedPolicy = learner.runPolicyIteration(30, 1e-6);

    VisualActionObserver observer = new VisualActionObserver(MountainCarVisualizer.getVisualizer(mountainCar));
    observer.initGUI();

    // Every rollout starts at rest in the valley.
    MCState startState = new MCState(mountainCar.physParams.valleyPos(), 0.);
    SimulatedEnvironment simulation = new SimulatedEnvironment(mcDomain, startState);
    simulation.addObservers(observer);

    int rolloutsLeft = 5;
    while(rolloutsLeft > 0){
        PolicyUtils.rollout(learnedPolicy, simulation);
        simulation.resetEnvironment();
        rolloutsLeft--;
    }

    System.out.println("Finished");
}