本文整理汇总了Java中burlap.mdp.singleagent.SADomain类的典型用法代码示例。如果您正苦于以下问题:Java SADomain类的具体用法?Java SADomain怎么用?Java SADomain使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
SADomain类属于burlap.mdp.singleagent包,在下文中一共展示了SADomain类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: DeepQLearner
import burlap.mdp.singleagent.SADomain; //导入依赖的package包/类
public DeepQLearner(SADomain domain, double gamma, int replayStartSize, Policy policy, DQN vfa, StateMapping stateMapping) {
super(domain, gamma, vfa, stateMapping);
if (replayStartSize > 0) {
System.out.println(String.format("Starting with random policy for %d frames", replayStartSize));
this.replayStartSize = replayStartSize;
this.trainingPolicy = policy;
setLearningPolicy(new RandomPolicy(domain));
runningRandomPolicy = true;
} else {
setLearningPolicy(policy);
runningRandomPolicy = false;
}
}
示例2: generateDomain
import burlap.mdp.singleagent.SADomain; //导入依赖的package包/类
/**
 * Builds the grid world domain: registers the four cardinal movement actions and
 * installs a factored model combining the grid state model with the example
 * reward and terminal functions.
 *
 * @return the constructed {@link SADomain}
 */
@Override
public SADomain generateDomain() {
    SADomain domain = new SADomain();

    // One unconditionally-applicable action type per cardinal direction.
    UniversalActionType north = new UniversalActionType(ACTION_NORTH);
    UniversalActionType south = new UniversalActionType(ACTION_SOUTH);
    UniversalActionType east = new UniversalActionType(ACTION_EAST);
    UniversalActionType west = new UniversalActionType(ACTION_WEST);
    domain.addActionTypes(north, south, east, west);

    // Wire transition dynamics, reward, and termination into a factored model.
    GridWorldStateModel stateModel = new GridWorldStateModel();
    domain.setModel(new FactoredModel(stateModel, new ExampleRF(), new ExampleTF()));
    return domain;
}
示例3: generateDomain
import burlap.mdp.singleagent.SADomain; //导入依赖的package包/类
/**
 * Builds a two-action (YES/NO) domain. The true reward is unknown, so every
 * transition yields the configured default reward, and no state is terminal.
 *
 * @return the constructed {@link SADomain}
 */
@Override
public SADomain generateDomain() {
    SADomain domain = new SADomain();

    domain.addActionTypes(
            new UniversalActionType(NextActionEnumerations.YES.name()),
            new UniversalActionType(NextActionEnumerations.NO.name()));

    // Reward is unknown: every transition pays the default reward.
    RewardFunction rewardFunction = (s, a, sPrime) -> defaultReward;
    // The task never terminates.
    TerminalFunction terminalFunction = s -> false;

    domain.setModel(new FactoredModel(model, rewardFunction, terminalFunction));
    return domain;
}
示例4: IPSS
import burlap.mdp.singleagent.SADomain; //导入依赖的package包/类
/**
 * Plans on a noiseless inverted pendulum (cart pole) task with sparse sampling,
 * rolls out the resulting greedy-Q policy for up to 500 steps, and visualizes
 * the episode. Reward and termination both use a +/- pi/8 pole-angle threshold.
 */
public static void IPSS(){
    InvertedPendulum pendulum = new InvertedPendulum();
    pendulum.physParams.actionNoise = 0.; // deterministic dynamics

    // Reward and terminal functions share the same angle threshold.
    double angleThreshold = Math.PI / 8.;
    pendulum.setRf(new InvertedPendulum.InvertedPendulumRewardFunction(angleThreshold));
    pendulum.setTf(new InvertedPendulum.InvertedPendulumTerminalFunction(angleThreshold));

    SADomain domain = pendulum.generateDomain();
    State initialState = new InvertedPendulumState();

    // Sparse sampling: gamma 1, horizon 10, 1 transition sample; plan fresh each query.
    SparseSampling planner = new SparseSampling(domain, 1, new SimpleHashableStateFactory(), 10, 1);
    planner.setForgetPreviousPlanResults(true);
    planner.toggleDebugPrinting(false);
    Policy policy = new GreedyQPolicy(planner);

    Episode episode = PolicyUtils.rollout(policy, initialState, domain.getModel(), 500);
    System.out.println("Num steps: " + episode.maxTimeStep());

    Visualizer visualizer = CartPoleVisualizer.getCartPoleVisualizer();
    new EpisodeSequenceVisualizer(visualizer, domain, Arrays.asList(episode));
}
示例5: main
import burlap.mdp.singleagent.SADomain; //导入依赖的package包/类
/**
 * Launches an interactive 11x11 four-rooms grid world with stochastic movement
 * (80% success rate), controlled via the w/a/s/d keys.
 *
 * @param args command line args (unused)
 */
public static void main(String[] args) {
    GridWorldDomain gridWorld = new GridWorldDomain(11, 11);
    gridWorld.setMapToFourRooms();
    gridWorld.setProbSucceedTransitionDynamics(0.8); // actions succeed 80% of the time

    SADomain domain = gridWorld.generateDomain();

    // Agent starts at the bottom-left corner; a single location object sits at the top-right.
    State initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));

    Visualizer visualizer = GridWorldVisualizer.getVisualizer(gridWorld.getMap());
    VisualExplorer explorer = new VisualExplorer(domain, visualizer, initialState);

    // Bind w/s/a/d to the four movement actions.
    explorer.addKeyAction("w", GridWorldDomain.ACTION_NORTH, "");
    explorer.addKeyAction("s", GridWorldDomain.ACTION_SOUTH, "");
    explorer.addKeyAction("a", GridWorldDomain.ACTION_WEST, "");
    explorer.addKeyAction("d", GridWorldDomain.ACTION_EAST, "");
    explorer.initGUI();
}
示例6: generateDomain
import burlap.mdp.singleagent.SADomain; //导入依赖的package包/类
/**
 * Builds the grid world domain: registers the four cardinal movement actions and
 * installs a factored model whose reward and terminal functions are parameterized
 * by this generator's goal coordinates.
 *
 * @return the constructed {@link SADomain}
 */
@Override
public SADomain generateDomain() {
    SADomain domain = new SADomain();

    // One unconditionally-applicable action type per cardinal direction.
    UniversalActionType north = new UniversalActionType(ACTION_NORTH);
    UniversalActionType south = new UniversalActionType(ACTION_SOUTH);
    UniversalActionType east = new UniversalActionType(ACTION_EAST);
    UniversalActionType west = new UniversalActionType(ACTION_WEST);
    domain.addActionTypes(north, south, east, west);

    // Reward and termination are both tied to the configured goal cell.
    GridWorldStateModel stateModel = new GridWorldStateModel();
    RewardFunction rewardFunction = new ExampleRF(this.goalx, this.goaly);
    TerminalFunction terminalFunction = new ExampleTF(this.goalx, this.goaly);
    domain.setModel(new FactoredModel(stateModel, rewardFunction, terminalFunction));
    return domain;
}
示例7: main
import burlap.mdp.singleagent.SADomain; //导入依赖的package包/类
/**
 * Launches an interactive example grid world with the goal at (10, 10),
 * controlled via the w/a/s/d keys through a simulated environment.
 *
 * @param args command line args (unused)
 */
public static void main(String [] args){
    ExampleGridWorld generator = new ExampleGridWorld();
    generator.setGoalLocation(10, 10);

    SADomain domain = generator.generateDomain();
    State initialState = new EXGridState(0, 0); // agent starts at the origin
    SimulatedEnvironment environment = new SimulatedEnvironment(domain, initialState);

    VisualExplorer explorer = new VisualExplorer(domain, environment, generator.getVisualizer());

    // Bind w/s/d/a to the four movement actions.
    explorer.addKeyAction("w", ACTION_NORTH, "");
    explorer.addKeyAction("s", ACTION_SOUTH, "");
    explorer.addKeyAction("d", ACTION_EAST, "");
    explorer.addKeyAction("a", ACTION_WEST, "");
    explorer.initGUI();
}
示例8: processCommand
import burlap.mdp.singleagent.SADomain; //导入依赖的package包/类
/**
 * Console command handler: builds the Minecraft BURLAP domain, enumerates every
 * state reachable from the current dungeon state, prints each reachable agent
 * configuration (position, directions, selected item), and finally the total count.
 */
@Override
public void processCommand(ICommandSender sender, String[] args) {
    SADomain domain = new MinecraftDomainGenerator().generateDomain();
    State currentState = MinecraftStateGeneratorHelper.getCurrentState(BurlapCraft.currentDungeon);

    List<State> reachable = StateReachability.getReachableStates(currentState, domain, new SimpleHashableStateFactory());
    for (State state : reachable) {
        BCAgent agent = (BCAgent) ((OOState) state).object(CLASS_AGENT);
        System.out.println(agent.x + ", " + agent.y + ", " + agent.z + ", " + agent.rdir + ", "+ agent.vdir + ", " + agent.selected);
    }
    System.out.println(reachable.size());
}
示例9: testSimpleHashFactoryIdentifierDependent
import burlap.mdp.singleagent.SADomain; //导入依赖的package包/类
/**
 * Verifies that an identifier-dependent {@code SimpleHashableStateFactory}
 * (constructed with {@code false}) distinguishes states whose objects have been
 * renamed: the 104 reachable states plus their object-renamed copies hash to
 * 208 distinct entries.
 */
// NOTE(review): bare `assert` statements are silently skipped unless the JVM runs
// with -ea; consider JUnit's Assert.assertEquals so failures are always reported.
@Test
public void testSimpleHashFactoryIdentifierDependent() {
// identifier-DEPENDENT hashing: object names participate in equality/hash
SADomain domain = (SADomain)this.gridWorldTest.getDomain();
State startState = this.gridWorldTest.generateState();
HashableStateFactory factory = new SimpleHashableStateFactory(false);
Set<HashableState> hashedStates = this.getReachableHashedStates(startState, domain, factory);
assert(hashedStates.size() == 104);
// Hash a renamed copy of every reachable state; renaming should change identity.
Set<HashableState> renamedStates = new HashSet<HashableState>();
for (HashableState state : hashedStates) {
State source = state.s();
State renamed = this.renameObjects((GridWorldState)source.copy());
HashableState renamedHashed = factory.hashState(renamed);
renamedStates.add(renamedHashed);
}
// Union doubles in size only if no renamed state collides with an original.
hashedStates.addAll(renamedStates);
assert(hashedStates.size() == 208);
}
示例10: DifferentiableSparseSampling
import burlap.mdp.singleagent.SADomain; //导入依赖的package包/类
/**
 * Initializes the differentiable sparse sampling planner. The planner's model is
 * automatically wrapped in a {@link CustomRewardModel} that substitutes the
 * provided differentiable reward function for the domain's own rewards.
 *
 * @param domain         the problem domain
 * @param rf             the differentiable reward function
 * @param gamma          the discount factor
 * @param hashingFactory the hashing factory used to compare state equality
 * @param h              the planning horizon
 * @param c              number of transition samples to draw; -1 uses the full (unsampled) transition dynamics
 * @param boltzBeta      Boltzmann beta for the differentiable softmax backup; larger values are more
 *                       deterministic, values closer to 1 are softer
 */
public DifferentiableSparseSampling(SADomain domain, DifferentiableRF rf, double gamma, HashableStateFactory hashingFactory, int h, int c, double boltzBeta){
    this.solverInit(domain, gamma, hashingFactory);

    // Sparse-sampling parameters.
    this.h = h;
    this.c = c;

    // Differentiable reward setup: reward function, its gradient dimension, and
    // a value-function initialization that carries the reward gradient.
    this.rf = rf;
    this.rfDim = rf.numParameters();
    this.vinit = new VanillaDiffVinit(new ConstantValueFunction(), rf);
    this.model = new CustomRewardModel(domain.getModel(), rf);

    // Softmax backup operator and its temperature.
    this.boltzBeta = boltzBeta;
    this.operator = new DifferentiableSoftmaxOperator(boltzBeta);

    // Planning caches: tree nodes by height, and root Q-values with gradients.
    this.nodesByHeight = new HashMap<SparseSampling.HashedHeightState, DiffStateNode>();
    this.rootLevelQValues = new HashMap<HashableState, DifferentiableSparseSampling.QAndQGradient>();

    this.debugCode = 6368290;
}
示例11: main
import burlap.mdp.singleagent.SADomain; //导入依赖的package包/类
/**
 * Main function to test the Frostbite domain interactively with a/d/w/s/x keys.
 * Note: termination conditions are not checked when exploring the domain this
 * way, so it is impossible to win or die, which may trigger bugs. To enable
 * them, uncomment the code in the "update" function.
 *
 * @param args command line args (unused)
 */
public static void main(String[] args) {
    FrostbiteDomain frostbite = new FrostbiteDomain();
    SADomain domain = frostbite.generateDomain();
    State initialState = new FrostbiteState();

    VisualExplorer explorer = new VisualExplorer(domain, FrostbiteVisualizer.getVisualizer(), initialState);

    // Bind movement keys plus 'x' for idling.
    explorer.addKeyAction("a", ACTION_WEST, "");
    explorer.addKeyAction("d", ACTION_EAST, "");
    explorer.addKeyAction("w", ACTION_NORTH, "");
    explorer.addKeyAction("s", ACTION_SOUTH, "");
    explorer.addKeyAction("x", ACTION_IDLE, "");
    explorer.initGUI();
}
示例12: ActionSet
import burlap.mdp.singleagent.SADomain; //导入依赖的package包/类
/**
 * Builds the action set from every applicable action across the domain's action
 * types, then initializes the action lookup map.
 *
 * @param domain the domain whose action types define the available actions
 */
public ActionSet(SADomain domain) {
    List<Action> applicable = ActionUtils.allApplicableActionsForTypes(domain.getActionTypes(), null);
    size = applicable.size();
    actions = applicable.toArray(new Action[size]);
    initActionMap();
}
示例13: generateDomain
import burlap.mdp.singleagent.SADomain; //导入依赖的package包/类
/**
 * Builds an ALE domain containing one action type per configured action name.
 *
 * @return the constructed {@link SADomain}
 */
@Override
public SADomain generateDomain() {
    SADomain domain = new SADomain();
    // Register a NullAction-style type for each named ALE action.
    for (String actionName : actionNames) {
        ALEAction action = new ALEAction(actionName);
        domain.addActionType(new UniversalActionType(action));
    }
    return domain;
}
示例14: ALEVisualExplorer
import burlap.mdp.singleagent.SADomain; //导入依赖的package包/类
/**
 * Creates a visual explorer for an ALE domain, sized to the ALE screen scaled
 * by the class's width/height ratios.
 *
 * @param domain  the ALE domain to explore
 * @param env     the environment to interact with
 * @param painter the visualizer used to render frames
 * @param human   if true, enables keyboard input for human control
 */
public ALEVisualExplorer(SADomain domain, Environment env, Visualizer painter, boolean human) {
// scale the native ALE screen dimensions to the display size
super(domain, env, painter,
(int)(ALEDomainConstants.ALEScreenWidth * widthRatio),
(int)(ALEDomainConstants.ALEScreenHeight * heightRatio));
if (human) {
enableHumanInput();
}
}
示例15: learnPolicy
import burlap.mdp.singleagent.SADomain; //导入依赖的package包/类
/**
 * Learns a policy from demonstrations via maximum-likelihood IRL. A linear
 * differentiable reward function over location features is randomly initialized,
 * its final "dummy state" parameter pinned to a large negative reward, and MLIRL
 * is run with a differentiable sparse-sampling planner.
 *
 * @param domain               the domain the episodes were generated in
 * @param episodes             the demonstration episodes
 * @param numberOfStates       number of location features / reward parameters (including the dummy state)
 * @param numberOfSamplesToUse transition samples for the sparse-sampling planner
 * @return a greedy-Q policy over the planner trained by IRL
 */
@Override
public Policy learnPolicy(SADomain domain, List<Episode> episodes, int numberOfStates, int numberOfSamplesToUse) {
    // Linear reward over one-hot location features, initialized uniformly in [-0.1, 0.1).
    LocationFeatures features = new LocationFeatures(numberOfStates);
    LinearStateDifferentiableRF rewardFunction = new LinearStateDifferentiableRF(features, numberOfStates);
    int lastParam = rewardFunction.numParameters() - 1;
    for (int i = 0; i < lastParam; i++) {
        rewardFunction.setParameter(i, RandomFactory.getMapped(0).nextDouble() * 0.2 - 0.1);
    }
    // Pin the trailing "dummy state" to a large negative reward so the agent avoids it.
    rewardFunction.setParameter(lastParam, MLIRLWithGuard.minReward);

    // DifferentiableSparseSampling enables receding-horizon IRL (DifferentiableVI is an
    // alternative, though this reward shape usually wants a fairly large horizon).
    HashableStateFactory hashingFactory = new SimpleHashableStateFactory();
    DifferentiableSparseSampling planner = new DifferentiableSparseSampling(
            domain, rewardFunction, 0.99, hashingFactory,
            (int) Math.sqrt(numberOfStates), numberOfSamplesToUse, beta);
    planner.toggleDebugPrinting(doNotPrintDebug);

    // Pose the IRL problem and run guarded MLIRL on it.
    MLIRLRequest request = new MLIRLRequest(domain, planner, episodes, rewardFunction);
    request.setBoltzmannBeta(beta);
    MLIRL irl = new MLIRLWithGuard(request, 0.1, 0.1, steps);
    irl.performIRL();

    return new GreedyQPolicy((QProvider) request.getPlanner());
}