

Java BoltzmannDistribution Class Code Examples

This article collects typical usage examples of the Java class burlap.datastructures.BoltzmannDistribution. If you have been wondering what the BoltzmannDistribution class does, how to use it, or where to find examples of it in real code, the curated class code examples below may help.


The BoltzmannDistribution class belongs to the burlap.datastructures package. Eight code examples of the class are shown below, sorted by popularity by default.
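Before the individual examples, here is a minimal, self-contained sketch of the typical call pattern. It relies only on the API surface the examples below exercise (a constructor taking a preference array and a temperature, plus getProbabilities() and sample()); the exact constructor semantics are an assumption here, so consult the BURLAP sources for details.

import burlap.datastructures.BoltzmannDistribution;

public class BoltzmannDemo {

	public static void main(String[] args) {
		//preference values (e.g., Q-values) for three actions
		double[] prefs = {1.0, 2.0, 0.5};

		//a lower temperature concentrates probability mass on the highest preference;
		//a higher temperature flattens the distribution toward uniform
		BoltzmannDistribution bd = new BoltzmannDistribution(prefs, 0.5);

		//softmax probabilities, in the same order as prefs
		double[] probs = bd.getProbabilities();
		for(int i = 0; i < probs.length; i++){
			System.out.println("action " + i + ": p = " + probs[i]);
		}

		//draw one action index at random according to those probabilities
		int chosen = bd.sample();
		System.out.println("sampled action: " + chosen);
	}
}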

Example 1: performBellmanUpdateOn

import burlap.datastructures.BoltzmannDistribution; // import the required package/class
/**
 * Overrides the superclass method to perform a Boltzmann backup operator
 * instead of a Bellman backup operator.
 * Results are stored in this valueFunction's internal map.
 * @param sh the hashed state on which to perform the Boltzmann update.
 * @return the new value
 */
@Override
protected double performBellmanUpdateOn(HashableState sh){

	if(this.tf.isTerminal(sh.s)){
		this.valueFunction.put(sh, 0.);
		return 0.;
	}

	List<QValue> qs = this.getQs(sh.s);
	double [] dqs = new double[qs.size()];
	for(int i = 0; i < qs.size(); i++){
		dqs[i] = qs.get(i).q;
	}
	BoltzmannDistribution bd = new BoltzmannDistribution(dqs, 1./this.boltzBeta);
	double [] dist = bd.getProbabilities();

	double sum = 0.;
	for(int i = 0; i < dqs.length; i++){
		sum += dqs[i] * dist[i];
	}

	this.valueFunction.put(sh, sum);

	return sum;
}
 
Developer ID: f-leno; Project: DOO-Q_BRACIS2016; Lines: 33; Source: DifferentiableDP.java
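For reference, the backup this example computes is the softmax-weighted expectation of the Q-values rather than their maximum. With beta the inverse temperature (so the code's temperature argument is 1/beta):

V(s) = \sum_a \pi_\beta(a \mid s) \, Q(s,a), \qquad \pi_\beta(a \mid s) = \frac{e^{\beta Q(s,a)}}{\sum_{a'} e^{\beta Q(s,a')}}

As beta grows this approaches the ordinary Bellman max backup; as beta approaches 0 it approaches the unweighted mean of the Q-values.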

Example 2: drawFromSoftmax

import burlap.datastructures.BoltzmannDistribution; // import the required package/class
/**
 * Given a set of arms and a function giving the expected reward of each, uses softmax to draw one arm at random.
 * @param random randomizer
 * @param numberOfArms the number of arms to choose from
 * @param expectedReturnOfArm a function returning the expected return associated with a particular arm
 * @param temperature the softmax temperature; higher values make the draw more stochastic
 * @return the index of the arm to pick
 */
public static Integer drawFromSoftmax(MersenneTwisterFast random,
                                      int numberOfArms,
                                      Function<Integer,Double> expectedReturnOfArm,
                                      double temperature) {

    double[] preferences = new double[numberOfArms];
    for(int i=0; i<preferences.length; i++) {
        preferences[i] = expectedReturnOfArm.apply(i);
        if(!Double.isFinite(preferences[i]))
            preferences[i] = 0; //unsampled arms (e.g., unvisited areas) default to a preference of 0
    }
    BoltzmannDistribution distribution = new BoltzmannDistribution(preferences,temperature);

    return distribution.sample();

}
 
Developer ID: CarrKnight; Project: POSEIDON; Lines: 25; Source: SoftmaxBanditAlgorithm.java
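Assuming the usual softmax convention for BoltzmannDistribution's temperature argument (consistent with Example 1, where the inverse temperature beta is passed as 1/beta), the probability of drawing arm i from preferences p is

P(i) = \frac{e^{p_i / T}}{\sum_j e^{p_j / T}}

so a high temperature T flattens the draw toward uniform, while T approaching 0 approaches a greedy argmax pick.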

Example 3: policyDistribution

import burlap.datastructures.BoltzmannDistribution; // import the required package/class
@Override
public List<ActionProb> policyDistribution(State s) {
	
	HashableState sh = this.hashingFactory.hashState(s);
	PolicyNode node = this.getNode(sh);
	
	double [] prefs = new double[node.preferences.size()];
	for(int i = 0; i < node.preferences.size(); i++){
		prefs[i] = node.preferences.get(i).preference;
	}
	
	BoltzmannDistribution bd = new BoltzmannDistribution(prefs);
	double [] probsArray = bd.getProbabilities();
	
	List <ActionProb> probs = new ArrayList<ActionProb>(probsArray.length);
	for(int i = 0; i < probsArray.length; i++){
		ActionPreference ap = node.preferences.get(i);
		probs.add(new ActionProb(ap.ga, probsArray[i]));
	}

	return probs;
}
 
Developer ID: jmacglashan; Project: burlap; Lines: 24; Source: BoltzmannActor.java
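This example uses the single-argument constructor, which presumably defaults the temperature to 1 (an assumption; check the BURLAP source). Under that default the action probabilities reduce to \pi(a) = e^{p(a)} / \sum_b e^{p(b)} over the actor's stored preferences p.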

Example 4: setV

import burlap.datastructures.BoltzmannDistribution; // import the required package/class
protected void setV(QAndQGradient qvs){
	double [] qArray = new double[qvs.qs.size()];
	for(int i = 0; i < qvs.qs.size(); i++){
		qArray[i] = qvs.qs.get(i).q;
	}
	BoltzmannDistribution bd = new BoltzmannDistribution(qArray, 1./DifferentiableSparseSampling.this.boltzBeta);
	double [] probs = bd.getProbabilities();
	double sum = 0.;
	for(int i = 0; i < qArray.length; i++){
		sum += qArray[i] * probs[i];
	}
	this.v = sum;
}
 
Developer ID: f-leno; Project: DOO-Q_BRACIS2016; Lines: 14; Source: DifferentiableSparseSampling.java

Example 5: getProbabilities

import burlap.datastructures.BoltzmannDistribution; // import the required package/class
public static double[] getProbabilities(
        int numberOfArms, Function<Integer, Double> expectedReturnOfArm, double temperature) {
    double[] preferences = new double[numberOfArms];
    for(int i=0; i<preferences.length; i++)
        preferences[i] = expectedReturnOfArm.apply(i);
    BoltzmannDistribution distribution = new BoltzmannDistribution(preferences,temperature);

    return distribution.getProbabilities();
}
 
Developer ID: CarrKnight; Project: POSEIDON; Lines: 10; Source: SoftmaxBanditAlgorithm.java

Example 6: evolve

import burlap.datastructures.BoltzmannDistribution; // import the required package/class
private void evolve(FishState model) {
    //softmax selector
    BoltzmannDistribution distribution = new BoltzmannDistribution(lastObservedFitnesses,temperature);
    for(Fisher fisher : model.getFishers()) {
        //with probability equal to inertia the fisher keeps its current strategy; regulations and recent time in port also block a switch
        if(!model.getRandom().nextBoolean(inertia) && fisher.isAllowedAtSea() && fisher.getHoursAtPort() < 48) {
            int newStrategy = distribution.sample();
            fisher.setDestinationStrategy(
                    new ReplicatorDrivenDestinationStrategy(newStrategy,
                                                            options.get(newStrategy).apply(model)));
        }
    }
}
 
Developer ID: CarrKnight; Project: POSEIDON; Lines: 14; Source: StrategyReplicator.java

Example 7: apply

import burlap.datastructures.BoltzmannDistribution; // import the required package/class
@Override
public double apply(double[] qs) {
	BoltzmannDistribution bd = new BoltzmannDistribution(qs, temp);
	double [] dist = bd.getProbabilities();
	double sum = 0.;
	for(int i = 0; i < qs.length; i++){
		sum += qs[i] * dist[i];
	}
	return sum;
}
 
Developer ID: jmacglashan; Project: burlap; Lines: 11; Source: SoftmaxOperator.java

Example 8: computePolicyGradient

import burlap.datastructures.BoltzmannDistribution; // import the required package/class
/**
    * Computes the gradient of the Boltzmann (softmax) policy with respect to some parameters.
    * @param prefs the action-wise preference values passed through the softmax
    * @param grads the gradients of the preference values with respect to the parameters
    * @param aind the index of the action for which the gradient is being queried
    * @param beta the softmax beta parameter. This parameter is the inverse of the Boltzmann temperature. As beta becomes larger, the policy becomes more deterministic. Should lie in [0, +infty).
    * @return the gradient of the policy
    */
public static FunctionGradient computePolicyGradient(double [] prefs, FunctionGradient[] grads, int aind, double beta){

	//first compute policy probs
	BoltzmannDistribution bd = new BoltzmannDistribution(prefs, 1./beta);
	double [] probs = bd.getProbabilities();

	return computePolicyGradient(probs, prefs, grads, aind, beta);

}
 
Developer ID: jmacglashan; Project: burlap; Lines: 18; Source: BoltzmannPolicyGradient.java
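For context, the softmax policy gradient that the overloaded helper on the last line presumably assembles is the standard result (stated here as a reference, not as a reading of the BURLAP internals): for preferences f_a with parameter gradients \nabla_\theta f_a,

\nabla_\theta \pi(a) = \beta \, \pi(a) \left( \nabla_\theta f_a - \sum_b \pi(b) \, \nabla_\theta f_b \right)

which is why both the softmax probabilities and the raw preference gradients are passed along.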


Note: The burlap.datastructures.BoltzmannDistribution class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are drawn from open-source projects contributed by their developers, and copyright remains with the original authors; consult each project's license before distributing or using the code. Do not reproduce without permission.