This article collects typical usage examples of the Java class burlap.datastructures.BoltzmannDistribution: what the class does, how it is used, and where to see it in real code.
The BoltzmannDistribution class belongs to the burlap.datastructures package. Eight code examples are shown below, ordered by popularity by default.
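The examples below all follow the same pattern: build a BoltzmannDistribution from an array of preference (or Q-) values, optionally with a temperature, then either read back the normalized probabilities or sample an index. The following minimal sketch illustrates that pattern in isolation; the preference values and the temperature of 0.5 are hypothetical and not taken from any example below.

import java.util.Arrays;
import burlap.datastructures.BoltzmannDistribution;

public class BoltzmannDistributionSketch {

    public static void main(String[] args) {
        double[] preferences = {1.0, 2.0, 0.5};   // hypothetical preference/Q values
        double temperature = 0.5;                 // lower temperature -> more deterministic

        BoltzmannDistribution bd = new BoltzmannDistribution(preferences, temperature);

        double[] probs = bd.getProbabilities();   // softmax of preferences / temperature
        int sampled = bd.sample();                // index drawn according to probs

        System.out.println(Arrays.toString(probs));
        System.out.println("sampled index: " + sampled);
    }
}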
Example 1: performBellmanUpdateOn
import burlap.datastructures.BoltzmannDistribution; // import the required package/class

/**
 * Overrides the superclass method to perform a Boltzmann backup operator
 * instead of a Bellman backup operator.
 * Results are stored in this valueFunction's internal map.
 * @param sh the hashed state on which to perform the Boltzmann update.
 * @return the new value
 */
@Override
protected double performBellmanUpdateOn(HashableState sh){

    if(this.tf.isTerminal(sh.s)){
        this.valueFunction.put(sh, 0.);
        return 0.;
    }

    List<QValue> qs = this.getQs(sh.s);
    double [] dqs = new double[qs.size()];
    for(int i = 0; i < qs.size(); i++){
        dqs[i] = qs.get(i).q;
    }
    BoltzmannDistribution bd = new BoltzmannDistribution(dqs, 1./this.boltzBeta);
    double [] dist = bd.getProbabilities();

    // Boltzmann backup: expected Q-value under the softmax action distribution
    double sum = 0.;
    for(int i = 0; i < dqs.length; i++){
        sum += dqs[i] * dist[i];
    }

    this.valueFunction.put(sh, sum);

    return sum;
}
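In symbols, the Boltzmann backup above replaces the max of the Bellman backup with a softmax-weighted expectation of the Q-values, where β = boltzBeta is the inverse of the temperature passed to BoltzmannDistribution (1./boltzBeta):

$$
V(s) \;=\; \sum_{a} \pi_\beta(a \mid s)\, Q(s,a),
\qquad
\pi_\beta(a \mid s) \;=\; \frac{e^{\beta\, Q(s,a)}}{\sum_{a'} e^{\beta\, Q(s,a')}}
$$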
Example 2: drawFromSoftmax
import burlap.datastructures.BoltzmannDistribution; // import the required package/class

/**
 * Given a set of arms and a function returning the expected reward of each,
 * uses softmax to draw one arm at random.
 * @param random randomizer
 * @param numberOfArms the number of arms to choose from
 * @param expectedReturnOfArm a function returning the expected return associated with a particular arm
 * @param temperature the softmax temperature; higher values make the draw more random
 * @return the index of the arm to pick
 */
public static Integer drawFromSoftmax(MersenneTwisterFast random,
                                      int numberOfArms,
                                      Function<Integer, Double> expectedReturnOfArm,
                                      double temperature) {

    double[] preferences = new double[numberOfArms];
    for(int i = 0; i < preferences.length; i++) {
        preferences[i] = expectedReturnOfArm.apply(i);
        if(!Double.isFinite(preferences[i]))
            preferences[i] = 0; // non-sampled areas default to a preference of 0
    }

    BoltzmannDistribution distribution = new BoltzmannDistribution(preferences, temperature);
    return distribution.sample();
}
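A minimal usage sketch, called from inside the declaring class; the return estimates and the temperature of 0.5 are hypothetical, and MersenneTwisterFast is the ec.util randomizer already used by the method signature above:

// hypothetical estimates of the expected return of three arms
double[] estimatedReturns = {1.0, 2.5, 0.3};
MersenneTwisterFast random = new MersenneTwisterFast();

int arm = drawFromSoftmax(
        random,
        estimatedReturns.length,
        i -> estimatedReturns[i],   // expected return of arm i
        0.5);                       // temperature: lower -> greedier, higher -> closer to uniform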
Example 3: policyDistribution
import burlap.datastructures.BoltzmannDistribution; // import the required package/class

@Override
public List<ActionProb> policyDistribution(State s) {

    HashableState sh = this.hashingFactory.hashState(s);
    PolicyNode node = this.getNode(sh);

    double [] prefs = new double[node.preferences.size()];
    for(int i = 0; i < node.preferences.size(); i++){
        prefs[i] = node.preferences.get(i).preference;
    }

    BoltzmannDistribution bd = new BoltzmannDistribution(prefs);
    double [] probsArray = bd.getProbabilities();

    List<ActionProb> probs = new ArrayList<ActionProb>(probsArray.length);
    for(int i = 0; i < probsArray.length; i++){
        ActionPreference ap = node.preferences.get(i);
        probs.add(new ActionProb(ap.ga, probsArray[i]));
    }

    return probs;
}
Example 4: setV
import burlap.datastructures.BoltzmannDistribution; // import the required package/class

protected void setV(QAndQGradient qvs){
    double [] qArray = new double[qvs.qs.size()];
    for(int i = 0; i < qvs.qs.size(); i++){
        qArray[i] = qvs.qs.get(i).q;
    }
    BoltzmannDistribution bd = new BoltzmannDistribution(qArray, 1./DifferentiableSparseSampling.this.boltzBeta);
    double [] probs = bd.getProbabilities();
    double sum = 0.;
    for(int i = 0; i < qArray.length; i++){
        sum += qArray[i] * probs[i];
    }
    this.v = sum;
}
Example 5: getProbabilities
import burlap.datastructures.BoltzmannDistribution; // import the required package/class

public static double[] getProbabilities(
        int numberOfArms, Function<Integer, Double> expectedReturnOfArm, double temperature) {

    double[] preferences = new double[numberOfArms];
    for(int i = 0; i < preferences.length; i++)
        preferences[i] = expectedReturnOfArm.apply(i);

    BoltzmannDistribution distribution = new BoltzmannDistribution(preferences, temperature);
    return distribution.getProbabilities();
}
Example 6: evolve
import burlap.datastructures.BoltzmannDistribution; // import the required package/class

private void evolve(FishState model) {
    // softmax selector over the last observed fitness of each strategy
    BoltzmannDistribution distribution = new BoltzmannDistribution(lastObservedFitnesses, temperature);

    for(Fisher fisher : model.getFishers()) {
        // inertia can block you, as can regulations
        if(!model.getRandom().nextBoolean(inertia) && fisher.isAllowedAtSea() && fisher.getHoursAtPort() < 48) {
            int newStrategy = distribution.sample();
            fisher.setDestinationStrategy(
                    new ReplicatorDrivenDestinationStrategy(newStrategy,
                                                            options.get(newStrategy).apply(model)));
        }
    }
}
Example 7: apply
import burlap.datastructures.BoltzmannDistribution; // import the required package/class

@Override
public double apply(double[] qs) {

    BoltzmannDistribution bd = new BoltzmannDistribution(qs, temp);
    double [] dist = bd.getProbabilities();

    // softmax-weighted expectation of the Q-values
    double sum = 0.;
    for(int i = 0; i < qs.length; i++){
        sum += qs[i] * dist[i];
    }

    return sum;
}
Example 8: computePolicyGradient
import burlap.datastructures.BoltzmannDistribution; // import the required package/class

/**
 * Computes the gradient of the Boltzmann (softmax) policy with respect to some parameters.
 * @param prefs the action-wise preference values passed through the softmax
 * @param grads the gradients of the preference values with respect to the parameters
 * @param aind the index of the action for which the gradient is being queried
 * @param beta the softmax beta parameter. This parameter is the inverse of the Boltzmann temperature. As beta becomes larger, the policy becomes more deterministic. Should lie in [0, +infty).
 * @return the gradient of the policy
 */
public static FunctionGradient computePolicyGradient(double [] prefs, FunctionGradient[] grads, int aind, double beta){

    // first compute the policy probabilities
    BoltzmannDistribution bd = new BoltzmannDistribution(prefs, 1./beta);
    double [] probs = bd.getProbabilities();

    return computePolicyGradient(probs, prefs, grads, aind, beta);
}
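For reference, with π(a) the softmax probabilities computed above and pref the preference values, the gradient that the overloaded call is expected to assemble takes the standard softmax form:

$$
\nabla_\theta \pi(a) \;=\; \beta\, \pi(a) \left( \nabla_\theta \mathrm{pref}_a \;-\; \sum_{a'} \pi(a')\, \nabla_\theta \mathrm{pref}_{a'} \right),
\qquad
\pi(a) \;=\; \frac{e^{\beta\, \mathrm{pref}_a}}{\sum_{a'} e^{\beta\, \mathrm{pref}_{a'}}}
$$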