This page collects typical usage examples of the Java method burlap.datastructures.BoltzmannDistribution.getProbabilities. If you are wondering what BoltzmannDistribution.getProbabilities does, how to call it, or want to see it used in context, the curated method examples here should help. You can also explore further usage examples of its enclosing class, burlap.datastructures.BoltzmannDistribution.
Six code examples of BoltzmannDistribution.getProbabilities are shown below, ordered by popularity by default.
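Before the examples, here is a minimal standalone sketch of the method (an illustration, not taken from the examples below; the class name BoltzmannSketch and the preference values are hypothetical, and the two-argument constructor taking a preference array and a temperature is the form the examples use):

import burlap.datastructures.BoltzmannDistribution;

public class BoltzmannSketch {
    public static void main(String[] args){
        double [] prefs = {1.0, 2.0, 0.5}; //e.g. Q-values or action preferences
        BoltzmannDistribution bd = new BoltzmannDistribution(prefs, 0.5); //temperature of 0.5
        double [] probs = bd.getProbabilities(); //softmax over prefs at that temperature
        for(double p : probs){
            System.out.println(p);
        }
    }
}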
Example 1: performBellmanUpdateOn
import burlap.datastructures.BoltzmannDistribution; //import the class the method depends on
/**
 * Overrides the superclass method to apply a Boltzmann backup
 * instead of a Bellman (max) backup.
 * Results are stored in this valueFunction's internal map.
 * @param sh the hashed state on which to perform the Boltzmann update
 * @return the new value
 */
@Override
protected double performBellmanUpdateOn(HashableState sh){
    if(this.tf.isTerminal(sh.s)){
        //terminal states are fixed at a value of zero
        this.valueFunction.put(sh, 0.);
        return 0.;
    }

    //gather the Q-values for all actions in this state
    List<QValue> qs = this.getQs(sh.s);
    double [] dqs = new double[qs.size()];
    for(int i = 0; i < qs.size(); i++){
        dqs[i] = qs.get(i).q;
    }

    //softmax distribution over the Q-values; the temperature is 1/beta
    BoltzmannDistribution bd = new BoltzmannDistribution(dqs, 1./this.boltzBeta);
    double [] dist = bd.getProbabilities();

    //the new value is the expected Q-value under that distribution
    double sum = 0.;
    for(int i = 0; i < dqs.length; i++){
        sum += dqs[i] * dist[i];
    }
    this.valueFunction.put(sh, sum);
    return sum;
}
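For reference, this backup computes a "soft" value instead of the usual max: V(s) = Σ_a π(a|s) Q(s,a), where π(a|s) ∝ exp(Q(s,a)/τ) and the temperature passed to the constructor is τ = 1/boltzBeta. As boltzBeta grows, the distribution concentrates on the greedy action and the update approaches the standard Bellman backup.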
Example 2: policyDistribution
import burlap.datastructures.BoltzmannDistribution; //import the class the method depends on
@Override
public List<ActionProb> policyDistribution(State s) {
    HashableState sh = this.hashingFactory.hashState(s);
    PolicyNode node = this.getNode(sh);

    //collect the actor's preference value for each action
    double [] prefs = new double[node.preferences.size()];
    for(int i = 0; i < node.preferences.size(); i++){
        prefs[i] = node.preferences.get(i).preference;
    }

    //softmax over the preferences (single-argument constructor)
    BoltzmannDistribution bd = new BoltzmannDistribution(prefs);
    double [] probsArray = bd.getProbabilities();

    //pair each probability with its corresponding grounded action
    List <ActionProb> probs = new ArrayList<ActionProb>(probsArray.length);
    for(int i = 0; i < probsArray.length; i++){
        ActionPreference ap = node.preferences.get(i);
        probs.add(new ActionProb(ap.ga, probsArray[i]));
    }
    return probs;
}
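Note that this example uses the single-argument constructor, which (presumably, judging by the two-argument form the other examples use) applies a default temperature of 1, i.e. a plain softmax over the actor's preferences.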
Example 3: setV
import burlap.datastructures.BoltzmannDistribution; //import the class the method depends on
protected void setV(QAndQGradient qvs){
    //extract the raw Q-values
    double [] qArray = new double[qvs.qs.size()];
    for(int i = 0; i < qvs.qs.size(); i++){
        qArray[i] = qvs.qs.get(i).q;
    }

    //soft (Boltzmann) backup: the expected Q-value under the softmax policy
    BoltzmannDistribution bd = new BoltzmannDistribution(qArray, 1./DifferentiableSparseSampling.this.boltzBeta);
    double [] probs = bd.getProbabilities();
    double sum = 0.;
    for(int i = 0; i < qArray.length; i++){
        sum += qArray[i] * probs[i];
    }
    this.v = sum;
}
Example 4: getProbabilities
import burlap.datastructures.BoltzmannDistribution; //import the class the method depends on
public static double[] getProbabilities(
        int numberOfArms, Function<Integer, Double> expectedReturnOfArm, double temperature) {
    //use each arm's expected return as its preference value
    double[] preferences = new double[numberOfArms];
    for(int i = 0; i < preferences.length; i++)
        preferences[i] = expectedReturnOfArm.apply(i);
    BoltzmannDistribution distribution = new BoltzmannDistribution(preferences, temperature);
    return distribution.getProbabilities();
}
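A hypothetical call of this helper for a three-armed bandit (the arm returns and temperature here are made up for illustration; java.util.function.Function must be imported alongside BoltzmannDistribution):

double[] probs = getProbabilities(3, arm -> arm == 0 ? 1.0 : 0.2, 0.5);
//probs[0] is largest; lowering the temperature pushes it toward 1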
Example 5: apply
import burlap.datastructures.BoltzmannDistribution; //import the class the method depends on
@Override
public double apply(double[] qs) {
    //softmax distribution over the Q-values at the given temperature
    BoltzmannDistribution bd = new BoltzmannDistribution(qs, temp);
    double [] dist = bd.getProbabilities();

    //return the expected Q-value under that distribution
    double sum = 0.;
    for(int i = 0; i < qs.length; i++){
        sum += qs[i] * dist[i];
    }
    return sum;
}
Example 6: computePolicyGradient
import burlap.datastructures.BoltzmannDistribution; //import the class the method depends on
/**
 * Computes the gradient of the Boltzmann (softmax) policy with respect to some parameters.
 * @param prefs the action-wise preference values passed through the softmax
 * @param grads the gradients of the preference values with respect to the parameters
 * @param aind the index of the action for which the gradient is being queried
 * @param beta the softmax beta parameter. This parameter is the inverse of the Boltzmann temperature. As beta becomes larger, the policy becomes more deterministic. Should lie in [0, +infinity).
 * @return the gradient of the policy
 */
public static FunctionGradient computePolicyGradient(double [] prefs, FunctionGradient[] grads, int aind, double beta){
    //first compute the policy probabilities
    BoltzmannDistribution bd = new BoltzmannDistribution(prefs, 1./beta);
    double [] probs = bd.getProbabilities();
    return computePolicyGradient(probs, prefs, grads, aind, beta);
}
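For context, the quantity being assembled is the standard softmax policy gradient: with π_a ∝ exp(β f_a) for preferences f, the gradient is ∇π_a = β π_a (∇f_a − Σ_b π_b ∇f_b), which the overloaded computePolicyGradient called on the last line presumably computes from probs, prefs, and grads.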