本文整理汇总了Java中weka.core.ContingencyTables.entropyConditionedOnRows方法的典型用法代码示例。如果您正苦于以下问题:Java ContingencyTables.entropyConditionedOnRows方法的具体用法?Java ContingencyTables.entropyConditionedOnRows怎么用?Java ContingencyTables.entropyConditionedOnRows使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类weka.core.ContingencyTables的用法示例。
在下文中一共展示了ContingencyTables.entropyConditionedOnRows方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: findSplitNominalNominal
import weka.core.ContingencyTables; //导入方法依赖的package包/类
/**
 * Searches for the best binary split on a nominal attribute when the class
 * is nominal too. Every attribute value is tried as a "this value versus all
 * other values" split, and the candidate with the lowest result of
 * {@code ContingencyTables.entropyConditionedOnRows} is kept.
 *
 * <p>Side effects: when a candidate wins, {@code m_SplitPoint} is set to the
 * index of that attribute value. {@code m_Distribution} is overwritten while
 * candidates are evaluated and is finally replaced by a new 3-row table
 * (row 0: weights for the chosen value, row 1: weights for all other values,
 * row 2: weights of instances with a missing attribute value, or the summed
 * class weights when no instance has a missing value).
 *
 * @param index attribute index
 * @return value of criterion for the best split; {@code Double.MAX_VALUE}
 *         if no candidate scored below that
 * @throws Exception if something goes wrong
 */
protected double findSplitNominalNominal(int index) throws Exception {
  final int valueCount = m_Instances.attribute(index).numValues();
  final int classCount = m_Instances.numClasses();

  // One row per attribute value plus a trailing row for missing values.
  double[][] weightByValue = new double[valueCount + 1][classCount];
  double[] classTotals = new double[classCount];
  double[][] winner = new double[3][classCount];
  boolean sawMissing = false;
  double lowest = Double.MAX_VALUE;

  // Accumulate instance weights per (attribute value, class) cell.
  for (int n = 0; n < m_Instances.numInstances(); n++) {
    Instance current = m_Instances.instance(n);
    int row;
    if (current.isMissing(index)) {
      sawMissing = true;
      row = valueCount;
    } else {
      row = (int) current.value(index);
    }
    weightByValue[row][(int) current.classValue()] += current.weight();
  }

  // Class totals over all non-missing attribute values.
  for (int v = 0; v < valueCount; v++) {
    for (int c = 0; c < classCount; c++) {
      classTotals[c] += weightByValue[v][c];
    }
  }

  // The missing-value branch is identical for every candidate split.
  System.arraycopy(weightByValue[valueCount], 0, m_Distribution[2], 0,
    classCount);

  // Evaluate "value v versus the rest" for each attribute value.
  for (int v = 0; v < valueCount; v++) {
    for (int c = 0; c < classCount; c++) {
      m_Distribution[0][c] = weightByValue[v][c];
      m_Distribution[1][c] = classTotals[c] - weightByValue[v][c];
    }
    double candidate =
      ContingencyTables.entropyConditionedOnRows(m_Distribution);
    if (candidate < lowest) {
      lowest = candidate;
      m_SplitPoint = (double) v;
      // Snapshot the table: m_Distribution is reused by the next candidate.
      for (int r = 0; r < 3; r++) {
        System.arraycopy(m_Distribution[r], 0, winner[r], 0, classCount);
      }
    }
  }

  // No missing values in training data: use the class totals for row 2.
  if (!sawMissing) {
    System.arraycopy(classTotals, 0, winner[2], 0, classCount);
  }
  m_Distribution = winner;
  return lowest;
}
示例2: FayyadAndIranisMDL
import weka.core.ContingencyTables; //导入方法依赖的package包/类
/**
 * Test using Fayyad and Irani's MDL criterion.
 *
 * <p>The split is accepted when the information gain (entropy of
 * {@code priorCounts} minus the row-conditioned entropy of
 * {@code bestCounts}) exceeds
 * {@code (log2(numCutPoints) + delta) / numInstances}.
 *
 * @param priorCounts class counts before the split
 * @param bestCounts counts after the split; row 0 is treated as the left
 *          subset and row 1 as the right subset
 * @param numInstances divisor of the acceptance threshold (presumably the
 *          total number/weight of instances being split)
 * @param numCutPoints value whose base-2 logarithm enters the threshold
 *          (presumably the number of candidate cut points)
 * @return true if the split is acceptable
 */
private boolean FayyadAndIranisMDL(double[] priorCounts,
  double[][] bestCounts, double numInstances, int numCutPoints) {
  // Entropy before the split, after the split, and the resulting gain.
  final double entropyBefore = ContingencyTables.entropy(priorCounts);
  final double entropyAfter =
    ContingencyTables.entropyConditionedOnRows(bestCounts);
  final double infoGain = entropyBefore - entropyAfter;

  // Number of classes occurring in the whole set
  int classesOverall = 0;
  for (int c = 0; c < priorCounts.length; c++) {
    if (priorCounts[c] > 0) {
      classesOverall++;
    }
  }

  // Number of classes occurring in the left subset
  int classesLeft = 0;
  for (double weight : bestCounts[0]) {
    if (weight > 0) {
      classesLeft++;
    }
  }

  // Number of classes occurring in the right subset
  int classesRight = 0;
  for (double weight : bestCounts[1]) {
    if (weight > 0) {
      classesRight++;
    }
  }

  // Entropy of the left and the right subsets
  final double leftEntropy = ContingencyTables.entropy(bestCounts[0]);
  final double rightEntropy = ContingencyTables.entropy(bestCounts[1]);

  // Terms of the MDL formula
  final double codingTerm = Utils.log2(Math.pow(3, classesOverall) - 2);
  final double entropyTerm = (classesOverall * entropyBefore)
    - (classesRight * rightEntropy) - (classesLeft * leftEntropy);
  final double delta = codingTerm - entropyTerm;

  // Accept the split only if the gain beats the MDL threshold.
  final double threshold = (Utils.log2(numCutPoints) + delta) / numInstances;
  return infoGain > threshold;
}
示例3: findSplitNominalNominal
import weka.core.ContingencyTables; //导入方法依赖的package包/类
/**
 * Determines the best one-value-against-the-rest split of a nominal
 * attribute for a nominal class and returns the criterion value of that
 * split. Candidates are scored with
 * {@code ContingencyTables.entropyConditionedOnRows}; lower is better.
 *
 * <p>As a side effect the method stores the index of the winning attribute
 * value in {@code m_SplitPoint} (only when some candidate wins) and assigns
 * a newly allocated 3-row table to {@code m_Distribution}: chosen value,
 * remaining values, and missing values. If no instance has a missing value,
 * the third row holds the summed class weights instead.
 *
 * @param index attribute index
 * @return value of criterion for the best split, or
 *         {@code Double.MAX_VALUE} when no candidate scored lower
 * @throws Exception if something goes wrong
 */
private double findSplitNominalNominal(int index) throws Exception {
  final int numValues = m_Instances.attribute(index).numValues();
  final int numClasses = m_Instances.numClasses();

  // Rows 0..numValues-1 hold attribute values; the last row collects
  // instances whose attribute value is missing.
  final double[][] table = new double[numValues + 1][numClasses];
  final double[] missingRow = table[numValues];
  final double[] totals = new double[numClasses];
  final double[][] bestTable = new double[3][numClasses];
  int missingCount = 0;
  double bestScore = Double.MAX_VALUE;

  // Fill the weighted contingency table.
  for (int k = 0; k < m_Instances.numInstances(); k++) {
    final Instance example = m_Instances.instance(k);
    if (example.isMissing(index)) {
      missingCount++;
      missingRow[(int) example.classValue()] += example.weight();
    } else {
      table[(int) example.value(index)][(int) example.classValue()] +=
        example.weight();
    }
  }

  // Per-class totals across the non-missing rows.
  for (int value = 0; value < numValues; value++) {
    final double[] row = table[value];
    for (int cls = 0; cls < numClasses; cls++) {
      totals[cls] += row[cls];
    }
  }

  // Row 2 of the working distribution never changes between candidates.
  System.arraycopy(missingRow, 0, m_Distribution[2], 0, numClasses);

  // Score each candidate split and remember the best one.
  for (int value = 0; value < numValues; value++) {
    final double[] row = table[value];
    for (int cls = 0; cls < numClasses; cls++) {
      m_Distribution[0][cls] = row[cls];
      m_Distribution[1][cls] = totals[cls] - row[cls];
    }
    final double score =
      ContingencyTables.entropyConditionedOnRows(m_Distribution);
    if (score < bestScore) {
      bestScore = score;
      m_SplitPoint = (double) value;
      // Copy, because m_Distribution is overwritten by later candidates.
      for (int branch = 0; branch < 3; branch++) {
        System.arraycopy(m_Distribution[branch], 0, bestTable[branch], 0,
          numClasses);
      }
    }
  }

  // No missing values in training data.
  if (missingCount == 0) {
    System.arraycopy(totals, 0, bestTable[2], 0, numClasses);
  }
  m_Distribution = bestTable;
  return bestScore;
}
示例4: FayyadAndIranisMDL
import weka.core.ContingencyTables; //导入方法依赖的package包/类
/**
 * Test using Fayyad and Irani's MDL criterion.
 *
 * <p>Compares the information gain of a split against the threshold
 * {@code (log2(numCutPoints) + delta) / numInstances}, where {@code delta}
 * is built from the number of classes present and the entropies of the
 * whole set and of the two subsets.
 *
 * @param priorCounts class counts before the split
 * @param bestCounts counts after the split; row 0 is used as the left
 *          subset and row 1 as the right subset
 * @param numInstances value the threshold is divided by (presumably the
 *          total number/weight of instances)
 * @param numCutPoints value passed to {@code Utils.log2} for the threshold
 *          (presumably the number of candidate cut points)
 * @return true if the split is acceptable
 */
private boolean FayyadAndIranisMDL(double[] priorCounts,
  double[][] bestCounts,
  double numInstances,
  int numCutPoints) {
  // Information gain: entropy before the split minus entropy after it.
  double before = ContingencyTables.entropy(priorCounts);
  double after = ContingencyTables.entropyConditionedOnRows(bestCounts);
  double improvement = before - after;

  // Number of classes occurring in the set
  int kAll = 0;
  for (double count : priorCounts) {
    kAll += (count > 0) ? 1 : 0;
  }

  // Number of classes occurring in the left subset
  int kLeft = 0;
  for (double count : bestCounts[0]) {
    kLeft += (count > 0) ? 1 : 0;
  }

  // Number of classes occurring in the right subset
  int kRight = 0;
  for (double count : bestCounts[1]) {
    kRight += (count > 0) ? 1 : 0;
  }

  // Entropy of the left and the right subsets
  double hLeft = ContingencyTables.entropy(bestCounts[0]);
  double hRight = ContingencyTables.entropy(bestCounts[1]);

  // Compute terms for MDL formula
  double delta = Utils.log2(Math.pow(3, kAll) - 2)
    - ((kAll * before) - (kRight * hRight) - (kLeft * hLeft));

  // Check if split is to be accepted
  double required = (Utils.log2(numCutPoints) + delta) / numInstances;
  return improvement > required;
}
示例5: FayyadAndIranisMDL
import weka.core.ContingencyTables; //导入方法依赖的package包/类
/**
 * Test using Fayyad and Irani's MDL criterion.
 *
 * <p>Returns whether the information gain of the split is strictly greater
 * than {@code (log2(numCutPoints) + delta) / numInstances}.
 *
 * @param priorCounts class counts before the split
 * @param bestCounts counts after the split; row 0 is read as the left
 *          subset and row 1 as the right subset
 * @param numInstances divisor applied to the threshold (presumably the
 *          total number/weight of instances)
 * @param numCutPoints argument of the {@code log2} term in the threshold
 *          (presumably the number of candidate cut points)
 * @return true if the split is acceptable
 */
private boolean FayyadAndIranisMDL(double[] priorCounts,
  double[][] bestCounts,
  double numInstances,
  int numCutPoints) {
  // Entropy before the split and after the split.
  final double priorEntropy = ContingencyTables.entropy(priorCounts);
  final double splitEntropy =
    ContingencyTables.entropyConditionedOnRows(bestCounts);

  // Classes occurring in the whole set, the left subset and the right subset.
  final int totalClasses = countPositiveEntries(priorCounts);
  final int leftClasses = countPositiveEntries(bestCounts[0]);
  final int rightClasses = countPositiveEntries(bestCounts[1]);

  // Entropy of the left and the right subsets
  final double leftEntropy = ContingencyTables.entropy(bestCounts[0]);
  final double rightEntropy = ContingencyTables.entropy(bestCounts[1]);

  // Compute terms for MDL formula
  final double weightedEntropyDrop = (totalClasses * priorEntropy)
    - (rightClasses * rightEntropy)
    - (leftClasses * leftEntropy);
  final double delta =
    Utils.log2(Math.pow(3, totalClasses) - 2) - weightedEntropyDrop;

  // Check if split is to be accepted
  return (priorEntropy - splitEntropy)
    > (Utils.log2(numCutPoints) + delta) / numInstances;
}

/**
 * Counts the entries of an array that are strictly greater than zero.
 *
 * @param values the array to scan
 * @return the number of entries greater than zero
 */
private int countPositiveEntries(double[] values) {
  int positives = 0;
  for (double value : values) {
    if (value > 0) {
      positives++;
    }
  }
  return positives;
}
示例6: gain
import weka.core.ContingencyTables; //导入方法依赖的package包/类
/**
 * Computes the gain of a split: the given criterion value minus the
 * entropy of {@code dist} conditioned on its rows.
 *
 * @param dist the distributions, passed unchanged to
 *          {@code ContingencyTables.entropyConditionedOnRows}
 * @param priorVal the criterion value the conditional entropy is
 *          subtracted from (presumably the value before the split)
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double entropyAfterSplit =
    ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - entropyAfterSplit;
}
示例7: gain
import weka.core.ContingencyTables; //导入方法依赖的package包/类
/**
 * Returns how much the splitting criterion improves through a split, i.e.
 * {@code priorVal} reduced by the row-conditioned entropy of {@code dist}.
 *
 * @param dist the table handed to
 *          {@code ContingencyTables.entropyConditionedOnRows}
 * @param priorVal the splitting criterion value to compare against
 *          (presumably measured before the split)
 * @return the gain after splitting
 */
protected double gain(double[][] dist, double priorVal) {
  double conditionalEntropy = ContingencyTables.entropyConditionedOnRows(dist);
  double improvement = priorVal - conditionalEntropy;
  return improvement;
}
示例8: gain
import weka.core.ContingencyTables; //导入方法依赖的package包/类
/**
 * Evaluates a split by subtracting the entropy of the distributions,
 * conditioned on their rows, from the supplied criterion value.
 *
 * @param dist
 *          the distributions to evaluate with
 *          {@code ContingencyTables.entropyConditionedOnRows}
 * @param priorVal
 *          the splitting criterion value used as the minuend (presumably
 *          the value before the split)
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {
  final double postSplitEntropy =
    ContingencyTables.entropyConditionedOnRows(dist);
  final double splitGain = priorVal - postSplitEntropy;
  return splitGain;
}
示例9: gain
import weka.core.ContingencyTables; //导入方法依赖的package包/类
/**
 * Computes the gain achieved by a split as the difference between the
 * prior value and the entropy of the distribution conditioned on its rows.
 *
 * @param dist the distribution, forwarded to
 *          {@code ContingencyTables.entropyConditionedOnRows}
 * @param priorVal the prior value the conditional entropy is subtracted
 *          from
 * @return the gain
 */
protected double gain(double[][] dist, double priorVal) {
  double rowConditionedEntropy =
    ContingencyTables.entropyConditionedOnRows(dist);
  return priorVal - rowConditionedEntropy;
}