

Java ContingencyTables.entropyConditionedOnRows Method Code Examples

This article collects typical usage examples of the Java method weka.core.ContingencyTables.entropyConditionedOnRows. If you are wondering what ContingencyTables.entropyConditionedOnRows does, how to call it, or what it looks like in real code, the hand-picked examples below should help. You can also explore further usage examples of the enclosing class, weka.core.ContingencyTables.


A total of 9 code examples of the ContingencyTables.entropyConditionedOnRows method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
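Before the individual examples, here is a minimal, self-contained sketch of how the method is typically called. The class name, table layout, and numbers below are illustrative assumptions and are not taken from any of the projects listed.

import weka.core.ContingencyTables;

public class EntropyConditionedOnRowsDemo {

  public static void main(String[] args) {
    // Hypothetical 2x2 contingency table: each row is one branch of a split,
    // each column is a class, and each cell holds the (weighted) instance count.
    double[][] counts = {
      { 8.0, 2.0 },  // left branch: 8 instances of class A, 2 of class B
      { 1.0, 9.0 }   // right branch: 1 instance of class A, 9 of class B
    };

    // Conditional entropy of the class distribution given the row (branch).
    // Lower values mean the split separates the classes more cleanly.
    double hClassGivenBranch = ContingencyTables.entropyConditionedOnRows(counts);
    System.out.println("H(class | branch) = " + hClassGivenBranch);
  }
}

In all of the examples below, this value is compared against (or subtracted from) the entropy of the class distribution before the split, yielding an information-gain-style splitting criterion.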

Example 1: findSplitNominalNominal

import weka.core.ContingencyTables; // import the package/class the method depends on
/**
 * Finds best split for nominal attribute and nominal class
 * and returns value.
 *
 * @param index attribute index
 * @return value of criterion for the best split
 * @throws Exception if something goes wrong
 */
protected double findSplitNominalNominal(int index) throws Exception {

  double bestVal = Double.MAX_VALUE, currVal;
  double[][] counts = new double[m_Instances.attribute(index).numValues()
    + 1][m_Instances.numClasses()];
  double[] sumCounts = new double[m_Instances.numClasses()];
  double[][] bestDist = new double[3][m_Instances.numClasses()];
  int numMissing = 0;

  // Compute counts for all the values
  for (int i = 0; i < m_Instances.numInstances(); i++) {
    Instance inst = m_Instances.instance(i);
    if (inst.isMissing(index)) {
      numMissing++;
      counts[m_Instances.attribute(index).numValues()]
        [(int) inst.classValue()] += inst.weight();
    } else {
      counts[(int) inst.value(index)][(int) inst.classValue()] += inst
        .weight();
    }
  }

  // Compute sum of counts
  for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
    for (int j = 0; j < m_Instances.numClasses(); j++) {
      sumCounts[j] += counts[i][j];
    }
  }

  // Make split counts for each possible split and evaluate
  System.arraycopy(counts[m_Instances.attribute(index).numValues()], 0,
    m_Distribution[2], 0, m_Instances.numClasses());
  for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
    for (int j = 0; j < m_Instances.numClasses(); j++) {
      m_Distribution[0][j] = counts[i][j];
      m_Distribution[1][j] = sumCounts[j] - counts[i][j];
    }
    currVal = ContingencyTables.entropyConditionedOnRows(m_Distribution);
    if (currVal < bestVal) {
      bestVal = currVal;
      m_SplitPoint = (double) i;
      for (int j = 0; j < 3; j++) {
        System.arraycopy(m_Distribution[j], 0, bestDist[j], 0,
          m_Instances.numClasses());
      }
    }
  }

  // No missing values in training data.
  if (numMissing == 0) {
    System.arraycopy(sumCounts, 0, bestDist[2], 0,
      m_Instances.numClasses());
  }

  m_Distribution = bestDist;
  return bestVal;
}
 
Author: mydzigear, Project: repo.kmeanspp.silhouette_score, Lines of code: 66, Source file: DecisionStump.java

Example 2: FayyadAndIranisMDL

import weka.core.ContingencyTables; // import the package/class the method depends on
/**
 * Test using Fayyad and Irani's MDL criterion.
 * 
 * @param priorCounts the class counts prior to splitting
 * @param bestCounts the class counts of the two subsets produced by the best split
 * @param numInstances the (weighted) number of instances
 * @param numCutPoints the number of candidate cut points
 * @return true if the split is acceptable
 */
private boolean FayyadAndIranisMDL(double[] priorCounts,
  double[][] bestCounts, double numInstances, int numCutPoints) {

  double priorEntropy, entropy, gain;
  double entropyLeft, entropyRight, delta;
  int numClassesTotal, numClassesRight, numClassesLeft;

  // Compute entropy before split.
  priorEntropy = ContingencyTables.entropy(priorCounts);

  // Compute entropy after split.
  entropy = ContingencyTables.entropyConditionedOnRows(bestCounts);

  // Compute information gain.
  gain = priorEntropy - entropy;

  // Number of classes occurring in the set
  numClassesTotal = 0;
  for (double priorCount : priorCounts) {
    if (priorCount > 0) {
      numClassesTotal++;
    }
  }

  // Number of classes occurring in the left subset
  numClassesLeft = 0;
  for (int i = 0; i < bestCounts[0].length; i++) {
    if (bestCounts[0][i] > 0) {
      numClassesLeft++;
    }
  }

  // Number of classes occurring in the right subset
  numClassesRight = 0;
  for (int i = 0; i < bestCounts[1].length; i++) {
    if (bestCounts[1][i] > 0) {
      numClassesRight++;
    }
  }

  // Entropy of the left and the right subsets
  entropyLeft = ContingencyTables.entropy(bestCounts[0]);
  entropyRight = ContingencyTables.entropy(bestCounts[1]);

  // Compute terms for MDL formula
  delta = Utils.log2(Math.pow(3, numClassesTotal) - 2)
    - ((numClassesTotal * priorEntropy) - (numClassesRight * entropyRight) - (numClassesLeft * entropyLeft));

  // Check if split is to be accepted
  return (gain > (Utils.log2(numCutPoints) + delta) / numInstances);
}
 
Author: mydzigear, Project: repo.kmeanspp.silhouette_score, Lines of code: 61, Source file: Discretize.java
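For reference, the test implemented by the return statement above can be written as a formula. The symbol names (H, k, kL, kR, SL, SR) are introduced here only for readability and do not appear in the code:

    gain > (log2(numCutPoints) + delta) / numInstances

with gain = H(S) - H(S | split), and

    delta = log2(3^k - 2) - [ k*H(S) - kL*H(SL) - kR*H(SR) ]

where k is the number of classes present in the full set S, and kL and kR are the numbers of classes present in the left subset SL and right subset SR of the split. This is Fayyad and Irani's MDL stopping criterion spelled out as an inequality; the split is kept only when the information gain exceeds the coding cost on the right-hand side.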

Example 3: findSplitNominalNominal

import weka.core.ContingencyTables; // import the package/class the method depends on
/**
 * Finds best split for nominal attribute and nominal class
 * and returns value.
 *
 * @param index attribute index
 * @return value of criterion for the best split
 * @throws Exception if something goes wrong
 */
private double findSplitNominalNominal(int index) throws Exception {

  double bestVal = Double.MAX_VALUE, currVal;
  double[][] counts = new double[m_Instances.attribute(index).numValues()
    + 1][m_Instances.numClasses()];
  double[] sumCounts = new double[m_Instances.numClasses()];
  double[][] bestDist = new double[3][m_Instances.numClasses()];
  int numMissing = 0;

  // Compute counts for all the values
  for (int i = 0; i < m_Instances.numInstances(); i++) {
    Instance inst = m_Instances.instance(i);
    if (inst.isMissing(index)) {
      numMissing++;
      counts[m_Instances.attribute(index).numValues()]
        [(int) inst.classValue()] += inst.weight();
    } else {
      counts[(int) inst.value(index)][(int) inst.classValue()] += inst
        .weight();
    }
  }

  // Compute sum of counts
  for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
    for (int j = 0; j < m_Instances.numClasses(); j++) {
      sumCounts[j] += counts[i][j];
    }
  }

  // Make split counts for each possible split and evaluate
  System.arraycopy(counts[m_Instances.attribute(index).numValues()], 0,
    m_Distribution[2], 0, m_Instances.numClasses());
  for (int i = 0; i < m_Instances.attribute(index).numValues(); i++) {
    for (int j = 0; j < m_Instances.numClasses(); j++) {
      m_Distribution[0][j] = counts[i][j];
      m_Distribution[1][j] = sumCounts[j] - counts[i][j];
    }
    currVal = ContingencyTables.entropyConditionedOnRows(m_Distribution);
    if (currVal < bestVal) {
      bestVal = currVal;
      m_SplitPoint = (double) i;
      for (int j = 0; j < 3; j++) {
        System.arraycopy(m_Distribution[j], 0, bestDist[j], 0,
          m_Instances.numClasses());
      }
    }
  }

  // No missing values in training data.
  if (numMissing == 0) {
    System.arraycopy(sumCounts, 0, bestDist[2], 0,
      m_Instances.numClasses());
  }

  m_Distribution = bestDist;
  return bestVal;
}
 
Author: dsibournemouth, Project: autoweka, Lines of code: 66, Source file: DecisionStump.java

Example 4: FayyadAndIranisMDL

import weka.core.ContingencyTables; // import the package/class the method depends on
/**
 * Test using Fayyad and Irani's MDL criterion.
 *
 * @param priorCounts the class counts prior to splitting
 * @param bestCounts the class counts of the two subsets produced by the best split
 * @param numInstances the (weighted) number of instances
 * @param numCutPoints the number of candidate cut points
 * @return true if the split is acceptable
 */
private boolean FayyadAndIranisMDL(double[] priorCounts,
    double[][] bestCounts,
    double numInstances,
    int numCutPoints) {

  double priorEntropy, entropy, gain;
  double entropyLeft, entropyRight, delta;
  int numClassesTotal, numClassesRight, numClassesLeft;

  // Compute entropy before split.
  priorEntropy = ContingencyTables.entropy(priorCounts);

  // Compute entropy after split.
  entropy = ContingencyTables.entropyConditionedOnRows(bestCounts);

  // Compute information gain.
  gain = priorEntropy - entropy;

  // Number of classes occurring in the set
  numClassesTotal = 0;
  for (int i = 0; i < priorCounts.length; i++) {
    if (priorCounts[i] > 0) {
      numClassesTotal++;
    }
  }

  // Number of classes occurring in the left subset
  numClassesLeft = 0;
  for (int i = 0; i < bestCounts[0].length; i++) {
    if (bestCounts[0][i] > 0) {
      numClassesLeft++;
    }
  }

  // Number of classes occurring in the right subset
  numClassesRight = 0;
  for (int i = 0; i < bestCounts[1].length; i++) {
    if (bestCounts[1][i] > 0) {
      numClassesRight++;
    }
  }

  // Entropy of the left and the right subsets
  entropyLeft = ContingencyTables.entropy(bestCounts[0]);
  entropyRight = ContingencyTables.entropy(bestCounts[1]);

  // Compute terms for MDL formula
  delta = Utils.log2(Math.pow(3, numClassesTotal) - 2) -
    (((double) numClassesTotal * priorEntropy) -
     (numClassesRight * entropyRight) -
     (numClassesLeft * entropyLeft));

  // Check if split is to be accepted
  return (gain > (Utils.log2(numCutPoints) + delta) / (double) numInstances);
}
 
Author: dsibournemouth, Project: autoweka, Lines of code: 65, Source file: Discretize.java

Example 5: FayyadAndIranisMDL

import weka.core.ContingencyTables; // import the package/class the method depends on
/**
 * Test using Fayyad and Irani's MDL criterion.
 *
 * @param priorCounts the class counts prior to splitting
 * @param bestCounts the class counts of the two subsets produced by the best split
 * @param numInstances the (weighted) number of instances
 * @param numCutPoints the number of candidate cut points
 * @return true if the split is acceptable
 */
private boolean FayyadAndIranisMDL(double[] priorCounts,
    double[][] bestCounts,
    double numInstances,
    int numCutPoints) {

  double priorEntropy, entropy, gain;
  double entropyLeft, entropyRight, delta;
  int numClassesTotal, numClassesRight, numClassesLeft;

  // Compute entropy before split.
  priorEntropy = ContingencyTables.entropy(priorCounts);

  // Compute entropy after split.
  entropy = ContingencyTables.entropyConditionedOnRows(bestCounts);

  // Compute information gain.
  gain = priorEntropy - entropy;

  // Number of classes occurring in the set
  numClassesTotal = 0;
  for (int i = 0; i < priorCounts.length; i++) {
    if (priorCounts[i] > 0) {
      numClassesTotal++;
    }
  }

  // Number of classes occurring in the left subset
  numClassesLeft = 0;
  for (int i = 0; i < bestCounts[0].length; i++) {
    if (bestCounts[0][i] > 0) {
      numClassesLeft++;
    }
  }

  // Number of classes occurring in the right subset
  numClassesRight = 0;
  for (int i = 0; i < bestCounts[1].length; i++) {
    if (bestCounts[1][i] > 0) {
      numClassesRight++;
    }
  }

  // Entropy of the left and the right subsets
  entropyLeft = ContingencyTables.entropy(bestCounts[0]);
  entropyRight = ContingencyTables.entropy(bestCounts[1]);

  // Compute terms for MDL formula
  delta = Utils.log2(Math.pow(3, numClassesTotal) - 2) -
    (((double) numClassesTotal * priorEntropy) -
     (numClassesRight * entropyRight) -
     (numClassesLeft * entropyLeft));

  // Check if split is to be accepted
  return (gain > (Utils.log2(numCutPoints) + delta) / (double) numInstances);
}
 
Author: williamClanton, Project: jbossBA, Lines of code: 65, Source file: Discretize.java

Example 6: gain

import weka.core.ContingencyTables; // import the package/class the method depends on
/**
 * Computes value of splitting criterion after split.
 * 
 * @param dist the distributions
 * @param priorVal the splitting criterion
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {

  return priorVal - ContingencyTables.entropyConditionedOnRows(dist);
}
 
Author: seqcode, Project: seqcode-core, Lines of code: 12, Source file: AttributeRandomTree.java

Example 7: gain

import weka.core.ContingencyTables; // import the package/class the method depends on
/**
 * Computes value of splitting criterion after split.
 * 
 * @param dist the distributions
 * @param priorVal the splitting criterion
 * @return the gain after splitting
 */
protected double gain(double[][] dist, double priorVal) {

  return priorVal - ContingencyTables.entropyConditionedOnRows(dist);
}
 
Author: mydzigear, Project: repo.kmeanspp.silhouette_score, Lines of code: 12, Source file: REPTree.java

Example 8: gain

import weka.core.ContingencyTables; // import the package/class the method depends on
/**
 * Computes value of splitting criterion after split.
 * 
 * @param dist
 *            the distributions
 * @param priorVal
 *            the splitting criterion
 * @return the gain after the split
 */
protected double gain(double[][] dist, double priorVal) {

  return priorVal - ContingencyTables.entropyConditionedOnRows(dist);
}
 
Author: triguero, Project: Keel3.0, Lines of code: 14, Source file: RandomTree.java

Example 9: gain

import weka.core.ContingencyTables; // import the package/class the method depends on
/**
 * Computes value of splitting criterion after split.
 *
 * @param dist	the distribution
 * @param priorVal	the prior value of the splitting criterion
 * @return		the gain
 */
protected double gain(double[][] dist, double priorVal) {
  return priorVal - ContingencyTables.entropyConditionedOnRows(dist);
}
 
Author: fracpete, Project: collective-classification-weka-package, Lines of code: 11, Source file: CollectiveTree.java
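Examples 6 through 9 all wrap the same one-line pattern, so a single usage sketch covers them. The driver class below, the way priorVal is obtained (via ContingencyTables.entropy on the parent class counts), and the numbers are assumptions for illustration only:

import weka.core.ContingencyTables;

public class GainSketch {

  // Same pattern as the gain(...) helpers in examples 6-9:
  // criterion value before the split minus the value after the split.
  static double gain(double[][] dist, double priorVal) {
    return priorVal - ContingencyTables.entropyConditionedOnRows(dist);
  }

  public static void main(String[] args) {
    // Hypothetical parent class counts and one candidate binary split.
    double[] parentCounts = { 9.0, 11.0 };
    double[][] split = { { 8.0, 2.0 }, { 1.0, 9.0 } };

    // Prior value of the criterion: class entropy before splitting.
    double priorVal = ContingencyTables.entropy(parentCounts);

    System.out.println("information gain = " + gain(split, priorVal));
  }
}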


Note: The weka.core.ContingencyTables.entropyConditionedOnRows method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by many developers, and copyright of the source code remains with the original authors. When distributing or using the code, please follow the license of the corresponding project; do not reproduce without permission.