本文整理汇总了C++中DataFrame::isDataSetPure方法的典型用法代码示例。如果您正苦于以下问题:C++ DataFrame::isDataSetPure方法的具体用法?C++ DataFrame::isDataSetPure怎么用?C++ DataFrame::isDataSetPure使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类DataFrame的用法示例。
在下文中一共展示了DataFrame::isDataSetPure方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: _build
/**
 * Recursively grows the subtree rooted at @p node from the samples listed in
 * @p dataSet.
 *
 * A node becomes a leaf when the sample set holds at most @p nodeSize entries,
 * when DataFrame::isDataSetPure reports it pure, or when _igc.findDataSplit
 * reports that no split is possible. Otherwise the node records the chosen
 * split (factor index, split value, purity delta), two children are allocated
 * with new, and the function recurses on the lower and upper part of the
 * sample list.
 *
 * Node ids: every leaf receives the next value of a function-local static
 * counter, so ids keep increasing across calls; interior nodes get id 0.
 *
 * @param data     Training data queried for labels, candidate factors,
 *                 per-factor statistics and ordering.
 * @param dataSet  Indices of the samples that reach this node. Passed to
 *                 sortIndicesOnFactorValue before splitting (presumably
 *                 re-ordered in place - confirm against DataFrame).
 * @param node     Node to fill in; dereferenced unconditionally, so it must
 *                 not be null. Its child pointers are overwritten, not freed.
 * @param nodeSize Sample-count threshold at or below which a node is not
 *                 split any further.
 *
 * NOTE(review): the static id counter is modified without any
 * synchronization visible here, so concurrent calls would race on it.
 */
void RandomTree::_build(DataFrame & data, std::vector<unsigned int> & dataSet, TreeNode * node, unsigned int nodeSize)
{
  // Shared by every invocation; only leaf nodes consume ids.
  static unsigned int idCtr = 0;

  node->leftChild = NULL;
  node->rightChild = NULL;

  // The size test runs first so isDataSetPure is never invoked for a set that
  // already terminates on size alone (this includes an empty set).
  if(dataSet.size() <= nodeSize || data.isDataSetPure(dataSet))
  {
    // Terminal node: label it with the class of its first sample.
    // NOTE(review): when the node stops on size rather than purity, the first
    // sample need not belong to the majority class - confirm this is intended.
    if(!dataSet.empty())
    {
      node->classLabel = data.getTrainingLabel(dataSet[0]);
    }
    // An empty set has no first sample to read; classLabel is left untouched.
    node->isPure = true;
    node->purityDelta = 0;
    node->rangeMin = node->rangeMax = 0;
    node->dataList = dataSet;
    idCtr++;
    node->nodeId = idCtr;
    return;
  }

  // Pick the candidate factors for this node and look for the best split.
  std::vector<unsigned int> factors;
  unsigned int splitIdx = 0;
  unsigned int fIdx = 0;
  double splitVal = 0.0;
  double purityDelta = 0.0;

  data.selectRandomFactors(_factPerNode, factors);

  const bool splitPossible =
    _igc.findDataSplit(data, factors, dataSet, splitIdx, fIdx, splitVal, purityDelta);

  if(!splitPossible)
  {
    // No split possible (all factor values the same across all factors):
    // vote on the classes and turn this node into a leaf.
    node->classLabel = data.getMajorityTrainingLabel(dataSet);
    node->isPure = true;
    node->purityDelta = 0;
    // Zero the range like the other leaf branch does, so leaves are uniform.
    node->rangeMin = node->rangeMax = 0;
    node->dataList = dataSet;
    idCtr++;
    node->nodeId = idCtr;
    return;
  }

  // Interior node: record the split and create both children.
  node->isPure = false;
  node->leftChild = new TreeNode();
  node->rightChild = new TreeNode();
  node->splitValue = splitVal;
  node->factorIndex = fIdx;
  node->purityDelta = purityDelta;
  node->nodeId = 0;

  // Only the quartiles written to q1/q3 are used below; the returned
  // bandwidth and the other statistics are not needed for the range.
  double minVal = 0.0;
  double maxVal = 0.0;
  double mean = 0.0;
  double q1 = 0.0;
  double q3 = 0.0;
  data.computeBandwidthByFactor(fIdx, dataSet, minVal, maxVal, mean, q1, q3);

  // Range spans three interquartile ranges beyond each quartile.
  const double iqr = q3 - q1;
  node->rangeMin = q1 - (3 * iqr);
  node->rangeMax = q3 + (3 * iqr);

  // Order the samples by the chosen factor so that positions [0, splitIdx)
  // form the left child and [splitIdx, size) the right child.
  data.sortIndicesOnFactorValue(dataSet, fIdx);

  {
    // Scoped so the left half is released before the right subtree is built.
    std::vector<unsigned int> leftSplit(dataSet.begin(), dataSet.begin() + splitIdx);
    _build(data, leftSplit, node->leftChild, nodeSize);
  }
  {
    std::vector<unsigned int> rightSplit(dataSet.begin() + splitIdx, dataSet.end());
    _build(data, rightSplit, node->rightChild, nodeSize);
  }
}