This article collects typical usage examples of the C++ CDataset class. If you have been wondering what exactly CDataset is used for, how to use it, or what real CDataset code looks like, the curated examples here may help.
Below are 15 code examples of the CDataset class, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better C++ code examples.
Example 1: Deviance
double CAdaBoost::Deviance(const CDataset& kData, const Bag& kBag,
                           const double* kFuncEstimate) {
  double loss = 0.0;
  double weight = 0.0;

  // Switch to validation set if necessary
  unsigned long num_of_rows_in_set = kData.get_size_of_set();

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : loss, weight) num_threads(get_num_threads())
  for (unsigned long i = 0; i < num_of_rows_in_set; i++) {
    loss += kData.weight_ptr()[i] *
            std::exp(-(2 * kData.y_ptr()[i] - 1) *
                     (kData.offset_ptr()[i] + kFuncEstimate[i]));
    weight += kData.weight_ptr()[i];
  }

  // TODO: Check if weights are all zero for validation set
  if ((weight == 0.0) && (loss == 0.0)) {
    return nan("");
  } else if (weight == 0.0) {
    return HUGE_VAL;
  }

  return loss / weight;
}
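For reference, the deviance computed above is the weighted mean of the AdaBoost exponential loss, with offset $o_i$, current fit $f_i$, and labels $y_i \in \{0,1\}$ recoded to $\pm 1$ via $2y_i-1$:

$$D = \frac{\sum_i w_i \exp\!\big(-(2y_i-1)(o_i+f_i)\big)}{\sum_i w_i}.$$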
Example 2: ComputeWorkingResponse
void CAdaBoost::ComputeWorkingResponse(const CDataset& kData, const Bag& kBag,
                                       const double* kFuncEstimate,
                                       std::vector<double>& residuals) {
#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    num_threads(get_num_threads())
  for (unsigned long i = 0; i < kData.get_trainsize(); i++) {
    residuals[i] = -(2 * kData.y_ptr()[i] - 1) *
                   std::exp(-(2 * kData.y_ptr()[i] - 1) *
                            (kData.offset_ptr()[i] + kFuncEstimate[i]));
  }
}
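The residual here comes straight from differentiating the per-observation exponential loss of Example 1. With $L_i = \exp\!\big(-(2y_i-1)(o_i+f_i)\big)$,

$$\frac{\partial L_i}{\partial f_i} = -(2y_i-1)\,\exp\!\big(-(2y_i-1)(o_i+f_i)\big),$$

which is exactly the expression assigned to residuals[i].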
Example 3: ComputeWorkingResponse
void CPoisson::ComputeWorkingResponse(const CDataset& kData, const Bag& kBag,
                                      const double* kFuncEstimate,
                                      std::vector<double>& residuals) {
  // compute working response
#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    num_threads(get_num_threads())
  for (unsigned long i = 0; i < kData.get_trainsize(); i++) {
    const double delta_func_est = kFuncEstimate[i] + kData.offset_ptr()[i];
    residuals[i] = kData.y_ptr()[i] - std::exp(delta_func_est);
  }
}
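With $\eta_i = o_i + f_i$, the per-observation Poisson negative log-likelihood is, up to terms constant in $f$, $\ell_i = e^{\eta_i} - y_i\eta_i$, so the negative gradient is

$$-\frac{\partial \ell_i}{\partial f_i} = y_i - e^{\eta_i},$$

which is the working response assigned above.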
Example 4: PrepareData
void CSVM::PrepareData(const CDataset &OrgSet, struct svm_problem &DataDesc)
{
    // For SVM, all multi-valued discrete attributes of the training data
    // must be expanded into multiple continuous attributes.
    const CDataset *TrainSet = &OrgSet;
    if (!OrgSet.AllContinuous())
        TrainSet = OrgSet.ExpandDiscrete();
    const MATRIX &TrainData = TrainSet->GetData();
    const CASE_INFO &CaseInfo = TrainSet->GetInfo();
    // number of attributes (the last column is the label)
    AttributeNum = CaseInfo.ValidWidth - 1;
    // instances are formatted as libsvm requires
    // number of instances
    DataDesc.l = CaseInfo.Height;
    // labels of instances
    DataDesc.y = new double[DataDesc.l];
    // contents of instances (all attributes plus a terminator node;
    // each node is initialized as an end-of-row marker)
    struct svm_node Val = {-1, 0};
    fill_d2(struct svm_node, DataDesc.x, CaseInfo.Height, CaseInfo.ValidWidth, Val);
    for (int i = 0; i < CaseInfo.Height; i++)
    {
        DataDesc.y[i] = (double)TrainData[i][CaseInfo.ValidWidth - 1].Discr;
        int ValidValue = 0;
        for (int j = 0; j < CaseInfo.ValidWidth - 1; j++)
        {
            if (CaseInfo.ValidAttrs[j].AttType == ATT_DISCRETE)
            {
                throw(CError("SVM: discrete attribute should have been expanded!\n", 100, 0));
            }
            else // scale the value into [0,1]
            {
                if (TrainData[i][j].Cont == 0)
                    continue;
                else if (CaseInfo.ValidAttrs[j].Max == CaseInfo.ValidAttrs[j].Min)
                    continue;
                else
                {
                    DataDesc.x[i][ValidValue].index = j + 1;
                    DataDesc.x[i][ValidValue].value =
                        (TrainData[i][j].Cont - CaseInfo.ValidAttrs[j].Min) /
                        (CaseInfo.ValidAttrs[j].Max - CaseInfo.ValidAttrs[j].Min);
                    ValidValue++;
                }
            }
        }
        // the end-of-row tag was already set by the initialization above
    }
    if (!OrgSet.AllContinuous())
        delete TrainSet;
    return;
}
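The conversion above boils down to min-max scaling each continuous value into [0,1] and storing only non-zero entries in libsvm's sparse svm_node format. A minimal self-contained sketch of that idea follows; to_svm_row and the plain double arrays are hypothetical stand-ins for the CDataset plumbing:

#include <vector>

// From libsvm: a sparse entry; index == -1 terminates a row.
struct svm_node { int index; double value; };

// Convert one dense feature row into a sparse, min-max-scaled libsvm row.
std::vector<svm_node> to_svm_row(const double* row, const double* mins,
                                 const double* maxs, int num_attrs) {
  std::vector<svm_node> out;
  for (int j = 0; j < num_attrs; j++) {
    if (row[j] == 0 || maxs[j] == mins[j]) continue;  // skip zeros / constants
    svm_node node;
    node.index = j + 1;                               // libsvm indices are 1-based
    node.value = (row[j] - mins[j]) / (maxs[j] - mins[j]);
    out.push_back(node);
  }
  out.push_back(svm_node{-1, 0.0});                   // end-of-row terminator
  return out;
}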
Example 5: InitF
double CPoisson::InitF(const CDataset& kData) {
  double sum = 0.0;
  double denom = 0.0;

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : sum, denom) num_threads(get_num_threads())
  for (unsigned long i = 0; i < kData.get_trainsize(); i++) {
    sum += kData.weight_ptr()[i] * kData.y_ptr()[i];
    denom += kData.weight_ptr()[i] * std::exp(kData.offset_ptr()[i]);
  }

  return std::log(sum / denom);
}
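The closed form follows from minimizing the weighted Poisson deviance over a constant $f$: setting $\sum_i w_i\big(y_i - e^{o_i+f}\big) = 0$ and solving for $f$ gives

$$f_0 = \log\frac{\sum_i w_i y_i}{\sum_i w_i e^{o_i}}.$$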
Example 6: ComputeWorkingResponse
void CGaussian::ComputeWorkingResponse(const CDataset& kData, const Bag& kBag,
                                       const double* kFuncEstimate,
                                       std::vector<double>& residuals) {
  if (!(kData.y_ptr() && kFuncEstimate && kData.weight_ptr())) {
    throw gbm_exception::InvalidArgument();
  }

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    num_threads(get_num_threads())
  for (unsigned long i = 0; i < kData.get_trainsize(); i++) {
    residuals[i] = kData.y_ptr()[i] - kData.offset_ptr()[i] - kFuncEstimate[i];
  }
}
Example 7: InitF
double CGaussian::InitF(const CDataset& kData) {
  double sum = 0.0;
  double totalweight = 0.0;

  // compute the mean
#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : sum, totalweight) num_threads(get_num_threads())
  for (unsigned long i = 0; i < kData.get_trainsize(); i++) {
    sum += kData.weight_ptr()[i] * (kData.y_ptr()[i] - kData.offset_ptr()[i]);
    totalweight += kData.weight_ptr()[i];
  }

  return sum / totalweight;
}
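This is the weighted mean of the offset-adjusted response, which minimizes the weighted squared error $\sum_i w_i (y_i - o_i - f)^2$ over a constant $f$:

$$f_0 = \frac{\sum_i w_i (y_i - o_i)}{\sum_i w_i}.$$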
Example 8:
// select base classifiers by greedily maximizing accuracy on a validation set
CForwardSelect::CForwardSelect(const CEnsemble &UEnsemble, const CDataset &ValidatingSet)
    : CEnsemblePruner(UEnsemble)
{
    Name = MyName;
    // info
    int CaseNum = ValidatingSet.GetInfo().Height;
    int EnsembleSize = Ensemble.GetSize();
    // start time for training
    clock_t start = clock();
    // get predictions of all base classifiers
    vector<CPrediction*> *Predictions = Ensemble.AllClassify(ValidatingSet);
    // initialize with no classifier selected
    for (int i = 0; i < EnsembleSize; i++)
        Weights.push_back(0);
    // add classifiers one by one
    double BestAccr = 0;
    for (int i = 0; i < EnsembleSize; i++)
    {
        // add the best one in each round
        int Best = -1;
        for (int j = 0; j < EnsembleSize; j++)
        {
            // skip classifiers that have already been selected
            if (Weights[j] > 0) continue;
            // add this classifier temporarily
            Weights[j] = 1;
            // predict
            CPrediction *Prediction = Ensemble.Classify(ValidatingSet, *Predictions, Weights);
            double Accuracy = Prediction->GetAccuracy();
            delete Prediction;
            // better accuracy?
            if (Accuracy > BestAccr)
            {
                Best = j;
                BestAccr = Accuracy;
                // if accuracy reaches 1.0, nothing better can be found
                if (Accuracy >= 1.0)
                    break;
            }
            // restore the previous state
            Weights[j] = 0;
        }
        // if accuracy reaches 1.0, nothing better can be found
        if (BestAccr >= 1.0)
            break;
        // commit the best one of this round
        if (Best != -1)
            Weights[Best] = 1;
    }
    for (int i = 0; i < EnsembleSize; i++)
        delete ((*Predictions)[i]);
    delete Predictions;
    // time consumed
    CreatingTime = (double)(clock() - start) / CLOCKS_PER_SEC;
}
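The constructor above is a textbook greedy forward selection: each round it tentatively adds every unselected classifier, keeps the one that raises validation accuracy the most, and stops once accuracy reaches 1.0. A minimal, self-contained sketch of the same loop follows, assuming a hypothetical evaluate callback in place of Ensemble.Classify plus GetAccuracy; unlike the original, it also stops early when no candidate improves:

#include <functional>
#include <vector>

// Greedy forward selection: weights[j] == 1 marks classifier j as selected.
std::vector<int> ForwardSelect(
    int ensemble_size,
    const std::function<double(const std::vector<int>&)>& evaluate) {
  std::vector<int> weights(ensemble_size, 0);
  double best_accuracy = 0.0;
  for (int round = 0; round < ensemble_size; ++round) {
    int best = -1;
    for (int j = 0; j < ensemble_size; ++j) {
      if (weights[j] > 0) continue;  // already selected
      weights[j] = 1;                // tentatively add classifier j
      const double accuracy = evaluate(weights);
      weights[j] = 0;                // undo the tentative add
      if (accuracy > best_accuracy) {
        best = j;
        best_accuracy = accuracy;
      }
    }
    if (best == -1) break;            // no candidate improved accuracy
    weights[best] = 1;                // commit this round's winner
    if (best_accuracy >= 1.0) break;  // cannot beat perfect accuracy
  }
  return weights;
}

As in the original, the worst case costs O(n^2) evaluations for an ensemble of n classifiers.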
Example 9: FitBestConstant
void CPoisson::FitBestConstant(const CDataset& kData, const Bag& kBag,
                               const double* kFuncEstimate,
                               unsigned long num_terminalnodes,
                               std::vector<double>& residuals,
                               CCARTTree& tree) {
  unsigned long obs_num = 0;
  unsigned long node_num = 0;
  vector<double> numerator_vec(num_terminalnodes, 0.0);
  vector<double> denominator_vec(num_terminalnodes, 0.0);
  vector<double> max_vec(num_terminalnodes, -HUGE_VAL);
  vector<double> min_vec(num_terminalnodes, HUGE_VAL);

  for (obs_num = 0; obs_num < kData.get_trainsize(); obs_num++) {
    if (kBag.get_element(obs_num)) {
      const unsigned long node = tree.get_node_assignments()[obs_num];
      const double func_est =
          kData.offset_ptr()[obs_num] + kFuncEstimate[obs_num];
      numerator_vec[node] +=
          kData.weight_ptr()[obs_num] * kData.y_ptr()[obs_num];
      denominator_vec[node] +=
          kData.weight_ptr()[obs_num] * std::exp(func_est);
      // Track the per-node range of offset + f; without these updates the
      // +/-19 clamps below would compare against the +/-HUGE_VAL
      // initializers and never bind.
      max_vec[node] = R::fmax2(func_est, max_vec[node]);
      min_vec[node] = R::fmin2(func_est, min_vec[node]);
    }
  }

  for (node_num = 0; node_num < num_terminalnodes; node_num++) {
    if (tree.has_node(node_num)) {
      if (numerator_vec[node_num] == 0.0) {
        // DEBUG: if the numerator is 0 the prediction would be -Inf.
        // Not sure what else to do except plug in an arbitrary
        // negative number, -1? -10? Let's use -1, then make
        // sure |adF| < 19 always.
        tree.get_terminal_nodes()[node_num]->set_prediction(-19.0);
      } else if (denominator_vec[node_num] == 0.0) {
        tree.get_terminal_nodes()[node_num]->set_prediction(0.0);
      } else {
        tree.get_terminal_nodes()[node_num]->set_prediction(
            std::log(numerator_vec[node_num] / denominator_vec[node_num]));
      }
      tree.get_terminal_nodes()[node_num]->set_prediction(
          R::fmin2(tree.get_terminal_nodes()[node_num]->get_prediction(),
                   19 - max_vec[node_num]));
      tree.get_terminal_nodes()[node_num]->set_prediction(
          R::fmax2(tree.get_terminal_nodes()[node_num]->get_prediction(),
                   -19 - min_vec[node_num]));
    }
  }
}
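Per terminal node $t$, the fitted constant is the same Poisson step as in InitF (Example 5), restricted to the in-bag observations falling in that node:

$$\rho_t = \log\frac{\sum_{i \in t} w_i y_i}{\sum_{i \in t} w_i e^{o_i + f_i}},$$

then clamped so that the updated $o_i + f_i + \rho_t$ stays within $[-19, 19]$ for every in-bag observation in the node, keeping later exp() calls well-behaved; this is also why the loop must actually populate max_vec and min_vec, as noted in the comment above.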
Example 10: BagImprovement
double CGaussian::BagImprovement(const CDataset& kData, const Bag& kBag,
                                 const double* kFuncEstimate,
                                 const double kShrinkage,
                                 const std::vector<double>& kDeltaEstimate) {
  double returnvalue = 0.0;
  double weight = 0.0;

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : returnvalue, weight) num_threads(get_num_threads())
  for (unsigned long i = 0; i < kData.get_trainsize(); i++) {
    if (!kBag.get_element(i)) {
      const double deltafunc_est = kFuncEstimate[i] + kData.offset_ptr()[i];
      returnvalue += kData.weight_ptr()[i] * kShrinkage * kDeltaEstimate[i] *
                     (2.0 * (kData.y_ptr()[i] - deltafunc_est) -
                      kShrinkage * kDeltaEstimate[i]);
      weight += kData.weight_ptr()[i];
    }
  }

  return returnvalue / weight;
}
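The out-of-bag improvement summand follows from expanding the change in squared error when stepping from $\tilde f_i = f_i + o_i$ to $\tilde f_i + \lambda\Delta_i$ (with $\lambda$ the shrinkage):

$$(y_i-\tilde f_i)^2 - (y_i-\tilde f_i-\lambda\Delta_i)^2 = \lambda\Delta_i\big(2(y_i-\tilde f_i) - \lambda\Delta_i\big),$$

which is exactly the weighted term accumulated in the loop.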
Example 11: Deviance
double CGaussian::Deviance(const CDataset& kData, const Bag& kBag,
                           const double* kFuncEstimate) {
  double loss = 0.0;
  double weight = 0.0;
  unsigned long num_rows_in_set = kData.get_size_of_set();

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : loss, weight) num_threads(get_num_threads())
  for (unsigned long i = 0; i < num_rows_in_set; i++) {
    const double tmp =
        (kData.y_ptr()[i] - kData.offset_ptr()[i] - kFuncEstimate[i]);
    loss += kData.weight_ptr()[i] * tmp * tmp;
    weight += kData.weight_ptr()[i];
  }

  // TODO: Check if weights are all zero for validation set
  if ((weight == 0.0) && (loss == 0.0)) {
    return nan("");
  } else if (weight == 0.0) {
    return copysign(HUGE_VAL, loss);
  }

  return loss / weight;
}
Example 12: InitF
double CAdaBoost::InitF(const CDataset& kData) {
  double numerator = 0.0;
  double denominator = 0.0;

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : numerator, denominator) num_threads(get_num_threads())
  for (unsigned long i = 0; i < kData.get_trainsize(); i++) {
    if (kData.y_ptr()[i] == 1.0) {
      numerator += kData.weight_ptr()[i] * std::exp(-kData.offset_ptr()[i]);
    } else {
      denominator += kData.weight_ptr()[i] * std::exp(kData.offset_ptr()[i]);
    }
  }

  return 0.5 * std::log(numerator / denominator);
}
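This is the exact minimizer of the weighted exponential loss over a constant $f$: writing the loss as $e^{-f}\sum_{y_i=1} w_i e^{-o_i} + e^{f}\sum_{y_i=0} w_i e^{o_i}$ and setting its derivative in $f$ to zero gives

$$f_0 = \frac{1}{2}\log\frac{\sum_{y_i=1} w_i e^{-o_i}}{\sum_{y_i=0} w_i e^{o_i}}.$$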
Example 13: FitBestConstant
void CAdaBoost::FitBestConstant(const CDataset& kData, const Bag& kBag,
                                const double* kFuncEstimate,
                                unsigned long num_terminalnodes,
                                std::vector<double>& residuals,
                                CCARTTree& tree) {
  unsigned long obs_num = 0;
  unsigned long node_num = 0;
  numerator_bestconstant_.resize(num_terminalnodes);
  numerator_bestconstant_.assign(numerator_bestconstant_.size(), 0.0);
  denominator_bestconstant_.resize(num_terminalnodes);
  denominator_bestconstant_.assign(denominator_bestconstant_.size(), 0.0);

  for (obs_num = 0; obs_num < kData.get_trainsize(); obs_num++) {
    if (kBag.get_element(obs_num)) {
      const double deltafunc_est =
          kFuncEstimate[obs_num] + kData.offset_ptr()[obs_num];
      numerator_bestconstant_[tree.get_node_assignments()[obs_num]] +=
          kData.weight_ptr()[obs_num] * (2 * kData.y_ptr()[obs_num] - 1) *
          std::exp(-(2 * kData.y_ptr()[obs_num] - 1) * deltafunc_est);
      denominator_bestconstant_[tree.get_node_assignments()[obs_num]] +=
          kData.weight_ptr()[obs_num] *
          std::exp(-(2 * kData.y_ptr()[obs_num] - 1) * deltafunc_est);
    }
  }

  for (node_num = 0; node_num < num_terminalnodes; node_num++) {
    if (tree.has_node(node_num)) {
      if (denominator_bestconstant_[node_num] == 0) {
        tree.get_terminal_nodes()[node_num]->set_prediction(0.0);
      } else {
        tree.get_terminal_nodes()[node_num]->set_prediction(
            numerator_bestconstant_[node_num] /
            denominator_bestconstant_[node_num]);
      }
    }
  }
}
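Each terminal-node prediction here is a single Newton-Raphson step on the node's exponential loss at $\rho = 0$: with $z_i = 2y_i - 1$ and $\tilde f_i = f_i + o_i$, the node loss $\sum_{i\in t} w_i e^{-z_i(\tilde f_i + \rho)}$ has first derivative $-\sum_{i\in t} w_i z_i e^{-z_i \tilde f_i}$ and second derivative $\sum_{i\in t} w_i z_i^2 e^{-z_i \tilde f_i} = \sum_{i\in t} w_i e^{-z_i \tilde f_i}$ (since $z_i^2 = 1$), so the step is

$$\rho_t = \frac{\sum_{i\in t} w_i z_i e^{-z_i\tilde f_i}}{\sum_{i\in t} w_i e^{-z_i\tilde f_i}}.$$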
Example 14: Train
void CNaiveBayes::Train(const CDataset &TrainSet)
{
    // start time for training
    clock_t start = clock();
    // data
    const MATRIX &OrgData = TrainSet.GetData();
    const CASE_INFO &OrgInfo = TrainSet.GetInfo();
    // If the range of a continuous attribute has been extended, should all
    // existing statistics be recalculated? They can't be; some information
    // is already lost. Only the first and last intervals can be extended.
    // statistics
    for (int i = 0; i < OrgInfo.Height; i++)
    {
        // label of the instance
        int Class = OrgData[i][OrgInfo.ValidWidth - 1].Discr;
        // each attribute
        for (int j = 0; j < OrgInfo.ValidWidth - 1; j++)
            switch (OrgInfo.ValidAttrs[j].AttType)
            {
                case ATT_DISCRETE:
                {
                    // value of this attribute
                    int Val = OrgData[i][j].Discr;
                    Estims[j][Class].DiscEst.Count++;
                    // j: attribute, Class: label, Val: value of the attribute
                    Estims[j][Class].DiscEst.AttrCount[Val]++;
                }
                break;
                case ATT_CONTINUOUS:
                case ATT_DATETIME:
                {
                    double Val = OrgData[i][j].Cont;
                    int ValNo;
                    if (OrgInfo.ValidAttrs[j].Max == OrgInfo.ValidAttrs[j].Min)
                        ValNo = 0;
                    else
                        // map the value into one of the histogram bins
                        // (the literal 10 presumably equals SplitNum)
                        ValNo = (int)((Val - Estims[j][Class].ContEst.Min) * 10 /
                                      (Estims[j][Class].ContEst.Max - Estims[j][Class].ContEst.Min));
                    if (ValNo >= SplitNum)
                        ValNo = SplitNum - 1;
                    if (ValNo < 0)
                        ValNo = 0;
                    Estims[j][Class].ContEst.Vals[ValNo]++;
                    Estims[j][Class].ContEst.Count++;
                }
                break;
                default:
                    break;
            } // switch: attribute type
    } // for data
    // normalize the counts into conditional frequencies
    for (int i = 0; i < OrgInfo.ValidWidth - 1; i++)
    {
        switch (OrgInfo.ValidAttrs[i].AttType)
        {
            case ATT_DISCRETE:
                for (int j = 0; j < OrgInfo.ClassNum; j++)
                {
                    int ValNum = (int)OrgInfo.ValidAttrs[i].Disc.size();
                    for (int k = 0; k < ValNum; k++)
                        Estims[i][j].DiscEst.AttrCount[k] /= Estims[i][j].DiscEst.Count;
                }
                break;
            case ATT_CONTINUOUS:
            case ATT_DATETIME:
                for (int j = 0; j < OrgInfo.ClassNum; j++)
                {
                    for (int k = 0; k < SplitNum; k++)
                        Estims[i][j].ContEst.Vals[k] /= Estims[i][j].ContEst.Count;
                }
                break;
            default:
                break;
        } // switch
    } // for attributes
    // time consumed
    CreatingTime += ((double)(clock() - start) / CLOCKS_PER_SEC);
}
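After normalization, the stored statistics are the usual empirical conditional-frequency estimates of naive Bayes; for a discrete attribute $j$ with value $v$ and class $c$,

$$\hat P(x_j = v \mid c) = \frac{N_{j,c,v}}{N_{j,c}},$$

and continuous attributes are handled the same way after discretization into SplitNum equal-width bins. Note that this snippet applies no smoothing (e.g. a Laplace correction), so an attribute value never observed with a class is assigned probability zero.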
Example 15: gbm_setup
std::auto_ptr<CDistribution> gbm_setup
(
    const CDataset& data,
    const std::string& family,
    int cTrees,
    int cDepth,
    int cMinObsInNode,
    int cNumClasses,
    double dShrinkage,
    double dBagFraction,
    int cTrain,
    int cFeatures,
    int& cGroups
)
{
    std::auto_ptr<CDistribution> pDist;
    cGroups = -1;

    // set the distribution
    if (family == "gamma") {
        pDist.reset(new CGamma());
    }
    else if (family == "tweedie") {
        pDist.reset(new CTweedie(data.misc_ptr()[0]));
    }
    else if (family == "bernoulli") {
        pDist.reset(new CBernoulli());
    }
    else if (family == "gaussian") {
        pDist.reset(new CGaussian());
    }
    else if (family == "poisson") {
        pDist.reset(new CPoisson());
    }
    else if (family == "adaboost") {
        pDist.reset(new CAdaBoost());
    }
    else if (family == "coxph") {
        pDist.reset(new CCoxPH());
    }
    else if (family == "laplace") {
        pDist.reset(new CLaplace());
    }
    else if (family == "quantile") {
        pDist.reset(new CQuantile(data.misc_ptr()[0]));
    }
    else if (family == "tdist") {
        pDist.reset(new CTDist(data.misc_ptr()[0]));
    }
    else if (family == "multinomial") {
        pDist.reset(new CMultinomial(cNumClasses, data.nrow()));
    }
    else if (family == "huberized") {
        pDist.reset(new CHuberized());
    }
    else if (family == "pairwise_conc") {
        pDist.reset(new CPairwise("conc"));
    }
    else if (family == "pairwise_ndcg") {
        pDist.reset(new CPairwise("ndcg"));
    }
    else if (family == "pairwise_map") {
        pDist.reset(new CPairwise("map"));
    }
    else if (family == "pairwise_mrr") {
        pDist.reset(new CPairwise("mrr"));
    }
    else {
        throw GBM::invalid_argument();
    }

    if (0 == family.compare(0, 8, "pairwise")) {
        cGroups = num_groups(data.misc_ptr(), cTrain);
    }

    return pDist;
}
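One note on style: std::auto_ptr is deprecated since C++11 and was removed in C++17. A minimal sketch of the same factory pattern with std::unique_ptr follows; make_distribution is a hypothetical name, not the gbm API, and only a few of the families above are shown:

#include <memory>
#include <string>

// Hypothetical modern variant of the factory above (a sketch, not gbm's API).
std::unique_ptr<CDistribution> make_distribution(const std::string& family,
                                                 const CDataset& data) {
  if (family == "gaussian") return std::unique_ptr<CDistribution>(new CGaussian());
  if (family == "poisson")  return std::unique_ptr<CDistribution>(new CPoisson());
  if (family == "adaboost") return std::unique_ptr<CDistribution>(new CAdaBoost());
  if (family == "quantile")
    return std::unique_ptr<CDistribution>(new CQuantile(data.misc_ptr()[0]));
  throw GBM::invalid_argument();
}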