当前位置: 首页>>代码示例>>C++>>正文


C++ CDataset类代码示例

本文整理汇总了C++中CDataset的典型用法代码示例。如果您正苦于以下问题:C++ CDataset类的具体用法?C++ CDataset怎么用?C++ CDataset使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了CDataset类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: Deviance

double CAdaBoost::Deviance(const CDataset& kData, const Bag& kBag,
                           const double* kFuncEstimate) {
  // Weighted mean AdaBoost exponential loss over the active set
  // (training or validation rows, depending on the dataset's state).
  double total_loss = 0.0;
  double total_weight = 0.0;

  const unsigned long num_rows = kData.get_size_of_set();

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : total_loss, total_weight) num_threads(get_num_threads())
  for (unsigned long row = 0; row < num_rows; row++) {
    const double signed_y = 2 * kData.y_ptr()[row] - 1;  // {0,1} -> {-1,+1}
    const double estimate = kData.offset_ptr()[row] + kFuncEstimate[row];
    total_loss += kData.weight_ptr()[row] * std::exp(-signed_y * estimate);
    total_weight += kData.weight_ptr()[row];
  }

  // TODO: Check if weights are all zero for validation set
  // Degenerate all-zero-weight set: 0/0 -> NaN, otherwise +inf.
  if (total_weight == 0.0) {
    return (total_loss == 0.0) ? nan("") : HUGE_VAL;
  }

  return total_loss / total_weight;
}
开发者ID:arnocandel,项目名称:gbm,代码行数:26,代码来源:adaboost.cpp

示例2: ComputeWorkingResponse

void CAdaBoost::ComputeWorkingResponse(const CDataset& kData, const Bag& kBag,
                                       const double* kFuncEstimate,
                                       std::vector<double>& residuals) {
  // Negative gradient of the exponential loss w.r.t. f:
  //   -z * exp(-z * (offset + f)),  where z = 2y - 1 recodes {0,1} to {-1,+1}.
#pragma omp parallel for schedule(static, get_array_chunk_size()) \
  num_threads(get_num_threads())
  for (unsigned long row = 0; row < kData.get_trainsize(); row++) {
    const double signed_y = 2 * kData.y_ptr()[row] - 1;
    const double estimate = kData.offset_ptr()[row] + kFuncEstimate[row];
    residuals[row] = -signed_y * std::exp(-signed_y * estimate);
  }
}
开发者ID:arnocandel,项目名称:gbm,代码行数:11,代码来源:adaboost.cpp

示例3: ComputeWorkingResponse

void CPoisson::ComputeWorkingResponse(const CDataset& kData, const Bag& kBag,
                                      const double* kFuncEstimate,
                                      std::vector<double>& residuals) {
// Poisson working residual: observed count minus the model's
// predicted mean exp(offset + f).
#pragma omp parallel for schedule(static, get_array_chunk_size()) \
  num_threads(get_num_threads())
  for (unsigned long row = 0; row < kData.get_trainsize(); row++) {
    residuals[row] =
        kData.y_ptr()[row] -
        std::exp(kFuncEstimate[row] + kData.offset_ptr()[row]);
  }
}
开发者ID:arnocandel,项目名称:gbm,代码行数:11,代码来源:poisson.cpp

示例4: PrepareData

void CSVM::PrepareData(const CDataset &OrgSet,struct svm_problem &DataDesc)
{
	//Convert a CDataset into libsvm's svm_problem layout: labels in DataDesc.y,
	//sparse min-max-normalized attribute values in DataDesc.x.
	//for SVM, we need to expand all multivalued discrete attributes of the training data into multi continuous attributes.
	//expand discrete attribute
	const CDataset *TrainSet=&OrgSet;
	bool Expanded=false;
	if(!OrgSet.AllContinuous())
	{
		TrainSet=OrgSet.ExpandDiscrete();
		Expanded=true;
	}
	const MATRIX &TrainData=TrainSet->GetData();
	const CASE_INFO &CaseInfo=TrainSet->GetInfo();

	//number of attribute for data set
	AttributeNum=CaseInfo.ValidWidth-1;
	//instances are formatted as libsvm requires
	//number of instances
	DataDesc.l=CaseInfo.Height;
	//labels of instances
	DataDesc.y=new double[DataDesc.l];
	//content of instances (all attributes plus a tag for end of line, each node is initialized as end of a row)
	struct svm_node Val={-1,0};
	fill_d2(struct svm_node,DataDesc.x,CaseInfo.Height,CaseInfo.ValidWidth,Val);
	for(int i=0;i<CaseInfo.Height;i++)
	{
		//label is the last column
		DataDesc.y[i]=(double)TrainData[i][CaseInfo.ValidWidth-1].Discr;
		int ValidValue=0;
		for(int j=0;j<CaseInfo.ValidWidth-1;j++)
		{
			if(CaseInfo.ValidAttrs[j].AttType==ATT_DISCRETE)
			{
				//BUG FIX: release the expanded copy before throwing, otherwise
				//the dataset allocated by ExpandDiscrete() leaks on this path
				if(Expanded)
					delete TrainSet;
				throw(CError("SVM: discrete attribute should have been expanded!\n",100,0));
			}
			else//range expanding
			{
				//zero values are omitted (libsvm sparse format); NOTE(review):
				//this skips raw zeros even when Min!=0, so the scaled value
				//would not be zero — presumably intentional sparsity, confirm
				if(TrainData[i][j].Cont==0)
					continue;
				//degenerate attribute range: nothing to scale
				else if(CaseInfo.ValidAttrs[j].Max==CaseInfo.ValidAttrs[j].Min)
					continue;
				else
				{
					//libsvm attribute indices are 1-based
					DataDesc.x[i][ValidValue].index=j+1;
					DataDesc.x[i][ValidValue].value=(TrainData[i][j].Cont-CaseInfo.ValidAttrs[j].Min)/
						(CaseInfo.ValidAttrs[j].Max-CaseInfo.ValidAttrs[j].Min);
					ValidValue++;
				}
			}
		}
		//tag for end of line has been set
	}

	if(Expanded)
		delete TrainSet;
	return;
}
开发者ID:Qiangli-Zhao,项目名称:LibEDM,代码行数:52,代码来源:b-svm.cpp

示例5: InitF

double CPoisson::InitF(const CDataset& kData) {
  // Closed-form intercept for Poisson: log of the weighted total of the
  // response over the weighted total of exp(offset).
  double weighted_count = 0.0;
  double weighted_exposure = 0.0;

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : weighted_count, weighted_exposure) num_threads(get_num_threads())
  for (unsigned long row = 0; row < kData.get_trainsize(); row++) {
    weighted_count += kData.weight_ptr()[row] * kData.y_ptr()[row];
    weighted_exposure +=
        kData.weight_ptr()[row] * std::exp(kData.offset_ptr()[row]);
  }

  return std::log(weighted_count / weighted_exposure);
}
开发者ID:arnocandel,项目名称:gbm,代码行数:13,代码来源:poisson.cpp

示例6: ComputeWorkingResponse

void CGaussian::ComputeWorkingResponse(const CDataset& kData, const Bag& kBag,
                                       const double* kFuncEstimate,
                                       std::vector<double>& residuals) {
  // Gaussian working residual is the plain prediction error y - offset - f.
  // Validate the pointers this loop dereferences before starting.
  const bool inputs_ok =
      kData.y_ptr() && kFuncEstimate && kData.weight_ptr();
  if (!inputs_ok) {
    throw gbm_exception::InvalidArgument();
  }

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
  num_threads(get_num_threads())
  for (unsigned long row = 0; row < kData.get_trainsize(); row++) {
    residuals[row] =
        kData.y_ptr()[row] - kData.offset_ptr()[row] - kFuncEstimate[row];
  }
}
开发者ID:arnocandel,项目名称:gbm,代码行数:14,代码来源:gaussian.cpp

示例7: InitF

double CGaussian::InitF(const CDataset& kData) {
  // Optimal constant under squared error: the weighted mean of the
  // offset-adjusted response.
  double weighted_sum = 0.0;
  double weight_total = 0.0;

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : weighted_sum, weight_total) num_threads(get_num_threads())
  for (unsigned long row = 0; row < kData.get_trainsize(); row++) {
    const double adjusted = kData.y_ptr()[row] - kData.offset_ptr()[row];
    weighted_sum += kData.weight_ptr()[row] * adjusted;
    weight_total += kData.weight_ptr()[row];
  }

  return weighted_sum / weight_total;
}
开发者ID:arnocandel,项目名称:gbm,代码行数:15,代码来源:gaussian.cpp

示例8:

//select the base classifier with the highest accuracy on validation set
//greedy forward selection: start from the empty ensemble and, each round,
//add the single classifier that most improves validation accuracy
CForwardSelect::CForwardSelect(const CEnsemble &UEnsemble,const CDataset &ValidatingSet)
:CEnsemblePruner(UEnsemble)
{
	Name=MyName;
	//Info
	int EnsembleSize=Ensemble.GetSize();

	//start time for training
	clock_t start=clock();

	//per-classifier predictions, computed once and reused for every candidate subset
	vector<CPrediction*> *Predictions=Ensemble.AllClassify(ValidatingSet);

	//initialize with no classifier selected
	for(int i=0;i<EnsembleSize;i++)
		Weights.push_back(0);
	//add classifier one by one
	double BestAccr=0;
	for(int i=0;i<EnsembleSize;i++)
	{
		//add the best in each round
		int Best=-1;
		for(int j=0;j<EnsembleSize;j++)
		{
			//skip the one has been selected
			if(Weights[j]>0)continue;
			//add this classifier temporarily
			Weights[j]=1;
			//predicting
			CPrediction *Prediction=Ensemble.Classify(ValidatingSet,*Predictions,Weights);
			double Accuracy=Prediction->GetAccuracy();
			delete Prediction;
			//better accuracy?
			if(Accuracy>BestAccr)
			{
				Best=j;
				BestAccr=Accuracy;
				//if accuracy is 1.0, no better one can be found
				//(Weights[j] stays selected; the outer loop exits below)
				if(Accuracy>=1.0)
					break;
			}
			//recover to the initial state
			Weights[j]=0;
		}
		//if accuracy is 1.0, no better one can be found
		if(BestAccr>=1.0)
			break;
		//BUG FIX (performance): if no candidate improved accuracy this round,
		//later rounds see the same weights, candidates and BestAccr, so they
		//cannot improve either — stop instead of re-evaluating identical subsets
		if(Best==-1)
			break;
		//select the best one of this round
		Weights[Best]=1;
	}

	//release the cached per-classifier predictions
	for(int i=0;i<EnsembleSize;i++)
		delete ((*Predictions)[i]);
	delete Predictions;
	//time consumed
	CreatingTime = (double)(clock() - start) / CLOCKS_PER_SEC;
}
开发者ID:DafeiYin,项目名称:LibEDM,代码行数:61,代码来源:FS.cpp

示例9: FitBestConstant

void CPoisson::FitBestConstant(const CDataset& kData, const Bag& kBag,
                               const double* kFuncEstimate,
                               unsigned long num_terminalnodes,
                               std::vector<double>& residuals,
                               CCARTTree& tree) {
  // Per-terminal-node constant for Poisson: log(sum w*y / sum w*exp(offset+f))
  // over in-bag rows, clamped so that prediction + (offset + f) stays in
  // [-19, 19] to keep later exp() calls finite.
  unsigned long obs_num = 0;
  unsigned long node_num = 0;
  vector<double> numerator_vec(num_terminalnodes, 0.0);
  vector<double> denominator_vec(num_terminalnodes, 0.0);
  vector<double> max_vec(num_terminalnodes, -HUGE_VAL);
  vector<double> min_vec(num_terminalnodes, HUGE_VAL);

  for (obs_num = 0; obs_num < kData.get_trainsize(); obs_num++) {
    if (kBag.get_element(obs_num)) {
      const unsigned long node = tree.get_node_assignments()[obs_num];
      const double func_est =
          kData.offset_ptr()[obs_num] + kFuncEstimate[obs_num];
      numerator_vec[node] +=
          kData.weight_ptr()[obs_num] * kData.y_ptr()[obs_num];
      denominator_vec[node] +=
          kData.weight_ptr()[obs_num] * std::exp(func_est);
      // BUG FIX: track the per-node range of offset+f. Previously max_vec /
      // min_vec were never updated from their -/+HUGE_VAL sentinels, so the
      // fmin2/fmax2 clamps below were no-ops.
      max_vec[node] = R::fmax2(func_est, max_vec[node]);
      min_vec[node] = R::fmin2(func_est, min_vec[node]);
    }
  }

  for (node_num = 0; node_num < num_terminalnodes; node_num++) {
    if (tree.has_node(node_num)) {
      if (numerator_vec[node_num] == 0.0) {
        // DEBUG: if vecdNum==0 then prediction = -Inf
        // Not sure what else to do except plug in an arbitrary
        //   negative number, -1? -10? Let's use -1, then make
        //   sure |adF| < 19 always.
        tree.get_terminal_nodes()[node_num]->set_prediction(-19.0);
      } else if (denominator_vec[node_num] == 0.0) {
        tree.get_terminal_nodes()[node_num]->set_prediction(0.0);
      } else {
        tree.get_terminal_nodes()[node_num]->set_prediction(
            std::log(numerator_vec[node_num] / denominator_vec[node_num]));
      }
      // Clamp so prediction + max(offset+f) <= 19 and
      // prediction + min(offset+f) >= -19 within this node.
      tree.get_terminal_nodes()[node_num]->set_prediction(
          R::fmin2(tree.get_terminal_nodes()[node_num]->get_prediction(),
                   19 - max_vec[node_num]));
      tree.get_terminal_nodes()[node_num]->set_prediction(
          R::fmax2(tree.get_terminal_nodes()[node_num]->get_prediction(),
                   -19 - min_vec[node_num]));
    }
  }
}
开发者ID:arnocandel,项目名称:gbm,代码行数:45,代码来源:poisson.cpp

示例10: BagImprovement

double CGaussian::BagImprovement(const CDataset& kData, const Bag& kBag,
                                 const double* kFuncEstimate,
                                 const double kShrinkage,
                                 const std::vector<double>& kDeltaEstimate) {
  // Weighted average, over out-of-bag rows, of the decrease in squared
  // error from taking the shrunken step kShrinkage * delta.
  double improvement = 0.0;
  double oob_weight = 0.0;

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : improvement, oob_weight) num_threads(get_num_threads())
  for (unsigned long row = 0; row < kData.get_trainsize(); row++) {
    if (kBag.get_element(row)) {
      continue;  // in-bag rows do not contribute
    }
    const double current = kFuncEstimate[row] + kData.offset_ptr()[row];
    const double step = kShrinkage * kDeltaEstimate[row];
    improvement += kData.weight_ptr()[row] * step *
                   (2.0 * (kData.y_ptr()[row] - current) - step);
    oob_weight += kData.weight_ptr()[row];
  }

  return improvement / oob_weight;
}
开发者ID:arnocandel,项目名称:gbm,代码行数:22,代码来源:gaussian.cpp

示例11: Deviance

double CGaussian::Deviance(const CDataset& kData, const Bag& kBag,
                           const double* kFuncEstimate) {
  // Weighted mean squared error over the active set (training or
  // validation rows, depending on the dataset's state).
  double sum_squares = 0.0;
  double sum_weights = 0.0;

  const unsigned long num_rows = kData.get_size_of_set();
#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : sum_squares, sum_weights) num_threads(get_num_threads())
  for (unsigned long row = 0; row < num_rows; row++) {
    const double err =
        kData.y_ptr()[row] - kData.offset_ptr()[row] - kFuncEstimate[row];
    sum_squares += kData.weight_ptr()[row] * err * err;
    sum_weights += kData.weight_ptr()[row];
  }

  // TODO: Check if weights are all zero for validation set
  // Degenerate all-zero-weight set: 0/0 -> NaN, otherwise signed infinity.
  if (sum_weights == 0.0) {
    return (sum_squares == 0.0) ? nan("") : copysign(HUGE_VAL, sum_squares);
  }

  return sum_squares / sum_weights;
}
开发者ID:arnocandel,项目名称:gbm,代码行数:24,代码来源:gaussian.cpp

示例12: InitF

double CAdaBoost::InitF(const CDataset& kData) {
  // Closed-form intercept for the exponential loss:
  //   0.5 * log( sum_{y==1} w*exp(-offset) / sum_{y!=1} w*exp(offset) ).
  double pos_term = 0.0;
  double neg_term = 0.0;

#pragma omp parallel for schedule(static, get_array_chunk_size()) \
    reduction(+ : pos_term, neg_term) num_threads(get_num_threads())
  for (unsigned long row = 0; row < kData.get_trainsize(); row++) {
    const double weight = kData.weight_ptr()[row];
    const double offset = kData.offset_ptr()[row];
    if (kData.y_ptr()[row] == 1.0) {
      pos_term += weight * std::exp(-offset);
    } else {
      neg_term += weight * std::exp(offset);
    }
  }

  return 0.5 * std::log(pos_term / neg_term);
}
开发者ID:arnocandel,项目名称:gbm,代码行数:16,代码来源:adaboost.cpp

示例13: FitBestConstant

void CAdaBoost::FitBestConstant(const CDataset& kData, const Bag& kBag,
                                const double* kFuncEstimate,
                                unsigned long num_terminalnodes,
                                std::vector<double>& residuals,
                                CCARTTree& tree) {
  // Per-terminal-node constant for the exponential loss: the ratio of
  // weighted signed exp-terms to weighted exp-terms over in-bag rows.
  numerator_bestconstant_.assign(num_terminalnodes, 0.0);
  denominator_bestconstant_.assign(num_terminalnodes, 0.0);

  for (unsigned long obs = 0; obs < kData.get_trainsize(); obs++) {
    if (!kBag.get_element(obs)) {
      continue;  // only in-bag observations contribute
    }
    const unsigned long node = tree.get_node_assignments()[obs];
    const double signed_y = 2 * kData.y_ptr()[obs] - 1;  // {0,1} -> {-1,+1}
    const double exp_term =
        std::exp(-signed_y * (kFuncEstimate[obs] + kData.offset_ptr()[obs]));
    numerator_bestconstant_[node] +=
        kData.weight_ptr()[obs] * signed_y * exp_term;
    denominator_bestconstant_[node] += kData.weight_ptr()[obs] * exp_term;
  }

  for (unsigned long node = 0; node < num_terminalnodes; node++) {
    if (!tree.has_node(node)) {
      continue;
    }
    const double denom = denominator_bestconstant_[node];
    // An empty/zero-weight node gets a neutral prediction of 0.
    tree.get_terminal_nodes()[node]->set_prediction(
        denom == 0 ? 0.0 : numerator_bestconstant_[node] / denom);
  }
}
开发者ID:arnocandel,项目名称:gbm,代码行数:37,代码来源:adaboost.cpp

示例14: Train

void CNaiveBayes::Train(const CDataset &TrainSet)
{
	//accumulate per-(attribute, class) frequency statistics over the training
	//set, then normalize the raw counts into conditional probability estimates
	//start time for training
	clock_t start=clock();

	//data
	const MATRIX &OrgData=TrainSet.GetData();
	const CASE_INFO &OrgInfo=TrainSet.GetInfo();

	//if range of a continuous attribute changed (extended), should we re-calculate all existed statistics?
	//we can't, some information has lost. We can only extend the first and the last intervals

	//statistics
	for(int i=0;i<OrgInfo.Height;i++)
	{
		//label of instance
		int Class=OrgData[i][OrgInfo.ValidWidth-1].Discr;
		//each attribute
		for(int j=0;j<OrgInfo.ValidWidth-1;j++)
			switch(OrgInfo.ValidAttrs[j].AttType)
			{
				case ATT_DISCRETE:
					{
						//value of this attribute
						int Val=OrgData[i][j].Discr;
						Estims[j][Class].DiscEst.Count++;
						//j: attribute, Class: label, Val: value of attribute
						Estims[j][Class].DiscEst.AttrCount[Val]++;
					}
					break;
				case ATT_CONTINUOUS:
				case ATT_DATETIME:
					{
						//map the continuous value into one of the histogram
						//intervals kept for this (attribute, class) pair
						double Val=OrgData[i][j].Cont;
						int ValNo;

						//BUG FIX: guard the denominator actually used below
						//(the per-class ContEst range), not the global attribute
						//range; a degenerate per-class range could divide by zero
						//even when the global Max!=Min
						if(Estims[j][Class].ContEst.Max==Estims[j][Class].ContEst.Min)
							ValNo=0;
						else
							//NOTE(review): the hard-coded factor 10 is presumably
							//the interval count and should equal SplitNum — confirm
							ValNo=(int)((Val-Estims[j][Class].ContEst.Min)*10/
							(Estims[j][Class].ContEst.Max-Estims[j][Class].ContEst.Min));
						//clamp to the valid interval indices [0, SplitNum)
						if(ValNo>=SplitNum)
							ValNo=SplitNum-1;
						if(ValNo<0)
							ValNo=0;
						Estims[j][Class].ContEst.Vals[ValNo]++;
						Estims[j][Class].ContEst.Count++;
					}
					break;
				default:
					break;
			}//case: attribute type
	}//for data

	//calculate all other statistics: turn raw counts into relative frequencies
	for(int i=0;i<OrgInfo.ValidWidth-1;i++)
	{
		switch(OrgInfo.ValidAttrs[i].AttType)
		{
		case ATT_DISCRETE:
			for(int j=0;j<OrgInfo.ClassNum;j++)
			{
				int ValNum=(int)OrgInfo.ValidAttrs[i].Disc.size();
				for(int k=0;k<ValNum;k++)
					Estims[i][j].DiscEst.AttrCount[k]/=Estims[i][j].DiscEst.Count;
			}
			break;
		case ATT_CONTINUOUS:
		case ATT_DATETIME:
			for(int j=0;j<OrgInfo.ClassNum;j++)
			{
				for(int k=0;k<SplitNum;k++)
					Estims[i][j].ContEst.Vals[k]/=Estims[i][j].ContEst.Count;
			}
			break;
		default:
			break;
		}//switch
	}//for attributes

	//time consumed
	CreatingTime+=((double)(clock() - start) / CLOCKS_PER_SEC);
}
开发者ID:DafeiYin,项目名称:LibEDM,代码行数:83,代码来源:NaiveBayes.cpp

示例15: if

std::auto_ptr<CDistribution> gbm_setup
(
 const CDataset& data,
 const std::string& family,
 int cTrees,
 int cDepth,
 int cMinObsInNode,
 int cNumClasses,
 double dShrinkage,
 double dBagFraction,
 int cTrain,
 int cFeatures,
 int& cGroups
 )
{
  // Factory mapping a family name onto its CDistribution implementation.
  // cGroups is an out-parameter: stays -1 except for pairwise families,
  // where it is set to the number of groups found in the misc vector.
  std::auto_ptr<CDistribution> pDist;
  cGroups = -1;

  if (family == "gamma") {
    pDist.reset(new CGamma());
  } else if (family == "tweedie") {
    // Tweedie takes its power parameter from the misc vector.
    pDist.reset(new CTweedie(data.misc_ptr()[0]));
  } else if (family == "bernoulli") {
    pDist.reset(new CBernoulli());
  } else if (family == "gaussian") {
    pDist.reset(new CGaussian());
  } else if (family == "poisson") {
    pDist.reset(new CPoisson());
  } else if (family == "adaboost") {
    pDist.reset(new CAdaBoost());
  } else if (family == "coxph") {
    pDist.reset(new CCoxPH());
  } else if (family == "laplace") {
    pDist.reset(new CLaplace());
  } else if (family == "quantile") {
    // Quantile takes alpha from the misc vector.
    pDist.reset(new CQuantile(data.misc_ptr()[0]));
  } else if (family == "tdist") {
    // t-distribution takes its degrees of freedom from the misc vector.
    pDist.reset(new CTDist(data.misc_ptr()[0]));
  } else if (family == "multinomial") {
    pDist.reset(new CMultinomial(cNumClasses, data.nrow()));
  } else if (family == "huberized") {
    pDist.reset(new CHuberized());
  } else if (family == "pairwise_conc" || family == "pairwise_ndcg" ||
             family == "pairwise_map" || family == "pairwise_mrr") {
    // The IR measure name is the suffix after "pairwise_".
    pDist.reset(new CPairwise(family.substr(9)));
  } else {
    throw GBM::invalid_argument();
  }

  if (0 == family.compare(0, 8, "pairwise")) {
    cGroups = num_groups(data.misc_ptr(), cTrain);
  }

  return pDist;
}
开发者ID:RyanMichaluk,项目名称:gbm,代码行数:93,代码来源:gbm.cpp


注:本文中的CDataset类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。