当前位置: 首页>>代码示例>>C++>>正文


C++ dataset类代码示例

本文整理汇总了C++中dataset的典型用法代码示例。如果您正苦于以下问题:C++ dataset类的具体用法?C++ dataset怎么用?C++ dataset使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了dataset类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: foreach_well

// Calls `fn` once per "well": a maximal run of consecutive records whose
// value in the `id_field` column is identical. Each call receives a fresh
// dataset holding, for every column of `data`, only the records of that run.
//
// data      column-name -> column-values table to split.
// fn        callable invoked with each per-well dataset; returned to the
//           caller afterwards (same convention as std::for_each).
// id_field  name of the column whose value identifies a well; looked up
//           with data.at(), so a missing column is reported by at().
//
// Records are grouped by adjacency only; equal ids that are not contiguous
// yield separate wells. Every column is assumed to hold at least as many
// entries as the id column -- TODO confirm against the loader.
// NOTE(review): F is presumably a template parameter declared just above
// this excerpt.
F foreach_well(const dataset& data, F fn, std::string id_field)
{
    const auto& id = data.at(id_field);
    const std::size_t record_count = id.size();

    std::size_t begin_rec = 0;
    while (begin_rec < record_count) {
        // Locate one-past-the-end of the run starting at begin_rec. Scanning
        // per run (instead of special-casing the last index) guarantees that
        // a final record with a new id becomes its own well rather than
        // being appended to the previous one.
        std::size_t end_rec = begin_rec + 1;
        while (end_rec < record_count && id[end_rec] == id[begin_rec])
            ++end_rec;

        dataset well;
        for (const auto& column : data) {
            well[column.first] = std::vector<std::string>(
                column.second.begin() + begin_rec,
                column.second.begin() + end_rec);
        }
        fn(well);

        begin_rec = end_rec;
    }

    return fn;
}
开发者ID:derrickturk,项目名称:libdca,代码行数:29,代码来源:peakmonth.cpp

示例2: load_training_data

// Copies the training instances of `ds` into the model's GSL buffers and
// records the mean and standard deviation of the target values.
//
// At most FLAGS_max_num_examples rows are loaded. Targets are written to
// _tv, features to _fm (row = instance, column = feature), and the target
// statistics to _t_avg / _t_std.
//
// Returns 0 on success, -1 when the dataset has no rows or no feature
// columns or when alloc_training_data() reports failure.
int model::load_training_data(const dataset &ds)
{
	int nrow = ds.ins_num();
	const int ncol = ds.fea_num();

	if (nrow <= 0 || ncol < 1) {
		ULIB_FATAL("invalid training data dimensions");
		return -1;
	}
	if (nrow > FLAGS_max_num_examples)
		nrow = FLAGS_max_num_examples;

	if (alloc_training_data(nrow, ncol)) {
		ULIB_FATAL("couldn't allocate training data");
		return -1;
	}

	// Accumulate sum and sum of squares of the targets in the same pass
	// that fills the buffers.
	double tavg = 0;
	double tvar = 0;
	for (int i = 0; i < nrow; ++i) {
		const double t = ds.get_tgv(i);
		tavg += t;
		tvar += t*t;
		gsl_vector_set(_tv, i, t);
		for (int j = 0; j < ncol; ++j)
			gsl_matrix_set(_fm, i, j, ds.get_fea(i, j));
	}
	_t_avg = tavg/nrow;

	// E[t^2] - E[t]^2 can come out marginally negative through rounding
	// when the targets are (nearly) constant; clamp so sqrt() does not
	// produce NaN.
	double variance = tvar/nrow - _t_avg*_t_avg;
	if (variance < 0)
		variance = 0;
	_t_std = sqrt(variance);

	return 0;
}
开发者ID:ZilongTan,项目名称:MachineLearning,代码行数:34,代码来源:model.cpp

示例3: add_dataset

// Replays `data` into this object: for each step, the observations recorded
// at the current timestep are added and timestep() is called; while this
// object's current_timestep() is still behind data.current_timestep(), the
// control for the current timestep is added before the next round.
// completed() is called once at the end.
//
// control_model_builder  turns a (control, timedelta) pair from `data` into
//                        the argument passed to add_control().
// obs_model_builder      turns a raw observation from `data` into the
//                        argument passed to add_observation().
//
// NOTE(review): the loop terminates only if timestep() advances
// current_timestep() -- presumably it does; confirm in slam_data.
void slam::slam_data<ControlModel, ObservationModel>
::add_dataset (const dataset<ControlModel, ObservationModel>& data,
               const typename ControlModel::builder& control_model_builder,
               const typename ObservationModel::builder& obs_model_builder) {

    using namespace boost::adaptors;

    auto ingest_observations = [&](timestep_type step) {
        for (const auto& entry : values(data.observations_at(step)))
            add_observation (entry.id, obs_model_builder(entry.observation));
    };

    for (;;) {
        ingest_observations (current_timestep());
        timestep (current_timestep());

        // Caught up with the dataset: nothing more to replay.
        if (!(current_timestep() < data.current_timestep()))
            break;

        add_control (control_model_builder (data.control(current_timestep()),
                                            data.timedelta(current_timestep())));
    }

    completed();
}
开发者ID:caomw,项目名称:slam-4,代码行数:25,代码来源:slam_data.hpp

示例4: process

// Evaluates every cluster count k in [m_kmin, m_kmax) on `p_data` and stores
// the outcome in `p_result`.
//
// For each k the allocator produces a clustering. Its mean silhouette score
// is appended to p_result.scores(); if the allocator did not return exactly
// k clusters, NaN is appended instead. Whenever a score exceeds
// p_result.get_score(), the result's amount and score are updated to that k.
//
// Throws std::invalid_argument when m_kmax exceeds the number of objects in
// `p_data`.
void silhouette_ksearch::process(const dataset & p_data, silhouette_ksearch_data & p_result) {
    if (m_kmax > p_data.size()) {
        throw std::invalid_argument("K max value '" + std::to_string(m_kmax) +
            "' should not be bigger than amount of objects '" + std::to_string(p_data.size()) + "' in input data.");
    }

    // Guard the subtraction: with kmin > kmax an unsigned difference would
    // wrap around and request an absurd capacity.
    if (m_kmax > m_kmin) {
        p_result.scores().reserve(m_kmax - m_kmin);
    }

    for (std::size_t k = m_kmin; k < m_kmax; k++) {
        cluster_sequence clusters;
        m_allocator->allocate(k, p_data, clusters);

        if (clusters.size() != k) {
            // The allocator could not produce k clusters; keep the score list
            // aligned with k by recording a placeholder.
            p_result.scores().push_back(std::nan("1"));
            continue;
        }

        silhouette_data result;
        silhouette().process(p_data, clusters, result);

        const auto & point_scores = result.get_score();
        const double score = std::accumulate(point_scores.begin(), point_scores.end(), 0.0) / point_scores.size();
        p_result.scores().push_back(score);

        if (score > p_result.get_score()) {
            p_result.set_amount(k);
            p_result.set_score(score);
        }
    }
}
开发者ID:annoviko,项目名称:pyclustering,代码行数:29,代码来源:silhouette_ksearch.cpp

示例5: generateSet

// Builds a result set from two sets of datapoints.
//
// withNearbyDataset  datapoints that have other datapoints inside their
//                    buffer zone; these are thinned out randomly.
// standaloneDataset  datapoints with an empty buffer zone; all of them are
//                    copied into the result.
//
// Repeatedly picks a random datapoint from the remaining "with nearby"
// points, adds it to the result, and discards it together with every
// datapoint listed in its `buffer` that is still remaining. Neither input
// set is modified (the function works on copies).
//
// NOTE(review): assumes `dataset` is a std::set-like container where erasing
// one element leaves iterators to other elements valid -- confirm.
dataset generateSet(dataset& withNearbyDataset, dataset& standaloneDataset) {
    random_device rd;
    mt19937 rng(rd());

    dataset remainingDataset(withNearbyDataset.begin(), withNearbyDataset.end());
    dataset resultSet(standaloneDataset.begin(), standaloneDataset.end());

    while (!remainingDataset.empty()) {
        // uniform_int_distribution bounds are inclusive, so the upper bound
        // is size - 1; this keeps every remaining datapoint equally likely.
        uniform_int_distribution<int> uni(0, static_cast<int>(remainingDataset.size()) - 1);

        // pick random datapoint by advancing an iterator to the drawn position
        dataset::iterator it = remainingDataset.begin();
        advance(it, uni(rng));

        // add picked datapoint to result list
        resultSet.insert(*it);

        // remove all datapoints within the buffer zone that are still
        // remaining; the picked datapoint itself is skipped here so that
        // `it` stays valid until it is erased below
        for (const auto& neighbour : it->buffer) {
            dataset::iterator pos = remainingDataset.find(neighbour);
            if (pos != remainingDataset.end() && pos != it) {
                remainingDataset.erase(pos);
            }
        }

        // remove picked datapoint from remaining list
        remainingDataset.erase(it);
    }

    return resultSet;
}
开发者ID:rendro,项目名称:sampleselector,代码行数:38,代码来源:main.cpp

示例6: set_train_test_pairs

void experiment_datasets::set_train_test_pairs(const dataset & train, const dataset & test, int pair_num)
{
	shared_ptr<dataset> p_test_set(test.clone());

	shared_ptr<dataset> p_train_set(train.clone());

	train_test_pairs.erase(train_test_pairs.begin(),train_test_pairs.end());

	for (int i = 0; i < pair_num; i++)
	{
		train_test_pairs.push_back(train_test_pair(p_train_set,p_test_set));
	}

}
开发者ID:rudaoshi,项目名称:artifact,代码行数:14,代码来源:experiment_datasets.cpp

示例7: update_clusters

void kmeans::update_clusters(const dataset & p_centers, cluster_sequence & p_clusters) {
    const dataset & data = *m_ptr_data;

    p_clusters.clear();
    p_clusters.resize(p_centers.size());

    /* fill clusters again in line with centers. */
    if (m_ptr_indexes->empty()) {
        std::vector<std::size_t> winners(data.size(), 0);
        parallel_for(std::size_t(0), data.size(), [this, &p_centers, &winners](std::size_t p_index) {
            assign_point_to_cluster(p_index, p_centers, winners);
        });

        for (std::size_t index_point = 0; index_point < winners.size(); index_point++) {
            const std::size_t suitable_index_cluster = winners[index_point];
            p_clusters[suitable_index_cluster].push_back(index_point);
        }
    }
    else {
        /* This part of code is used by X-Means and in case of parallel implementation of this part in scope of X-Means
           performance is slightly reduced. Experiments has been performed our implementation and Intel TBB library. 
           But in K-Means case only - it works perfectly and increase performance. */
        std::vector<std::size_t> winners(data.size(), 0);
        parallel_for_each(*m_ptr_indexes, [this, &p_centers, &winners](std::size_t p_index) {
            assign_point_to_cluster(p_index, p_centers, winners);
        });

        for (std::size_t index_point : *m_ptr_indexes) {
            const std::size_t suitable_index_cluster = winners[index_point];
            p_clusters[suitable_index_cluster].push_back(index_point);
        }
    }

    erase_empty_clusters(p_clusters);
}
开发者ID:annoviko,项目名称:pyclustering,代码行数:35,代码来源:kmeans.cpp

示例8: split_impl

// Randomly partitions the sample indexes [0, data.get_sample_num()) into
// `batch_num` batches and returns one index list per batch.
//
// Every batch except possibly the trailing ones holds
// ceil(sample_num / batch_num) indexes. Trailing batches receive whatever is
// left, which may be fewer indexes or none at all when the samples run out
// early (e.g. 5 samples in 4 batches -> sizes 2, 2, 1, 0).
//
// NOTE(review): std::random_shuffle was deprecated in C++14 and removed in
// C++17; migrating to std::shuffle needs a random engine and is left to a
// file-level change.
vector<vector<int>> random_shuffer_dataset_splitter ::split_impl(const dataset& data) const
{
	vector<vector<int>> batch_ids(batch_num);

	const int batch_count = static_cast<int>(batch_ids.size());
	const int sample_num = data.get_sample_num();
	if (batch_count == 0 || sample_num <= 0)
		return batch_ids;

	vector<int> shuffled(sample_num);
	for (int i = 0; i < sample_num; i++)
		shuffled[i] = i;

	std::random_shuffle(shuffled.begin(), shuffled.end());

	// Integer ceiling division; avoids the precision loss of going through
	// float for large sample counts.
	const int batch_size = sample_num / batch_count + (sample_num % batch_count != 0 ? 1 : 0);

	int first = 0;
	for (int i = 0; i < batch_count; i++)
	{
		// Never take more than what is left, so no batch reads past the end
		// of `shuffled` and no batch size becomes negative.
		const int last = first + std::min(batch_size, sample_num - first);

		batch_ids[i].assign(shuffled.begin() + first, shuffled.begin() + last);

		first = last;
	}

	return batch_ids;
}
开发者ID:rudaoshi,项目名称:artifact,代码行数:30,代码来源:data_splitter.cpp

示例9: write_dataset

// Writes the contents of `value` into `dset`, starting at value.origin().
// The element type passed to dset.write() is the one ctype<> maps
// T::element to. Participates in overload resolution only when
// is_multi_array<T> holds.
inline typename boost::enable_if<is_multi_array<T>, void>::type
write_dataset(dataset& dset, T const& value)
{
    typedef typename T::element element_type;

    const hid_t memory_type = ctype<element_type>::hid();
    dset.write(memory_type, value.origin());
}
开发者ID:KaiSzuttor,项目名称:h5xx,代码行数:7,代码来源:boost_multi_array.hpp

示例10: read_dataset

// Reads the whole of `data_set` into `array`, resizing `array` to the
// extents of the dataset first. Participates in overload resolution only
// when is_multi_array<T> holds.
//
// Raises via H5XX_THROW when the rank of the dataset differs from
// T::dimensionality.
typename boost::enable_if<is_multi_array<T>, void>::type
read_dataset(dataset & data_set, T & array)
{
    typedef typename T::element element_type;
    const int array_rank = T::dimensionality;

    // A temporary dataspace gives access to the shape of the dataset.
    dataspace file_space(data_set);
    if (file_space.rank() != array_rank)
        H5XX_THROW("dataset \"" + get_name(data_set) + "\" and target array have mismatching dimensions");

    boost::array<hsize_t, array_rank> file_extents = file_space.extents<array_rank>();

    // Shrink the array to zero extents first, discarding its old contents.
    // TODO check if this feature is necessary/wanted
    boost::array<size_t, array_rank> empty_shape;
    empty_shape.assign(0);
    array.resize(empty_shape);

    // Then grow it to the extents reported by the file.
    // TODO check if this feature is necessary/wanted
    boost::array<size_t, array_rank> target_shape;
    std::copy(file_extents.begin(), file_extents.end(), target_shape.begin());
    array.resize(target_shape);

    // H5S_ALL for both the memory and the file dataspace, default transfer
    // property list.
    data_set.read(ctype<element_type>::hid(), array.origin(), H5S_ALL, H5S_ALL, H5P_DEFAULT);
}
开发者ID:KaiSzuttor,项目名称:h5xx,代码行数:29,代码来源:boost_multi_array.hpp

示例11: update_medians

// Recomputes one median per cluster and reports how far the medians moved.
//
// clusters  clusters whose medians are recalculated via calculate_median().
// medians   in: the previous medians; out: replaced by clusters.size()
//           freshly computed medians.
//
// Returns the largest m_metric distance between a previous median and its
// replacement (0.0 when there are no clusters).
//
// NOTE(review): reads (*m_ptr_data)[0] for the dimension, so the data set is
// expected to be non-empty, and `medians` is expected to hold at least
// clusters.size() entries on entry -- confirm at the call sites.
double kmedians::update_medians(cluster_sequence & clusters, dataset & medians) {
    const dataset & points = *m_ptr_data;
    const std::size_t dimension = points[0].size();

    // Keep the old medians for the distance comparison below.
    std::vector<point> previous(medians);

    medians.clear();
    medians.resize(clusters.size(), point(dimension, 0.0));

    double largest_shift = 0.0;
    for (std::size_t i = 0; i != clusters.size(); ++i) {
        calculate_median(clusters[i], medians[i]);

        const double shift = m_metric(previous[i], medians[i]);
        largest_shift = (shift > largest_shift) ? shift : largest_shift;
    }

    return largest_shift;
}
开发者ID:annoviko,项目名称:pyclustering,代码行数:22,代码来源:kmedians.cpp

示例12: update_clusters

void kmedians::update_clusters(const dataset & medians, cluster_sequence & clusters) {
    const dataset & data = *m_ptr_data;

    clusters.clear();
    clusters.resize(medians.size());

    for (size_t index_point = 0; index_point < data.size(); index_point++) {
        size_t index_cluster_optim = 0;
        double distance_optim = std::numeric_limits<double>::max();

        for (size_t index_cluster = 0; index_cluster < medians.size(); index_cluster++) {
            double distance = m_metric(data[index_point], medians[index_cluster]);
            if (distance < distance_optim) {
                index_cluster_optim = index_cluster;
                distance_optim = distance;
            }
        }

        clusters[index_cluster_optim].push_back(index_point);
    }

    erase_empty_clusters(clusters);
}
开发者ID:annoviko,项目名称:pyclustering,代码行数:23,代码来源:kmedians.cpp

示例13: split

// Splits `data` into a group of sub-datasets.
//
// split_impl() decides which sample indexes belong to each batch; one
// sub-dataset per batch is then extracted with data.sub_set() and added to
// the returned group in batch order.
dataset_group dataset_splitter::split(const dataset & data) const
{
	dataset_group group;

	vector<vector<int>> batch_ids = this->split_impl(data);

	// Index with the container's own size_type so the comparison against
	// size() does not mix signed and unsigned operands.
	for (vector<vector<int>>::size_type i = 0; i < batch_ids.size(); i++)
	{
		group.add_dataset(data.sub_set(batch_ids[i]));
	}

	return group;
}
开发者ID:rudaoshi,项目名称:artifact,代码行数:14,代码来源:data_splitter.cpp

示例14: assign_point_to_cluster

// Finds the center of `p_centers` closest (by m_metric) to the point
// (*m_ptr_data)[p_index_point] and stores that center's index in
// p_clusters[p_index_point]. On equal distances the center with the lowest
// index wins; with no centers at all, 0 is stored.
void kmeans::assign_point_to_cluster(const std::size_t p_index_point, const dataset & p_centers, std::vector<std::size_t> & p_clusters) {
    size_t nearest_center = 0;
    double nearest_distance = std::numeric_limits<double>::max();

    for (size_t candidate = 0; candidate != p_centers.size(); ++candidate) {
        const double candidate_distance = m_metric(p_centers[candidate], (*m_ptr_data)[p_index_point]);
        if (!(candidate_distance < nearest_distance)) {
            continue;
        }

        nearest_center = candidate;
        nearest_distance = candidate_distance;
    }

    p_clusters[p_index_point] = nearest_center;
}
开发者ID:annoviko,项目名称:pyclustering,代码行数:15,代码来源:kmeans.cpp

示例15: split_impl

// Randomly partitions the sample indexes [0, data.get_sample_num()) into
// ratio.size() groups whose sizes follow the proportions in `ratio`.
//
// Group i (except the last) receives floor(sample_num * ratio[i] / sum(ratio))
// indexes; the last group receives everything that is left, so every index
// is assigned exactly once.
//
// NOTE(review): a `ratio` whose entries sum to zero still divides by zero
// below -- callers are presumably expected to pass positive weights.
vector<vector<int>> random_shuffer_ratio_splitter ::split_impl(const dataset& data) const
{
	// Seed the sum with NumericType: std::accumulate sums in the type of
	// its init argument, so a plain `0` would truncate fractional ratios to
	// int at every step.
	const NumericType total = std::accumulate(ratio.begin(), ratio.end(), NumericType(0));

	vector<NumericType> percent(ratio);
	BOOST_FOREACH(NumericType & x, percent){ x = x/total; }

	vector<vector<int>> group_ids(percent.size());

	const int sample_num = data.get_sample_num();
	vector<int> temp;

	std::copy(
		boost::counting_iterator<unsigned int>(0),
		boost::counting_iterator<unsigned int>(sample_num),
		std::back_inserter(temp));

	std::random_shuffle ( temp.begin(), temp.end() );

	vector<int>::iterator cur_begin_iter = temp.begin();
	for (vector<NumericType>::size_type i = 0; i < percent.size(); i++)
	{
		vector<int>::iterator cur_end_iter = temp.end();
		if (i + 1 != percent.size())
		{
			// Clamp to what is still available so that rounding (or an
			// unusual ratio) can never move the iterator outside `temp`.
			const int remaining = static_cast<int>(temp.end() - cur_begin_iter);
			const int wanted = static_cast<int>(floor(sample_num * percent[i]));
			cur_end_iter = cur_begin_iter + std::max(0, std::min(wanted, remaining));
		}

		group_ids[i].assign(cur_begin_iter, cur_end_iter);

		cur_begin_iter = cur_end_iter;
	}

	return group_ids;
}
开发者ID:rudaoshi,项目名称:artifact,代码行数:44,代码来源:data_splitter.cpp


注:本文中的dataset类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。