本文整理汇总了 C++ 中 dataset 类的典型用法代码示例。如果您正苦于以下问题:C++ dataset 类的具体用法?C++ dataset 怎么用?C++ dataset 使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 dataset 类的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 C++ 代码示例。
示例1: foreach_well
F foreach_well(const dataset& data, F fn, std::string id_field)
{
const auto& id = data.at(id_field);
std::size_t begin_rec = 0, end_rec = 0;
for (std::size_t i = 0; i < id.size(); ++i) {
if (id[i] != id[begin_rec] || i == id.size() - 1) {
if (i == id.size() - 1)
end_rec = i;
dataset well;
std::for_each(data.begin(), data.end(),
[&](const std::pair<std::string,
std::vector<std::string>>& column)
{
well[column.first] = std::vector<std::string>(
column.second.data() + begin_rec,
column.second.data() + end_rec + 1);
}
);
fn(well);
begin_rec = i;
}
end_rec = i;
}
return fn;
}
示例2: load_training_data
int model::load_training_data(const dataset &ds)
{
int nrow, ncol;
nrow = ds.ins_num();
ncol = ds.fea_num();
if (nrow <= 0 || ncol < 1) {
ULIB_FATAL("invalid training data dimensions");
return -1;
}
if (nrow > FLAGS_max_num_examples)
nrow = FLAGS_max_num_examples;
if (alloc_training_data(nrow, ncol)) {
ULIB_FATAL("couldn't allocate training data");
return -1;
}
double tavg = 0;
double tvar = 0;
for (int i = 0; i < nrow; ++i) {
double t = ds.get_tgv(i);
tavg += t;
tvar += t*t;
gsl_vector_set(_tv, i, t);
for (int j = 0; j < ncol; ++j)
gsl_matrix_set(_fm, i, j, ds.get_fea(i, j));
}
_t_avg = tavg/nrow;
_t_std = sqrt(tvar/nrow - _t_avg*_t_avg);
return 0;
}
示例3: timestep
void slam::slam_data<ControlModel, ObservationModel>
::add_dataset (const dataset<ControlModel, ObservationModel>& data,
const typename ControlModel::builder& control_model_builder,
const typename ObservationModel::builder& obs_model_builder) {
using namespace boost::adaptors;
auto add_observations = [&](timestep_type t) {
for (const auto& obs : values(data.observations_at(t))) {
add_observation (obs.id, obs_model_builder(obs.observation));
}
};
add_observations (current_timestep());
timestep (current_timestep());
while (current_timestep() < data.current_timestep()) {
add_control (control_model_builder (data.control(current_timestep()),
data.timedelta(current_timestep())));
add_observations (current_timestep());
timestep (current_timestep());
}
completed();
}
示例4: process
void silhouette_ksearch::process(const dataset & p_data, silhouette_ksearch_data & p_result) {
if (m_kmax > p_data.size()) {
throw std::invalid_argument("K max value '" + std::to_string(m_kmax) +
"' should be bigger than amount of objects '" + std::to_string(p_data.size()) + "' in input data.");
}
p_result.scores().reserve(m_kmax - m_kmin);
for (std::size_t k = m_kmin; k < m_kmax; k++) {
cluster_sequence clusters;
m_allocator->allocate(k, p_data, clusters);
if (clusters.size() != k) {
p_result.scores().push_back(std::nan("1"));
continue;
}
silhouette_data result;
silhouette().process(p_data, clusters, result);
const double score = std::accumulate(result.get_score().begin(), result.get_score().end(), (double) 0.0) / result.get_score().size();
p_result.scores().push_back(score);
if (score > p_result.get_score()) {
p_result.set_amount(k);
p_result.set_score(score);
}
}
}
示例5: generateSet
// generate a result set from two sets of datapoints of which the first set contains all
// datapoints with other datapoints in the buffer zone and the of which the second set
// contains all datapoints without other datapoints in the buffer zone
dataset generateSet(dataset& withNearbyDataset, dataset& standaloneDataset) {
random_device rd;
mt19937 rng(rd());
dataset remainingDataset(withNearbyDataset.begin(), withNearbyDataset.end());
dataset resultSet(standaloneDataset.begin(), standaloneDataset.end());
while (remainingDataset.size() != 0) {
// create iterator
dataset::iterator it = remainingDataset.begin();
// generate random index
uniform_int_distribution<int> uni(0, (int)remainingDataset.size());
int r = uni(rng);
// pick random datapoint by advancing the iterator to the random position
advance(it, r % remainingDataset.size());
// add picked datapoint to result list
resultSet.insert(*it);
// remove all datapoints within buffer zone if still in remaining dataset
for (dataset::iterator j = it->buffer.begin(); j != it->buffer.end(); ++j) {
dataset::iterator tmp = remainingDataset.find(*j);
if (tmp != remainingDataset.end()) {
remainingDataset.erase(tmp);
}
}
// remove picked datapoint from remaining list
remainingDataset.erase(remainingDataset.find(*it));
}
return resultSet;
}
示例6: p_test_set
void experiment_datasets::set_train_test_pairs(const dataset & train, const dataset & test, int pair_num)
{
shared_ptr<dataset> p_test_set(test.clone());
shared_ptr<dataset> p_train_set(train.clone());
train_test_pairs.erase(train_test_pairs.begin(),train_test_pairs.end());
for (int i = 0; i < pair_num; i++)
{
train_test_pairs.push_back(train_test_pair(p_train_set,p_test_set));
}
}
示例7: update_clusters
void kmeans::update_clusters(const dataset & p_centers, cluster_sequence & p_clusters) {
const dataset & data = *m_ptr_data;
p_clusters.clear();
p_clusters.resize(p_centers.size());
/* fill clusters again in line with centers. */
if (m_ptr_indexes->empty()) {
std::vector<std::size_t> winners(data.size(), 0);
parallel_for(std::size_t(0), data.size(), [this, &p_centers, &winners](std::size_t p_index) {
assign_point_to_cluster(p_index, p_centers, winners);
});
for (std::size_t index_point = 0; index_point < winners.size(); index_point++) {
const std::size_t suitable_index_cluster = winners[index_point];
p_clusters[suitable_index_cluster].push_back(index_point);
}
}
else {
/* This part of code is used by X-Means and in case of parallel implementation of this part in scope of X-Means
performance is slightly reduced. Experiments has been performed our implementation and Intel TBB library.
But in K-Means case only - it works perfectly and increase performance. */
std::vector<std::size_t> winners(data.size(), 0);
parallel_for_each(*m_ptr_indexes, [this, &p_centers, &winners](std::size_t p_index) {
assign_point_to_cluster(p_index, p_centers, winners);
});
for (std::size_t index_point : *m_ptr_indexes) {
const std::size_t suitable_index_cluster = winners[index_point];
p_clusters[suitable_index_cluster].push_back(index_point);
}
}
erase_empty_clusters(p_clusters);
}
示例8: temp
vector<vector<int>> random_shuffer_dataset_splitter ::split_impl(const dataset& data) const
{
vector<vector<int>> batch_ids(batch_num);
int sample_num = data.get_sample_num();
vector<int> temp(sample_num);
for (int i = 0;i<sample_num;i++)
temp[i] = i;
std::random_shuffle ( temp.begin(), temp.end() );
int batch_size = ceil(float(sample_num)/batch_num);
for (int i = 0;i<batch_num;i++)
{
int cur_batch_size = batch_size;
if (i == batch_num-1)
cur_batch_size = sample_num - (batch_num-1)*batch_size;
vector<int> cur_batch_id(cur_batch_size);
for (int j = 0;j<cur_batch_size;j++)
cur_batch_id[j] = temp[i*batch_size + j];
batch_ids[i] = cur_batch_id;
}
return batch_ids;
}
示例9: write_dataset
inline typename boost::enable_if<is_multi_array<T>, void>::type
write_dataset(dataset& dset, T const& value)
{
typedef typename T::element value_type;
hid_t type_id = ctype<value_type>::hid();
dset.write(type_id, value.origin());
}
示例10: read_dataset
typename boost::enable_if<is_multi_array<T>, void>::type
read_dataset(dataset & data_set, T & array)
{
const int array_rank = T::dimensionality;
typedef typename T::element value_type;
// --- use temporary dataspace object to get the shape of the dataset
dataspace file_space(data_set);
if (!(file_space.rank() == array_rank))
H5XX_THROW("dataset \"" + get_name(data_set) + "\" and target array have mismatching dimensions");
boost::array<hsize_t, array_rank> file_dims = file_space.extents<array_rank>();
// --- clear array - TODO check if this feature is necessary/wanted
boost::array<size_t, array_rank> array_zero;
array_zero.assign(0);
array.resize(array_zero);
// --- resize array to match the dataset - TODO check if this feature is necessary/wanted
boost::array<size_t, array_rank> array_shape;
std::copy(file_dims.begin(), file_dims.begin() + array_rank, array_shape.begin());
array.resize(array_shape);
hid_t mem_space_id = H5S_ALL;
hid_t file_space_id = H5S_ALL;
hid_t xfer_plist_id = H5P_DEFAULT;
data_set.read(ctype<value_type>::hid(), array.origin(), mem_space_id, file_space_id, xfer_plist_id);
}
示例11: update_medians
double kmedians::update_medians(cluster_sequence & clusters, dataset & medians) {
const dataset & data = *m_ptr_data;
const std::size_t dimension = data[0].size();
std::vector<point> prev_medians(medians);
medians.clear();
medians.resize(clusters.size(), point(dimension, 0.0));
double maximum_change = 0.0;
for (std::size_t index_cluster = 0; index_cluster < clusters.size(); index_cluster++) {
calculate_median(clusters[index_cluster], medians[index_cluster]);
double change = m_metric(prev_medians[index_cluster], medians[index_cluster]);
if (change > maximum_change) {
maximum_change = change;
}
}
return maximum_change;
}
示例12: update_clusters
void kmedians::update_clusters(const dataset & medians, cluster_sequence & clusters) {
const dataset & data = *m_ptr_data;
clusters.clear();
clusters.resize(medians.size());
for (size_t index_point = 0; index_point < data.size(); index_point++) {
size_t index_cluster_optim = 0;
double distance_optim = std::numeric_limits<double>::max();
for (size_t index_cluster = 0; index_cluster < medians.size(); index_cluster++) {
double distance = m_metric(data[index_point], medians[index_cluster]);
if (distance < distance_optim) {
index_cluster_optim = index_cluster;
distance_optim = distance;
}
}
clusters[index_cluster_optim].push_back(index_point);
}
erase_empty_clusters(clusters);
}
示例13: split
dataset_group dataset_splitter::split(const dataset & data) const
{
dataset_group group;
vector<vector<int>> batch_ids = this->split_impl(data);
for (int i = 0;i<batch_ids.size();i++)
{
group.add_dataset(data.sub_set(batch_ids[i]));
}
return group;
}
示例14: assign_point_to_cluster
void kmeans::assign_point_to_cluster(const std::size_t p_index_point, const dataset & p_centers, std::vector<std::size_t> & p_clusters) {
double minimum_distance = std::numeric_limits<double>::max();
size_t suitable_index_cluster = 0;
for (size_t index_cluster = 0; index_cluster < p_centers.size(); index_cluster++) {
double distance = m_metric(p_centers[index_cluster], (*m_ptr_data)[p_index_point]);
if (distance < minimum_distance) {
minimum_distance = distance;
suitable_index_cluster = index_cluster;
}
}
p_clusters[p_index_point] = suitable_index_cluster;
}
示例15: percent
vector<vector<int>> random_shuffer_ratio_splitter ::split_impl(const dataset& data) const
{
vector<NumericType> percent(ratio);
NumericType total = std::accumulate(ratio.begin(),ratio.end(),0);
BOOST_FOREACH(NumericType & x,percent){ x = x/total; }
// std::transform(percent.begin(),percent.end(),percent.begin(),[total](NumericType val){return val/total;});
vector<vector<int>> group_ids(percent.size());
int sample_num = data.get_sample_num();
vector<int> temp;
std::copy(
boost::counting_iterator<unsigned int>(0),
boost::counting_iterator<unsigned int>(sample_num),
std::back_inserter(temp));
std::random_shuffle ( temp.begin(), temp.end() );
vector<int>::iterator cur_begin_iter = temp.begin();
for (int i = 0;i<percent.size();i++)
{
int cur_group_size = floor(sample_num * percent[i]);
vector<int>::iterator cur_end_iter = cur_begin_iter + cur_group_size;
if (i == percent.size()-1)
{
cur_end_iter = temp.end();
cur_group_size = cur_end_iter - cur_begin_iter;
}
vector<int> cur_group_id(cur_group_size);
copy(cur_begin_iter,cur_end_iter, cur_group_id.begin());
cur_begin_iter = cur_end_iter;
group_ids[i] = cur_group_id;
}
return group_ids;
}