本文整理汇总了C++中vex::profiler::tic_cpu方法的典型用法代码示例。如果您正苦于以下问题：C++ profiler::tic_cpu方法的具体用法？C++ profiler::tic_cpu怎么用？C++ profiler::tic_cpu使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类vex::profiler的用法示例。
在下文中一共展示了profiler::tic_cpu方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: benchmark_reductor
/// Benchmarks the reduction sum(a * b) over two vectors of N random values.
///
/// The device loop is timed under the profiler label "OpenCL"; when
/// options.bm_cpu is set, std::inner_product over the host copies is timed
/// under "C++" and the relative difference of the two accumulated sums is
/// printed as "res".
///
/// `real`, `options` and `random_vector` are declared outside this snippet
/// (`real` is presumably a template parameter -- TODO confirm).
///
/// @param ctx   context the device vectors and the reductor are created on
/// @param prof  profiler that receives the "OpenCL" and "C++" timings
/// @return (gflops, bwidth) computed from the device timing only
std::pair<double, double> benchmark_reductor(
        const vex::Context &ctx, vex::profiler<> &prof
        )
{
    const size_t N = 16 * 1024 * 1024;
    const size_t M = 1024 / 16;

    // Host inputs are generated first (A, then B) and then copied to the device.
    std::vector<real> A = random_vector<real>(N);
    std::vector<real> B = random_vector<real>(N);

    vex::vector<real> a(ctx, A);
    vex::vector<real> b(ctx, B);

    vex::Reductor<real, vex::SUM> sum(ctx);

    // One untimed evaluation before the measurement; its value is discarded.
    sum(a * b);
    real device_total = 0;

    prof.tic_cpu("OpenCL");
    for (size_t pass = 0; pass != M; ++pass) {
        device_total += sum(a * b);
    }
    ctx.finish();
    const double device_seconds = prof.toc("OpenCL");

    const double gflops = 2.0 * N * M / device_seconds / 1e9;
    const double bwidth = 2.0 * N * M * sizeof(real) / device_seconds / 1e9;

    std::cout << "Reduction (" << vex::type_name<real>() << ")\n";
    std::cout << " OpenCL";
    std::cout << "\n GFLOPS: " << gflops;
    std::cout << "\n Bandwidth: " << bwidth << std::endl;

    if (options.bm_cpu) {
        real host_total = 0;

        prof.tic_cpu("C++");
        for (size_t pass = 0; pass != M; ++pass) {
            host_total += std::inner_product(A.begin(), A.end(), B.begin(), static_cast<real>(0));
        }
        const double host_seconds = prof.toc("C++");

        // Host figures are only printed; the return value reports the device run.
        const double host_gflops = 2.0 * N * M / host_seconds / 1e9;
        const double host_bwidth = 2.0 * N * M * sizeof(real) / host_seconds / 1e9;

        std::cout << " C++";
        std::cout << "\n GFLOPS: " << host_gflops;
        std::cout << "\n Bandwidth: " << host_bwidth << std::endl;

        std::cout << " res = " << fabs( (device_total - host_total) / host_total )
                  << std::endl << std::endl;
    }

    return std::make_pair(gflops, bwidth);
}
示例2: benchmark_rng
/// Reports random-number throughput for the threefry and philox generators
/// (via rng_throughput) and, when options.bm_cpu is set, for a host-side
/// std::mt19937 drawing N uniform values in [0, 1).
///
/// `real`, `options` and `rng_throughput` are declared outside this snippet
/// (`real` is presumably a template parameter -- TODO confirm).
///
/// @param ctx   context passed through to rng_throughput
/// @param prof  profiler that receives one timing per generator
void benchmark_rng(
        const vex::Context &ctx, vex::profiler<> &prof
        )
{
    const size_t N = 16 * 1024 * 1024;
    const size_t M = 1024;

    prof.tic_cpu("OpenCL (threefry)");
    const double threefry_rate = rng_throughput<real, vex::random::threefry>(ctx, N, M);
    prof.toc("OpenCL (threefry)");

    std::cout << "Random numbers per second (" << vex::type_name<real>() << ")\n";
    std::cout << " OpenCL (threefry): " << threefry_rate << std::endl;

    prof.tic_cpu("OpenCL (philox)");
    const double philox_rate = rng_throughput<real, vex::random::philox>(ctx, N, M);
    prof.toc("OpenCL (philox)");

    std::cout << " OpenCL (philox): " << philox_rate << std::endl;

    if (options.bm_cpu) {
        std::mt19937 engine( std::rand() );
        std::uniform_real_distribution<real> unit_dist(0.0, 1.0);

        prof.tic_cpu("C++ (mt19937)");
        // Every draw is folded into a running maximum so each one is consumed.
        real largest = 0;
        for (size_t draw = 0; draw != N; ++draw) {
            largest = std::max(largest, unit_dist(engine));
        }
        const double host_seconds = prof.toc("C++ (mt19937)");

        std::cout << " C++ (mt19937): " << N / host_seconds << std::endl;
    }

    std::cout << std::endl;
}
示例3: run_tests
/// Runs every benchmark enabled in `options` for element type `real`, prints
/// a banner to std::cout and appends the measured (gflops, bwidth) pairs as
/// one whitespace-separated record to "profile_<type name>.dat".
///
/// The record starts with ctx.size() and is followed by one pair per enabled
/// benchmark among SAXPY, vector arithmetic, reduction, stencil and SpMV.
/// The RNG and sort benchmarks are run but write nothing to the log.
///
/// `real`, `options` and the benchmark_* functions are declared outside this
/// snippet (`real` is presumably a template parameter -- TODO confirm).
///
/// @param ctx     context every benchmark runs on
/// @param prof    profiler; the whole run is nested under the type name
/// @param n_size  forwarded unchanged to benchmark_reductor
void run_tests(const vex::Context &ctx, vex::profiler<> &prof, int n_size)
{
    std::cout
        << "----------------------------------------------------------\n"
        << "Profiling \"" << vex::type_name<real>() << "\" performance\n"
        << "----------------------------------------------------------\n"
        << ctx << std::endl;

    std::ostringstream fname;
    fname << "profile_" << vex::type_name<real>() << ".dat";

    // Append mode: every call adds a record after whatever earlier runs wrote.
    std::ofstream log(fname.str().c_str(), std::ios::app);

    log << ctx.size() << " ";

    double gflops, bwidth;

    prof.tic_cpu( vex::type_name<real>() );

    if (options.bm_saxpy) {
        prof.tic_cpu("Vector SAXPY");
        std::tie(gflops, bwidth) = benchmark_saxpy<real>(ctx, prof);
        prof.toc("Vector SAXPY");

        log << gflops << " " << bwidth << " ";
    }

    if (options.bm_vector) {
        prof.tic_cpu("Vector arithmetic");
        std::tie(gflops, bwidth) = benchmark_vector<real>(ctx, prof);
        prof.toc("Vector arithmetic");

        log << gflops << " " << bwidth << " ";
    }

    if (options.bm_reductor) {
        prof.tic_cpu("Reduction");
        std::tie(gflops, bwidth) = benchmark_reductor<real>(ctx, prof, n_size);
        prof.toc("Reduction");

        log << gflops << " " << bwidth << " ";
    }

    if (options.bm_stencil) {
        prof.tic_cpu("Stencil");
        std::tie(gflops, bwidth) = benchmark_stencil<real>(ctx, prof);
        prof.toc("Stencil");

        log << gflops << " " << bwidth << " ";
    }

    if (options.bm_spmv) {
        prof.tic_cpu("SpMV");
        std::tie(gflops, bwidth) = benchmark_spmv<real>(ctx, prof);
        prof.toc("SpMV");

        // Last logged pair: this also ends (and flushes) the record.
        log << gflops << " " << bwidth << std::endl;

        // The CCSR figures are measured and profiled but not written to the log.
        prof.tic_cpu("SpMV (CCSR)");
        std::tie(gflops, bwidth) = benchmark_spmv_ccsr<real>(ctx, prof);
        prof.toc("SpMV (CCSR)");
    } else {
        // Without SpMV nothing else would end the record, and because the
        // file is opened in append mode the next run's figures would be glued
        // onto this line. Terminate the record explicitly.
        log << std::endl;
    }

    if (options.bm_rng) {
        prof.tic_cpu("Random number generation");
        benchmark_rng<real>(ctx, prof);
        prof.toc("Random number generation");
    }

    if (options.bm_sort) {
        prof.tic_cpu("Sorting");
        benchmark_sort<real>(ctx, prof);
        prof.toc("Sorting");
    }

    /*
    if (options.bm_scan) {
        prof.tic_cpu("Scanning");
        benchmark_scan<real>(ctx, prof);
        prof.toc("Scanning");
    }
    */

    prof.toc( vex::type_name<real>() );

    std::cout << std::endl << std::endl;
}
示例4: benchmark_saxpy
/// Benchmarks the in-place update a = alpha * a + b on vectors of N elements.
///
/// The device loop (M repetitions) is timed under the profiler label
/// "OpenCL". When options.bm_cpu is set, the same recurrence is applied to
/// the host copies under "C++", and the sum of squared differences between
/// the device result and the host result is printed as "res".
///
/// `real`, `options` and `random_vector` are declared outside this snippet
/// (`real` is presumably a template parameter -- TODO confirm).
///
/// @param ctx   context the device vectors are created on
/// @param prof  profiler that receives the "OpenCL" and "C++" timings
/// @return (gflops, bwidth) computed from the device timing only
std::pair<double,double> benchmark_saxpy(
        const vex::Context &ctx, vex::profiler<> &prof
        )
{
    const size_t N = 1024 * 1024;
    const size_t M = 1024;

    std::vector<real> A(N, 0);
    std::vector<real> B = random_vector<real>(N);

    // The scalar factor is taken from a one-element random vector.
    std::vector<real> alphavec = random_vector<real>(1);
    real alpha = alphavec.front();

    vex::vector<real> a(ctx, A);
    vex::vector<real> b(ctx, B);

    auto ta = vex::tag<1>(a);

    // One untimed evaluation, then reset to zero so that the timed device run
    // starts from the same state as the host vector A.
    ta = alpha * ta + b;
    ta = 0;

    prof.tic_cpu("OpenCL");
    for (size_t pass = 0; pass != M; ++pass) {
        ta = alpha * ta + b;
    }
    ctx.finish();
    const double device_seconds = prof.toc("OpenCL");

    const double gflops = (2.0 * N * M) / device_seconds / 1e9;
    const double bwidth = (3.0 * N * M * sizeof(real)) / device_seconds / 1e9;

    std::cout << "Vector SAXPY (" << vex::type_name<real>() << ")\n";
    std::cout << " OCL";
    std::cout << "\n GFLOPS: " << gflops;
    std::cout << "\n Bandwidth: " << bwidth << std::endl;

    if (options.bm_cpu) {
        prof.tic_cpu("C++");
        for (size_t pass = 0; pass != M; ++pass) {
            for (size_t k = 0; k != N; ++k) {
                A[k] = alpha * A[k] + B[k];
            }
        }
        const double host_seconds = prof.toc("C++");

        // Host figures are only printed; the return value reports the device run.
        const double host_gflops = (2.0 * N * M) / host_seconds / 1e9;
        const double host_bwidth = (3.0 * N * M * sizeof(real)) / host_seconds / 1e9;

        std::cout << " C++";
        std::cout << "\n GFLOPS: " << host_gflops;
        std::cout << "\n Bandwidth: " << host_bwidth << std::endl;

        // Reuse b as scratch for the host result and compare against a.
        vex::copy(A, b);
        vex::Reductor<real, vex::SUM> sum(ctx);
        a -= b;

        std::cout << " res = " << sum(a * a)
                  << std::endl << std::endl;
    }

    return std::make_pair(gflops, bwidth);
}
示例5: benchmark_scan
/// Benchmarks exclusive scan over N random integer keys, M repetitions per
/// implementation, and prints the throughput of each in keys per second.
///
/// vex::exclusive_scan is always measured. The Boost.Compute and CLOGS
/// variants are compiled in only when HAVE_BOOST_COMPUTE / HAVE_CLOGS are
/// defined, and a plain host loop is measured when options.bm_cpu is set.
/// Every device implementation gets one untimed call before its timed loop.
///
/// `real` and `options` are declared outside this snippet (`real` is
/// presumably a template parameter -- TODO confirm).
///
/// @param ctx   context the device vectors are created on
/// @param prof  profiler that receives one timing per implementation
void benchmark_scan(
        const vex::Context &ctx, vex::profiler<> &prof
        )
{
    const size_t N = 16 * 1024 * 1024;
    const size_t M = 16;

    // cl_uint keys when real is float, cl_ulong keys otherwise.
    using key_type = typename std::conditional<
        std::is_same<float, real>::value, cl_uint, cl_ulong
        >::type;

    std::default_random_engine rng( std::rand() );
    std::uniform_int_distribution<key_type> rnd;

    std::vector<key_type> host_in(N);
    std::vector<key_type> host_out(N);

    for (auto &key : host_in) {
        key = rnd(rng);
    }

    vex::vector<key_type> dev_in(ctx, host_in);
    vex::vector<key_type> dev_out(ctx, N);

    vex::exclusive_scan(dev_in, dev_out);
    ctx.finish();

    prof.tic_cpu("VexCL");
    for (size_t pass = 0; pass != M; ++pass) {
        vex::exclusive_scan(dev_in, dev_out);
    }
    ctx.finish();
    const double vexcl_seconds = prof.toc("VexCL");

    std::cout << "Scan (" << vex::type_name<key_type>() << ")\n";
    std::cout << " VexCL: " << N * M / vexcl_seconds << " keys/sec\n";

#ifdef HAVE_BOOST_COMPUTE
    vex::compute::exclusive_scan(dev_in, dev_out);
    ctx.finish();

    prof.tic_cpu("Boost.Compute");
    for (size_t pass = 0; pass != M; ++pass) {
        vex::compute::exclusive_scan(dev_in, dev_out);
    }
    ctx.finish();
    const double compute_seconds = prof.toc("Boost.Compute");

    std::cout << " Boost.Compute: " << N * M / compute_seconds << " keys/sec\n";
#endif

#ifdef HAVE_CLOGS
    vex::clogs::exclusive_scan(dev_in, dev_out);
    ctx.finish();

    prof.tic_cpu("CLOGS");
    for (size_t pass = 0; pass != M; ++pass) {
        vex::clogs::exclusive_scan(dev_in, dev_out);
    }
    ctx.finish();
    const double clogs_seconds = prof.toc("CLOGS");

    std::cout << " CLOGS: " << N * M / clogs_seconds << " keys/sec\n";
#endif

    if (options.bm_cpu) {
        prof.tic_cpu("CPU");
        for (size_t pass = 0; pass != M; ++pass) {
            // Exclusive scan: each output holds the sum of all earlier inputs.
            key_type running = key_type();
            for (size_t k = 0; k != N; ++k) {
                host_out[k] = running;
                running += host_in[k];
            }
        }
        const double host_seconds = prof.toc("CPU");

        std::cout << " CPU: " << N * M / host_seconds << " keys/sec\n";
    }

    std::cout << std::endl;
}
示例6: benchmark_sort
/// Benchmarks sorting of N random integer keys, M repetitions per
/// implementation, and prints the throughput of each in keys per second.
///
/// Before every timed sort the working buffer is refilled from the unsorted
/// keys; only the sort itself (plus ctx.finish() for the device variants) is
/// inside the timed region, and the per-repetition timings are summed.
/// vex::sort is always measured. The Boost.Compute and CLOGS variants are
/// compiled in only when HAVE_BOOST_COMPUTE / HAVE_CLOGS are defined, and
/// std::sort is measured when options.bm_cpu is set.
///
/// `real` and `options` are declared outside this snippet (`real` is
/// presumably a template parameter -- TODO confirm).
///
/// @param ctx   context the device vectors are created on
/// @param prof  profiler that receives one timing per implementation
void benchmark_sort(
        const vex::Context &ctx, vex::profiler<> &prof
        )
{
    const size_t N = 16 * 1024 * 1024;
    const size_t M = 16;

    // cl_uint keys when real is float, cl_ulong keys otherwise.
    using key_type = typename std::conditional<
        std::is_same<float, real>::value, cl_uint, cl_ulong
        >::type;

    std::default_random_engine rng( std::rand() );
    std::uniform_int_distribution<key_type> rnd;

    std::vector<key_type> host_keys(N);
    std::vector<key_type> host_work(N);

    for (auto &key : host_keys) {
        key = rnd(rng);
    }

    vex::vector<key_type> dev_keys(ctx, host_keys);
    vex::vector<key_type> dev_work(ctx, N);

    // One untimed sort before the measurement.
    dev_work = dev_keys;
    vex::sort(dev_work);

    double vexcl_seconds = 0;
    for (size_t pass = 0; pass != M; ++pass) {
        dev_work = dev_keys;
        ctx.finish();

        prof.tic_cpu("VexCL");
        vex::sort(dev_work);
        ctx.finish();
        vexcl_seconds += prof.toc("VexCL");
    }

    std::cout << "Sort (" << vex::type_name<key_type>() << ")\n";
    std::cout << " VexCL: " << N * M / vexcl_seconds << " keys/sec\n";

#ifdef HAVE_BOOST_COMPUTE
    dev_work = dev_keys;
    vex::compute::sort(dev_work);

    double compute_seconds = 0;
    for (size_t pass = 0; pass != M; ++pass) {
        dev_work = dev_keys;
        ctx.finish();

        prof.tic_cpu("Boost.Compute");
        vex::compute::sort(dev_work);
        ctx.finish();
        compute_seconds += prof.toc("Boost.Compute");
    }

    std::cout << " Boost.Compute: " << N * M / compute_seconds << " keys/sec\n";
#endif

#ifdef HAVE_CLOGS
    dev_work = dev_keys;
    vex::clogs::sort(dev_work);

    double clogs_seconds = 0;
    for (size_t pass = 0; pass != M; ++pass) {
        dev_work = dev_keys;
        ctx.finish();

        prof.tic_cpu("CLOGS");
        vex::clogs::sort(dev_work);
        ctx.finish();
        clogs_seconds += prof.toc("CLOGS");
    }

    std::cout << " CLOGS: " << N * M / clogs_seconds << " keys/sec\n";
#endif

    if (options.bm_cpu) {
        double stl_seconds = 0;
        for (size_t pass = 0; pass != M; ++pass) {
            // Restore the unsorted keys outside the timed region.
            std::copy(host_keys.begin(), host_keys.end(), host_work.begin());

            prof.tic_cpu("STL");
            std::sort(host_work.begin(), host_work.end());
            stl_seconds += prof.toc("STL");
        }

        std::cout << " STL: " << N * M / stl_seconds << " keys/sec\n";
    }

    std::cout << std::endl;
}