本文整理汇总了C++中Kokkos::Impl::Timer::seconds方法的典型用法代码示例。如果您正苦于以下问题：C++ Timer::seconds方法的具体用法？C++ Timer::seconds怎么用？C++ Timer::seconds使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Kokkos::Impl::Timer的用法示例。
在下文中一共展示了Timer::seconds方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: test_global_to_local_ids
size_t test_global_to_local_ids(unsigned num_ids, unsigned capacity, unsigned num_find_iterations)
{
typedef Device execution_space;
typedef typename execution_space::size_type size_type;
typedef Kokkos::View<uint32_t*,execution_space> local_id_view;
typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;
double elasped_time = 0;
Kokkos::Impl::Timer timer;
local_id_view local_2_global("local_ids", num_ids);
global_id_view global_2_local(capacity);
int shiftw = 15;
//create
elasped_time = timer.seconds();
std::cout << std::setw(shiftw) << "allocate: " << elasped_time << std::endl;
timer.reset();
// generate unique ids
{
generate_ids<Device> gen(local_2_global);
}
// generate
elasped_time = timer.seconds();
std::cout << std::setw(shiftw) << "generate: " << elasped_time << std::endl;
timer.reset();
{
fill_map<Device> fill(global_2_local, local_2_global);
}
// fill
elasped_time = timer.seconds();
std::cout << std::setw(shiftw) << "fill: " << elasped_time << std::endl;
timer.reset();
size_t num_errors = global_2_local.failed_insert();
if (num_errors == 0u) {
for (unsigned i=0; i<num_find_iterations; ++i)
{
find_test<Device> find(global_2_local, local_2_global,num_errors);
}
// find
elasped_time = timer.seconds();
std::cout << std::setw(shiftw) << "lookup: " << elasped_time << std::endl;
}
else {
std::cout << " !!! Fill Failed !!!" << std::endl;
}
return num_errors;
}
示例2: Loop
void Loop(int loop, int test, const char* type_name) {
LoopVariant<T>(loop,test);
Kokkos::Impl::Timer timer;
T res = LoopVariant<T>(loop,test);
double time1 = timer.seconds();
timer.reset();
T resNonAtomic = LoopVariantNonAtomic<T>(loop,test);
double time2 = timer.seconds();
timer.reset();
T resSerial = LoopVariantSerial<T>(loop,test);
double time3 = timer.seconds();
time1*=1e6/loop;
time2*=1e6/loop;
time3*=1e6/loop;
textcolor_standard();
bool passed = true;
if(resSerial!=res) passed = false;
if(!passed) textcolor(RESET,BLACK,YELLOW);
printf("%s Test %i %s --- Loop: %i Value (S,A,NA): %e %e %e Time: %7.4e %7.4e %7.4e Size of Type %i)",type_name,test,passed?"PASSED":"FAILED",loop,1.0*resSerial,1.0*res,1.0*resNonAtomic,time1,time2,time3,(int)sizeof(T));
if(!passed) textcolor_standard();
printf("\n");
}
示例3: main
int main(int narg, char* arg[]) {
Kokkos::initialize(narg,arg);
int size = 1000000;
// Create DualViews. This will allocate on both the device and its
// host_mirror_device.
idx_type idx("Idx",size,64);
view_type dest("Dest",size);
view_type src("Src",size);
srand(134231);
// Get a reference to the host view of idx directly (equivalent to
// idx.view<idx_type::host_mirror_device_type>() )
idx_type::t_host h_idx = idx.h_view;
for (int i = 0; i < size; ++i) {
for (view_type::size_type j=0; j < h_idx.dimension_1 (); ++j) {
h_idx(i,j) = (size + i + (rand () % 500 - 250)) % size;
}
}
// Mark idx as modified on the host_mirror_device_type so that a
// sync to the device will actually move data. The sync happens in
// the functor's constructor.
idx.modify<idx_type::host_mirror_device_type>();
// Run on the device. This will cause a sync of idx to the device,
// since it was marked as modified on the host.
Kokkos::Impl::Timer timer;
Kokkos::parallel_for(size,localsum<view_type::device_type>(idx,dest,src));
Kokkos::fence();
double sec1_dev = timer.seconds();
timer.reset();
Kokkos::parallel_for(size,localsum<view_type::device_type>(idx,dest,src));
Kokkos::fence();
double sec2_dev = timer.seconds();
// Run on the host (could be the same as device). This will cause a
// sync back to the host of dest. Note that if the Device is CUDA,
// the data layout will not be optimal on host, so performance is
// lower than what it would be for a pure host compilation.
timer.reset();
Kokkos::parallel_for(size,localsum<view_type::host_mirror_device_type>(idx,dest,src));
Kokkos::fence();
double sec1_host = timer.seconds();
timer.reset();
Kokkos::parallel_for(size,localsum<view_type::host_mirror_device_type>(idx,dest,src));
Kokkos::fence();
double sec2_host = timer.seconds();
printf("Device Time with Sync: %lf without Sync: %lf \n",sec1_dev,sec2_dev);
printf("Host Time with Sync: %lf without Sync: %lf \n",sec1_host,sec2_host);
Kokkos::finalize();
}
示例4: test_global_to_local_ids
void test_global_to_local_ids(unsigned num_ids)
{
typedef Device device_type;
typedef typename device_type::size_type size_type;
typedef Kokkos::View<uint32_t*,device_type> local_id_view;
typedef Kokkos::UnorderedMap<uint32_t,size_type,device_type> global_id_view;
//size
std::cout << num_ids << ", ";
double elasped_time = 0;
Kokkos::Impl::Timer timer;
local_id_view local_2_global("local_ids", num_ids);
global_id_view global_2_local((3u*num_ids)/2u);
//create
elasped_time = timer.seconds();
std::cout << elasped_time << ", ";
timer.reset();
// generate unique ids
{
generate_ids<Device> gen(local_2_global);
}
Device::fence();
// generate
elasped_time = timer.seconds();
std::cout << elasped_time << ", ";
timer.reset();
{
fill_map<Device> fill(global_2_local, local_2_global);
}
Device::fence();
// fill
elasped_time = timer.seconds();
std::cout << elasped_time << ", ";
timer.reset();
size_t num_errors = 0;
for (int i=0; i<100; ++i)
{
find_test<Device> find(global_2_local, local_2_global,num_errors);
}
Device::fence();
// find
elasped_time = timer.seconds();
std::cout << elasped_time << std::endl;
ASSERT_EQ( num_errors, 0u);
}
示例5: main
int main(int narg, char* arg[]) {
Kokkos::initialize(narg,arg);
int size = 1000000;
// Create Views
idx_type idx("Idx",size,64);
view_type dest("Dest",size);
view_type src("Src",size);
srand(134231);
// When using UVM Cuda views can be accessed on the Host directly
for(int i=0; i<size; i++) {
for(int j=0; j<idx.dimension_1(); j++)
idx(i,j) = (size + i + (rand()%500 - 250))%size;
}
Kokkos::fence();
// Run on the device
// This will cause a sync of idx to the device since it was modified on the host
Kokkos::Impl::Timer timer;
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
Kokkos::fence();
double sec1_dev = timer.seconds();
// No data transfer will happen now, since nothing is accessed on the host
timer.reset();
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
Kokkos::fence();
double sec2_dev = timer.seconds();
// Run on the host
// This will cause a sync back to the host of dest which was changed on the device
// Compare runtime here with the dual_view example: dest will be copied back in 4k blocks
// when they are accessed the first time during the parallel_for. Due to the latency of a memcpy
// this gives lower effective bandwidth when doing a manual copy via dual views
timer.reset();
Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src));
Kokkos::fence();
double sec1_host = timer.seconds();
// No data transfers will happen now
timer.reset();
Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src));
Kokkos::fence();
double sec2_host = timer.seconds();
printf("Device Time with Sync: %lf without Sync: %lf \n",sec1_dev,sec2_dev);
printf("Host Time with Sync: %lf without Sync: %lf \n",sec1_host,sec2_host);
Kokkos::finalize();
}
示例6: Symbolic
/// Runs the symbolic factorization step by calling sfactor().
///
/// When BASKER_KOKKOS_TIME is defined, the elapsed time of the call is added
/// to stats.time_sfactor.
///
/// @param option compared against 0 and 1, but both branches are empty, so it
///               does not affect the visible behavior
/// @return always 0
BASKER_INLINE
int Basker<Int, Entry, Exe_Space>::Symbolic(Int option)
{
  printf("calling symbolic \n");

#if defined(BASKER_KOKKOS_TIME)
  Kokkos::Impl::Timer timer;
#endif

  // symmetric_sfactor();
  sfactor();

  if (option == 0)
  {
    // intentionally empty
  }
  else if (option == 1)
  {
    // intentionally empty
  }

#if defined(BASKER_KOKKOS_TIME)
  stats.time_sfactor += timer.seconds();
#endif

  return 0;
} // end Symbolic
示例7: Factor
/// Runs the numeric factorization via factor_notoken(option), then allocates
/// the work arrays listed below and calls permute_composition_for_solve().
///
/// When BASKER_KOKKOS_TIME is defined, the elapsed time of factor_notoken is
/// added to stats.time_nfactor. Sets factor_flag to BASKER_TRUE before
/// returning.
///
/// @param option forwarded unchanged to factor_notoken
/// @return always 0
BASKER_INLINE
int Basker<Int, Entry, Exe_Space>::Factor(Int option)
{
#if defined(BASKER_KOKKOS_TIME)
  Kokkos::Impl::Timer timer;
#endif

  factor_notoken(option);

#if defined(BASKER_KOKKOS_TIME)
  stats.time_nfactor += timer.seconds();
#endif

  // NDE
  // NOTE(review): these allocations run on every call; confirm whether a
  // previous allocation is released elsewhere when Factor is called repeatedly.
  MALLOC_ENTRY_1DARRAY(x_view_ptr_copy, gn); // used in basker_solve_rhs - move alloc
  MALLOC_ENTRY_1DARRAY(y_view_ptr_copy, gm);
  MALLOC_INT_1DARRAY(perm_inv_comp_array , gm); // y
  MALLOC_INT_1DARRAY(perm_comp_array, gn); // x
  MALLOC_INT_1DARRAY(perm_comp_iworkspace_array, gn);
  MALLOC_ENTRY_1DARRAY(perm_comp_fworkspace_array, gn);

  permute_composition_for_solve();

  factor_flag = BASKER_TRUE;
  return 0;
} // end Factor()
示例8: graph_color_symbolic
/// Colors the graph described by `handle` and stores the result on the
/// handle's graph-coloring handle.
///
/// The algorithm is chosen from gch->get_coloring_type(). The elapsed time of
/// the whole call is both added to the overall coloring time and set as the
/// coloring time; the number of phases and the vertex colors are stored too.
///
/// COLORING_DEFAULT and any unrecognized value fall back to the base
/// GraphColor implementation (the one used for COLORING_SERIAL). Previously
/// those values left the algorithm pointer NULL and it was then dereferenced.
///
/// @param handle kernel handle providing row map, entries and the coloring
///               handle; must not be NULL
void graph_color_symbolic(KernelHandle *handle){
  Kokkos::Impl::Timer timer;

  typename KernelHandle::idx_array_type row_map = handle->get_row_map();
  typename KernelHandle::idx_edge_array_type entries = handle->get_entries();
  typename KernelHandle::GraphColoringHandleType *gch = handle->get_graph_coloring_handle();
  Experimental::KokkosKernels::Graph::ColoringAlgorithm algorithm = gch->get_coloring_type();

  typedef typename KernelHandle::GraphColoringHandleType::color_array_type color_view_type;
  typedef typename Experimental::KokkosKernels::Graph::Impl::GraphColor
      <typename KernelHandle::GraphColoringHandleType> BaseGraphColoring;
  typedef typename Experimental::KokkosKernels::Graph::Impl::GraphColor_VB
      <typename KernelHandle::GraphColoringHandleType> VBGraphColoring;
  typedef typename Experimental::KokkosKernels::Graph::Impl::GraphColor_EB
      <typename KernelHandle::GraphColoringHandleType> EBGraphColoring;

  color_view_type colors_out = color_view_type("Graph Colors", row_map.dimension_0() - 1);

  BaseGraphColoring *gc = NULL;

  switch (algorithm){
  case Experimental::KokkosKernels::Graph::COLORING_VB:
  case Experimental::KokkosKernels::Graph::COLORING_VBBIT:
  case Experimental::KokkosKernels::Graph::COLORING_VBCS:
    gc = new VBGraphColoring(
        row_map.dimension_0() - 1, entries.dimension_0(),
        row_map, entries, gch);
    break;
  case Experimental::KokkosKernels::Graph::COLORING_EB:
    gc = new EBGraphColoring(
        row_map.dimension_0() - 1, entries.dimension_0(),
        row_map, entries, gch);
    break;
  case Experimental::KokkosKernels::Graph::COLORING_SERIAL:
  case Experimental::KokkosKernels::Graph::COLORING_DEFAULT:
  default:
    // Guarantees gc is never NULL when it is used below.
    gc = new BaseGraphColoring(
        row_map.dimension_0() - 1, entries.dimension_0(),
        row_map, entries, gch);
    break;
  }

  int num_phases = 0;
  gc->color_graph(colors_out, num_phases);
  delete gc;

  double coloring_time = timer.seconds();
  gch->add_to_overall_coloring_time(coloring_time);
  gch->set_coloring_time(coloring_time);
  gch->set_num_phases(num_phases);
  gch->set_vertex_colors(colors_out);
}
示例9: Factor
/// Runs the numeric factorization by calling factor_notoken(option).
///
/// When BASKER_KOKKOS_TIME is defined, the elapsed time of the call is added
/// to stats.time_nfactor.
///
/// @param option forwarded unchanged to factor_notoken
/// @return always 0
BASKER_INLINE
int Basker<Int, Entry, Exe_Space>::Factor(Int option)
{
#if defined(BASKER_KOKKOS_TIME)
  Kokkos::Impl::Timer timer;
#endif

  factor_notoken(option);

#if defined(BASKER_KOKKOS_TIME)
  stats.time_nfactor += timer.seconds();
#endif

  return 0;
} // end Factor()
示例10: mexFunction
void mexFunction(int nlhs,
mxArray *plhs [],
int nrhs,
const mxArray *prhs []) {
Kokkos::Impl::Timer time;
Kokkos::initialize();
string name = typeid(Kokkos::DefaultExecutionSpace).name();
mexPrintf("\n Kokkos is initialized with a default spaceL: %s\n", name.c_str());
Kokkos::finalize();
mexPrintf("\n Kokkos is finalized\n");
plhs[0] = mxCreateDoubleScalar(time.seconds());
}
示例11: factorization
static double factorization( const multivector_type Q_ ,
const multivector_type R_ )
{
#if defined( KOKKOS_USING_EXPERIMENTAL_VIEW )
using Kokkos::Experimental::ALL ;
#else
const Kokkos::ALL ALL ;
#endif
const size_type count = Q_.dimension_1();
value_view tmp("tmp");
value_view one("one");
Kokkos::deep_copy( one , (Scalar) 1 );
Kokkos::Impl::Timer timer ;
for ( size_type j = 0 ; j < count ; ++j ) {
// Reduction : tmp = dot( Q(:,j) , Q(:,j) );
// PostProcess : tmp = sqrt( tmp ); R(j,j) = tmp ; tmp = 1 / tmp ;
const vector_type Qj = Kokkos::subview( Q_ , ALL , j );
const value_view Rjj = Kokkos::subview( R_ , j , j );
invnorm2( Qj , Rjj , tmp );
// Q(:,j) *= ( 1 / R(j,j) ); => Q(:,j) *= tmp ;
Kokkos::scale( tmp , Qj );
for ( size_t k = j + 1 ; k < count ; ++k ) {
const vector_type Qk = Kokkos::subview( Q_ , ALL , k );
const value_view Rjk = Kokkos::subview( R_ , j , k );
// Reduction : R(j,k) = dot( Q(:,j) , Q(:,k) );
// PostProcess : tmp = - R(j,k);
dot_neg( Qj , Qk , Rjk , tmp );
// Q(:,k) -= R(j,k) * Q(:,j); => Q(:,k) += tmp * Q(:,j)
Kokkos::axpby( tmp , Qj , one , Qk );
}
}
execution_space::fence();
return timer.seconds();
}
示例12: profile_end_kernel
void profile_end_kernel(const std::string& kernel_name, const std::string& exec_space) {
Kokkos::Impl::Timer* timer = get_timer();
double time = timer->seconds();
KernelEntry* entry = get_kernel_list_head();
if(entry == NULL) {
KernelEntry* entry_new = new KernelEntry(entry,time,kernel_name,exec_space);
get_kernel_list_head(entry_new);
return;
}
bool found = entry->matches(kernel_name,exec_space);
while(!found && (entry->next!=NULL) ) {
entry = entry->next;
found = entry->matches(kernel_name,exec_space);
}
if(found)
entry->add_time(time);
else
entry->next = new KernelEntry(entry,time,kernel_name,exec_space);
}
示例13: main
int main(int narg, char* args[]) {
Kokkos::initialize(narg,args);
int chunk_size = 1024;
int nchunks = 100000; //1024*1024;
Kokkos::DualView<int*> data("data",nchunks*chunk_size+1);
srand(1231093);
for(int i = 0; i < data.dimension_0(); i++) {
data.h_view(i) = rand()%TS;
}
data.modify<Host>();
data.sync<Device>();
Kokkos::DualView<int**> histogram("histogram",TS,TS);
Kokkos::Impl::Timer timer;
// Threads/team (TS) is automically limited to the maximum supported by the device.
Kokkos::parallel_for( team_policy( nchunks , TS )
, find_2_tuples(chunk_size,data,histogram) );
Kokkos::fence();
double time = timer.seconds();
histogram.sync<Host>();
printf("Time: %lf \n\n",time);
int sum = 0;
for(int k=0; k<TS; k++) {
for(int l=0; l<TS; l++) {
printf("%i ",histogram.h_view(k,l));
sum += histogram.h_view(k,l);
}
printf("\n");
}
printf("Result: %i %i\n",sum,chunk_size*nchunks);
Kokkos::finalize();
}
示例14: main
int main(int narg, char* args[]) {
Kokkos::initialize(narg,args);
int chunk_size = 1024;
int nchunks = 100000; //1024*1024;
Kokkos::DualView<int*> data("data",nchunks*chunk_size+1);
srand(1231093);
for(int i = 0; i < data.dimension_0(); i++) {
data.h_view(i) = rand()%TS;
}
data.modify<Host>();
data.sync<Device>();
Kokkos::DualView<int**> histogram("histogram",TS,TS);
Kokkos::Impl::Timer timer;
Kokkos::parallel_for(
Kokkos::ParallelWorkRequest(nchunks,TS<Device::team_max()?TS:Device::team_max()),
find_2_tuples(chunk_size,data,histogram));
Kokkos::fence();
double time = timer.seconds();
histogram.sync<Host>();
printf("Time: %lf \n\n",time);
int sum = 0;
for(int k=0; k<TS; k++) {
for(int l=0; l<TS; l++) {
printf("%i ",histogram.h_view(k,l));
sum += histogram.h_view(k,l);
}
printf("\n");
}
printf("Result: %i %i\n",sum,chunk_size*nchunks);
Kokkos::finalize();
}
示例15: test
static double test( const int count , const int iter = 1 )
{
elem_coord_type coord( "coord" , count );
elem_grad_type grad ( "grad" , count );
// Execute the parallel kernels on the arrays:
double dt_min = 0 ;
Kokkos::parallel_for( count , Init( coord ) );
device_type::fence();
for ( int i = 0 ; i < iter ; ++i ) {
Kokkos::Impl::Timer timer ;
Kokkos::parallel_for( count , HexGrad<device_type>( coord , grad ) );
device_type::fence();
const double dt = timer.seconds();
if ( 0 == i ) dt_min = dt ;
else dt_min = dt < dt_min ? dt : dt_min ;
}
return dt_min ;
}