本文整理汇总了C++中Kokkos::Impl::Timer::reset方法的典型用法代码示例。如果您正苦于以下问题:C++ Timer::reset方法的具体用法?C++ Timer::reset怎么用?C++ Timer::reset使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Kokkos::Impl::Timer的用法示例。
在下文中一共展示了Timer::reset方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: Loop
void Loop(int loop, int test, const char* type_name) {
LoopVariant<T>(loop,test);
Kokkos::Impl::Timer timer;
T res = LoopVariant<T>(loop,test);
double time1 = timer.seconds();
timer.reset();
T resNonAtomic = LoopVariantNonAtomic<T>(loop,test);
double time2 = timer.seconds();
timer.reset();
T resSerial = LoopVariantSerial<T>(loop,test);
double time3 = timer.seconds();
time1*=1e6/loop;
time2*=1e6/loop;
time3*=1e6/loop;
textcolor_standard();
bool passed = true;
if(resSerial!=res) passed = false;
if(!passed) textcolor(RESET,BLACK,YELLOW);
printf("%s Test %i %s --- Loop: %i Value (S,A,NA): %e %e %e Time: %7.4e %7.4e %7.4e Size of Type %i)",type_name,test,passed?"PASSED":"FAILED",loop,1.0*resSerial,1.0*res,1.0*resNonAtomic,time1,time2,time3,(int)sizeof(T));
if(!passed) textcolor_standard();
printf("\n");
}
示例2: test_global_to_local_ids
// Times four phases of a global-to-local id lookup benchmark and prints each
// elapsed time to std::cout: allocation of the id view and the map, id
// generation, map fill, and `num_find_iterations` lookup passes.
//
// num_ids             - length of the local-id view.
// capacity            - capacity passed to the UnorderedMap constructor.
// num_find_iterations - number of find_test passes run when the fill succeeded.
//
// Returns the error count, initialised from global_2_local.failed_insert().
// The lookup passes only run when that count is zero. num_errors is also
// handed to find_test — presumably by reference so lookups can add to it;
// confirm against find_test's signature.
size_t test_global_to_local_ids(unsigned num_ids, unsigned capacity, unsigned num_find_iterations)
{
  typedef Device execution_space;
  typedef typename execution_space::size_type size_type;
  typedef Kokkos::View<uint32_t*,execution_space> local_id_view;
  typedef Kokkos::UnorderedMap<uint32_t,size_type,execution_space> global_id_view;

  double elapsed_time = 0;
  Kokkos::Impl::Timer timer;

  local_id_view local_2_global("local_ids", num_ids);
  global_id_view global_2_local(capacity);

  int shiftw = 15;

  // create
  elapsed_time = timer.seconds();
  std::cout << std::setw(shiftw) << "allocate: " << elapsed_time << std::endl;
  timer.reset();

  // generate unique ids
  {
    generate_ids<Device> gen(local_2_global);
  }
  // Wait for outstanding device work before reading the timer, so the
  // measurement covers the whole phase and not just its launch.
  execution_space::fence();

  // generate
  elapsed_time = timer.seconds();
  std::cout << std::setw(shiftw) << "generate: " << elapsed_time << std::endl;
  timer.reset();

  {
    fill_map<Device> fill(global_2_local, local_2_global);
  }
  execution_space::fence();

  // fill
  elapsed_time = timer.seconds();
  std::cout << std::setw(shiftw) << "fill: " << elapsed_time << std::endl;
  timer.reset();

  size_t num_errors = global_2_local.failed_insert();

  if (num_errors == 0u) {
    for (unsigned i=0; i<num_find_iterations; ++i)
    {
      find_test<Device> find(global_2_local, local_2_global,num_errors);
    }
    execution_space::fence();

    // find
    elapsed_time = timer.seconds();
    std::cout << std::setw(shiftw) << "lookup: " << elapsed_time << std::endl;
  }
  else {
    std::cout << " !!! Fill Failed !!!" << std::endl;
  }

  return num_errors;
}
示例3: main
int main(int narg, char* arg[]) {
Kokkos::initialize(narg,arg);
int size = 1000000;
// Create DualViews. This will allocate on both the device and its
// host_mirror_device.
idx_type idx("Idx",size,64);
view_type dest("Dest",size);
view_type src("Src",size);
srand(134231);
// Get a reference to the host view of idx directly (equivalent to
// idx.view<idx_type::host_mirror_device_type>() )
idx_type::t_host h_idx = idx.h_view;
for (int i = 0; i < size; ++i) {
for (view_type::size_type j=0; j < h_idx.dimension_1 (); ++j) {
h_idx(i,j) = (size + i + (rand () % 500 - 250)) % size;
}
}
// Mark idx as modified on the host_mirror_device_type so that a
// sync to the device will actually move data. The sync happens in
// the functor's constructor.
idx.modify<idx_type::host_mirror_device_type>();
// Run on the device. This will cause a sync of idx to the device,
// since it was marked as modified on the host.
Kokkos::Impl::Timer timer;
Kokkos::parallel_for(size,localsum<view_type::device_type>(idx,dest,src));
Kokkos::fence();
double sec1_dev = timer.seconds();
timer.reset();
Kokkos::parallel_for(size,localsum<view_type::device_type>(idx,dest,src));
Kokkos::fence();
double sec2_dev = timer.seconds();
// Run on the host (could be the same as device). This will cause a
// sync back to the host of dest. Note that if the Device is CUDA,
// the data layout will not be optimal on host, so performance is
// lower than what it would be for a pure host compilation.
timer.reset();
Kokkos::parallel_for(size,localsum<view_type::host_mirror_device_type>(idx,dest,src));
Kokkos::fence();
double sec1_host = timer.seconds();
timer.reset();
Kokkos::parallel_for(size,localsum<view_type::host_mirror_device_type>(idx,dest,src));
Kokkos::fence();
double sec2_host = timer.seconds();
printf("Device Time with Sync: %lf without Sync: %lf \n",sec1_dev,sec2_dev);
printf("Host Time with Sync: %lf without Sync: %lf \n",sec1_host,sec2_host);
Kokkos::finalize();
}
示例4: test_global_to_local_ids
void test_global_to_local_ids(unsigned num_ids)
{
typedef Device device_type;
typedef typename device_type::size_type size_type;
typedef Kokkos::View<uint32_t*,device_type> local_id_view;
typedef Kokkos::UnorderedMap<uint32_t,size_type,device_type> global_id_view;
//size
std::cout << num_ids << ", ";
double elasped_time = 0;
Kokkos::Impl::Timer timer;
local_id_view local_2_global("local_ids", num_ids);
global_id_view global_2_local((3u*num_ids)/2u);
//create
elasped_time = timer.seconds();
std::cout << elasped_time << ", ";
timer.reset();
// generate unique ids
{
generate_ids<Device> gen(local_2_global);
}
Device::fence();
// generate
elasped_time = timer.seconds();
std::cout << elasped_time << ", ";
timer.reset();
{
fill_map<Device> fill(global_2_local, local_2_global);
}
Device::fence();
// fill
elasped_time = timer.seconds();
std::cout << elasped_time << ", ";
timer.reset();
size_t num_errors = 0;
for (int i=0; i<100; ++i)
{
find_test<Device> find(global_2_local, local_2_global,num_errors);
}
Device::fence();
// find
elasped_time = timer.seconds();
std::cout << elasped_time << std::endl;
ASSERT_EQ( num_errors, 0u);
}
示例5: main
int main(int narg, char* arg[]) {
Kokkos::initialize(narg,arg);
int size = 1000000;
// Create Views
idx_type idx("Idx",size,64);
view_type dest("Dest",size);
view_type src("Src",size);
srand(134231);
// When using UVM Cuda views can be accessed on the Host directly
for(int i=0; i<size; i++) {
for(int j=0; j<idx.dimension_1(); j++)
idx(i,j) = (size + i + (rand()%500 - 250))%size;
}
Kokkos::fence();
// Run on the device
// This will cause a sync of idx to the device since it was modified on the host
Kokkos::Impl::Timer timer;
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
Kokkos::fence();
double sec1_dev = timer.seconds();
// No data transfer will happen now, since nothing is accessed on the host
timer.reset();
Kokkos::parallel_for(size,localsum<view_type::execution_space>(idx,dest,src));
Kokkos::fence();
double sec2_dev = timer.seconds();
// Run on the host
// This will cause a sync back to the host of dest which was changed on the device
// Compare runtime here with the dual_view example: dest will be copied back in 4k blocks
// when they are accessed the first time during the parallel_for. Due to the latency of a memcpy
// this gives lower effective bandwidth when doing a manual copy via dual views
timer.reset();
Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src));
Kokkos::fence();
double sec1_host = timer.seconds();
// No data transfers will happen now
timer.reset();
Kokkos::parallel_for(size,localsum<Kokkos::HostSpace::execution_space>(idx,dest,src));
Kokkos::fence();
double sec2_host = timer.seconds();
printf("Device Time with Sync: %lf without Sync: %lf \n",sec1_dev,sec2_dev);
printf("Host Time with Sync: %lf without Sync: %lf \n",sec1_host,sec2_host);
Kokkos::finalize();
}
示例6: exampleCholByBlocks
KOKKOS_INLINE_FUNCTION
int exampleCholByBlocks(const string file_input,
const int nthreads,
const int max_task_dependence,
const int team_size,
const bool verbose) {
typedef ValueType value_type;
typedef OrdinalType ordinal_type;
typedef SizeType size_type;
typedef TaskFactory<Kokkos::Experimental::TaskPolicy<SpaceType>,
Kokkos::Experimental::Future<int,SpaceType> > TaskFactoryType;
typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> CrsMatrixBaseType;
typedef GraphHelper_Scotch<CrsMatrixBaseType> GraphHelperType;
typedef CrsMatrixView<CrsMatrixBaseType> CrsMatrixViewType;
typedef TaskView<CrsMatrixViewType,TaskFactoryType> CrsTaskViewType;
typedef CrsMatrixBase<CrsTaskViewType,ordinal_type,size_type,SpaceType,MemoryTraits> CrsHierMatrixBaseType;
typedef CrsMatrixView<CrsHierMatrixBaseType> CrsHierMatrixViewType;
typedef TaskView<CrsHierMatrixViewType,TaskFactoryType> CrsHierTaskViewType;
int r_val = 0;
Kokkos::Impl::Timer timer;
double t = 0.0;
cout << "CholByBlocks:: import input file = " << file_input << endl;
CrsMatrixBaseType AA("AA");
{
timer.reset();
ifstream in;
in.open(file_input);
if (!in.good()) {
cout << "Failed in open the file: " << file_input << endl;
return ++r_val;
}
AA.importMatrixMarket(in);
t = timer.seconds();
if (verbose)
cout << AA << endl;
}
cout << "CholByBlocks:: import input file::time = " << t << endl;
cout << "CholByBlocks:: reorder the matrix" << endl;
CrsMatrixBaseType UU("UU"); // permuted base matrix
CrsHierMatrixBaseType HU("HU"); // hierarchical matrix of views
{
timer.reset();
typename GraphHelperType::size_type_array rptr(AA.Label()+"Graph::RowPtrArray", AA.NumRows() + 1);
typename GraphHelperType::ordinal_type_array cidx(AA.Label()+"Graph::ColIndexArray", AA.NumNonZeros());
AA.convertGraph(rptr, cidx);
GraphHelperType S(AA.Label()+"ScotchHelper",
AA.NumRows(),
rptr,
cidx);
S.computeOrdering();
CrsMatrixBaseType PA("Permuted AA");
PA.copy(S.PermVector(), S.InvPermVector(), AA);
UU.copy(Uplo::Upper, PA);
CrsMatrixHelper::flat2hier(Uplo::Upper, UU, HU,
S.NumBlocks(),
S.RangeVector(),
S.TreeVector());
for (ordinal_type k=0;k<HU.NumNonZeros();++k)
HU.Value(k).fillRowViewArray();
t = timer.seconds();
if (verbose)
cout << UU << endl;
}
cout << "CholByBlocks:: reorder the matrix::time = " << t << endl;
const size_t max_concurrency = 16384;
cout << "CholByBlocks:: max concurrency = " << max_concurrency << endl;
const size_t max_task_size = 3*sizeof(CrsTaskViewType)+128;
cout << "CholByBlocks:: max task size = " << max_task_size << endl;
typename TaskFactoryType::policy_type policy(max_concurrency,
max_task_size,
max_task_dependence,
team_size);
TaskFactoryType::setMaxTaskDependence(max_task_dependence);
TaskFactoryType::setPolicy(&policy);
//.........这里部分代码省略.........
示例7: exampleSymbolicFactor
// Reads a Matrix Market file, optionally reorders the matrix with the Scotch
// graph helper, and builds the non-zero pattern of the upper factor, printing
// the elapsed time of each stage to cout.
//
// file_input  - path of the Matrix Market file to import.
// treecut, minblksize - forwarded to GraphHelperType::computeOrdering.
// seed        - forwarded to the GraphHelperType constructor.
// fill_level  - forwarded to createNonZeroPattern.
// league_size - forwarded to the SymbolicFactorHelperType constructor.
// reorder     - when false the input matrix is used as is and the reported
//               reorder time is 0.
// verbose     - additionally streams the matrices and helper objects.
//
// Returns 0 on success, or 1 when the input file cannot be opened.
KOKKOS_INLINE_FUNCTION
int exampleSymbolicFactor(const string file_input,
                          const int treecut,
                          const int minblksize,
                          const int seed,
                          const int fill_level,
                          const int league_size,
                          const bool reorder,
                          const bool verbose) {
  typedef ValueType value_type;
  typedef OrdinalType ordinal_type;
  typedef SizeType size_type;

  typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> CrsMatrixBaseType;
  typedef GraphHelper_Scotch<CrsMatrixBaseType> GraphHelperType;
  typedef SymbolicFactorHelper<CrsMatrixBaseType> SymbolicFactorHelperType;

  int r_val = 0;

  Kokkos::Impl::Timer stopwatch;
  double elapsed = 0.0;

  // Stage 1: import the matrix. The stream lives in its own scope so it is
  // destroyed before the later stages start.
  cout << "SymbolicFactor:: import input file = " << file_input << endl;
  CrsMatrixBaseType AA("AA");
  {
    stopwatch.reset();

    ifstream input(file_input);
    if (!input.good()) {
      cout << "Failed in open the file: " << file_input << endl;
      return ++r_val;
    }
    AA.importMatrixMarket(input);

    elapsed = stopwatch.seconds();

    cout << "SymbolicFactor:: AA nnz = " << AA.NumNonZeros() << endl;
    if (verbose) {
      cout << AA << endl;
    }
  }
  cout << "SymbolicFactor:: import input file::time = " << elapsed << endl;

  // Stage 2: optional reordering. The graph helper is constructed either way.
  CrsMatrixBaseType PA("Permuted AA");
  GraphHelperType S(AA, seed);
  if (!reorder) {
    PA = AA;
    elapsed = 0.0;
  } else {
    stopwatch.reset();

    S.computeOrdering(treecut, minblksize);
    PA.copy(S.PermVector(), S.InvPermVector(), AA);

    elapsed = stopwatch.seconds();

    if (verbose) {
      cout << S << endl;
      cout << PA << endl;
    }
  }
  cout << "SymbolicFactor:: reorder the matrix::time = " << elapsed << endl;

  // Stage 3: build the non-zero pattern of the upper factor from PA.
  CrsMatrixBaseType UU("UU");
  {
    stopwatch.reset();

    SymbolicFactorHelperType symbolic(PA, league_size);
    symbolic.createNonZeroPattern(fill_level, Uplo::Upper, UU);

    elapsed = stopwatch.seconds();

    cout << "SymbolicFactor:: UU nnz = " << UU.NumNonZeros() << endl;
    if (verbose) {
      cout << symbolic << endl;
      cout << UU << endl;
    }
  }
  cout << "SymbolicFactor:: factorize the matrix::time = " << elapsed << endl;

  return r_val;
}
示例8: factor_inc_lvl
BASKER_INLINE
int Basker<Int, Entry, Exe_Space>::factor_inc_lvl(Int option)
{
printf("Factor Inc Level Called \n");
gn = A.ncol;
gm = A.nrow;
if(Options.btf == BASKER_TRUE)
{
//JDB: We can change this for the new inteface
//call reference copy constructor
gn = A.ncol;
gm = A.nrow;
A = BTF_A;
//printf("\n\n Switching A, newsize: %d \n",
// A.ncol);
//printMTX("A_FACTOR.mtx", A);
}
//Spit into Domain and Sep
//----------------------Domain-------------------------//
#ifdef BASKER_KOKKOS
//====TIMER==
#ifdef BASKER_TIME
Kokkos::Impl::Timer timer;
#endif
//===TIMER===
typedef Kokkos::TeamPolicy<Exe_Space> TeamPolicy;
if(btf_tabs_offset != 0)
{
kokkos_nfactor_domain_inc_lvl <Int,Entry,Exe_Space>
domain_nfactor(this);
Kokkos::parallel_for(TeamPolicy(num_threads,1),
domain_nfactor);
Kokkos::fence();
//=====Check for error======
while(true)
{
INT_1DARRAY thread_start;
MALLOC_INT_1DARRAY(thread_start, num_threads+1);
init_value(thread_start, num_threads+1,
(Int) BASKER_MAX_IDX);
int nt = nfactor_domain_error(thread_start);
if(nt == BASKER_SUCCESS)
{
break;
}
else
{
printf("restart \n");
kokkos_nfactor_domain_remalloc <Int, Entry, Exe_Space>
diag_nfactor_remalloc(this, thread_start);
Kokkos::parallel_for(TeamPolicy(num_threads,1),
diag_nfactor_remalloc);
Kokkos::fence();
}
}//end while
//====TIMER===
#ifdef BASKER_TIME
printf("Time DOMAIN: %f \n", timer.seconds());
timer.reset();
#endif
//====TIMER====
#else// else basker_kokkos
#pragma omp parallel
{
}//end omp parallel
#endif //end basker_kokkos
}
//-------------------End--Domian--------------------------//
//---------------------------Sep--------------------------//
if(btf_tabs_offset != 0)
{
//for(Int l=1; l<=1; l++)
for(Int l=1; l <= tree.nlvls; l++)
{
//.........这里部分代码省略.........
示例9: exampleCholUnblocked
// Imports a Matrix Market file, copies its upper part into UU, and runs the
// selected unblocked Cholesky task on it, printing the elapsed time of the
// import and of the factorization to cout.
//
// file_input          - path of the Matrix Market file to import.
// max_task_dependence - forwarded to the task policy and the task factory.
// team_size           - forwarded to the task policy unless
//                       __USE_FIXED_TEAM_SIZE__ is defined.
// algo                - AlgoChol::UnblockedOpt or AlgoChol::Dummy; anything
//                       else is reported through ERROR.
// variant             - Variant::One or Variant::Two; only consulted for
//                       AlgoChol::UnblockedOpt.
// verbose             - additionally streams UU after each stage.
//
// Returns 0 on success, or 1 when the input file cannot be opened.
KOKKOS_INLINE_FUNCTION
int exampleCholUnblocked(const string file_input,
                         const int max_task_dependence,
                         const int team_size,
                         const int algo,
                         const int variant,
                         const bool verbose) {
  typedef ValueType value_type;
  typedef OrdinalType ordinal_type;
  typedef SizeType size_type;

  typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> CrsMatrixBaseType;
  typedef CrsMatrixView<CrsMatrixBaseType> CrsMatrixViewType;

  typedef TaskFactory<Kokkos::Experimental::TaskPolicy<SpaceType>,
    Kokkos::Experimental::Future<int,SpaceType> > TaskFactoryType;

  typedef TaskView<CrsMatrixViewType,TaskFactoryType> CrsTaskViewType;

  int r_val = 0;

  Kokkos::Impl::Timer stopwatch;
  double elapsed = 0.0;

  // Stage 1: import the matrix and keep its upper part in UU.
  cout << "CholUnblocked:: import input file = " << file_input << endl;
  CrsMatrixBaseType AA("AA"), UU("UU");
  {
    stopwatch.reset();

    ifstream input(file_input);
    if (!input.good()) {
      cout << "Failed in open the file: " << file_input << endl;
      return ++r_val;
    }
    AA.importMatrixMarket(input);

    UU.copy(Uplo::Upper, AA);

    elapsed = stopwatch.seconds();

    if (verbose) {
      cout << UU << endl;
    }
  }
  cout << "CholUnblocked:: import input file::time = " << elapsed << endl;

  // The policy object must stay alive for the rest of the function because
  // the task factory is handed its address.
#ifdef __USE_FIXED_TEAM_SIZE__
  typename TaskFactoryType::policy_type policy(max_task_dependence);
#else
  typename TaskFactoryType::policy_type policy(max_task_dependence, team_size);
#endif
  TaskFactoryType::setMaxTaskDependence(max_task_dependence);
  TaskFactoryType::setPolicy(&policy);

  // Stage 2: create the task for the requested algorithm, spawn it and wait.
  cout << "CholUnblocked:: factorize the matrix" << endl;
  CrsTaskViewType U(&UU);
  U.fillRowViewArray();
  {
    stopwatch.reset();

    typename TaskFactoryType::future_type rootTask;
    switch (algo) {
    case AlgoChol::UnblockedOpt: {
      if (variant == Variant::One) {
        rootTask = TaskFactoryType::Policy().create_team(Chol<Uplo::Upper,AlgoChol::UnblockedOpt,Variant::One>
                                                         ::TaskFunctor<CrsTaskViewType>(U), 0);
      } else if (variant == Variant::Two) {
        rootTask = TaskFactoryType::Policy().create_team(Chol<Uplo::Upper,AlgoChol::UnblockedOpt,Variant::Two>
                                                         ::TaskFunctor<CrsTaskViewType>(U), 0);
      } else {
        ERROR(">> Not supported algorithm variant");
      }
      break;
    }
    case AlgoChol::Dummy: {
      rootTask = TaskFactoryType::Policy().create_team(Chol<Uplo::Upper,AlgoChol::Dummy>
                                                       ::TaskFunctor<CrsTaskViewType>(U), 0);
      break;
    }
    default:
      ERROR(">> Not supported algorithm");
      break;
    }
    TaskFactoryType::Policy().spawn(rootTask);
    Kokkos::Experimental::wait(TaskFactoryType::Policy());

    elapsed = stopwatch.seconds();

    if (verbose) {
      cout << UU << endl;
    }
  }
  cout << "CholUnblocked:: factorize the matrix::time = " << elapsed << endl;

  return r_val;
}
示例10: exampleMatrixMarket
int exampleMatrixMarket(const std::string file_input,
const bool verbose) {
typedef typename
Kokkos::Impl::is_space<DeviceSpaceType>::host_mirror_space::execution_space HostSpaceType ;
const bool detail = false;
std::cout << "DeviceSpace:: "; DeviceSpaceType::print_configuration(std::cout, detail);
std::cout << "HostSpace:: "; HostSpaceType::print_configuration(std::cout, detail);
typedef CrsMatrixBase<value_type,ordinal_type,size_type,HostSpaceType> CrsMatrixBaseHostType;
int r_val = 0;
Kokkos::Impl::Timer timer;
CrsMatrixBaseHostType AA("AA");
timer.reset();
{
std::ifstream in;
in.open(file_input);
if (!in.good()) {
std::cout << "Failed in open the file: " << file_input << std::endl;
return -1;
}
MatrixMarket::read(AA, in);
}
double t_read = timer.seconds();
timer.reset();
{
std::string file_output = "mm-test-output.mtx";
std::ofstream out;
out.open(file_output);
if (!out.good()) {
std::cout << "Failed in open the file: " << file_output << std::endl;
return -1;
}
MatrixMarket::write(out, AA, "%% Test output");
}
double t_write = timer.seconds();
{
const auto prec = std::cout.precision();
std::cout.precision(4);
std::cout << std::scientific
<< "MatrixMarket:: dimension = " << AA.NumRows() << " x " << AA.NumCols()
<< ", " << " nnz = " << AA.NumNonZeros() << ", "
<< "read = " << t_read << " [sec], "
<< "write = " << t_write << " [sec] "
<< std::endl;
std::cout.unsetf(std::ios::scientific);
std::cout.precision(prec);
}
if (verbose) {
AA.showMe(std::cout) << std::endl;
}
CrsMatrixBaseHostType BB("BB");
BB.createConfTo(AA);
CrsMatrixTools::copy(BB, Uplo::Upper, 0, AA);
if (verbose) {
BB.setLabel("Copy::AA:Upper::0"); BB.showMe(std::cout) << std::endl;
}
CrsMatrixTools::copy(BB, Uplo::Upper, 1, AA);
if (verbose) {
BB.setLabel("Copy::AA:Upper::1"); BB.showMe(std::cout) << std::endl;
}
CrsMatrixTools::copy(BB, Uplo::Lower, 0, AA);
if (verbose) {
BB.setLabel("Copy::AA:Lower::0"); BB.showMe(std::cout) << std::endl;
}
CrsMatrixTools::copy(BB, Uplo::Lower, 1, AA);
if (verbose) {
BB.setLabel("Copy::AA:Lower::1"); BB.showMe(std::cout) << std::endl;
}
return r_val;
}
示例11: exampleTriSolvePerformance
KOKKOS_INLINE_FUNCTION
int exampleTriSolvePerformance(const string file_input,
const OrdinalType nrhs,
const OrdinalType nb,
const int niter,
const int nthreads,
const int max_task_dependence,
const int team_size,
const bool team_interface,
const bool skip_serial,
const bool verbose) {
typedef ValueType value_type;
typedef OrdinalType ordinal_type;
typedef SizeType size_type;
typedef TaskFactory<Kokkos::Experimental::TaskPolicy<SpaceType>,
Kokkos::Experimental::Future<int,SpaceType> > TaskFactoryType;
typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> CrsMatrixBaseType;
typedef GraphHelper_Scotch<CrsMatrixBaseType> GraphHelperType;
typedef CrsMatrixView<CrsMatrixBaseType> CrsMatrixViewType;
typedef TaskView<CrsMatrixViewType,TaskFactoryType> CrsTaskViewType;
typedef CrsMatrixBase<CrsTaskViewType,ordinal_type,size_type,SpaceType,MemoryTraits> CrsHierMatrixBaseType;
typedef CrsMatrixView<CrsHierMatrixBaseType> CrsHierMatrixViewType;
typedef TaskView<CrsHierMatrixViewType,TaskFactoryType> CrsHierTaskViewType;
typedef DenseMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> DenseMatrixBaseType;
typedef DenseMatrixView<DenseMatrixBaseType> DenseMatrixViewType;
typedef TaskView<DenseMatrixViewType,TaskFactoryType> DenseTaskViewType;
typedef DenseMatrixBase<DenseTaskViewType,ordinal_type,size_type,SpaceType,MemoryTraits> DenseHierMatrixBaseType;
typedef DenseMatrixView<DenseHierMatrixBaseType> DenseHierMatrixViewType;
typedef TaskView<DenseHierMatrixViewType,TaskFactoryType> DenseHierTaskViewType;
int r_val = 0;
Kokkos::Impl::Timer timer;
double
t_import = 0.0,
t_reorder = 0.0,
t_solve_seq = 0.0,
t_solve_task = 0.0;
const int start = -2;
cout << "TriSolvePerformance:: import input file = " << file_input << endl;
CrsMatrixBaseType AA("AA");
{
timer.reset();
ifstream in;
in.open(file_input);
if (!in.good()) {
cout << "Failed in open the file: " << file_input << endl;
return ++r_val;
}
AA.importMatrixMarket(in);
t_import = timer.seconds();
if (verbose)
cout << AA << endl;
}
cout << "TriSolvePerformance:: import input file::time = " << t_import << endl;
CrsMatrixBaseType UU("UU");
DenseMatrixBaseType BB("BB", AA.NumRows(), nrhs);
cout << "TriSolvePerformance:: reorder the matrix and partition right hand side, nb = " << nb << endl;
CrsHierMatrixBaseType HU("HU");
DenseHierMatrixBaseType HB("HB");
{
timer.reset();
GraphHelperType S(AA);
S.computeOrdering();
CrsMatrixBaseType PA("Permuted AA");
PA.copy(S.PermVector(), S.InvPermVector(), AA);
UU.copy(Uplo::Upper, PA);
CrsMatrixHelper::flat2hier(Uplo::Upper, UU, HU,
S.NumBlocks(),
S.RangeVector(),
S.TreeVector());
DenseMatrixHelper::flat2hier(BB, HB,
S.NumBlocks(),
S.RangeVector(),
nb);
t_reorder = timer.seconds();
cout << "TriSolvePerformance:: Hier (dof, nnz) = " << HU.NumRows() << ", " << HU.NumNonZeros() << endl;
//.........这里部分代码省略.........
示例12: exampleDenseGemmByBlocks
KOKKOS_INLINE_FUNCTION
int exampleDenseGemmByBlocks(const OrdinalType mmin,
const OrdinalType mmax,
const OrdinalType minc,
const OrdinalType k,
const OrdinalType mb,
const int max_concurrency,
const int max_task_dependence,
const int team_size,
const int mkl_nthreads,
const bool check,
const bool verbose) {
typedef ValueType value_type;
typedef OrdinalType ordinal_type;
typedef SizeType size_type;
typedef TaskFactory<Kokkos::Experimental::TaskPolicy<SpaceType>,
Kokkos::Experimental::Future<int,SpaceType> > TaskFactoryType;
typedef DenseMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> DenseMatrixBaseType;
typedef DenseMatrixView<DenseMatrixBaseType> DenseMatrixViewType;
typedef TaskView<DenseMatrixViewType,TaskFactoryType> DenseTaskViewType;
typedef DenseMatrixBase<DenseTaskViewType,ordinal_type,size_type,SpaceType,MemoryTraits> DenseHierMatrixBaseType;
typedef DenseMatrixView<DenseHierMatrixBaseType> DenseHierMatrixViewType;
typedef TaskView<DenseHierMatrixViewType,TaskFactoryType> DenseHierTaskViewType;
int r_val = 0;
Kokkos::Impl::Timer timer;
double t = 0.0;
cout << "DenseGemmByBlocks:: test matrices "
<<":: mmin = " << mmin << " , mmax = " << mmax << " , minc = " << minc << " , k = "<< k << " , mb = " << mb << endl;
const size_t max_task_size = (3*sizeof(DenseTaskViewType)+196); // when 128 error
//cout << "max task size = "<< max_task_size << endl;
typename TaskFactoryType::policy_type policy(max_concurrency,
max_task_size,
max_task_dependence,
team_size);
TaskFactoryType::setMaxTaskDependence(max_task_dependence);
TaskFactoryType::setPolicy(&policy);
ostringstream os;
os.precision(3);
os << scientific;
for (ordinal_type m=mmin;m<=mmax;m+=minc) {
os.str("");
DenseMatrixBaseType AA, BB, CC("CC", m, m), CB("CB", m, m);
if (ArgTransA == Trans::NoTranspose)
AA = DenseMatrixBaseType("AA", m, k);
else
AA = DenseMatrixBaseType("AA", k, m);
if (ArgTransB == Trans::NoTranspose)
BB = DenseMatrixBaseType("BB", k, m);
else
BB = DenseMatrixBaseType("BB", m, k);
for (ordinal_type j=0;j<AA.NumCols();++j)
for (ordinal_type i=0;i<AA.NumRows();++i)
AA.Value(i,j) = 2.0*((value_type)rand()/(RAND_MAX)) - 1.0;
for (ordinal_type j=0;j<BB.NumCols();++j)
for (ordinal_type i=0;i<BB.NumRows();++i)
BB.Value(i,j) = 2.0*((value_type)rand()/(RAND_MAX)) - 1.0;
for (ordinal_type j=0;j<CC.NumCols();++j)
for (ordinal_type i=0;i<CC.NumRows();++i)
CC.Value(i,j) = 2.0*((value_type)rand()/(RAND_MAX)) - 1.0;
CB.copy(CC);
const double flop = get_flop_gemm<value_type>(m, m, k);
#ifdef HAVE_SHYLUTACHO_MKL
mkl_set_num_threads(mkl_nthreads);
#endif
os << "DenseGemmByBlocks:: m = " << m << " n = " << m << " k = " << k;
if (check) {
timer.reset();
DenseTaskViewType A(&AA), B(&BB), C(&CB);
Gemm<ArgTransA,ArgTransB,AlgoGemm::ExternalBlas>::invoke
(TaskFactoryType::Policy(),
TaskFactoryType::Policy().member_single(),
1.0, A, B, 1.0, C);
t = timer.seconds();
os << ":: Serial Performance = " << (flop/t/1.0e9) << " [GFLOPs] ";
}
{
DenseHierMatrixBaseType HA, HB, HC;
DenseMatrixHelper::flat2hier(AA, HA, mb, mb);
//.........这里部分代码省略.........
示例13: exampleDenseMatrixBase
int exampleDenseMatrixBase(const ordinal_type mmin,
const ordinal_type mmax,
const ordinal_type minc,
const bool verbose) {
typedef typename
Kokkos::Impl::is_space<DeviceSpaceType>::host_mirror_space::execution_space HostSpaceType ;
const bool detail = false;
std::cout << "DeviceSpace:: ";
DeviceSpaceType::print_configuration(std::cout, detail);
std::cout << "HostSpace:: ";
HostSpaceType::print_configuration(std::cout, detail);
std::cout << std::endl;
typedef DenseMatrixBase<value_type,ordinal_type,size_type,HostSpaceType> DenseMatrixBaseHostType;
typedef DenseMatrixBase<value_type,ordinal_type,size_type,DeviceSpaceType> DenseMatrixBaseDeviceType;
int r_val = 0;
Kokkos::Impl::Timer timer;
std::cout << "DenseMatrixBase:: test matrices "
<<":: mmin = " << mmin << " , mmax = " << mmax << " , minc = " << minc << std::endl;
for (auto m=mmin; m<=mmax; m+=minc) {
// random test matrix on host
DenseMatrixBaseHostType TT("TT", m, m);
for (ordinal_type j=0; j<TT.NumCols(); ++j) {
for (ordinal_type i=0; i<TT.NumRows(); ++i)
TT.Value(i,j) = 2.0*((value_type)std::rand()/(RAND_MAX)) - 1.0;
TT.Value(j,j) = std::fabs(TT.Value(j,j));
}
if (verbose)
TT.showMe(std::cout) << std::endl;
DenseMatrixBaseDeviceType AA("AA");
timer.reset();
AA.mirror(TT);
double t_mirror = timer.seconds();
DenseMatrixBaseDeviceType BB("BB");
BB.createConfTo(AA);
timer.reset();
DenseMatrixTools::copy(BB, AA);
double t_copy = timer.seconds();
// check
DenseMatrixBaseHostType RR("RR");
RR.createConfTo(BB);
RR.mirror(BB);
if (verbose)
RR.showMe(std::cout) << std::endl;
double err = 0.0;
for (ordinal_type j=0; j<TT.NumCols(); ++j)
for (ordinal_type i=0; i<TT.NumRows(); ++i)
err += std::fabs(TT.Value(i,j) - RR.Value(i,j));
{
const auto prec = std::cout.precision();
std::cout.precision(4);
std::cout << std::scientific
<< "DenseMatrixBase:: dimension = " << m << " x " << m << ", "
<< "Mirroring to device = " << t_mirror << " [sec], "
<< "Elementwise copy on device = " << t_copy << " [sec], "
<< "Error = " << err
<< std::endl;
std::cout.unsetf(std::ios::scientific);
std::cout.precision(prec);
}
}
return r_val;
}
示例14: exampleICholUnblocked
// Imports a Matrix Market file, copies its upper part into UU, and runs the
// IChol UnblockedOpt1 task on it, printing the elapsed time of the import and
// of the factorization to cout.
//
// file_input          - path of the Matrix Market file to import.
// max_task_dependence - forwarded to the task policy and the task factory.
// team_size           - forwarded to the task policy unless
//                       __USE_FIXED_TEAM_SIZE__ is defined.
// verbose             - additionally streams UU after each stage.
//
// Returns 0 on success, or 1 when the input file cannot be opened.
KOKKOS_INLINE_FUNCTION
int exampleICholUnblocked(const string file_input,
                          const int max_task_dependence,
                          const int team_size,
                          const bool verbose) {
  typedef ValueType value_type;
  typedef OrdinalType ordinal_type;
  typedef SizeType size_type;

  typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> CrsMatrixBaseType;
  typedef CrsMatrixView<CrsMatrixBaseType> CrsMatrixViewType;

  typedef TaskTeamFactory<Kokkos::Experimental::TaskPolicy<SpaceType>,
    Kokkos::Experimental::Future<int,SpaceType>,
    Kokkos::Impl::TeamThreadRangeBoundariesStruct> TaskFactoryType;
  typedef ParallelFor ForType;

  typedef TaskView<CrsMatrixViewType,TaskFactoryType> CrsTaskViewType;

  int r_val = 0;

  Kokkos::Impl::Timer stopwatch;
  double elapsed = 0.0;

  // Stage 1: import the matrix and keep its upper part in UU.
  cout << "ICholUnblocked:: import input file = " << file_input << endl;
  CrsMatrixBaseType AA("AA"), UU("UU");
  {
    stopwatch.reset();

    ifstream input(file_input);
    if (!input.good()) {
      cout << "Failed in open the file: " << file_input << endl;
      return ++r_val;
    }
    AA.importMatrixMarket(input);

    UU.copy(Uplo::Upper, AA);

    elapsed = stopwatch.seconds();

    if (verbose) {
      cout << UU << endl;
    }
  }
  cout << "ICholUnblocked:: import input file::time = " << elapsed << endl;

  // The policy object must stay alive for the rest of the function because
  // the task factory is handed its address.
#ifdef __USE_FIXED_TEAM_SIZE__
  typename TaskFactoryType::policy_type policy(max_task_dependence);
#else
  typename TaskFactoryType::policy_type policy(max_task_dependence, team_size);
#endif
  TaskFactoryType::setMaxTaskDependence(max_task_dependence);
  TaskFactoryType::setPolicy(&policy);

  // Stage 2: create the factorization task, spawn it and wait for the policy.
  cout << "ICholUnblocked:: factorize the matrix" << endl;
  CrsTaskViewType U(&UU);
  U.fillRowViewArray();
  {
    stopwatch.reset();

    auto rootTask = TaskFactoryType::Policy().create_team(IChol<Uplo::Upper,AlgoIChol::UnblockedOpt1>
                                                          ::TaskFunctor<ForType,CrsTaskViewType>(U), 0);
    TaskFactoryType::Policy().spawn(rootTask);
    Kokkos::Experimental::wait(TaskFactoryType::Policy());

    elapsed = stopwatch.seconds();

    if (verbose) {
      cout << UU << endl;
    }
  }
  cout << "ICholUnblocked:: factorize the matrix::time = " << elapsed << endl;

  return r_val;
}
示例15: color
// Speculative colouring driver. Repeats "greedy colouring, then conflict
// detection" for at most 20 iterations or until findConflicts() reports no
// uncoloured vertices, then resolves whatever is left with resolveConflicts().
//
// useConflictList          - when true, switches _conflictType to
//                            CONFLICT_LIST before the loop starts.
// serialConflictResolution - when true, the loop stops after the first
//                            conflict-detection pass.
// ticToc                   - when true, prints the elapsed time of every
//                            phase. The accumulated total is printed only
//                            when the final resolution step runs.
void color(bool useConflictList, bool serialConflictResolution, bool ticToc){
  Ordinal numUncolored = _size; // on host
  double t = 0.0, total = 0.0;
  Kokkos::Impl::Timer timer;

  if(useConflictList)
    _conflictType = CONFLICT_LIST;

  // While vertices to color, do speculative coloring.
  int iter = 0;
  for(iter = 0; (iter<20) && (numUncolored>0); iter++){
    std::cout<< "Start iteration " << iter << std::endl;

    // First color greedy speculatively, some conflicts expected
    this -> colorGreedy();
    ExecSpace::fence();
    if(ticToc){
      t = timer.seconds();
      total += t;
      // Stream the measured time; it was previously left out of this line.
      std::cout << "Time speculative greedy phase " << iter << " : " << t << std::endl;
      timer.reset();
    }

#ifdef DEBUG
    // UVM required - will be slow!
    printf("\n 100 first vertices: ");
    for(int i = 0; i < 100; i++){
      printf(" %i", _colors[i]);
    }
    printf("\n");
#endif

    // Check for conflicts (parallel), find vertices to recolor
    numUncolored = this -> findConflicts();
    ExecSpace::fence();
    if(ticToc){
      t = timer.seconds();
      total += t;
      std::cout << "Time conflict detection " << iter << " : " << t << std::endl;
      timer.reset();
    }
    if (serialConflictResolution) break; // Break after first iteration

    // Swap the vertex list with the recolor list for the next iteration: the
    // lengths are updated through the host accessors and then copied into
    // _vertexListLength and _recolorListLength.
    if(_conflictType == CONFLICT_LIST){
      array_type temp = _vertexList;
      _vertexList = _recolorList;
      host_vertexListLength() = host_recolorListLength();
      _recolorList = temp;
      host_recolorListLength() = 0;
      Kokkos::deep_copy(_vertexListLength, host_vertexListLength);
      Kokkos::deep_copy(_recolorListLength, host_recolorListLength);
    }
  }

  std::cout << "Number of coloring iterations: " << iter << std::endl;

  if(numUncolored > 0){
    // Resolve conflicts by recolor in serial
    this -> resolveConflicts();
    ExecSpace::fence();
    if(ticToc){
      t = timer.seconds();
      total += t;
      std::cout << "Time conflict resolution: " << t << std::endl;
      std::cout << "Total time: " << total << std::endl;
    }
  }
}