本文整理汇总了C++中Kokkos::Impl::Timer类的典型用法代码示例。如果您正苦于以下问题:C++ Timer类的具体用法?C++ Timer怎么用?C++ Timer使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Timer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: Loop
void Loop(int loop, int test, const char* type_name) {
LoopVariant<T>(loop,test);
Kokkos::Impl::Timer timer;
T res = LoopVariant<T>(loop,test);
double time = timer.seconds();
timer.reset();
T resNonAtomic = LoopVariantNonAtomic<T>(loop,test);
double timeNonAtomic = timer.seconds();
timer.reset();
T resSerial = LoopVariantSerial<T>(loop,test);
double timeSerial = timer.seconds();
time *=1e6/loop;
timeNonAtomic*=1e6/loop;
timeSerial *=1e6/loop;
//textcolor_standard();
bool passed = true;
if(resSerial!=res) passed = false;
//if(!passed) textcolor(RESET,BLACK,YELLOW);
printf("%s Test %i %s --- Loop: %i Value (S,A,NA): %e %e %e Time: %7.4e %7.4e %7.4e Size of Type %i)",
type_name,test,passed?"PASSED":"FAILED",loop,
1.0*resSerial,1.0*res,1.0*resNonAtomic,
timeSerial,time,timeNonAtomic,(int)sizeof(T));
//if(!passed) textcolor_standard();
printf("\n");
}
示例2: main
// Fills a DualView with random values in [0, TS), runs the find_2_tuples
// functor over it with a team policy, then prints the kernel time, the
// TS x TS histogram and the histogram total next to the number of input items.
int main(int narg, char* args[]) {
  Kokkos::initialize(narg,args);
  {
    // All Kokkos views live inside this scope so that they are destroyed
    // BEFORE Kokkos::finalize() runs; views must not outlive finalization.
    int chunk_size = 1024;
    int nchunks = 100000; //1024*1024;
    Kokkos::DualView<int*> data("data",nchunks*chunk_size+1);

    // Fixed seed: the generated input is the same on every run.
    srand(1231093);
    // size_t index avoids a signed/unsigned comparison with the view extent.
    for(size_t i = 0; i < data.dimension_0(); i++) {
      data.h_view(i) = rand()%TS;
    }
    // Mark the host copy as modified and push it to the device.
    data.modify<Host>();
    data.sync<Device>();

    Kokkos::DualView<int**> histogram("histogram",TS,TS);

    Kokkos::Impl::Timer timer;
    // Threads/team (TS) is automatically limited to the maximum supported by the device.
    Kokkos::parallel_for( team_policy( nchunks , TS )
                        , find_2_tuples(chunk_size,data,histogram) );
    // Wait for the kernel to finish so the timer covers its execution.
    Kokkos::fence();
    double time = timer.seconds();

    histogram.sync<Host>();
    printf("Time: %lf \n\n",time);

    int sum = 0;
    for(int k=0; k<TS; k++) {
      for(int l=0; l<TS; l++) {
        printf("%i ",histogram.h_view(k,l));
        sum += histogram.h_view(k,l);
      }
      printf("\n");
    }
    printf("Result: %i %i\n",sum,chunk_size*nchunks);
  }
  Kokkos::finalize();
  return 0;
}
示例3: main
// Fills a DualView with random values in [0, TS), runs the find_2_tuples
// functor over it through a ParallelWorkRequest, then prints the kernel time,
// the TS x TS histogram and the histogram total next to the number of input
// items.
int main(int narg, char* args[]) {
  Kokkos::initialize(narg,args);
  {
    // All Kokkos views live inside this scope so that they are destroyed
    // BEFORE Kokkos::finalize() runs; views must not outlive finalization.
    int chunk_size = 1024;
    int nchunks = 100000; //1024*1024;
    Kokkos::DualView<int*> data("data",nchunks*chunk_size+1);

    // Fixed seed: the generated input is the same on every run.
    srand(1231093);
    // size_t index avoids a signed/unsigned comparison with the view extent.
    for(size_t i = 0; i < data.dimension_0(); i++) {
      data.h_view(i) = rand()%TS;
    }
    // Mark the host copy as modified and push it to the device.
    data.modify<Host>();
    data.sync<Device>();

    Kokkos::DualView<int**> histogram("histogram",TS,TS);

    Kokkos::Impl::Timer timer;
    // The team size is TS, clamped to the device's maximum team size.
    Kokkos::parallel_for(
      Kokkos::ParallelWorkRequest(nchunks,TS<Device::team_max()?TS:Device::team_max()),
      find_2_tuples(chunk_size,data,histogram));
    // Wait for the kernel to finish so the timer covers its execution.
    Kokkos::fence();
    double time = timer.seconds();

    histogram.sync<Host>();
    printf("Time: %lf \n\n",time);

    int sum = 0;
    for(int k=0; k<TS; k++) {
      for(int l=0; l<TS; l++) {
        printf("%i ",histogram.h_view(k,l));
        sum += histogram.h_view(k,l);
      }
      printf("\n");
    }
    printf("Result: %i %i\n",sum,chunk_size*nchunks);
  }
  Kokkos::finalize();
  return 0;
}
示例4: test
static double test( const int count , const int iter = 1 )
{
elem_coord_type coord( "coord" , count );
elem_grad_type grad ( "grad" , count );
// Execute the parallel kernels on the arrays:
double dt_min = 0 ;
Kokkos::parallel_for( count , Init( coord ) );
device_type::fence();
for ( int i = 0 ; i < iter ; ++i ) {
Kokkos::Impl::Timer timer ;
Kokkos::parallel_for( count , HexGrad<device_type>( coord , grad ) );
device_type::fence();
const double dt = timer.seconds();
if ( 0 == i ) dt_min = dt ;
else dt_min = dt < dt_min ? dt : dt_min ;
}
return dt_min ;
}
示例5: main
// Reads a binary graph file (path given as argv[1]), copies it into Kokkos
// views, symmetrizes the graph with symmetrize_graph_symbolic_hashmap, and
// prints the elapsed time together with the original and symmetric arrays.
// Returns 0 on success; exits with status 1 when no input file is given.
int main (int argc, char ** argv){
  if (argc < 2){
    std::cerr << "Usage:" << argv[0] << " input_bin_file" << std::endl;
    exit(1);
  }
  Kokkos::initialize(argc, argv);
  MyExecSpace::print_configuration(std::cout);
  {
    // Every Kokkos view is confined to this scope so it is released BEFORE
    // Kokkos::finalize(); views must not outlive finalization.
    idx nv = 0, ne = 0;
    // Null-initialized so the pointers never hold indeterminate values.
    idx *xadj = nullptr, *adj = nullptr;
    wt *ew = nullptr;
    KokkosKernels::Experimental::Graph::Utils::read_graph_bin<idx, wt> (&nv, &ne, &xadj, &adj, &ew, argv[1]);
    std::cout << "nv:" << nv << " ne:" << ne << std::endl;

    // Wrap the raw buffers in views and copy them into named views.
    um_array_type _xadj (xadj, nv + 1);
    um_edge_array_type _adj (adj, ne);
    idx_array_type kok_xadj ("xadj", nv + 1);
    idx_edge_array_type kok_adj("adj", ne);
    idx_array_type sym_xadj;
    idx_edge_array_type sym_adj;
    Kokkos::deep_copy (kok_xadj, _xadj);
    Kokkos::deep_copy (kok_adj, _adj);
    wt_um_edge_array_type _mtx_vals (ew, ne);
    value_array_type kok_mtx_vals ("MTX_VALS", ne);
    Kokkos::deep_copy (kok_mtx_vals, _mtx_vals);

    // The raw buffers are no longer needed once copied; the wrapping views
    // above are not used past this point.
    // NOTE(review): assumes read_graph_bin allocates with new[] - confirm.
    delete [] xadj;
    delete [] adj;
    delete [] ew;

    std::cout << "Symetrizing Graph" << std::endl;
    Kokkos::Impl::Timer timer;
    KokkosKernels::Experimental::Util::symmetrize_graph_symbolic_hashmap<
        idx_array_type, idx_edge_array_type, idx_array_type, idx_edge_array_type, MyExecSpace>
        (nv, kok_xadj, kok_adj,sym_xadj, sym_adj);
    // Fence so the timer covers any asynchronous work launched above.
    Kokkos::fence();
    double t = timer.seconds();
    std::cout << "Time to symmetrize:" << t << std::endl;

    KokkosKernels::Experimental::Util::print_1Dview(kok_xadj);
    KokkosKernels::Experimental::Util::print_1Dview(kok_adj);
    std::cout << "Symetric Graph" << std::endl;
    KokkosKernels::Experimental::Util::print_1Dview(sym_xadj);
    KokkosKernels::Experimental::Util::print_1Dview(sym_adj);
  }
  Kokkos::finalize();
  return 0;
}
示例6: exampleCholDirectPerformance
KOKKOS_INLINE_FUNCTION
int exampleCholDirectPerformance(const string file_input,
const int treecut,
const int minblksize,
const int prunecut,
const int seed,
const int niter,
const int nthreads,
const int max_task_dependence,
const int team_size,
const int league_size,
const bool team_interface,
const bool skip_serial,
const bool mkl_interface,
const bool verbose) {
typedef ValueType value_type;
typedef OrdinalType ordinal_type;
typedef SizeType size_type;
typedef TaskFactory<Kokkos::Experimental::TaskPolicy<SpaceType>,
Kokkos::Experimental::Future<int,SpaceType> > TaskFactoryType;
typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> CrsMatrixBaseType;
typedef GraphHelper_Scotch<CrsMatrixBaseType> GraphHelperType;
typedef SymbolicFactorHelper<CrsMatrixBaseType> SymbolicFactorHelperType;
#ifdef HAVE_SHYLUTACHO_MKL
typedef typename CrsMatrixBaseType::value_type_array value_type_array;
#endif
typedef CrsMatrixView<CrsMatrixBaseType> CrsMatrixViewType;
typedef TaskView<CrsMatrixViewType,TaskFactoryType> CrsTaskViewType;
typedef CrsMatrixBase<CrsTaskViewType,ordinal_type,size_type,SpaceType,MemoryTraits> CrsHierMatrixBaseType;
typedef CrsMatrixView<CrsHierMatrixBaseType> CrsHierMatrixViewType;
typedef TaskView<CrsHierMatrixViewType,TaskFactoryType> CrsHierTaskViewType;
int r_val = 0;
Kokkos::Impl::Timer timer;
double
t_import = 0.0,
t_reorder = 0.0,
t_symbolic = 0.0,
t_flat2hier = 0.0,
#ifdef HAVE_SHYLUTACHO_MKL
t_mkl = 0.0,
#endif
t_factor_seq = 0.0, t_solve_seq = 0.0,
t_factor_task = 0.0, t_solve_task = 0.0;
const int start = 0;
cout << "CholDirectPerformance:: import input file = " << file_input << endl;
CrsMatrixBaseType AA("AA");
{
timer.reset();
ifstream in;
in.open(file_input);
if (!in.good()) {
cout << "Failed in open the file: " << file_input << endl;
return ++r_val;
}
AA.importMatrixMarket(in);
t_import = timer.seconds();
}
cout << "CholDirectPerformance:: import input file::time = " << t_import << endl;
cout << "CholDirectPerformance:: reorder the matrix" << endl;
CrsMatrixBaseType PA("Permuted AA");
CrsMatrixBaseType UU("UU"); // permuted base upper triangular matrix
CrsHierMatrixBaseType HU("HU"); // hierarchical matrix of views
DenseMatrixBaseType BB("BB", AA.NumRows(), nrhs);
DenseHierMatrixBaseType HB("HB");
{
GraphHelperType S(AA, seed);
{
timer.reset();
S.computeOrdering(treecut, minblksize);
S.pruneTree(prunecut);
PA.copy(S.PermVector(), S.InvPermVector(), AA);
t_reorder = timer.seconds();
}
cout << "CholDirectPerformance:: reorder the matrix::time = " << t_reorder << endl;
{
SymbolicFactorHelperType F(PA, league_size);
for (int i=start;i<niter;++i) {
timer.reset();
F.createNonZeroPattern(Uplo::Upper, UU);
t_symbolic += timer.seconds() * (i>=0);
//.........这里部分代码省略.........
示例7: exampleTriSolvePerformance
KOKKOS_INLINE_FUNCTION
int exampleTriSolvePerformance(const string file_input,
const OrdinalType nrhs,
const OrdinalType nb,
const int niter,
const int nthreads,
const int max_task_dependence,
const int team_size,
const bool team_interface,
const bool skip_serial,
const bool verbose) {
typedef ValueType value_type;
typedef OrdinalType ordinal_type;
typedef SizeType size_type;
typedef TaskFactory<Kokkos::Experimental::TaskPolicy<SpaceType>,
Kokkos::Experimental::Future<int,SpaceType> > TaskFactoryType;
typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> CrsMatrixBaseType;
typedef GraphHelper_Scotch<CrsMatrixBaseType> GraphHelperType;
typedef CrsMatrixView<CrsMatrixBaseType> CrsMatrixViewType;
typedef TaskView<CrsMatrixViewType,TaskFactoryType> CrsTaskViewType;
typedef CrsMatrixBase<CrsTaskViewType,ordinal_type,size_type,SpaceType,MemoryTraits> CrsHierMatrixBaseType;
typedef CrsMatrixView<CrsHierMatrixBaseType> CrsHierMatrixViewType;
typedef TaskView<CrsHierMatrixViewType,TaskFactoryType> CrsHierTaskViewType;
typedef DenseMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> DenseMatrixBaseType;
typedef DenseMatrixView<DenseMatrixBaseType> DenseMatrixViewType;
typedef TaskView<DenseMatrixViewType,TaskFactoryType> DenseTaskViewType;
typedef DenseMatrixBase<DenseTaskViewType,ordinal_type,size_type,SpaceType,MemoryTraits> DenseHierMatrixBaseType;
typedef DenseMatrixView<DenseHierMatrixBaseType> DenseHierMatrixViewType;
typedef TaskView<DenseHierMatrixViewType,TaskFactoryType> DenseHierTaskViewType;
int r_val = 0;
Kokkos::Impl::Timer timer;
double
t_import = 0.0,
t_reorder = 0.0,
t_solve_seq = 0.0,
t_solve_task = 0.0;
const int start = -2;
cout << "TriSolvePerformance:: import input file = " << file_input << endl;
CrsMatrixBaseType AA("AA");
{
timer.reset();
ifstream in;
in.open(file_input);
if (!in.good()) {
cout << "Failed in open the file: " << file_input << endl;
return ++r_val;
}
AA.importMatrixMarket(in);
t_import = timer.seconds();
if (verbose)
cout << AA << endl;
}
cout << "TriSolvePerformance:: import input file::time = " << t_import << endl;
CrsMatrixBaseType UU("UU");
DenseMatrixBaseType BB("BB", AA.NumRows(), nrhs);
cout << "TriSolvePerformance:: reorder the matrix and partition right hand side, nb = " << nb << endl;
CrsHierMatrixBaseType HU("HU");
DenseHierMatrixBaseType HB("HB");
{
timer.reset();
GraphHelperType S(AA);
S.computeOrdering();
CrsMatrixBaseType PA("Permuted AA");
PA.copy(S.PermVector(), S.InvPermVector(), AA);
UU.copy(Uplo::Upper, PA);
CrsMatrixHelper::flat2hier(Uplo::Upper, UU, HU,
S.NumBlocks(),
S.RangeVector(),
S.TreeVector());
DenseMatrixHelper::flat2hier(BB, HB,
S.NumBlocks(),
S.RangeVector(),
nb);
t_reorder = timer.seconds();
cout << "TriSolvePerformance:: Hier (dof, nnz) = " << HU.NumRows() << ", " << HU.NumNonZeros() << endl;
//.........这里部分代码省略.........
示例8: exampleDenseGemmByBlocks
KOKKOS_INLINE_FUNCTION
int exampleDenseGemmByBlocks(const OrdinalType mmin,
const OrdinalType mmax,
const OrdinalType minc,
const OrdinalType k,
const OrdinalType mb,
const int max_concurrency,
const int max_task_dependence,
const int team_size,
const int mkl_nthreads,
const bool check,
const bool verbose) {
typedef ValueType value_type;
typedef OrdinalType ordinal_type;
typedef SizeType size_type;
typedef TaskFactory<Kokkos::Experimental::TaskPolicy<SpaceType>,
Kokkos::Experimental::Future<int,SpaceType> > TaskFactoryType;
typedef DenseMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> DenseMatrixBaseType;
typedef DenseMatrixView<DenseMatrixBaseType> DenseMatrixViewType;
typedef TaskView<DenseMatrixViewType,TaskFactoryType> DenseTaskViewType;
typedef DenseMatrixBase<DenseTaskViewType,ordinal_type,size_type,SpaceType,MemoryTraits> DenseHierMatrixBaseType;
typedef DenseMatrixView<DenseHierMatrixBaseType> DenseHierMatrixViewType;
typedef TaskView<DenseHierMatrixViewType,TaskFactoryType> DenseHierTaskViewType;
int r_val = 0;
Kokkos::Impl::Timer timer;
double t = 0.0;
cout << "DenseGemmByBlocks:: test matrices "
<<":: mmin = " << mmin << " , mmax = " << mmax << " , minc = " << minc << " , k = "<< k << " , mb = " << mb << endl;
const size_t max_task_size = (3*sizeof(DenseTaskViewType)+196); // when 128 error
//cout << "max task size = "<< max_task_size << endl;
typename TaskFactoryType::policy_type policy(max_concurrency,
max_task_size,
max_task_dependence,
team_size);
TaskFactoryType::setMaxTaskDependence(max_task_dependence);
TaskFactoryType::setPolicy(&policy);
ostringstream os;
os.precision(3);
os << scientific;
for (ordinal_type m=mmin;m<=mmax;m+=minc) {
os.str("");
DenseMatrixBaseType AA, BB, CC("CC", m, m), CB("CB", m, m);
if (ArgTransA == Trans::NoTranspose)
AA = DenseMatrixBaseType("AA", m, k);
else
AA = DenseMatrixBaseType("AA", k, m);
if (ArgTransB == Trans::NoTranspose)
BB = DenseMatrixBaseType("BB", k, m);
else
BB = DenseMatrixBaseType("BB", m, k);
for (ordinal_type j=0;j<AA.NumCols();++j)
for (ordinal_type i=0;i<AA.NumRows();++i)
AA.Value(i,j) = 2.0*((value_type)rand()/(RAND_MAX)) - 1.0;
for (ordinal_type j=0;j<BB.NumCols();++j)
for (ordinal_type i=0;i<BB.NumRows();++i)
BB.Value(i,j) = 2.0*((value_type)rand()/(RAND_MAX)) - 1.0;
for (ordinal_type j=0;j<CC.NumCols();++j)
for (ordinal_type i=0;i<CC.NumRows();++i)
CC.Value(i,j) = 2.0*((value_type)rand()/(RAND_MAX)) - 1.0;
CB.copy(CC);
const double flop = get_flop_gemm<value_type>(m, m, k);
#ifdef HAVE_SHYLUTACHO_MKL
mkl_set_num_threads(mkl_nthreads);
#endif
os << "DenseGemmByBlocks:: m = " << m << " n = " << m << " k = " << k;
if (check) {
timer.reset();
DenseTaskViewType A(&AA), B(&BB), C(&CB);
Gemm<ArgTransA,ArgTransB,AlgoGemm::ExternalBlas>::invoke
(TaskFactoryType::Policy(),
TaskFactoryType::Policy().member_single(),
1.0, A, B, 1.0, C);
t = timer.seconds();
os << ":: Serial Performance = " << (flop/t/1.0e9) << " [GFLOPs] ";
}
{
DenseHierMatrixBaseType HA, HB, HC;
DenseMatrixHelper::flat2hier(AA, HA, mb, mb);
//.........这里部分代码省略.........
示例9: exampleCholByBlocks
int exampleCholByBlocks(const std::string file_input,
const int treecut,
const int prunecut,
const int fill_level,
const int rows_per_team,
const int max_concurrency,
const int max_task_dependence,
const int team_size,
const bool check,
const bool verbose) {
typedef typename
Kokkos::Impl::is_space<DeviceSpaceType>::host_mirror_space::execution_space HostSpaceType ;
const bool detail = false;
std::cout << "DeviceSpace:: "; DeviceSpaceType::print_configuration(std::cout, detail);
std::cout << "HostSpace:: "; HostSpaceType::print_configuration(std::cout, detail);
// for simple test, let's use host space only here, for device it needs mirroring.
typedef CrsMatrixBase<value_type,ordinal_type,size_type,HostSpaceType> CrsMatrixBaseHostType;
typedef CrsMatrixView<CrsMatrixBaseHostType> CrsMatrixViewHostType;
typedef GraphTools<ordinal_type,size_type,HostSpaceType> GraphToolsHostType;
typedef GraphTools_Scotch<ordinal_type,size_type,HostSpaceType> GraphToolsHostType_Scotch;
typedef GraphTools_CAMD<ordinal_type,size_type,HostSpaceType> GraphToolsHostType_CAMD;
typedef IncompleteSymbolicFactorization<CrsMatrixBaseHostType> IncompleteSymbolicFactorizationType;
typedef Kokkos::Experimental::TaskPolicy<DeviceSpaceType> PolicyType;
typedef TaskView<CrsMatrixViewHostType> CrsTaskViewHostType;
typedef CrsMatrixBase<CrsTaskViewHostType,ordinal_type,size_type,HostSpaceType> CrsHierBaseHostType;
typedef CrsMatrixView<CrsHierBaseHostType> CrsHierViewHostType;
typedef TaskView<CrsHierViewHostType> CrsTaskHierViewHostType;
int r_val = 0;
Kokkos::Impl::Timer timer;
///
/// Read from matrix market
///
/// input - file
/// output - AA_host
///
CrsMatrixBaseHostType AA_host("AA_host");
timer.reset();
{
std::ifstream in;
in.open(file_input);
if (!in.good()) {
std::cout << "Failed in open the file: " << file_input << std::endl;
return -1;
}
MatrixMarket::read(AA_host, in);
}
double t_read = timer.seconds();
if (verbose)
AA_host.showMe(std::cout) << std::endl;
///
/// Create a graph structure for Scotch and CAMD (rptr, cidx)
///
/// rptr and cidx are need to be set up for Scotch and CAMD
///
typename GraphToolsHostType::size_type_array rptr("Graph::RowPtrArray", AA_host.NumRows() + 1);
typename GraphToolsHostType::ordinal_type_array cidx("Graph::ColIndexArray", AA_host.NumNonZeros());
///
/// Run Scotch
///
/// input - rptr, cidx, A_host
/// output - S (perm, iperm, nblks, range, tree), AA_scotch_host (permuted)
///
timer.reset();
GraphToolsHostType::getGraph(rptr, cidx, AA_host);
double t_graph = timer.seconds();
GraphToolsHostType_Scotch S;
S.setGraph(AA_host.NumRows(), rptr, cidx);
S.setSeed(0);
S.setTreeLevel();
S.setStrategy( SCOTCH_STRATSPEED
| SCOTCH_STRATLEVELMAX
| SCOTCH_STRATLEVELMIN
| SCOTCH_STRATLEAFSIMPLE
| SCOTCH_STRATSEPASIMPLE
);
timer.reset();
S.computeOrdering(treecut);
double t_scotch = timer.seconds();
if (verbose)
S.showMe(std::cout) << std::endl;
CrsMatrixBaseHostType AA_scotch_host("AA_scotch_host");
AA_scotch_host.createConfTo(AA_host);
//.........这里部分代码省略.........
示例10: exampleSymbolicFactor
// Imports a MatrixMarket file, optionally reorders the matrix with the Scotch
// graph helper, and builds the upper-triangular non-zero pattern, printing
// the wall time of each stage.
//
//   file_input  - path of the MatrixMarket file to import
//   treecut     - forwarded to GraphHelper computeOrdering
//   minblksize  - forwarded to GraphHelper computeOrdering
//   seed        - forwarded to the GraphHelper constructor
//   fill_level  - forwarded to createNonZeroPattern
//   league_size - forwarded to the SymbolicFactorHelper constructor
//   reorder     - when false the input matrix is used as is
//   verbose     - additionally stream the matrices and helpers to cout
//
// Returns 0 on success and 1 when the input file cannot be opened.
KOKKOS_INLINE_FUNCTION
int exampleSymbolicFactor(const string file_input,
                          const int treecut,
                          const int minblksize,
                          const int seed,
                          const int fill_level,
                          const int league_size,
                          const bool reorder,
                          const bool verbose) {
  using value_type   = ValueType;
  using ordinal_type = OrdinalType;
  using size_type    = SizeType;

  using CrsMatrixBaseType =
    CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits>;
  using GraphHelperType          = GraphHelper_Scotch<CrsMatrixBaseType>;
  using SymbolicFactorHelperType = SymbolicFactorHelper<CrsMatrixBaseType>;

  int r_val = 0;

  Kokkos::Impl::Timer stopwatch;
  double elapsed = 0.0;

  // Stage 1: import the matrix.
  cout << "SymbolicFactor:: import input file = " << file_input << endl;
  CrsMatrixBaseType AA("AA");
  {
    stopwatch.reset();

    ifstream input;
    input.open(file_input);
    if (!input.good()) {
      cout << "Failed in open the file: " << file_input << endl;
      return ++r_val;
    }
    AA.importMatrixMarket(input);

    elapsed = stopwatch.seconds();

    cout << "SymbolicFactor:: AA nnz = " << AA.NumNonZeros() << endl;
    if (verbose)
      cout << AA << endl;
  }
  cout << "SymbolicFactor:: import input file::time = " << elapsed << endl;

  // Stage 2: optional reordering. The helper is constructed either way.
  CrsMatrixBaseType PA("Permuted AA");
  GraphHelperType S(AA, seed);
  if (!reorder) {
    PA = AA;
    elapsed = 0.0;
  } else {
    stopwatch.reset();

    S.computeOrdering(treecut, minblksize);
    PA.copy(S.PermVector(), S.InvPermVector(), AA);

    elapsed = stopwatch.seconds();

    if (verbose)
      cout << S << endl
           << PA << endl;
  }
  cout << "SymbolicFactor:: reorder the matrix::time = " << elapsed << endl;

  // Stage 3: symbolic factorization into UU.
  CrsMatrixBaseType UU("UU");
  {
    stopwatch.reset();

    SymbolicFactorHelperType factor_helper(PA, league_size);
    factor_helper.createNonZeroPattern(fill_level, Uplo::Upper, UU);

    elapsed = stopwatch.seconds();

    cout << "SymbolicFactor:: UU nnz = " << UU.NumNonZeros() << endl;
    if (verbose) {
      cout << factor_helper << endl;
      cout << UU << endl;
    }
  }
  cout << "SymbolicFactor:: factorize the matrix::time = " << elapsed << endl;

  return r_val;
}
示例11: exampleCholUnblocked
int exampleCholUnblocked(const std::string file_input,
const int treecut,
const int prunecut,
const int fill_level,
const int rows_per_team,
const bool verbose) {
typedef typename
Kokkos::Impl::is_space<DeviceSpaceType>::host_mirror_space::execution_space HostSpaceType ;
const bool detail = false;
std::cout << "DeviceSpace:: "; DeviceSpaceType::print_configuration(std::cout, detail);
std::cout << "HostSpace:: "; HostSpaceType::print_configuration(std::cout, detail);
typedef CrsMatrixBase<value_type,ordinal_type,size_type,HostSpaceType> CrsMatrixBaseHostType;
typedef GraphTools<ordinal_type,size_type,HostSpaceType> GraphToolsHostType;
typedef GraphTools_Scotch<ordinal_type,size_type,HostSpaceType> GraphToolsHostType_Scotch;
typedef GraphTools_CAMD<ordinal_type,size_type,HostSpaceType> GraphToolsHostType_CAMD;
typedef IncompleteSymbolicFactorization<CrsMatrixBaseHostType> IncompleteSymbolicFactorizationType;
typedef Kokkos::Experimental::TaskPolicy<DeviceSpaceType> PolicyType;
typedef CrsMatrixBase<value_type,ordinal_type,size_type,DeviceSpaceType> CrsMatrixBaseDeviceType;
typedef CrsMatrixView<CrsMatrixBaseDeviceType> CrsMatrixViewDeviceType;
typedef TaskView<CrsMatrixViewDeviceType> CrsTaskViewDeviceType;
int r_val = 0;
Kokkos::Impl::Timer timer;
CrsMatrixBaseHostType AA_host("AA_host");
timer.reset();
{
std::ifstream in;
in.open(file_input);
if (!in.good()) {
std::cout << "Failed in open the file: " << file_input << std::endl;
return -1;
}
MatrixMarket::read(AA_host, in);
}
double t_read = timer.seconds();
if (verbose)
AA_host.showMe(std::cout) << std::endl;
typename GraphToolsHostType::size_type_array rptr("Graph::RowPtrArray", AA_host.NumRows() + 1);
typename GraphToolsHostType::ordinal_type_array cidx("Graph::ColIndexArray", AA_host.NumNonZeros());
timer.reset();
GraphToolsHostType::getGraph(rptr, cidx, AA_host);
double t_graph = timer.seconds();
GraphToolsHostType_Scotch S;
S.setGraph(AA_host.NumRows(), rptr, cidx);
S.setSeed(0);
S.setTreeLevel();
S.setStrategy( SCOTCH_STRATSPEED
| SCOTCH_STRATLEVELMAX
| SCOTCH_STRATLEVELMIN
| SCOTCH_STRATLEAFSIMPLE
| SCOTCH_STRATSEPASIMPLE
);
timer.reset();
S.computeOrdering(treecut);
double t_scotch = timer.seconds();
S.pruneTree(prunecut);
if (verbose)
S.showMe(std::cout) << std::endl;
CrsMatrixBaseHostType BB_host("BB_host");
BB_host.createConfTo(AA_host);
CrsMatrixTools::copy(BB_host,
S.PermVector(),
S.InvPermVector(),
AA_host);
if (verbose)
BB_host.showMe(std::cout) << std::endl;
timer.reset();
GraphToolsHostType::getGraph(rptr, cidx, BB_host);
t_graph += timer.seconds();
GraphToolsHostType_CAMD C;
C.setGraph(BB_host.NumRows(),
rptr, cidx,
S.NumBlocks(),
S.RangeVector());
timer.reset();
C.computeOrdering();
double t_camd = timer.seconds();
if (verbose)
//.........这里部分代码省略.........
示例12: exampleStatByBlocks
KOKKOS_INLINE_FUNCTION
int exampleStatByBlocks(const string file_input,
const int treecut,
const int minblksize,
const int prunecut,
const int seed,
const int fill_level,
const int league_size,
const int histogram_size,
const bool verbose) {
typedef ValueType value_type;
typedef OrdinalType ordinal_type;
typedef SizeType size_type;
typedef CrsMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> CrsMatrixBaseType;
typedef GraphHelper_Scotch<CrsMatrixBaseType> GraphHelperType;
typedef SymbolicFactorHelper<CrsMatrixBaseType> SymbolicFactorHelperType;
typedef CrsMatrixView<CrsMatrixBaseType> CrsMatrixViewType;
typedef CrsMatrixBase<CrsMatrixViewType,ordinal_type,size_type,SpaceType,MemoryTraits> CrsHierMatrixBaseType;
int r_val = 0;
Kokkos::Impl::Timer timer;
double t = 0.0;
cout << "StatByBlocks:: import input file = " << file_input << endl;
CrsMatrixBaseType AA("AA");
{
timer.reset();
ifstream in;
in.open(file_input);
if (!in.good()) {
cout << "Failed in open the file: " << file_input << endl;
return ++r_val;
}
AA.importMatrixMarket(in);
t = timer.seconds();
if (verbose)
cout << AA << endl;
}
cout << "StatByBlocks:: import input file::time = " << t << endl;
CrsMatrixBaseType UU("UU");
CrsHierMatrixBaseType HU("HU");
{
CrsMatrixBaseType PA("Permuted AA");
typename GraphHelperType::size_type_array rptr(AA.Label()+"Graph::RowPtrArray", AA.NumRows() + 1);
typename GraphHelperType::ordinal_type_array cidx(AA.Label()+"Graph::ColIndexArray", AA.NumNonZeros());
AA.convertGraph(rptr, cidx);
GraphHelperType S(AA.Label()+"ScotchHelper",
AA.NumRows(),
rptr,
cidx,
seed);
{
timer.reset();
S.computeOrdering(treecut, minblksize);
S.pruneTree(prunecut);
PA.copy(S.PermVector(), S.InvPermVector(), AA);
t = timer.seconds();
if (verbose)
cout << S << endl;
}
cout << "StatByBlocks:: reorder the matrix::time = " << t << endl;
{
SymbolicFactorHelperType F(PA, league_size);
timer.reset();
F.createNonZeroPattern(fill_level, Uplo::Upper, UU);
t = timer.seconds();
cout << "StatByBlocks:: AA (nnz) = " << AA.NumNonZeros() << ", UU (nnz) = " << UU.NumNonZeros() << endl;
}
cout << "StatByBlocks:: symbolic factorization::time = " << t << endl;
{
timer.reset();
CrsMatrixHelper::flat2hier(Uplo::Upper, UU, HU,
S.NumBlocks(),
S.RangeVector(),
S.TreeVector());
for (ordinal_type k=0;k<HU.NumNonZeros();++k)
HU.Value(k).fillRowViewArray();
t = timer.seconds();
cout << "StatByBlocks:: Hier (dof, nnz) = " << HU.NumRows() << ", " << HU.NumNonZeros() << endl;
}
//.........这里部分代码省略.........
示例13: exampleDenseTrsmMKL
// Benchmarks the BLAS TRSM routine (through Teuchos::BLAS) on upper-triangular
// m x m systems with k right-hand sides, for m = mmin, mmin+minc, ... <= mmax,
// and prints the achieved GFLOP rate for each size.
//
//   mmin, mmax, minc - first size, last size (inclusive) and size increment
//   k                - number of right-hand-side columns
//   verbose          - accepted for interface parity; not used in this body
//
// Returns r_val, which this body never changes from 0.
// NOTE(review): minc <= 0 with mmin <= mmax never terminates - callers must
// pass a positive increment.
KOKKOS_INLINE_FUNCTION
int exampleDenseTrsmMKL(const OrdinalType mmin,
                        const OrdinalType mmax,
                        const OrdinalType minc,
                        const OrdinalType k,
                        const bool verbose) {
  typedef ValueType value_type;
  typedef OrdinalType ordinal_type;
  typedef SizeType size_type;

  typedef DenseMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> DenseMatrixBaseType;

  int r_val = 0;

  Kokkos::Impl::Timer timer;
  double t = 0.0;

  // Banner names this benchmark (it previously said "DenseGemmMKL", a
  // copy-paste leftover that disagreed with the per-size lines below).
  cout << "DenseTrsmMKL:: test matrices "
       <<":: mmin = " << mmin << " , mmax = " << mmax << " , minc = " << minc << " , k = "<< k << endl;

  ostringstream os;
  os.precision(3);
  os << scientific;

  for (ordinal_type m=mmin;m<=mmax;m+=minc) {
    os.str("");

    DenseMatrixBaseType AA("AA", m, m), BB("BB", m, k), BC("BC", m, k);

    // Set up the upper triangle: fixed diagonal of 10, random values in
    // [-1, 1] above it. The strictly lower part is not written here.
    for (ordinal_type j=0;j<AA.NumCols();++j) {
      AA.Value(j,j) = 10.0;
      for (ordinal_type i=0;i<j;++i)
        AA.Value(i,j) = 2.0*((value_type)rand()/(RAND_MAX)) - 1.0;
    }

    // Fill BB with ones, then overwrite it with BC = AA^H * BB from GEMM.
    // Presumably this makes the expected TRSM solution all ones (assuming the
    // untouched lower triangle of AA is zero) - TODO confirm.
    for (ordinal_type j=0;j<BB.NumCols();++j)
      for (ordinal_type i=0;i<BB.NumRows();++i)
        BB.Value(i,j) = 1.0;

    // One BLAS wrapper serves both the setup GEMM and the timed TRSM, so no
    // wrapper is constructed inside the timed region.
    Teuchos::BLAS<ordinal_type,value_type> blas;

    blas.GEMM(Teuchos::CONJ_TRANS, Teuchos::NO_TRANS,
              m, k, m,
              1.0,
              AA.ValuePtr(), AA.ColStride(),
              BB.ValuePtr(), BB.ColStride(),
              0.0,
              BC.ValuePtr(), BC.ColStride());
    BB.copy(BC);

    const double flop = get_flop_trsm_upper<value_type>(m, k);

    os << "DenseTrsmMKL:: m = " << m << " k = " << k;
    {
      const ordinal_type mm = AA.NumRows();
      const ordinal_type nn = BB.NumCols();

      // Timed region: only the triangular solve.
      timer.reset();
      blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::CONJ_TRANS,
                Teuchos::NON_UNIT_DIAG,
                mm, nn,
                1.0,
                AA.ValuePtr(), AA.ColStride(),
                BB.ValuePtr(), BB.ColStride());
      t = timer.seconds();

      os << ":: MKL Performance = " << (flop/t/1.0e9) << " [GFLOPs] ";
    }
    cout << os.str() << endl;
  }

  return r_val;
}
示例14: exampleMatrixMarket
int exampleMatrixMarket(const std::string file_input,
const bool verbose) {
typedef typename
Kokkos::Impl::is_space<DeviceSpaceType>::host_mirror_space::execution_space HostSpaceType ;
const bool detail = false;
std::cout << "DeviceSpace:: "; DeviceSpaceType::print_configuration(std::cout, detail);
std::cout << "HostSpace:: "; HostSpaceType::print_configuration(std::cout, detail);
typedef CrsMatrixBase<value_type,ordinal_type,size_type,HostSpaceType> CrsMatrixBaseHostType;
int r_val = 0;
Kokkos::Impl::Timer timer;
CrsMatrixBaseHostType AA("AA");
timer.reset();
{
std::ifstream in;
in.open(file_input);
if (!in.good()) {
std::cout << "Failed in open the file: " << file_input << std::endl;
return -1;
}
MatrixMarket::read(AA, in);
}
double t_read = timer.seconds();
timer.reset();
{
std::string file_output = "mm-test-output.mtx";
std::ofstream out;
out.open(file_output);
if (!out.good()) {
std::cout << "Failed in open the file: " << file_output << std::endl;
return -1;
}
MatrixMarket::write(out, AA, "%% Test output");
}
double t_write = timer.seconds();
{
const auto prec = std::cout.precision();
std::cout.precision(4);
std::cout << std::scientific
<< "MatrixMarket:: dimension = " << AA.NumRows() << " x " << AA.NumCols()
<< ", " << " nnz = " << AA.NumNonZeros() << ", "
<< "read = " << t_read << " [sec], "
<< "write = " << t_write << " [sec] "
<< std::endl;
std::cout.unsetf(std::ios::scientific);
std::cout.precision(prec);
}
if (verbose) {
AA.showMe(std::cout) << std::endl;
}
CrsMatrixBaseHostType BB("BB");
BB.createConfTo(AA);
CrsMatrixTools::copy(BB, Uplo::Upper, 0, AA);
if (verbose) {
BB.setLabel("Copy::AA:Upper::0"); BB.showMe(std::cout) << std::endl;
}
CrsMatrixTools::copy(BB, Uplo::Upper, 1, AA);
if (verbose) {
BB.setLabel("Copy::AA:Upper::1"); BB.showMe(std::cout) << std::endl;
}
CrsMatrixTools::copy(BB, Uplo::Lower, 0, AA);
if (verbose) {
BB.setLabel("Copy::AA:Lower::0"); BB.showMe(std::cout) << std::endl;
}
CrsMatrixTools::copy(BB, Uplo::Lower, 1, AA);
if (verbose) {
BB.setLabel("Copy::AA:Lower::1"); BB.showMe(std::cout) << std::endl;
}
return r_val;
}
示例15: ComputeBasis_HGRAD_Vector
int ComputeBasis_HGRAD_Vector(const ordinal_type nworkset,
const ordinal_type C,
const ordinal_type order,
const bool verbose) {
typedef Vector<VectorTagType> VectorType;
typedef typename VectorTagType::value_type ValueType;
constexpr int VectorLength = VectorTagType::length;
Teuchos::RCP<std::ostream> verboseStream;
Teuchos::oblackholestream bhs; // outputs nothing
if (verbose)
verboseStream = Teuchos::rcp(&std::cout, false);
else
verboseStream = Teuchos::rcp(&bhs, false);
Teuchos::oblackholestream oldFormatState;
oldFormatState.copyfmt(std::cout);
typedef typename
Kokkos::Impl::is_space<DeviceSpaceType>::host_mirror_space::execution_space HostSpaceType ;
*verboseStream << "DeviceSpace:: "; DeviceSpaceType::print_configuration(*verboseStream, false);
*verboseStream << "HostSpace:: "; HostSpaceType::print_configuration(*verboseStream, false);
*verboseStream << "VectorLength:: " << (VectorLength) << "\n";
using BasisTypeHost = Basis_HGRAD_HEX_C1_FEM<HostSpaceType,ValueType,ValueType>;
using ImplBasisType = Impl::Basis_HGRAD_HEX_C1_FEM;
using range_type = Kokkos::pair<ordinal_type,ordinal_type>;
constexpr size_t LLC_CAPACITY = 32*1024*1024;
Intrepid2::Test::Flush<LLC_CAPACITY,DeviceSpaceType> flush;
Kokkos::Impl::Timer timer;
double t_vectorize = 0;
int errorFlag = 0;
BasisTypeHost hostBasis;
const auto cellTopo = hostBasis.getBaseCellTopology();
auto cubature = DefaultCubatureFactory::create<DeviceSpaceType,ValueType,ValueType>(cellTopo, order);
const ordinal_type
numCells = C,
numCellsAdjusted = C/VectorLength + (C%VectorLength > 0),
numVerts = cellTopo.getVertexCount(),
numDofs = hostBasis.getCardinality(),
numPoints = cubature->getNumPoints(),
spaceDim = cubature->getDimension();
Kokkos::DynRankView<ValueType,HostSpaceType> dofCoordsHost("dofCoordsHost", numDofs, spaceDim);
hostBasis.getDofCoords(dofCoordsHost);
const auto refNodesHost = Kokkos::subview(dofCoordsHost, range_type(0, numVerts), Kokkos::ALL());
// pertub nodes
Kokkos::DynRankView<VectorType,HostSpaceType> worksetCellsHost("worksetCellsHost", numCellsAdjusted, numVerts, spaceDim);
for (ordinal_type cell=0;cell<numCells;++cell) {
for (ordinal_type i=0;i<numVerts;++i)
for (ordinal_type j=0;j<spaceDim;++j) {
ValueType val = (rand()/(RAND_MAX + 1.0))*0.2 -0.1;
worksetCellsHost(cell/VectorLength, i, j)[cell%VectorLength] = refNodesHost(i, j) + val;
}
}
auto worksetCells = Kokkos::create_mirror_view(typename DeviceSpaceType::memory_space(), worksetCellsHost);
Kokkos::deep_copy(worksetCells, worksetCellsHost);
Kokkos::DynRankView<ValueType,DeviceSpaceType> refPoints("refPoints", numPoints, spaceDim), refWeights("refWeights", numPoints);
cubature->getCubature(refPoints, refWeights);
std::cout
<< "===============================================================================\n"
<< " Performance Test evaluating ComputeBasis \n"
<< " # of workset = " << nworkset << "\n"
<< " Test Array Structure (C,F,P,D) = " << numCells << ", " << numDofs << ", " << numPoints << ", " << spaceDim << "\n"
<< "===============================================================================\n";
*verboseStream
<< "\n"
<< "===============================================================================\n"
<< "TEST 1: evaluateFields vector version\n"
<< "===============================================================================\n";
try {
Kokkos::DynRankView<ValueType,DeviceSpaceType>
refBasisValues("refBasisValues", numDofs, numPoints),
refBasisGrads ("refBasisGrads", numDofs, numPoints, spaceDim);
ImplBasisType::getValues<DeviceSpaceType>(refBasisValues, refPoints, OPERATOR_VALUE);
ImplBasisType::getValues<DeviceSpaceType>(refBasisGrads, refPoints, OPERATOR_GRAD);
const ordinal_type ibegin = -3;
// testing vertical approach
{
Kokkos::DynRankView<VectorType,DeviceSpaceType>
weightedBasisValues("weightedBasisValues", numCellsAdjusted, numDofs, numPoints),
weightedBasisGrads ("weightedBasisGrads", numCellsAdjusted, numDofs, numPoints, spaceDim);
//.........这里部分代码省略.........